Completed
Push — master ( 43645f...f403d7 )
by Makoto
56s
created

tumdlr.TumblrPost.__init__()   B

Complexity

Conditions 2

Size

Total Lines 24

Duplication

Lines 0
Ratio 0 %
Metric Value
dl 0
loc 24
rs 8.9713
cc 2
1
import logging
2
import os
3
4
from pathlib import Path
5
from yurl import URL
6
from youtube_dl import YoutubeDL
7
from hashlib import md5
8
9
from tumdlr.downloader import sanitize_filename, download
10
from tumdlr.errors import TumdlrDownloadError, TumdlrParserError
11
12
13
class TumblrPost:
    """
    This is the base container class for all Tumblr post types. It contains data that is always available with any
    type of post.

    Additional supported post types may extend this class to provide additional metadata parsing
    """
    def __init__(self, post, blog):
        """
        Store the raw API payload and immediately parse it via ``_parse_post()``.

        Args:
            post(dict): API response
            blog(tumdlr.api.TumblrBlog): Parent blog

        Raises:
            TumdlrParserError: If ``_parse_post()`` raises for any reason
        """
        self._post = post
        self.blog = blog
        self.log = logging.getLogger('tumdlr.containers.post')

        self.id         = None  # type: int
        self.type       = None  # type: str
        self.url        = None  # type: URL
        self.tags       = set()
        self.post_date  = None  # type: str
        self.note_count = None  # type: int

        # Downloadable resources attached to this post; populated by subclasses while parsing
        self.files = []

        try:
            self._parse_post()
        except Exception as e:
            # Logger.warn() is a deprecated alias; warning() is the supported method
            self.log.warning('Failed to parse post data: %r', self, exc_info=e)
            # Chain explicitly so the root cause is recorded as __cause__ on the parser error
            raise TumdlrParserError(post_data=post) from e

    @property
    def is_text(self):
        """
        Returns:
            bool: True when the post type is 'text'
        """
        return self.type == 'text'

    @property
    def is_photo(self):
        """
        Returns:
            bool: True when the post type is 'photo' or 'link'
        """
        return self.type in ('photo', 'link')

    @property
    def is_video(self):
        """
        Returns:
            bool: True when the post type is 'video'
        """
        return self.type == 'video'

    def _parse_post(self):
        """
        Populate the common post attributes from the raw API payload.

        'id', 'type' and 'date' are required keys (a missing one raises KeyError); 'post_url', 'tags' and
        'note_count' are optional.
        """
        self.id         = self._post['id']
        self.type       = self._post['type']
        self.url        = URL(self._post['post_url']) if 'post_url' in self._post else None
        self.tags       = set(self._post.get('tags', []))
        self.note_count = self._post.get('note_count')
        self.post_date  = self._post['date']

    def __repr__(self):
        return "<TumblrPost id='{id}' type='{type}' url='{url}'>"\
            .format(id=self.id, type=self.type, url=self.url)

    def __str__(self):
        # Posts without a URL render as an empty string
        return self.url.as_string() if self.url else ''
83
84
85
class TumblrPhotoSet(TumblrPost):
    """
    Container class for Photo and Photo Link post types
    """
    def __init__(self, post, blog):
        """
        Args:
            post(dict): API response
            blog(tumdlr.api.blog.TumblrBlog): Parent blog
        """
        # Define the attribute up front so __repr__ stays usable even if parsing fails before it is assigned
        self.title = None

        super().__init__(post, blog)

    def _parse_post(self):
        """
        Parse all available photos using the best image sizes available
        """
        super()._parse_post()
        self.title  = self._post.get('caption', self._post.get('title'))

        photos = self._post.get('photos', [])
        is_photoset = (len(photos) > 1)

        for page_no, photo in enumerate(photos, 1):
            # Prefer the original upload; otherwise pick the widest alternative size.
            # max() needs a callable key - passing the string 'width' raises TypeError.
            best_size = photo.get('original_size') or max(photo['alt_sizes'], key=lambda size: size['width'])
            # page_no is False for single-photo posts so files can tell the two cases apart
            best_size['page_no'] = page_no if is_photoset else False
            self.files.append(TumblrPhoto(best_size, self))

    def __repr__(self):
        # The title may be None (no caption/title in the payload); show only its first line
        first_line = (self.title or '').split("\n")[0].strip()
        return "<TumblrPhotoSet title='{title}' id='{id}' photos='{count}'>"\
            .format(title=first_line, id=self.id, count=len(self.files))
116
117
118
class TumblrVideoPost(TumblrPost):
    """
    Container class for Video post types
    """
    def __init__(self, post, blog):
        """
        Args:
            post(dict): API response
            blog(tumdlr.api.blog.TumblrBlog): Parent blog
        """
        # Defaults must exist before the parent constructor runs, because it calls _parse_post()
        self.title          = None
        self.description    = None
        self.duration       = None
        self.format         = None

        super().__init__(post, blog)

    def _parse_post(self):
        """
        Parse the common post data, then look up the video metadata for the post URL with youtube-dl
        (metadata only, nothing is downloaded here) and register the video as a downloadable file
        """
        super()._parse_post()

        video_info = YoutubeDL().extract_info(self.url.as_string(), download=False)

        self.title = video_info.get('title')

        self.description    = video_info.get('description')
        # The key can be present with a None value, which int() would reject; treat that as 0
        self.duration       = int(video_info.get('duration') or 0)
        self.format         = video_info.get('format', 'Unknown')

        self.files.append(TumblrVideo(video_info, self))

    def __repr__(self):
        return "<TumblrVideoPost id='{id}'>".format(id=self.id)
155
156
157
class TumblrFile:
    """
    This is the base container class for all downloadable resources associated with Tumblr posts.
    """

    # Sub-directory name used when categorizing downloads by post type
    CATEGORY = 'misc'

    def __init__(self, data, container):
        """
        Args:
            data(dict): API response data
            container(TumblrPost): Parent container
        """
        self.log = logging.getLogger('tumdlr.containers.file')

        self._data      = data
        self.container  = container
        # Use the 'url' key when present, otherwise fall back to 'post_url'
        self.url        = URL(self._data.get('url', self._data.get('post_url')))

    def download(self, context, **kwargs):
        """
        Download the file to the location given by ``filepath()``.

        Args:
            context(tumdlr.main.Context): CLI request context
            kwargs(dict): Additional arguments to send with the download request

        Returns:
            str: Path to the saved file

        Raises:
            TumdlrDownloadError: If building the path or the download itself fails
        """
        try:
            # filepath() may add entries to kwargs before they are forwarded to the downloader
            filepath = str(self.filepath(context, kwargs))
            download(self.url.as_string(), filepath, **kwargs)
        except Exception as e:
            # Logger.warn() is a deprecated alias; warning() is the supported method
            self.log.warning('Post download failed: %r', self, exc_info=e)
            # Chain explicitly so the root cause is recorded as __cause__ on the download error
            raise TumdlrDownloadError(error_message=str(e), download_url=self.url.as_string()) from e

        return filepath

    def filepath(self, context, request_data):
        """
        Build the base directory the file should be saved under.

        Args:
            context(tumdlr.main.Context): CLI request context
            request_data(Optional[dict]): Additional arguments to send with the download request
                (not used by this base implementation)

        Returns:
            Path
        """
        # Construct the save basedir
        basedir = Path(context.config['Tumdlr']['SavePath'])

        # Are we categorizing by user?
        if context.config['Categorization']['User']:
            self.log.debug('Categorizing by user: %s', self.container.blog.name)
            basedir = basedir.joinpath(sanitize_filename(self.container.blog.name))

        # Are we categorizing by post type?
        if context.config['Categorization']['PostType']:
            self.log.debug('Categorizing by type: %s', self.CATEGORY)
            basedir = basedir.joinpath(self.CATEGORY)

        self.log.debug('Basedir constructed: %s', basedir)

        return basedir
216
217
218
class TumblrPhoto(TumblrFile):
    """
    Downloadable photo belonging to a photo post / photoset
    """

    CATEGORY = 'photos'

    def __init__(self, photo, photoset):
        """
        Args:
            photo(dict): Photo API data
            photoset(TumblrPhotoSet): Parent container
        """
        super().__init__(photo, photoset)

        self.width   = self._data.get('width')
        self.height  = self._data.get('height')
        # Position within the photoset, or False when the photo is not part of a multi-photo set
        self.page_no = self._data.get('page_no', False)

    def filepath(self, context, request_data):
        """
        Get the full file path to save the downloaded file to

        Args:
            context(tumdlr.main.Context): CLI request context
            request_data(dict): Additional arguments to send with the download request. Must contain a
                'progress_data' dict, which is updated in place.

        Returns:
            str: Save path including the file extension taken from the photo URL
        """
        assert isinstance(self.container, TumblrPhotoSet)
        filepath = super().filepath(context, request_data)

        request_data['progress_data']['Caption'] = self.container.title

        # Posts without a caption/title have nothing to name the file after, so fall back to the post ID
        # rather than producing names like 'p1_None'
        name = self.container.title or str(self.container.id)

        # Are we categorizing by photosets?
        if self.page_no and context.config['Categorization']['Photosets']:
            self.log.debug('Categorizing by photoset: %s', self.container.id)
            filepath = filepath.joinpath(sanitize_filename(str(self.container.id)))

        # Prepend the page number for photosets
        if self.page_no:
            filepath = filepath.joinpath(sanitize_filename('p{pn}_{pt}'.format(pn=self.page_no, pt=name)))
            request_data['progress_data']['Photoset Page'] = '{cur} / {tot}'\
                .format(cur=self.page_no, tot=len(self.container.files))
        else:
            filepath = filepath.joinpath(sanitize_filename(name))

        # Work out the file extension and return
        return str(filepath) + os.path.splitext(self.url.as_string())[1]

    def __repr__(self):
        return "<TumblrPhoto url='{url}' width='{w}' height='{h}'>".format(url=self.url, w=self.width, h=self.height)

    def __str__(self):
        return self.url.as_string()
272
273
274
class TumblrVideo(TumblrFile):
    """
    Downloadable video file belonging to a video post
    """

    CATEGORY = 'videos'

    def __init__(self, video, vpost):
        """
        Args:
            video(dict): Video API data
            vpost(TumblrVideoPost): Parent container
        """
        super().__init__(video, vpost)

    def filepath(self, context, request_data):
        """
        Build the save path for the video and record its metadata in the progress data

        Args:
            context(tumdlr.main.Context): CLI request context
            request_data(dict): Additional arguments to send with the download request; its
                'progress_data' dict is updated in place

        Returns:
            str: Save path including the file extension
        """
        assert isinstance(self.container, TumblrVideoPost)
        post = self.container
        target = super().filepath(context, request_data)

        # Human readable running time; the minutes part is omitted for clips under a minute
        mins = int(post.duration / 60)
        secs = post.duration % 60
        if mins:
            running_time = '{} minutes {} seconds'.format(mins, secs)
        else:
            running_time = '{} seconds'.format(secs)

        progress = request_data['progress_data']
        if post.title:
            progress['Title'] = post.title
        progress['Description'] = post.description
        progress['Duration'] = running_time
        progress['Format'] = post.format

        # Name the file after the description, or a hash of the URL when there is none
        basename = post.description or md5(self.url.as_string().encode('utf-8')).hexdigest()
        target = target.joinpath(sanitize_filename(basename))

        # Append the file extension (mp4 unless the video data says otherwise)
        return '{path}.{ext}'.format(path=str(target), ext=self._data.get('ext', 'mp4'))

    def __repr__(self):
        return "<TumblrVideo id='{i}'>".format(i=self.container.id)

    def __str__(self):
        return self.url.as_string()
324