Completed
Push — master ( d0b7a4...d8a106 )
by Makoto
57s
created

tumdlr.TumblrVideo.__init__()   A

Complexity

Conditions 1

Size

Total Lines 7

Duplication

Lines 0
Ratio 0 %
Metric Value
dl 0
loc 7
rs 9.4285
cc 1
1
import logging
2
import os
3
4
from pathlib import Path
5
from yurl import URL
6
from youtube_dl import YoutubeDL
7
from hashlib import md5
8
9
from tumdlr.downloader import sanitize_filename, download
10
11
12
class TumblrPost:
    """
    Base container shared by every Tumblr post type. It stores the fields that are read from the API response for
    any post, whatever its type.

    Subclasses for specific post types override the parsing step to collect additional metadata
    """
    def __init__(self, post, blog):
        """
        Args:
            post(dict): API response
            blog(tumdlr.api.TumblrBlog): Parent blog
        """
        self._post = post
        self.blog = blog
        self.log = logging.getLogger('tumdlr.containers.post')

        # Placeholders only; the real values are assigned by _parse_post() at the end of construction
        self.id = None  # type: int
        self.type = None  # type: str
        self.url = None  # type: URL
        self.tags = set()
        self.post_date = None  # type: str
        self.note_count = None  # type: int

        # Must exist before _parse_post() runs, because subclasses append to it while parsing
        self.files = []
        self._parse_post()

    @property
    def is_text(self):
        """
        Returns:
            bool: True when the post type is 'text'
        """
        return self.type == 'text'

    @property
    def is_photo(self):
        """
        Returns:
            bool: True when the post type is 'photo' or 'link'
        """
        return self.type in ('photo', 'link')

    @property
    def is_video(self):
        """
        Returns:
            bool: True when the post type is 'video'
        """
        return self.type == 'video'

    def _parse_post(self):
        """
        Copy the common fields out of the raw API response onto this instance.

        'id', 'type' and 'date' are required keys; 'post_url', 'tags' and 'note_count' are optional.
        """
        raw = self._post

        self.id = raw['id']
        self.type = raw['type']

        if 'post_url' in raw:
            self.url = URL(raw['post_url'])
        else:
            self.url = None

        self.tags = set(raw.get('tags', []))
        self.note_count = raw.get('note_count')
        self.post_date = raw['date']

    def __repr__(self):
        template = "<TumblrPost id='{id}' type='{type}' url='{url}'>"
        return template.format(id=self.id, type=self.type, url=self.url)

    def __str__(self):
        # A post without a URL is rendered as an empty string
        if not self.url:
            return ''

        return self.url.as_string()
77
78
79
class TumblrPhotoSet(TumblrPost):
    """
    Container class for Photo and Photo Link post types
    """
    def __init__(self, post, blog):
        """
        Args:
            post(dict): API response
            blog(tumdlr.api.blog.TumblrBlog): Parent blog
        """
        self.log = logging.getLogger('tumdlr.containers.post')

        # Declared up front so the attribute always exists; _parse_post() (invoked by the base constructor)
        # assigns the real value
        self.title = None

        super().__init__(post, blog)

    def _parse_post(self):
        """
        Parse the common post fields, then register one TumblrPhoto per photo using the best image size available.

        The 'original_size' entry is preferred. When it is missing or empty, the widest entry of 'alt_sizes' is
        used instead.
        """
        super()._parse_post()
        self.title = self._post.get('caption', self._post.get('title'))

        photos = self._post.get('photos', [])
        is_photoset = len(photos) > 1

        for page_no, photo in enumerate(photos, 1):
            # max() needs a callable key; compare the candidate sizes by their 'width' value
            best_size = photo.get('original_size') or max(photo['alt_sizes'], key=lambda size: size['width'])

            # Work on a copy so the page number is not written back into the raw API response
            best_size = dict(best_size)
            best_size['page_no'] = page_no if is_photoset else False
            self.files.append(TumblrPhoto(best_size, self))

    def __repr__(self):
        # The title can be None when the post has neither a caption nor a title
        first_line = (self.title or '').split("\n")[0].strip()

        return "<TumblrPhotoSet title='{title}' id='{id}' photos='{count}'>"\
            .format(title=first_line, id=self.id, count=len(self.files))
110
111
112
class TumblrVideoPost(TumblrPost):
    """
    Container class for Video post types
    """
    def __init__(self, post, blog):
        """
        Args:
            post(dict): API response
            blog(tumdlr.api.blog.TumblrBlog): Parent blog
        """
        self.log = logging.getLogger('tumdlr.containers.post')

        # Declared before the base constructor runs, because it calls _parse_post() which assigns these
        self.title          = None
        self.description    = None
        self.duration       = None
        self.format         = None

        super().__init__(post, blog)

    def _parse_post(self):
        """
        Parse the common post fields, then look up the video metadata through youtube-dl (without downloading)
        and register the video as a downloadable file.

        When the post carries no URL, the metadata lookup is skipped and no file is registered.
        """
        super()._parse_post()

        # The base parser leaves url as None when the API response has no 'post_url'
        if self.url is None:
            self.log.warning('Video post %s has no post URL, skipping video metadata lookup', self.id)
            return

        video_info = YoutubeDL().extract_info(self.url.as_string(), download=False)

        self.title = video_info.get('title')

        self.description    = video_info.get('description')
        # 'duration' may be present with a None value, which a .get() default alone would not cover
        self.duration       = int(video_info.get('duration') or 0)
        self.format         = video_info.get('format', 'Unknown')

        self.files.append(TumblrVideo(video_info, self))

    def __repr__(self):
        return "<TumblrVideoPost id='{id}'>".format(id=self.id)
149
150
151
class TumblrFile:
    """
    This is the base container class for all downloadable resources associated with Tumblr posts.
    """

    # Name of the sub-directory used when categorizing downloads by post type
    CATEGORY = 'misc'

    def __init__(self, data, container):
        """
        Args:
            data(dict): API response data
            container(TumblrPost): Parent container
        """
        self.log = logging.getLogger('tumdlr.containers.file')

        self._data      = data
        self.container  = container
        # Prefer the resource's own 'url' and fall back to 'post_url' when it is absent
        self.url        = URL(self._data.get('url', self._data.get('post_url')))

    def download(self, context, **kwargs):
        """
        Download this resource to the location computed by filepath().

        Args:
            context(tumdlr.main.Context): CLI request context
            kwargs(dict): Additional arguments to send with the download request

        Returns:
            str: Path to the saved file
        """
        # filepath() receives the kwargs dict itself so subclasses can add entries to it before the request
        target = str(self.filepath(context, kwargs))
        download(self.url.as_string(), target, **kwargs)

        return target

    def filepath(self, context, request_data):
        """
        Build the base directory that downloads of this resource are saved under.

        Args:
            context(tumdlr.main.Context): CLI request context
            request_data(Optional[dict]): Additional arguments to send with the download request (unused here)

        Returns:
            Path
        """
        # Construct the save basedir
        basedir = Path(context.config['Tumdlr']['SavePath'])

        # Are we categorizing by user?
        if context.config['Categorization']['User']:
            self.log.debug('Categorizing by user: %s', self.container.blog.name)
            basedir = basedir.joinpath(sanitize_filename(self.container.blog.name))

        # Are we categorizing by post type?
        if context.config['Categorization']['PostType']:
            self.log.debug('Categorizing by type: %s', self.CATEGORY)
            basedir = basedir.joinpath(self.CATEGORY)

        self.log.debug('Basedir constructed: %s', basedir)

        return basedir
206
207
208
class TumblrPhoto(TumblrFile):
    """
    A single downloadable photo belonging to a TumblrPhotoSet.
    """

    CATEGORY = 'photos'

    def __init__(self, photo, photoset):
        """
        Args:
            photo(dict): Photo API data
            photoset(TumblrPhotoSet): Parent container
        """
        super().__init__(photo, photoset)

        self.width   = self._data.get('width')
        self.height  = self._data.get('height')
        # Position within the photoset, or False when the photo is not part of one
        self.page_no = self._data.get('page_no', False)

    def filepath(self, context, request_data):
        """
        Get the full file path to save the downloaded file to

        Caption and photoset page details are also recorded under request_data['progress_data'].

        Args:
            context(tumdlr.main.Context): CLI request context
            request_data(Optional[dict]): Additional arguments to send with the download request

        Returns:
            str: Destination path including the file extension
        """
        assert isinstance(self.container, TumblrPhotoSet)
        filepath = super().filepath(context, request_data)

        # Tolerate callers that pass no request data or no progress_data entry
        if request_data is None:
            request_data = {}
        progress_data = request_data.setdefault('progress_data', {})

        progress_data['Caption'] = self.container.title

        # Fall back to the post ID so untitled posts still get a distinct file name
        name = self.container.title or str(self.container.id)

        # Are we categorizing by photosets?
        if self.page_no and context.config['Categorization']['Photosets']:
            self.log.debug('Categorizing by photoset: %s', self.container.id)
            filepath = filepath.joinpath(sanitize_filename(str(self.container.id)))

        # Prepend the page number for photosets
        if self.page_no:
            filepath = filepath.joinpath(sanitize_filename('p{pn}_{pt}'.format(pn=self.page_no, pt=name)))
            progress_data['Photoset Page'] = '{cur} / {tot}'\
                .format(cur=self.page_no, tot=len(self.container.files))
        else:
            filepath = filepath.joinpath(sanitize_filename(name))

        # Work out the file extension and return
        return str(filepath) + os.path.splitext(self.url.as_string())[1]

    def __repr__(self):
        return "<TumblrPhoto url='{url}' width='{w}' height='{h}'>".format(url=self.url, w=self.width, h=self.height)

    def __str__(self):
        return self.url.as_string()
262
263
264
class TumblrVideo(TumblrFile):
    """
    A single downloadable video belonging to a TumblrVideoPost.
    """

    CATEGORY = 'videos'

    def __init__(self, video, vpost):
        """
        Args:
            video(dict): Video API data
            vpost(TumblrVideoPost): Parent container
        """
        super().__init__(video, vpost)

    def filepath(self, context, request_data):
        """
        Get the full file path to save the video to

        Title, description, duration and format are also recorded under request_data['progress_data'].

        Args:
            context(tumdlr.main.Context): CLI request context
            request_data(Optional[dict]): Additional arguments to send with the download request

        Returns:
            str: Destination path including the file extension
        """
        assert isinstance(self.container, TumblrVideoPost)
        filepath = super().filepath(context, request_data)

        # Tolerate callers that pass no request data or no progress_data entry
        if request_data is None:
            request_data = {}
        progress_data = request_data.setdefault('progress_data', {})

        # The container's duration starts out as None, so treat a missing value as zero seconds
        minutes, seconds = divmod(self.container.duration or 0, 60)
        duration = '{} minutes {} seconds'.format(minutes, seconds) if minutes else '{} seconds'.format(seconds)

        if self.container.title:
            progress_data['Title'] = self.container.title

        progress_data['Description'] = self.container.description
        progress_data['Duration'] = duration
        progress_data['Format'] = self.container.format

        # Name the file after the description, or after a hash of the URL when there is no description
        filepath = filepath.joinpath(sanitize_filename(
            self.container.description or
            md5(self.url.as_string().encode('utf-8')).hexdigest())
        )

        # Work out the file extension and return
        return '{}.{}'.format(str(filepath), self._data.get('ext', 'mp4'))

    def __repr__(self):
        return "<TumblrVideo id='{i}'>".format(i=self.container.id)

    def __str__(self):
        return self.url.as_string()
314