tumdlr.TumblrVideo.__init__() - Code Metrics - Inspection of "Video downloading support" - FujiMakoto/tumdlr - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Push — master ( d0b7a4...d8a106 )

by Makoto

created 2016-03-08 21:40 UTC

tumdlr.TumblrVideo.__init__() A

↳ Parent: tumdlr.TumblrVideo

Complexity

Conditions

Size

Total Lines

Duplication

Lines	0
Ratio	0 %

Metric	Value
dl	0
loc	7
rs	9.4285
cc	1

import logging
import os

from pathlib import Path
from yurl import URL
from youtube_dl import YoutubeDL
from hashlib import md5

from tumdlr.downloader import sanitize_filename, download


class TumblrPost:
    """
    Common container for every kind of Tumblr post. It exposes the fields this module reads from any post payload,
    regardless of the post type.

    Subclasses for specific post types override ``_parse_post`` to pull out their own extra metadata
    """
    def __init__(self, post, blog):
        """
        Store the raw payload, set up placeholder attributes and parse the payload straight away

        Args:
            post(dict): API response
            blog(tumdlr.api.TumblrBlog): Parent blog
        """
        self.log = logging.getLogger('tumdlr.containers.post')
        self.blog = blog
        self._post = post

        # Placeholders only; the real values are assigned by _parse_post() at the end of construction
        self.id         = None  # type: int
        self.type       = None  # type: str
        self.url        = None  # type: URL
        self.tags       = set()
        self.post_date  = None  # type: str
        self.note_count = None  # type: int

        # Downloadable resources; populated by subclasses that override _parse_post()
        self.files = []
        self._parse_post()

    @property
    def is_text(self):
        """
        Returns:
            bool: True when the post type is 'text'
        """
        return 'text' == self.type

    @property
    def is_photo(self):
        """
        Returns:
            bool: True when the post type is 'photo' or 'link'
        """
        return self.type in ('photo', 'link')

    @property
    def is_video(self):
        """
        Returns:
            bool: True when the post type is 'video'
        """
        return 'video' == self.type

    def _parse_post(self):
        """
        Copy the common fields out of the raw payload. 'id', 'type' and 'date' are required keys; the rest are optional
        """
        payload = self._post

        self.id   = payload['id']
        self.type = payload['type']

        if 'post_url' in payload:
            self.url = URL(payload['post_url'])
        else:
            self.url = None

        self.tags       = set(payload.get('tags', ()))
        self.note_count = payload.get('note_count')
        self.post_date  = payload['date']

    def __repr__(self):
        template = "<TumblrPost id='{id}' type='{type}' url='{url}'>"
        return template.format(id=self.id, type=self.type, url=self.url)

    def __str__(self):
        # A post without a URL renders as an empty string
        if not self.url:
            return ''

        return self.url.as_string()


class TumblrPhotoSet(TumblrPost):
    """
    Container class for Photo and Photo Link post types
    """
    def __init__(self, post, blog):
        """
        Args:
            post(dict): API response
            blog(tumdlr.api.blog.TumblrBlog): Parent blog
        """
        self.log = logging.getLogger('tumdlr.containers.post')

        # Declared up front so the attribute always exists; the real value is set by _parse_post()
        self.title = None

        super().__init__(post, blog)

    def _parse_post(self):
        """
        Parse all available photos using the best image sizes available

        For every entry in the post's 'photos' list the 'original_size' is used when present; otherwise the widest
        entry of 'alt_sizes' is picked. Each selected size is wrapped in a TumblrPhoto and appended to ``self.files``.
        """
        super()._parse_post()
        self.title  = self._post.get('caption', self._post.get('title'))

        photos = self._post.get('photos', [])
        is_photoset = (len(photos) > 1)

        for page_no, photo in enumerate(photos, 1):
            # max() requires a callable key; the previous key='width' (a plain string) raised TypeError as soon as
            # a photo had no 'original_size' and the alt_sizes fallback was actually evaluated
            best_size = photo.get('original_size') or max(photo['alt_sizes'], key=lambda size: size['width'])

            # Pages are numbered from 1 for real photosets; single photos are flagged with False
            best_size['page_no'] = page_no if is_photoset else False
            self.files.append(TumblrPhoto(best_size, self))

    def __repr__(self):
        # The title may be None when the post carries neither a caption nor a title
        first_line = (self.title or '').split("\n")[0].strip()

        return "<TumblrPhotoSet title='{title}' id='{id}' photos='{count}'>"\
            .format(title=first_line, id=self.id, count=len(self.files))


class TumblrVideoPost(TumblrPost):
    """
    Container class for Video post types
    """
    def __init__(self, post, blog):
        """
        Args:
            post(dict): API response
            blog(tumdlr.api.blog.TumblrBlog): Parent blog
        """
        self.log = logging.getLogger('tumdlr.containers.post')

        # Declared before the parent constructor runs, because it calls _parse_post() which fills these in
        self.title          = None
        self.description    = None
        self.duration       = None
        self.format         = None

        super().__init__(post, blog)

    def _parse_post(self):
        """
        Parse the common post fields, then ask youtube-dl for the video's metadata (nothing is downloaded here)

        The metadata dictionary is also wrapped in a TumblrVideo and appended to ``self.files``.
        """
        super()._parse_post()

        # download=False: only the metadata is extracted at this point
        video_info = YoutubeDL().extract_info(self.url.as_string(), download=False)

        self.title = video_info.get('title')

        self.description    = video_info.get('description')
        # The 'duration' key can be present with a None value; int(None) would raise TypeError, so fall back to 0
        self.duration       = int(video_info.get('duration') or 0)
        self.format         = video_info.get('format', 'Unknown')

        self.files.append(TumblrVideo(video_info, self))

    def __repr__(self):
        return "<TumblrVideoPost id='{id}'>".format(id=self.id)


class TumblrFile:
    """
    Base container for any downloadable resource that belongs to a Tumblr post.
    """

    CATEGORY = 'misc'

    def __init__(self, data, container):
        """
        Args:
            data(dict): API response data
            container(TumblrPost): Parent container
        """
        self.log = logging.getLogger('tumdlr.containers.file')

        self._data      = data
        self.container  = container

        # Prefer the resource's own 'url' entry; 'post_url' is used only when 'url' is missing
        fallback_url = self._data.get('post_url')
        self.url     = URL(self._data.get('url', fallback_url))

    def download(self, context, **kwargs):
        """
        Hand this resource's URL and its computed save path to the downloader

        Args:
            context(tumdlr.main.Context): CLI request context
            kwargs(dict): Additional arguments to send with the download request

        Returns:
            None: the downloader's result is not propagated by this method
        """
        source = self.url.as_string()
        # filepath() receives the kwargs dict itself, so anything it adds is forwarded to the downloader below
        target = str(self.filepath(context, kwargs))

        download(source, target, **kwargs)

    def filepath(self, context, request_data):
        """
        Build the directory this resource is saved under, based on the categorization settings

        Args:
            context(tumdlr.main.Context): CLI request context
            request_data(Optional[dict]): Additional arguments to send with the download request

        Returns:
            Path
        """
        config = context.config

        # Start from the configured save path
        target_dir = Path(config['Tumdlr']['SavePath'])

        # Optional sub-directory per blog
        if config['Categorization']['User']:
            blog_name = self.container.blog.name
            self.log.debug('Categorizing by user: %s', blog_name)
            target_dir = target_dir / sanitize_filename(blog_name)

        # Optional sub-directory per resource category
        if config['Categorization']['PostType']:
            self.log.debug('Categorizing by type: %s', self.CATEGORY)
            target_dir = target_dir / self.CATEGORY

        self.log.debug('Basedir constructed: %s', target_dir)

        return target_dir


class TumblrPhoto(TumblrFile):
    """
    A single downloadable image that belongs to a photo post
    """

    CATEGORY = 'photos'

    def __init__(self, photo, photoset):
        """
        Args:
            photo(dict): Photo API data
            photoset(TumblrPhotoSet): Parent container
        """
        super().__init__(photo, photoset)

        self.width   = self._data.get('width')
        self.height  = self._data.get('height')
        # Page number within a photoset; False marks a standalone photo
        self.page_no = self._data.get('page_no', False)

    def filepath(self, context, request_data):
        """
        Get the full file path to save the downloaded file to

        As a side effect, progress information ('Caption' and, for photosets, 'Photoset Page') is written into
        ``request_data['progress_data']``; that dictionary is created when it is missing.

        Args:
            context(tumdlr.main.Context): CLI request context
            request_data(Optional[dict]): Additional arguments to send with the download request

        Returns:
            str: Save path including the file extension taken from the photo URL
        """
        assert isinstance(self.container, TumblrPhotoSet)

        # request_data is optional and callers are not required to supply 'progress_data'; indexing it blindly
        # raised KeyError / TypeError before any path had been built
        if request_data is None:
            request_data = {}
        progress_data = request_data.get('progress_data')
        if progress_data is None:
            progress_data = request_data['progress_data'] = {}

        filepath = super().filepath(context, request_data)

        progress_data['Caption'] = self.container.title

        # Are we categorizing by photosets?
        if self.page_no and context.config['Categorization']['Photosets']:
            self.log.debug('Categorizing by photoset: %s', self.container.id)
            filepath = filepath.joinpath(sanitize_filename(str(self.container.id)))

        # Prepend the page number for photosets
        if self.page_no:
            filepath = filepath.joinpath(sanitize_filename('p{pn}_{pt}'.format(pn=self.page_no,
                                                                               pt=self.container.title)))
            progress_data['Photoset Page'] = '{cur} / {tot}'\
                .format(cur=self.page_no, tot=len(self.container.files))
        else:
            # A post without a caption/title would otherwise yield a path with no file name component (or pass
            # None to sanitize_filename); fall back to the post id so the file still gets a usable name
            name = self.container.title or str(self.container.id)
            filepath = filepath.joinpath(sanitize_filename(name))

        # Work out the file extension and return
        return str(filepath) + os.path.splitext(self.url.as_string())[1]

    def __repr__(self):
        return "<TumblrPhoto url='{url}' width='{w}' height='{h}'>".format(url=self.url, w=self.width, h=self.height)

    def __str__(self):
        return self.url.as_string()


class TumblrVideo(TumblrFile):
    """
    The downloadable video file that belongs to a video post
    """

    CATEGORY = 'videos'

    def __init__(self, video, vpost):
        """
        Args:
            video(dict): Video API data
            vpost(TumblrVideoPost): Parent container
        """
        super().__init__(video, vpost)

    def filepath(self, context, request_data):
        """
        Get the full file path to save the video to

        As a side effect, progress information ('Title' when available, 'Description', 'Duration' and 'Format') is
        written into ``request_data['progress_data']``; that dictionary is created when it is missing.

        Args:
            context(tumdlr.main.Context): CLI request context
            request_data(Optional[dict]): Additional arguments to send with the download request

        Returns:
            str: Save path including the file extension (the video data's 'ext' entry, 'mp4' when absent)
        """
        assert isinstance(self.container, TumblrVideoPost)

        # request_data is optional and callers are not required to supply 'progress_data'; indexing it blindly
        # raised KeyError / TypeError before any path had been built
        if request_data is None:
            request_data = {}
        progress_data = request_data.get('progress_data')
        if progress_data is None:
            progress_data = request_data['progress_data'] = {}

        filepath = super().filepath(context, request_data)

        # Treat a missing duration as zero seconds instead of failing on the arithmetic below
        total_seconds = self.container.duration or 0
        minutes  = int(total_seconds / 60)
        seconds  = total_seconds % 60
        duration = '{} minutes {} seconds'.format(minutes, seconds) if minutes else '{} seconds'.format(seconds)

        if self.container.title:
            progress_data['Title'] = self.container.title

        progress_data['Description'] = self.container.description
        progress_data['Duration'] = duration
        progress_data['Format'] = self.container.format

        # The file is named after the description; without one, an MD5 digest of the URL is used instead
        filepath = filepath.joinpath(sanitize_filename(
            self.container.description or
            md5(self.url.as_string().encode('utf-8')).hexdigest())
        )

        # Work out the file extension and return
        return '{}.{}'.format(str(filepath), self._data.get('ext', 'mp4'))

    def __repr__(self):
        return "<TumblrVideo id='{i}'>".format(i=self.container.id)

    def __str__(self):
        return self.url.as_string()


1			import logging
2			import os
3
4			from pathlib import Path
5			from yurl import URL
6			from youtube_dl import YoutubeDL
7			from hashlib import md5
8
9			from tumdlr.downloader import sanitize_filename, download
10
11
12			class TumblrPost:
13			"""
14			This is the base container class for all Tumblr post types. It contains data that is always available with any
15			type of post.
16
17			Additional supported post types may extend this class to provide additional metadata parsing
18			"""
19			def __init__(self, post, blog):
20			"""
21			Args:
22			post(dict): API response
23			blog(tumdlr.api.TumblrBlog): Parent blog
24			"""
25			self._post = post
26			self.blog = blog
27			self.log = logging.getLogger('tumdlr.containers.post')
28
29			self.id = None # type: int
30			self.type = None # type: str
31			self.url = None # type: URL
32			self.tags = set()
33			self.post_date = None # type: str
34			self.note_count = None # type: int
35
36			self.files = []
37			self._parse_post()
38
39			@property
40			def is_text(self):
41			"""
42			Returns:
43			bool
44			"""
45			return self.type == 'text'
46
47			@property
48			def is_photo(self):
49			"""
50			Returns:
51			bool
52			"""
53			return self.type in ['photo', 'link']
54
55			@property
56			def is_video(self):
57			"""
58			Returns:
59			bool
60			"""
61			return self.type == 'video'
62
63			def _parse_post(self):
64			self.id = self._post['id']
65			self.type = self._post['type']
66			self.url = URL(self._post['post_url']) if 'post_url' in self._post else None
67			self.tags = set(self._post.get('tags', []))
68			self.note_count = self._post.get('note_count')
69			self.post_date = self._post['date']
70
71			def __repr__(self):
72			return "<TumblrPost id='{id}' type='{type}' url='{url}'>"\
73			.format(id=self.id, type=self.type, url=self.url)
74
75			def __str__(self):
76			return self.url.as_string() if self.url else ''
77
78
79			class TumblrPhotoSet(TumblrPost):
80			"""
81			Container class for Photo and Photo Link post types
82			"""
83			def __init__(self, post, blog):
84			"""
85			Args:
86			post(dict): API response
87			blog(tumdlr.api.blog.TumblrBlog): Parent blog
88			"""
89			self.log = logging.getLogger('tumdlr.containers.post')
90			super().__init__(post, blog)
91
92			def _parse_post(self):
93			"""
94			Parse all available photos using the best image sizes available
95			"""
96			super()._parse_post()
97			self.title = self._post.get('caption', self._post.get('title'))
98
99			photos = self._post.get('photos', [])
100			is_photoset = (len(photos) > 1)
101
102			for page_no, photo in enumerate(photos, 1):
103			best_size = photo.get('original_size') or max(photo['alt_sizes'], key='width')
104			best_size['page_no'] = page_no if is_photoset else False
105			self.files.append(TumblrPhoto(best_size, self))
106
107			def __repr__(self):
108			return "<TumblrPhotoSet title='{title}' id='{id}' photos='{count}'>"\
109			.format(title=self.title.split("\n")[0].strip(), id=self.id, count=len(self.files))
110
111
112			class TumblrVideoPost(TumblrPost):
113			"""
114			Container class for Video post types
115			"""
116			def __init__(self, post, blog):
117			"""
118			Args:
119			post(dict): API response
120			blog(tumdlr.api.blog.TumblrBlog): Parent blog
121			"""
122			self.log = logging.getLogger('tumdlr.containers.post')
123
124			self.title = None
125			self.description = None
126			self.duration = None
127			self.format = None
128
129			super().__init__(post, blog)
130
131			def _parse_post(self):
132			"""
133			Parse all available photos using the best image sizes available
134			"""
135			super()._parse_post()
136
137			video_info = YoutubeDL().extract_info(self.url.as_string(), False)
138
139			self.title = video_info.get('title')
140
141			self.description = video_info.get('description')
142			self.duration = int(video_info.get('duration', 0))
143			self.format = video_info.get('format', 'Unknown')
144
145			self.files.append(TumblrVideo(video_info, self))
146
147			def __repr__(self):
148			return "<TumblrVideoPost id='{id}'>".format(id=self.id)
149
150
151			class TumblrFile:
152			"""
153			This is the base container class for all downloadable resources associated with Tumblr posts.
154			"""
155
156			CATEGORY = 'misc'
157
158			def __init__(self, data, container):
159			"""
160			Args:
161			data(dict): API response data
162			container(TumblrPost): Parent container
163			"""
164			self.log = logging.getLogger('tumdlr.containers.file')
165
166			self._data = data
167			self.container = container
168			self.url = URL(self._data.get('url', self._data.get('post_url')))
169
170			def download(self, context, **kwargs):
171			"""
172			Args:
173			context(tumdlr.main.Context): CLI request context
174			kwargs(dict): Additional arguments to send with the download request
175
176			Returns:
177			str: Path to the saved file
178			"""
179			download(self.url.as_string(), str(self.filepath(context, kwargs)), **kwargs)
180
181			def filepath(self, context, request_data):
182			"""
183			Args:
184			context(tumdlr.main.Context): CLI request context
185			request_data(Optional[dict]): Additional arguments to send with the download request
186
187			Returns:
188			Path
189			"""
190			# Construct the save basedir
191			basedir = Path(context.config['Tumdlr']['SavePath'])
192
193			# Are we categorizing by user?
194			if context.config['Categorization']['User']:
195			self.log.debug('Categorizing by user: %s', self.container.blog.name)
196			basedir = basedir.joinpath(sanitize_filename(self.container.blog.name))
197
198			# Are we categorizing by post type?
199			if context.config['Categorization']['PostType']:
200			self.log.debug('Categorizing by type: %s', self.CATEGORY)
201			basedir = basedir.joinpath(self.CATEGORY)
202
203			self.log.debug('Basedir constructed: %s', basedir)
204
205			return basedir
206
207
208			class TumblrPhoto(TumblrFile):
209
210			CATEGORY = 'photos'
211
212			def __init__(self, photo, photoset):
213			"""
214			Args:
215			photo(dict): Photo API data
216			photoset(TumblrPhotoSet): Parent container
217			"""
218			super().__init__(photo, photoset)
219
220			self.width = self._data.get('width')
221			self.height = self._data.get('height')
222			self.page_no = self._data.get('page_no', False)
223
224			def filepath(self, context, request_data):
225			"""
226			Get the full file path to save the downloaded file to
227
228			Args:
229			context(tumdlr.main.Context): CLI request context
230			request_data(Optional[dict]): Additional arguments to send with the download request
231
232			Returns:
233			Path
234			"""
235			assert isinstance(self.container, TumblrPhotoSet)
236			filepath = super().filepath(context, request_data)
237
238			request_data['progress_data']['Caption'] = self.container.title
239
240			# Are we categorizing by photosets?
241			if self.page_no and context.config['Categorization']['Photosets']:
242			self.log.debug('Categorizing by photoset: %s', self.container.id)
243			filepath = filepath.joinpath(sanitize_filename(str(self.container.id)))
244
245			# Prepend the page number for photosets
246			if self.page_no:
247			filepath = filepath.joinpath(sanitize_filename('p{pn}_{pt}'.format(pn=self.page_no,
248			pt=self.container.title)))
249			request_data['progress_data']['Photoset Page'] = '{cur} / {tot}'\
250			.format(cur=self.page_no, tot=len(self.container.files))
251			else:
252			filepath = filepath.joinpath(sanitize_filename(self.container.title))
253
254			# Work out the file extension and return
255			return str(filepath) + os.path.splitext(self.url.as_string())[1]
256
257			def __repr__(self):
258			return "<TumblrPhoto url='{url}' width='{w}' height='{h}'>".format(url=self.url, w=self.width, h=self.height)
259
260			def __str__(self):
261			return self.url.as_string()
262
263
264			class TumblrVideo(TumblrFile):
265
266			CATEGORY = 'videos'
267
268			def __init__(self, video, vpost):
269			"""
270			Args:
271			video(dict): Video API data
272			vpost(TumblrVideoPost): Parent container
273			"""
274			super().__init__(video, vpost)
275
276			def filepath(self, context, request_data):
277			"""
278			Get the full file path to save the video to
279
280			Args:
281			context(tumdlr.main.Context): CLI request context
282			request_data(Optional[dict]): Additional arguments to send with the download request
283
284			Returns:
285			Path
286			"""
287			assert isinstance(self.container, TumblrVideoPost)
288			filepath = super().filepath(context, request_data)
289
290			minutes = int(self.container.duration / 60)
291			seconds = self.container.duration % 60
292			duration = '{} minutes {} seconds'.format(minutes, seconds) if minutes else '{} seconds'.format(seconds)
293
294			if self.container.title:
295			request_data['progress_data']['Title'] = self.container.title
296
297			request_data['progress_data']['Description'] = self.container.description
298			request_data['progress_data']['Duration'] = duration
299			request_data['progress_data']['Format'] = self.container.format
300
301			filepath = filepath.joinpath(sanitize_filename(
302			self.container.description or
303			md5(self.url.as_string().encode('utf-8')).hexdigest())
304			)
305
306			# Work out the file extension and return
307			return '{}.{}'.format(str(filepath), self._data.get('ext', 'mp4'))
308
309			def __repr__(self):
310			return "<TumblrVideo id='{i}'>".format(i=self.container.id)
311
312			def __str__(self):
313			return self.url.as_string()
314

FujiMakoto / tumdlr

Push — master ( d0b7a4...d8a106 )

tumdlr.TumblrVideo.__init__() A

Complexity

Size

Duplication

Duplication Side-by-Side

Filter issues like

tumdlr.TumblrVideo.__init__() A