tumdlr.TumblrPost.__init__() - Code Metrics - Inspection of "Parser error handling, error classes renamed from..." - FujiMakoto/tumdlr - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Push — master ( 43645f...f403d7 )

by Makoto

created 2016-03-09 20:21 UTC

tumdlr.TumblrPost.__init__() B

↳ Parent: tumdlr.TumblrPost

Complexity

Conditions

Size

Total Lines

Duplication

Lines	0
Ratio	0 %

Metric	Value
dl	0
loc	24
rs	8.9713
cc	2

import logging
import os

from pathlib import Path
from yurl import URL
from youtube_dl import YoutubeDL
from hashlib import md5

from tumdlr.downloader import sanitize_filename, download
from tumdlr.errors import TumdlrDownloadError, TumdlrParserError


class TumblrPost:
    """
    This is the base container class for all Tumblr post types. It contains data that is always available with any
    type of post.

    Additional supported post types may extend this class to provide additional metadata parsing. Subclasses do so
    by overriding _parse_post(), which is invoked from the constructor.
    """
    def __init__(self, post, blog):
        """
        Args:
            post(dict): API response
            blog(tumdlr.api.TumblrBlog): Parent blog

        Raises:
            TumdlrParserError: If any exception is raised while parsing the post data
        """
        self._post = post
        self.blog = blog
        self.log = logging.getLogger('tumdlr.containers.post')

        # Defaults; the real values are filled in by _parse_post()
        self.id         = None  # type: int
        self.type       = None  # type: str
        self.url        = None  # type: URL
        self.tags       = set()
        self.post_date  = None  # type: str
        self.note_count = None  # type: int

        # Downloadable resources attached to this post (populated by subclasses)
        self.files = []

        try:
            self._parse_post()
        except Exception as e:
            # Logger.warn() is a deprecated alias of Logger.warning()
            self.log.warning('Failed to parse post data: %r', self, exc_info=e)
            # Chain the original exception so the root cause stays visible in the traceback
            raise TumdlrParserError(post_data=post) from e

    @property
    def is_text(self):
        """
        Returns:
            bool: True when the post type is 'text'
        """
        return self.type == 'text'

    @property
    def is_photo(self):
        """
        Returns:
            bool: True when the post type is 'photo' or 'link'
        """
        return self.type in ('photo', 'link')

    @property
    def is_video(self):
        """
        Returns:
            bool: True when the post type is 'video'
        """
        return self.type == 'video'

    def _parse_post(self):
        """
        Copy the common post attributes out of the API response.

        'id', 'type' and 'date' are required (a missing key raises KeyError); 'post_url', 'tags' and 'note_count'
        are optional.
        """
        self.id         = self._post['id']
        self.type       = self._post['type']
        self.url        = URL(self._post['post_url']) if 'post_url' in self._post else None
        self.tags       = set(self._post.get('tags', []))
        self.note_count = self._post.get('note_count')
        self.post_date  = self._post['date']

    def __repr__(self):
        return "<TumblrPost id='{id}' type='{type}' url='{url}'>"\
            .format(id=self.id, type=self.type, url=self.url)

    def __str__(self):
        # Posts without a URL render as an empty string
        return self.url.as_string() if self.url else ''


class TumblrPhotoSet(TumblrPost):
    """
    Container class for Photo and Photo Link post types
    """
    def __init__(self, post, blog):
        """
        Args:
            post(dict): API response
            blog(tumdlr.api.blog.TumblrBlog): Parent blog
        """
        self.log = logging.getLogger('tumdlr.containers.post')

        # Must exist before parsing starts: __repr__ reads it, and the base constructor formats this object with
        # %r when it logs a parse failure
        self.title = None

        super().__init__(post, blog)

    def _parse_post(self):
        """
        Parse all available photos using the best image sizes available
        """
        super()._parse_post()
        self.title = self._post.get('caption', self._post.get('title'))

        photos = self._post.get('photos', [])
        is_photoset = (len(photos) > 1)

        for page_no, photo in enumerate(photos, 1):
            # Prefer the original upload; otherwise fall back to the widest alternative size.
            # max() requires a callable key; the string 'width' raised a TypeError here.
            best_size = photo.get('original_size') or max(photo['alt_sizes'], key=lambda size: size.get('width', 0))
            # Page numbers are only meaningful for multi-photo posts; single photos are flagged with False
            best_size['page_no'] = page_no if is_photoset else False
            self.files.append(TumblrPhoto(best_size, self))

    def __repr__(self):
        # The title may be None (no caption or title in the API response); only its first line is shown
        first_line = (self.title or '').split("\n")[0].strip()
        return "<TumblrPhotoSet title='{title}' id='{id}' photos='{count}'>"\
            .format(title=first_line, id=self.id, count=len(self.files))


class TumblrVideoPost(TumblrPost):
    """
    Container class for Video post types
    """
    def __init__(self, post, blog):
        """
        Args:
            post(dict): API response
            blog(tumdlr.api.blog.TumblrBlog): Parent blog
        """
        self.log = logging.getLogger('tumdlr.containers.post')

        # Defaults; the real values are filled in by _parse_post(), which the base constructor invokes
        self.title          = None
        self.description    = None
        self.duration       = None
        self.format         = None

        super().__init__(post, blog)

    def _parse_post(self):
        """
        Parse the common post data, then extract the video metadata for the post URL using youtube-dl
        """
        super()._parse_post()

        # download=False: only the metadata is extracted here, the video itself is not downloaded
        video_info = YoutubeDL().extract_info(self.url.as_string(), download=False)

        self.title          = video_info.get('title')
        self.description    = video_info.get('description')
        # The 'duration' key may be present with a None value, which int() cannot convert
        self.duration       = int(video_info.get('duration') or 0)
        self.format         = video_info.get('format', 'Unknown')

        self.files.append(TumblrVideo(video_info, self))

    def __repr__(self):
        return "<TumblrVideoPost id='{id}'>".format(id=self.id)


class TumblrFile:
    """
    This is the base container class for all downloadable resources associated with Tumblr posts.
    """

    # Name of the sub-directory used when categorizing downloads by post type
    CATEGORY = 'misc'

    def __init__(self, data, container):
        """
        Args:
            data(dict): API response data
            container(TumblrPost): Parent container
        """
        self.log = logging.getLogger('tumdlr.containers.file')

        self._data      = data
        self.container  = container
        # Prefer the resource's own 'url' and fall back to 'post_url'
        self.url        = URL(self._data.get('url', self._data.get('post_url')))

    def download(self, context, **kwargs):
        """
        Download this resource to the location given by filepath()

        Args:
            context(tumdlr.main.Context): CLI request context
            kwargs(dict): Additional arguments to send with the download request

        Returns:
            str: Path to the saved file

        Raises:
            TumdlrDownloadError: If resolving the file path or the download itself fails
        """
        try:
            # filepath() receives the kwargs dict itself, so entries it adds are forwarded to the download request
            destination = str(self.filepath(context, kwargs))
            download(self.url.as_string(), destination, **kwargs)
        except Exception as e:
            # Logger.warn() is a deprecated alias of Logger.warning()
            self.log.warning('Post download failed: %r', self, exc_info=e)
            # Chain the original exception so the root cause stays visible in the traceback
            raise TumdlrDownloadError(error_message=str(e), download_url=self.url.as_string()) from e

        return destination

    def filepath(self, context, request_data):
        """
        Construct the base directory this resource is saved under

        Args:
            context(tumdlr.main.Context): CLI request context
            request_data(Optional[dict]): Additional arguments to send with the download request

        Returns:
            Path
        """
        # Construct the save basedir
        basedir = Path(context.config['Tumdlr']['SavePath'])

        # Are we categorizing by user?
        if context.config['Categorization']['User']:
            self.log.debug('Categorizing by user: %s', self.container.blog.name)
            basedir = basedir.joinpath(sanitize_filename(self.container.blog.name))

        # Are we categorizing by post type?
        if context.config['Categorization']['PostType']:
            self.log.debug('Categorizing by type: %s', self.CATEGORY)
            basedir = basedir.joinpath(self.CATEGORY)

        self.log.debug('Basedir constructed: %s', basedir)

        return basedir


class TumblrPhoto(TumblrFile):
    """
    Container class for a single downloadable photo belonging to a photo post
    """

    CATEGORY = 'photos'

    def __init__(self, photo, photoset):
        """
        Args:
            photo(dict): Photo API data
            photoset(TumblrPhotoSet): Parent container
        """
        super().__init__(photo, photoset)

        self.width   = self._data.get('width')
        self.height  = self._data.get('height')
        # Position within the photoset, or False when no page number was supplied
        self.page_no = self._data.get('page_no', False)

    def filepath(self, context, request_data):
        """
        Get the full file path to save the downloaded file to

        Progress information (caption, photoset page) is recorded under request_data['progress_data'].

        Args:
            context(tumdlr.main.Context): CLI request context
            request_data(Optional[dict]): Additional arguments to send with the download request

        Returns:
            str: Full path, including the file extension taken from the photo URL
        """
        assert isinstance(self.container, TumblrPhotoSet)

        # request_data is optional and may not carry a progress_data dict yet
        if request_data is None:
            request_data = {}
        progress_data = request_data.setdefault('progress_data', {})

        filepath = super().filepath(context, request_data)

        progress_data['Caption'] = self.container.title

        # A missing or empty caption would give an empty (or, for photosets, a shared) file name, so fall back to
        # the post ID
        name = self.container.title or str(self.container.id)

        # Are we categorizing by photosets?
        if self.page_no and context.config['Categorization']['Photosets']:
            self.log.debug('Categorizing by photoset: %s', self.container.id)
            filepath = filepath.joinpath(sanitize_filename(str(self.container.id)))

        # Prepend the page number for photosets
        if self.page_no:
            filepath = filepath.joinpath(sanitize_filename('p{pn}_{pt}'.format(pn=self.page_no, pt=name)))
            progress_data['Photoset Page'] = '{cur} / {tot}'\
                .format(cur=self.page_no, tot=len(self.container.files))
        else:
            filepath = filepath.joinpath(sanitize_filename(name))

        # Work out the file extension and return
        return str(filepath) + os.path.splitext(self.url.as_string())[1]

    def __repr__(self):
        return "<TumblrPhoto url='{url}' width='{w}' height='{h}'>".format(url=self.url, w=self.width, h=self.height)

    def __str__(self):
        return self.url.as_string()


class TumblrVideo(TumblrFile):
    """
    Container class for the downloadable video belonging to a video post
    """

    CATEGORY = 'videos'

    def __init__(self, video, vpost):
        """
        Args:
            video(dict): Video API data
            vpost(TumblrVideoPost): Parent container
        """
        super().__init__(video, vpost)

    def filepath(self, context, request_data):
        """
        Get the full file path to save the video to

        Progress information (title, description, duration, format) is recorded under
        request_data['progress_data'].

        Args:
            context(tumdlr.main.Context): CLI request context
            request_data(Optional[dict]): Additional arguments to send with the download request

        Returns:
            str: Full path, including the file extension ('ext' from the video data, 'mp4' by default)
        """
        assert isinstance(self.container, TumblrVideoPost)

        # request_data is optional and may not carry a progress_data dict yet
        if request_data is None:
            request_data = {}
        progress_data = request_data.setdefault('progress_data', {})

        filepath = super().filepath(context, request_data)

        # Treat an unset duration as zero seconds
        minutes, seconds = divmod(self.container.duration or 0, 60)
        duration = '{} minutes {} seconds'.format(minutes, seconds) if minutes else '{} seconds'.format(seconds)

        if self.container.title:
            progress_data['Title'] = self.container.title

        progress_data['Description'] = self.container.description
        progress_data['Duration'] = duration
        progress_data['Format'] = self.container.format

        # Name the file after the description; without one, use the MD5 hex digest of the video URL
        filepath = filepath.joinpath(sanitize_filename(
            self.container.description or
            md5(self.url.as_string().encode('utf-8')).hexdigest())
        )

        # Work out the file extension and return
        return '{}.{}'.format(str(filepath), self._data.get('ext', 'mp4'))

    def __repr__(self):
        return "<TumblrVideo id='{i}'>".format(i=self.container.id)

    def __str__(self):
        return self.url.as_string()


1			import logging
2			import os
3
4			from pathlib import Path
5			from yurl import URL
6			from youtube_dl import YoutubeDL
7			from hashlib import md5
8
9			from tumdlr.downloader import sanitize_filename, download
10			from tumdlr.errors import TumdlrDownloadError, TumdlrParserError
11
12
13			class TumblrPost:
14			"""
15			This is the base container class for all Tumblr post types. It contains data that is always available with any
16			type of post.
17
18			Additional supported post types may extend this class to provide additional metadata parsing
19			"""
20			def __init__(self, post, blog):
21			"""
22			Args:
23			post(dict): API response
24			blog(tumdlr.api.TumblrBlog): Parent blog
25			"""
26			self._post = post
27			self.blog = blog
28			self.log = logging.getLogger('tumdlr.containers.post')
29
30			self.id = None # type: int
31			self.type = None # type: str
32			self.url = None # type: URL
33			self.tags = set()
34			self.post_date = None # type: str
35			self.note_count = None # type: int
36
37			self.files = []
38
39			try:
40			self._parse_post()
41			except Exception as e:
42			self.log.warn('Failed to parse post data: %r', self, exc_info=e)
43			raise TumdlrParserError(post_data=post)
44
45			@property
46			def is_text(self):
47			"""
48			Returns:
49			bool
50			"""
51			return self.type == 'text'
52
53			@property
54			def is_photo(self):
55			"""
56			Returns:
57			bool
58			"""
59			return self.type in ['photo', 'link']
60
61			@property
62			def is_video(self):
63			"""
64			Returns:
65			bool
66			"""
67			return self.type == 'video'
68
69			def _parse_post(self):
70			self.id = self._post['id']
71			self.type = self._post['type']
72			self.url = URL(self._post['post_url']) if 'post_url' in self._post else None
73			self.tags = set(self._post.get('tags', []))
74			self.note_count = self._post.get('note_count')
75			self.post_date = self._post['date']
76
77			def __repr__(self):
78			return "<TumblrPost id='{id}' type='{type}' url='{url}'>"\
79			.format(id=self.id, type=self.type, url=self.url)
80
81			def __str__(self):
82			return self.url.as_string() if self.url else ''
83
84
85			class TumblrPhotoSet(TumblrPost):
86			"""
87			Container class for Photo and Photo Link post types
88			"""
89			def __init__(self, post, blog):
90			"""
91			Args:
92			post(dict): API response
93			blog(tumdlr.api.blog.TumblrBlog): Parent blog
94			"""
95			self.log = logging.getLogger('tumdlr.containers.post')
96			super().__init__(post, blog)
97
98			def _parse_post(self):
99			"""
100			Parse all available photos using the best image sizes available
101			"""
102			super()._parse_post()
103			self.title = self._post.get('caption', self._post.get('title'))
104
105			photos = self._post.get('photos', [])
106			is_photoset = (len(photos) > 1)
107
108			for page_no, photo in enumerate(photos, 1):
109			best_size = photo.get('original_size') or max(photo['alt_sizes'], key='width')
110			best_size['page_no'] = page_no if is_photoset else False
111			self.files.append(TumblrPhoto(best_size, self))
112
113			def __repr__(self):
114			return "<TumblrPhotoSet title='{title}' id='{id}' photos='{count}'>"\
115			.format(title=self.title.split("\n")[0].strip(), id=self.id, count=len(self.files))
116
117
118			class TumblrVideoPost(TumblrPost):
119			"""
120			Container class for Video post types
121			"""
122			def __init__(self, post, blog):
123			"""
124			Args:
125			post(dict): API response
126			blog(tumdlr.api.blog.TumblrBlog): Parent blog
127			"""
128			self.log = logging.getLogger('tumdlr.containers.post')
129
130			self.title = None
131			self.description = None
132			self.duration = None
133			self.format = None
134
135			super().__init__(post, blog)
136
137			def _parse_post(self):
138			"""
139			Parse all available photos using the best image sizes available
140			"""
141			super()._parse_post()
142
143			video_info = YoutubeDL().extract_info(self.url.as_string(), False)
144
145			self.title = video_info.get('title')
146
147			self.description = video_info.get('description')
148			self.duration = int(video_info.get('duration', 0))
149			self.format = video_info.get('format', 'Unknown')
150
151			self.files.append(TumblrVideo(video_info, self))
152
153			def __repr__(self):
154			return "<TumblrVideoPost id='{id}'>".format(id=self.id)
155
156
157			class TumblrFile:
158			"""
159			This is the base container class for all downloadable resources associated with Tumblr posts.
160			"""
161
162			CATEGORY = 'misc'
163
164			def __init__(self, data, container):
165			"""
166			Args:
167			data(dict): API response data
168			container(TumblrPost): Parent container
169			"""
170			self.log = logging.getLogger('tumdlr.containers.file')
171
172			self._data = data
173			self.container = container
174			self.url = URL(self._data.get('url', self._data.get('post_url')))
175
176			def download(self, context, **kwargs):
177			"""
178			Args:
179			context(tumdlr.main.Context): CLI request context
180			kwargs(dict): Additional arguments to send with the download request
181
182			Returns:
183			str: Path to the saved file
184			"""
185			try:
186			download(self.url.as_string(), str(self.filepath(context, kwargs)), **kwargs)
187			except Exception as e:
188			self.log.warn('Post download failed: %r', self, exc_info=e)
189			raise TumdlrDownloadError(error_message=str(e), download_url=self.url.as_string())
190
191			def filepath(self, context, request_data):
192			"""
193			Args:
194			context(tumdlr.main.Context): CLI request context
195			request_data(Optional[dict]): Additional arguments to send with the download request
196
197			Returns:
198			Path
199			"""
200			# Construct the save basedir
201			basedir = Path(context.config['Tumdlr']['SavePath'])
202
203			# Are we categorizing by user?
204			if context.config['Categorization']['User']:
205			self.log.debug('Categorizing by user: %s', self.container.blog.name)
206			basedir = basedir.joinpath(sanitize_filename(self.container.blog.name))
207
208			# Are we categorizing by post type?
209			if context.config['Categorization']['PostType']:
210			self.log.debug('Categorizing by type: %s', self.CATEGORY)
211			basedir = basedir.joinpath(self.CATEGORY)
212
213			self.log.debug('Basedir constructed: %s', basedir)
214
215			return basedir
216
217
218			class TumblrPhoto(TumblrFile):
219
220			CATEGORY = 'photos'
221
222			def __init__(self, photo, photoset):
223			"""
224			Args:
225			photo(dict): Photo API data
226			photoset(TumblrPhotoSet): Parent container
227			"""
228			super().__init__(photo, photoset)
229
230			self.width = self._data.get('width')
231			self.height = self._data.get('height')
232			self.page_no = self._data.get('page_no', False)
233
234			def filepath(self, context, request_data):
235			"""
236			Get the full file path to save the downloaded file to
237
238			Args:
239			context(tumdlr.main.Context): CLI request context
240			request_data(Optional[dict]): Additional arguments to send with the download request
241
242			Returns:
243			Path
244			"""
245			assert isinstance(self.container, TumblrPhotoSet)
246			filepath = super().filepath(context, request_data)
247
248			request_data['progress_data']['Caption'] = self.container.title
249
250			# Are we categorizing by photosets?
251			if self.page_no and context.config['Categorization']['Photosets']:
252			self.log.debug('Categorizing by photoset: %s', self.container.id)
253			filepath = filepath.joinpath(sanitize_filename(str(self.container.id)))
254
255			# Prepend the page number for photosets
256			if self.page_no:
257			filepath = filepath.joinpath(sanitize_filename('p{pn}_{pt}'.format(pn=self.page_no,
258			pt=self.container.title)))
259			request_data['progress_data']['Photoset Page'] = '{cur} / {tot}'\
260			.format(cur=self.page_no, tot=len(self.container.files))
261			else:
262			filepath = filepath.joinpath(sanitize_filename(self.container.title))
263
264			# Work out the file extension and return
265			return str(filepath) + os.path.splitext(self.url.as_string())[1]
266
267			def __repr__(self):
268			return "<TumblrPhoto url='{url}' width='{w}' height='{h}'>".format(url=self.url, w=self.width, h=self.height)
269
270			def __str__(self):
271			return self.url.as_string()
272
273
274			class TumblrVideo(TumblrFile):
275
276			CATEGORY = 'videos'
277
278			def __init__(self, video, vpost):
279			"""
280			Args:
281			video(dict): Video API data
282			vpost(TumblrVideoPost): Parent container
283			"""
284			super().__init__(video, vpost)
285
286			def filepath(self, context, request_data):
287			"""
288			Get the full file path to save the video to
289
290			Args:
291			context(tumdlr.main.Context): CLI request context
292			request_data(Optional[dict]): Additional arguments to send with the download request
293
294			Returns:
295			Path
296			"""
297			assert isinstance(self.container, TumblrVideoPost)
298			filepath = super().filepath(context, request_data)
299
300			minutes = int(self.container.duration / 60)
301			seconds = self.container.duration % 60
302			duration = '{} minutes {} seconds'.format(minutes, seconds) if minutes else '{} seconds'.format(seconds)
303
304			if self.container.title:
305			request_data['progress_data']['Title'] = self.container.title
306
307			request_data['progress_data']['Description'] = self.container.description
308			request_data['progress_data']['Duration'] = duration
309			request_data['progress_data']['Format'] = self.container.format
310
311			filepath = filepath.joinpath(sanitize_filename(
312			self.container.description or
313			md5(self.url.as_string().encode('utf-8')).hexdigest())
314			)
315
316			# Work out the file extension and return
317			return '{}.{}'.format(str(filepath), self._data.get('ext', 'mp4'))
318
319			def __repr__(self):
320			return "<TumblrVideo id='{i}'>".format(i=self.container.id)
321
322			def __str__(self):
323			return self.url.as_string()
324

FujiMakoto / tumdlr

Push — master ( 43645f...f403d7 )

tumdlr.TumblrPost.__init__() B

Complexity

Size

Duplication

Duplication Side-by-Side

Filter issues like

tumdlr.TumblrPost.__init__() B