tumdlr.TumblrFile.__init__() - Code Metrics - Inspection of "Downloader refactoring, support for fully pluggabl..." - FujiMakoto/tumdlr - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Push — master ( 49ff98...8f356f )

by Makoto

created 2016-03-08 13:30 UTC

tumdlr.TumblrFile.__init__() A

↳ Parent: tumdlr.TumblrFile

Complexity

Conditions

Size

Total Lines

Duplication

Lines	0
Ratio	0 %

Metric	Value
dl	0
loc	12
rs	9.4285
cc	1

import logging
import os
from abc import ABCMeta, abstractmethod

from pathlib import Path
from yurl import URL

from tumdlr.downloader import sanitize_filename, download

"""
Post Containers
---
Classes that extend the TumblrPost class are used for parsing and storing post metadata using API response data.

They do not provide any methods for downloading posts directly. Instead, these classes should contain sub-container
objects for their associated post types, which are described in more detail below.
"""


class TumblrPost:
    """
    Base container shared by every Tumblr post type.

    It holds the fields that are read from the API response for any post (id, type, URL, tags, date and
    note count). Containers for specific post types can subclass it and extend ``_parse_post`` to extract
    additional metadata.
    """
    def __init__(self, post, blog):
        """
        Args:
            post(dict): API response
            blog(tumdlr.api.TumblrBlog): Parent blog
        """
        self.log = logging.getLogger('tumdlr.containers.post')
        self.blog = blog
        self._post = post

        # Placeholders; the real values are filled in by _parse_post() at the end of the constructor
        self.id         = None  # type: int
        self.type       = None  # type: str
        self.url        = None  # type: URL
        self.tags       = set()
        self.post_date  = None  # type: str
        self.note_count = None  # type: int

        # Downloadable resources attached to this post
        self.files = []

        self._parse_post()

    @property
    def is_text(self):
        """
        Returns:
            bool: True when the post type is 'text'
        """
        return 'text' == self.type

    @property
    def is_photo(self):
        """
        Returns:
            bool: True when the post type is 'photo' or 'link'
        """
        return self.type in ('photo', 'link')

    @property
    def is_video(self):
        """
        Returns:
            bool: True when the post type is 'video'
        """
        return 'video' == self.type

    def _parse_post(self):
        """
        Copy the common post fields out of the raw API response.

        'id', 'type' and 'date' are required keys; 'post_url', 'tags' and 'note_count' are optional.
        """
        data = self._post

        self.id   = data['id']
        self.type = data['type']

        if 'post_url' in data:
            self.url = URL(data['post_url'])
        else:
            self.url = None

        self.tags       = set(data.get('tags', []))
        self.note_count = data.get('note_count')
        self.post_date  = data['date']

    def __repr__(self):
        template = "<TumblrPost id='{id}' type='{type}' url='{url}'>"
        return template.format(id=self.id, type=self.type, url=self.url)

    def __str__(self):
        # A post without a (truthy) URL is rendered as an empty string
        if not self.url:
            return ''

        return self.url.as_string()


class TumblrPhotoSet(TumblrPost):
    """
    Container class for Photo and Photo Link post types
    """
    def __init__(self, post, blog):
        """
        Args:
            post(dict): API response
            blog(tumdlr.api.blog.TumblrBlog): Parent blog
        """
        self.log = logging.getLogger('tumdlr.containers.post')

        # Declare the attribute BEFORE calling the parent constructor: the parent constructor invokes
        # _parse_post(), which assigns the real title. Resetting it afterwards would discard the parsed value.
        self.title = None

        super().__init__(post, blog)

    def _parse_post(self):
        """
        Parse the post title and all available photos using the best image sizes available.

        The title falls back through caption -> title -> summary -> post id, so it is always a non-empty
        string once parsing has finished. One TumblrPhoto is appended to ``self.files`` per photo; photos in
        a multi-photo post carry their 1-based page number, single photos carry ``False``.
        """
        super()._parse_post()

        # Empty strings are skipped as well, so a post with a blank caption still gets a usable title
        self.title = (self._post.get('caption') or self._post.get('title')
                      or self._post.get('summary') or str(self.id))

        photos = self._post.get('photos', [])
        is_photoset = (len(photos) > 1)

        for page_no, photo in enumerate(photos, 1):
            # Prefer the original upload; otherwise pick the widest alternative size
            best_size = photo.get('original_size') or max(photo['alt_sizes'], key=lambda size: size['width'])

            # Work on a copy so the raw API response held in self._post is not modified
            photo_data = dict(best_size, page_no=page_no if is_photoset else False)
            self.files.append(TumblrPhoto(photo_data, self))

    def __repr__(self):
        # Only the first line of the title is shown; tolerate a title that has not been parsed yet
        first_line = (self.title or '').split("\n")[0].strip()
        return "<TumblrPhotoSet title='{title}' id='{id}' photos='{count}'>"\
            .format(title=first_line, id=self.id, count=len(self.files))

    def __str__(self):
        # The URL is None when the API response carried no 'post_url'
        return self.url.as_string() if self.url else ''


class TumblrFile(metaclass=ABCMeta):
    """
    This is the base container class for all downloadable resources associated with Tumblr posts.
    """
    def __init__(self, data, container):
        """
        Args:
            data(dict): API response data; must contain a 'url' key
            container(TumblrPost): Parent container
        """
        self.log = logging.getLogger('tumdlr.containers.file')

        self._data      = data
        self.container  = container
        self.type       = 'misc'  # Subclasses overwrite this; it names the per-type save directory
        self.url        = URL(self._data['url'])

    def download(self, context, **kwargs):
        """
        Download this file to the location returned by ``filepath()``.

        Args:
            context(tumdlr.main.Context): CLI request context
            kwargs(dict): Additional arguments to send with the download request

        Returns:
            str: The save path that was handed to the downloader
        """
        # filepath() may add entries to kwargs, so it has to run before kwargs is expanded below
        save_path = str(self.filepath(context, kwargs))
        download(self.url.as_string(), save_path, **kwargs)

        return save_path

    @abstractmethod
    def filepath(self, context, request_data=None):
        """
        Build the base directory this file should be saved under.

        Subclasses must override this method and are expected to extend the returned directory with a
        file name.

        Args:
            context(tumdlr.main.Context): CLI request context
            request_data(Optional[dict]): Additional arguments to send with the download request. Unused by
                this base implementation; accepted so subclasses can forward it.

        Returns:
            Path
        """
        # Construct the save basedir
        basedir = Path(context.config['Tumdlr']['SavePath'])

        # Are we categorizing by user?
        if context.config['Categorization']['User']:
            self.log.debug('Categorizing by user: %s', self.container.blog.name)
            basedir = basedir.joinpath(sanitize_filename(self.container.blog.name))

        # Are we categorizing by post type?
        if context.config['Categorization']['PostType']:
            self.log.debug('Categorizing by type: %s', self.type)
            basedir = basedir.joinpath(self.type)

        self.log.debug('Basedir constructed: %s', basedir)

        return basedir


class TumblrPhoto(TumblrFile):
    """
    Container for a single downloadable photo that belongs to a TumblrPhotoSet
    """
    def __init__(self, photo, photoset):
        """
        Args:
            photo(dict): Photo API data
            photoset(TumblrPhotoSet): Parent container
        """
        super().__init__(photo, photoset)
        self.type = 'photos'

        self.width   = self._data.get('width')
        self.height  = self._data.get('height')
        self.page_no = self._data.get('page_no', False)  # False when the photo is not part of a multi-photo post

    def filepath(self, context, request_data=None):
        """
        Get the full file path to save the downloaded file to

        When ``request_data`` is given, display information ('Caption' and, for photoset pages,
        'Photoset Page') is written into its 'progress_data' dict, which is created if missing.

        Args:
            context(tumdlr.main.Context): CLI request context
            request_data(Optional[dict]): Additional arguments to send with the download request

        Returns:
            str: Save path including the file extension taken from the photo URL
        """
        assert isinstance(self.container, TumblrPhotoSet)
        filepath = super().filepath(context, request_data)

        progress_data = None
        if request_data is not None:
            progress_data = request_data.setdefault('progress_data', {})
            progress_data['Caption'] = self.container.title

        # Fall back to the post id so a missing or empty title never reaches sanitize_filename()
        name = self.container.title or str(self.container.id)

        # Are we categorizing by photosets?
        if self.page_no and context.config['Categorization']['Photosets']:
            self.log.debug('Categorizing by photoset: %s', self.container.id)
            filepath = filepath.joinpath(sanitize_filename(str(self.container.id)))

        # Prepend the page number for photosets
        if self.page_no:
            filepath = filepath.joinpath(sanitize_filename('p{pn}_{pt}'.format(pn=self.page_no, pt=name)))
            if progress_data is not None:
                progress_data['Photoset Page'] = '{cur} / {tot}'\
                    .format(cur=self.page_no, tot=len(self.container.files))
        else:
            filepath = filepath.joinpath(sanitize_filename(name))

        # Work out the file extension and return
        return str(filepath) + os.path.splitext(self.url.as_string())[1]

    def __repr__(self):
        return "<TumblrPhoto url='{url}' width='{w}' height='{h}'>".format(url=self.url, w=self.width, h=self.height)

    def __str__(self):
        return self.url.as_string()


1			import logging
2			import os
3			from abc import ABCMeta, abstractmethod
4
5			from pathlib import Path
6			from yurl import URL
7
8			from tumdlr.downloader import sanitize_filename, download
9
10			"""
11			Post Containers
12			---
13			Classes that extend the TumblrPost class are used for parsing and storing post metadata using API response data.
14
15			They do not provide any methods for downloading posts directly. Instead, these classes should contain sub-container
16			objects for their associated post types, which are described in more detail below.
17			"""
18
19
20			class TumblrPost:
21			"""
22			This is the base container class for all Tumblr post types. It contains data that is always available with any
23			type of post.
24
25			Additional supported post types may extend this class to provide additional metadata parsing
26			"""
27			def __init__(self, post, blog):
28			"""
29			Args:
30			post(dict): API response
31			blog(tumdlr.api.TumblrBlog): Parent blog
32			"""
33			self._post = post
34			self.blog = blog
35			self.log = logging.getLogger('tumdlr.containers.post')
36
37			self.id = None # type: int
38			self.type = None # type: str
39			self.url = None # type: URL
40			self.tags = set()
41			self.post_date = None # type: str
42			self.note_count = None # type: int
43
44			self.files = []
45			self._parse_post()
46
47			@property
48			def is_text(self):
49			"""
50			Returns:
51			bool
52			"""
53			return self.type == 'text'
54
55			@property
56			def is_photo(self):
57			"""
58			Returns:
59			bool
60			"""
61			return self.type in ['photo', 'link']
62
63			@property
64			def is_video(self):
65			"""
66			Returns:
67			bool
68			"""
69			return self.type == 'video'
70
71			def _parse_post(self):
72			self.id = self._post['id']
73			self.type = self._post['type']
74			self.url = URL(self._post['post_url']) if 'post_url' in self._post else None
75			self.tags = set(self._post.get('tags', []))
76			self.note_count = self._post.get('note_count')
77			self.post_date = self._post['date']
78
79			def __repr__(self):
80			return "<TumblrPost id='{id}' type='{type}' url='{url}'>"\
81			.format(id=self.id, type=self.type, url=self.url)
82
83			def __str__(self):
84			return self.url.as_string() if self.url else ''
85
86
87			class TumblrPhotoSet(TumblrPost):
88			"""
89			Container class for Photo and Photo Link post types
90			"""
91			def __init__(self, post, blog):
92			"""
93			Args:
94			post(dict): API response
95			blog(tumdlr.api.blog.TumblrBlog): Parent blog
96			"""
97			self.log = logging.getLogger('tumdlr.containers.post')
98			super().__init__(post, blog)
99
100			self.title = None
101
102			def _parse_post(self):
103			"""
104			Parse all available photos using the best image sizes available
105			"""
106			super()._parse_post()
107			self.title = self._post.get('caption', self._post.get('title')) # title else summary else id
108
109			photos = self._post.get('photos', [])
110			is_photoset = (len(photos) > 1)
111
112			for page_no, photo in enumerate(photos, 1):
113			best_size = photo.get('original_size') or max(photo['alt_sizes'], key='width')
114			best_size['page_no'] = page_no if is_photoset else False
115			self.files.append(TumblrPhoto(best_size, self))
116
117			def __repr__(self):
118			return "<TumblrPhotoSet title='{title}' id='{id}' photos='{count}'>"\
119			.format(title=self.title.split("\n")[0].strip(), id=self.id, count=len(self.files))
120
121			def __str__(self):
122			return self.url.as_string()
123
124
125			class TumblrFile(metaclass=ABCMeta):
126			"""
127			This is the base container class for all downloadable resources associated with Tumblr posts.
128			"""
129			def __init__(self, data, container):
130			"""
131			Args:
132			data(dict): API response data
133			container(TumblrPost): Parent container
134			"""
135			self.log = logging.getLogger('tumdlr.containers.file')
136
137			self._data = data
138			self.container = container
139			self.type = 'misc'
140			self.url = URL(self._data['url'])
141
142			def download(self, context, **kwargs):
143			"""
144			Args:
145			context(tumdlr.main.Context): CLI request context
146			kwargs(dict): Additional arguments to send with the download request
147
148			Returns:
149			str: Path to the saved file
150			"""
151			download(self.url.as_string(), str(self.filepath(context, kwargs)), **kwargs)
152
153			@abstractmethod
154			def filepath(self, context, request_data):
155			"""
156			Args:
157			context(tumdlr.main.Context): CLI request context
158			request_data(Optional[dict]): Additional arguments to send with the download request
159
160			Returns:
161			Path
162			"""
163			# Construct the save basedir
164			basedir = Path(context.config['Tumdlr']['SavePath'])
165
166			# Are we categorizing by user?
167			if context.config['Categorization']['User']:
168			self.log.debug('Categorizing by user: %s', self.container.blog.name)
169			basedir = basedir.joinpath(sanitize_filename(self.container.blog.name))
170
171			# Are we categorizing by post type?
172			if context.config['Categorization']['PostType']:
173			self.log.debug('Categorizing by type: photos')
174			basedir = basedir.joinpath('photos')
175
176			self.log.debug('Basedir constructed: %s', basedir)
177
178			return basedir
179
180
181			class TumblrPhoto(TumblrFile):
182
183			def __init__(self, photo, photoset):
184			"""
185			Args:
186			photo(dict): Photo API data
187			photoset(TumblrPhotoSet): Parent container
188			"""
189			super().__init__(photo, photoset)
190			self.type = 'photos'
191
192			self.width = self._data.get('width')
193			self.height = self._data.get('height')
194			self.page_no = self._data.get('page_no', False)
195
196			def filepath(self, context, request_data):
197			"""
198			Get the full file path to save the downloaded file to
199
200			Args:
201			context(tumdlr.main.Context): CLI request context
202			request_data(Optional[dict]): Additional arguments to send with the download request
203
204			Returns:
205			Path
206			"""
207			assert isinstance(self.container, TumblrPhotoSet)
208			filepath = super().filepath(context)
209
210			request_data['progress_data']['Caption'] = self.container.title
211
212			# Are we categorizing by photosets?
213			if self.page_no and context.config['Categorization']['Photosets']:
214			self.log.debug('Categorizing by photoset: %s', self.container.id)
215			filepath = filepath.joinpath(sanitize_filename(str(self.container.id)))
216
217			# Prepend the page number for photosets
218			if self.page_no:
219			filepath = filepath.joinpath(sanitize_filename('p{pn}_{pt}'.format(pn=self.page_no,
220			pt=self.container.title)))
221			request_data['progress_data']['Photoset Page'] = '{cur} / {tot}'\
222			.format(cur=self.page_no, tot=len(self.container.files))
223			else:
224			filepath = filepath.joinpath(sanitize_filename(self.container.title))
225
226			# Work out the file extension and return
227			return str(filepath) + os.path.splitext(self.url.as_string())[1]
228
229			def __repr__(self):
230			return "<TumblrPhoto url='{url}' width='{w}' height='{h}'>".format(url=self.url, w=self.width, h=self.height)
231
232			def __str__(self):
233			return self.url.as_string()
234

FujiMakoto / tumdlr

Push — master ( 49ff98...8f356f )

tumdlr.TumblrFile.__init__() A

Complexity

Size

Duplication

Duplication Side-by-Side

Filter issues like

tumdlr.TumblrFile.__init__() A