Completed
Push — master ( 49ff98...8f356f )
by Makoto
01:10
created

tumdlr.TumblrFile.filepath()   B

Complexity

Conditions 3

Size

Total Lines 26

Duplication

Lines 0
Ratio 0 %
Metric Value
dl 0
loc 26
rs 8.8571
cc 3
1
import logging
2
import os
3
from abc import ABCMeta, abstractmethod
4
5
from pathlib import Path
6
from yurl import URL
7
8
from tumdlr.downloader import sanitize_filename, download
9
10
"""
11
Post Containers
12
---
13
Classes that extend the TumblrPost class are used for parsing and storing post metadata using API response data.
14
15
They do not provide any methods for downloading posts directly. Instead, these classes should contain sub-container
16
objects for their associated post types, which are described in more detail below.
17
"""
18
19
20
class TumblrPost:
    """
    Base container shared by every Tumblr post type. It holds the fields that are read from any post, regardless
    of its type.

    Subclasses for specific post types extend _parse_post() to extract additional metadata.
    """
    def __init__(self, post, blog):
        """
        Args:
            post(dict): API response
            blog(tumdlr.api.TumblrBlog): Parent blog
        """
        self._post = post
        self.blog = blog
        self.log = logging.getLogger('tumdlr.containers.post')

        # Placeholders only; the real values are filled in by _parse_post() below
        self.id = None  # type: int
        self.type = None  # type: str
        self.url = None  # type: URL
        self.tags = set()
        self.post_date = None  # type: str
        self.note_count = None  # type: int

        # Must exist before _parse_post() runs, since subclasses append to it while parsing
        self.files = []
        self._parse_post()

    @property
    def is_text(self):
        """
        Returns:
            bool: True when the post type is 'text'
        """
        return self.type == 'text'

    @property
    def is_photo(self):
        """
        Returns:
            bool: True when the post type is 'photo' or 'link'
        """
        return self.type in ('photo', 'link')

    @property
    def is_video(self):
        """
        Returns:
            bool: True when the post type is 'video'
        """
        return self.type == 'video'

    def _parse_post(self):
        """
        Copy the common post fields out of the raw API response

        'id', 'type' and 'date' are required keys; 'post_url', 'tags' and 'note_count' are optional.
        """
        data = self._post

        self.id = data['id']
        self.type = data['type']

        if 'post_url' in data:
            self.url = URL(data['post_url'])
        else:
            self.url = None

        self.tags = set(data.get('tags', []))
        self.note_count = data.get('note_count')
        self.post_date = data['date']

    def __repr__(self):
        template = "<TumblrPost id='{id}' type='{type}' url='{url}'>"
        return template.format(id=self.id, type=self.type, url=self.url)

    def __str__(self):
        if not self.url:
            return ''

        return self.url.as_string()
85
86
87
class TumblrPhotoSet(TumblrPost):
    """
    Container class for Photo and Photo Link post types
    """
    def __init__(self, post, blog):
        """
        Args:
            post(dict): API response
            blog(tumdlr.api.blog.TumblrBlog): Parent blog
        """
        # This default has to be assigned BEFORE calling the base constructor: the base constructor invokes
        # _parse_post(), which stores the real title. Assigning it afterwards would reset the title to None.
        self.title = None
        super().__init__(post, blog)

    def _parse_post(self):
        """
        Parse all available photos using the best image sizes available
        """
        super()._parse_post()
        self.title = self._resolve_title()

        photos = self._post.get('photos', [])
        is_photoset = len(photos) > 1

        for page_no, photo in enumerate(photos, 1):
            # Prefer the original upload; otherwise fall back to the widest alternative size
            best_size = photo.get('original_size') or max(photo['alt_sizes'], key=lambda size: size['width'])

            # Work on a copy so the raw API response held in self._post is left untouched
            photo_data = dict(best_size)
            photo_data['page_no'] = page_no if is_photoset else False
            self.files.append(TumblrPhoto(photo_data, self))

    def _resolve_title(self):
        """
        Pick a human readable title for the post: caption, else title, else summary, else the post id

        Returns:
            str
        """
        for key in ('caption', 'title', 'summary'):
            value = self._post.get(key)
            if value:
                return value

        return str(self.id)

    def __repr__(self):
        # Only the first line of the title is shown to keep the representation on a single line
        first_line = (self.title or '').split("\n")[0].strip()
        return "<TumblrPhotoSet title='{title}' id='{id}' photos='{count}'>"\
            .format(title=first_line, id=self.id, count=len(self.files))

    def __str__(self):
        # The post URL is optional in the API response, so guard against None like the base class does
        return self.url.as_string() if self.url else ''
123
124
125
class TumblrFile(metaclass=ABCMeta):
    """
    This is the base container class for all downloadable resources associated with Tumblr posts.
    """
    def __init__(self, data, container):
        """
        Args:
            data(dict): API response data; must contain a 'url' key
            container(TumblrPost): Parent container
        """
        self.log = logging.getLogger('tumdlr.containers.file')

        self._data      = data
        self.container  = container
        self.type       = 'misc'  # Also used as the directory name when categorizing by post type
        self.url        = URL(self._data['url'])

    def download(self, context, **kwargs):
        """
        Download the file to the location reported by filepath()

        Args:
            context(tumdlr.main.Context): CLI request context
            kwargs(dict): Additional arguments to send with the download request

        Returns:
            str: Path to the saved file
        """
        # filepath() receives the kwargs dict itself (not a copy), so anything it adds is forwarded to download()
        destination = str(self.filepath(context, kwargs))
        download(self.url.as_string(), destination, **kwargs)

        return destination

    @abstractmethod
    def filepath(self, context, request_data=None):
        """
        Construct the base directory the file should be saved under

        Subclasses must override this method; they may call this implementation through super() and extend the
        returned directory with a file name.

        Args:
            context(tumdlr.main.Context): CLI request context
            request_data(Optional[dict]): Additional arguments to send with the download request

        Returns:
            Path
        """
        # Construct the save basedir
        basedir = Path(context.config['Tumdlr']['SavePath'])

        # Are we categorizing by user?
        if context.config['Categorization']['User']:
            self.log.debug('Categorizing by user: %s', self.container.blog.name)
            basedir = basedir.joinpath(sanitize_filename(self.container.blog.name))

        # Are we categorizing by post type?
        if context.config['Categorization']['PostType']:
            self.log.debug('Categorizing by type: %s', self.type)
            basedir = basedir.joinpath(self.type)

        self.log.debug('Basedir constructed: %s', basedir)

        return basedir
179
180
181
class TumblrPhoto(TumblrFile):
    """
    Container class for a single downloadable photo belonging to a TumblrPhotoSet
    """
    def __init__(self, photo, photoset):
        """
        Args:
            photo(dict): Photo API data
            photoset(TumblrPhotoSet): Parent container
        """
        super().__init__(photo, photoset)
        self.type = 'photos'

        self.width   = self._data.get('width')
        self.height  = self._data.get('height')
        self.page_no = self._data.get('page_no', False)  # False when the photo is not part of a photoset

    def filepath(self, context, request_data=None):
        """
        Get the full file path to save the downloaded file to

        When request_data carries a 'progress_data' dict, the caption (and the photoset page, if any) are added
        to it.

        Args:
            context(tumdlr.main.Context): CLI request context
            request_data(Optional[dict]): Additional arguments to send with the download request

        Returns:
            str: Destination path, including the file extension taken from the photo URL
        """
        assert isinstance(self.container, TumblrPhotoSet)
        filepath = super().filepath(context, request_data)

        # Progress reporting is optional; only touch it when the caller actually supplied the dict
        progress_data = request_data.get('progress_data') if request_data else None
        if progress_data is not None:
            progress_data['Caption'] = self.container.title

        # Fall back to the post id so a missing title never ends up in the file name
        title = self.container.title or str(self.container.id)

        # Are we categorizing by photosets?
        if self.page_no and context.config['Categorization']['Photosets']:
            self.log.debug('Categorizing by photoset: %s', self.container.id)
            filepath = filepath.joinpath(sanitize_filename(str(self.container.id)))

        # Prepend the page number for photosets
        if self.page_no:
            filepath = filepath.joinpath(sanitize_filename('p{pn}_{pt}'.format(pn=self.page_no, pt=title)))
            if progress_data is not None:
                progress_data['Photoset Page'] = '{cur} / {tot}'\
                    .format(cur=self.page_no, tot=len(self.container.files))
        else:
            filepath = filepath.joinpath(sanitize_filename(title))

        # Work out the file extension and return
        return str(filepath) + os.path.splitext(self.url.as_string())[1]

    def __repr__(self):
        return "<TumblrPhoto url='{url}' width='{w}' height='{h}'>".format(url=self.url, w=self.width, h=self.height)

    def __str__(self):
        return self.url.as_string()
234