Completed
Push — master ( e2c120...c7b81c )
by De
01:18
created

GenericComic.get_last_comic()   A

Complexity

Conditions 3

Size

Total Lines 4

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 3
dl 0
loc 4
rs 10
c 0
b 0
f 0
1
#! /usr/bin/python3
2
# vim: set expandtab tabstop=4 shiftwidth=4 :
3
"""Module to define logic common to all comics."""
4
5
import json
6
import time
7
import os
8
from datetime import date
9
from urlfunctions import get_filename_from_url, get_file_at_url
10
import inspect
11
import logging
12
13
14
def get_date_for_comic(comic):
    """Build the `datetime.date` described by a comic.

    The comic is a mapping holding integer 'year', 'month' and 'day'
    entries; a missing entry raises KeyError."""
    return date(year=comic['year'], month=comic['month'], day=comic['day'])
17
18
19
def get_info_before_comic(comic):
    """Yield the pieces of text to display before the images of a comic.

    Only the author line ('by <author>') is produced, and only when the
    comic has a non-empty 'author' entry."""
    writer = comic.get('author')
    if not writer:
        return
    yield 'by ' + writer
24
25
26
def get_info_after_comic(comic):
    """Yield the pieces of text to display after the images of a comic.

    The known descriptive fields are looked up in a fixed order and the
    ones that are missing or empty are skipped."""
    fields = ('alt', 'title', 'title2', 'texts', 'name', 'description')
    # map() is lazy, so each field is still looked up only when requested;
    # filter(None, ...) drops the falsy values.
    yield from filter(None, map(comic.get, fields))
32
33
34
class GenericComic(object):
    """Generic class to handle the logic common to all comics.

    Every method is a classmethod: comics are described by subclasses that
    set the attributes below and override get_next_comic(), and the classes
    are used directly.

    Attributes :
        name        Name of the comic (for logging, CLI and default output dir)
        long_name   Long name of the comic (to be added in the comic info)
        url         Base url for the comic (without trailing slash)
        _categories Iterable of category names (see get_categories())."""
    name = None
    long_name = None
    url = None
    _categories = ('ALL', )

    @classmethod
    def log(cls, string):
        """Log `string` at DEBUG level on the root logger.

        The message is formatted as "<calling function> <cls.name> <string>"."""
        # Only the caller's frame is needed; inspect.stack() would build the
        # whole stack (including source context) on every single call.
        frame = inspect.currentframe()
        caller = None if frame is None else frame.f_back
        caller_name = '?' if caller is None else caller.f_code.co_name
        # Lazy %-style arguments: no TypeError while cls.name is still None,
        # and no string building when DEBUG is disabled.
        logging.debug("%s %s %s", caller_name, cls.name, string)

    @classmethod
    def _get_output_dir(cls):
        """Returns the name of the output directory (for comics and JSON file).
        To be overridden if needed."""
        return cls.name

    @classmethod
    def _create_output_dir(cls):
        """Create output directory for the comic on the file system.

        Nothing happens if the directory already exists."""
        cls.log("start")
        os.makedirs(cls._get_output_dir(), exist_ok=True)
        cls.log("done")

    @classmethod
    def _get_json_file_path(cls):
        """Get the full path to the JSON file (<output dir>/<name>.json)."""
        return os.path.join(cls._get_output_dir(), cls.name + '.json')

    @classmethod
    def _load_db(cls):
        """Load the JSON file to return the list of comics.

        Deleted comics are included. An empty list is returned when the
        file cannot be opened (typically because it does not exist yet)."""
        cls.log("start")
        try:
            with open(cls._get_json_file_path()) as file:
                return json.load(file)
        except IOError:
            return []

    @classmethod
    def get_comics(cls):
        """Return the list of (non-deleted) comics stored in the database."""
        return [c for c in cls._load_db() if 'deleted' not in c]

    @classmethod
    def get_last_comic(cls, comics):
        """Return the last (non-deleted) comic of `comics`, None if there is none."""
        return next((c for c in reversed(comics) if 'deleted' not in c), None)

    @classmethod
    def _save_db(cls, data):
        """Save the list of comics in the JSON file.

        The data is first written to a temporary file next to the database
        and then moved in place, so that a failure while serialising leaves
        the previous database untouched. The exception, if any, is
        propagated to the caller."""
        cls.log("start")
        path = cls._get_json_file_path()
        tmp_path = path + '.tmp'
        try:
            with open(tmp_path, 'w') as file:
                json.dump(data, file, indent=4, sort_keys=True)
            os.replace(tmp_path, path)
        finally:
            # Still present only if something failed before the replace.
            if os.path.exists(tmp_path):
                os.remove(tmp_path)
        cls.log("done")

    @classmethod
    def get_file_in_output_dir(cls, url, prefix=None):
        """Download file from URL and save it in output folder.

        The local file name is `prefix` (if any) followed by the file name
        extracted from the URL. Returns whatever get_file_at_url() returns;
        callers in this class treat None as a failed download."""
        cls.log("start (url:%s)" % url)
        filename = os.path.join(
            cls._get_output_dir(),
            ('' if prefix is None else prefix) +
            get_filename_from_url(url))
        return get_file_at_url(url, filename)

    @classmethod
    def check_everything_is_ok(cls):
        """Perform tests on the database to check that everything is ok.

        Each non-deleted comic is checked (url, comic name, date, ordering,
        images and local files) with assertions: the first inconsistency
        raises AssertionError. Note that assertions are skipped when Python
        runs with -O."""
        cls.log("start")
        print(cls.name, ': about to check')
        comics = cls.get_comics()  # cls._load_db()
        imgs_paths = {}
        imgs_urls = {}
        prev_date, prev_num = None, None
        today = date.today()
        for i, comic in enumerate(comics):
            cls.print_comic(comic)
            url = comic.get('url')
            assert isinstance(url, str), "Url %s not a string" % url
            assert comic.get('comic') == cls.long_name
            assert all(isinstance(comic.get(k), int)
                       for k in ['day', 'month', 'year']), \
                "Invalid date data (%s)" % url
            curr_date = get_date_for_comic(comic)
            assert curr_date <= today
            curr_num = comic.get('num', 0)
            assert isinstance(curr_num, int)
            # Comics must be sorted either by date or by number.
            assert prev_date is None or prev_date <= curr_date or \
                prev_num < curr_num, \
                "Comics are not in order (%s)" % url
            prev_date, prev_num = curr_date, curr_num
            img = comic.get('img')
            local_img = comic.get('local_img')
            assert isinstance(img, list)
            assert isinstance(local_img, list)
            assert len(local_img) == len(img)
            for path in local_img:
                # None stands for an image that could not be downloaded.
                if path is not None:
                    assert os.path.isfile(path)
                    imgs_paths.setdefault(path, set()).add(i)
            for img_url in img:
                imgs_urls.setdefault(img_url, set()).add(i)
        print()
        if False:  # To check if imgs are not overriding themselves
            for path, nums in imgs_paths.items():
                if len(nums) > 1:
                    print("Image used multiple times", path, nums)
            for img_url, nums in imgs_urls.items():
                if len(nums) > 1:
                    print("Url used multiple times", img_url, nums)
        if False:  # To check that all files in folder are useful
            # Not called `json`, which would shadow the imported module.
            json_path = cls._get_json_file_path()
            output_dir = cls._get_output_dir()
            for file_ in os.listdir(output_dir):
                file_path = os.path.join(output_dir, file_)
                if file_path not in imgs_paths and file_path != json_path:
                    print("Unused image", file_path)
        cls.log("done")

    @classmethod
    def get_next_comic(cls, _):
        """Generator to get the next comic.

        First argument is the last properly downloaded comic which gives
        a starting point to download more.

        This is the method called by update(). It should yield comics which
        are basically dictionaries with the following properties :
            - 'url' is linked to a string
            - 'img' is linked to a list of url (that will get downloaded)
            - 'day'/'month'/'year' are self explicit. They are linked to
                integers corresponding to the comic dates. There should be
                all of them or none of them
            - more fields can be provided.

        The base implementation raises NotImplementedError."""
        raise NotImplementedError

    @classmethod
    def print_text(cls, text):
        """Print text by returning to the beginning of the line every time."""
        print(cls.name, ':', text, ' ' * 10, '\r', end='')

    @classmethod
    def print_comic(cls, comic):
        """Print information about a comic (its url)."""
        cls.print_text(comic['url'])

    @classmethod
    def update(cls):
        """Update the database : get the latest comics and save in the DB.

        This is a wrapper around get_next_comic() providing the following
        generic features :
            - logging
            - database handling (open and save)
            - exception handling (properly retrieved data are always saved)
            - file download
            - data management (adds current date if no date is provided)."""
        cls.log("start")
        print(cls.name, ': about to update')
        cls._create_output_dir()
        comics = cls._load_db()
        new_comics = []
        start = time.time()
        try:
            last_comic = cls.get_last_comic(comics)
            cls.log("last comic is %s" % ('None' if last_comic is None else last_comic['url']))
            for comic in cls.get_next_comic(last_comic):
                cls.log("got %s" % str(comic))
                if 'day' in comic:
                    assert all(isinstance(comic.get(k), int) for k in ['day', 'month', 'year'])
                else:
                    assert all(k not in comic for k in ['day', 'month', 'year'])
                    day = date.today()
                    comic['day'], comic['month'], comic['year'] = \
                        day.day, day.month, day.year
                prefix = comic.get('prefix', '')
                comic['local_img'] = [cls.get_file_in_output_dir(i, prefix)
                                      for i in comic['img']]
                comic['comic'] = cls.long_name
                comic['new'] = None  # "'new' in comic" to check if new
                new_comics.append(comic)
                cls.print_comic(comic)
        finally:
            # Runs even if the retrieval failed half-way, so that comics
            # retrieved so far are not lost.
            end = time.time()
            if new_comics:
                print()
                cls._save_db(comics + new_comics)
                print(cls.name, ": added", len(new_comics),
                      "comics in", end - start, "seconds")
            else:
                print(cls.name, ": nothing new")
        cls.log("done")

    @classmethod
    def try_to_get_missing_resources(cls):
        """Download images that might not have been downloaded properly in
        the first place.

        Images whose local path is None are downloaded again; comics for
        which an image is retrieved are flagged as new and the database is
        saved if anything changed."""
        cls.log("start")
        print(cls.name, ': about to try to get missing resources')
        cls._create_output_dir()
        comics = cls._load_db()
        change = False
        for comic in comics:
            local = comic['local_img']
            prefix = comic.get('prefix', '')
            for i, (path, url) in enumerate(zip(local, comic['img'])):
                if path is None:
                    new_path = cls.get_file_in_output_dir(url, prefix)
                    if new_path is None:
                        print(cls.name, ': failed to get', url)
                    else:
                        print(cls.name, ': got', url, 'at', new_path)
                        local[i] = new_path
                        change = True
                        comic['new'] = None
        if change:
            cls._save_db(comics)
            print(cls.name, ": some missing resources have been downloaded")
        cls.log("done")

    @classmethod
    def reset_new(cls):
        """Remove the 'new' flag on comics in the DB."""
        cls.log("start")
        cls._create_output_dir()
        cls._save_db([{key: val for key, val in c.items() if key != 'new'} for c in cls._load_db()])
        cls.log("done")

    @classmethod
    def delete_last(cls):
        """Delete last (non-deleted) comic.

        The comic is kept in the database and only flagged as deleted."""
        cls.log("start")
        comics = cls._load_db()
        last_comic = cls.get_last_comic(comics)
        if last_comic is None:
            cls.log("no comic to delete")
        else:
            cls.log("about to delete %s" % last_comic['url'])
            last_comic['deleted'] = None  # "'deleted' in comic" to check if deleted
            cls._save_db(comics)
        cls.log("done")

    @classmethod
    def info(cls):
        """Print information about the comics.

        Shows the number of (new) comics, the number of images and the
        period covered by the non-deleted comics."""
        cls.log("start")
        print("%s (%s) : " % (cls.long_name, cls.url))
        cls._create_output_dir()
        comics = cls.get_comics()  # cls._load_db()
        dates = [get_date_for_comic(c) for c in comics]
        print("%d comics (%d new)" % (len(comics), sum(1 for c in comics if 'new' in c)))
        print("%d images" % sum(len(c['img']) for c in comics))
        if dates:
            date_min, date_max = min(dates), max(dates)
            print("from %s to %s (%d days)" % (date_min, date_max, (date_max - date_min).days))
        print()
        cls.log("done")

    @classmethod
    def readme(cls):
        """Return information to generate README (a Markdown list item)."""
        return ' * [%s](%s)\n' % (cls.long_name, cls.url)

    @classmethod
    def gitignore(cls):
        """Return information to generate gitignore (the comic name)."""
        return '%s\n' % (cls.name)

    @classmethod
    def get_categories(cls):
        """Return categories to be able to group comics.

        Categories are such that all classes have their ancestors'
        categories and their own (provided as an iterable in the
        `_categories` class member). The result is a sorted list without
        duplicates."""
        return sorted({cat
                       for klass in cls.__mro__
                       for cat in getattr(klass, '_categories', [])})
322