Completed
Push — master ( c3d761...46e76e )
by De
01:17
created

GenericComic.get_categories()   A

Complexity

Conditions 3

Size

Total Lines 10

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 3
c 1
b 0
f 0
dl 0
loc 10
rs 9.4285
1
#! /usr/bin/python3
2
# vim: set expandtab tabstop=4 shiftwidth=4 :
3
"""Module to define logic common to all comics."""
4
5
import json
6
import time
7
import os
8
from datetime import date
9
from urlfunctions import get_filename_from_url, get_file_at_url
10
import inspect
11
import logging
12
13
14
def get_date_for_comic(comic):
    """Return date object for a given comic.

    `comic` is a mapping whose 'year', 'month' and 'day' entries are passed,
    in that order, to `datetime.date`. A missing key raises KeyError and
    values rejected by `date` raise whatever `date` raises for them."""
    return date(comic['year'], comic['month'], comic['day'])
17
18
19
def get_info_before_comic(comic):
    """Generate the info to be put before the images.

    Yields a single 'by <author>' string when the comic has a truthy
    'author' entry; yields nothing when the entry is missing or empty."""
    author = comic.get('author')
    if author:
        yield 'by ' + author
24
25
26
def get_info_after_comic(comic):
    """Generate the info to be put after the images.

    Looks up a fixed list of optional fields, in the order below, and
    yields the value of each one that is present and truthy."""
    # Order matters: it is the order in which the values are yielded.
    for name in ('alt', 'title', 'title2', 'texts', 'name', 'description'):
        info = comic.get(name)
        if info:
            yield info
32
33
34
class GenericComic:
    """Generic class to handle the logic common to all comics.

    All methods are classmethods: a comic is described by a subclass
    setting the class attributes below and overriding get_next_comic().

    Attributes :
        name        Name of the comic (for logging, CLI and default output dir)
        long_name   Long name of the comic (to be added in the comic info)
        url         Base url for the comic (without trailing slash)
        _categories Categories added by this class (see get_categories())."""
    name = None
    long_name = None
    url = None
    _categories = ('ALL', )

    @classmethod
    def log(cls, string):
        """Log `string` at debug level, prefixed by the calling function's
        name and the comic name."""
        # Reading the caller's frame directly is much cheaper than
        # inspect.stack(), which builds the whole stack with source context.
        frame = inspect.currentframe()
        try:
            caller = frame.f_back if frame is not None else None
            func_name = caller.f_code.co_name if caller is not None else '?'
        finally:
            # Drop the frame reference to avoid a reference cycle.
            del frame
        # Lazy %-arguments: nothing is formatted when debug logging is off,
        # and a comic without a name no longer raises TypeError.
        logging.debug("%s %s %s", func_name, cls.name, string)

    @classmethod
    def get_output_dir(cls):
        """Returns the name of the output directory (for comics and JSON file).
        To be overridden if needed."""
        return cls.name

    @classmethod
    def create_output_dir(cls):
        """Create output directory for the comic on the file system.

        Does nothing if the directory already exists."""
        cls.log("start")
        os.makedirs(cls.get_output_dir(), exist_ok=True)
        cls.log("done")

    @classmethod
    def get_json_file_path(cls):
        """Get the full path to the JSON file."""
        return os.path.join(cls.get_output_dir(), cls.name + '.json')

    @classmethod
    def load_db(cls):
        """Load the JSON file to return a list of comics.

        Returns an empty list when the file cannot be opened or read
        (typically because nothing has been saved yet)."""
        cls.log("start")
        try:
            with open(cls.get_json_file_path(), encoding='utf-8') as file:
                return json.load(file)
        except IOError:
            return []

    @classmethod
    def save_db(cls, data):
        """Save the list of comics in the JSON file.

        The output directory must already exist."""
        cls.log("start")
        with open(cls.get_json_file_path(), 'w', encoding='utf-8') as file:
            json.dump(data, file, indent=4, sort_keys=True)
        cls.log("done")

    @classmethod
    def get_file_in_output_dir(cls, url, prefix=None):
        """Download file from URL and save it in output folder.

        The local file name is `prefix` (if any) followed by the name
        derived from the URL. Returns whatever get_file_at_url() returns;
        callers in this class treat None as a failed download."""
        cls.log("start (url:%s)" % url)
        filename = os.path.join(
            cls.get_output_dir(),
            ('' if prefix is None else prefix) +
            get_filename_from_url(url))
        return get_file_at_url(url, filename)

    @classmethod
    def check_everything_is_ok(cls, *, report_duplicates=False,
                               report_unused=False):
        """Perform tests on the database to check that everything is ok.

        Each stored comic is validated with assertions (types of the
        fields, date not in the future, ordering, images present on disk),
        so an inconsistent database raises AssertionError.

        Keyword arguments (both off by default, output is printed only) :
            report_duplicates   report image paths/urls used by several
                                comics (to check that images are not
                                overriding themselves)
            report_unused       report files of the output directory that
                                no comic references."""
        cls.log("start")
        print(cls.name, ': about to check')
        comics = cls.load_db()
        imgs_paths = {}  # local path -> indices of the comics using it
        imgs_urls = {}   # image url -> indices of the comics using it
        prev_date, prev_num = None, None
        today = date.today()
        for i, comic in enumerate(comics):
            cls.print_comic(comic)
            url = comic.get('url')
            assert isinstance(url, str), "Url %s not a string" % url
            assert comic.get('comic') == cls.long_name
            assert all(isinstance(comic.get(k), int)
                       for k in ['day', 'month', 'year']), \
                "Invalid date data (%s)" % url
            curr_date = get_date_for_comic(comic)
            assert curr_date <= today
            curr_num = comic.get('num', 0)
            assert isinstance(curr_num, int)
            # Comics must be sorted by date or, failing that, by number.
            assert prev_date is None or prev_date <= curr_date or \
                prev_num < curr_num, \
                "Comics are not in order (%s)" % url
            prev_date, prev_num = curr_date, curr_num
            img = comic.get('img')
            local_img = comic.get('local_img')
            assert isinstance(img, list)
            assert isinstance(local_img, list)
            assert len(local_img) == len(img)
            for path in local_img:
                # None marks an image that could not be downloaded.
                if path is not None:
                    assert os.path.isfile(path)
                    imgs_paths.setdefault(path, set()).add(i)
            for img_url in img:
                imgs_urls.setdefault(img_url, set()).add(i)
        print()
        if report_duplicates:
            for path, nums in imgs_paths.items():
                if len(nums) > 1:
                    print("Image used multiple times", path, nums)
            for img_url, nums in imgs_urls.items():
                if len(nums) > 1:
                    print("Url used multiple times", img_url, nums)
        if report_unused:
            # Named json_path so that the json module is not shadowed.
            json_path = cls.get_json_file_path()
            output_dir = cls.get_output_dir()
            for file_ in os.listdir(output_dir):
                file_path = os.path.join(output_dir, file_)
                if file_path not in imgs_paths and file_path != json_path:
                    print("Unused image", file_path)
        cls.log("done")

    @classmethod
    def get_next_comic(cls, _):
        """Generator to get the next comic.

        First argument is the last properly downloaded comic which gives
        a starting point to download more.

        This is the method called by update(). It should yield comics which
        are basically dictionaries with the following property :
            - 'url' is linked to a string
            - 'img' is linked to a list of url (that will get downloaded)
            - 'day'/'month'/'year' are self explicit. They are linked to
                integers corresponding to the comic dates. There should be
                all of them or none of them
            - more fields can be provided."""
        raise NotImplementedError

    @classmethod
    def print_text(cls, text):
        """Print text by returning to the beginning of the line every time."""
        # Trailing spaces wipe leftovers of a longer previous line; '\r'
        # with end='' makes the next print overwrite this one.
        print(cls.name, ':', text, ' ' * 10, '\r', end='')

    @classmethod
    def print_comic(cls, comic):
        """Print information about a comic."""
        cls.print_text(comic['url'])

    @classmethod
    def update(cls):
        """Update the database : get the latest comics and save in the DB.

        This is a wrapper around get_next_comic() providing the following
        generic features :
            - logging
            - database handling (open and save)
            - exception handling (properly retrieved data are always saved)
            - file download
            - data management (adds current date if no date is provided)."""
        cls.log("start")
        print(cls.name, ': about to update')
        cls.create_output_dir()
        comics = cls.load_db()
        new_comics = []
        start = time.time()
        try:
            last_comic = comics[-1] if comics else None
            cls.log("last comic is %s" % ('None' if last_comic is None else last_comic['url']))
            for comic in cls.get_next_comic(last_comic):
                cls.log("got %s" % str(comic))
                if 'day' in comic:
                    assert all(isinstance(comic.get(k), int) for k in ['day', 'month', 'year'])
                else:
                    # No date provided: none of the fields may be set and
                    # the comic is dated from today.
                    assert all(k not in comic for k in ['day', 'month', 'year'])
                    day = date.today()
                    comic['day'], comic['month'], comic['year'] = \
                        day.day, day.month, day.year
                prefix = comic.get('prefix', '')
                comic['local_img'] = [cls.get_file_in_output_dir(i, prefix)
                                      for i in comic['img']]
                comic['comic'] = cls.long_name
                comic['new'] = None  # "'new' in comic" to check if new
                new_comics.append(comic)
                cls.print_comic(comic)
        finally:
            # Runs even if retrieval failed half-way, so that comics
            # already retrieved are not lost.
            end = time.time()
            if new_comics:
                print()
                cls.save_db(comics + new_comics)
                print(cls.name, ": added", len(new_comics),
                      "comics in", end - start, "seconds")
            else:
                print(cls.name, ": nothing new")
        cls.log("done")

    @classmethod
    def try_to_get_missing_resources(cls):
        """Download images that might not have been downloaded properly in
        the first place.

        An image is considered missing when its 'local_img' entry is None.
        The database is saved only if at least one image was retrieved, and
        comics that got a new image are flagged as 'new'."""
        cls.log("start")
        print(cls.name, ': about to try to get missing resources')
        cls.create_output_dir()
        comics = cls.load_db()
        change = False
        for comic in comics:
            local = comic['local_img']
            prefix = comic.get('prefix', '')
            for i, (path, url) in enumerate(zip(local, comic['img'])):
                if path is None:
                    new_path = cls.get_file_in_output_dir(url, prefix)
                    if new_path is None:
                        print(cls.name, ': failed to get', url)
                    else:
                        print(cls.name, ': got', url, 'at', new_path)
                        local[i] = new_path
                        change = True
                        comic['new'] = None
        if change:
            cls.save_db(comics)
            print(cls.name, ": some missing resources have been downloaded")
        cls.log("done")

    @classmethod
    def reset_new(cls):
        """Remove the 'new' flag on comics in the DB."""
        cls.log("start")
        cls.create_output_dir()
        cls.save_db([{key: val for key, val in c.items() if key != 'new'} for c in cls.load_db()])
        cls.log("done")

    @classmethod
    def info(cls):
        """Print information about the comics.

        Prints the number of comics (and how many are flagged 'new'), the
        number of images and, if there is any comic, the date range."""
        cls.log("start")
        print("%s (%s) : " % (cls.long_name, cls.url))
        cls.create_output_dir()
        comics = cls.load_db()
        dates = [get_date_for_comic(c) for c in comics]
        print("%d comics (%d new)" % (len(comics), sum(1 for c in comics if 'new' in c)))
        print("%d images" % sum(len(c['img']) for c in comics))
        if dates:
            date_min, date_max = min(dates), max(dates)
            print("from %s to %s (%d days)" % (date_min, date_max, (date_max - date_min).days))
        print()
        cls.log("done")

    @classmethod
    def readme(cls):
        """Return information to generate README."""
        return ' * [%s](%s)\n' % (cls.long_name, cls.url)

    @classmethod
    def gitignore(cls):
        """Return information to generate gitignore."""
        return '%s\n' % (cls.name)

    @classmethod
    def get_categories(cls):
        """Return categories to be able to group comics.

        Categories are such that all classes have their ancestors'
        categories and their own (provided as an iterable in the
        `_categories` class member). The result is a sorted list
        without duplicates."""
        return sorted(set(cat
                          for klass in cls.__mro__
                          for cat in getattr(klass, '_categories', [])))
298