GenericComic   F
last analyzed

Complexity

Total Complexity 84

Size/Duplication

Total Lines 310
Duplicated Lines 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
c 1
b 0
f 0
dl 0
loc 310
rs 1.5789
wmc 84

23 Methods

Rating   Name   Duplication   Size   Complexity  
A log() 0 6 1
F check_everything_is_ok() 0 53 25
A get_last_comic() 0 4 3
A get_file_in_output_dir() 0 9 2
A _get_json_file_path() 0 4 1
A _create_output_dir() 0 6 1
A _load_db() 0 9 3
A print_comic() 0 4 1
A _get_output_dir() 0 5 1
A get_comics() 0 4 3
A _save_db() 0 7 2
A get_next_comic() 0 16 1
A print_text() 0 4 1
F update() 0 48 12
A gitignore() 0 4 1
A reset_new() 0 7 4
A delete_last() 0 13 2
A get_categories() 0 10 3
B try_to_get_missing_resources() 0 27 6
A print_name() 0 6 1
B info() 0 16 6
A readme() 0 4 1
A delete_all() 0 10 3

How to fix   Complexity   

Complex Class

Complex classes like GenericComic often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

1
#! /usr/bin/python3
2
# vim: set expandtab tabstop=4 shiftwidth=4 :
3
"""Module to define logic common to all comics."""
4
5
import json
6
import time
7
import os
8
from datetime import date
9
from urlfunctions import get_filename_from_url, get_file_at_url
10
import inspect
11
import logging
12
13
14
def get_date_for_comic(comic):
    """Return date object for a given comic.

    `comic` is a dictionary whose 'year', 'month' and 'day' entries are
    passed, in that order, to `datetime.date`. A missing entry raises
    KeyError."""
    return date(comic['year'], comic['month'], comic['day'])
17
18
19
def get_info_before_comic(comic):
    """Generate the info to be put before the images.

    Yields a single 'by <author>' string when the comic dictionary has a
    non-empty 'author' entry, and nothing otherwise."""
    author = comic.get('author')
    if author:
        yield 'by ' + author
24
25
26
def get_info_after_comic(comic):
    """Generate the info to be put after the images.

    Yields the truthy values stored in the comic dictionary under 'alt',
    'title', 'title2', 'texts', 'name' and 'description', in that order.
    Missing or empty entries are skipped."""
    for name in ['alt', 'title', 'title2', 'texts', 'name', 'description']:
        info = comic.get(name)
        if info:
            yield info
32
33
34
class GenericComic(object):
    """Generic class to handle the logic common to all comics.

    Everything is implemented with class methods and class attributes:
    a concrete comic is a subclass that sets the attributes below and
    overrides get_next_comic().

    Attributes :
        name        Name of the comic (for logging, CLI and default output dir)
        long_name   Long name of the comic (to be added in the comic info)
        url         Base url for the comic (without trailing slash)
        _categories Categories added by this class (see get_categories())."""
    name = None
    long_name = None
    url = None
    _categories = ('ALL', )

    @classmethod
    def log(cls, string):
        """Log a debug message prefixed by the caller's name and the comic name.

        The logged message is '<calling function> <cls.name> <string>'."""
        # Only the caller's frame is needed: looking it up directly avoids
        # building a record for the whole stack as inspect.stack() does.
        frame = inspect.currentframe()
        try:
            caller = frame.f_back if frame is not None else None
            func_name = caller.f_code.co_name if caller is not None else '?'
        finally:
            del frame  # do not keep a reference cycle through the frame
        # Lazy %-formatting also copes with a `name` left to None.
        logging.debug("%s %s %s", func_name, cls.name, string)

    @classmethod
    def _get_output_dir(cls):
        """Returns the name of the output directory (for comics and JSON file).
        To be overridden if needed."""
        return cls.name

    @classmethod
    def _create_output_dir(cls):
        """Create output directory for the comic on the file system.

        Nothing happens if the directory already exists."""
        cls.log("start")
        os.makedirs(cls._get_output_dir(), exist_ok=True)
        cls.log("done")

    @classmethod
    def _get_json_file_path(cls):
        """Get the full path to the JSON file ('<output dir>/<name>.json')."""
        return os.path.join(cls._get_output_dir(), cls.name + '.json')

    @classmethod
    def _load_db(cls):
        """Load the JSON file to return the list of comics.

        Deleted comics are included. An empty list is returned when the
        file cannot be opened or read (IOError)."""
        cls.log("start")
        try:
            with open(cls._get_json_file_path()) as json_file:
                comics = json.load(json_file)
        except IOError:
            cls.log("could not read the database, using an empty list")
            comics = []
        cls.log("done")
        return comics

    @classmethod
    def get_comics(cls):
        """Return the list of (non-deleted) comics from the database."""
        return [c for c in cls._load_db() if 'deleted' not in c]

    @classmethod
    def get_last_comic(cls, comics):
        """Return the last (non-deleted) comic of the list or None."""
        return next((c for c in reversed(comics) if 'deleted' not in c), None)

    @classmethod
    def _save_db(cls, data):
        """Save the list of comics in the JSON file.

        The data is serialised before the file is opened so that data which
        cannot be converted to JSON raises without truncating the existing
        database."""
        cls.log("start")
        content = json.dumps(data, indent=4, sort_keys=True)
        with open(cls._get_json_file_path(), 'w') as json_file:
            json_file.write(content)
        cls.log("done")

    @classmethod
    def get_file_in_output_dir(cls, url, prefix=None, referer=None):
        """Download file from URL and save it in output folder.

        The local file name is the optional `prefix` followed by the name
        extracted from the URL. Returns whatever get_file_at_url() returns
        (callers in this class treat None as a failed download)."""
        cls.log("start (url:%s)" % url)
        filename = os.path.join(
            cls._get_output_dir(),
            ('' if prefix is None else prefix) +
            get_filename_from_url(url))
        return get_file_at_url(url, filename, referer)

    @classmethod
    def check_everything_is_ok(cls, *, check_duplicates=False, check_unused=False):
        """Perform tests on the database to check that everything is ok.

        Every non-deleted comic is checked with assertions (types of the
        fields, dates not in the future, ordering, existence of the local
        images). AssertionError is raised on the first problem found.

        Optional reports (disabled by default):
            - check_duplicates prints local paths and image urls used by
                more than one comic (to check that images are not
                overriding themselves)
            - check_unused prints files of the output directory which are
                neither a known image nor the JSON file."""
        cls.log("start")
        print(cls.name, ': about to check')
        comics = cls.get_comics()  # cls._load_db()
        imgs_paths = {}  # local path -> indices of the comics using it
        imgs_urls = {}  # image url -> indices of the comics using it
        prev_date, prev_num = None, None
        today = date.today()
        for i, comic in enumerate(comics):
            cls.print_comic(comic)
            url = comic.get('url')
            assert isinstance(url, str), "Url %s not a string" % url
            assert comic.get('comic') == cls.long_name
            assert all(isinstance(comic.get(k), int)
                       for k in ['day', 'month', 'year']), \
                "Invalid date data (%s)" % url
            curr_date = get_date_for_comic(comic)
            assert curr_date <= today
            curr_num = comic.get('num', 0)
            assert isinstance(curr_num, int)
            # Ordered by date or, failing that, by comic number
            assert prev_date is None or prev_date <= curr_date or \
                prev_num < curr_num, \
                "Comics are not in order (%s)" % url
            prev_date, prev_num = curr_date, curr_num
            img = comic.get('img')
            local_img = comic.get('local_img')
            assert isinstance(img, list)
            assert isinstance(local_img, list)
            assert len(local_img) == len(img)
            for path in local_img:
                if path is not None:  # None means "not downloaded"
                    assert os.path.isfile(path)
                    imgs_paths.setdefault(path, set()).add(i)
            for img_url in img:
                imgs_urls.setdefault(img_url, set()).add(i)
        print()
        if check_duplicates:  # To check if imgs are not overriding themselves
            for path, nums in imgs_paths.items():
                if len(nums) > 1:
                    print("Image used multiple times", path, nums)
            for img_url, nums in imgs_urls.items():
                if len(nums) > 1:
                    print("Url used multiple times", img_url, nums)
        if check_unused:  # To check that all files in folder are useful
            json_path = cls._get_json_file_path()
            output_dir = cls._get_output_dir()
            for file_ in os.listdir(output_dir):
                file_path = os.path.join(output_dir, file_)
                if file_path not in imgs_paths and file_path != json_path:
                    print("Unused image", file_path)
        cls.log("done")

    @classmethod
    def get_next_comic(cls, _):
        """Generator to get the next comic.

        First argument is the last properly downloaded comic which gives
        a starting point to download more.

        This is the method called by update(). It should yield comics which
        are basically dictionaries with the following property :
            - 'url' is linked to a string
            - 'img' is linked to a list of url (that will get downloaded)
            - 'day'/'month'/'year' are self explicit. They are linked to
                integers corresponding to the comic dates. There should be
                all of them or none of them
            - more fields can be provided.

        The base implementation raises NotImplementedError."""
        raise NotImplementedError

    @classmethod
    def print_text(cls, text):
        """Print text by returning to the beginning of the line every time."""
        print(cls.name, ':', text, ' ' * 10, '\r', end='')

    @classmethod
    def print_comic(cls, comic):
        """Print information about a comic (its url)."""
        cls.print_text(comic['url'])

    @classmethod
    def update(cls):
        """Update the database : get the latest comics and save in the DB.

        This is a wrapper around get_next_comic() providing the following
        generic features :
            - logging
            - database handling (open and save)
            - exception handling (properly retrieved data are always saved)
            - file download
            - data management (adds current date if no date is provided)."""
        cls.log("start")
        print(cls.name, ': about to update')
        cls._create_output_dir()
        comics = cls._load_db()
        new_comics = []
        start = time.time()
        try:
            last_comic = cls.get_last_comic(comics)
            cls.log("last comic is %s" % ('None' if last_comic is None else last_comic['url']))
            for comic in cls.get_next_comic(last_comic):
                cls.log("got %s" % str(comic))
                assert 'url' in comic
                assert 'img' in comic
                if 'day' in comic:
                    assert all(isinstance(comic.get(k), int) for k in ['day', 'month', 'year'])
                else:
                    # No date provided: use the date of retrieval
                    assert all(k not in comic for k in ['day', 'month', 'year'])
                    day = date.today()
                    comic['day'], comic['month'], comic['year'] = \
                        day.day, day.month, day.year
                prefix = comic.get('prefix', '')
                comic['local_img'] = [cls.get_file_in_output_dir(i, prefix, referer=comic['url'])
                                      for i in comic['img']]
                comic['comic'] = cls.long_name
                comic['new'] = None  # "'new' in comic" to check if new
                new_comics.append(comic)
                cls.print_comic(comic)
        finally:
            # Runs even if an exception stopped the retrieval so that the
            # comics properly retrieved so far are saved.
            end = time.time()
            if new_comics:
                print()
                cls._save_db(comics + new_comics)
                print(cls.name, ": added", len(new_comics),
                      "comics in", end - start, "seconds")
            else:
                print(cls.name, ": nothing new")
        cls.log("done")

    @classmethod
    def try_to_get_missing_resources(cls):
        """Download images that might not have been downloaded properly in
        the first place.

        Images whose local path is None are downloaded again. The database
        is saved only if at least one download succeeded and the
        corresponding comics are flagged as 'new'."""
        cls.log("start")
        print(cls.name, ': about to try to get missing resources')
        cls._create_output_dir()
        comics = cls._load_db()
        change = False
        for comic in comics:
            comicurl = comic['url']
            local = comic['local_img']
            prefix = comic.get('prefix', '')
            for i, (path, url) in enumerate(zip(local, comic['img'])):
                if path is None:
                    new_path = cls.get_file_in_output_dir(url, prefix, referer=comicurl)
                    if new_path is None:
                        print(cls.name, ': failed to get', url)
                    else:
                        print(cls.name, ': got', url, 'at', new_path)
                        local[i] = new_path
                        change = True
                        comic['new'] = None
        if change:
            cls._save_db(comics)
            print(cls.name, ": some missing resources have been downloaded")
        cls.log("done")

    @classmethod
    def reset_new(cls):
        """Remove the 'new' flag on comics in the DB."""
        cls.log("start")
        cls._create_output_dir()
        cls._save_db([{key: val for key, val in c.items() if key != 'new'} for c in cls._load_db()])
        cls.log("done")

    @classmethod
    def delete_last(cls):
        """Delete last (non-deleted) comic.

        The comic is kept in the database but flagged as 'deleted'."""
        cls.log("start")
        comics = cls._load_db()
        last_comic = cls.get_last_comic(comics)
        if last_comic is None:
            cls.log("no comic to delete")
        else:
            cls.log("about to delete %s" % last_comic['url'])
            last_comic['deleted'] = None  # "'deleted' in comic" to check if deleted
            cls._save_db(comics)
        cls.log("done")

    @classmethod
    def delete_all(cls):
        """Delete all comics.

        The comics are kept in the database but flagged as 'deleted'."""
        cls.log("start")
        comics = cls._load_db()
        if comics:
            for c in comics:
                c['deleted'] = None  # "'deleted' in comic" to check if deleted
            cls._save_db(comics)
        cls.log("done")

    @classmethod
    def print_name(cls):
        """Print name."""
        cls.log("start")
        print(cls.name)
        cls.log("end")

    @classmethod
    def info(cls):
        """Print information about the comics.

        Prints the names, the categories, the number of (new) comics, the
        number of images and the range of dates covered."""
        cls.log("start")
        print("%s (%s) : " % (cls.long_name, cls.url))
        print("In " + ', '.join(cls.get_categories()))
        cls._create_output_dir()
        comics = cls.get_comics()  # cls._load_db()
        dates = [get_date_for_comic(c) for c in comics]
        print("%d comics (%d new)" % (len(comics), sum(1 for c in comics if 'new' in c)))
        print("%d images" % sum(len(c['img']) for c in comics))
        if dates:
            date_min, date_max = min(dates), max(dates)
            print("from %s to %s (%d days)" % (date_min, date_max, (date_max - date_min).days))
        print()
        cls.log("done")

    @classmethod
    def readme(cls):
        """Return information to generate README."""
        return ' * [%s](%s)\n' % (cls.long_name, cls.url)

    @classmethod
    def gitignore(cls):
        """Return information to generate gitignore."""
        return '%s\n' % (cls.name)

    @classmethod
    def get_categories(cls):
        """Return categories to be able to group comics.

        Categories are such that all classes have their ancestors'
        categories and their own (provided as an iterable in the
        `_categories` class member). The result is a sorted list without
        duplicates."""
        return sorted({cat
                       for klass in cls.__mro__
                       for cat in getattr(klass, '_categories', [])})
344