Completed
Push — master ( 6f7896...0de88c )
by De
06:41
created

GenericComic   F

Complexity

Total Complexity 68

Size/Duplication

Total Lines 252
Duplicated Lines 0 %

Importance

Changes 3
Bugs 0 Features 0
Metric Value
dl 0
loc 252
rs 2.9411
c 3
b 0
f 0
wmc 68

17 Methods

Rating   Name   Duplication   Size   Complexity  
A log() 0 6 1
A gitignore() 0 4 1
A get_output_dir() 0 5 1
A create_output_dir() 0 6 1
A reset_new() 0 7 4
F check_everything_is_ok() 0 53 25
A get_file_in_output_dir() 0 9 2
A get_json_file_path() 0 4 1
B try_to_get_missing_resources() 0 26 6
A print_comic() 0 4 1
A load_db() 0 9 3
A save_db() 0 7 2
F update() 0 46 11
A get_next_comic() 0 16 1
A print_text() 0 4 1
A readme() 0 4 1
B info() 0 15 6

How to fix   Complexity   

Complex Class

Complex classes like GenericComic often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

1
#! /usr/bin/python3
2
# vim: set expandtab tabstop=4 shiftwidth=4 :
3
"""Module to define logic common to all comics."""
4
5
import json
6
import time
7
import os
8
from datetime import date
9
from urlfunctions import get_filename_from_url, get_file_at_url
10
import inspect
11
import logging
12
13
14
def get_date_for_comic(comic):
    """Build the datetime.date described by a comic dictionary.

    The comic must map 'year', 'month' and 'day' to integers; a missing
    key raises KeyError."""
    # Keys are looked up in the same year/month/day order as before.
    return date(year=comic['year'], month=comic['month'], day=comic['day'])
17
18
19
def get_info_before_comic(comic):
    """Yield the lines of text to display before the images of a comic.

    Currently this is a single 'by <author>' line, produced only when the
    comic has a non-empty 'author' entry."""
    credited = comic.get('author')
    if not credited:
        return
    yield 'by ' + credited
24
25
26
def get_info_after_comic(comic):
    """Yield the pieces of information to display after the images.

    The optional fields are looked up in a fixed order and only the ones
    holding a truthy value are yielded."""
    fields = ('alt', 'title', 'title2', 'texts', 'name', 'description')
    yield from (value for value in map(comic.get, fields) if value)
32
33
34
class GenericComic(object):
    """Generic class to handle the logic common to all comics

    Attributes :
        name        Name of the comic (for logging, CLI and default output dir)
        long_name   Long name of the comic (to be added in the comic info)
        url         Base url for the comic (without trailing slash)."""
    name = None
    long_name = None
    url = None

    @staticmethod
    def _ensure(condition, message):
        """Raise AssertionError(message) unless condition is true.

        Used instead of the assert statement so that the validations are
        still performed when Python runs with -O (which strips asserts)."""
        if not condition:
            raise AssertionError(message)

    @classmethod
    def log(cls, string):
        """Dirty logging function.

        Emits a debug record made of the name of the calling function, the
        name of the comic and the given string."""
        # TODO: https://docs.python.org/2/library/logging.html#logrecord-attributes
        # we do not need to retrieve the function name manually
        current = inspect.currentframe()
        try:
            if current is not None and current.f_back is not None:
                # Much cheaper than inspect.stack(), which builds a record
                # (including source context) for every frame of the stack.
                caller = current.f_back.f_code.co_name
            else:
                # Interpreter without frame support: use the slow path.
                caller = inspect.stack()[1][3]
        finally:
            # Do not keep a reference to our own frame (reference cycle).
            del current
        # Lazy %-formatting: works even when cls.name is None.
        logging.debug("%s %s %s", caller, cls.name, string)

    @classmethod
    def get_output_dir(cls):
        """Returns the name of the output directory (for comics and JSON file).
        To be overridden if needed."""
        return cls.name

    @classmethod
    def create_output_dir(cls):
        """Create output directory for the comic on the file system."""
        cls.log("start")
        os.makedirs(cls.get_output_dir(), exist_ok=True)
        cls.log("done")

    @classmethod
    def get_json_file_path(cls):
        """Get the full path to the JSON file."""
        return os.path.join(cls.get_output_dir(), cls.name + '.json')

    @classmethod
    def load_db(cls):
        """Load the JSON file to return a list of comics.

        Returns an empty list when the file cannot be opened."""
        cls.log("start")
        try:
            with open(cls.get_json_file_path()) as db_file:
                return json.load(db_file)
        except IOError:
            return []

    @classmethod
    def save_db(cls, data):
        """Save the list of comics in the JSON file.

        The data is serialised before the file is opened so that a
        serialisation error (TypeError, ValueError) is raised without
        truncating the database already on disk."""
        cls.log("start")
        serialized = json.dumps(data, indent=4, sort_keys=True)
        with open(cls.get_json_file_path(), 'w+') as db_file:
            db_file.write(serialized)
        cls.log("done")

    @classmethod
    def get_file_in_output_dir(cls, url, prefix=None):
        """Download file from URL and save it in output folder.

        The local file name is the optional prefix followed by the file
        name extracted from the URL. Returns whatever get_file_at_url
        returns."""
        cls.log("start (url:%s)" % url)
        filename = os.path.join(
            cls.get_output_dir(),
            ('' if prefix is None else prefix) +
            get_filename_from_url(url))
        return get_file_at_url(url, filename)

    @classmethod
    def _check_comic(cls, comic, today, prev_date, prev_num):
        """Validate a single comic of the database.

        today is the latest acceptable date; prev_date and prev_num are the
        date and number of the previous comic (None for the first one).
        Returns the (date, number) of the comic and raises AssertionError
        on the first failed validation."""
        url = comic.get('url')
        # (url,) so that a tuple value cannot break the formatting itself.
        cls._ensure(isinstance(url, str), "Url %s not a string" % (url,))
        cls._ensure(comic.get('comic') == cls.long_name,
                    "Unexpected comic name %r (%s)" % (comic.get('comic'), url))
        cls._ensure(all(isinstance(comic.get(k), int)
                        for k in ['day', 'month', 'year']),
                    "Invalid date data (%s)" % url)
        curr_date = get_date_for_comic(comic)
        cls._ensure(curr_date <= today,
                    "Comic dated in the future (%s)" % url)
        curr_num = comic.get('num', 0)
        cls._ensure(isinstance(curr_num, int),
                    "Comic number is not an integer (%s)" % url)
        cls._ensure(prev_date is None or prev_date <= curr_date or
                    prev_num < curr_num,
                    "Comics are not in order (%s)" % url)
        img = comic.get('img')
        local_img = comic.get('local_img')
        cls._ensure(isinstance(img, list), "'img' is not a list (%s)" % url)
        cls._ensure(isinstance(local_img, list),
                    "'local_img' is not a list (%s)" % url)
        cls._ensure(len(local_img) == len(img),
                    "'img' and 'local_img' differ in length (%s)" % url)
        return curr_date, curr_num

    @classmethod
    def check_everything_is_ok(cls, check_duplicates=False,
                               check_unused_files=False):
        """Perform tests on the database to check that everything is ok.

        Arguments :
            check_duplicates    also report local images and image urls
                                used by more than one comic
            check_unused_files  also report files of the output directory
                                that are neither the JSON file nor an
                                image referenced by the database
        Both optional checks are disabled by default.

        Raises AssertionError on the first inconsistency found."""
        cls.log("start")
        print(cls.name, ': about to check')
        comics = cls.load_db()
        imgs_paths = {}
        imgs_urls = {}
        prev_date, prev_num = None, None
        today = date.today()
        for i, comic in enumerate(comics):
            cls.print_comic(comic)
            prev_date, prev_num = cls._check_comic(
                comic, today, prev_date, prev_num)
            for path in comic['local_img']:
                # None marks an image that could not be downloaded.
                if path is not None:
                    cls._ensure(os.path.isfile(path),
                                "Missing local image %s" % path)
                    imgs_paths.setdefault(path, set()).add(i)
            for img_url in comic['img']:
                imgs_urls.setdefault(img_url, set()).add(i)
        print()
        if check_duplicates:  # To check if imgs are not overriding themselves
            for path, nums in imgs_paths.items():
                if len(nums) > 1:
                    print("Image used multiple times", path, nums)
            for img_url, nums in imgs_urls.items():
                if len(nums) > 1:
                    print("Url used multiple times", img_url, nums)
        if check_unused_files:  # To check that all files in folder are useful
            json_path = cls.get_json_file_path()
            output_dir = cls.get_output_dir()
            for file_ in os.listdir(output_dir):
                file_path = os.path.join(output_dir, file_)
                if file_path not in imgs_paths and file_path != json_path:
                    print("Unused image", file_path)
        cls.log("done")

    @classmethod
    def get_next_comic(cls, _):
        """Generator to get the next comic.

        First argument is the last properly downloaded comic which gives
        a starting point to download more.

        This is the method called by update(). It should yield comics which
        are basically dictionaries with the following property :
            - 'url' is linked to a string
            - 'img' is linked to a list of url (that will get downloaded)
            - 'day'/'month'/'year' are self explicit. They are linked to
                integers corresponding to the comic dates. There should be
                all of them or none of them
            - more fields can be provided."""
        raise NotImplementedError

    @classmethod
    def print_text(cls, text):
        """Print text by returning to the beginning of the line every time."""
        print(cls.name, ':', text, ' ' * 10, '\r', end='')

    @classmethod
    def print_comic(cls, comic):
        """Print information about a comic."""
        cls.print_text(comic['url'])

    @classmethod
    def update(cls):
        """Update the database : get the latest comics and save in the DB.

        This is a wrapper around get_next_comic() providing the following
        generic features :
            - logging
            - database handling (open and save)
            - exception handling (properly retrieved data are always saved)
            - file download
            - data management (adds current date if no date is provided).

        Raises AssertionError when a comic provides only part of its date
        or non-integer date fields."""
        cls.log("start")
        print(cls.name, ': about to update')
        cls.create_output_dir()
        comics = cls.load_db()
        new_comics = []
        date_keys = ['day', 'month', 'year']
        start = time.time()
        try:
            last_comic = comics[-1] if comics else None
            cls.log("last comic is %s" % ('None' if last_comic is None else last_comic['url']))
            for comic in cls.get_next_comic(last_comic):
                cls.log("got %s" % str(comic))
                if 'day' in comic:
                    cls._ensure(
                        all(isinstance(comic.get(k), int) for k in date_keys),
                        "Invalid date data in retrieved comic")
                else:
                    cls._ensure(
                        all(k not in comic for k in date_keys),
                        "Incomplete date data in retrieved comic")
                    day = date.today()
                    comic['day'], comic['month'], comic['year'] = \
                        day.day, day.month, day.year
                prefix = comic.get('prefix', '')
                comic['local_img'] = [cls.get_file_in_output_dir(i, prefix)
                                      for i in comic['img']]
                comic['comic'] = cls.long_name
                comic['new'] = None  # "'new' in comic" to check if new
                new_comics.append(comic)
                cls.print_comic(comic)
        finally:
            # Runs even on error so that what was retrieved is not lost.
            end = time.time()
            if new_comics:
                print()
                cls.save_db(comics + new_comics)
                print(cls.name, ": added", len(new_comics),
                      "comics in", end - start, "seconds")
            else:
                print(cls.name, ": nothing new")
        cls.log("done")

    @classmethod
    def try_to_get_missing_resources(cls):
        """Download images that might not have been downloaded properly in
        the first place.

        Every None entry of a comic's 'local_img' list is retried; the
        database is saved only if at least one download succeeded."""
        cls.log("start")
        print(cls.name, ': about to try to get missing resources')
        cls.create_output_dir()
        comics = cls.load_db()
        change = False
        for comic in comics:
            local = comic['local_img']
            prefix = comic.get('prefix', '')
            for i, (path, url) in enumerate(zip(local, comic['img'])):
                if path is None:
                    new_path = cls.get_file_in_output_dir(url, prefix)
                    if new_path is None:
                        print(cls.name, ': failed to get', url)
                    else:
                        print(cls.name, ': got', url, 'at', new_path)
                        local[i] = new_path
                        change = True
                        comic['new'] = None
        if change:
            cls.save_db(comics)
            print(cls.name, ": some missing resources have been downloaded")
        cls.log("done")

    @classmethod
    def reset_new(cls):
        """Remove the 'new' flag on comics in the DB."""
        cls.log("start")
        cls.create_output_dir()
        cls.save_db([{key: val for key, val in c.items() if key != 'new'} for c in cls.load_db()])
        cls.log("done")

    @classmethod
    def info(cls):
        """Print information about the comics.

        Shows the number of comics (and how many are flagged 'new'), the
        number of images and the range of dates covered."""
        cls.log("start")
        print("%s (%s) : " % (cls.long_name, cls.url))
        cls.create_output_dir()
        comics = cls.load_db()
        dates = [get_date_for_comic(c) for c in comics]
        print("%d comics (%d new)" % (len(comics), sum(1 for c in comics if 'new' in c)))
        print("%d images" % sum(len(c['img']) for c in comics))
        if dates:
            date_min, date_max = min(dates), max(dates)
            print("from %s to %s (%d days)" % (date_min, date_max, (date_max - date_min).days))
        print()
        cls.log("done")

    @classmethod
    def readme(cls):
        """Return information to generate README."""
        return ' * [%s](%s)\n' % (cls.long_name, cls.url)

    @classmethod
    def gitignore(cls):
        """Return information to generate gitignore."""
        return '%s\n' % (cls.name)
286