GenericComic - Code Metrics - SylvainDe/ComicBookMaker - Measure and Improve Code Quality continuously with Scrutinizer

GenericComic F
last analyzed 2018-06-15 09:38 UTC

↳ Parent: Project

Complexity

Total Complexity

Size/Duplication

Total Lines	310
Duplicated Lines	0 %

Importance

Changes	1
Bugs	0	Features	0

Metric	Value
c	1
b	0
f	0
dl	0
loc	310
rs	1.5789
wmc	84

23 Methods

Rating	Name	Size	Complexity
A	log()	6	1
F	check_everything_is_ok()	53	25
A	get_last_comic()	4	3
A	get_file_in_output_dir()	9	2
A	_get_json_file_path()	4	1
A	_create_output_dir()	6	1
A	_load_db()	9	3
A	print_comic()	4	1
A	_get_output_dir()	5	1
A	get_comics()	4	3
A	_save_db()	7	2
A	get_next_comic()	16	1
A	print_text()	4	1
F	update()	48	12
A	gitignore()	4	1
A	reset_new()	7	4
A	delete_last()	13	2
A	get_categories()	10	3
B	try_to_get_missing_resources()	27	6
A	print_name()	6	1
B	info()	16	6
A	readme()	4	1
A	delete_all()	10	3

How to fix Complexity

#! /usr/bin/python3
# vim: set expandtab tabstop=4 shiftwidth=4 :
"""Module to define logic common to all comics."""

import json
import time
import os
from datetime import date
from urlfunctions import get_filename_from_url, get_file_at_url
import inspect
import logging


def get_date_for_comic(comic):
    """Build the `date` of a comic from its 'year', 'month' and 'day' entries."""
    year, month, day = comic['year'], comic['month'], comic['day']
    return date(year, month, day)


def get_info_before_comic(comic):
    """Yield the text lines to display before the images of a comic.

    Currently this is a single 'by <author>' line, produced only when the
    comic has a non-empty 'author' entry."""
    author = comic.get('author')
    if not author:
        return
    yield 'by ' + author


def get_info_after_comic(comic):
    """Yield the text entries to display after the images of a comic.

    The fields are looked up in a fixed order and empty or missing ones
    are skipped."""
    fields = ('alt', 'title', 'title2', 'texts', 'name', 'description')
    for field in fields:
        value = comic.get(field)
        if not value:
            continue
        yield value


class GenericComic(object):
    """Generic class to handle the logic common to all comics

    Attributes :
        name        Name of the comic (for logging, CLI and default output dir)
        long_name   Long name of the comic (to be added in the comic info)
        url         Base url for the comic (without trailing slash)."""
    name = None
    long_name = None
    url = None
    _categories = ('ALL', )

    @classmethod
    def log(cls, string):
        """Dirty logging function."""
        # TODO: https://docs.python.org/2/library/logging.html#logrecord-attributes
        # we do not need to retrieve the function name manually
        logging.debug(inspect.stack()[1][3] + " " + cls.name + " " + string)

    @classmethod
    def _get_output_dir(cls):
        """Returns the name of the output directory (for comics and JSON file).
        To be overridden if needed."""
        return cls.name

    @classmethod
    def _create_output_dir(cls):
        """Create output directory for the comic on the file system."""
        cls.log("start")
        os.makedirs(cls._get_output_dir(), exist_ok=True)
        cls.log("done")

    @classmethod
    def _get_json_file_path(cls):
        """Get the full path to the JSON file."""
        return os.path.join(cls._get_output_dir(), cls.name + '.json')

    @classmethod
    def _load_db(cls):
        """Load the JSON file to return the list of comics."""
        cls.log("start")
        try:
            with open(cls._get_json_file_path()) as file:
                return json.load(file)
        except IOError:
            return []

    @classmethod
    def get_comics(cls):
        """Return the list of comics."""
        return [c for c in cls._load_db() if 'deleted' not in c]

    @classmethod
    def get_last_comic(cls, comics):
        """Return the last (non-deleted) comic."""
        return next((c for c in reversed(comics) if 'deleted' not in c), None)

    @classmethod
    def _save_db(cls, data):
        """Save the list of comics in the JSON file."""
        cls.log("start")
        with open(cls._get_json_file_path(), 'w+') as file:
            json.dump(data, file, indent=4, sort_keys=True)
        cls.log("done")

    @classmethod
    def get_file_in_output_dir(cls, url, prefix=None, referer=None):
        """Fetch the file at `url` into the output directory.

        The local file name is the name derived from the url by
        `get_filename_from_url`, preceded by `prefix` when one is given.
        The result of `get_file_at_url` is returned as is."""
        cls.log("start (url:%s)" % url)
        output_dir = cls._get_output_dir()
        name_prefix = prefix if prefix is not None else ''
        local_name = name_prefix + get_filename_from_url(url)
        destination = os.path.join(output_dir, local_name)
        return get_file_at_url(url, destination, referer)

    @classmethod
    def check_everything_is_ok(cls):
        """Perform tests on the database to check that everything is ok."""
        cls.log("start")
        print(cls.name, ': about to check')
        comics = cls.get_comics()  # cls._load_db()
        imgs_paths = {}
        imgs_urls = {}
        prev_date, prev_num = None, None
        today = date.today()
        for i, comic in enumerate(comics):
            cls.print_comic(comic)
            url = comic.get('url')
            assert isinstance(url, str), "Url %s not a string" % url
            assert comic.get('comic') == cls.long_name
            assert all(isinstance(comic.get(k), int)
                       for k in ['day', 'month', 'year']), \
                "Invalid date data (%s)" % url
            curr_date = get_date_for_comic(comic)
            assert curr_date <= today
            curr_num = comic.get('num', 0)
            assert isinstance(curr_num, int)
            assert prev_date is None or prev_date <= curr_date or \
                prev_num < curr_num, \
                "Comics are not in order (%s)" % url
            prev_date, prev_num = curr_date, curr_num
            img = comic.get('img')
            local_img = comic.get('local_img')
            assert isinstance(img, list)
            assert isinstance(local_img, list)
            assert len(local_img) == len(img)
            for path in local_img:
                if path is not None:
                    assert os.path.isfile(path)
                    imgs_paths.setdefault(path, set()).add(i)
            for img_url in img:
                imgs_urls.setdefault(img_url, set()).add(i)
        print()
        if False:  # To check if imgs are not overriding themselves
            for path, nums in imgs_paths.items():
                if len(nums) > 1:
                    print("Image used multiple times", path, nums)
            for img_url, nums in imgs_urls.items():
                if len(nums) > 1:
                    print("Url used multiple times", img_url, nums)
        if False:  # To check that all files in folder are useful
            json = cls._get_json_file_path()
            output_dir = cls._get_output_dir()
            for file_ in os.listdir(output_dir):
                file_path = os.path.join(output_dir, file_)
                if file_path not in imgs_paths and file_path != json:
                    print("Unused image", file_path)
        cls.log("done")

    @classmethod
    def get_next_comic(cls, _):
        """Generator to get the next comic.

        First argument is the last properly downloaded comic which gives
        a starting point to download more.

        This is the method called by update(). It should yield comics which
        are basically dictionnaries with the following property :
            - 'url' is linked to a string
            - 'img' is linked to a list of url (that will get downloaded)
            - 'day'/'month'/'year' are self explicit. They are linked to
                integers corresponding to the comic dates. There should be
                all of them or none of them
            - more fields can be provided."""
        raise NotImplementedError

    @classmethod
    def print_text(cls, text):
        """Print text by returning to the beginning of the line every time."""
        print(cls.name, ':', text, ' ' * 10, '\r', end='')

    @classmethod
    def print_comic(cls, comic):
        """Print information about a comic."""
        cls.print_text(comic['url'])

    @classmethod
    def update(cls):
        """Update the database : get the latest comics and save in the DB.

        This is a wrapper around get_next_comic() providing the following
        generic features :
            - logging
            - database handling (open and save)
            - exception handling (properly retrieved data are always saved)
            - file download
            - data management (adds current date if no date is provided)."""
        cls.log("start")
        print(cls.name, ': about to update')
        cls._create_output_dir()
        comics = cls._load_db()
        new_comics = []
        start = time.time()
        try:
            last_comic = cls.get_last_comic(comics)
            cls.log("last comic is %s" % ('None' if last_comic is None else last_comic['url']))
            for comic in cls.get_next_comic(last_comic):
                cls.log("got %s" % str(comic))
                assert 'url' in comic
                assert 'img' in comic
                if 'day' in comic:
                    assert all(isinstance(comic.get(k), int) for k in ['day', 'month', 'year'])
                else:
                    assert all(k not in comic for k in ['day', 'month', 'year'])
                    day = date.today()
                    comic['day'], comic['month'], comic['year'] = \
                        day.day, day.month, day.year
                prefix = comic.get('prefix', '')
                comic['local_img'] = [cls.get_file_in_output_dir(i, prefix, referer=comic['url'])
                                      for i in comic['img']]
                comic['comic'] = cls.long_name
                comic['new'] = None  # "'new' in comic" to check if new
                new_comics.append(comic)
                cls.print_comic(comic)
        finally:
            end = time.time()
            if new_comics:
                print()
                cls._save_db(comics + new_comics)
                print(cls.name, ": added", len(new_comics),
                      "comics in", end - start, "seconds")
            else:
                print(cls.name, ": nothing new")
        cls.log("done")

    @classmethod
    def try_to_get_missing_resources(cls):
        """Download images that might not have been downloaded properly in
        the first place."""
        cls.log("start")
        print(cls.name, ': about to try to get missing resources')
        cls._create_output_dir()
        comics = cls._load_db()
        change = False
        for comic in comics:
            comicurl = comic['url']
            local = comic['local_img']
            prefix = comic.get('prefix', '')
            for i, (path, url) in enumerate(zip(local, comic['img'])):
                if path is None:
                    new_path = cls.get_file_in_output_dir(url, prefix, referer=comicurl)
                    if new_path is None:
                        print(cls.name, ': failed to get', url)
                    else:
                        print(cls.name, ': got', url, 'at', new_path)
                        local[i] = new_path
                        change = True
                        comic['new'] = None
        if change:
            cls._save_db(comics)
            print(cls.name, ": some missing resources have been downloaded")
        cls.log("done")

    @classmethod
    def reset_new(cls):
        """Remove the 'new' flag on comics in the DB."""
        cls.log("start")
        cls._create_output_dir()
        cls._save_db([{key: val for key, val in c.items() if key != 'new'} for c in cls._load_db()])
        cls.log("done")

    @classmethod
    def delete_last(cls):
        """Delete last (non-deleted) comic."""
        cls.log("start")
        comics = cls._load_db()
        last_comic = cls.get_last_comic(comics)
        if last_comic is None:
            cls.log("no comic to delete")
        else:
            cls.log("about to delete %s" % last_comic['url'])
            last_comic['deleted'] = None  # "'deleted' in comic" to check if deleted
            cls._save_db(comics)
        cls.log("done")

    @classmethod
    def delete_all(cls):
        """Delete all comics."""
        cls.log("start")
        comics = cls._load_db()
        if comics:
            for c in comics:
                c['deleted'] = None  # "'deleted' in comic" to check if deleted
            cls._save_db(comics)
        cls.log("done")

    @classmethod
    def print_name(cls):
        """Print name."""
        cls.log("start")
        print(cls.name)
        cls.log("end")

    @classmethod
    def info(cls):
        """Print information about the comics."""
        cls.log("start")
        print("%s (%s) : " % (cls.long_name, cls.url))
        print("In " + ', '.join(cls.get_categories()))
        cls._create_output_dir()
        comics = cls.get_comics()  # cls._load_db()
        dates = [get_date_for_comic(c) for c in comics]
        print("%d comics (%d new)" % (len(comics), sum(1 for c in comics if 'new' in c)))
        print("%d images" % sum(len(c['img']) for c in comics))
        if dates:
            date_min, date_max = min(dates), max(dates)
            print("from %s to %s (%d days)" % (date_min, date_max, (date_max - date_min).days))
        print()
        cls.log("done")

    @classmethod
    def readme(cls):
        """Return information to generate README."""
        return ' * [%s](%s)\n' % (cls.long_name, cls.url)

    @classmethod
    def gitignore(cls):
        """Return information to generate gitignore."""
        return '%s\n' % (cls.name)

    @classmethod
    def get_categories(cls):
        """Return categories to be able to group comics.

        Categories are such that all classes have their ancestrors'
        categories and their own (provided as an iterable in the
        `_categories` class member)."""
        return sorted(set(cat
                          for klass in cls.__mro__
                          for cat in getattr(klass, '_categories', [])))


1			#! /usr/bin/python3
2			# vim: set expandtab tabstop=4 shiftwidth=4 :
3			"""Module to define logic common to all comics."""
4
5			import json
6			import time
7			import os
8			from datetime import date
9			from urlfunctions import get_filename_from_url, get_file_at_url
10			import inspect
11			import logging
12
13
14			def get_date_for_comic(comic):
15			"""Return date object for a given comic."""
16			return date(comic['year'], comic['month'], comic['day'])
17
18
19			def get_info_before_comic(comic):
20			"""Generates the info to be put before the images."""
21			author = comic.get('author')
22			if author:
23			yield 'by ' + author
24
25
26			def get_info_after_comic(comic):
27			"""Generates the info to be put after the images."""
28			for name in ['alt', 'title', 'title2', 'texts', 'name', 'description']:
29			info = comic.get(name)
30			if info:
31			yield info
32
33
34			class GenericComic(object):
35			"""Generic class to handle the logic common to all comics
36
37			Attributes :
38			name Name of the comic (for logging, CLI and default output dir)
39			long_name Long name of the comic (to be added in the comic info)
40			url Base url for the comic (without trailing slash)."""
41			name = None
42			long_name = None
43			url = None
44			_categories = ('ALL', )
45
46			@classmethod
47			def log(cls, string):
48			"""Dirty logging function."""
49			# TODO: https://docs.python.org/2/library/logging.html#logrecord-attributes
50			# we do not need to retrieve the function name manually
51			logging.debug(inspect.stack()[1][3] + " " + cls.name + " " + string)
52
53			@classmethod
54			def _get_output_dir(cls):
55			"""Returns the name of the output directory (for comics and JSON file).
56			To be overridden if needed."""
57			return cls.name
58
59			@classmethod
60			def _create_output_dir(cls):
61			"""Create output directory for the comic on the file system."""
62			cls.log("start")
63			os.makedirs(cls._get_output_dir(), exist_ok=True)
64			cls.log("done")
65
66			@classmethod
67			def _get_json_file_path(cls):
68			"""Get the full path to the JSON file."""
69			return os.path.join(cls._get_output_dir(), cls.name + '.json')
70
71			@classmethod
72			def _load_db(cls):
73			"""Load the JSON file to return the list of comics."""
74			cls.log("start")
75			try:
76			with open(cls._get_json_file_path()) as file:
77			return json.load(file)
78			except IOError:
79			return []
80
81			@classmethod
82			def get_comics(cls):
83			"""Return the list of comics."""
84			return [c for c in cls._load_db() if 'deleted' not in c]
85
86			@classmethod
87			def get_last_comic(cls, comics):
88			"""Return the last (non-deleted) comic."""
89			return next((c for c in reversed(comics) if 'deleted' not in c), None)
90
91			@classmethod
92			def _save_db(cls, data):
93			"""Save the list of comics in the JSON file."""
94			cls.log("start")
95			with open(cls._get_json_file_path(), 'w+') as file:
96			json.dump(data, file, indent=4, sort_keys=True)
97			cls.log("done")
98
99			@classmethod
100			def get_file_in_output_dir(cls, url, prefix=None, referer=None):
101			"""Download file from URL and save it in output folder."""
102			cls.log("start (url:%s)" % url)
103			filename = os.path.join(
104			cls._get_output_dir(),
105			('' if prefix is None else prefix) +
106			get_filename_from_url(url))
107			return get_file_at_url(url, filename, referer)
108
109			@classmethod
110			def check_everything_is_ok(cls):
111			"""Perform tests on the database to check that everything is ok."""
112			cls.log("start")
113			print(cls.name, ': about to check')
114			comics = cls.get_comics() # cls._load_db()
115			imgs_paths = {}
116			imgs_urls = {}
117			prev_date, prev_num = None, None
118			today = date.today()
119			for i, comic in enumerate(comics):
120			cls.print_comic(comic)
121			url = comic.get('url')
122			assert isinstance(url, str), "Url %s not a string" % url
123			assert comic.get('comic') == cls.long_name
124			assert all(isinstance(comic.get(k), int)
125			for k in ['day', 'month', 'year']), \
126			"Invalid date data (%s)" % url
127			curr_date = get_date_for_comic(comic)
128			assert curr_date <= today
129			curr_num = comic.get('num', 0)
130			assert isinstance(curr_num, int)
131			assert prev_date is None or prev_date <= curr_date or \
132			prev_num < curr_num, \
133			"Comics are not in order (%s)" % url
134			prev_date, prev_num = curr_date, curr_num
135			img = comic.get('img')
136			local_img = comic.get('local_img')
137			assert isinstance(img, list)
138			assert isinstance(local_img, list)
139			assert len(local_img) == len(img)
140			for path in local_img:
141			if path is not None:
142			assert os.path.isfile(path)
143			imgs_paths.setdefault(path, set()).add(i)
144			for img_url in img:
145			imgs_urls.setdefault(img_url, set()).add(i)
146			print()
147			if False: # To check if imgs are not overriding themselves
148			for path, nums in imgs_paths.items():
149			if len(nums) > 1:
150			print("Image used multiple times", path, nums)
151			for img_url, nums in imgs_urls.items():
152			if len(nums) > 1:
153			print("Url used multiple times", img_url, nums)
154			if False: # To check that all files in folder are useful
155			json = cls._get_json_file_path()
156			output_dir = cls._get_output_dir()
157			for file_ in os.listdir(output_dir):
158			file_path = os.path.join(output_dir, file_)
159			if file_path not in imgs_paths and file_path != json:
160			print("Unused image", file_path)
161			cls.log("done")
162
163			@classmethod
164			def get_next_comic(cls, _):
165			"""Generator to get the next comic.
166
167			First argument is the last properly downloaded comic which gives
168			a starting point to download more.
169
170			This is the method called by update(). It should yield comics which
171			are basically dictionnaries with the following property :
172			- 'url' is linked to a string
173			- 'img' is linked to a list of url (that will get downloaded)
174			- 'day'/'month'/'year' are self explicit. They are linked to
175			integers corresponding to the comic dates. There should be
176			all of them or none of them
177			- more fields can be provided."""
178			raise NotImplementedError
179
180			@classmethod
181			def print_text(cls, text):
182			"""Print text by returning to the beginning of the line every time."""
183			print(cls.name, ':', text, ' ' * 10, '\r', end='')
184
185			@classmethod
186			def print_comic(cls, comic):
187			"""Print information about a comic."""
188			cls.print_text(comic['url'])
189
190			@classmethod
191			def update(cls):
192			"""Update the database : get the latest comics and save in the DB.
193
194			This is a wrapper around get_next_comic() providing the following
195			generic features :
196			- logging
197			- database handling (open and save)
198			- exception handling (properly retrieved data are always saved)
199			- file download
200			- data management (adds current date if no date is provided)."""
201			cls.log("start")
202			print(cls.name, ': about to update')
203			cls._create_output_dir()
204			comics = cls._load_db()
205			new_comics = []
206			start = time.time()
207			try:
208			last_comic = cls.get_last_comic(comics)
209			cls.log("last comic is %s" % ('None' if last_comic is None else last_comic['url']))
210			for comic in cls.get_next_comic(last_comic):
211			cls.log("got %s" % str(comic))
212			assert 'url' in comic
213			assert 'img' in comic
214			if 'day' in comic:
215			assert all(isinstance(comic.get(k), int) for k in ['day', 'month', 'year'])
216			else:
217			assert all(k not in comic for k in ['day', 'month', 'year'])
218			day = date.today()
219			comic['day'], comic['month'], comic['year'] = \
220			day.day, day.month, day.year
221			prefix = comic.get('prefix', '')
222			comic['local_img'] = [cls.get_file_in_output_dir(i, prefix, referer=comic['url'])
223			for i in comic['img']]
224			comic['comic'] = cls.long_name
225			comic['new'] = None # "'new' in comic" to check if new
226			new_comics.append(comic)
227			cls.print_comic(comic)
228			finally:
229			end = time.time()
230			if new_comics:
231			print()
232			cls._save_db(comics + new_comics)
233			print(cls.name, ": added", len(new_comics),
234			"comics in", end - start, "seconds")
235			else:
236			print(cls.name, ": nothing new")
237			cls.log("done")
238
239			@classmethod
240			def try_to_get_missing_resources(cls):
241			"""Download images that might not have been downloaded properly in
242			the first place."""
243			cls.log("start")
244			print(cls.name, ': about to try to get missing resources')
245			cls._create_output_dir()
246			comics = cls._load_db()
247			change = False
248			for comic in comics:
249			comicurl = comic['url']
250			local = comic['local_img']
251			prefix = comic.get('prefix', '')
252			for i, (path, url) in enumerate(zip(local, comic['img'])):
253			if path is None:
254			new_path = cls.get_file_in_output_dir(url, prefix, referer=comicurl)
255			if new_path is None:
256			print(cls.name, ': failed to get', url)
257			else:
258			print(cls.name, ': got', url, 'at', new_path)
259			local[i] = new_path
260			change = True
261			comic['new'] = None
262			if change:
263			cls._save_db(comics)
264			print(cls.name, ": some missing resources have been downloaded")
265			cls.log("done")
266
267			@classmethod
268			def reset_new(cls):
269			"""Remove the 'new' flag on comics in the DB."""
270			cls.log("start")
271			cls._create_output_dir()
272			cls._save_db([{key: val for key, val in c.items() if key != 'new'} for c in cls._load_db()])
273			cls.log("done")
274
275			@classmethod
276			def delete_last(cls):
277			"""Delete last (non-deleted) comic."""
278			cls.log("start")
279			comics = cls._load_db()
280			last_comic = cls.get_last_comic(comics)
281			if last_comic is None:
282			cls.log("no comic to delete")
283			else:
284			cls.log("about to delete %s" % last_comic['url'])
285			last_comic['deleted'] = None # "'deleted' in comic" to check if deleted
286			cls._save_db(comics)
287			cls.log("done")
288
289			@classmethod
290			def delete_all(cls):
291			"""Delete all comics."""
292			cls.log("start")
293			comics = cls._load_db()
294			if comics:
295			for c in comics:
296			c['deleted'] = None # "'deleted' in comic" to check if deleted
297			cls._save_db(comics)
298			cls.log("done")
299
300			@classmethod
301			def print_name(cls):
302			"""Print name."""
303			cls.log("start")
304			print(cls.name)
305			cls.log("end")
306
307			@classmethod
308			def info(cls):
309			"""Print information about the comics."""
310			cls.log("start")
311			print("%s (%s) : " % (cls.long_name, cls.url))
312			print("In " + ', '.join(cls.get_categories()))
313			cls._create_output_dir()
314			comics = cls.get_comics() # cls._load_db()
315			dates = [get_date_for_comic(c) for c in comics]
316			print("%d comics (%d new)" % (len(comics), sum(1 for c in comics if 'new' in c)))
317			print("%d images" % sum(len(c['img']) for c in comics))
318			if dates:
319			date_min, date_max = min(dates), max(dates)
320			print("from %s to %s (%d days)" % (date_min, date_max, (date_max - date_min).days))
321			print()
322			cls.log("done")
323
324			@classmethod
325			def readme(cls):
326			"""Return information to generate README."""
327			return ' * [%s](%s)\n' % (cls.long_name, cls.url)
328
329			@classmethod
330			def gitignore(cls):
331			"""Return information to generate gitignore."""
332			return '%s\n' % (cls.name)
333
334			@classmethod
335			def get_categories(cls):
336			"""Return categories to be able to group comics.
337
338			Categories are such that all classes have their ancestrors'
339			categories and their own (provided as an iterable in the
340			`_categories` class member)."""
341			return sorted(set(cat
342			for klass in cls.__mro__
343			for cat in getattr(klass, '_categories', [])))
344

SylvainDe / ComicBookMaker

GenericComic F last analyzed 2018-06-15 09:38 UTC

Complexity

Size/Duplication

Importance

23 Methods

How to fix Complexity

Complex Class

Duplication Side-by-Side

Filter issues like

GenericComic F
last analyzed 2018-06-15 09:38 UTC