GenericComic - Code Metrics - Inspection of "Fix retrieval of EFC" - SylvainDe/ComicBookMaker - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Push — master ( 6f7896...0de88c )

by De

created 2016-08-18 17:06 UTC

GenericComic F

↳ Parent: Project

Complexity

Total Complexity

Size/Duplication

Total Lines	252
Duplicated Lines	0 %

Importance

Changes	3
Bugs	0	Features	0

Metric	Value
dl	0
loc	252
rs	2.9411
c	3
b	0
f	0
wmc	68

17 Methods

Rating	Name	Size	Complexity
A	log()	6	1
A	gitignore()	4	1
A	get_output_dir()	5	1
A	create_output_dir()	6	1
A	reset_new()	7	4
F	check_everything_is_ok()	53	25
A	get_file_in_output_dir()	9	2
A	get_json_file_path()	4	1
B	try_to_get_missing_resources()	26	6
A	print_comic()	4	1
A	load_db()	9	3
A	save_db()	7	2
F	update()	46	11
A	get_next_comic()	16	1
A	print_text()	4	1
A	readme()	4	1
B	info()	15	6

How to fix Complexity

#! /usr/bin/python3
# vim: set expandtab tabstop=4 shiftwidth=4 :
"""Module to define logic common to all comics."""

import json
import time
import os
from datetime import date
from urlfunctions import get_filename_from_url, get_file_at_url
import inspect
import logging


def get_date_for_comic(comic):
    """Build the date object of a comic from its 'year', 'month' and 'day'.

    Raises KeyError if one of these fields is missing from the comic."""
    year, month, day = (comic[field] for field in ('year', 'month', 'day'))
    return date(year, month, day)


def get_info_before_comic(comic):
    """Yield the pieces of text to be put before the images.

    Only the author line ('by <author>') is produced, and only when the
    comic has a non-empty 'author' field."""
    author = comic.get('author')
    if not author:
        return
    yield 'by ' + author


def get_info_after_comic(comic):
    """Yield the pieces of text to be put after the images.

    The fields are looked up in a fixed order and the missing or empty
    ones are skipped."""
    fields = ('alt', 'title', 'title2', 'texts', 'name', 'description')
    values = (comic.get(field) for field in fields)
    yield from (value for value in values if value)


class GenericComic(object):
    """Generic class to handle the logic common to all comics.

    Every method is a class method. A concrete comic is expected to set
    the class attributes below and to implement get_next_comic().

    Attributes :
        name        Name of the comic (for logging, CLI and default output dir)
        long_name   Long name of the comic (to be added in the comic info)
        url         Base url for the comic (without trailing slash)."""
    name = None
    long_name = None
    url = None

    @classmethod
    def log(cls, string):
        """Log a debug message prefixed by the caller and the comic name.

        The logged message is "<name of the calling function> <name> <string>"."""
        # TODO: https://docs.python.org/2/library/logging.html#logrecord-attributes
        # we do not need to retrieve the function name manually
        # Only the direct caller is needed: look at a single frame instead of
        # building the whole call stack with inspect.stack() on every call.
        frame = inspect.currentframe()
        try:
            if frame is not None and frame.f_back is not None:
                caller = frame.f_back.f_code.co_name
            else:
                # No frame available from the interpreter: use the slow path.
                caller = inspect.stack()[1][3]
        finally:
            del frame  # do not keep the frame alive through a reference cycle
        # %-style arguments: the message is only built if it gets emitted and
        # a None name (class not configured) no longer raises a TypeError.
        logging.debug("%s %s %s", caller, cls.name, string)

    @classmethod
    def get_output_dir(cls):
        """Returns the name of the output directory (for comics and JSON file).
        To be overridden if needed."""
        return cls.name

    @classmethod
    def create_output_dir(cls):
        """Create output directory for the comic on the file system.

        Nothing happens if the directory already exists."""
        cls.log("start")
        os.makedirs(cls.get_output_dir(), exist_ok=True)
        cls.log("done")

    @classmethod
    def get_json_file_path(cls):
        """Get the path to the JSON file ('<name>.json' in the output dir)."""
        return os.path.join(cls.get_output_dir(), cls.name + '.json')

    @classmethod
    def load_db(cls):
        """Load the JSON file to return a list of comics.

        An empty list is returned if the file cannot be opened (typically
        because nothing has been saved yet)."""
        cls.log("start")
        try:
            with open(cls.get_json_file_path()) as db_file:
                return json.load(db_file)
        except IOError:
            return []

    @classmethod
    def save_db(cls, data):
        """Save the list of comics in the JSON file.

        The data is first written to a temporary file next to the JSON file
        and then moved over it, so that a failure during the serialisation
        (or an interruption) leaves the previous database untouched.

        The output directory must already exist."""
        cls.log("start")
        json_path = cls.get_json_file_path()
        tmp_path = json_path + '.tmp'
        try:
            with open(tmp_path, 'w') as db_file:
                json.dump(data, db_file, indent=4, sort_keys=True)
            os.replace(tmp_path, json_path)
        finally:
            # Still there only if something went wrong before the replace.
            if os.path.exists(tmp_path):
                os.remove(tmp_path)
        cls.log("done")

    @classmethod
    def get_file_in_output_dir(cls, url, prefix=None):
        """Download file from URL and save it in output folder.

        The local file name is the optional prefix followed by the file name
        extracted from the url. Returns the result of get_file_at_url();
        callers in this class treat None as a failed download."""
        cls.log("start (url:%s)" % url)
        filename = os.path.join(
            cls.get_output_dir(),
            ('' if prefix is None else prefix) +
            get_filename_from_url(url))
        return get_file_at_url(url, filename)

    @classmethod
    def check_everything_is_ok(cls, check_duplicates=False, check_unused=False):
        """Perform tests on the database to check that everything is ok.

        For each comic of the database, checks that :
            - 'url' is a string and 'comic' is the long name of the class
            - 'day'/'month'/'year' are integers and the date is not in
                the future
            - 'num' (0 if missing) is an integer
            - comics are in order (by date or by 'num')
            - 'img' and 'local_img' are lists of the same length and every
                local path which is not None is an existing file.

        Arguments :
            check_duplicates    Also print the local images and the image
                                urls used by more than one comic.
            check_unused        Also print the files of the output directory
                                which are neither used by a comic nor the
                                JSON file.

        Raises AssertionError on the first problem found (the checks are
        assert statements, hence disabled when Python runs with -O)."""
        cls.log("start")
        print(cls.name, ': about to check')
        comics = cls.load_db()
        imgs_paths = {}  # local path -> indices of the comics using it
        imgs_urls = {}  # image url -> indices of the comics using it
        prev_date, prev_num = None, None
        today = date.today()
        for i, comic in enumerate(comics):
            cls.print_comic(comic)
            url = comic.get('url')
            assert isinstance(url, str), "Url %s not a string" % url
            assert comic.get('comic') == cls.long_name
            assert all(isinstance(comic.get(k), int)
                       for k in ['day', 'month', 'year']), \
                "Invalid date data (%s)" % url
            curr_date = get_date_for_comic(comic)
            assert curr_date <= today
            curr_num = comic.get('num', 0)
            assert isinstance(curr_num, int)
            assert prev_date is None or prev_date <= curr_date or \
                prev_num < curr_num, \
                "Comics are not in order (%s)" % url
            prev_date, prev_num = curr_date, curr_num
            img = comic.get('img')
            local_img = comic.get('local_img')
            assert isinstance(img, list)
            assert isinstance(local_img, list)
            assert len(local_img) == len(img)
            for path in local_img:
                if path is not None:  # None is used for a failed download
                    assert os.path.isfile(path)
                    imgs_paths.setdefault(path, set()).add(i)
            for img_url in img:
                imgs_urls.setdefault(img_url, set()).add(i)
        print()
        if check_duplicates:  # To check if imgs are not overriding themselves
            for path, nums in imgs_paths.items():
                if len(nums) > 1:
                    print("Image used multiple times", path, nums)
            for img_url, nums in imgs_urls.items():
                if len(nums) > 1:
                    print("Url used multiple times", img_url, nums)
        if check_unused:  # To check that all files in folder are useful
            json_path = cls.get_json_file_path()
            output_dir = cls.get_output_dir()
            for file_ in os.listdir(output_dir):
                file_path = os.path.join(output_dir, file_)
                if file_path not in imgs_paths and file_path != json_path:
                    print("Unused image", file_path)
        cls.log("done")

    @classmethod
    def get_next_comic(cls, _):
        """Generator to get the next comic.

        First argument is the last properly downloaded comic which gives
        a starting point to download more (None if there is no such comic).

        This is the method called by update(). It should yield comics which
        are basically dictionaries with the following property :
            - 'url' is linked to a string
            - 'img' is linked to a list of url (that will get downloaded)
            - 'day'/'month'/'year' are self explicit. They are linked to
                integers corresponding to the comic dates. There should be
                all of them or none of them
            - more fields can be provided.

        Raises NotImplementedError here : to be implemented for each comic."""
        raise NotImplementedError

    @classmethod
    def print_text(cls, text):
        """Print text by returning to the beginning of the line every time."""
        # The trailing spaces pad the line and '\r' (without new line) makes
        # the next print start at the beginning of the same line.
        print(cls.name, ':', text, ' ' * 10, '\r', end='')

    @classmethod
    def print_comic(cls, comic):
        """Print information about a comic (its url)."""
        cls.print_text(comic['url'])

    @classmethod
    def update(cls):
        """Update the database : get the latest comics and save in the DB.

        This is a wrapper around get_next_comic() providing the following
        generic features :
            - logging
            - database handling (open and save)
            - exception handling (properly retrieved data are always saved)
            - file download
            - data management (adds current date if no date is provided).

        Each new comic also gets the 'local_img', 'comic' and 'new' fields.
        Exceptions raised while retrieving comics are propagated once the
        comics retrieved so far have been saved."""
        cls.log("start")
        print(cls.name, ': about to update')
        cls.create_output_dir()
        comics = cls.load_db()
        new_comics = []
        start = time.time()
        try:
            last_comic = comics[-1] if comics else None
            cls.log("last comic is %s" % ('None' if last_comic is None else last_comic['url']))
            for comic in cls.get_next_comic(last_comic):
                cls.log("got %s" % str(comic))
                if 'day' in comic:
                    assert all(isinstance(comic.get(k), int) for k in ['day', 'month', 'year'])
                else:
                    # No date at all (and not just a part of it): use today.
                    assert all(k not in comic for k in ['day', 'month', 'year'])
                    day = date.today()
                    comic['day'], comic['month'], comic['year'] = \
                        day.day, day.month, day.year
                prefix = comic.get('prefix', '')
                comic['local_img'] = [cls.get_file_in_output_dir(i, prefix)
                                      for i in comic['img']]
                comic['comic'] = cls.long_name
                comic['new'] = None  # "'new' in comic" to check if new
                new_comics.append(comic)
                cls.print_comic(comic)
        finally:
            # Reached on success and on error : whatever has been properly
            # retrieved gets saved.
            end = time.time()
            if new_comics:
                print()
                cls.save_db(comics + new_comics)
                print(cls.name, ": added", len(new_comics),
                      "comics in", end - start, "seconds")
            else:
                print(cls.name, ": nothing new")
        cls.log("done")

    @classmethod
    def try_to_get_missing_resources(cls):
        """Download images that might not have been downloaded properly in
        the first place.

        These are the images whose entry in 'local_img' is None. Comics
        getting a newly downloaded image are flagged as 'new' and the
        database is saved if at least one download succeeded."""
        cls.log("start")
        print(cls.name, ': about to try to get missing resources')
        cls.create_output_dir()
        comics = cls.load_db()
        change = False
        for comic in comics:
            local = comic['local_img']
            prefix = comic.get('prefix', '')
            for i, (path, url) in enumerate(zip(local, comic['img'])):
                if path is None:
                    new_path = cls.get_file_in_output_dir(url, prefix)
                    if new_path is None:
                        print(cls.name, ': failed to get', url)
                    else:
                        print(cls.name, ': got', url, 'at', new_path)
                        local[i] = new_path
                        change = True
                        comic['new'] = None
        if change:
            cls.save_db(comics)
            print(cls.name, ": some missing resources have been downloaded")
        cls.log("done")

    @classmethod
    def reset_new(cls):
        """Remove the 'new' flag on comics in the DB."""
        cls.log("start")
        cls.create_output_dir()
        comics = [{key: val for key, val in comic.items() if key != 'new'}
                  for comic in cls.load_db()]
        cls.save_db(comics)
        cls.log("done")

    @classmethod
    def info(cls):
        """Print information about the comics.

        Prints the long name and url, the number of comics (and of new
        ones), the number of images and, if there are comics, the range of
        dates covered."""
        cls.log("start")
        print("%s (%s) : " % (cls.long_name, cls.url))
        cls.create_output_dir()
        comics = cls.load_db()
        dates = [get_date_for_comic(c) for c in comics]
        print("%d comics (%d new)" % (len(comics), sum(1 for c in comics if 'new' in c)))
        print("%d images" % sum(len(c['img']) for c in comics))
        if dates:
            date_min, date_max = min(dates), max(dates)
            print("from %s to %s (%d days)" % (date_min, date_max, (date_max - date_min).days))
        print()
        cls.log("done")

    @classmethod
    def readme(cls):
        """Return information to generate README (a markdown list item)."""
        return ' * [%s](%s)\n' % (cls.long_name, cls.url)

    @classmethod
    def gitignore(cls):
        """Return information to generate gitignore (the name on a line)."""
        return '%s\n' % (cls.name)


1			#! /usr/bin/python3
2			# vim: set expandtab tabstop=4 shiftwidth=4 :
3			"""Module to define logic common to all comics."""
4
5			import json
6			import time
7			import os
8			from datetime import date
9			from urlfunctions import get_filename_from_url, get_file_at_url
10			import inspect
11			import logging
12
13
14			def get_date_for_comic(comic):
15			"""Return date object for a given comic."""
16			return date(comic['year'], comic['month'], comic['day'])
17
18
19			def get_info_before_comic(comic):
20			"""Generates the info to be put before the images."""
21			author = comic.get('author')
22			if author:
23			yield 'by ' + author
24
25
26			def get_info_after_comic(comic):
27			"""Generates the info to be put after the images."""
28			for name in ['alt', 'title', 'title2', 'texts', 'name', 'description']:
29			info = comic.get(name)
30			if info:
31			yield info
32
33
34			class GenericComic(object):
35			"""Generic class to handle the logic common to all comics
36
37			Attributes :
38			name Name of the comic (for logging, CLI and default output dir)
39			long_name Long name of the comic (to be added in the comic info)
40			url Base url for the comic (without trailing slash)."""
41			name = None
42			long_name = None
43			url = None
44
45			@classmethod
46			def log(cls, string):
47			"""Dirty logging function."""
48			# TODO: https://docs.python.org/2/library/logging.html#logrecord-attributes
49			# we do not need to retrieve the function name manually
50			logging.debug(inspect.stack()[1][3] + " " + cls.name + " " + string)
51
52			@classmethod
53			def get_output_dir(cls):
54			"""Returns the name of the output directory (for comics and JSON file).
55			To be overridden if needed."""
56			return cls.name
57
58			@classmethod
59			def create_output_dir(cls):
60			"""Create output directory for the comic on the file system."""
61			cls.log("start")
62			os.makedirs(cls.get_output_dir(), exist_ok=True)
63			cls.log("done")
64
65			@classmethod
66			def get_json_file_path(cls):
67			"""Get the full path to the JSON file."""
68			return os.path.join(cls.get_output_dir(), cls.name + '.json')
69
70			@classmethod
71			def load_db(cls):
72			"""Load the JSON file to return a list of comics."""
73			cls.log("start")
74			try:
75			with open(cls.get_json_file_path()) as file:
76			return json.load(file)
77			except IOError:
78			return []
79
80			@classmethod
81			def save_db(cls, data):
82			"""Save the list of comics in the JSON file."""
83			cls.log("start")
84			with open(cls.get_json_file_path(), 'w+') as file:
85			json.dump(data, file, indent=4, sort_keys=True)
86			cls.log("done")
87
88			@classmethod
89			def get_file_in_output_dir(cls, url, prefix=None):
90			"""Download file from URL and save it in output folder."""
91			cls.log("start (url:%s)" % url)
92			filename = os.path.join(
93			cls.get_output_dir(),
94			('' if prefix is None else prefix) +
95			get_filename_from_url(url))
96			return get_file_at_url(url, filename)
97
98			@classmethod
99			def check_everything_is_ok(cls):
100			"""Perform tests on the database to check that everything is ok."""
101			cls.log("start")
102			print(cls.name, ': about to check')
103			comics = cls.load_db()
104			imgs_paths = {}
105			imgs_urls = {}
106			prev_date, prev_num = None, None
107			today = date.today()
108			for i, comic in enumerate(comics):
109			cls.print_comic(comic)
110			url = comic.get('url')
111			assert isinstance(url, str), "Url %s not a string" % url
112			assert comic.get('comic') == cls.long_name
113			assert all(isinstance(comic.get(k), int)
114			for k in ['day', 'month', 'year']), \
115			"Invalid date data (%s)" % url
116			curr_date = get_date_for_comic(comic)
117			assert curr_date <= today
118			curr_num = comic.get('num', 0)
119			assert isinstance(curr_num, int)
120			assert prev_date is None or prev_date <= curr_date or \
121			prev_num < curr_num, \
122			"Comics are not in order (%s)" % url
123			prev_date, prev_num = curr_date, curr_num
124			img = comic.get('img')
125			local_img = comic.get('local_img')
126			assert isinstance(img, list)
127			assert isinstance(local_img, list)
128			assert len(local_img) == len(img)
129			for path in local_img:
130			if path is not None:
131			assert os.path.isfile(path)
132			imgs_paths.setdefault(path, set()).add(i)
133			for img_url in img:
134			imgs_urls.setdefault(img_url, set()).add(i)
135			print()
136			if False: # To check if imgs are not overriding themselves
137			for path, nums in imgs_paths.items():
138			if len(nums) > 1:
139			print("Image used multiple times", path, nums)
140			for img_url, nums in imgs_urls.items():
141			if len(nums) > 1:
142			print("Url used multiple times", img_url, nums)
143			if False: # To check that all files in folder are useful
144			json = cls.get_json_file_path()
145			output_dir = cls.get_output_dir()
146			for file_ in os.listdir(output_dir):
147			file_path = os.path.join(output_dir, file_)
148			if file_path not in imgs_paths and file_path != json:
149			print("Unused image", file_path)
150			cls.log("done")
151
152			@classmethod
153			def get_next_comic(cls, _):
154			"""Generator to get the next comic.
155
156			First argument is the last properly downloaded comic which gives
157			a starting point to download more.
158
159			This is the method called by update(). It should yield comics which
160			are basically dictionnaries with the following property :
161			- 'url' is linked to a string
162			- 'img' is linked to a list of url (that will get downloaded)
163			- 'day'/'month'/'year' are self explicit. They are linked to
164			integers corresponding to the comic dates. There should be
165			all of them or none of them
166			- more fields can be provided."""
167			raise NotImplementedError
168
169			@classmethod
170			def print_text(cls, text):
171			"""Print text by returning to the beginning of the line every time."""
172			print(cls.name, ':', text, ' ' * 10, '\r', end='')
173
174			@classmethod
175			def print_comic(cls, comic):
176			"""Print information about a comic."""
177			cls.print_text(comic['url'])
178
179			@classmethod
180			def update(cls):
181			"""Update the database : get the latest comics and save in the DB.
182
183			This is a wrapper around get_next_comic() providing the following
184			generic features :
185			- logging
186			- database handling (open and save)
187			- exception handling (properly retrieved data are always saved)
188			- file download
189			- data management (adds current date if no date is provided)."""
190			cls.log("start")
191			print(cls.name, ': about to update')
192			cls.create_output_dir()
193			comics = cls.load_db()
194			new_comics = []
195			start = time.time()
196			try:
197			last_comic = comics[-1] if comics else None
198			cls.log("last comic is %s" % ('None' if last_comic is None else last_comic['url']))
199			for comic in cls.get_next_comic(last_comic):
200			cls.log("got %s" % str(comic))
201			if 'day' in comic:
202			assert all(isinstance(comic.get(k), int) for k in ['day', 'month', 'year'])
203			else:
204			assert all(k not in comic for k in ['day', 'month', 'year'])
205			day = date.today()
206			comic['day'], comic['month'], comic['year'] = \
207			day.day, day.month, day.year
208			prefix = comic.get('prefix', '')
209			comic['local_img'] = [cls.get_file_in_output_dir(i, prefix)
210			for i in comic['img']]
211			comic['comic'] = cls.long_name
212			comic['new'] = None # "'new' in comic" to check if new
213			new_comics.append(comic)
214			cls.print_comic(comic)
215			finally:
216			end = time.time()
217			if new_comics:
218			print()
219			cls.save_db(comics + new_comics)
220			print(cls.name, ": added", len(new_comics),
221			"comics in", end - start, "seconds")
222			else:
223			print(cls.name, ": nothing new")
224			cls.log("done")
225
226			@classmethod
227			def try_to_get_missing_resources(cls):
228			"""Download images that might not have been downloaded properly in
229			the first place."""
230			cls.log("start")
231			print(cls.name, ': about to try to get missing resources')
232			cls.create_output_dir()
233			comics = cls.load_db()
234			change = False
235			for comic in comics:
236			local = comic['local_img']
237			prefix = comic.get('prefix', '')
238			for i, (path, url) in enumerate(zip(local, comic['img'])):
239			if path is None:
240			new_path = cls.get_file_in_output_dir(url, prefix)
241			if new_path is None:
242			print(cls.name, ': failed to get', url)
243			else:
244			print(cls.name, ': got', url, 'at', new_path)
245			local[i] = new_path
246			change = True
247			comic['new'] = None
248			if change:
249			cls.save_db(comics)
250			print(cls.name, ": some missing resources have been downloaded")
251			cls.log("done")
252
253			@classmethod
254			def reset_new(cls):
255			"""Remove the 'new' flag on comics in the DB."""
256			cls.log("start")
257			cls.create_output_dir()
258			cls.save_db([{key: val for key, val in c.items() if key != 'new'} for c in cls.load_db()])
259			cls.log("done")
260
261			@classmethod
262			def info(cls):
263			"""Print information about the comics."""
264			cls.log("start")
265			print("%s (%s) : " % (cls.long_name, cls.url))
266			cls.create_output_dir()
267			comics = cls.load_db()
268			dates = [get_date_for_comic(c) for c in comics]
269			print("%d comics (%d new)" % (len(comics), sum(1 for c in comics if 'new' in c)))
270			print("%d images" % sum(len(c['img']) for c in comics))
271			if dates:
272			date_min, date_max = min(dates), max(dates)
273			print("from %s to %s (%d days)" % (date_min, date_max, (date_max - date_min).days))
274			print()
275			cls.log("done")
276
277			@classmethod
278			def readme(cls):
279			"""Return information to generate README."""
280			return ' * [%s](%s)\n' % (cls.long_name, cls.url)
281
282			@classmethod
283			def gitignore(cls):
284			"""Return information to generate gitignore."""
285			return '%s\n' % (cls.name)
286

SylvainDe / ComicBookMaker

Push — master ( 6f7896...0de88c )

GenericComic F

Complexity

Size/Duplication

Importance

17 Methods

How to fix Complexity

Complex Class

Duplication Side-by-Side

Filter issues like