GenericComic.get_categories() - Code Metrics - Inspection of "Add categories to group/select comics" - SylvainDe/ComicBookMaker - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Push — master ( c3d761...46e76e )

by De

created 2016-09-01 17:29 UTC

GenericComic.get_categories() A

↳ Parent: GenericComic

Complexity

Conditions

Size

Total Lines

Duplication

Lines	0
Ratio	0 %

Importance

Changes	1
Bugs	0	Features	0

Metric	Value
cc	3
c	1
b	0
f	0
dl	0
loc	10
rs	9.4285

#! /usr/bin/python3
# vim: set expandtab tabstop=4 shiftwidth=4 :
"""Module to define logic common to all comics."""

import json
import time
import os
from datetime import date
from urlfunctions import get_filename_from_url, get_file_at_url
import inspect
import logging


def get_date_for_comic(comic):
    """Build the `date` described by a comic's 'year', 'month' and 'day' entries.

    A missing entry raises KeyError, exactly as a direct lookup would."""
    year, month, day = (comic[field] for field in ('year', 'month', 'day'))
    return date(year, month, day)


def get_info_before_comic(comic):
    """Yield the lines of text to display ahead of a comic's images.

    Currently this is a single 'by <author>' line, produced only when the
    comic has a non-empty 'author' entry."""
    author = comic.get('author')
    if not author:
        return
    yield 'by ' + author


def get_info_after_comic(comic):
    """Yield the pieces of info to display below a comic's images.

    The fields are looked up in a fixed order and only truthy values are
    produced; absent or empty fields are silently skipped."""
    fields = ('alt', 'title', 'title2', 'texts', 'name', 'description')
    # filter(None, ...) keeps truthy values only; both map and filter are lazy.
    yield from filter(None, map(comic.get, fields))


class GenericComic(object):
    """Generic class to handle the logic common to all comics

    Attributes :
        name        Name of the comic (for logging, CLI and default output dir)
        long_name   Long name of the comic (to be added in the comic info)
        url         Base url for the comic (without trailing slash)
        _categories Iterable of category names for this class; see
                    get_categories() for how they combine with ancestors'."""
    name = None
    long_name = None
    url = None
    _categories = ('ALL', )

    @classmethod
    def log(cls, string):
        """Emit a debug message prefixed with the caller's name and cls.name.

        The message is "<calling function> <cls.name> <string>"."""
        # Read the caller's frame directly instead of materialising the whole
        # stack with inspect.stack(), which is costly and was done on every
        # call even when debug logging is off.
        frame = inspect.currentframe()
        if frame is not None and frame.f_back is not None:
            caller = frame.f_back.f_code.co_name
        else:
            # inspect.currentframe() may return None on some implementations.
            caller = inspect.stack()[1][3]
        # %-style arguments (rather than '+') so that a class whose `name` is
        # still None (such as this base class) does not raise TypeError.
        logging.debug("%s %s %s", caller, cls.name, string)

    @classmethod
    def get_output_dir(cls):
        """Returns the name of the output directory (for comics and JSON file).
        To be overridden if needed."""
        return cls.name

    @classmethod
    def create_output_dir(cls):
        """Create output directory for the comic on the file system.

        Does nothing if the directory already exists."""
        cls.log("start")
        os.makedirs(cls.get_output_dir(), exist_ok=True)
        cls.log("done")

    @classmethod
    def get_json_file_path(cls):
        """Get the path to the JSON file: <output dir>/<name>.json."""
        return os.path.join(cls.get_output_dir(), cls.name + '.json')

    @classmethod
    def load_db(cls):
        """Load the JSON file to return a list of comics.

        Returns an empty list when the file cannot be opened (IOError)."""
        cls.log("start")
        try:
            with open(cls.get_json_file_path()) as db_file:
                return json.load(db_file)
        except IOError:
            return []

    @classmethod
    def save_db(cls, data):
        """Save the list of comics in the JSON file.

        Raises whatever json.dumps raises (e.g. TypeError) when `data` is not
        serialisable; in that case the existing file is left untouched."""
        cls.log("start")
        # Serialise before opening the file: opening for writing truncates
        # it, so a failure in the middle of the dump would destroy the
        # previously saved database.
        serialized = json.dumps(data, indent=4, sort_keys=True)
        with open(cls.get_json_file_path(), 'w') as db_file:
            db_file.write(serialized)
        cls.log("done")

    @classmethod
    def get_file_in_output_dir(cls, url, prefix=None):
        """Download file from URL and save it in output folder.

        The local file name is `prefix` (if any) followed by the name derived
        from the URL. Returns the result of get_file_at_url(); callers in this
        class treat None as a failed download."""
        cls.log("start (url:%s)" % url)
        filename = os.path.join(
            cls.get_output_dir(),
            ('' if prefix is None else prefix) +
            get_filename_from_url(url))
        return get_file_at_url(url, filename)

    @classmethod
    def check_everything_is_ok(cls):
        """Perform tests on the database to check that everything is ok.

        Checks are expressed as assertions: an AssertionError is raised on
        the first inconsistent comic."""
        cls.log("start")
        print(cls.name, ': about to check')
        comics = cls.load_db()
        # Map each local path / image url to the indices of the comics using it.
        imgs_paths = {}
        imgs_urls = {}
        prev_date, prev_num = None, None
        today = date.today()
        for i, comic in enumerate(comics):
            cls.print_comic(comic)
            url = comic.get('url')
            assert isinstance(url, str), "Url %s not a string" % url
            assert comic.get('comic') == cls.long_name
            assert all(isinstance(comic.get(k), int)
                       for k in ['day', 'month', 'year']), \
                "Invalid date data (%s)" % url
            curr_date = get_date_for_comic(comic)
            assert curr_date <= today
            curr_num = comic.get('num', 0)
            assert isinstance(curr_num, int)
            # Comics must be sorted by date or, failing that, by number.
            assert prev_date is None or prev_date <= curr_date or \
                prev_num < curr_num, \
                "Comics are not in order (%s)" % url
            prev_date, prev_num = curr_date, curr_num
            img = comic.get('img')
            local_img = comic.get('local_img')
            assert isinstance(img, list)
            assert isinstance(local_img, list)
            assert len(local_img) == len(img)
            for path in local_img:
                # None marks an image that could not be downloaded.
                if path is not None:
                    assert os.path.isfile(path)
                    imgs_paths.setdefault(path, set()).add(i)
            for img_url in img:
                imgs_urls.setdefault(img_url, set()).add(i)
        print()
        if False:  # To check if imgs are not overriding themselves
            for path, nums in imgs_paths.items():
                if len(nums) > 1:
                    print("Image used multiple times", path, nums)
            for img_url, nums in imgs_urls.items():
                if len(nums) > 1:
                    print("Url used multiple times", img_url, nums)
        if False:  # To check that all files in folder are useful
            # Named json_path so that it does not shadow the json module.
            json_path = cls.get_json_file_path()
            output_dir = cls.get_output_dir()
            for file_ in os.listdir(output_dir):
                file_path = os.path.join(output_dir, file_)
                if file_path not in imgs_paths and file_path != json_path:
                    print("Unused image", file_path)
        cls.log("done")

    @classmethod
    def get_next_comic(cls, _):
        """Generator to get the next comic.

        First argument is the last properly downloaded comic which gives
        a starting point to download more.

        This is the method called by update(). It should yield comics which
        are basically dictionaries with the following properties :
            - 'url' is linked to a string
            - 'img' is linked to a list of url (that will get downloaded)
            - 'day'/'month'/'year' are self explicit. They are linked to
                integers corresponding to the comic dates. There should be
                all of them or none of them
            - more fields can be provided."""
        raise NotImplementedError

    @classmethod
    def print_text(cls, text):
        """Print text by returning to the beginning of the line every time."""
        print(cls.name, ':', text, ' ' * 10, '\r', end='')

    @classmethod
    def print_comic(cls, comic):
        """Print information about a comic."""
        cls.print_text(comic['url'])

    @classmethod
    def update(cls):
        """Update the database : get the latest comics and save in the DB.

        This is a wrapper around get_next_comic() providing the following
        generic features :
            - logging
            - database handling (open and save)
            - exception handling (properly retrieved data are always saved)
            - file download
            - data management (adds current date if no date is provided)."""
        cls.log("start")
        print(cls.name, ': about to update')
        cls.create_output_dir()
        comics = cls.load_db()
        new_comics = []
        start = time.time()
        try:
            last_comic = comics[-1] if comics else None
            cls.log("last comic is %s" % ('None' if last_comic is None else last_comic['url']))
            for comic in cls.get_next_comic(last_comic):
                cls.log("got %s" % str(comic))
                if 'day' in comic:
                    assert all(isinstance(comic.get(k), int) for k in ['day', 'month', 'year'])
                else:
                    # No date provided at all: fall back to today's date.
                    assert all(k not in comic for k in ['day', 'month', 'year'])
                    day = date.today()
                    comic['day'], comic['month'], comic['year'] = \
                        day.day, day.month, day.year
                prefix = comic.get('prefix', '')
                comic['local_img'] = [cls.get_file_in_output_dir(i, prefix)
                                      for i in comic['img']]
                comic['comic'] = cls.long_name
                comic['new'] = None  # "'new' in comic" to check if new
                new_comics.append(comic)
                cls.print_comic(comic)
        finally:
            # Runs even when retrieval fails part-way, so that comics already
            # retrieved are saved before the exception propagates.
            end = time.time()
            if new_comics:
                print()
                cls.save_db(comics + new_comics)
                print(cls.name, ": added", len(new_comics),
                      "comics in", end - start, "seconds")
            else:
                print(cls.name, ": nothing new")
        cls.log("done")

    @classmethod
    def try_to_get_missing_resources(cls):
        """Download images that might not have been downloaded properly in
        the first place.

        Comics for which an image gets recovered are flagged as 'new' again
        and the database is saved if anything changed."""
        cls.log("start")
        print(cls.name, ': about to try to get missing resources')
        cls.create_output_dir()
        comics = cls.load_db()
        change = False
        for comic in comics:
            local = comic['local_img']
            prefix = comic.get('prefix', '')
            for i, (path, url) in enumerate(zip(local, comic['img'])):
                # A None path marks an image whose download failed earlier.
                if path is None:
                    new_path = cls.get_file_in_output_dir(url, prefix)
                    if new_path is None:
                        print(cls.name, ': failed to get', url)
                    else:
                        print(cls.name, ': got', url, 'at', new_path)
                        local[i] = new_path
                        change = True
                        comic['new'] = None
        if change:
            cls.save_db(comics)
            print(cls.name, ": some missing resources have been downloaded")
        cls.log("done")

    @classmethod
    def reset_new(cls):
        """Remove the 'new' flag on comics in the DB."""
        cls.log("start")
        cls.create_output_dir()
        cls.save_db([{key: val for key, val in c.items() if key != 'new'} for c in cls.load_db()])
        cls.log("done")

    @classmethod
    def info(cls):
        """Print information about the comics.

        Shows the number of comics (and how many are new), the number of
        images and, if there is at least one comic, the range of dates."""
        cls.log("start")
        print("%s (%s) : " % (cls.long_name, cls.url))
        cls.create_output_dir()
        comics = cls.load_db()
        dates = [get_date_for_comic(c) for c in comics]
        print("%d comics (%d new)" % (len(comics), sum(1 for c in comics if 'new' in c)))
        print("%d images" % sum(len(c['img']) for c in comics))
        if dates:
            date_min, date_max = min(dates), max(dates)
            print("from %s to %s (%d days)" % (date_min, date_max, (date_max - date_min).days))
        print()
        cls.log("done")

    @classmethod
    def readme(cls):
        """Return information to generate README (a Markdown list item)."""
        return ' * [%s](%s)\n' % (cls.long_name, cls.url)

    @classmethod
    def gitignore(cls):
        """Return information to generate gitignore (the name on its own line)."""
        return '%s\n' % (cls.name)

    @classmethod
    def get_categories(cls):
        """Return categories to be able to group comics.

        Categories are such that all classes have their ancestors'
        categories and their own (provided as an iterable in the
        `_categories` class member). The result is a sorted list without
        duplicates."""
        return sorted(set(cat
                          for klass in cls.__mro__
                          for cat in getattr(klass, '_categories', [])))


1			#! /usr/bin/python3
2			# vim: set expandtab tabstop=4 shiftwidth=4 :
3			"""Module to define logic common to all comics."""
4
5			import json
6			import time
7			import os
8			from datetime import date
9			from urlfunctions import get_filename_from_url, get_file_at_url
10			import inspect
11			import logging
12
13
14			def get_date_for_comic(comic):
15			"""Return date object for a given comic."""
16			return date(comic['year'], comic['month'], comic['day'])
17
18
19			def get_info_before_comic(comic):
20			"""Generates the info to be put before the images."""
21			author = comic.get('author')
22			if author:
23			yield 'by ' + author
24
25
26			def get_info_after_comic(comic):
27			"""Generates the info to be put after the images."""
28			for name in ['alt', 'title', 'title2', 'texts', 'name', 'description']:
29			info = comic.get(name)
30			if info:
31			yield info
32
33
34			class GenericComic(object):
35			"""Generic class to handle the logic common to all comics
36
37			Attributes :
38			name Name of the comic (for logging, CLI and default output dir)
39			long_name Long name of the comic (to be added in the comic info)
40			url Base url for the comic (without trailing slash)."""
41			name = None
42			long_name = None
43			url = None
44			_categories = ('ALL', )
45
46			@classmethod
47			def log(cls, string):
48			"""Dirty logging function."""
49			# TODO: https://docs.python.org/2/library/logging.html#logrecord-attributes
50			# we do not need to retrieve the function name manually
51			logging.debug(inspect.stack()[1][3] + " " + cls.name + " " + string)
52
53			@classmethod
54			def get_output_dir(cls):
55			"""Returns the name of the output directory (for comics and JSON file).
56			To be overridden if needed."""
57			return cls.name
58
59			@classmethod
60			def create_output_dir(cls):
61			"""Create output directory for the comic on the file system."""
62			cls.log("start")
63			os.makedirs(cls.get_output_dir(), exist_ok=True)
64			cls.log("done")
65
66			@classmethod
67			def get_json_file_path(cls):
68			"""Get the full path to the JSON file."""
69			return os.path.join(cls.get_output_dir(), cls.name + '.json')
70
71			@classmethod
72			def load_db(cls):
73			"""Load the JSON file to return a list of comics."""
74			cls.log("start")
75			try:
76			with open(cls.get_json_file_path()) as file:
77			return json.load(file)
78			except IOError:
79			return []
80
81			@classmethod
82			def save_db(cls, data):
83			"""Save the list of comics in the JSON file."""
84			cls.log("start")
85			with open(cls.get_json_file_path(), 'w+') as file:
86			json.dump(data, file, indent=4, sort_keys=True)
87			cls.log("done")
88
89			@classmethod
90			def get_file_in_output_dir(cls, url, prefix=None):
91			"""Download file from URL and save it in output folder."""
92			cls.log("start (url:%s)" % url)
93			filename = os.path.join(
94			cls.get_output_dir(),
95			('' if prefix is None else prefix) +
96			get_filename_from_url(url))
97			return get_file_at_url(url, filename)
98
99			@classmethod
100			def check_everything_is_ok(cls):
101			"""Perform tests on the database to check that everything is ok."""
102			cls.log("start")
103			print(cls.name, ': about to check')
104			comics = cls.load_db()
105			imgs_paths = {}
106			imgs_urls = {}
107			prev_date, prev_num = None, None
108			today = date.today()
109			for i, comic in enumerate(comics):
110			cls.print_comic(comic)
111			url = comic.get('url')
112			assert isinstance(url, str), "Url %s not a string" % url
113			assert comic.get('comic') == cls.long_name
114			assert all(isinstance(comic.get(k), int)
115			for k in ['day', 'month', 'year']), \
116			"Invalid date data (%s)" % url
117			curr_date = get_date_for_comic(comic)
118			assert curr_date <= today
119			curr_num = comic.get('num', 0)
120			assert isinstance(curr_num, int)
121			assert prev_date is None or prev_date <= curr_date or \
122			prev_num < curr_num, \
123			"Comics are not in order (%s)" % url
124			prev_date, prev_num = curr_date, curr_num
125			img = comic.get('img')
126			local_img = comic.get('local_img')
127			assert isinstance(img, list)
128			assert isinstance(local_img, list)
129			assert len(local_img) == len(img)
130			for path in local_img:
131			if path is not None:
132			assert os.path.isfile(path)
133			imgs_paths.setdefault(path, set()).add(i)
134			for img_url in img:
135			imgs_urls.setdefault(img_url, set()).add(i)
136			print()
137			if False: # To check if imgs are not overriding themselves
138			for path, nums in imgs_paths.items():
139			if len(nums) > 1:
140			print("Image used multiple times", path, nums)
141			for img_url, nums in imgs_urls.items():
142			if len(nums) > 1:
143			print("Url used multiple times", img_url, nums)
144			if False: # To check that all files in folder are useful
145			json = cls.get_json_file_path()
146			output_dir = cls.get_output_dir()
147			for file_ in os.listdir(output_dir):
148			file_path = os.path.join(output_dir, file_)
149			if file_path not in imgs_paths and file_path != json:
150			print("Unused image", file_path)
151			cls.log("done")
152
153			@classmethod
154			def get_next_comic(cls, _):
155			"""Generator to get the next comic.
156
157			First argument is the last properly downloaded comic which gives
158			a starting point to download more.
159
160			This is the method called by update(). It should yield comics which
161			are basically dictionnaries with the following property :
162			- 'url' is linked to a string
163			- 'img' is linked to a list of url (that will get downloaded)
164			- 'day'/'month'/'year' are self explicit. They are linked to
165			integers corresponding to the comic dates. There should be
166			all of them or none of them
167			- more fields can be provided."""
168			raise NotImplementedError
169
170			@classmethod
171			def print_text(cls, text):
172			"""Print text by returning to the beginning of the line every time."""
173			print(cls.name, ':', text, ' ' * 10, '\r', end='')
174
175			@classmethod
176			def print_comic(cls, comic):
177			"""Print information about a comic."""
178			cls.print_text(comic['url'])
179
180			@classmethod
181			def update(cls):
182			"""Update the database : get the latest comics and save in the DB.
183
184			This is a wrapper around get_next_comic() providing the following
185			generic features :
186			- logging
187			- database handling (open and save)
188			- exception handling (properly retrieved data are always saved)
189			- file download
190			- data management (adds current date if no date is provided)."""
191			cls.log("start")
192			print(cls.name, ': about to update')
193			cls.create_output_dir()
194			comics = cls.load_db()
195			new_comics = []
196			start = time.time()
197			try:
198			last_comic = comics[-1] if comics else None
199			cls.log("last comic is %s" % ('None' if last_comic is None else last_comic['url']))
200			for comic in cls.get_next_comic(last_comic):
201			cls.log("got %s" % str(comic))
202			if 'day' in comic:
203			assert all(isinstance(comic.get(k), int) for k in ['day', 'month', 'year'])
204			else:
205			assert all(k not in comic for k in ['day', 'month', 'year'])
206			day = date.today()
207			comic['day'], comic['month'], comic['year'] = \
208			day.day, day.month, day.year
209			prefix = comic.get('prefix', '')
210			comic['local_img'] = [cls.get_file_in_output_dir(i, prefix)
211			for i in comic['img']]
212			comic['comic'] = cls.long_name
213			comic['new'] = None # "'new' in comic" to check if new
214			new_comics.append(comic)
215			cls.print_comic(comic)
216			finally:
217			end = time.time()
218			if new_comics:
219			print()
220			cls.save_db(comics + new_comics)
221			print(cls.name, ": added", len(new_comics),
222			"comics in", end - start, "seconds")
223			else:
224			print(cls.name, ": nothing new")
225			cls.log("done")
226
227			@classmethod
228			def try_to_get_missing_resources(cls):
229			"""Download images that might not have been downloaded properly in
230			the first place."""
231			cls.log("start")
232			print(cls.name, ': about to try to get missing resources')
233			cls.create_output_dir()
234			comics = cls.load_db()
235			change = False
236			for comic in comics:
237			local = comic['local_img']
238			prefix = comic.get('prefix', '')
239			for i, (path, url) in enumerate(zip(local, comic['img'])):
240			if path is None:
241			new_path = cls.get_file_in_output_dir(url, prefix)
242			if new_path is None:
243			print(cls.name, ': failed to get', url)
244			else:
245			print(cls.name, ': got', url, 'at', new_path)
246			local[i] = new_path
247			change = True
248			comic['new'] = None
249			if change:
250			cls.save_db(comics)
251			print(cls.name, ": some missing resources have been downloaded")
252			cls.log("done")
253
254			@classmethod
255			def reset_new(cls):
256			"""Remove the 'new' flag on comics in the DB."""
257			cls.log("start")
258			cls.create_output_dir()
259			cls.save_db([{key: val for key, val in c.items() if key != 'new'} for c in cls.load_db()])
260			cls.log("done")
261
262			@classmethod
263			def info(cls):
264			"""Print information about the comics."""
265			cls.log("start")
266			print("%s (%s) : " % (cls.long_name, cls.url))
267			cls.create_output_dir()
268			comics = cls.load_db()
269			dates = [get_date_for_comic(c) for c in comics]
270			print("%d comics (%d new)" % (len(comics), sum(1 for c in comics if 'new' in c)))
271			print("%d images" % sum(len(c['img']) for c in comics))
272			if dates:
273			date_min, date_max = min(dates), max(dates)
274			print("from %s to %s (%d days)" % (date_min, date_max, (date_max - date_min).days))
275			print()
276			cls.log("done")
277
278			@classmethod
279			def readme(cls):
280			"""Return information to generate README."""
281			return ' * [%s](%s)\n' % (cls.long_name, cls.url)
282
283			@classmethod
284			def gitignore(cls):
285			"""Return information to generate gitignore."""
286			return '%s\n' % (cls.name)
287
288			@classmethod
289			def get_categories(cls):
290			"""Return categories to be able to group comics.
291
292			Categories are such that all classes have their ancestrors'
293			categories and their own (provided as an iterable in the
294			`_categories` class member)."""
295			return sorted(set(cat
296			for klass in cls.__mro__
297			for cat in getattr(klass, '_categories', [])))
298

SylvainDe / ComicBookMaker

Push — master ( c3d761...46e76e )

GenericComic.get_categories() A

Complexity

Size

Duplication

Importance

Duplication Side-by-Side

Filter issues like