GenericComic.get_last_comic() - Code Metrics - Inspection of "Add delete_last feature #58" - SylvainDe/ComicBookMaker - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Push — master ( e2c120...c7b81c )

by De

created 2017-02-15 19:39 UTC

GenericComic.get_last_comic() A

↳ Parent: GenericComic

Complexity

Conditions

Size

Total Lines

Duplication

Lines	0
Ratio	0 %

Importance

Changes

Metric	Value
cc	3
dl	0
loc	4
rs	10
c	0
b	0
f	0

#! /usr/bin/python3
# vim: set expandtab tabstop=4 shiftwidth=4 :
"""Module to define logic common to all comics."""

import json
import time
import os
from datetime import date
from urlfunctions import get_filename_from_url, get_file_at_url
import inspect
import logging


def get_date_for_comic(comic):
    """Build the datetime.date described by a comic's date fields.

    The comic is a mapping whose 'year', 'month' and 'day' entries are
    passed to the date constructor; a missing entry raises KeyError."""
    return date(year=comic['year'], month=comic['month'], day=comic['day'])


def get_info_before_comic(comic):
    """Yield the pieces of text to display before a comic's images.

    Currently this is a single 'by <author>' line, produced only when the
    comic has a non-empty 'author' entry."""
    who = comic.get('author')
    if not who:
        return
    yield 'by ' + who


def get_info_after_comic(comic):
    """Yield the pieces of text to display after a comic's images.

    The optional fields are looked up in a fixed order and every one that
    is present with a truthy value is yielded as is."""
    fields = ('alt', 'title', 'title2', 'texts', 'name', 'description')
    for field in fields:
        value = comic.get(field)
        if not value:
            continue
        yield value


class GenericComic(object):
    """Generic class to handle the logic common to all comics.

    Every method is a classmethod, so the class itself is used directly and
    never needs to be instantiated. The comics are stored as a list of
    dictionaries in a JSON file located in the output directory.

    Attributes :
        name        Name of the comic (for logging, CLI and default output dir)
        long_name   Long name of the comic (to be added in the comic info)
        url         Base url for the comic (without trailing slash)."""
    name = None
    long_name = None
    url = None
    _categories = ('ALL', )

    @classmethod
    def log(cls, string):
        """Dirty logging function.

        Emits a debug record made of the calling function's name, the comic
        name and the given string, separated by spaces."""
        # TODO: https://docs.python.org/2/library/logging.html#logrecord-attributes
        # we do not need to retrieve the function name manually
        # Lazy %-style arguments are used instead of string concatenation so
        # that a comic whose name is still None does not raise a TypeError.
        logging.debug("%s %s %s", inspect.stack()[1][3], cls.name, string)

    @classmethod
    def _get_output_dir(cls):
        """Returns the name of the output directory (for comics and JSON file).
        To be overridden if needed."""
        return cls.name

    @classmethod
    def _create_output_dir(cls):
        """Create output directory for the comic on the file system.

        Nothing happens if the directory already exists."""
        cls.log("start")
        os.makedirs(cls._get_output_dir(), exist_ok=True)
        cls.log("done")

    @classmethod
    def _get_json_file_path(cls):
        """Get the full path to the JSON file.

        The file is named after the comic and lives in the output dir."""
        return os.path.join(cls._get_output_dir(), cls.name + '.json')

    @classmethod
    def _load_db(cls):
        """Load the JSON file to return the list of comics.

        Deleted comics are included. An empty list is returned when the
        file cannot be opened (IOError)."""
        cls.log("start")
        try:
            with open(cls._get_json_file_path()) as file:
                return json.load(file)
        except IOError:
            return []

    @classmethod
    def get_comics(cls):
        """Return the list of (non-deleted) comics loaded from the DB."""
        return [c for c in cls._load_db() if 'deleted' not in c]

    @classmethod
    def get_last_comic(cls, comics):
        """Return the last (non-deleted) comic.

        The argument is the list of comics to search, from oldest to
        newest. None is returned if every comic is deleted or if the list
        is empty."""
        return next((c for c in reversed(comics) if 'deleted' not in c), None)

    @classmethod
    def _save_db(cls, data):
        """Save the list of comics in the JSON file.

        Any previous content of the file is overwritten."""
        cls.log("start")
        with open(cls._get_json_file_path(), 'w+') as file:
            json.dump(data, file, indent=4, sort_keys=True)
        cls.log("done")

    @classmethod
    def get_file_in_output_dir(cls, url, prefix=None):
        """Download file from URL and save it in output folder.

        The local file name is the optional prefix followed by the file
        name extracted from the url. The value returned by
        get_file_at_url() is passed through unchanged."""
        cls.log("start (url:%s)" % url)
        filename = os.path.join(
            cls._get_output_dir(),
            ('' if prefix is None else prefix) +
            get_filename_from_url(url))
        return get_file_at_url(url, filename)

    @classmethod
    def check_everything_is_ok(cls):
        """Perform tests on the database to check that everything is ok.

        Each non-deleted comic is checked with assertions: type of the url,
        comic name, date fields, ordering and consistency between the
        'img' and 'local_img' lists. An AssertionError is raised on the
        first problem found."""
        cls.log("start")
        print(cls.name, ': about to check')
        comics = cls.get_comics()  # cls._load_db()
        imgs_paths = {}
        imgs_urls = {}
        prev_date, prev_num = None, None
        today = date.today()
        for i, comic in enumerate(comics):
            cls.print_comic(comic)
            url = comic.get('url')
            assert isinstance(url, str), "Url %s not a string" % url
            assert comic.get('comic') == cls.long_name
            assert all(isinstance(comic.get(k), int)
                       for k in ['day', 'month', 'year']), \
                "Invalid date data (%s)" % url
            curr_date = get_date_for_comic(comic)
            assert curr_date <= today
            curr_num = comic.get('num', 0)
            assert isinstance(curr_num, int)
            # Comics must be sorted either by date or by number
            assert prev_date is None or prev_date <= curr_date or \
                prev_num < curr_num, \
                "Comics are not in order (%s)" % url
            prev_date, prev_num = curr_date, curr_num
            img = comic.get('img')
            local_img = comic.get('local_img')
            assert isinstance(img, list)
            assert isinstance(local_img, list)
            assert len(local_img) == len(img)
            # Record which comics (by index) use each local file and url
            for path in local_img:
                if path is not None:
                    assert os.path.isfile(path)
                    imgs_paths.setdefault(path, set()).add(i)
            for img_url in img:
                imgs_urls.setdefault(img_url, set()).add(i)
        print()
        if False:  # To check if imgs are not overriding themselves
            for path, nums in imgs_paths.items():
                if len(nums) > 1:
                    print("Image used multiple times", path, nums)
            for img_url, nums in imgs_urls.items():
                if len(nums) > 1:
                    print("Url used multiple times", img_url, nums)
        if False:  # To check that all files in folder are useful
            # Named json_path so that the json module is not shadowed
            json_path = cls._get_json_file_path()
            output_dir = cls._get_output_dir()
            for file_ in os.listdir(output_dir):
                file_path = os.path.join(output_dir, file_)
                if file_path not in imgs_paths and file_path != json_path:
                    print("Unused image", file_path)
        cls.log("done")

    @classmethod
    def get_next_comic(cls, _):
        """Generator to get the next comic.

        First argument is the last properly downloaded comic which gives
        a starting point to download more.

        This is the method called by update(). It should yield comics which
        are basically dictionaries with the following property :
            - 'url' is linked to a string
            - 'img' is linked to a list of url (that will get downloaded)
            - 'day'/'month'/'year' are self explicit. They are linked to
                integers corresponding to the comic dates. There should be
                all of them or none of them
            - more fields can be provided."""
        raise NotImplementedError

    @classmethod
    def print_text(cls, text):
        """Print text by returning to the beginning of the line every time."""
        print(cls.name, ':', text, ' ' * 10, '\r', end='')

    @classmethod
    def print_comic(cls, comic):
        """Print information about a comic (its url)."""
        cls.print_text(comic['url'])

    @classmethod
    def update(cls):
        """Update the database : get the latest comics and save in the DB.

        This is a wrapper around get_next_comic() providing the following
        generic features :
            - logging
            - database handling (open and save)
            - exception handling (properly retrieved data are always saved)
            - file download
            - data management (adds current date if no date is provided)."""
        cls.log("start")
        print(cls.name, ': about to update')
        cls._create_output_dir()
        comics = cls._load_db()
        new_comics = []
        start = time.time()
        try:
            last_comic = cls.get_last_comic(comics)
            cls.log("last comic is %s" % ('None' if last_comic is None else last_comic['url']))
            for comic in cls.get_next_comic(last_comic):
                cls.log("got %s" % str(comic))
                if 'day' in comic:
                    assert all(isinstance(comic.get(k), int) for k in ['day', 'month', 'year'])
                else:
                    # No date provided : use the current date instead
                    assert all(k not in comic for k in ['day', 'month', 'year'])
                    day = date.today()
                    comic['day'], comic['month'], comic['year'] = \
                        day.day, day.month, day.year
                prefix = comic.get('prefix', '')
                comic['local_img'] = [cls.get_file_in_output_dir(i, prefix)
                                      for i in comic['img']]
                comic['comic'] = cls.long_name
                comic['new'] = None  # "'new' in comic" to check if new
                new_comics.append(comic)
                cls.print_comic(comic)
        finally:
            # Done in a finally block so that the comics retrieved before
            # an exception are saved anyway
            end = time.time()
            if new_comics:
                print()
                cls._save_db(comics + new_comics)
                print(cls.name, ": added", len(new_comics),
                      "comics in", end - start, "seconds")
            else:
                print(cls.name, ": nothing new")
        cls.log("done")

    @classmethod
    def try_to_get_missing_resources(cls):
        """Download images that might not have been downloaded properly in
        the first place.

        A missing image is a None entry in the 'local_img' list of a comic.
        When at least one image could be retrieved, the corresponding
        comics are flagged as new and the DB is saved."""
        cls.log("start")
        print(cls.name, ': about to try to get missing resources')
        cls._create_output_dir()
        comics = cls._load_db()
        change = False
        for comic in comics:
            local = comic['local_img']
            prefix = comic.get('prefix', '')
            for i, (path, url) in enumerate(zip(local, comic['img'])):
                if path is None:
                    new_path = cls.get_file_in_output_dir(url, prefix)
                    if new_path is None:
                        print(cls.name, ': failed to get', url)
                    else:
                        print(cls.name, ': got', url, 'at', new_path)
                        local[i] = new_path
                        change = True
                        comic['new'] = None
        if change:
            cls._save_db(comics)
            print(cls.name, ": some missing resources have been downloaded")
        cls.log("done")

    @classmethod
    def reset_new(cls):
        """Remove the 'new' flag on comics in the DB."""
        cls.log("start")
        cls._create_output_dir()
        cls._save_db([{key: val for key, val in c.items() if key != 'new'} for c in cls._load_db()])
        cls.log("done")

    @classmethod
    def delete_last(cls):
        """Delete last (non-deleted) comic.

        The comic is not removed from the DB : it is flagged with a
        'deleted' key and the DB is saved. Nothing is saved if there is no
        comic to delete."""
        cls.log("start")
        comics = cls._load_db()
        last_comic = cls.get_last_comic(comics)
        if last_comic is None:
            cls.log("no comic to delete")
        else:
            cls.log("about to delete %s" % last_comic['url'])
            last_comic['deleted'] = None  # "'deleted' in comic" to check if deleted
            cls._save_db(comics)
        cls.log("done")

    @classmethod
    def info(cls):
        """Print information about the comics.

        This covers the number of (non-deleted) comics, of new comics and
        of images, and the range of dates when there is at least one
        comic."""
        cls.log("start")
        print("%s (%s) : " % (cls.long_name, cls.url))
        cls._create_output_dir()
        comics = cls.get_comics()  # cls._load_db()
        dates = [get_date_for_comic(c) for c in comics]
        print("%d comics (%d new)" % (len(comics), sum(1 for c in comics if 'new' in c)))
        print("%d images" % sum(len(c['img']) for c in comics))
        if dates:
            date_min, date_max = min(dates), max(dates)
            print("from %s to %s (%d days)" % (date_min, date_max, (date_max - date_min).days))
        print()
        cls.log("done")

    @classmethod
    def readme(cls):
        """Return information to generate README."""
        return ' * [%s](%s)\n' % (cls.long_name, cls.url)

    @classmethod
    def gitignore(cls):
        """Return information to generate gitignore."""
        return '%s\n' % (cls.name)

    @classmethod
    def get_categories(cls):
        """Return categories to be able to group comics.

        Categories are such that all classes have their ancestors'
        categories and their own (provided as an iterable in the
        `_categories` class member). They are returned as a sorted list
        without duplicates."""
        return sorted(set(cat
                          for klass in cls.__mro__
                          for cat in getattr(klass, '_categories', [])))


1			#! /usr/bin/python3
2			# vim: set expandtab tabstop=4 shiftwidth=4 :
3			"""Module to define logic common to all comics."""
4
5			import json
6			import time
7			import os
8			from datetime import date
9			from urlfunctions import get_filename_from_url, get_file_at_url
10			import inspect
11			import logging
12
13
14			def get_date_for_comic(comic):
15			"""Return date object for a given comic."""
16			return date(comic['year'], comic['month'], comic['day'])
17
18
19			def get_info_before_comic(comic):
20			"""Generates the info to be put before the images."""
21			author = comic.get('author')
22			if author:
23			yield 'by ' + author
24
25
26			def get_info_after_comic(comic):
27			"""Generates the info to be put after the images."""
28			for name in ['alt', 'title', 'title2', 'texts', 'name', 'description']:
29			info = comic.get(name)
30			if info:
31			yield info
32
33
34			class GenericComic(object):
35			"""Generic class to handle the logic common to all comics
36
37			Attributes :
38			name Name of the comic (for logging, CLI and default output dir)
39			long_name Long name of the comic (to be added in the comic info)
40			url Base url for the comic (without trailing slash)."""
41			name = None
42			long_name = None
43			url = None
44			_categories = ('ALL', )
45
46			@classmethod
47			def log(cls, string):
48			"""Dirty logging function."""
49			# TODO: https://docs.python.org/2/library/logging.html#logrecord-attributes
50			# we do not need to retrieve the function name manually
51			logging.debug(inspect.stack()[1][3] + " " + cls.name + " " + string)
52
53			@classmethod
54			def _get_output_dir(cls):
55			"""Returns the name of the output directory (for comics and JSON file).
56			To be overridden if needed."""
57			return cls.name
58
59			@classmethod
60			def _create_output_dir(cls):
61			"""Create output directory for the comic on the file system."""
62			cls.log("start")
63			os.makedirs(cls._get_output_dir(), exist_ok=True)
64			cls.log("done")
65
66			@classmethod
67			def _get_json_file_path(cls):
68			"""Get the full path to the JSON file."""
69			return os.path.join(cls._get_output_dir(), cls.name + '.json')
70
71			@classmethod
72			def _load_db(cls):
73			"""Load the JSON file to return the list of comics."""
74			cls.log("start")
75			try:
76			with open(cls._get_json_file_path()) as file:
77			return json.load(file)
78			except IOError:
79			return []
80
81			@classmethod
82			def get_comics(cls):
83			"""Return the list of comics."""
84			return [c for c in cls._load_db() if 'deleted' not in c]
85
86			@classmethod
87			def get_last_comic(cls, comics):
88			"""Return the last (non-deleted) comic."""
89			return next((c for c in reversed(comics) if 'deleted' not in c), None)
90
91			@classmethod
92			def _save_db(cls, data):
93			"""Save the list of comics in the JSON file."""
94			cls.log("start")
95			with open(cls._get_json_file_path(), 'w+') as file:
96			json.dump(data, file, indent=4, sort_keys=True)
97			cls.log("done")
98
99			@classmethod
100			def get_file_in_output_dir(cls, url, prefix=None):
101			"""Download file from URL and save it in output folder."""
102			cls.log("start (url:%s)" % url)
103			filename = os.path.join(
104			cls._get_output_dir(),
105			('' if prefix is None else prefix) +
106			get_filename_from_url(url))
107			return get_file_at_url(url, filename)
108
109			@classmethod
110			def check_everything_is_ok(cls):
111			"""Perform tests on the database to check that everything is ok."""
112			cls.log("start")
113			print(cls.name, ': about to check')
114			comics = cls.get_comics() # cls._load_db()
115			imgs_paths = {}
116			imgs_urls = {}
117			prev_date, prev_num = None, None
118			today = date.today()
119			for i, comic in enumerate(comics):
120			cls.print_comic(comic)
121			url = comic.get('url')
122			assert isinstance(url, str), "Url %s not a string" % url
123			assert comic.get('comic') == cls.long_name
124			assert all(isinstance(comic.get(k), int)
125			for k in ['day', 'month', 'year']), \
126			"Invalid date data (%s)" % url
127			curr_date = get_date_for_comic(comic)
128			assert curr_date <= today
129			curr_num = comic.get('num', 0)
130			assert isinstance(curr_num, int)
131			assert prev_date is None or prev_date <= curr_date or \
132			prev_num < curr_num, \
133			"Comics are not in order (%s)" % url
134			prev_date, prev_num = curr_date, curr_num
135			img = comic.get('img')
136			local_img = comic.get('local_img')
137			assert isinstance(img, list)
138			assert isinstance(local_img, list)
139			assert len(local_img) == len(img)
140			for path in local_img:
141			if path is not None:
142			assert os.path.isfile(path)
143			imgs_paths.setdefault(path, set()).add(i)
144			for img_url in img:
145			imgs_urls.setdefault(img_url, set()).add(i)
146			print()
147			if False: # To check if imgs are not overriding themselves
148			for path, nums in imgs_paths.items():
149			if len(nums) > 1:
150			print("Image used multiple times", path, nums)
151			for img_url, nums in imgs_urls.items():
152			if len(nums) > 1:
153			print("Url used multiple times", img_url, nums)
154			if False: # To check that all files in folder are useful
155			json = cls._get_json_file_path()
156			output_dir = cls._get_output_dir()
157			for file_ in os.listdir(output_dir):
158			file_path = os.path.join(output_dir, file_)
159			if file_path not in imgs_paths and file_path != json:
160			print("Unused image", file_path)
161			cls.log("done")
162
163			@classmethod
164			def get_next_comic(cls, _):
165			"""Generator to get the next comic.
166
167			First argument is the last properly downloaded comic which gives
168			a starting point to download more.
169
170			This is the method called by update(). It should yield comics which
171			are basically dictionnaries with the following property :
172			- 'url' is linked to a string
173			- 'img' is linked to a list of url (that will get downloaded)
174			- 'day'/'month'/'year' are self explicit. They are linked to
175			integers corresponding to the comic dates. There should be
176			all of them or none of them
177			- more fields can be provided."""
178			raise NotImplementedError
179
180			@classmethod
181			def print_text(cls, text):
182			"""Print text by returning to the beginning of the line every time."""
183			print(cls.name, ':', text, ' ' * 10, '\r', end='')
184
185			@classmethod
186			def print_comic(cls, comic):
187			"""Print information about a comic."""
188			cls.print_text(comic['url'])
189
190			@classmethod
191			def update(cls):
192			"""Update the database : get the latest comics and save in the DB.
193
194			This is a wrapper around get_next_comic() providing the following
195			generic features :
196			- logging
197			- database handling (open and save)
198			- exception handling (properly retrieved data are always saved)
199			- file download
200			- data management (adds current date if no date is provided)."""
201			cls.log("start")
202			print(cls.name, ': about to update')
203			cls._create_output_dir()
204			comics = cls._load_db()
205			new_comics = []
206			start = time.time()
207			try:
208			last_comic = cls.get_last_comic(comics)
209			cls.log("last comic is %s" % ('None' if last_comic is None else last_comic['url']))
210			for comic in cls.get_next_comic(last_comic):
211			cls.log("got %s" % str(comic))
212			if 'day' in comic:
213			assert all(isinstance(comic.get(k), int) for k in ['day', 'month', 'year'])
214			else:
215			assert all(k not in comic for k in ['day', 'month', 'year'])
216			day = date.today()
217			comic['day'], comic['month'], comic['year'] = \
218			day.day, day.month, day.year
219			prefix = comic.get('prefix', '')
220			comic['local_img'] = [cls.get_file_in_output_dir(i, prefix)
221			for i in comic['img']]
222			comic['comic'] = cls.long_name
223			comic['new'] = None # "'new' in comic" to check if new
224			new_comics.append(comic)
225			cls.print_comic(comic)
226			finally:
227			end = time.time()
228			if new_comics:
229			print()
230			cls._save_db(comics + new_comics)
231			print(cls.name, ": added", len(new_comics),
232			"comics in", end - start, "seconds")
233			else:
234			print(cls.name, ": nothing new")
235			cls.log("done")
236
237			@classmethod
238			def try_to_get_missing_resources(cls):
239			"""Download images that might not have been downloaded properly in
240			the first place."""
241			cls.log("start")
242			print(cls.name, ': about to try to get missing resources')
243			cls._create_output_dir()
244			comics = cls._load_db()
245			change = False
246			for comic in comics:
247			local = comic['local_img']
248			prefix = comic.get('prefix', '')
249			for i, (path, url) in enumerate(zip(local, comic['img'])):
250			if path is None:
251			new_path = cls.get_file_in_output_dir(url, prefix)
252			if new_path is None:
253			print(cls.name, ': failed to get', url)
254			else:
255			print(cls.name, ': got', url, 'at', new_path)
256			local[i] = new_path
257			change = True
258			comic['new'] = None
259			if change:
260			cls._save_db(comics)
261			print(cls.name, ": some missing resources have been downloaded")
262			cls.log("done")
263
264			@classmethod
265			def reset_new(cls):
266			"""Remove the 'new' flag on comics in the DB."""
267			cls.log("start")
268			cls._create_output_dir()
269			cls._save_db([{key: val for key, val in c.items() if key != 'new'} for c in cls._load_db()])
270			cls.log("done")
271
272			@classmethod
273			def delete_last(cls):
274			"""Delete last (non-deleted) comic."""
275			cls.log("start")
276			comics = cls._load_db()
277			last_comic = cls.get_last_comic(comics)
278			if last_comic is None:
279			cls.log("no comic to delete")
280			else:
281			cls.log("about to delete %s" % last_comic['url'])
282			last_comic['deleted'] = None # "'deleted' in comic" to check if deleted
283			cls._save_db(comics)
284			cls.log("done")
285
286			@classmethod
287			def info(cls):
288			"""Print information about the comics."""
289			cls.log("start")
290			print("%s (%s) : " % (cls.long_name, cls.url))
291			cls._create_output_dir()
292			comics = cls.get_comics() # cls._load_db()
293			dates = [get_date_for_comic(c) for c in comics]
294			print("%d comics (%d new)" % (len(comics), sum(1 for c in comics if 'new' in c)))
295			print("%d images" % sum(len(c['img']) for c in comics))
296			if dates:
297			date_min, date_max = min(dates), max(dates)
298			print("from %s to %s (%d days)" % (date_min, date_max, (date_max - date_min).days))
299			print()
300			cls.log("done")
301
302			@classmethod
303			def readme(cls):
304			"""Return information to generate README."""
305			return ' * [%s](%s)\n' % (cls.long_name, cls.url)
306
307			@classmethod
308			def gitignore(cls):
309			"""Return information to generate gitignore."""
310			return '%s\n' % (cls.name)
311
312			@classmethod
313			def get_categories(cls):
314			"""Return categories to be able to group comics.
315
316			Categories are such that all classes have their ancestrors'
317			categories and their own (provided as an iterable in the
318			`_categories` class member)."""
319			return sorted(set(cat
320			for klass in cls.__mro__
321			for cat in getattr(klass, '_categories', [])))
322

SylvainDe / ComicBookMaker

Push — master ( e2c120...c7b81c )

GenericComic.get_last_comic() A

Complexity

Size

Duplication

Importance

Duplication Side-by-Side

Filter issues like