1
|
|
|
#! /usr/bin/python3 |
2
|
|
|
# vim: set expandtab tabstop=4 shiftwidth=4 : |
3
|
|
|
"""Module to define logic common to all comics.""" |
4
|
|
|
|
5
|
|
|
import json |
6
|
|
|
import time |
7
|
|
|
import os |
8
|
|
|
from datetime import date |
9
|
|
|
from urlfunctions import get_filename_from_url, get_file_at_url |
10
|
|
|
import inspect |
11
|
|
|
import logging |
12
|
|
|
|
13
|
|
|
|
14
|
|
|
def get_date_for_comic(comic):
    """Build the date object described by a comic's 'year', 'month' and 'day' entries."""
    # Keys are read in year, month, day order, as the date constructor expects.
    year, month, day = (comic[field] for field in ('year', 'month', 'day'))
    return date(year, month, day)
17
|
|
|
|
18
|
|
|
|
19
|
|
|
def get_info_before_comic(comic):
    """Yield the text lines to display before the images of a comic.

    Only an author credit ('by <author>') is produced, and only when the
    comic has a non-empty 'author' entry.
    """
    credited_to = comic.get('author')
    if not credited_to:
        return
    yield 'by ' + credited_to
24
|
|
|
|
25
|
|
|
|
26
|
|
|
def get_info_after_comic(comic):
    """Yield the text entries to display after the images of a comic.

    The fields are looked up in a fixed order and missing or empty ones
    are skipped.
    """
    fields = ('alt', 'title', 'title2', 'texts', 'name', 'description')
    for value in map(comic.get, fields):
        if value:
            yield value
32
|
|
|
|
33
|
|
|
|
34
|
|
|
class GenericComic(object):
    """Generic class to handle the logic common to all comics.

    Concrete comics are expected to subclass this, set the attributes
    below and implement get_next_comic(). Everything is done through
    classmethods: no instance is ever needed.

    Attributes :
        name        Name of the comic (for logging, CLI and default output dir)
        long_name   Long name of the comic (to be added in the comic info)
        url         Base url for the comic (without trailing slash).
    """
    name = None
    long_name = None
    url = None

    @classmethod
    def log(cls, string):
        """Emit a debug message "<calling function> <comic name> <string>".

        The pieces are handed to the logging module as lazy %-arguments
        rather than concatenated, so a class whose `name` is still None
        (such as this base class) no longer raises TypeError here.
        """
        # TODO: https://docs.python.org/2/library/logging.html#logrecord-attributes
        # we do not need to retrieve the function name manually
        # Only the caller's function name is needed: context=0 avoids
        # loading source lines for every frame of the stack.
        caller = inspect.stack(0)[1][3]
        logging.debug("%s %s %s", caller, cls.name, string)

    @classmethod
    def get_output_dir(cls):
        """Returns the name of the output directory (for comics and JSON file).

        To be overridden if needed."""
        return cls.name

    @classmethod
    def create_output_dir(cls):
        """Create output directory for the comic on the file system.

        Nothing happens if the directory already exists."""
        cls.log("start")
        os.makedirs(cls.get_output_dir(), exist_ok=True)
        cls.log("done")

    @classmethod
    def get_json_file_path(cls):
        """Get the path to the JSON file: '<output dir>/<name>.json'."""
        return os.path.join(cls.get_output_dir(), cls.name + '.json')

    @classmethod
    def load_db(cls):
        """Load the JSON file to return a list of comics.

        Returns an empty list when the file cannot be opened or read
        (typically because nothing has been saved yet). Invalid JSON
        content is not handled here and propagates to the caller."""
        cls.log("start")
        try:
            with open(cls.get_json_file_path()) as file:
                return json.load(file)
        except IOError:
            return []

    @classmethod
    def save_db(cls, data):
        """Save the list of comics in the JSON file.

        Raises whatever json.dumps raises (e.g. TypeError) when `data` is
        not serialisable; in that case the existing file is left untouched."""
        cls.log("start")
        # Serialise before opening the file: opening in write mode truncates
        # it, so a serialisation failure would otherwise destroy the
        # previously saved database.
        serialised = json.dumps(data, indent=4, sort_keys=True)
        with open(cls.get_json_file_path(), 'w') as file:
            file.write(serialised)
        cls.log("done")

    @classmethod
    def get_file_in_output_dir(cls, url, prefix=None):
        """Download file from URL and save it in output folder.

        The local file name is the optional `prefix` followed by the name
        extracted from the url. Returns the value of get_file_at_url();
        callers in this class treat None as "download failed"."""
        cls.log("start (url:%s)" % url)
        filename = os.path.join(
            cls.get_output_dir(),
            ('' if prefix is None else prefix) +
            get_filename_from_url(url))
        return get_file_at_url(url, filename)

    @classmethod
    def check_everything_is_ok(cls, check_duplicates=False,
                               check_unused_files=False):
        """Perform tests on the database to check that everything is ok.

        Each stored comic must have a string url, the expected comic name,
        an integer date not in the future, image lists of matching lengths
        and existing local files; comics must be ordered by date or by
        'num'. An AssertionError is raised on the first violation.

        Optional diagnostics (both disabled by default, they only print):
            check_duplicates    report local paths and image urls used by
                                more than one comic
            check_unused_files  report files of the output directory that
                                no comic references (assumes the stored
                                local paths have the same form as
                                os.path.join(output dir, file name))."""
        cls.log("start")
        print(cls.name, ': about to check')
        comics = cls.load_db()
        # Map each local path / image url to the indices of the comics using it.
        imgs_paths = {}
        imgs_urls = {}
        prev_date, prev_num = None, None
        today = date.today()
        for i, comic in enumerate(comics):
            cls.print_comic(comic)
            url = comic.get('url')
            assert isinstance(url, str), "Url %s not a string" % url
            assert comic.get('comic') == cls.long_name
            assert all(isinstance(comic.get(k), int)
                       for k in ['day', 'month', 'year']), \
                "Invalid date data (%s)" % url
            curr_date = get_date_for_comic(comic)
            assert curr_date <= today
            curr_num = comic.get('num', 0)
            assert isinstance(curr_num, int)
            # Either the dates do not go backwards or the numbers increase.
            assert prev_date is None or prev_date <= curr_date or \
                prev_num < curr_num, \
                "Comics are not in order (%s)" % url
            prev_date, prev_num = curr_date, curr_num
            img = comic.get('img')
            local_img = comic.get('local_img')
            assert isinstance(img, list)
            assert isinstance(local_img, list)
            assert len(local_img) == len(img)
            for path in local_img:
                # None marks an image that could not be downloaded.
                if path is not None:
                    assert os.path.isfile(path)
                    imgs_paths.setdefault(path, set()).add(i)
            for img_url in img:
                imgs_urls.setdefault(img_url, set()).add(i)
        print()
        if check_duplicates:  # To check if imgs are not overriding themselves
            for path, nums in imgs_paths.items():
                if len(nums) > 1:
                    print("Image used multiple times", path, nums)
            for img_url, nums in imgs_urls.items():
                if len(nums) > 1:
                    print("Url used multiple times", img_url, nums)
        if check_unused_files:  # To check that all files in folder are useful
            json_path = cls.get_json_file_path()
            output_dir = cls.get_output_dir()
            for file_ in os.listdir(output_dir):
                file_path = os.path.join(output_dir, file_)
                if file_path not in imgs_paths and file_path != json_path:
                    print("Unused image", file_path)
        cls.log("done")

    @classmethod
    def get_next_comic(cls, _):
        """Generator to get the next comic.

        First argument is the last properly downloaded comic which gives
        a starting point to download more.

        This is the method called by update(). It should yield comics which
        are basically dictionaries with the following property :
            - 'url' is linked to a string
            - 'img' is linked to a list of url (that will get downloaded)
            - 'day'/'month'/'year' are self explicit. They are linked to
                integers corresponding to the comic dates. There should be
                all of them or none of them
            - more fields can be provided.

        The base implementation raises NotImplementedError."""
        raise NotImplementedError

    @classmethod
    def print_text(cls, text):
        """Print text by returning to the beginning of the line every time."""
        print(cls.name, ':', text, ' ' * 10, '\r', end='')

    @classmethod
    def print_comic(cls, comic):
        """Print information about a comic (its url)."""
        cls.print_text(comic['url'])

    @classmethod
    def update(cls):
        """Update the database : get the latest comics and save in the DB.

        This is a wrapper around get_next_comic() providing the following
        generic features :
            - logging
            - database handling (open and save)
            - exception handling (properly retrieved data are always saved)
            - file download
            - data management (adds current date if no date is provided)."""
        cls.log("start")
        print(cls.name, ': about to update')
        cls.create_output_dir()
        comics = cls.load_db()
        new_comics = []
        start = time.time()
        try:
            last_comic = comics[-1] if comics else None
            cls.log("last comic is %s" % ('None' if last_comic is None else last_comic['url']))
            for comic in cls.get_next_comic(last_comic):
                cls.log("got %s" % str(comic))
                if 'day' in comic:
                    assert all(isinstance(comic.get(k), int) for k in ['day', 'month', 'year'])
                else:
                    # No date at all: fall back on the retrieval date.
                    assert all(k not in comic for k in ['day', 'month', 'year'])
                    day = date.today()
                    comic['day'], comic['month'], comic['year'] = \
                        day.day, day.month, day.year
                prefix = comic.get('prefix', '')
                comic['local_img'] = [cls.get_file_in_output_dir(i, prefix)
                                      for i in comic['img']]
                comic['comic'] = cls.long_name
                comic['new'] = None  # "'new' in comic" to check if new
                new_comics.append(comic)
                cls.print_comic(comic)
        finally:
            # Runs even when retrieval failed part-way, so that the comics
            # fully processed before the error are not lost.
            end = time.time()
            if new_comics:
                print()
                cls.save_db(comics + new_comics)
                print(cls.name, ": added", len(new_comics),
                      "comics in", end - start, "seconds")
            else:
                print(cls.name, ": nothing new")
        cls.log("done")

    @classmethod
    def try_to_get_missing_resources(cls):
        """Download images that might not have been downloaded properly in
        the first place.

        Every image whose local path is None is fetched again; comics for
        which a download succeeds are flagged as new and the database is
        saved if anything changed."""
        cls.log("start")
        print(cls.name, ': about to try to get missing resources')
        cls.create_output_dir()
        comics = cls.load_db()
        change = False
        for comic in comics:
            local = comic['local_img']
            prefix = comic.get('prefix', '')
            for i, (path, url) in enumerate(zip(local, comic['img'])):
                if path is None:
                    new_path = cls.get_file_in_output_dir(url, prefix)
                    if new_path is None:
                        print(cls.name, ': failed to get', url)
                    else:
                        print(cls.name, ': got', url, 'at', new_path)
                        local[i] = new_path
                        change = True
                        comic['new'] = None
        if change:
            cls.save_db(comics)
            print(cls.name, ": some missing resources have been downloaded")
        cls.log("done")

    @classmethod
    def reset_new(cls):
        """Remove the 'new' flag on comics in the DB."""
        cls.log("start")
        cls.create_output_dir()
        cls.save_db([{key: val for key, val in c.items() if key != 'new'} for c in cls.load_db()])
        cls.log("done")

    @classmethod
    def info(cls):
        """Print information about the comics.

        Shows the comic name and url, the number of comics (and of new
        ones), the number of images and the range of dates covered."""
        cls.log("start")
        print("%s (%s) : " % (cls.long_name, cls.url))
        cls.create_output_dir()
        comics = cls.load_db()
        dates = [get_date_for_comic(c) for c in comics]
        print("%d comics (%d new)" % (len(comics), sum(1 for c in comics if 'new' in c)))
        print("%d images" % sum(len(c['img']) for c in comics))
        if dates:
            date_min, date_max = min(dates), max(dates)
            print("from %s to %s (%d days)" % (date_min, date_max, (date_max - date_min).days))
        print()
        cls.log("done")

    @classmethod
    def readme(cls):
        """Return information to generate README (a markdown list item)."""
        return ' * [%s](%s)\n' % (cls.long_name, cls.url)

    @classmethod
    def gitignore(cls):
        """Return information to generate gitignore (the comic name on one line)."""
        return '%s\n' % (cls.name)
286
|
|
|
|