|
@@ 1309-1335 (lines=27) @@
|
| 1306 |
|
# Base URL of the site; interpolated into comic_link_re below.
url = 'http://www.qwantz.com'
# Archive elements are resolved to URLs with the shared get_href helper.
get_url_from_archive_element = get_href
# Matches the URL of a single comic and captures its number.  The base URL is
# escaped and the dot of "index.php" is matched literally, so that regex
# metacharacters in these literal parts cannot match arbitrary characters.
comic_link_re = re.compile(r'^%s/index\.php\?comic=([0-9]*)$' % re.escape(url))
| 1309 |
|
|
| 1310 |
|
@classmethod
def get_archive_elements(cls):
    """Return the comic links found on the archive page, in reverse page order.

    The archive page is 'archive.php' relative to `cls.url`; only anchors
    whose href matches `cls.comic_link_re` are considered.

    Returns:
        A reversed iterator over the matching anchors, without the first one.
    """
    archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive.php'))
    comic_links = archive_soup.find_all('a', href=cls.comic_link_re)
    # The first matching link leads to a random comic, not an archive entry.
    return reversed(comic_links[1:])
| 1315 |
|
|
| 1316 |
|
@classmethod
def get_comic_info(cls, soup, link):
    """Get information about a particular comic.

    Args:
        soup: parsed page of the comic; searched for the comic image.
        link: archive element of the comic.  Its URL provides the comic
            number, its string the date (parsed as "%B %d, %Y" once the
            ordinal suffix is removed) and its next sibling the text.

    Returns:
        A dict with the keys 'month', 'year', 'day', 'img', 'title',
        'text' and 'num'.
    """
    url = cls.get_url_from_archive_element(link)
    num = int(cls.comic_link_re.match(url).group(1))
    date_str = link.string
    text = link.next_sibling.string
    day = string_to_date(remove_st_nd_rd_th_from_date(date_str), "%B %d, %Y")
    # The base URL is literal text: escape it so that its dots do not match
    # arbitrary characters in the image source.
    comic_img_re = re.compile('^%s/comics/' % re.escape(cls.url))
    img = soup.find('img', src=comic_img_re)
    return {
        'month': day.month,
        'year': day.year,
        'day': day.day,
        'img': [img.get('src')],
        'title': img.get('title'),
        'text': text,
        'num': num,
    }
| 1335 |
|
|
| 1336 |
|
|
| 1337 |
|
class ButterSafe(GenericListableComic): |
| 1338 |
|
"""Class to retrieve Butter Safe comics.""" |
|
@@ 1374-1395 (lines=22) @@
|
| 1371 |
|
long_name = 'Calvin and Hobbes' |
| 1372 |
|
# NOTE: this is not an official webpage for the comic.
| 1373 |
|
url = 'http://marcel-oehler.marcellosendos.ch/comics/ch/' |
| 1374 |
|
|
| 1375 |
|
@classmethod
def get_next_comic(cls, last_comic):
    """Generate the comics dated after `last_comic`.

    The page at `cls.url` is scanned for links of the form
    '<year>/<month>/'.  Each month page that may still hold unseen comics
    is fetched and scanned for images whose source starts with
    '<year><month><day>'.

    Args:
        last_comic: comic whose date (as given by `get_date_for_comic`) is
            the lower bound, or a falsy value to start from 1985-11-01.

    Yields:
        A dict with the keys 'url', 'year', 'month', 'day' and 'img' for
        every comic dated strictly after the lower bound.
    """
    if last_comic:
        last_date = get_date_for_comic(last_comic)
    else:
        last_date = date(1985, 11, 1)
    # At least one digit is required in each group so that a link with an
    # empty year or month is ignored instead of crashing on int('').
    link_re = re.compile('^([0-9]+)/([0-9]+)/')
    for link in get_soup_at_url(cls.url).find_all('a', href=link_re):
        href = link['href']
        # The strings are kept as found: they are reused verbatim to build
        # the image pattern and the image URL.
        year_str, month_str = link_re.match(href).groups()
        year, month = int(year_str), int(month_str)
        # First day + 31 days is an upper bound on the month's last day:
        # months that end before the last known date are skipped.
        if date(year, month, 1) + timedelta(days=31) < last_date:
            continue
        img_re = re.compile('^%s%s([0-9]+)' % (year_str, month_str))
        month_url = urljoin_wrapper(cls.url, href)
        for img in get_soup_at_url(month_url).find_all('img', src=img_re):
            img_src = img['src']
            day = int(img_re.match(img_src).group(1))
            if date(year, month, day) <= last_date:
                continue
            yield {
                'url': month_url,
                'year': year,
                'month': month,
                'day': day,
                'img': ['%s%s/%s/%s' % (cls.url, year_str, month_str, img_src)],
            }