|
@@ 434-455 (lines=22) @@
|
| 431 |
|
} |
| 432 |
|
|
| 433 |
|
|
| 434 |
|
class GenericLeMondeBlog(GenericNavigableComic):
    """Generic scraper base for comics published on Le Monde blogs.

    `first_url` is only a NotImplemented placeholder here; presumably each
    concrete blog class overrides it - confirm against the subclasses.
    """
    _categories = ('LEMONDE', 'FRANCAIS')
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary for the comic page held in `soup`.

        Reads the shortlink, the og:title, the entry date (parsed with a
        French locale) and every og:image meta tag. `link` is not used.
        """
        shortlink_tag = soup.find('link', rel='shortlink')
        short_url = shortlink_tag['href']
        title_tag = soup.find('meta', property='og:title')
        comic_title = title_tag['content']
        raw_date = soup.find("span", class_="entry-date").string
        # Dates are written like "<day> <month name> <year>" in French.
        published = string_to_date(raw_date, "%d %B %Y", "fr_FR.utf8")
        image_tags = soup.find_all('meta', property='og:image')
        image_urls = [
            convert_iri_to_plain_ascii_uri(tag['content'])
            for tag in image_tags
        ]
        return {
            'title': comic_title,
            'url2': short_url,
            'img': image_urls,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
| 457 |
|
|
| 458 |
|
|
|
@@ 1004-1029 (lines=26) @@
|
| 1001 |
|
} |
| 1002 |
|
|
| 1003 |
|
|
| 1004 |
|
class MyExtraLife(GenericNavigableComic):
    """Scraper for the My Extra Life comics."""
    name = 'extralife'
    long_name = 'My Extra Life'
    url = 'http://www.myextralife.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the "first comic" navigation anchor found on the home page."""
        home_page = get_soup_at_url(cls.url)
        return home_page.find('a', class_='comic_nav_link first_comic_link')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary for the comic page held in `soup`.

        Reads the comic title, its date and every <img class="comic"> tag.
        `link` is not used.
        """
        comic_title = soup.find("h1", class_="comic_title").string
        raw_date = soup.find("span", class_="comic_date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        comic_imgs = soup.find_all("img", class_="comic")
        # Sanity check: each image's alt and title text must equal the title.
        assert all(
            tag['alt'] == tag['title'] == comic_title for tag in comic_imgs)
        # Images with an empty src attribute are left out.
        sources = []
        for tag in comic_imgs:
            if tag["src"]:
                sources.append(tag['src'])
        return {
            'title': comic_title,
            'img': sources,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
| 1031 |
|
|
| 1032 |
|
|
|
@@ 3516-3537 (lines=22) @@
|
| 3513 |
|
class MacadamValley(GenericNavigableComic):
    """Scraper for the Macadam Valley comics."""
    name = 'macadamvalley'
    long_name = 'Macadam Valley'
    url = 'http://macadamvalley.com'
    get_navi_link = get_a_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://macadamvalley.com/le-debut-de-la-fin/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary for the comic page held in `soup`.

        Reads the entry title, the first image inside the entry content,
        the entry's datetime attribute and the author link text. `link` is
        not used.
        """
        comic_title = soup.find("h1", class_="entry-title").string
        content_block = soup.find('div', class_='entry-content')
        picture = content_block.find('img')
        timestamp = soup.find('time', class_='entry-date')['datetime']
        # Only the first 10 characters (the YYYY-MM-DD part) are parsed.
        published = string_to_date(timestamp[:10], "%Y-%m-%d")
        author_name = soup.find('a', rel='author').string
        return {
            'title': comic_title,
            'img': [picture['src']],
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'author': author_name,
        }
| 3539 |
|
|
| 3540 |
|
|
|
@@ 2349-2370 (lines=22) @@
|
| 2346 |
|
class PeterLauris(GenericNavigableComic):
    """Scraper for the Peter Lauris comics."""
    name = 'peterlauris'
    long_name = 'Peter Lauris'
    url = 'http://peterlauris.com/comics'
    get_navi_link = get_a_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://peterlauris.com/comics/just-in-case/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary for the comic page held in `soup`.

        Everything comes from <meta> tags: twitter:title for the title,
        article:published_time for the date and og:image for the images.
        `link` is not used.
        """
        title_tag = soup.find('meta', attrs={'name': 'twitter:title'})
        comic_title = title_tag['content']
        published_tag = soup.find('meta', property='article:published_time')
        # Only the first 10 characters (the YYYY-MM-DD part) are parsed.
        raw_date = published_tag['content'][:10]
        published = string_to_date(raw_date, "%Y-%m-%d")
        image_tags = soup.find_all('meta', property='og:image')
        image_urls = [tag['content'] for tag in image_tags]
        return {
            'title': comic_title,
            'img': image_urls,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
| 2369 |
|
|
| 2370 |
|
|
| 2371 |
|
class RandomCrab(GenericNavigableComic): |
| 2372 |
|
"""Class to retrieve Random Crab comics.""" |
| 2373 |
|
name = 'randomcrab' |