|
@@ 2725-2757 (lines=33) @@
|
| 2722 |
|
class BoumerieFr(GenericBoumerie):
    """French-language edition of the Boumeries comics."""

    # Identifiers and base URL for the French site.
    name = "boumeries_fr"
    long_name = "Boumeries (Fr)"
    url = "http://bd.boumerie.com"

    # Date pattern and locale name for this edition; presumably consumed by
    # GenericBoumerie when parsing publication dates (not visible here).
    date_format = '%A, %d %B %Y'
    lang = 'fr_FR.utf8'
| 2729 |
|
|
| 2730 |
|
|
| 2731 |
|
class UnearthedComics(GenericNavigableComic): |
| 2732 |
|
"""Class to retrieve Unearthed comics.""" |
| 2733 |
|
# Also on http://tapastic.com/series/UnearthedComics |
| 2734 |
|
# Also on http://unearthedcomics.tumblr.com |
| 2735 |
|
name = 'unearthed' |
| 2736 |
|
long_name = 'Unearthed Comics' |
| 2737 |
|
url = 'http://unearthedcomics.com' |
| 2738 |
|
get_navi_link = get_link_rel_next |
| 2739 |
|
|
| 2740 |
|
@classmethod
def get_first_comic_link(cls):
    """Return the link to the first comic as a dict holding its 'href'."""
    first_comic_url = "http://unearthedcomics.com/comics/world-with-turn-signals/"
    return {"href": first_comic_url}
| 2744 |
|
|
| 2745 |
|
@classmethod |
| 2746 |
|
def get_comic_info(cls, soup, link): |
| 2747 |
|
"""Get information about a particular comics.""" |
| 2748 |
|
short_url = soup.find('link', rel='shortlink')['href'] |
| 2749 |
|
title_elt = soup.find('h1') or soup.find('h2') |
| 2750 |
|
title = title_elt.string if title_elt else "" |
| 2751 |
|
desc = soup.find('meta', property='og:description') |
| 2752 |
|
date_str = soup.find('time', class_='published updated hidden')['datetime'] |
| 2753 |
|
day = string_to_date(date_str, "%Y-%m-%d") |
| 2754 |
|
post = soup.find('div', class_="entry content entry-content type-portfolio") |
| 2755 |
|
imgs = post.find_all('img') |
| 2756 |
|
return { |
| 2757 |
|
'title': title, |
| 2758 |
|
'description': desc, |
| 2759 |
|
'url2': short_url, |
| 2760 |
|
'img': [i['src'] for i in imgs], |
|
@@ 445-476 (lines=32) @@
|
| 442 |
|
return "http://uneanneeaulycee.blog.lemonde.fr/2016/06/13/la-semaine-du-bac-est-arrivee/" |
| 443 |
|
|
| 444 |
|
|
| 445 |
|
class Rall(GenericNavigableComic):
    """Scraper for the Ted Rall comics published on rall.com."""

    # Also on http://www.gocomics.com/tedrall
    name = "rall"
    long_name = 'Ted Rall'
    url = 'http://rall.com/comic'
    # Navigation between comics is delegated to the shared helper.
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the link (a dict with an 'href' key) where crawling starts."""
        # Not the first but I didn't find an efficient way to retrieve it
        start_url = 'http://rall.com/2014/01/30/los-angeles-times-cartoon-well-miss-those-california-flowers'
        return {"href": start_url}

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect the metadata of one comic from its parsed page.

        `soup` is the parsed page of the comic; `link` is not used here.
        Returns a dict with the title, author, publication date (split into
        'month', 'year' and 'day'), description and list of image URLs.
        """
        og_title = soup.find("meta", property="og:title")
        title = og_title["content"]

        author_span = soup.find('span', class_='author vcard')
        author = author_span.find('a').string

        # The publication date is displayed as e.g. "January 30, 2014".
        date_text = soup.find('span', class_='entry-date').string
        published = string_to_date(date_text, '%B %d, %Y')

        og_desc = soup.find("meta", property="og:description")
        desc = og_desc["content"]

        entry = soup.find("div", class_="entry-content")
        # The last 7 images of the entry are social media buttons: drop them.
        comic_imgs = entry.find_all("img")[:-7]
        img_urls = [tag["src"] for tag in comic_imgs]

        return {
            "title": title,
            "author": author,
            "month": published.month,
            "year": published.year,
            "day": published.day,
            "description": desc,
            "img": img_urls,
        }
| 478 |
|
|
| 479 |
|
|