@@ 2725-2757 (lines=33) @@ | ||
2722 | lang = "fr_FR.utf8" |
|
2723 | ||
2724 | ||
class UnearthedComics(GenericNavigableComic):
    """Class to retrieve Unearthed comics."""
    # Also on http://tapastic.com/series/UnearthedComics
    # Also on http://unearthedcomics.tumblr.com
    name = 'unearthed'
    long_name = 'Unearthed Comics'
    url = 'http://unearthedcomics.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        Returns a dict with a single 'href' key holding a hard-coded URL.
        """
        return {'href': 'http://unearthedcomics.com/comics/world-with-turn-signals/'}

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        `soup` is the parsed page (queried through `find`/`find_all`);
        `link` is not used by this implementation.

        Returns a dict with the keys 'title', 'description', 'url2', 'img',
        'month', 'year' and 'day'.
        """
        short_url = soup.find('link', rel='shortlink')['href']
        # The title lives in the first <h1>, or failing that the first <h2>.
        title_elt = soup.find('h1') or soup.find('h2')
        title = title_elt.string if title_elt else ""
        # Store the text of the description, not the <meta> element itself;
        # stays None when the page has no og:description tag.
        desc_elt = soup.find('meta', property='og:description')
        desc = desc_elt.get('content') if desc_elt is not None else None
        date_str = soup.find('time', class_='published updated hidden')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        post = soup.find('div', class_="entry content entry-content type-portfolio")
        imgs = post.find_all('img')
        return {
            'title': title,
            'description': desc,
            'url2': short_url,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
|
2759 | ||
2760 | ||
@@ 445-476 (lines=32) @@ | ||
442 | return "http://uneanneeaulycee.blog.lemonde.fr/2016/06/13/la-semaine-du-bac-est-arrivee/" |
|
443 | ||
444 | ||
class Rall(GenericNavigableComic):
    """Class to retrieve Ted Rall comics."""
    # Also on http://www.gocomics.com/tedrall
    name = 'rall'
    long_name = "Ted Rall"
    url = "http://rall.com/comic"
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the link (a dict with an 'href' key) to start from."""
        # Not the first but I didn't find an efficient way to retrieve it
        start_url = "http://rall.com/2014/01/30/los-angeles-times-cartoon-well-miss-those-california-flowers"
        return {'href': start_url}

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for the comic found on the parsed page.

        `soup` is the parsed page; `link` is not used here.  The result
        holds 'title', 'author', 'month', 'year', 'day', 'description'
        and 'img' entries.
        """
        title_meta = soup.find('meta', property='og:title')
        title = title_meta['content']

        author_span = soup.find("span", class_="author vcard")
        author = author_span.find("a").string

        date_span = soup.find("span", class_="entry-date")
        day = string_to_date(date_span.string, "%B %d, %Y")

        desc_meta = soup.find('meta', property='og:description')
        desc = desc_meta['content']

        content_div = soup.find('div', class_='entry-content')
        all_imgs = content_div.find_all('img')
        # remove social media buttons (the last 7 images)
        comic_imgs = all_imgs[:-7]
        img_urls = [img['src'] for img in comic_imgs]

        info = {}
        info['title'] = title
        info['author'] = author
        info['month'] = day.month
        info['year'] = day.year
        info['day'] = day.day
        info['description'] = desc
        info['img'] = img_urls
        return info
|
478 | ||
479 |