@@ 603-625 (lines=23) @@ | ||
600 | day = string_to_date(date_str, "%B %d, %Y") |
|
601 | # Bonus images may or may not be in meta og:image. |
|
602 | imgs = soup.find_all('meta', property='og:image') |
|
603 | imgs_src = [i['content'] for i in imgs] |
|
604 | bonus = soup.find_all('img', attrs={'data-oversrc': True}) |
|
605 | bonus_src = [b['data-oversrc'] for b in bonus] |
|
606 | all_imgs_src = imgs_src + [s for s in bonus_src if s not in imgs_src] |
|
607 | all_imgs_src = [s for s in all_imgs_src if not s.endswith("/2016/01/bonus-panel.png")] |
|
608 | tag_meta = soup.find('meta', property='article:tag') |
|
609 | tags = tag_meta['content'] if tag_meta else "" |
|
610 | return { |
|
611 | 'title': title, |
|
612 | 'month': day.month, |
|
613 | 'year': day.year, |
|
614 | 'day': day.day, |
|
615 | 'img': all_imgs_src, |
|
616 | 'tags': tags, |
|
617 | } |
|
618 | ||
619 | ||
620 | class PenelopeBagieu(GenericNavigableComic): |
|
621 | """Class to retrieve comics from Penelope Bagieu's blog.""" |
|
622 | name = 'bagieu' |
|
623 | long_name = 'Ma vie est tout a fait fascinante (Bagieu)' |
|
624 | url = 'http://www.penelope-jolicoeur.com' |
|
625 | get_navi_link = get_link_rel_next |
|
626 | ||
627 | @classmethod |
|
628 | def get_first_comic_link(cls): |
|
@@ 851-872 (lines=22) @@ | ||
@classmethod
def get_comic_info(cls, soup, link):
    """Collect the title, alt texts and image URLs of a comic page.

    Every <img> of the page is considered, except those whose 'src'
    ends with one of the file names listed in `skipped_suffixes`.

    Returns a dict with:
    - 'title': string of the <title> tag, or None when there is no such tag,
    - 'title2': the non-empty 'alt' texts of the kept images, space-joined,
    - 'img': the 'src' of each kept image joined with `cls.url` through
      `urljoin_wrapper`.

    The `link` argument is not used here.
    """
    skipped_suffixes = (
        'link.gif', '32.png', 'twpbookad.jpg',
        'merchad.jpg', 'header.gif', 'tipjar.jpg')
    title_tag = soup.find('title')
    kept = []
    for tag in soup.find_all('img'):
        # str.endswith accepts a tuple: one call checks every suffix.
        if tag['src'].endswith(skipped_suffixes):
            continue
        kept.append(tag)
    page_title = title_tag.string if title_tag else None
    alt_texts = [tag.get('alt') for tag in kept]
    return {
        'title': page_title,
        'title2': ' '.join(alt for alt in alt_texts if alt),
        'img': [urljoin_wrapper(cls.url, tag['src']) for tag in kept],
    }
|
861 | ||
862 | ||
863 | class DeadlyPanel(GenericEmptyComic, GenericNavigableComic): |
|
864 | """Class to retrieve Deadly Panel comics.""" |
|
865 | # Also on https://tapastic.com/series/deadlypanel |
|
866 | name = 'deadly' |
|
867 | long_name = 'Deadly Panel' |
|
868 | url = 'http://www.deadlypanel.com' |
|
869 | get_first_comic_link = get_a_navi_navifirst |
|
870 | get_navi_link = get_a_navi_comicnavnext_navinext |
|
871 | ||
872 | @classmethod |
|
873 | def get_comic_info(cls, soup, link): |
|
874 | """Get information about a particular comics.""" |
|
875 | imgs = soup.find('div', id='comic').find_all('img') |
|
@@ 629-650 (lines=22) @@ | ||
626 | ||
@classmethod
def get_first_comic_link(cls):
    """Return a dict whose 'href' is the hard-coded URL of the first comic."""
    first_url = 'http://www.penelope-jolicoeur.com/2007/02/ma-vie-mon-oeuv.html'
    return {'href': first_url}
|
631 | ||
@classmethod
def get_comic_info(cls, soup, link):
    """Extract title, images and publication date from a blog entry page.

    The date comes from the <h2 class="date-header"> element, the images
    from the <div class="entry-body"> element and the title from the
    <h3 class="entry-header"> element.

    Returns a dict with keys 'title', 'img' (list of image 'src' values),
    'month', 'year' and 'day'. The `link` argument is not used here.
    """
    raw_date = soup.find('h2', class_='date-header').string
    # The third argument is presumably the locale used to parse the
    # French day/month names - confirm against string_to_date.
    published = string_to_date(raw_date, "%A %d %B %Y", "fr_FR.utf8")
    body_imgs = soup.find('div', class_='entry-body').find_all('img')
    heading = soup.find('h3', class_='entry-header').string
    info = {'title': heading}
    info['img'] = [tag['src'] for tag in body_imgs]
    info['month'] = published.month
    info['year'] = published.year
    info['day'] = published.day
    return info
|
646 | ||
647 | ||
648 | class OneOneOneOneComic(GenericNavigableComic): |
|
649 | """Class to retrieve 1111 Comics.""" |
|
650 | # Also on http://comics1111.tumblr.com |
|
651 | # Also on https://tapastic.com/series/1111-Comics |
|
652 | name = '1111' |
|
653 | long_name = '1111 Comics' |
|
@@ 654-674 (lines=21) @@ | ||
651 | # Also on https://tapastic.com/series/1111-Comics |
|
652 | name = '1111' |
|
653 | long_name = '1111 Comics' |
|
654 | url = 'http://www.1111comics.me' |
|
655 | get_first_comic_link = get_div_navfirst_a |
|
656 | get_navi_link = get_link_rel_next |
|
657 | ||
@classmethod
def get_comic_info(cls, soup, link):
    """Extract title, publication date and images from a comic page.

    The title is the string of the link inside <h1 class="comic-title">,
    the date is the string of the link inside the
    <header class="comic-meta entry-meta"> element (parsed with the
    "%B %d, %Y" format) and the images are the 'content' values of the
    og:image <meta> tags.

    Returns a dict with keys 'title', 'month', 'year', 'day' and 'img'.
    The `link` argument is not used here.
    """
    title_anchor = soup.find('h1', class_='comic-title').find('a')
    comic_title = title_anchor.string
    meta_header = soup.find('header', class_='comic-meta entry-meta')
    raw_date = meta_header.find('a').string
    published = string_to_date(raw_date, "%B %d, %Y")
    og_images = soup.find_all('meta', property='og:image')
    info = {'title': comic_title}
    info['month'] = published.month
    info['year'] = published.year
    info['day'] = published.day
    info['img'] = [meta['content'] for meta in og_images]
    return info
|
672 | ||
673 | ||
674 | class AngryAtNothing(GenericNavigableComic): |
|
675 | """Class to retrieve Angry at Nothing comics.""" |
|
676 | # Also on http://tapastic.com/series/Comics-yeah-definitely-comics- |
|
677 | name = 'angry' |
|
@@ 2490-2509 (lines=20) @@ | ||
2487 | imgs = soup.find("div", id="comic").find_all("img") |
|
2488 | assert all(i['alt'] == i['title'] for i in imgs) |
|
2489 | assert len(imgs) <= 1 |
|
2490 | alt = imgs[0]['alt'] if imgs else "" |
|
2491 | return { |
|
2492 | 'img': [i['src'] for i in imgs], |
|
2493 | 'title': title, |
|
2494 | 'alt': alt, |
|
2495 | 'author': author, |
|
2496 | 'day': day.day, |
|
2497 | 'month': day.month, |
|
2498 | 'year': day.year |
|
2499 | } |
|
2500 | ||
2501 | ||
2502 | class LastPlaceComics(GenericNavigableComic): |
|
2503 | """Class to retrieve Last Place Comics.""" |
|
2504 | name = 'lastplace' |
|
2505 | long_name = 'LastPlaceComics' |
|
2506 | url = "http://lastplacecomics.com" |
|
2507 | get_first_comic_link = get_a_comicnavbase_comicnavfirst |
|
2508 | get_navi_link = get_link_rel_next |
|
2509 | ||
2510 | @classmethod |
|
2511 | def get_comic_info(cls, soup, link): |
|
2512 | """Get information about a particular comics.""" |
|
@@ 1594-1613 (lines=20) @@ | ||
1591 | long_name = 'Something Of That Ilk' |
|
1592 | url = 'http://www.somethingofthatilk.com' |
|
1593 | ||
1594 | ||
class InfiniteMonkeyBusiness(GenericNavigableComic):
    """Retrieve comics from Infinite Monkey Business.

    Navigation to the next comic is delegated to
    get_a_navi_comicnavnext_navinext; the first comic is a hard-coded URL.
    """
    name = 'monkey'
    long_name = 'Infinite Monkey Business'
    url = 'http://infinitemonkeybusiness.net'
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_first_comic_link(cls):
        """Return a dict whose 'href' is the hard-coded URL of the first comic."""
        first_url = 'http://infinitemonkeybusiness.net/comic/pillory/'
        return {'href': first_url}

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract the title and the image sources from a comic page.

        The title is the 'content' of the og:title <meta> tag; the images
        are the 'src' values of every <img> inside <div id="comic">.

        Returns a dict with keys 'title' and 'img'. The `link` argument
        is not used here.
        """
        title_meta = soup.find('meta', property='og:title')
        page_title = title_meta['content']
        comic_div = soup.find('div', id='comic')
        sources = []
        for tag in comic_div.find_all('img'):
            sources.append(tag['src'])
        return {
            'title': page_title,
            'img': sources,
        }
|
1616 |