@@ 648-670 (lines=23) @@ | ||
645 | ||
class PenelopeBagieu(GenericNavigableComic):
    """Scraper for the comics posted on Penelope Bagieu's blog."""
    name = 'bagieu'
    long_name = 'Ma vie est tout a fait fascinante (Bagieu)'
    url = 'http://www.penelope-jolicoeur.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return a link-like mapping whose 'href' is the first post's URL."""
        first_post = 'http://www.penelope-jolicoeur.com/2007/02/ma-vie-mon-oeuv.html'
        return {'href': first_post}

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary for the post held in `soup`.

        The result carries the post title, the 'src' of every image found
        in the entry body, and the day/month/year of the parsed date header.
        `link` is not used.
        """
        header_text = soup.find('h2', class_='date-header').string
        # The header is parsed as "<weekday> <day> <month> <year>", with the
        # fr_FR.utf8 locale handed over to string_to_date.
        published = string_to_date(header_text, "%A %d %B %Y", "fr_FR.utf8")
        entry_body = soup.find('div', class_='entry-body')
        pictures = entry_body.find_all('img')
        heading = soup.find('h3', class_='entry-header').string
        return dict(
            title=heading,
            img=[picture['src'] for picture in pictures],
            month=published.month,
            year=published.year,
            day=published.day,
        )
|
672 | ||
673 | ||
@@ 620-644 (lines=25) @@ | ||
617 | url = "http://itsthetie.com" |
|
618 | get_first_comic_link = get_div_navfirst_a |
|
619 | get_navi_link = get_a_rel_next |
|
620 | ||
@classmethod
def get_comic_info(cls, soup, link):
    """Build the info dictionary for the comic page held in `soup`.

    The image list starts with every 'og:image' meta content, followed by
    the 'data-oversrc' value of each image that has one and is not already
    among the 'og:image' entries. `link` is not used.
    """
    heading = soup.find('h1', class_='comic-title')
    title = heading.find('a').string
    meta_header = soup.find('header', class_='comic-meta entry-meta')
    date_text = meta_header.find('a').string
    published = string_to_date(date_text, "%B %d, %Y")
    # Bonus images may or may not be in meta og:image.
    main_srcs = [meta['content']
                 for meta in soup.find_all('meta', property='og:image')]
    hover_srcs = [tag['data-oversrc']
                  for tag in soup.find_all('img', attrs={'data-oversrc': True})]
    candidates = list(main_srcs)
    # Membership is tested against the og:image list only, so a value
    # repeated among the bonus images is kept as many times as it appears.
    candidates.extend(src for src in hover_srcs if src not in main_srcs)
    # Drop every source ending with the bonus-panel.png path below.
    excluded_suffix = "/2016/01/bonus-panel.png"
    pictures = [src for src in candidates if not src.endswith(excluded_suffix)]
    tag_meta = soup.find('meta', property='article:tag')
    if tag_meta:
        tags = tag_meta['content']
    else:
        tags = ""
    return dict(
        title=title,
        month=published.month,
        year=published.year,
        day=published.day,
        img=pictures,
        tags=tags,
    )
|
644 | ||
645 | ||
646 | class PenelopeBagieu(GenericNavigableComic): |
|
647 | """Class to retrieve comics from Penelope Bagieu's blog.""" |
|
@@ 1660-1680 (lines=21) @@ | ||
1657 | return reversed(get_soup_at_url(archive_url).find_all('a', rel='bookmark')) |
|
1658 | ||
@classmethod
def get_comic_info(cls, soup, link):
    """Build the info dictionary for the comic page held in `soup`.

    When the page has no div with id 'comic', the image list is empty and
    both the title and the alt text are empty strings. `link` is not used.
    """
    raw_date = soup.find('div', class_='postdate').find('em').string
    cleaned_date = remove_st_nd_rd_th_from_date(raw_date)
    published = string_to_date(cleaned_date, "%B %d, %Y")
    # Defaults used when the page carries no comic container.
    img_src, alt, title = [], '', ''
    comic_div = soup.find('div', id='comic')
    if comic_div:
        picture = comic_div.find('img')
        img_src = [picture['src']]
        alt = picture['alt']
        # Sanity check: the image's alt and title attributes must be equal.
        assert alt == picture['title']
        title = soup.find('meta', property='og:title')['content']
    tag_links = soup.find('div', class_='postmeta').find_all('a', rel='tag')
    return dict(
        month=published.month,
        year=published.year,
        day=published.day,
        img=img_src,
        title=title,
        alt=alt,
        tags=' '.join(anchor.string for anchor in tag_links),
    )
|
@@ 882-904 (lines=23) @@ | ||
879 | if not img['src'].endswith( |
|
880 | ('link.gif', '32.png', 'twpbookad.jpg', |
|
881 | 'merchad.jpg', 'header.gif', 'tipjar.jpg'))] |
|
882 | return { |
|
883 | 'title': title.string if title else None, |
|
884 | 'title2': ' '.join(img.get('alt') for img in imgs if img.get('alt')), |
|
885 | 'img': [urljoin_wrapper(cls.url, img['src']) for img in imgs], |
|
886 | } |
|
887 | ||
888 | ||
class DeadlyPanel(GenericEmptyComic, GenericNavigableComic):
    """Scraper for the Deadly Panel comics."""
    # Also on https://tapastic.com/series/deadlypanel
    name = 'deadly'
    long_name = 'Deadly Panel'
    url = 'http://www.deadlypanel.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return a dictionary holding the 'src' of every image in div#comic.

        `link` is not used.
        """
        comic_div = soup.find('div', id='comic')
        pictures = comic_div.find_all('img')
        # Sanity check: each image's alt and title attributes must be equal.
        for picture in pictures:
            assert picture['alt'] == picture['title']
        sources = [picture['src'] for picture in pictures]
        return {'img': sources}
|
906 | ||
907 |