@@ 648-670 (lines=23) @@
| 645 |
| 646 | class PenelopeBagieu(GenericNavigableComic):
| 647 |     """Class to retrieve comics from Penelope Bagieu's blog."""
| 648 |     name = 'bagieu'
| 649 |     long_name = 'Ma vie est tout a fait fascinante (Bagieu)'
| 650 |     url = 'http://www.penelope-jolicoeur.com'
| 651 |     get_navi_link = get_link_rel_next
| 652 |
| 653 |     @classmethod
| 654 |     def get_first_comic_link(cls):
| 655 |         """Get link to first comics."""
| 656 |         return {'href': 'http://www.penelope-jolicoeur.com/2007/02/ma-vie-mon-oeuv.html'}
| 657 |
| 658 |     @classmethod
| 659 |     def get_comic_info(cls, soup, link):
| 660 |         """Get information about a particular comics."""
| 661 |         date_str = soup.find('h2', class_='date-header').string
| 662 |         day = string_to_date(date_str, "%A %d %B %Y", "fr_FR.utf8")
| 663 |         imgs = soup.find('div', class_='entry-body').find_all('img')
| 664 |         title = soup.find('h3', class_='entry-header').string
| 665 |         return {
| 666 |             'title': title,
| 667 |             'img': [i['src'] for i in imgs],
| 668 |             'month': day.month,
| 669 |             'year': day.year,
| 670 |             'day': day.day,
| 671 |         }
| 672 |
| 673 |
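The `get_comic_info` flagged above leans on `string_to_date` accepting an optional locale, so that the French weekday and month names on the blog parse with `%A %d %B %Y`. A minimal sketch of how such a helper could work, assuming it wraps `datetime.strptime` and temporarily switches `LC_TIME` (the project's real implementation may differ):

import datetime
import locale
from contextlib import contextmanager


@contextmanager
def temporary_locale(loc):
    """Switch LC_TIME temporarily so strptime understands localized names."""
    old = locale.setlocale(locale.LC_TIME)
    try:
        yield locale.setlocale(locale.LC_TIME, loc)
    finally:
        locale.setlocale(locale.LC_TIME, old)


def string_to_date(string, date_format, local=None):
    """Parse a date string, optionally under a given locale (sketch only)."""
    if local is None:
        return datetime.datetime.strptime(string, date_format).date()
    with temporary_locale(local):
        return datetime.datetime.strptime(string, date_format).date()


# Example (assumes the fr_FR.utf8 locale is installed on the system):
# string_to_date("lundi 05 février 2007", "%A %d %B %Y", "fr_FR.utf8")
# -> datetime.date(2007, 2, 5)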
|
|
@@ 620-644 (lines=25) @@
| 617 |     url = "http://itsthetie.com"
| 618 |     get_first_comic_link = get_div_navfirst_a
| 619 |     get_navi_link = get_a_rel_next
| 620 |
| 621 |     @classmethod
| 622 |     def get_comic_info(cls, soup, link):
| 623 |         """Get information about a particular comics."""
| 624 |         title = soup.find('h1', class_='comic-title').find('a').string
| 625 |         date_str = soup.find('header', class_='comic-meta entry-meta').find('a').string
| 626 |         day = string_to_date(date_str, "%B %d, %Y")
| 627 |         # Bonus images may or may not be in meta og:image.
| 628 |         imgs = soup.find_all('meta', property='og:image')
| 629 |         imgs_src = [i['content'] for i in imgs]
| 630 |         bonus = soup.find_all('img', attrs={'data-oversrc': True})
| 631 |         bonus_src = [b['data-oversrc'] for b in bonus]
| 632 |         all_imgs_src = imgs_src + [s for s in bonus_src if s not in imgs_src]
| 633 |         all_imgs_src = [s for s in all_imgs_src if not s.endswith("/2016/01/bonus-panel.png")]
| 634 |         tag_meta = soup.find('meta', property='article:tag')
| 635 |         tags = tag_meta['content'] if tag_meta else ""
| 636 |         return {
| 637 |             'title': title,
| 638 |             'month': day.month,
| 639 |             'year': day.year,
| 640 |             'day': day.day,
| 641 |             'img': all_imgs_src,
| 642 |             'tags': tags,
| 643 |         }
| 644 |
| 645 |
| 646 | class PenelopeBagieu(GenericNavigableComic):
| 647 |     """Class to retrieve comics from Penelope Bagieu's blog."""
|
@@ 1660-1680 (lines=21) @@
| 1657 |         return reversed(get_soup_at_url(archive_url).find_all('a', rel='bookmark'))
| 1658 |
| 1659 |     @classmethod
| 1660 |     def get_comic_info(cls, soup, link):
| 1661 |         """Get information about a particular comics."""
| 1662 |         date_str = soup.find('div', class_='postdate').find('em').string
| 1663 |         day = string_to_date(remove_st_nd_rd_th_from_date(date_str), "%B %d, %Y")
| 1664 |         div = soup.find('div', id='comic')
| 1665 |         if div:
| 1666 |             img = div.find('img')
| 1667 |             img_src = [img['src']]
| 1668 |             alt = img['alt']
| 1669 |             assert alt == img['title']
| 1670 |             title = soup.find('meta', property='og:title')['content']
| 1671 |         else:
| 1672 |             img_src = []
| 1673 |             alt = ''
| 1674 |             title = ''
| 1675 |         return {
| 1676 |             'month': day.month,
| 1677 |             'year': day.year,
| 1678 |             'day': day.day,
| 1679 |             'img': img_src,
| 1680 |             'title': title,
| 1681 |             'alt': alt,
| 1682 |             'tags': ' '.join(t.string for t in soup.find('div', class_='postmeta').find_all('a', rel='tag')),
| 1683 |         }
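This fragment first strips English ordinal suffixes from the date string before handing it to `string_to_date`, since `%d` does not accept forms like "1st" or "3rd". A plausible sketch of `remove_st_nd_rd_th_from_date`, reconstructed from its name and call site rather than from the project's source:

import re


def remove_st_nd_rd_th_from_date(string):
    """Turn 'February 3rd, 2016' into 'February 3, 2016' so "%d" can parse it."""
    return re.sub(r"(?<=\d)(st|nd|rd|th)", "", string)


assert remove_st_nd_rd_th_from_date("February 3rd, 2016") == "February 3, 2016"
assert remove_st_nd_rd_th_from_date("August 21st, 2015") == "August 21, 2015"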
|
@@ 882-904 (lines=23) @@
| 879 |                 if not img['src'].endswith(
| 880 |                     ('link.gif', '32.png', 'twpbookad.jpg',
| 881 |                      'merchad.jpg', 'header.gif', 'tipjar.jpg'))]
| 882 |         return {
| 883 |             'title': title.string if title else None,
| 884 |             'title2': ' '.join(img.get('alt') for img in imgs if img.get('alt')),
| 885 |             'img': [urljoin_wrapper(cls.url, img['src']) for img in imgs],
| 886 |         }
| 887 |
| 888 |
| 889 | class DeadlyPanel(GenericEmptyComic, GenericNavigableComic):
| 890 |     """Class to retrieve Deadly Panel comics."""
| 891 |     # Also on https://tapastic.com/series/deadlypanel
| 892 |     name = 'deadly'
| 893 |     long_name = 'Deadly Panel'
| 894 |     url = 'http://www.deadlypanel.com'
| 895 |     get_first_comic_link = get_a_navi_navifirst
| 896 |     get_navi_link = get_a_navi_comicnavnext_navinext
| 897 |
| 898 |     @classmethod
| 899 |     def get_comic_info(cls, soup, link):
| 900 |         """Get information about a particular comics."""
| 901 |         imgs = soup.find('div', id='comic').find_all('img')
| 902 |         assert all(i['alt'] == i['title'] for i in imgs)
| 903 |         return {
| 904 |             'img': [i['src'] for i in imgs],
| 905 |         }
| 906 |
| 907 |
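Most of the flagged `get_comic_info` methods end by unpacking the same parsed date into 'day', 'month' and 'year' keys, which is a large part of why these blocks are reported as clones. One way that repetition could be factored out is a small shared helper along the following lines (hypothetical; not part of the reported codebase):

import datetime


def date_info(day):
    """Expand a datetime.date into the keys every get_comic_info returns."""
    return {'day': day.day, 'month': day.month, 'year': day.year}


# A get_comic_info could then build its result as, for instance:
#     return dict(date_info(day), title=title, img=[i['src'] for i in imgs])
example = dict(date_info(datetime.date(2016, 1, 2)), title='example title')
assert example == {'day': 2, 'month': 1, 'year': 2016, 'title': 'example title'}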
|