|
@@ 603-625 (lines=23) @@
|
| 600 |
|
day = string_to_date(date_str, "%B %d, %Y") |
| 601 |
|
# Bonus images may or may not be in meta og:image. |
| 602 |
|
imgs = soup.find_all('meta', property='og:image') |
| 603 |
|
imgs_src = [i['content'] for i in imgs] |
| 604 |
|
bonus = soup.find_all('img', attrs={'data-oversrc': True}) |
| 605 |
|
bonus_src = [b['data-oversrc'] for b in bonus] |
| 606 |
|
all_imgs_src = imgs_src + [s for s in bonus_src if s not in imgs_src] |
| 607 |
|
all_imgs_src = [s for s in all_imgs_src if not s.endswith("/2016/01/bonus-panel.png")] |
| 608 |
|
tag_meta = soup.find('meta', property='article:tag') |
| 609 |
|
tags = tag_meta['content'] if tag_meta else "" |
| 610 |
|
return { |
| 611 |
|
'title': title, |
| 612 |
|
'month': day.month, |
| 613 |
|
'year': day.year, |
| 614 |
|
'day': day.day, |
| 615 |
|
'img': all_imgs_src, |
| 616 |
|
'tags': tags, |
| 617 |
|
} |
| 618 |
|
|
| 619 |
|
|
| 620 |
|
class PenelopeBagieu(GenericNavigableComic): |
| 621 |
|
"""Class to retrieve comics from Penelope Bagieu's blog.""" |
| 622 |
|
name = 'bagieu' |
| 623 |
|
long_name = 'Ma vie est tout a fait fascinante (Bagieu)' |
| 624 |
|
url = 'http://www.penelope-jolicoeur.com' |
| 625 |
|
get_navi_link = get_link_rel_next |
| 626 |
|
|
| 627 |
|
@classmethod |
| 628 |
|
def get_first_comic_link(cls): |
|
@@ 851-872 (lines=22) @@
|
| 848 |
|
@classmethod
def get_comic_info(cls, soup, link):
    """Get information about a particular comic.

    Every <img> of the page that carries a 'src' attribute is considered,
    except those whose source ends with one of the ignored suffixes below
    (presumably layout/advertisement images - confirm against the site).

    Returns a dict with:
      - 'title': the string of the <title> tag, or None without a <title>,
      - 'title2': the non-empty 'alt' texts of the kept images, joined
        with single spaces,
      - 'img': each kept 'src' passed through urljoin_wrapper with cls.url.
    """
    ignored_suffixes = (
        'link.gif', '32.png', 'twpbookad.jpg',
        'merchad.jpg', 'header.gif', 'tipjar.jpg',
    )
    title = soup.find('title')
    # src=True only matches tags that actually have a 'src' attribute, so
    # an <img> without one is skipped instead of raising KeyError below.
    imgs = [img for img in soup.find_all('img', src=True)
            if not img['src'].endswith(ignored_suffixes)]
    return {
        'title': title.string if title else None,
        'title2': ' '.join(img.get('alt') for img in imgs if img.get('alt')),
        'img': [urljoin_wrapper(cls.url, img['src']) for img in imgs],
    }
| 861 |
|
|
| 862 |
|
|
| 863 |
|
class DeadlyPanel(GenericEmptyComic, GenericNavigableComic): |
| 864 |
|
"""Class to retrieve Deadly Panel comics.""" |
| 865 |
|
# Also on https://tapastic.com/series/deadlypanel |
| 866 |
|
name = 'deadly' |
| 867 |
|
long_name = 'Deadly Panel' |
| 868 |
|
url = 'http://www.deadlypanel.com' |
| 869 |
|
get_first_comic_link = get_a_navi_navifirst |
| 870 |
|
get_navi_link = get_a_navi_comicnavnext_navinext |
| 871 |
|
|
| 872 |
|
@classmethod |
| 873 |
|
def get_comic_info(cls, soup, link): |
| 874 |
|
"""Get information about a particular comics.""" |
| 875 |
|
imgs = soup.find('div', id='comic').find_all('img') |
|
@@ 629-650 (lines=22) @@
|
| 626 |
|
|
| 627 |
|
@classmethod
def get_first_comic_link(cls):
    """Return the link to the first comic as a dict with an 'href' key."""
    first_href = 'http://www.penelope-jolicoeur.com/2007/02/ma-vie-mon-oeuv.html'
    return dict(href=first_href)
| 631 |
|
|
| 632 |
|
@classmethod
def get_comic_info(cls, soup, link):
    """Collect title, image sources and date of a blog entry.

    The date text comes from the 'date-header' <h2> and is handed to
    string_to_date together with the "fr_FR.utf8" locale name; the images
    are the <img> tags found inside the 'entry-body' <div>.
    """
    date_header = soup.find('h2', class_='date-header')
    published = string_to_date(date_header.string, "%A %d %B %Y", "fr_FR.utf8")
    entry_body = soup.find('div', class_='entry-body')
    pictures = entry_body.find_all('img')
    entry_header = soup.find('h3', class_='entry-header')
    entry_title = entry_header.string
    return dict(
        title=entry_title,
        img=[picture['src'] for picture in pictures],
        month=published.month,
        year=published.year,
        day=published.day,
    )
| 646 |
|
|
| 647 |
|
|
| 648 |
|
class OneOneOneOneComic(GenericNavigableComic): |
| 649 |
|
"""Class to retrieve 1111 Comics.""" |
| 650 |
|
# Also on http://comics1111.tumblr.com |
| 651 |
|
# Also on https://tapastic.com/series/1111-Comics |
| 652 |
|
name = '1111' |
| 653 |
|
long_name = '1111 Comics' |
|
@@ 654-674 (lines=21) @@
|
| 651 |
|
# Also on https://tapastic.com/series/1111-Comics |
| 652 |
|
name = '1111' |
| 653 |
|
long_name = '1111 Comics' |
| 654 |
|
url = 'http://www.1111comics.me' |
| 655 |
|
get_first_comic_link = get_div_navfirst_a |
| 656 |
|
get_navi_link = get_link_rel_next |
| 657 |
|
|
| 658 |
|
@classmethod
def get_comic_info(cls, soup, link):
    """Collect title, date and image URLs of a comic page.

    Title and date are the strings of the first <a> inside the
    'comic-title' <h1> and the 'comic-meta entry-meta' <header>; the
    images are the contents of all 'og:image' <meta> tags.
    """
    title_anchor = soup.find('h1', class_='comic-title').find('a')
    comic_title = title_anchor.string
    date_anchor = soup.find('header', class_='comic-meta entry-meta').find('a')
    published = string_to_date(date_anchor.string, "%B %d, %Y")
    og_images = soup.find_all('meta', property='og:image')
    return dict(
        title=comic_title,
        month=published.month,
        year=published.year,
        day=published.day,
        img=[meta['content'] for meta in og_images],
    )
| 672 |
|
|
| 673 |
|
|
| 674 |
|
class AngryAtNothing(GenericNavigableComic): |
| 675 |
|
"""Class to retrieve Angry at Nothing comics.""" |
| 676 |
|
# Also on http://tapastic.com/series/Comics-yeah-definitely-comics- |
| 677 |
|
name = 'angry' |
|
@@ 2490-2509 (lines=20) @@
|
| 2487 |
|
imgs = soup.find("div", id="comic").find_all("img") |
| 2488 |
|
assert all(i['alt'] == i['title'] for i in imgs) |
| 2489 |
|
assert len(imgs) <= 1 |
| 2490 |
|
alt = imgs[0]['alt'] if imgs else "" |
| 2491 |
|
return { |
| 2492 |
|
'img': [i['src'] for i in imgs], |
| 2493 |
|
'title': title, |
| 2494 |
|
'alt': alt, |
| 2495 |
|
'author': author, |
| 2496 |
|
'day': day.day, |
| 2497 |
|
'month': day.month, |
| 2498 |
|
'year': day.year |
| 2499 |
|
} |
| 2500 |
|
|
| 2501 |
|
|
| 2502 |
|
class LastPlaceComics(GenericNavigableComic): |
| 2503 |
|
"""Class to retrieve Last Place Comics.""" |
| 2504 |
|
name = 'lastplace' |
| 2505 |
|
long_name = 'LastPlaceComics' |
| 2506 |
|
url = "http://lastplacecomics.com" |
| 2507 |
|
get_first_comic_link = get_a_comicnavbase_comicnavfirst |
| 2508 |
|
get_navi_link = get_link_rel_next |
| 2509 |
|
|
| 2510 |
|
@classmethod |
| 2511 |
|
def get_comic_info(cls, soup, link): |
| 2512 |
|
"""Get information about a particular comics.""" |
|
@@ 1594-1613 (lines=20) @@
|
| 1591 |
|
long_name = 'Something Of That Ilk' |
| 1592 |
|
url = 'http://www.somethingofthatilk.com' |
| 1593 |
|
|
| 1594 |
|
|
| 1595 |
|
class InfiniteMonkeyBusiness(GenericNavigableComic):
    """Class to retrieve Infinite Monkey Business comics."""
    name = 'monkey'
    long_name = 'Infinite Monkey Business'
    url = 'http://infinitemonkeybusiness.net'
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_first_comic_link(cls):
        """Return the link to the first comic as a dict with an 'href' key."""
        first_href = 'http://infinitemonkeybusiness.net/comic/pillory/'
        return dict(href=first_href)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect the title and image sources of a comic page.

        The title is the content of the 'og:title' <meta> tag; the images
        are the <img> tags inside the <div> whose id is 'comic'.
        """
        comic_title = soup.find('meta', property='og:title')['content']
        comic_div = soup.find('div', id='comic')
        pictures = comic_div.find_all('img')
        return dict(
            title=comic_title,
            img=[picture['src'] for picture in pictures],
        )
| 1616 |
|
|