|
@@ 1698-1716 (lines=19) @@
|
| 1695 |
|
assert all(i['alt'] == i['title'] for i in imgs) |
| 1696 |
|
alt = imgs[0]['alt'] |
| 1697 |
|
return { |
| 1698 |
|
'img': [i['src'] for i in imgs], |
| 1699 |
|
'title': title, |
| 1700 |
|
'alt': alt, |
| 1701 |
|
} |
| 1702 |
|
|
| 1703 |
|
|
| 1704 |
|
class MouseBearComedy(GenericNavigableComic): |
| 1705 |
|
"""Class to retrieve Mouse Bear Comedy comics.""" |
| 1706 |
|
# Also on http://mousebearcomedy.tumblr.com |
| 1707 |
|
name = 'mousebear' |
| 1708 |
|
long_name = 'Mouse Bear Comedy' |
| 1709 |
|
url = 'http://www.mousebearcomedy.com' |
| 1710 |
|
get_first_comic_link = get_a_navi_navifirst |
| 1711 |
|
get_navi_link = get_a_navi_comicnavnext_navinext |
| 1712 |
|
|
| 1713 |
|
@classmethod |
| 1714 |
|
def get_comic_info(cls, soup, link): |
| 1715 |
|
"""Get information about a particular comics.""" |
| 1716 |
|
title = soup.find('h2', class_='post-title').string |
| 1717 |
|
author = soup.find("span", class_="post-author").find("a").string |
| 1718 |
|
date_str = soup.find("span", class_="post-date").string |
| 1719 |
|
day = string_to_date(date_str, '%B %d, %Y') |
|
@@ 974-991 (lines=18) @@
|
| 971 |
|
"""Class to retrieve Perry Bible Fellowship comics.""" |
| 972 |
|
name = 'pbf' |
| 973 |
|
long_name = 'Perry Bible Fellowship' |
| 974 |
|
url = 'http://pbfcomics.com' |
| 975 |
|
get_url_from_archive_element = join_cls_url_to_href |
| 976 |
|
|
| 977 |
|
@classmethod
def get_archive_elements(cls):
    """Get the archive links found on the main page, last one first.

    The anchors are looked up on the page at ``cls.url`` and kept when
    their ``href`` is a slash, any run of digits, then a closing slash.
    The matches are handed back as a reversed iterator.
    """
    front_page = get_soup_at_url(cls.url)
    anchors = front_page.find_all('a', href=re.compile('^/[0-9]*/$'))
    return reversed(anchors)
| 981 |
|
|
| 982 |
|
@classmethod |
| 983 |
|
def get_comic_info(cls, soup, link): |
| 984 |
|
"""Get information about a particular comics.""" |
| 985 |
|
url = cls.get_url_from_archive_element(link) |
| 986 |
|
comic_img_re = re.compile('^/archive_b/PBF.*') |
| 987 |
|
name = link.string |
| 988 |
|
num = int(link['name']) |
| 989 |
|
href = link['href'] |
| 990 |
|
assert href == '/%d/' % num |
| 991 |
|
imgs = soup.find_all('img', src=comic_img_re) |
| 992 |
|
assert len(imgs) == 1 |
| 993 |
|
assert imgs[0]['alt'] == name |
| 994 |
|
return { |
|
@@ 487-501 (lines=15) @@
|
| 484 |
|
url = 'http://information.tv5monde.com/dilem' |
| 485 |
|
get_url_from_link = join_cls_url_to_href |
| 486 |
|
|
| 487 |
|
@classmethod
def get_first_comic_link(cls):
    """Get the link to the first comic.

    The address is hard-coded; it is returned as a plain mapping exposing
    only an ``'href'`` key rather than as a parsed anchor element.
    """
    first_link = dict(href="http://information.tv5monde.com/dilem/2004-06-26")
    return first_link
| 491 |
|
|
| 492 |
|
@classmethod
def get_navi_link(cls, last_soup, next_):
    """Get the navigation anchor from the given page, or None.

    The page's labels are swapped relative to ours: the ``'prev'`` list
    item is looked up when ``next_`` is truthy and the ``'next'`` item
    otherwise. None is returned when that list item is not found.
    """
    wanted_class = 'prev' if next_ else 'next'
    item = last_soup.find('li', class_=wanted_class)
    if not item:
        return None
    return item.find('a')
| 497 |
|
|
| 498 |
|
@classmethod |
| 499 |
|
def get_comic_info(cls, soup, link): |
| 500 |
|
"""Get information about a particular comics.""" |
| 501 |
|
short_url = soup.find('link', rel='shortlink')['href'] |
| 502 |
|
title = soup.find('meta', attrs={'name': 'twitter:title'})['content'] |
| 503 |
|
imgs = soup.find_all('meta', property='og:image') |
| 504 |
|
date_str = soup.find('span', property='dc:date')['content'] |