|
@@ 318-344 (lines=27) @@
|
| 315 |
|
imgs = soup.find_all('img', src=img_src_re) |
| 316 |
|
title = soup.find('h2', class_='post-title').string |
| 317 |
|
return { |
| 318 |
|
'title': title, |
| 319 |
|
'img': [i['src'] for i in imgs], |
| 320 |
|
'prefix': title + '-' |
| 321 |
|
} |
| 322 |
|
|
| 323 |
|
|
| 324 |
|
class GenericLeMondeBlog(GenericNavigableComic): |
| 325 |
|
"""Generic class to retrieve comics from Le Monde blogs.""" |
| 326 |
|
get_navi_link = get_link_rel_next |
| 327 |
|
|
| 328 |
|
@classmethod
def get_first_comic_url(cls):
    """Get first comic url.

    The generic class has no URL of its own to return: this
    implementation unconditionally raises NotImplementedError.
    NOTE(review): presumably each concrete blog class overrides this
    method to return its own first-comic URL - confirm in subclasses.
    """
    raise NotImplementedError()
| 332 |
|
|
| 333 |
|
@classmethod
def get_first_comic_link(cls):
    """Get link to first comics.

    Returns a dict with a single 'href' key whose value is whatever
    cls.get_first_comic_url() returns; any exception raised by that
    call propagates unchanged.
    """
    first_url = cls.get_first_comic_url()
    first_link = {'href': first_url}
    return first_link
| 337 |
|
|
| 338 |
|
@classmethod |
| 339 |
|
def get_comic_info(cls, soup, link): |
| 340 |
|
"""Get information about a particular comics.""" |
| 341 |
|
url2 = soup.find('link', rel='shortlink')['href'] |
| 342 |
|
title = soup.find('meta', property='og:title')['content'] |
| 343 |
|
date_str = soup.find("span", class_="entry-date").string |
| 344 |
|
day = string_to_date(date_str, "%d %B %Y", "fr_FR.utf8") |
| 345 |
|
imgs = soup.find_all('meta', property='og:image') |
| 346 |
|
return { |
| 347 |
|
'title': title, |
|
@@ 2821-2840 (lines=20) @@
|
| 2818 |
|
'alt': alt, |
| 2819 |
|
'title': title, |
| 2820 |
|
} |
| 2821 |
|
|
| 2822 |
|
|
| 2823 |
|
class MoonBeard(GenericNavigableComic): |
| 2824 |
|
"""Class to retrieve MoonBeard comics.""" |
| 2825 |
|
# Also on http://blog.squiresjam.es/moonbeard |
| 2826 |
|
# Also on http://www.webtoons.com/en/comedy/moon-beard/list?title_no=471 |
| 2827 |
|
name = 'moonbeard' |
| 2828 |
|
long_name = 'Moon Beard' |
| 2829 |
|
url = 'http://moonbeard.com' |
| 2830 |
|
get_first_comic_link = get_a_navi_navifirst |
| 2831 |
|
get_navi_link = get_a_navi_navinext |
| 2832 |
|
|
| 2833 |
|
@classmethod |
| 2834 |
|
def get_comic_info(cls, soup, link): |
| 2835 |
|
"""Get information about a particular comics.""" |
| 2836 |
|
title = soup.find('h2', class_='post-title').string |
| 2837 |
|
short_url = soup.find('link', rel='shortlink')['href'] |
| 2838 |
|
short_url_re = re.compile('^%s/\\?p=([0-9]*)' % cls.url) |
| 2839 |
|
num = int(short_url_re.match(short_url).groups()[0]) |
| 2840 |
|
imgs = soup.find('div', id='comic').find_all('img') |
| 2841 |
|
alt = imgs[0]['title'] |
| 2842 |
|
assert all(i['alt'] == i['title'] == alt for i in imgs) |
| 2843 |
|
date_str = soup.find('span', class_='post-date').string |