@@ 318-344 (lines=27) @@ | ||
315 | imgs = soup.find_all('img', src=img_src_re) |
|
316 | title = soup.find('h2', class_='post-title').string |
|
317 | return { |
|
318 | 'title': title, |
|
319 | 'img': [i['src'] for i in imgs], |
|
320 | 'prefix': title + '-' |
|
321 | } |
|
322 | ||
323 | ||
324 | class GenericLeMondeBlog(GenericNavigableComic): |
|
325 | """Generic class to retrieve comics from Le Monde blogs.""" |
|
326 | get_navi_link = get_link_rel_next |
|
327 | ||
@classmethod
def get_first_comic_url(cls):
    """Return the url of the first comic.

    This generic implementation has no url to offer and always raises
    NotImplementedError (presumably concrete blog classes override it;
    get_first_comic_link relies on the value returned here).
    """
    raise NotImplementedError()
|
332 | ||
@classmethod
def get_first_comic_link(cls):
    """Build the link pointing to the first comic.

    The result is a dict with a single 'href' entry whose value is
    whatever cls.get_first_comic_url() returns, so it can be indexed
    with ['href'].
    """
    first_url = cls.get_first_comic_url()
    return {'href': first_url}
|
337 | ||
338 | @classmethod |
|
339 | def get_comic_info(cls, soup, link): |
|
340 | """Get information about a particular comics.""" |
|
341 | url2 = soup.find('link', rel='shortlink')['href'] |
|
342 | title = soup.find('meta', property='og:title')['content'] |
|
343 | date_str = soup.find("span", class_="entry-date").string |
|
344 | day = string_to_date(date_str, "%d %B %Y", "fr_FR.utf8") |
|
345 | imgs = soup.find_all('meta', property='og:image') |
|
346 | return { |
|
347 | 'title': title, |
|
@@ 2821-2840 (lines=20) @@ | ||
2818 | 'alt': alt, |
|
2819 | 'title': title, |
|
2820 | } |
|
2821 | ||
2822 | ||
2823 | class MoonBeard(GenericNavigableComic): |
|
2824 | """Class to retrieve MoonBeard comics.""" |
|
2825 | # Also on http://blog.squiresjam.es/moonbeard |
|
2826 | # Also on http://www.webtoons.com/en/comedy/moon-beard/list?title_no=471 |
|
2827 | name = 'moonbeard' |
|
2828 | long_name = 'Moon Beard' |
|
2829 | url = 'http://moonbeard.com' |
|
2830 | get_first_comic_link = get_a_navi_navifirst |
|
2831 | get_navi_link = get_a_navi_navinext |
|
2832 | ||
2833 | @classmethod |
|
2834 | def get_comic_info(cls, soup, link): |
|
2835 | """Get information about a particular comics.""" |
|
2836 | title = soup.find('h2', class_='post-title').string |
|
2837 | short_url = soup.find('link', rel='shortlink')['href'] |
|
2838 | short_url_re = re.compile('^%s/\\?p=([0-9]*)' % cls.url) |
|
2839 | num = int(short_url_re.match(short_url).groups()[0]) |
|
2840 | imgs = soup.find('div', id='comic').find_all('img') |
|
2841 | alt = imgs[0]['title'] |
|
2842 | assert all(i['alt'] == i['title'] == alt for i in imgs) |
|
2843 | date_str = soup.find('span', class_='post-date').string |