|
@@ 4852-4866 (lines=15) @@
|
| 4849 |
|
gocomics = 'http://www.gocomics.com' |
| 4850 |
|
return urljoin_wrapper(gocomics, link['href']) |
| 4851 |
|
|
| 4852 |
|
@classmethod
def get_comic_info(cls, soup, link):
    """Collect the date, images, author and tags of one comic page.

    `soup` is the parsed page (presumably a BeautifulSoup document -
    confirm against the caller); `link` is not used here.

    Returns a dict with the keys 'day', 'month', 'year', 'img',
    'author' and 'tags'.
    """
    published = soup.find('meta', property='article:published_time')
    date = string_to_date(published['content'], "%Y-%m-%d")
    picture = soup.find('picture', class_='img-fluid item-comic-image')
    pictures = picture.find_all('img')
    author = soup.find('meta', property='article:author')['content']
    tags = soup.find('meta', property='article:tag')['content']
    info = {
        'day': date.day,
        'month': date.month,
        'year': date.year,
    }
    # Image sources are resolved against the comic's base url.
    info['img'] = [urljoin_wrapper(cls.url, pic['src']) for pic in pictures]
    info['author'] = author
    info['tags'] = tags
    return info
| 4868 |
|
|
| 4869 |
|
|
|
@@ 441-455 (lines=15) @@
|
| 438 |
|
get_first_comic_link = simulate_first_link |
| 439 |
|
first_url = NotImplemented |
| 440 |
|
|
| 441 |
|
@classmethod
def get_comic_info(cls, soup, link):
    """Collect the title, short url, images and date of one comic page.

    `soup` is the parsed page (presumably a BeautifulSoup document -
    confirm against the caller); `link` is not used here.

    Returns a dict with the keys 'title', 'url2', 'img', 'month',
    'year' and 'day'.
    """
    short_url = soup.find('link', rel='shortlink')['href']
    og_title = soup.find('meta', property='og:title')['content']
    entry_date = soup.find("span", class_="entry-date")
    # The date is parsed as "day month-name year" with the French locale.
    date = string_to_date(entry_date.string, "%d %B %Y", "fr_FR.utf8")
    image_urls = []
    for meta in soup.find_all('meta', property='og:image'):
        image_urls.append(convert_iri_to_plain_ascii_uri(meta['content']))
    return {
        'title': og_title,
        'url2': short_url,
        'img': image_urls,
        'month': date.month,
        'year': date.year,
        'day': date.day,
    }
| 457 |
|
|
| 458 |
|
|
|
@@ 416-430 (lines=15) @@
|
| 413 |
|
get_first_comic_link = simulate_first_link |
| 414 |
|
first_url = 'http://extrafabulouscomics.com/comic/buttfly/' |
| 415 |
|
|
| 416 |
|
@classmethod
def get_comic_info(cls, soup, link):
    """Collect the title, images, date and file prefix of one comic page.

    `soup` is the parsed page (presumably a BeautifulSoup document -
    confirm against the caller); `link` is not used here.

    Only <img> tags whose src starts with "<cls.url>/wp-content/uploads/"
    are kept.

    Returns a dict with the keys 'title', 'img', 'month', 'year', 'day'
    and 'prefix' (the title followed by '-').
    """
    # Escape the base url so that regex metacharacters it contains (the
    # dots of the host name in particular) are matched literally.
    img_src_re = re.compile('^%s/wp-content/uploads/' % re.escape(cls.url))
    imgs = soup.find_all('img', src=img_src_re)
    title = soup.find('meta', property='og:title')['content']
    # Keep only the first 10 characters (the "YYYY-MM-DD" part).
    date_str = soup.find('meta', property='article:published_time')['content'][:10]
    day = string_to_date(date_str, "%Y-%m-%d")
    return {
        'title': title,
        'img': [i['src'] for i in imgs],
        'month': day.month,
        'year': day.year,
        'day': day.day,
        'prefix': title + '-',
    }
| 432 |
|
|
| 433 |
|
|
|
@@ 1016-1029 (lines=14) @@
|
| 1013 |
|
"""Get link to first comics.""" |
| 1014 |
|
return get_soup_at_url(cls.url).find('a', class_='comic_nav_link first_comic_link') |
| 1015 |
|
|
| 1016 |
|
@classmethod
def get_comic_info(cls, soup, link):
    """Collect the title, images and date of one comic page.

    `soup` is the parsed page (presumably a BeautifulSoup document -
    confirm against the caller); `link` is not used here.

    Returns a dict with the keys 'title', 'img', 'day', 'month' and
    'year'.
    """
    heading = soup.find("h1", class_="comic_title").string
    date_text = soup.find("span", class_="comic_date").string
    date = string_to_date(date_text, "%B %d, %Y")
    comic_imgs = soup.find_all("img", class_="comic")
    # Sanity check: every image's alt and title attributes equal the title.
    assert all(img['alt'] == img['title'] == heading for img in comic_imgs)
    # Images with an empty src are left out.
    sources = [img['src'] for img in comic_imgs if img["src"]]
    info = {'title': heading, 'img': sources}
    info['day'] = date.day
    info['month'] = date.month
    info['year'] = date.year
    return info
| 1031 |
|
|
| 1032 |
|
|
|
@@ 3089-3101 (lines=13) @@
|
| 3086 |
|
"""Get link to first comics.""" |
| 3087 |
|
return get_soup_at_url(cls.url).find('a', class_='webcomic-link webcomic1-link first-webcomic-link first-webcomic1-link') |
| 3088 |
|
|
| 3089 |
|
@classmethod
def get_comic_info(cls, soup, link):
    """Collect the title, date and images of one comic page.

    `soup` is the parsed page (presumably a BeautifulSoup document -
    confirm against the caller); `link` is not used here.

    Returns a dict with the keys 'title', 'day', 'month', 'year' and
    'img'.
    """
    og_title = soup.find('meta', property='og:title')['content']
    container = soup.find('div', class_='webcomic-image')
    pictures = container.find_all('img')
    published = soup.find('meta', property='article:published_time')['content']
    # Keep only the first 10 characters (the "YYYY-MM-DD" part).
    date = string_to_date(published[:10], "%Y-%m-%d")
    info = {
        'title': og_title,
        'day': date.day,
        'month': date.month,
        'year': date.year,
    }
    info['img'] = [pic['src'] for pic in pictures]
    return info
| 3103 |
|
|
| 3104 |
|
|
|
@@ 2324-2336 (lines=13) @@
|
| 2321 |
|
articles = get_soup_at_url(cls.url).find_all('article', class_='li post') |
| 2322 |
|
return [art.find('a') for art in reversed(articles)] |
| 2323 |
|
|
| 2324 |
|
@classmethod
def get_comic_info(cls, soup, archive_elt):
    """Collect the title, images and date of one comic page.

    `soup` is the parsed page (presumably a BeautifulSoup document -
    confirm against the caller); `archive_elt` is not used here.

    Returns a dict with the keys 'title', 'img', 'month', 'year' and
    'day'.
    """
    published = soup.find('meta', property='og:article:published_time')['content']
    # Keep only the first 10 characters (the "YYYY-MM-DD" part).
    date = string_to_date(published[:10], "%Y-%m-%d")
    heading = soup.find('h3', class_='p-post-title').string
    content = soup.find('section', class_='post-content')
    image_urls = []
    # Image sources are resolved against the comic's base url.
    for pic in content.find_all('img'):
        image_urls.append(urljoin_wrapper(cls.url, pic['src']))
    return {
        'title': heading,
        'img': image_urls,
        'month': date.month,
        'year': date.year,
        'day': date.day,
    }
| 2338 |
|
|
| 2339 |
|
|
|
@@ 2793-2804 (lines=12) @@
|
| 2790 |
|
get_first_comic_link = simulate_first_link |
| 2791 |
|
first_url = NotImplemented |
| 2792 |
|
|
| 2793 |
|
@classmethod
def get_comic_info(cls, soup, link):
    """Collect the titles, description and images of one comic page.

    `soup` is the parsed page (presumably a BeautifulSoup document -
    confirm against the caller); `link` is not used here.

    Returns a dict with the keys 'title', 'title2', 'description' and
    'img'. 'title2' is the space-joined title attributes of the images
    (an empty string stands in for a missing attribute).
    """
    description = soup.find('meta', property='og:description')['content']
    og_title = soup.find('meta', property='og:title')['content']
    entry = soup.find('div', class_='entry-content')
    pictures = entry.find_all('img')
    hover_titles = [pic.get('title', '') for pic in pictures]
    info = {
        'title': og_title,
        'title2': ' '.join(hover_titles),
        'description': description,
    }
    # Each src is converted to a plain ASCII uri, then resolved against
    # the comic's base url.
    info['img'] = [
        urljoin_wrapper(cls.url, convert_iri_to_plain_ascii_uri(pic['src']))
        for pic in pictures
    ]
    return info
| 2806 |
|
|
| 2807 |
|
|