@@ 4852-4866 (lines=15) @@ | ||
4849 | gocomics = 'http://www.gocomics.com' |
|
4850 | return urljoin_wrapper(gocomics, link['href']) |
|
4851 | ||
@classmethod
def get_comic_info(cls, soup, link):
    """Collect the date, image URLs, author and tags of one comic page.

    `soup` is the parsed page, queried through find/find_all; `link` is
    not used by this implementation.  The returned dict has the keys
    'day', 'month', 'year', 'img', 'author' and 'tags'.
    """
    published = soup.find('meta', property='article:published_time')['content']
    date = string_to_date(published, "%Y-%m-%d")
    picture = soup.find('picture', class_='img-fluid item-comic-image')
    pictures = picture.find_all('img')
    author = soup.find('meta', property='article:author')['content']
    tags = soup.find('meta', property='article:tag')['content']
    # Each image source is combined with the class URL by urljoin_wrapper.
    img_urls = [urljoin_wrapper(cls.url, pic['src']) for pic in pictures]
    return {
        'day': date.day,
        'month': date.month,
        'year': date.year,
        'img': img_urls,
        'author': author,
        'tags': tags,
    }
|
4868 | ||
4869 | ||
@@ 441-455 (lines=15) @@ | ||
438 | get_first_comic_link = simulate_first_link |
|
439 | first_url = NotImplemented |
|
440 | ||
@classmethod
def get_comic_info(cls, soup, link):
    """Collect the title, short URL, images and date of one comic page.

    `soup` is the parsed page, queried through find/find_all; `link` is
    not used by this implementation.  The date text is parsed with the
    "%d %B %Y" format and the "fr_FR.utf8" locale argument.
    """
    short_url = soup.find('link', rel='shortlink')['href']
    og_title = soup.find('meta', property='og:title')['content']
    date_text = soup.find("span", class_="entry-date").string
    date = string_to_date(date_text, "%d %B %Y", "fr_FR.utf8")
    # Every og:image meta tag contributes one image URL.
    img_urls = [
        convert_iri_to_plain_ascii_uri(meta['content'])
        for meta in soup.find_all('meta', property='og:image')
    ]
    return {
        'title': og_title,
        'url2': short_url,
        'img': img_urls,
        'month': date.month,
        'year': date.year,
        'day': date.day,
    }
|
457 | ||
458 | ||
@@ 416-430 (lines=15) @@ | ||
413 | get_first_comic_link = simulate_first_link |
|
414 | first_url = 'http://extrafabulouscomics.com/comic/buttfly/' |
|
415 | ||
@classmethod
def get_comic_info(cls, soup, link):
    """Get information about a particular comics.

    `soup` is the parsed page, queried through find/find_all; `link` is
    not used by this implementation.  Images are the <img> tags whose
    `src` starts with "<cls.url>/wp-content/uploads/".  The returned dict
    has the keys 'title', 'img', 'month', 'year', 'day' and 'prefix'
    (the title followed by '-').
    """
    # cls.url is a literal prefix, not a pattern: escape it so that '.'
    # and other regex metacharacters in the URL only match themselves.
    img_src_re = re.compile('^%s/wp-content/uploads/' % re.escape(cls.url))
    imgs = soup.find_all('img', src=img_src_re)
    title = soup.find('meta', property='og:title')['content']
    # Only the first 10 characters (the "%Y-%m-%d" part) are parsed.
    date_str = soup.find('meta', property='article:published_time')['content'][:10]
    day = string_to_date(date_str, "%Y-%m-%d")
    return {
        'title': title,
        'img': [i['src'] for i in imgs],
        'month': day.month,
        'year': day.year,
        'day': day.day,
        'prefix': title + '-',
    }
|
432 | ||
433 | ||
@@ 1016-1029 (lines=14) @@ | ||
1013 | """Get link to first comics.""" |
|
1014 | return get_soup_at_url(cls.url).find('a', class_='comic_nav_link first_comic_link') |
|
1015 | ||
@classmethod
def get_comic_info(cls, soup, link):
    """Collect the title, images and date of one comic page.

    `soup` is the parsed page, queried through find/find_all; `link` is
    not used by this implementation.  Asserts that every comic image has
    `alt` and `title` attributes equal to the page title.
    """
    heading = soup.find("h1", class_="comic_title").string
    date_text = soup.find("span", class_="comic_date").string
    date = string_to_date(date_text, "%B %d, %Y")
    pictures = soup.find_all("img", class_="comic")
    assert all(pic['alt'] == pic['title'] == heading for pic in pictures)
    # Images with an empty src are left out.
    sources = [pic['src'] for pic in pictures if pic["src"]]
    return {
        'title': heading,
        'img': sources,
        'day': date.day,
        'month': date.month,
        'year': date.year,
    }
|
1031 | ||
1032 | ||
@@ 3089-3101 (lines=13) @@ | ||
3086 | """Get link to first comics.""" |
|
3087 | return get_soup_at_url(cls.url).find('a', class_='webcomic-link webcomic1-link first-webcomic-link first-webcomic1-link') |
|
3088 | ||
@classmethod
def get_comic_info(cls, soup, link):
    """Collect the title, date and images of one comic page.

    `soup` is the parsed page, queried through find/find_all; `link` is
    not used by this implementation.  Images are the <img> tags inside
    the 'webcomic-image' div.
    """
    og_title = soup.find('meta', property='og:title')['content']
    container = soup.find('div', class_='webcomic-image')
    pictures = container.find_all('img')
    published = soup.find('meta', property='article:published_time')['content']
    # Only the first 10 characters (the "%Y-%m-%d" part) are parsed.
    date = string_to_date(published[:10], "%Y-%m-%d")
    sources = [pic['src'] for pic in pictures]
    return {
        'title': og_title,
        'day': date.day,
        'month': date.month,
        'year': date.year,
        'img': sources,
    }
|
3103 | ||
3104 | ||
@@ 2324-2336 (lines=13) @@ | ||
2321 | articles = get_soup_at_url(cls.url).find_all('article', class_='li post') |
|
2322 | return [art.find('a') for art in reversed(articles)] |
|
2323 | ||
@classmethod
def get_comic_info(cls, soup, archive_elt):
    """Collect the title, images and date of one comic page.

    `soup` is the parsed page, queried through find/find_all;
    `archive_elt` is not used by this implementation.  Images are the
    <img> tags inside the 'post-content' section.
    """
    published = soup.find('meta', property='og:article:published_time')['content']
    # Only the first 10 characters (the "%Y-%m-%d" part) are parsed.
    date = string_to_date(published[:10], "%Y-%m-%d")
    heading = soup.find('h3', class_='p-post-title').string
    content = soup.find('section', class_='post-content')
    pictures = content.find_all('img')
    # Each image source is combined with the class URL by urljoin_wrapper.
    img_urls = [urljoin_wrapper(cls.url, pic['src']) for pic in pictures]
    return {
        'title': heading,
        'img': img_urls,
        'month': date.month,
        'year': date.year,
        'day': date.day,
    }
|
2338 | ||
2339 | ||
@@ 2793-2804 (lines=12) @@ | ||
2790 | get_first_comic_link = simulate_first_link |
|
2791 | first_url = NotImplemented |
|
2792 | ||
@classmethod
def get_comic_info(cls, soup, link):
    """Collect the title, image titles, description and images of a page.

    `soup` is the parsed page, queried through find/find_all; `link` is
    not used by this implementation.  'title2' is the space-joined
    `title` attribute of every image (empty string when absent).
    """
    description = soup.find('meta', property='og:description')['content']
    og_title = soup.find('meta', property='og:title')['content']
    entry = soup.find('div', class_='entry-content')
    pictures = entry.find_all('img')
    image_titles = ' '.join(pic.get('title', '') for pic in pictures)
    img_urls = []
    for pic in pictures:
        # Convert the source first, then combine it with the class URL.
        ascii_src = convert_iri_to_plain_ascii_uri(pic['src'])
        img_urls.append(urljoin_wrapper(cls.url, ascii_src))
    return {
        'title': og_title,
        'title2': image_titles,
        'description': description,
        'img': img_urls,
    }
|
2806 | ||
2807 |