|
@@ 2893-2929 (lines=37) @@
|
| 2890 |
|
date_str = soup.find('time', class_='published')['datetime'] |
| 2891 |
|
day = string_to_date(date_str, "%Y-%m-%d") |
| 2892 |
|
author = soup.find('a', rel='author').string |
| 2893 |
|
div_content = soup.find('div', class_="body entry-content") |
| 2894 |
|
imgs = div_content.find_all('img') |
| 2895 |
|
imgs = [i for i in imgs if i.get('src') is not None] |
| 2896 |
|
alt = imgs[0]['alt'] |
| 2897 |
|
return { |
| 2898 |
|
'title': title, |
| 2899 |
|
'alt': alt, |
| 2900 |
|
'description': desc, |
| 2901 |
|
'author': author, |
| 2902 |
|
'day': day.day, |
| 2903 |
|
'month': day.month, |
| 2904 |
|
'year': day.year, |
| 2905 |
|
'img': [i['src'] for i in imgs], |
| 2906 |
|
} |
| 2907 |
|
|
| 2908 |
|
|
| 2909 |
|
class GenericWordPressInkblot(GenericNavigableComic):
    """Generic class to retrieve comics using WordPress with Inkblot.

    Navigation between comics is delegated to ``get_link_rel_next``; this
    class only adds the lookup of the first comic and the extraction of
    per-comic information.
    """

    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        Fetches the page at ``cls.url`` and searches it for the ``a``
        element carrying the "first webcomic" class string.  The result of
        that ``find`` call is returned unchanged.
        """
        # NOTE(review): presumably get_soup_at_url returns a BeautifulSoup
        # document, in which case a missing link yields None - confirm.
        first_link_classes = 'webcomic-link webcomic1-link first-webcomic-link first-webcomic1-link'
        home_page = get_soup_at_url(cls.url)
        return home_page.find('a', class_=first_link_classes)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        ``soup`` is the parsed page of the comic; ``link`` is not used here.

        Returns a dict with the keys ``title``, ``day``, ``month``,
        ``year`` and ``img`` (the ``src`` of every image found inside the
        ``webcomic-image`` div).
        """
        # Lookups are kept in this order so that a page missing several
        # elements fails on the same one as before.
        comic_title = soup.find('meta', property='og:title')['content']
        image_holder = soup.find('div', class_='webcomic-image')
        images = image_holder.find_all('img')
        published = soup.find('meta', property='article:published_time')['content']
        # Only the first 10 characters are parsed, as a "%Y-%m-%d" date.
        pub_date = string_to_date(published[:10], "%Y-%m-%d")
        info = {
            'title': comic_title,
            'day': pub_date.day,
            'month': pub_date.month,
            'year': pub_date.year,
            'img': [image['src'] for image in images],
        }
        return info
| 2932 |
|
|
|
@@ 781-815 (lines=35) @@
|
| 778 |
|
imgs = soup.find_all('meta', property='og:image') |
| 779 |
|
desc = soup.find('meta', property='og:description')['content'] |
| 780 |
|
date_str = soup.find('meta', property='article:publish_date')['content'] |
| 781 |
|
day = string_to_date(date_str, "%B %d, %Y") |
| 782 |
|
author = soup.find('meta', property='article:author')['content'] |
| 783 |
|
tags = soup.find('meta', property='article:tag')['content'] |
| 784 |
|
return { |
| 785 |
|
'title': title, |
| 786 |
|
'description': desc, |
| 787 |
|
'img': [i['content'] for i in imgs], |
| 788 |
|
'author': author, |
| 789 |
|
'tags': tags, |
| 790 |
|
'day': day.day, |
| 791 |
|
'month': day.month, |
| 792 |
|
'year': day.year |
| 793 |
|
} |
| 794 |
|
|
| 795 |
|
|
| 796 |
|
class VictimsOfCircumsolar(GenericNavigableComic):
    """Class to retrieve VictimsOfCircumsolar comics.

    Navigation is delegated to ``get_a_navi_comicnavnext_navinext`` and the
    first comic is resolved through ``simulate_first_link`` together with
    ``first_url``.
    """

    name = 'circumsolar'
    long_name = 'Victims Of Circumsolar'
    url = 'http://www.victimsofcircumsolar.com'
    get_navi_link = get_a_navi_comicnavnext_navinext
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.victimsofcircumsolar.com/comic/modern-addiction'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        ``soup`` is the parsed page of the comic; ``link`` is not used here.

        Returns a dict with the keys ``title``, ``description`` and ``img``
        (the ``src`` of every image found inside the element with id
        ``comic``).  No date fields are returned.
        """
        # Date is on the archive page
        # The last og:title / og:description meta tag on the page is used.
        title_metas = soup.find_all('meta', property='og:title')
        comic_title = title_metas[-1]['content']
        desc_metas = soup.find_all('meta', property='og:description')
        comic_desc = desc_metas[-1]['content']
        comic_holder = soup.find('div', id='comic')
        images = comic_holder.find_all('img')
        # Sanity check: every image's title and alt text match the title.
        assert all(image['title'] == image['alt'] == comic_title for image in images)
        info = {
            'title': comic_title,
            'description': comic_desc,
            'img': [image['src'] for image in images],
        }
        return info
| 818 |
|
|