|
@@ 3105-3125 (lines=21) @@
|
| 3102 |
|
li = last_soup.find('li', class_='prev' if next_ else 'next') |
| 3103 |
|
return li.find('a') if li else None |
| 3104 |
|
|
| 3105 |
|
@classmethod
def get_comic_info(cls, soup, link):
    """Collect the metadata and image URLs of a single comic page.

    The title and description come from the Open Graph ``<meta>`` tags,
    the publication date from the ``datetime`` attribute of the
    ``<time class="published">`` element (parsed as ``%Y-%m-%d``) and the
    author from the ``<a rel="author">`` link.  Images are the ``<img>``
    tags of the ``body entry-content`` div that carry a ``src``
    attribute; the ``alt`` text is read from the first of them.

    ``link`` is not used by this implementation.

    Returns a dict with the keys ``title``, ``alt``, ``description``,
    ``author``, ``day``, ``month``, ``year`` and ``img`` (list of image
    source URLs).
    """
    def og_content(name):
        # Content of the <meta property="og:<name>"> tag.
        return soup.find('meta', property='og:' + name)['content']

    comic_title = og_content('title')
    description = og_content('description')
    published = string_to_date(
        soup.find('time', class_='published')['datetime'],
        "%Y-%m-%d")
    author_name = soup.find('a', rel='author').string
    content = soup.find('div', class_="body entry-content")
    # Keep only the image tags that actually point to a source.
    pictures = [
        tag for tag in content.find_all('img')
        if tag.get('src') is not None
    ]
    first_alt = pictures[0]['alt']
    sources = [tag['src'] for tag in pictures]
    return {
        'title': comic_title,
        'alt': first_alt,
        'description': description,
        'author': author_name,
        'day': published.day,
        'month': published.month,
        'year': published.year,
        'img': sources,
    }
| 3127 |
|
|
| 3128 |
|
|
|
@@ 828-846 (lines=19) @@
|
| 825 |
|
link = last_soup.find('div', class_='nav-comic nav-right' if next_ else 'nav-comic nav-left') |
| 826 |
|
return link.find('a') if link else None |
| 827 |
|
|
| 828 |
|
@classmethod
def get_comic_info(cls, soup, link):
    """Build the comic description from the page's ``<meta>`` tags.

    Every field is read from a ``<meta property=...>`` tag: ``og:title``,
    ``og:description``, ``article:publish_date`` (parsed with the
    ``%B %d, %Y`` format), ``article:author`` and ``article:tag``.  The
    image list holds the ``content`` of every ``og:image`` tag.

    ``link`` is not used by this implementation.

    Returns a dict with the keys ``title``, ``description``, ``img``,
    ``author``, ``tags``, ``day``, ``month`` and ``year``.
    """
    def meta_content(prop):
        # 'content' attribute of the <meta> tag found for this property.
        return soup.find('meta', property=prop)['content']

    comic_title = meta_content('og:title')
    image_metas = soup.find_all('meta', property='og:image')
    description = meta_content('og:description')
    published = string_to_date(
        meta_content('article:publish_date'), "%B %d, %Y")
    author_name = meta_content('article:author')
    tag_value = meta_content('article:tag')
    # Image URLs are extracted last, once all single-valued tags were read.
    image_urls = [meta['content'] for meta in image_metas]
    return {
        'title': comic_title,
        'description': description,
        'img': image_urls,
        'author': author_name,
        'tags': tag_value,
        'day': published.day,
        'month': published.month,
        'year': published.year,
    }
| 848 |
|
|
| 849 |
|
|
|
@@ 2962-2982 (lines=21) @@
|
| 2959 |
|
get_first_comic_link = get_a_navi_navifirst |
| 2960 |
|
get_navi_link = get_link_rel_next |
| 2961 |
|
|
| 2962 |
|
@classmethod
def get_comic_info(cls, soup, link):
    """Get information about a particular comics.

    The title is the string of the ``<h2 class="post-title">`` element
    and the date comes from ``<span class="post-date">`` (parsed as
    ``%d/%m/%Y``).  The comic number is extracted from the
    ``<link rel="shortlink">`` URL, which is expected to look like
    ``<cls.url>/?p=<number>``.  Images are all the ``<img>`` tags found
    in the ``<div id="comic">`` element.

    ``link`` is not used by this implementation.

    Returns a dict with the keys ``short_url``, ``num``, ``img`` (list of
    image source URLs), ``month``, ``year``, ``day``, ``alt`` and
    ``title``.
    """
    title = soup.find('h2', class_='post-title').string
    short_url = soup.find('link', rel='shortlink')['href']
    # cls.url is a literal prefix: escape it so that characters such as
    # '.', '?' or '+' are matched verbatim instead of being interpreted
    # as regular-expression metacharacters.
    short_url_re = re.compile(r'^%s/\?p=([0-9]*)' % re.escape(cls.url))
    num = int(short_url_re.match(short_url).groups()[0])
    imgs = soup.find('div', id='comic').find_all('img')
    alt = imgs[0]['title']
    # Every image is expected to carry the same text as both its alt and
    # its title attribute.
    assert all(i['alt'] == i['title'] == alt for i in imgs)
    date_str = soup.find('span', class_='post-date').string
    day = string_to_date(date_str, "%d/%m/%Y")
    return {
        'short_url': short_url,
        'num': num,
        'img': [i['src'] for i in imgs],
        'month': day.month,
        'year': day.year,
        'day': day.day,
        'alt': alt,
        'title': title,
    }
| 2984 |
|
|
| 2985 |
|
|