|
@@ 2919-2939 (lines=21) @@
|
| 2916 |
|
li = last_soup.find('li', class_='prev' if next_ else 'next') |
| 2917 |
|
return li.find('a') if li else None |
| 2918 |
|
|
| 2919 |
|
@classmethod
def get_comic_info(cls, soup, link):
    """Get information about a particular comics.

    Reads the title and description from the page's Open Graph meta tags,
    the publication date from the `time` element of class "published",
    the author from the `rel="author"` link and the images from the
    "body entry-content" div.

    `soup` is the parsed page (presumably a BeautifulSoup object - confirm
    against the caller); `link` is not used by this implementation.

    Returns a dict with the keys 'title', 'alt', 'description', 'author',
    'day', 'month', 'year' and 'img' (the list of image `src` values).
    """
    title = soup.find('meta', property='og:title')['content']
    desc = soup.find('meta', property='og:description')['content']
    date_str = soup.find('time', class_='published')['datetime']
    day = string_to_date(date_str, "%Y-%m-%d")
    author = soup.find('a', rel='author').string
    div_content = soup.find('div', class_="body entry-content")
    # Keep only the images that actually carry a `src` attribute.
    imgs = [i for i in div_content.find_all('img') if i.get('src') is not None]
    # The filter above may leave no image at all: fall back to an empty
    # alt text instead of failing with an IndexError.
    alt = imgs[0]['alt'] if imgs else ""
    return {
        'title': title,
        'alt': alt,
        'description': desc,
        'author': author,
        'day': day.day,
        'month': day.month,
        'year': day.year,
        'img': [i['src'] for i in imgs],
    }
| 2941 |
|
|
| 2942 |
|
|
|
@@ 2807-2827 (lines=21) @@
|
| 2804 |
|
get_first_comic_link = get_a_navi_navifirst |
| 2805 |
|
get_navi_link = get_link_rel_next |
| 2806 |
|
|
| 2807 |
|
@classmethod
def get_comic_info(cls, soup, link):
    """Get information about a particular comics.

    Reads the title from the `h2` of class "post-title", the short URL
    from the `rel="shortlink"` link, the comic number from the `p=` query
    parameter of that short URL, the images from the div with id "comic"
    and the date from the `span` of class "post-date".

    `soup` is the parsed page (presumably a BeautifulSoup object - confirm
    against the caller); `link` is not used by this implementation.

    Returns a dict with the keys 'short_url', 'num', 'img', 'month',
    'year', 'day', 'alt' and 'title'.
    """
    title = soup.find('h2', class_='post-title').string
    short_url = soup.find('link', rel='shortlink')['href']
    # cls.url is literal text, not a pattern: escape it so that characters
    # such as '.' only match themselves in the short URL.
    short_url_re = re.compile('^%s/\\?p=([0-9]*)' % re.escape(cls.url))
    num = int(short_url_re.match(short_url).groups()[0])
    imgs = soup.find('div', id='comic').find_all('img')
    alt = imgs[0]['title']
    # Sanity check: every image is expected to repeat the same text in
    # both its `alt` and `title` attributes.
    assert all(i['alt'] == i['title'] == alt for i in imgs)
    date_str = soup.find('span', class_='post-date').string
    day = string_to_date(date_str, "%d/%m/%Y")
    return {
        'short_url': short_url,
        'num': num,
        'img': [i['src'] for i in imgs],
        'month': day.month,
        'year': day.year,
        'day': day.day,
        'alt': alt,
        'title': title,
    }
| 2829 |
|
|
| 2830 |
|
|
|
@@ 2324-2343 (lines=20) @@
|
| 2321 |
|
return link |
| 2322 |
|
return None |
| 2323 |
|
|
| 2324 |
|
@classmethod
def get_comic_info(cls, soup, link):
    """Collect the metadata of one comic from its parsed page.

    The title comes from the "description" meta tag; the description,
    author, images and publication date come from the elements tagged
    with the matching `itemprop` attributes. Image sources are joined
    with `cls.url` through `urljoin_wrapper`.

    Returns a dict with the keys 'img', 'month', 'year', 'day', 'author',
    'title', 'alt' and 'description'. `link` is not used here.
    """
    headline = soup.find('meta', attrs={'name': 'description'})["content"]
    body_text = soup.find('div', itemprop='articleBody').text
    creator = soup.find('span', itemprop='author copyrightHolder').string
    pictures = soup.find_all('img', itemprop='image')
    # Sanity check: each image carries the same text as title and as alt.
    assert all(pic['title'] == pic['alt'] for pic in pictures)
    if pictures:
        alt_text = pictures[0]['alt']
    else:
        # Pages without any image get an empty alt text.
        alt_text = ""
    stamp = soup.find('time', itemprop='datePublished')["datetime"]
    published = string_to_date(stamp, "%Y-%m-%d %H:%M:%S")
    info = {}
    info['img'] = [urljoin_wrapper(cls.url, pic['src']) for pic in pictures]
    info['month'] = published.month
    info['year'] = published.year
    info['day'] = published.day
    info['author'] = creator
    info['title'] = headline
    info['alt'] = alt_text
    info['description'] = body_text
    return info
| 2345 |
|
|
| 2346 |
|
|