|
@@ 2947-2967 (lines=21) @@
|
| 2944 |
|
li = last_soup.find('li', class_='prev' if next_ else 'next') |
| 2945 |
|
return li.find('a') if li else None |
| 2946 |
|
|
| 2947 |
|
@classmethod
def get_comic_info(cls, soup, link):
    """Get information about a particular comics.

    The title and description come from the page's Open Graph meta tags,
    the publication date from the 'published' time tag, the author from
    the rel='author' link and the images from the 'body entry-content'
    div. The `link` argument is not used by this implementation.

    Returns a dict with the keys 'title', 'alt', 'description', 'author',
    'day', 'month', 'year' and 'img' (the list of image sources).
    """
    title = soup.find('meta', property='og:title')['content']
    desc = soup.find('meta', property='og:description')['content']
    date_str = soup.find('time', class_='published')['datetime']
    day = string_to_date(date_str, "%Y-%m-%d")
    author = soup.find('a', rel='author').string
    div_content = soup.find('div', class_="body entry-content")
    # Only keep the images that actually carry a 'src' attribute.
    imgs = [i for i in div_content.find_all('img') if i.get('src') is not None]
    # A post may be left with no usable image after filtering: fall back to
    # an empty alt text instead of failing with an IndexError.
    alt = imgs[0]['alt'] if imgs else ""
    return {
        'title': title,
        'alt': alt,
        'description': desc,
        'author': author,
        'day': day.day,
        'month': day.month,
        'year': day.year,
        'img': [i['src'] for i in imgs],
    }
| 2969 |
|
|
| 2970 |
|
|
|
@@ 2835-2855 (lines=21) @@
|
| 2832 |
|
get_first_comic_link = get_a_navi_navifirst |
| 2833 |
|
get_navi_link = get_link_rel_next |
| 2834 |
|
|
| 2835 |
|
@classmethod
def get_comic_info(cls, soup, link):
    """Get information about a particular comics.

    The comic number is parsed from the page's shortlink, which is expected
    to look like '<cls.url>/?p=<number>'. The images are taken from the
    'comic' div and are all expected to share the same 'alt'/'title' text.
    The `link` argument is not used by this implementation.

    Returns a dict with the keys 'short_url', 'num', 'img', 'month',
    'year', 'day', 'alt' and 'title'.
    """
    title = soup.find('h2', class_='post-title').string
    short_url = soup.find('link', rel='shortlink')['href']
    # The base URL is literal text, not a pattern: escape it so that
    # characters such as '.' do not act as regex metacharacters.
    short_url_re = re.compile('^%s/\\?p=([0-9]*)' % re.escape(cls.url))
    num = int(short_url_re.match(short_url).group(1))
    imgs = soup.find('div', id='comic').find_all('img')
    alt = imgs[0]['title']
    # Sanity check on the scraped page: every image carries the same text.
    assert all(i['alt'] == i['title'] == alt for i in imgs)
    date_str = soup.find('span', class_='post-date').string
    day = string_to_date(date_str, "%d/%m/%Y")
    return {
        'short_url': short_url,
        'num': num,
        'img': [i['src'] for i in imgs],
        'month': day.month,
        'year': day.year,
        'day': day.day,
        'alt': alt,
        'title': title,
    }
| 2857 |
|
|
| 2858 |
|
|
|
@@ 2352-2371 (lines=20) @@
|
| 2349 |
|
return link |
| 2350 |
|
return None |
| 2351 |
|
|
| 2352 |
|
@classmethod
def get_comic_info(cls, soup, link):
    """Get information about a particular comics.

    Everything is read from the page's metadata: the 'description' meta
    tag gives the title, and the itemprop-annotated tags give the article
    body, the author, the images and the publication date. The `link`
    argument is not used by this implementation.

    Returns a dict with the keys 'img', 'month', 'year', 'day', 'author',
    'title', 'alt' and 'description'.
    """
    title_tag = soup.find('meta', attrs={'name': 'description'})
    title = title_tag["content"]
    body_tag = soup.find('div', itemprop='articleBody')
    description = body_tag.text
    author_tag = soup.find('span', itemprop='author copyrightHolder')
    author = author_tag.string
    pictures = soup.find_all('img', itemprop='image')
    # Sanity check on the scraped page: title and alt text always agree.
    assert not any(pic['title'] != pic['alt'] for pic in pictures)
    if pictures:
        alt = pictures[0]['alt']
    else:
        alt = ""
    time_tag = soup.find('time', itemprop='datePublished')
    published = string_to_date(time_tag["datetime"], "%Y-%m-%d %H:%M:%S")
    # Image sources are resolved against the comic's base URL.
    sources = [urljoin_wrapper(cls.url, pic['src']) for pic in pictures]
    return {
        'img': sources,
        'month': published.month,
        'year': published.year,
        'day': published.day,
        'author': author,
        'title': title,
        'alt': alt,
        'description': description,
    }
| 2373 |
|
|
| 2374 |
|
|