|
@@ 2450-2469 (lines=20) @@
|
| 2447 |
|
return link |
| 2448 |
|
return None |
| 2449 |
|
|
| 2450 |
|
@classmethod
def get_comic_info(cls, soup, link):
    """Collect the metadata of a single comic from its parsed page.

    Args:
        soup: Parsed page exposing ``find`` / ``find_all``
            (presumably a BeautifulSoup document -- confirm with caller).
        link: Not used by this implementation; kept for the shared
            ``get_comic_info`` signature.

    Returns:
        A dict with the keys ``img``, ``month``, ``year``, ``day``,
        ``author``, ``title``, ``alt`` and ``description``.
    """
    # The page title is read from the <meta name="description"> tag.
    meta_tag = soup.find('meta', attrs={'name': 'description'})
    title = meta_tag["content"]

    body_tag = soup.find('div', itemprop='articleBody')
    description = body_tag.text

    author_tag = soup.find('span', itemprop='author copyrightHolder')
    author = author_tag.string

    images = soup.find_all('img', itemprop='image')
    # Sanity check on the scraped markup: each image is expected to carry
    # the same text in its 'title' and 'alt' attributes.
    assert all(image['title'] == image['alt'] for image in images)
    if images:
        alt = images[0]['alt']
    else:
        alt = ""

    time_tag = soup.find('time', itemprop='datePublished')
    published = string_to_date(time_tag["datetime"], "%Y-%m-%d %H:%M:%S")

    info = {
        # Each image 'src' is combined with the class-level url.
        'img': [urljoin_wrapper(cls.url, image['src']) for image in images],
        'month': published.month,
        'year': published.year,
        'day': published.day,
        'author': author,
        'title': title,
        'alt': alt,
        'description': description,
    }
    return info
| 2471 |
|
|
| 2472 |
|
|
|
@@ 1191-1210 (lines=20) @@
|
| 1188 |
|
get_first_comic_link = get_a_comicnavbase_comicnavfirst |
| 1189 |
|
get_navi_link = get_link_rel_next |
| 1190 |
|
|
| 1191 |
|
@classmethod
def get_comic_info(cls, soup, link):
    """Collect the metadata of a single comic from its parsed page.

    Args:
        soup: Parsed page exposing ``find`` / ``find_all``
            (presumably a BeautifulSoup document -- confirm with caller).
        link: Not used by this implementation; kept for the shared
            ``get_comic_info`` signature.

    Returns:
        A dict with the keys ``short_url``, ``title``, ``month``,
        ``year``, ``day`` and ``img``.
    """
    shortlink_tag = soup.find('link', rel='shortlink')
    short_url = shortlink_tag['href']

    # The date is the first child of the 'entry-meta' div, stripped of
    # surrounding whitespace before parsing.
    meta_div = soup.find('div', class_='entry-meta')
    raw_date = meta_div.contents[0].strip()
    published = string_to_date(raw_date, "%B %d, %Y")

    comic_div = soup.find('div', id='comic')
    images = comic_div.find_all('img')

    # Without any image the title stays empty; otherwise it is taken from
    # the first image's 'alt' text.
    title = ""
    if images:
        first_image = images[0]
        title = first_image['alt']
        # Sanity check on the scraped markup: 'title' must match 'alt'.
        assert first_image['title'] == title

    info = {
        'short_url': short_url,
        'title': title,
        'month': published.month,
        'year': published.year,
        'day': published.day,
        'img': [convert_iri_to_plain_ascii_uri(image['src']) for image in images],
    }
    return info
| 1212 |
|
|
| 1213 |
|
|