|
@@ 2094-2110 (lines=17) @@
|
| 2091 |
|
# Bind get_first_comic_link to the existing get_div_navfirst_a callable
# (defined outside this excerpt) instead of writing a new lookup.
get_first_comic_link = get_div_navfirst_a |
| 2092 |
|
# Likewise, get_navi_link is an alias for get_link_rel_next (also defined
# outside this excerpt).
get_navi_link = get_link_rel_next |
| 2093 |
|
|
| 2094 |
|
@classmethod
def get_comic_info(cls, soup, link):
    """Collect the metadata of a single comic page.

    Reads the publication date from the ``span.post-date`` element, the
    author from ``span.post-author`` and every ``<img>`` found inside the
    ``div#comic`` element of ``soup``.  ``link`` is accepted for interface
    compatibility but is not used here.

    Returns a dict with the keys 'month', 'year', 'day', 'img' (list of
    image ``src`` values), 'title' and 'author'.
    """
    raw_date = soup.find('span', class_='post-date').string
    # The helper's name suggests it strips ordinal suffixes ("1st", "2nd")
    # so that the "%B %d, %Y" pattern can match -- TODO confirm.
    cleaned_date = remove_st_nd_rd_th_from_date(raw_date)
    published = string_to_date(cleaned_date, "%B %d, %Y")

    author = soup.find('span', class_='post-author').string

    # A page without a comic container yields no images and an empty title.
    comic_div = soup.find('div', id='comic')
    if comic_div:
        images = comic_div.find_all('img')
    else:
        images = []

    if images:
        title = images[0]['title']
    else:
        title = ""

    # Sanity check: every image must carry the same title and alt text.
    assert all(image['title'] == image['alt'] == title for image in images)

    sources = [image['src'] for image in images]
    return {
        'month': published.month,
        'year': published.year,
        'day': published.day,
        'img': sources,
        'title': title,
        'author': author,
    }
| 2112 |
|
|
| 2113 |
|
|
|
@@ 1030-1045 (lines=16) @@
|
| 1027 |
|
"""Get link to first comics.""" |
| 1028 |
|
return get_soup_at_url(cls.url).find('a', rel='start') |
| 1029 |
|
|
| 1030 |
|
@classmethod
def get_comic_info(cls, soup, link):
    """Get information about a particular comics.

    Reads the main comic image (``img#cc-comic``), an optional second
    image inside ``div#aftercomic`` and the publication date from the
    first child of ``div.cc-publishtime``.  ``link`` is accepted for
    interface compatibility but is not used here.

    Returns a dict with the keys 'title' (the main image's ``title``
    attribute), 'img' (list of image URLs, each passed through
    ``urljoin_wrapper(cls.url, ...)`` and
    ``convert_iri_to_plain_ascii_uri``), 'day', 'month' and 'year'.

    NOTE: a page lacking the main image or the publish-time div is not
    handled here and will raise.
    """
    image1 = soup.find('img', id='cc-comic')
    image_url1 = image1['src']

    # The after-comic image is optional.  The container may be absent, or
    # present without any <img>; in the latter case find() returns None,
    # so look the tag up first instead of subscripting the result directly.
    aftercomic = soup.find('div', id='aftercomic')
    after_img = aftercomic.find('img') if aftercomic else None
    image_url2 = after_img['src'] if after_img is not None else ''

    imgs = [image_url1] + ([image_url2] if image_url2 else [])

    date_str = soup.find('div', class_='cc-publishtime').contents[0]
    day = string_to_date(date_str, "%B %d, %Y")
    return {
        'title': image1['title'],
        'img': [convert_iri_to_plain_ascii_uri(urljoin_wrapper(cls.url, i)) for i in imgs],
        'day': day.day,
        'month': day.month,
        'year': day.year,
    }
| 1047 |
|
|
| 1048 |
|
|