@@ 2450-2469 (lines=20) @@ | ||
2447 | for link in last_soup.find_all('a', rel='next' if next_ else 'prev'): |
|
2448 | if link['href'] != '/comic': |
|
2449 | return link |
|
2450 | return None |
|
2451 | ||
@classmethod
def get_comic_info(cls, soup, link):
    """Build the info dictionary for one comic from its parsed page.

    Args:
        soup: parsed page of the comic; only its ``find`` / ``find_all``
            methods are used (presumably a BeautifulSoup object --
            confirm against the caller).
        link: not used by this implementation.

    Returns:
        A dict with the keys ``img`` (image URLs joined onto ``cls.url``),
        ``month``, ``year``, ``day``, ``author``, ``title``, ``alt`` and
        ``description``.

    Raises:
        AssertionError: if an image has a ``title`` attribute that differs
            from its ``alt`` attribute.
    """
    meta_tag = soup.find('meta', attrs={'name': 'description'})
    title = meta_tag["content"]
    body_tag = soup.find('div', itemprop='articleBody')
    description = body_tag.text
    author_tag = soup.find('span', itemprop='author copyrightHolder')
    author = author_tag.string
    images = soup.find_all('img', itemprop='image')
    # Sanity check: 'title' and 'alt' must carry the same text on every
    # image, since only 'alt' is reported below.
    for image in images:
        assert image['title'] == image['alt']
    if images:
        alt = images[0]['alt']
    else:
        alt = ""
    time_tag = soup.find('time', itemprop='datePublished')
    published = string_to_date(time_tag["datetime"], "%Y-%m-%d %H:%M:%S")
    return {
        'img': [urljoin_wrapper(cls.url, image['src']) for image in images],
        'month': published.month,
        'year': published.year,
        'day': published.day,
        'author': author,
        'title': title,
        'alt': alt,
        'description': description,
    }
|
@@ 1191-1210 (lines=20) @@ | ||
1188 | long_name = 'Toon Hole' |
|
1189 | url = 'http://www.toonhole.com' |
|
1190 | get_first_comic_link = get_a_comicnavbase_comicnavfirst |
|
1191 | get_navi_link = get_link_rel_next |
|
1192 | ||
@classmethod
def get_comic_info(cls, soup, link):
    """Build the info dictionary for one comic from its parsed page.

    Args:
        soup: parsed page of the comic; only its ``find`` method and the
            tags it returns are used (presumably a BeautifulSoup object --
            confirm against the caller).
        link: not used by this implementation.

    Returns:
        A dict with the keys ``short_url``, ``title``, ``month``, ``year``,
        ``day`` and ``img``. ``title`` is the ``alt`` text of the first
        image inside the ``div`` with id ``comic``, or ``""`` when that
        ``div`` holds no image.

    Raises:
        AssertionError: if the first image has a ``title`` attribute that
            differs from its ``alt`` attribute.
    """
    shortlink_tag = soup.find('link', rel='shortlink')
    short_url = shortlink_tag['href']
    meta_div = soup.find('div', class_='entry-meta')
    # The date is read from the first child node of the entry-meta block.
    published = string_to_date(meta_div.contents[0].strip(), "%B %d, %Y")
    images = soup.find('div', id='comic').find_all('img')
    title = ""
    if images:
        first_image = images[0]
        title = first_image['alt']
        # Sanity check: 'title' and 'alt' must carry the same text.
        assert first_image['title'] == title
    return {
        'short_url': short_url,
        'title': title,
        'month': published.month,
        'year': published.year,
        'day': published.day,
        'img': [
            convert_iri_to_plain_ascii_uri(image['src'])
            for image in images
        ],
    }