@@ 501-516 (lines=16) @@ | ||
498 | li = last_soup.find('li', class_='prev' if next_ else 'next') |
|
499 | return li.find('a') if li else None |
|
500 | ||
@classmethod
def get_comic_info(cls, soup, link):
    """Collect the metadata of one comic from its parsed page.

    `soup` must expose `find` / `find_all` (presumably a BeautifulSoup
    document - confirm against the caller).  `link` is accepted for
    interface compatibility and is not read here.

    Returns a dict with the keys 'short_url', 'title', 'img' (list of
    og:image contents), 'day', 'month' and 'year'.
    """
    shortlink_tag = soup.find('link', rel='shortlink')
    short_url = shortlink_tag['href']

    title_tag = soup.find('meta', attrs={'name': 'twitter:title'})
    title = title_tag['content']

    image_tags = soup.find_all('meta', property='og:image')

    date_tag = soup.find('span', property='dc:date')
    # Only the first ten characters (the YYYY-MM-DD part) are parsed.
    published = string_to_date(date_tag['content'][:10], "%Y-%m-%d")

    info = {'short_url': short_url, 'title': title}
    info['img'] = [tag['content'] for tag in image_tags]
    info['day'] = published.day
    info['month'] = published.month
    info['year'] = published.year
    return info
|
518 | ||
519 | ||
@@ 1003-1021 (lines=19) @@ | ||
1000 | get_first_comic_link = get_a_comicnavbase_comicnavfirst |
|
1001 | get_navi_link = get_a_rel_next |
|
1002 | ||
@classmethod
def get_comic_info(cls, soup, link):
    """Collect the metadata of one comic from its parsed page.

    `soup` must expose `find` / `find_all` (presumably a BeautifulSoup
    document - confirm against the caller).  `link` is accepted for
    interface compatibility and is not read here.

    Returns a dict with the keys 'img' (list of og:image contents),
    'title', 'author', 'desc' (empty string when the page has no
    og:description), 'day', 'month' and 'year'.
    """
    title_tag = soup.find('meta', property='og:title')
    title = title_tag['content']

    # The description meta is optional; fall back to an empty string.
    desc = ""
    desc_tag = soup.find('meta', property='og:description')
    if desc_tag:
        desc = desc_tag['content']

    author_tag = soup.find(
        'meta', attrs={'name': 'shareaholic:article_author_name'})
    author = author_tag['content']

    published_tag = soup.find(
        'meta', attrs={'name': 'shareaholic:article_published_time'})
    # Only the first ten characters (the YYYY-MM-DD part) are parsed.
    published = string_to_date(published_tag['content'][:10], "%Y-%m-%d")

    image_tags = soup.find_all('meta', property='og:image')

    info = {'img': [tag['content'] for tag in image_tags]}
    info['title'] = title
    info['author'] = author
    info['desc'] = desc
    info['day'] = published.day
    info['month'] = published.month
    info['year'] = published.year
    return info
|
1023 | ||
1024 | ||
@@ 1758-1777 (lines=20) @@ | ||
1755 | get_first_comic_link = simulate_first_link |
|
1756 | first_url = 'http://respawncomic.com/comic/c0001/' |
|
1757 | ||
@classmethod
def get_comic_info(cls, soup, link):
    """Collect the metadata of one comic from its parsed page.

    `soup` must expose `find` / `find_all` (presumably a BeautifulSoup
    document - confirm against the caller).  `link` is accepted for
    interface compatibility and is not read here.

    Returns a dict with the keys 'title', 'author', 'day', 'month',
    'year' and 'img'.  'img' lists the og:image contents except the two
    fixed site images named below.
    """
    title_tag = soup.find('meta', property='og:title')
    title = title_tag['content']

    author_tag = soup.find(
        'meta', attrs={'name': 'shareaholic:article_author_name'})
    author = author_tag['content']

    published_tag = soup.find(
        'meta', attrs={'name': 'shareaholic:article_published_time'})
    # Only the first ten characters (the YYYY-MM-DD part) are parsed.
    published = string_to_date(published_tag['content'][:10], "%Y-%m-%d")

    image_tags = soup.find_all('meta', property='og:image')

    # og:image values that must never be reported as comic images.
    excluded = {
        'http://respawncomic.com/wp-content/uploads/2016/03/site/HAROLD2.png',
        'http://respawncomic.com/wp-content/uploads/2016/03/site/DEVA.png',
    }

    info = {
        'title': title,
        'author': author,
        'day': published.day,
        'month': published.month,
        'year': published.year,
    }

    kept = []
    for tag in image_tags:
        url = tag['content']
        if url not in excluded:
            kept.append(url)
    info['img'] = kept
    return info
|
1779 | ||
1780 |