@@ 1809-1828 (lines=20) @@ | ||
1806 | get_first_comic_link = simulate_first_link |
|
1807 | first_url = 'http://respawncomic.com/comic/c0001/' |
|
1808 | ||
@classmethod
def get_comic_info(cls, soup, link):
    """Build the info dict (title, author, date, images) for one comic page.

    All values are read from ``<meta>`` tags of ``soup``; ``link`` is not
    used by this implementation.
    """
    def named_meta(name):
        # Content of the <meta name="..."> tag with the given name.
        return soup.find('meta', attrs={'name': name})['content']

    title = soup.find('meta', property='og:title')['content']
    author = named_meta('shareaholic:article_author_name')
    # Only the first ten characters (the YYYY-MM-DD part) are parsed.
    published = string_to_date(
        named_meta('shareaholic:article_published_time')[:10], "%Y-%m-%d")
    og_images = soup.find_all('meta', property='og:image')
    # og:image URLs that must not be reported as comic images
    # (presumably site artwork -- confirm against the site).
    ignored = frozenset((
        'http://respawncomic.com/wp-content/uploads/2016/03/site/HAROLD2.png',
        'http://respawncomic.com/wp-content/uploads/2016/03/site/DEVA.png',
    ))
    kept = []
    for tag in og_images:
        url = tag['content']
        if url not in ignored:
            kept.append(url)
    return {
        'title': title,
        'author': author,
        'day': published.day,
        'month': published.month,
        'year': published.year,
        'img': kept,
    }
|
1830 | ||
1831 | ||
@@ 526-541 (lines=16) @@ | ||
523 | li = last_soup.find('li', class_='prev' if next_ else 'next') |
|
524 | return li.find('a') if li else None |
|
525 | ||
@classmethod
def get_comic_info(cls, soup, link):
    """Collect short URL, title, images and date for a single comic page.

    ``link`` is not used by this implementation.
    """
    info = {
        'short_url': soup.find('link', rel='shortlink')['href'],
        'title': soup.find('meta', attrs={'name': 'twitter:title'})['content'],
    }
    image_tags = soup.find_all('meta', property='og:image')
    raw_date = soup.find('span', property='dc:date')['content']
    # Only the leading YYYY-MM-DD part of the dc:date value is parsed.
    published = string_to_date(raw_date[:10], "%Y-%m-%d")
    info['img'] = [tag['content'] for tag in image_tags]
    info['day'] = published.day
    info['month'] = published.month
    info['year'] = published.year
    return info
|
543 | ||
544 | ||
@@ 4528-4542 (lines=15) @@ | ||
4525 | gocomics = 'http://www.gocomics.com' |
|
4526 | return urljoin_wrapper(gocomics, link['href']) |
|
4527 | ||
@classmethod
def get_comic_info(cls, soup, link):
    """Return date, image URLs, author and tags for one comic page.

    Image ``src`` values are resolved against ``cls.url``; ``link`` is not
    used by this implementation.
    """
    def og_meta(prop):
        # Content of the <meta property="..."> tag with the given property.
        return soup.find('meta', property=prop)['content']

    published = string_to_date(og_meta('article:published_time'), "%Y-%m-%d")
    picture = soup.find('picture', class_='img-fluid item-comic-image')
    img_tags = picture.find_all('img')
    author = og_meta('article:author')
    # Only the first article:tag meta element is read.
    tags = og_meta('article:tag')
    images = []
    for tag in img_tags:
        images.append(urljoin_wrapper(cls.url, tag['src']))
    return dict(
        day=published.day,
        month=published.month,
        year=published.year,
        img=images,
        author=author,
        tags=tags,
    )
|
4544 | ||
4545 | ||
@@ 2984-2998 (lines=15) @@ | ||
2981 | # prev is next / next is prev |
|
2982 | return last_soup.find('li', class_='previous' if next_ else 'next').find('a') |
|
2983 | ||
@classmethod
def get_comic_info(cls, soup, link):
    """Return images, title, author and publication date of a comic page.

    ``link`` is not used by this implementation.
    """
    time_tag = soup.find('time', class_='published')
    published = string_to_date(time_tag['datetime'], "%Y-%m-%d")
    # The author name is the text of the link inside the blog-author span.
    author_link = soup.find('span', class_='blog-author').find('a')
    author = author_link.string
    title = soup.find('meta', property='og:title')['content']
    image_metas = soup.find_all('meta', itemprop='image')
    info = {'img': [meta['content'] for meta in image_metas]}
    info['title'] = title
    info['author'] = author
    info['day'] = published.day
    info['month'] = published.month
    info['year'] = published.year
    return info
|
3000 | ||
3001 | ||
@@ 3052-3064 (lines=13) @@ | ||
3049 | """Get link to first comics.""" |
|
3050 | return get_soup_at_url(cls.url).find('a', class_='webcomic-link webcomic1-link first-webcomic-link first-webcomic1-link') |
|
3051 | ||
@classmethod
def get_comic_info(cls, soup, link):
    """Return title, publication date and image sources of a comic page.

    ``link`` is not used by this implementation.
    """
    title = soup.find('meta', property='og:title')['content']
    container = soup.find('div', class_='webcomic-image')
    img_tags = container.find_all('img')
    published_meta = soup.find('meta', property='article:published_time')
    # Only the leading YYYY-MM-DD part of the published time is parsed.
    published = string_to_date(published_meta['content'][:10], "%Y-%m-%d")
    sources = []
    for tag in img_tags:
        sources.append(tag['src'])
    return {
        'title': title,
        'day': published.day,
        'month': published.month,
        'year': published.year,
        'img': sources,
    }
|
3066 | ||
3067 |