@@ 2927-2947 (lines=21) @@ | ||
2924 | first_url = 'http://www.littlelifelines.com/comics/well-done' |
|
2925 | ||
@classmethod
def get_navi_link(cls, last_soup, next_):
    """Return the anchor pointing to the following or preceding comic.

    `last_soup` is the parsed page of the current comic; `next_` is truthy
    when the following comic is wanted and falsy for the preceding one.
    Returns the result of looking up an 'a' element inside the matching
    'li' element, or None when no such 'li' element is found.
    """
    # The labels are swapped on this site: the 'prev' item leads to the
    # next comic and the 'next' item leads to the previous one.
    if next_:
        css_class = 'prev'
    else:
        css_class = 'next'
    item = last_soup.find('li', class_=css_class)
    if not item:
        return None
    return item.find('a')
|
2932 | ||
2933 | @classmethod |
|
2934 | def get_comic_info(cls, soup, link): |
|
2935 | """Get information about a particular comics.""" |
|
2936 | title = soup.find('meta', property='og:title')['content'] |
|
2937 | desc = soup.find('meta', property='og:description')['content'] |
|
2938 | date_str = soup.find('time', class_='published')['datetime'] |
|
2939 | day = string_to_date(date_str, "%Y-%m-%d") |
|
2940 | author = soup.find('a', rel='author').string |
|
2941 | div_content = soup.find('div', class_="body entry-content") |
|
2942 | imgs = div_content.find_all('img') |
|
2943 | imgs = [i for i in imgs if i.get('src') is not None] |
|
2944 | alt = imgs[0]['alt'] |
|
2945 | return { |
|
2946 | 'title': title, |
|
2947 | 'alt': alt, |
|
2948 | 'description': desc, |
|
2949 | 'author': author, |
|
2950 | 'day': day.day, |
|
@@ 2815-2835 (lines=21) @@ | ||
2812 | ||
2813 | class PainTrainComic(GenericNavigableComic): |
|
2814 | """Class to retrieve Pain Train Comics.""" |
|
2815 | name = 'paintrain' |
|
2816 | long_name = 'Pain Train Comics' |
|
2817 | url = 'http://paintraincomic.com' |
|
2818 | get_first_comic_link = get_a_navi_navifirst |
|
2819 | get_navi_link = get_link_rel_next |
|
2820 | ||
2821 | @classmethod |
|
2822 | def get_comic_info(cls, soup, link): |
|
2823 | """Get information about a particular comics.""" |
|
2824 | title = soup.find('h2', class_='post-title').string |
|
2825 | short_url = soup.find('link', rel='shortlink')['href'] |
|
2826 | short_url_re = re.compile('^%s/\\?p=([0-9]*)' % cls.url) |
|
2827 | num = int(short_url_re.match(short_url).groups()[0]) |
|
2828 | imgs = soup.find('div', id='comic').find_all('img') |
|
2829 | alt = imgs[0]['title'] |
|
2830 | assert all(i['alt'] == i['title'] == alt for i in imgs) |
|
2831 | date_str = soup.find('span', class_='post-date').string |
|
2832 | day = string_to_date(date_str, "%d/%m/%Y") |
|
2833 | return { |
|
2834 | 'short_url': short_url, |
|
2835 | 'num': num, |
|
2836 | 'img': [i['src'] for i in imgs], |
|
2837 | 'month': day.month, |
|
2838 | 'year': day.year, |
|
@@ 2332-2351 (lines=20) @@ | ||
2329 | ||
@classmethod
def get_navi_link(cls, last_soup, next_):
    """Return the first usable navigation anchor, or None if there is none.

    Looks through the 'a' elements of `last_soup` whose rel is 'next'
    (when `next_` is truthy) or 'prev' (otherwise) and returns the first
    one whose href is not the bare '/comic' path.
    """
    relation = 'next' if next_ else 'prev'
    candidates = last_soup.find_all('a', rel=relation)
    # Anchors whose href is exactly '/comic' are skipped; the generator is
    # consumed lazily so only the elements up to the first match are read.
    usable = (anchor for anchor in candidates if anchor['href'] != '/comic')
    return next(usable, None)
|
2337 | ||
2338 | @classmethod |
|
2339 | def get_comic_info(cls, soup, link): |
|
2340 | """Get information about a particular comics.""" |
|
2341 | title = soup.find('meta', attrs={'name': 'description'})["content"] |
|
2342 | description = soup.find('div', itemprop='articleBody').text |
|
2343 | author = soup.find('span', itemprop='author copyrightHolder').string |
|
2344 | imgs = soup.find_all('img', itemprop='image') |
|
2345 | assert all(i['title'] == i['alt'] for i in imgs) |
|
2346 | alt = imgs[0]['alt'] if imgs else "" |
|
2347 | date_str = soup.find('time', itemprop='datePublished')["datetime"] |
|
2348 | day = string_to_date(date_str, "%Y-%m-%d %H:%M:%S") |
|
2349 | return { |
|
2350 | 'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs], |
|
2351 | 'month': day.month, |
|
2352 | 'year': day.year, |
|
2353 | 'day': day.day, |
|
2354 | 'author': author, |