Code Duplication: similar blocks of 13-20 lines found in 5 locations

comics.py: 5 locations

@@ 1809-1828 (lines=20) @@
1806
    get_first_comic_link = simulate_first_link
1807
    first_url = 'http://respawncomic.com/comic/c0001/'
1808
1809
    @classmethod
1810
    def get_comic_info(cls, soup, link):
1811
        """Get information about a particular comics."""
1812
        title = soup.find('meta', property='og:title')['content']
1813
        author = soup.find('meta', attrs={'name': 'shareaholic:article_author_name'})['content']
1814
        date_str = soup.find('meta', attrs={'name': 'shareaholic:article_published_time'})['content']
1815
        date_str = date_str[:10]
1816
        day = string_to_date(date_str, "%Y-%m-%d")
1817
        imgs = soup.find_all('meta', property='og:image')
1818
        skip_imgs = {
1819
            'http://respawncomic.com/wp-content/uploads/2016/03/site/HAROLD2.png',
1820
            'http://respawncomic.com/wp-content/uploads/2016/03/site/DEVA.png'
1821
        }
1822
        return {
1823
            'title': title,
1824
            'author': author,
1825
            'day': day.day,
1826
            'month': day.month,
1827
            'year': day.year,
1828
            'img': [i['content'] for i in imgs if i['content'] not in skip_imgs],
1829
        }
1830
1831
@@ 526-541 (lines=16) @@
523
        li = last_soup.find('li', class_='prev' if next_ else 'next')
524
        return li.find('a') if li else None
525
526
    @classmethod
527
    def get_comic_info(cls, soup, link):
528
        """Get information about a particular comics."""
529
        short_url = soup.find('link', rel='shortlink')['href']
530
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
531
        imgs = soup.find_all('meta', property='og:image')
532
        date_str = soup.find('span', property='dc:date')['content']
533
        date_str = date_str[:10]
534
        day = string_to_date(date_str, "%Y-%m-%d")
535
        return {
536
            'short_url': short_url,
537
            'title': title,
538
            'img': [i['content'] for i in imgs],
539
            'day': day.day,
540
            'month': day.month,
541
            'year': day.year,
542
        }
543
544
@@ 4528-4542 (lines=15) @@
4525
        gocomics = 'http://www.gocomics.com'
4526
        return urljoin_wrapper(gocomics, link['href'])
4527
4528
    @classmethod
4529
    def get_comic_info(cls, soup, link):
4530
        """Get information about a particular comics."""
4531
        date_str = soup.find('meta', property='article:published_time')['content']
4532
        day = string_to_date(date_str, "%Y-%m-%d")
4533
        imgs = soup.find('picture', class_='img-fluid item-comic-image').find_all('img')
4534
        author = soup.find('meta', property='article:author')['content']
4535
        tags = soup.find('meta', property='article:tag')['content']
4536
        return {
4537
            'day': day.day,
4538
            'month': day.month,
4539
            'year': day.year,
4540
            'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
4541
            'author': author,
4542
            'tags': tags,
4543
        }
4544
4545
@@ 2984-2998 (lines=15) @@
2981
        # prev is next / next is prev
2982
        return last_soup.find('li', class_='previous' if next_ else 'next').find('a')
2983
2984
    @classmethod
2985
    def get_comic_info(cls, soup, link):
2986
        """Get information about a particular comics."""
2987
        date_str = soup.find('time', class_='published')['datetime']
2988
        day = string_to_date(date_str, "%Y-%m-%d")
2989
        author = soup.find('span', class_='blog-author').find('a').string
2990
        title = soup.find('meta', property='og:title')['content']
2991
        imgs = soup.find_all('meta', itemprop='image')
2992
        return {
2993
            'img': [i['content'] for i in imgs],
2994
            'title': title,
2995
            'author': author,
2996
            'day': day.day,
2997
            'month': day.month,
2998
            'year': day.year,
2999
        }
3000
3001
@@ 3052-3064 (lines=13) @@
3049
        """Get link to first comics."""
3050
        return get_soup_at_url(cls.url).find('a', class_='webcomic-link webcomic1-link first-webcomic-link first-webcomic1-link')
3051
3052
    @classmethod
3053
    def get_comic_info(cls, soup, link):
3054
        """Get information about a particular comics."""
3055
        title = soup.find('meta', property='og:title')['content']
3056
        imgs = soup.find('div', class_='webcomic-image').find_all('img')
3057
        date_str = soup.find('meta', property='article:published_time')['content'][:10]
3058
        day = string_to_date(date_str, "%Y-%m-%d")
3059
        return {
3060
            'title': title,
3061
            'day': day.day,
3062
            'month': day.month,
3063
            'year': day.year,
3064
            'img': [i['src'] for i in imgs],
3065
        }
3066
3067