Code Duplication    Length = 21-25 lines in 4 locations

comics.py 4 locations

@@ 648-670 (lines=23) @@
645
646
class PenelopeBagieu(GenericNavigableComic):
    """Scraper for the comics posted on Penelope Bagieu's blog.

    Moving to the next post is delegated to ``get_link_rel_next``.
    """
    name = 'bagieu'
    long_name = 'Ma vie est tout a fait fascinante (Bagieu)'
    url = 'http://www.penelope-jolicoeur.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return a dict whose 'href' entry is the hard-coded first post URL."""
        first_post = 'http://www.penelope-jolicoeur.com/2007/02/ma-vie-mon-oeuv.html'
        return {'href': first_post}

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect title, image sources and date of the post held in `soup`.

        The date comes from the 'date-header' h2 and is parsed with
        ``string_to_date`` using the "%A %d %B %Y" format and the
        "fr_FR.utf8" argument (presumably a locale name). Images are every
        <img> found in the 'entry-body' div; the title is the string of the
        'entry-header' h3. `link` is not used by this method.
        """
        header_text = soup.find('h2', class_='date-header').string
        published = string_to_date(header_text, "%A %d %B %Y", "fr_FR.utf8")
        body_images = soup.find('div', class_='entry-body').find_all('img')
        post_title = soup.find('h3', class_='entry-header').string
        sources = []
        for image in body_images:
            sources.append(image['src'])
        return dict(
            title=post_title,
            img=sources,
            month=published.month,
            year=published.year,
            day=published.day,
        )
672
673
@@ 620-644 (lines=25) @@
617
    url = "http://itsthetie.com"
618
    get_first_comic_link = get_div_navfirst_a
619
    get_navi_link = get_a_rel_next
620
621
    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect title, date, image sources and tags from a comic page.

        The title is the string of the link inside the 'comic-title' h1 and
        the date is the string of the link inside the
        'comic-meta entry-meta' header, parsed with the "%B %d, %Y" format.
        `link` is not used by this method.
        """
        heading = soup.find('h1', class_='comic-title')
        comic_title = heading.find('a').string
        meta_header = soup.find('header', class_='comic-meta entry-meta')
        published = string_to_date(meta_header.find('a').string, "%B %d, %Y")
        # Bonus images may or may not already be listed in meta og:image,
        # so hover images are only appended when og:image does not have them.
        og_sources = [meta['content']
                      for meta in soup.find_all('meta', property='og:image')]
        hover_sources = [tag['data-oversrc']
                         for tag in soup.find_all('img', attrs={'data-oversrc': True})]
        candidates = og_sources + [src for src in hover_sources
                                   if src not in og_sources]
        # Drop any source pointing at the "/2016/01/bonus-panel.png" image.
        kept_sources = []
        for src in candidates:
            if src.endswith("/2016/01/bonus-panel.png"):
                continue
            kept_sources.append(src)
        tag_meta = soup.find('meta', property='article:tag')
        if tag_meta:
            tag_text = tag_meta['content']
        else:
            tag_text = ""
        return dict(
            title=comic_title,
            month=published.month,
            year=published.year,
            day=published.day,
            img=kept_sources,
            tags=tag_text,
        )
644
645
646
class PenelopeBagieu(GenericNavigableComic):
647
    """Class to retrieve comics from Penelope Bagieu's blog."""
@@ 1660-1680 (lines=21) @@
1657
        return reversed(get_soup_at_url(archive_url).find_all('a', rel='bookmark'))
1658
1659
    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect date, image, title, alt text and tags from a post page.

        The date is the <em> string inside the 'postdate' div, passed through
        ``remove_st_nd_rd_th_from_date`` before being parsed with the
        "%B %d, %Y" format. When the page has no truthy div with id 'comic',
        the image list is empty and both title and alt are empty strings.
        `link` is not used by this method.
        """
        raw_date = soup.find('div', class_='postdate').find('em').string
        cleaned_date = remove_st_nd_rd_th_from_date(raw_date)
        published = string_to_date(cleaned_date, "%B %d, %Y")
        # Defaults describe a post without a comic; overridden below otherwise.
        sources, alt_text, comic_title = [], '', ''
        comic_div = soup.find('div', id='comic')
        if comic_div:
            image = comic_div.find('img')
            sources = [image['src']]
            alt_text = image['alt']
            # Sanity check: the page is expected to repeat alt as title.
            assert alt_text == image['title']
            comic_title = soup.find('meta', property='og:title')['content']
        return dict(
            month=published.month,
            year=published.year,
            day=published.day,
            img=sources,
            title=comic_title,
            alt=alt_text,
            tags=' '.join(
                anchor.string
                for anchor in soup.find('div', class_='postmeta').find_all('a', rel='tag')),
        )
@@ 882-904 (lines=23) @@
879
                if not img['src'].endswith(
880
                    ('link.gif', '32.png', 'twpbookad.jpg',
881
                     'merchad.jpg', 'header.gif', 'tipjar.jpg'))]
882
        return {
883
            'title': title.string if title else None,
884
            'title2': '  '.join(img.get('alt') for img in imgs if img.get('alt')),
885
            'img': [urljoin_wrapper(cls.url, img['src']) for img in imgs],
886
        }
887
888
889
class DeadlyPanel(GenericEmptyComic, GenericNavigableComic):
    """Scraper for the Deadly Panel comics."""
    # Also on https://tapastic.com/series/deadlypanel
    name = 'deadly'
    long_name = 'Deadly Panel'
    url = 'http://www.deadlypanel.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the sources of every <img> inside the div with id 'comic'.

        Each image is checked to carry identical 'alt' and 'title'
        attributes. `link` is not used by this method.
        """
        comic_images = soup.find('div', id='comic').find_all('img')
        for image in comic_images:
            # Sanity check: alt and title are expected to match on this site.
            assert image['alt'] == image['title']
        sources = [image['src'] for image in comic_images]
        return {'img': sources}
906
907