Code Duplication    Length = 20-23 lines in 6 locations

comics.py 6 locations

@@ 603-625 (lines=23) @@
600
        day = string_to_date(date_str, "%B %d, %Y")
601
        # Bonus images may or may not be in meta og:image.
602
        imgs = soup.find_all('meta', property='og:image')
603
        imgs_src = [i['content'] for i in imgs]
604
        bonus = soup.find_all('img', attrs={'data-oversrc': True})
605
        bonus_src = [b['data-oversrc'] for b in bonus]
606
        all_imgs_src = imgs_src + [s for s in bonus_src if s not in imgs_src]
607
        all_imgs_src = [s for s in all_imgs_src if not s.endswith("/2016/01/bonus-panel.png")]
608
        tag_meta = soup.find('meta', property='article:tag')
609
        tags = tag_meta['content'] if tag_meta else ""
610
        return {
611
            'title': title,
612
            'month': day.month,
613
            'year': day.year,
614
            'day': day.day,
615
            'img': all_imgs_src,
616
            'tags': tags,
617
        }
618
619
620
class PenelopeBagieu(GenericNavigableComic):
621
    """Class to retrieve comics from Penelope Bagieu's blog."""
622
    name = 'bagieu'
623
    long_name = 'Ma vie est tout a fait fascinante (Bagieu)'
624
    url = 'http://www.penelope-jolicoeur.com'
625
    get_navi_link = get_link_rel_next
626
627
    @classmethod
628
    def get_first_comic_link(cls):
@@ 851-872 (lines=22) @@
848
    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect the page title, image alt texts and image URLs of a comic.

        Every <img> whose 'src' ends with one of a fixed list of file names
        is left out.  The remaining sources are resolved against cls.url.
        The 'link' argument is not used here.
        """
        ignored_suffixes = (
            'link.gif', '32.png', 'twpbookad.jpg',
            'merchad.jpg', 'header.gif', 'tipjar.jpg',
        )
        title_tag = soup.find('title')
        kept = []
        for tag in soup.find_all('img'):
            if tag['src'].endswith(ignored_suffixes):
                continue
            kept.append(tag)
        page_title = title_tag.string if title_tag else None
        # Only non-empty alt texts take part in the secondary title.
        alt_texts = [tag.get('alt') for tag in kept if tag.get('alt')]
        sources = [urljoin_wrapper(cls.url, tag['src']) for tag in kept]
        return {
            'title': page_title,
            'title2': '  '.join(alt_texts),
            'img': sources,
        }
861
862
863
class DeadlyPanel(GenericEmptyComic, GenericNavigableComic):
864
    """Class to retrieve Deadly Panel comics."""
865
    # Also on https://tapastic.com/series/deadlypanel
866
    name = 'deadly'
867
    long_name = 'Deadly Panel'
868
    url = 'http://www.deadlypanel.com'
869
    get_first_comic_link = get_a_navi_navifirst
870
    get_navi_link = get_a_navi_comicnavnext_navinext
871
872
    @classmethod
873
    def get_comic_info(cls, soup, link):
874
        """Get information about a particular comics."""
875
        imgs = soup.find('div', id='comic').find_all('img')
@@ 629-650 (lines=22) @@
626
627
    @classmethod
    def get_first_comic_link(cls):
        """Return a link-like dict pointing at the first comic.

        The address is hard-coded rather than looked up on the site.
        """
        first_url = 'http://www.penelope-jolicoeur.com/2007/02/ma-vie-mon-oeuv.html'
        return dict(href=first_url)
631
632
    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, publication date and image sources from a post.

        The text of the 'date-header' <h2> is handed to string_to_date
        together with the format "%A %d %B %Y" and "fr_FR.utf8"
        (presumably the locale used for parsing).  Images are taken from
        the 'entry-body' <div>.  The 'link' argument is not used here.
        """
        date_header = soup.find('h2', class_='date-header')
        published = string_to_date(date_header.string, "%A %d %B %Y", "fr_FR.utf8")
        entry_body = soup.find('div', class_='entry-body')
        pictures = entry_body.find_all('img')
        heading = soup.find('h3', class_='entry-header').string
        info = {'title': heading}
        info['img'] = [pic['src'] for pic in pictures]
        info['month'] = published.month
        info['year'] = published.year
        info['day'] = published.day
        return info
646
647
648
class OneOneOneOneComic(GenericNavigableComic):
649
    """Class to retrieve 1111 Comics."""
650
    # Also on http://comics1111.tumblr.com
651
    # Also on https://tapastic.com/series/1111-Comics
652
    name = '1111'
653
    long_name = '1111 Comics'
@@ 654-674 (lines=21) @@
651
    # Also on https://tapastic.com/series/1111-Comics
652
    name = '1111'
653
    long_name = '1111 Comics'
654
    url = 'http://www.1111comics.me'
655
    get_first_comic_link = get_div_navfirst_a
656
    get_navi_link = get_link_rel_next
657
658
    @classmethod
    def get_comic_info(cls, soup, link):
        """Read title, publication date and og:image URLs from a comic page.

        The title is the text of the link inside the 'comic-title' <h1>;
        the date is the text of the link inside the 'comic-meta entry-meta'
        <header>, parsed with the format "%B %d, %Y".  The 'link' argument
        is not used here.
        """
        title_anchor = soup.find('h1', class_='comic-title').find('a')
        headline = title_anchor.string
        meta_anchor = soup.find('header', class_='comic-meta entry-meta').find('a')
        published = string_to_date(meta_anchor.string, "%B %d, %Y")
        og_images = soup.find_all('meta', property='og:image')
        info = {
            'title': headline,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
        info['img'] = [meta['content'] for meta in og_images]
        return info
672
673
674
class AngryAtNothing(GenericNavigableComic):
675
    """Class to retrieve Angry at Nothing comics."""
676
    # Also on http://tapastic.com/series/Comics-yeah-definitely-comics-
677
    name = 'angry'
@@ 2490-2509 (lines=20) @@
2487
        imgs = soup.find("div", id="comic").find_all("img")
2488
        assert all(i['alt'] == i['title'] for i in imgs)
2489
        assert len(imgs) <= 1
2490
        alt = imgs[0]['alt'] if imgs else ""
2491
        return {
2492
            'img': [i['src'] for i in imgs],
2493
            'title': title,
2494
            'alt': alt,
2495
            'author': author,
2496
            'day': day.day,
2497
            'month': day.month,
2498
            'year': day.year
2499
        }
2500
2501
2502
class LastPlaceComics(GenericNavigableComic):
2503
    """Class to retrieve Last Place Comics."""
2504
    name = 'lastplace'
2505
    long_name = 'LastPlaceComics'
2506
    url = "http://lastplacecomics.com"
2507
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
2508
    get_navi_link = get_link_rel_next
2509
2510
    @classmethod
2511
    def get_comic_info(cls, soup, link):
2512
        """Get information about a particular comics."""
@@ 1594-1613 (lines=20) @@
1591
    long_name = 'Something Of That Ilk'
1592
    url = 'http://www.somethingofthatilk.com'
1593
1594
1595
class InfiniteMonkeyBusiness(GenericNavigableComic):
    """Retriever for the Infinite Monkey Business comics.

    Navigation between comics is delegated to the shared helper bound to
    get_navi_link; the first comic is a fixed URL.
    """
    name = 'monkey'
    long_name = 'Infinite Monkey Business'
    url = 'http://infinitemonkeybusiness.net'
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_first_comic_link(cls):
        """Return a link-like dict with the hard-coded first comic URL."""
        first_url = 'http://infinitemonkeybusiness.net/comic/pillory/'
        return dict(href=first_url)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title and image sources) for one comic page.

        The title comes from the og:title <meta> tag and the images from
        the <div> whose id is 'comic'.  The 'link' argument is not used.
        """
        og_title = soup.find('meta', property='og:title')
        headline = og_title['content']
        comic_div = soup.find('div', id='comic')
        sources = [tag['src'] for tag in comic_div.find_all('img')]
        return {
            'title': headline,
            'img': sources,
        }
1616