Code Duplication - SylvainDe/ComicBookMaker - Measure and Improve Code Quality continuously with Scrutinizer

Code Duplication Length = 20-23 lines in 6 locations

comics.py 6 locations


        day = string_to_date(date_str, "%B %d, %Y")
        # Bonus images may or may not be in meta og:image.
        imgs = soup.find_all('meta', property='og:image')
        imgs_src = [i['content'] for i in imgs]
        bonus = soup.find_all('img', attrs={'data-oversrc': True})
        bonus_src = [b['data-oversrc'] for b in bonus]
        all_imgs_src = imgs_src + [s for s in bonus_src if s not in imgs_src]
        all_imgs_src = [s for s in all_imgs_src if not s.endswith("/2016/01/bonus-panel.png")]
        tag_meta = soup.find('meta', property='article:tag')
        tags = tag_meta['content'] if tag_meta else ""
        return {
            'title': title,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': all_imgs_src,
            'tags': tags,
        }


class PenelopeBagieu(GenericNavigableComic):
    """Class to retrieve comics from Penelope Bagieu's blog."""
    name = 'bagieu'
    long_name = 'Ma vie est tout a fait fascinante (Bagieu)'
    url = 'http://www.penelope-jolicoeur.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with:
        - 'title': the string of the page's <title> tag, or None when the
          lookup result is falsy;
        - 'title2': the non-empty alt texts of the kept images, joined;
        - 'img': the src of every kept image, passed through
          urljoin_wrapper together with cls.url.
        """
        # Images whose src ends with one of these file names are left out.
        skipped_endings = ('link.gif', '32.png', 'twpbookad.jpg',
                           'merchad.jpg', 'header.gif', 'tipjar.jpg')
        title_tag = soup.find('title')
        kept = []
        for tag in soup.find_all('img'):
            if tag['src'].endswith(skipped_endings):
                continue
            kept.append(tag)
        # Only images that actually carry an alt text contribute to 'title2'.
        alt_texts = []
        for tag in kept:
            alt = tag.get('alt')
            if alt:
                alt_texts.append(alt)
        info = {}
        info['title'] = title_tag.string if title_tag else None
        info['title2'] = '  '.join(alt_texts)
        info['img'] = [urljoin_wrapper(cls.url, tag['src']) for tag in kept]
        return info


class DeadlyPanel(GenericEmptyComic, GenericNavigableComic):
    """Class to retrieve Deadly Panel comics."""
    # Also on https://tapastic.com/series/deadlypanel
    name = 'deadly'
    long_name = 'Deadly Panel'
    url = 'http://www.deadlypanel.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics."""
        imgs = soup.find('div', id='comic').find_all('img')


    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        Returns a dict with a single 'href' key holding a hard-coded URL.
        """
        first_url = 'http://www.penelope-jolicoeur.com/2007/02/ma-vie-mon-oeuv.html'
        return {'href': first_url}

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The date text comes from the 'date-header' h2 and is parsed by
        string_to_date with the "%A %d %B %Y" format and "fr_FR.utf8" as
        third argument (presumably the locale to parse with - confirm
        against string_to_date). Images are every <img> inside the
        'entry-body' div; the title is the string of the 'entry-header' h3.

        Returns a dict with 'title', 'img' (list of src values), 'month',
        'year' and 'day'.
        """
        date_header = soup.find('h2', class_='date-header')
        post_date = string_to_date(date_header.string, "%A %d %B %Y", "fr_FR.utf8")
        entry_body = soup.find('div', class_='entry-body')
        pictures = entry_body.find_all('img')
        entry_header = soup.find('h3', class_='entry-header')
        info = {'title': entry_header.string}
        info['img'] = [pic['src'] for pic in pictures]
        info['month'] = post_date.month
        info['year'] = post_date.year
        info['day'] = post_date.day
        return info


class OneOneOneOneComic(GenericNavigableComic):
    """Class to retrieve 1111 Comics."""
    # Also on http://comics1111.tumblr.com
    # Also on https://tapastic.com/series/1111-Comics
    name = '1111'
    long_name = '1111 Comics'
    url = 'http://www.1111comics.me'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The title is the string of the link inside the 'comic-title' h1.
        The date is the string of the link inside the
        'comic-meta entry-meta' header, parsed with the "%B %d, %Y" format.
        Images are the 'content' of every og:image meta tag.

        Returns a dict with 'title', 'month', 'year', 'day' and 'img'.
        """
        title = soup.find('h1', class_='comic-title').find('a').string
        date_str = soup.find('header', class_='comic-meta entry-meta').find('a').string
        day = string_to_date(date_str, "%B %d, %Y")
        imgs = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [i['content'] for i in imgs],
        }


class AngryAtNothing(GenericNavigableComic):
    """Class to retrieve Angry at Nothing comics."""
    # Also on http://tapastic.com/series/Comics-yeah-definitely-comics-
    name = 'angry'

        imgs = soup.find("div", id="comic").find_all("img")
        assert all(i['alt'] == i['title'] for i in imgs)
        assert len(imgs) <= 1
        alt = imgs[0]['alt'] if imgs else ""
        return {
            'img': [i['src'] for i in imgs],
            'title': title,
            'alt': alt,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year
        }


class LastPlaceComics(GenericNavigableComic):
    """Class to retrieve Last Place Comics."""
    name = 'lastplace'
    long_name = 'LastPlaceComics'
    url = "http://lastplacecomics.com"
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics."""

    long_name = 'Something Of That Ilk'
    url = 'http://www.somethingofthatilk.com'


class InfiniteMonkeyBusiness(GenericNavigableComic):
    """Generic class to retrieve InfiniteMonkeyBusiness comics."""
    name = 'monkey'
    long_name = 'Infinite Monkey Business'
    url = 'http://infinitemonkeybusiness.net'
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        Returns a dict with a single 'href' key holding a hard-coded URL.
        """
        first_url = 'http://infinitemonkeybusiness.net/comic/pillory/'
        return {'href': first_url}

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The title is the 'content' of the og:title meta tag; the images
        are the src of every <img> found inside the div with id 'comic'.

        Returns a dict with 'title' and 'img'.
        """
        og_title = soup.find('meta', property='og:title')
        page_title = og_title['content']
        comic_div = soup.find('div', id='comic')
        sources = []
        for picture in comic_div.find_all('img'):
            sources.append(picture['src'])
        return {'title': page_title, 'img': sources}


		@@ 603-625 (lines=23) @@
600		day = string_to_date(date_str, "%B %d, %Y")
601		# Bonus images may or may not be in meta og:image.
602		imgs = soup.find_all('meta', property='og:image')
603		imgs_src = [i['content'] for i in imgs]
604		bonus = soup.find_all('img', attrs={'data-oversrc': True})
605		bonus_src = [b['data-oversrc'] for b in bonus]
606		all_imgs_src = imgs_src + [s for s in bonus_src if s not in imgs_src]
607		all_imgs_src = [s for s in all_imgs_src if not s.endswith("/2016/01/bonus-panel.png")]
608		tag_meta = soup.find('meta', property='article:tag')
609		tags = tag_meta['content'] if tag_meta else ""
610		return {
611		'title': title,
612		'month': day.month,
613		'year': day.year,
614		'day': day.day,
615		'img': all_imgs_src,
616		'tags': tags,
617		}
618
619
620		class PenelopeBagieu(GenericNavigableComic):
621		"""Class to retrieve comics from Penelope Bagieu's blog."""
622		name = 'bagieu'
623		long_name = 'Ma vie est tout a fait fascinante (Bagieu)'
624		url = 'http://www.penelope-jolicoeur.com'
625		get_navi_link = get_link_rel_next
626
627		@classmethod
628		def get_first_comic_link(cls):
		@@ 851-872 (lines=22) @@
848		@classmethod
849		def get_comic_info(cls, soup, link):
850		"""Get information about a particular comics."""
851		title = soup.find('title')
852		imgs = [img for img in soup.find_all('img')
853		if not img['src'].endswith(
854		('link.gif', '32.png', 'twpbookad.jpg',
855		'merchad.jpg', 'header.gif', 'tipjar.jpg'))]
856		return {
857		'title': title.string if title else None,
858		'title2': ' '.join(img.get('alt') for img in imgs if img.get('alt')),
859		'img': [urljoin_wrapper(cls.url, img['src']) for img in imgs],
860		}
861
862
863		class DeadlyPanel(GenericEmptyComic, GenericNavigableComic):
864		"""Class to retrieve Deadly Panel comics."""
865		# Also on https://tapastic.com/series/deadlypanel
866		name = 'deadly'
867		long_name = 'Deadly Panel'
868		url = 'http://www.deadlypanel.com'
869		get_first_comic_link = get_a_navi_navifirst
870		get_navi_link = get_a_navi_comicnavnext_navinext
871
872		@classmethod
873		def get_comic_info(cls, soup, link):
874		"""Get information about a particular comics."""
875		imgs = soup.find('div', id='comic').find_all('img')
		@@ 629-650 (lines=22) @@
626
627		@classmethod
628		def get_first_comic_link(cls):
629		"""Get link to first comics."""
630		return {'href': 'http://www.penelope-jolicoeur.com/2007/02/ma-vie-mon-oeuv.html'}
631
632		@classmethod
633		def get_comic_info(cls, soup, link):
634		"""Get information about a particular comics."""
635		date_str = soup.find('h2', class_='date-header').string
636		day = string_to_date(date_str, "%A %d %B %Y", "fr_FR.utf8")
637		imgs = soup.find('div', class_='entry-body').find_all('img')
638		title = soup.find('h3', class_='entry-header').string
639		return {
640		'title': title,
641		'img': [i['src'] for i in imgs],
642		'month': day.month,
643		'year': day.year,
644		'day': day.day,
645		}
646
647
648		class OneOneOneOneComic(GenericNavigableComic):
649		"""Class to retrieve 1111 Comics."""
650		# Also on http://comics1111.tumblr.com
651		# Also on https://tapastic.com/series/1111-Comics
652		name = '1111'
653		long_name = '1111 Comics'
		@@ 654-674 (lines=21) @@
651		# Also on https://tapastic.com/series/1111-Comics
652		name = '1111'
653		long_name = '1111 Comics'
654		url = 'http://www.1111comics.me'
655		get_first_comic_link = get_div_navfirst_a
656		get_navi_link = get_link_rel_next
657
658		@classmethod
659		def get_comic_info(cls, soup, link):
660		"""Get information about a particular comics."""
661		title = soup.find('h1', class_='comic-title').find('a').string
662		date_str = soup.find('header', class_='comic-meta entry-meta').find('a').string
663		day = string_to_date(date_str, "%B %d, %Y")
664		imgs = soup.find_all('meta', property='og:image')
665		return {
666		'title': title,
667		'month': day.month,
668		'year': day.year,
669		'day': day.day,
670		'img': [i['content'] for i in imgs],
671		}
672
673
674		class AngryAtNothing(GenericNavigableComic):
675		"""Class to retrieve Angry at Nothing comics."""
676		# Also on http://tapastic.com/series/Comics-yeah-definitely-comics-
677		name = 'angry'
		@@ 2490-2509 (lines=20) @@
2487		imgs = soup.find("div", id="comic").find_all("img")
2488		assert all(i['alt'] == i['title'] for i in imgs)
2489		assert len(imgs) <= 1
2490		alt = imgs[0]['alt'] if imgs else ""
2491		return {
2492		'img': [i['src'] for i in imgs],
2493		'title': title,
2494		'alt': alt,
2495		'author': author,
2496		'day': day.day,
2497		'month': day.month,
2498		'year': day.year
2499		}
2500
2501
2502		class LastPlaceComics(GenericNavigableComic):
2503		"""Class to retrieve Last Place Comics."""
2504		name = 'lastplace'
2505		long_name = 'LastPlaceComics'
2506		url = "http://lastplacecomics.com"
2507		get_first_comic_link = get_a_comicnavbase_comicnavfirst
2508		get_navi_link = get_link_rel_next
2509
2510		@classmethod
2511		def get_comic_info(cls, soup, link):
2512		"""Get information about a particular comics."""
		@@ 1594-1613 (lines=20) @@
1591		long_name = 'Something Of That Ilk'
1592		url = 'http://www.somethingofthatilk.com'
1593
1594
1595		class InfiniteMonkeyBusiness(GenericNavigableComic):
1596		"""Generic class to retrieve InfiniteMonkeyBusiness comics."""
1597		name = 'monkey'
1598		long_name = 'Infinite Monkey Business'
1599		url = 'http://infinitemonkeybusiness.net'
1600		get_navi_link = get_a_navi_comicnavnext_navinext
1601
1602		@classmethod
1603		def get_first_comic_link(cls):
1604		"""Get link to first comics."""
1605		return {'href': 'http://infinitemonkeybusiness.net/comic/pillory/'}
1606
1607		@classmethod
1608		def get_comic_info(cls, soup, link):
1609		"""Get information about a particular comics."""
1610		title = soup.find('meta', property='og:title')['content']
1611		imgs = soup.find('div', id='comic').find_all('img')
1612		return {
1613		'title': title,
1614		'img': [i['src'] for i in imgs],
1615		}
1616

SylvainDe / ComicBookMaker

Code Duplication Length = 20-23 lines in 6 locations

comics.py 6 locations