@@ 648-670 (lines=23) @@
| 645 |
| 646 | class PenelopeBagieu(GenericNavigableComic):
| 647 |     """Class to retrieve comics from Penelope Bagieu's blog."""
| 648 |     name = 'bagieu'
| 649 |     long_name = 'Ma vie est tout a fait fascinante (Bagieu)'
| 650 |     url = 'http://www.penelope-jolicoeur.com'
| 651 |     get_navi_link = get_link_rel_next
| 652 |
| 653 |     @classmethod
| 654 |     def get_first_comic_link(cls):
| 655 |         """Get link to first comics."""
| 656 |         return {'href': 'http://www.penelope-jolicoeur.com/2007/02/ma-vie-mon-oeuv.html'}
| 657 |
| 658 |     @classmethod
| 659 |     def get_comic_info(cls, soup, link):
| 660 |         """Get information about a particular comics."""
| 661 |         date_str = soup.find('h2', class_='date-header').string
| 662 |         day = string_to_date(date_str, "%A %d %B %Y", "fr_FR.utf8")
| 663 |         imgs = soup.find('div', class_='entry-body').find_all('img')
| 664 |         title = soup.find('h3', class_='entry-header').string
| 665 |         return {
| 666 |             'title': title,
| 667 |             'img': [i['src'] for i in imgs],
| 668 |             'month': day.month,
| 669 |             'year': day.year,
| 670 |             'day': day.day,
| 671 |         }
| 672 |
| 673 |
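The `get_comic_info` flagged above leans on `string_to_date` accepting an optional locale, so that the French weekday and month names on the blog parse with `%A %d %B %Y`. A minimal sketch of how such a helper could work, assuming it wraps `datetime.strptime` and temporarily switches `LC_TIME` (the project's real implementation may differ):

import datetime
import locale
from contextlib import contextmanager


@contextmanager
def temporary_locale(loc):
    """Switch LC_TIME temporarily so strptime understands localized names."""
    old = locale.setlocale(locale.LC_TIME)
    try:
        yield locale.setlocale(locale.LC_TIME, loc)
    finally:
        locale.setlocale(locale.LC_TIME, old)


def string_to_date(string, date_format, local=None):
    """Parse a date string, optionally under a given locale (sketch only)."""
    if local is None:
        return datetime.datetime.strptime(string, date_format).date()
    with temporary_locale(local):
        return datetime.datetime.strptime(string, date_format).date()


# Example (assumes the fr_FR.utf8 locale is installed on the system):
# string_to_date("lundi 05 février 2007", "%A %d %B %Y", "fr_FR.utf8")
# -> datetime.date(2007, 2, 5)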
|
|
@@ 620-644 (lines=25) @@
| 617 |     url = "http://itsthetie.com"
| 618 |     get_first_comic_link = get_div_navfirst_a
| 619 |     get_navi_link = get_a_rel_next
| 620 |
| 621 |     @classmethod
| 622 |     def get_comic_info(cls, soup, link):
| 623 |         """Get information about a particular comics."""
| 624 |         title = soup.find('h1', class_='comic-title').find('a').string
| 625 |         date_str = soup.find('header', class_='comic-meta entry-meta').find('a').string
| 626 |         day = string_to_date(date_str, "%B %d, %Y")
| 627 |         # Bonus images may or may not be in meta og:image.
| 628 |         imgs = soup.find_all('meta', property='og:image')
| 629 |         imgs_src = [i['content'] for i in imgs]
| 630 |         bonus = soup.find_all('img', attrs={'data-oversrc': True})
| 631 |         bonus_src = [b['data-oversrc'] for b in bonus]
| 632 |         all_imgs_src = imgs_src + [s for s in bonus_src if s not in imgs_src]
| 633 |         all_imgs_src = [s for s in all_imgs_src if not s.endswith("/2016/01/bonus-panel.png")]
| 634 |         tag_meta = soup.find('meta', property='article:tag')
| 635 |         tags = tag_meta['content'] if tag_meta else ""
| 636 |         return {
| 637 |             'title': title,
| 638 |             'month': day.month,
| 639 |             'year': day.year,
| 640 |             'day': day.day,
| 641 |             'img': all_imgs_src,
| 642 |             'tags': tags,
| 643 |         }
| 644 |
| 645 |
| 646 | class PenelopeBagieu(GenericNavigableComic):
| 647 |     """Class to retrieve comics from Penelope Bagieu's blog."""
|
@@ 1660-1680 (lines=21) @@
| 1657 |         return reversed(get_soup_at_url(archive_url).find_all('a', rel='bookmark'))
| 1658 |
| 1659 |     @classmethod
| 1660 |     def get_comic_info(cls, soup, link):
| 1661 |         """Get information about a particular comics."""
| 1662 |         date_str = soup.find('div', class_='postdate').find('em').string
| 1663 |         day = string_to_date(remove_st_nd_rd_th_from_date(date_str), "%B %d, %Y")
| 1664 |         div = soup.find('div', id='comic')
| 1665 |         if div:
| 1666 |             img = div.find('img')
| 1667 |             img_src = [img['src']]
| 1668 |             alt = img['alt']
| 1669 |             assert alt == img['title']
| 1670 |             title = soup.find('meta', property='og:title')['content']
| 1671 |         else:
| 1672 |             img_src = []
| 1673 |             alt = ''
| 1674 |             title = ''
| 1675 |         return {
| 1676 |             'month': day.month,
| 1677 |             'year': day.year,
| 1678 |             'day': day.day,
| 1679 |             'img': img_src,
| 1680 |             'title': title,
| 1681 |             'alt': alt,
| 1682 |             'tags': ' '.join(t.string for t in soup.find('div', class_='postmeta').find_all('a', rel='tag')),
| 1683 |         }
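This fragment first strips English ordinal suffixes from the date string before handing it to `string_to_date`, since `%d` does not accept forms like "1st" or "3rd". A plausible sketch of `remove_st_nd_rd_th_from_date`, reconstructed from its name and call site rather than from the project's source:

import re


def remove_st_nd_rd_th_from_date(string):
    """Turn 'February 3rd, 2016' into 'February 3, 2016' so "%d" can parse it."""
    return re.sub(r"(?<=\d)(st|nd|rd|th)", "", string)


assert remove_st_nd_rd_th_from_date("February 3rd, 2016") == "February 3, 2016"
assert remove_st_nd_rd_th_from_date("August 21st, 2015") == "August 21, 2015"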
|
@@ 882-904 (lines=23) @@
| 879 |                 if not img['src'].endswith(
| 880 |                     ('link.gif', '32.png', 'twpbookad.jpg',
| 881 |                      'merchad.jpg', 'header.gif', 'tipjar.jpg'))]
| 882 |         return {
| 883 |             'title': title.string if title else None,
| 884 |             'title2': ' '.join(img.get('alt') for img in imgs if img.get('alt')),
| 885 |             'img': [urljoin_wrapper(cls.url, img['src']) for img in imgs],
| 886 |         }
| 887 |
| 888 |
| 889 | class DeadlyPanel(GenericEmptyComic, GenericNavigableComic):
| 890 |     """Class to retrieve Deadly Panel comics."""
| 891 |     # Also on https://tapastic.com/series/deadlypanel
| 892 |     name = 'deadly'
| 893 |     long_name = 'Deadly Panel'
| 894 |     url = 'http://www.deadlypanel.com'
| 895 |     get_first_comic_link = get_a_navi_navifirst
| 896 |     get_navi_link = get_a_navi_comicnavnext_navinext
| 897 |
| 898 |     @classmethod
| 899 |     def get_comic_info(cls, soup, link):
| 900 |         """Get information about a particular comics."""
| 901 |         imgs = soup.find('div', id='comic').find_all('img')
| 902 |         assert all(i['alt'] == i['title'] for i in imgs)
| 903 |         return {
| 904 |             'img': [i['src'] for i in imgs],
| 905 |         }
| 906 |
| 907 |
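Most of the flagged `get_comic_info` methods end by unpacking the same parsed date into 'day', 'month' and 'year' keys, which is a large part of why these blocks are reported as clones. One way that repetition could be factored out is a small shared helper along the following lines (hypothetical; not part of the reported codebase):

import datetime


def date_info(day):
    """Expand a datetime.date into the keys every get_comic_info returns."""
    return {'day': day.day, 'month': day.month, 'year': day.year}


# A get_comic_info could then build its result as, for instance:
#     return dict(date_info(day), title=title, img=[i['src'] for i in imgs])
example = dict(date_info(datetime.date(2016, 1, 2)), title='example title')
assert example == {'day': 2, 'month': 1, 'year': 2016, 'title': 'example title'}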
|