Code Duplication - SylvainDe/ComicBookMaker - Measure and Improve Code Quality continuously with Scrutinizer

Code Duplication Length = 22-27 lines in 4 locations

comics.py 4 locations



class ButterSafe(GenericListableComic):
    """Retriever for the Butter Safe comics, listed from the site's archive page."""
    name = 'butter'
    long_name = 'ButterSafe'
    url = 'http://buttersafe.com'
    get_url_from_archive_element = get_href
    # The three captured groups are read as year, month and day (see
    # get_comic_info).
    comic_link_re = re.compile('^%s/([0-9]*)/([0-9]*)/([0-9]*)/.*' % url)

    @classmethod
    def get_archive_elements(cls):
        """Return the archive page's comic links, in reverse document order.

        Only ``<a>`` tags whose href matches ``comic_link_re`` are kept.
        """
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive/'))
        comic_links = archive_soup.find_all('a', href=cls.comic_link_re)
        return reversed(comic_links)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for a single comic.

        soup is the parsed comic page and link is the archive element that
        led to it. The date comes from the comic URL, the title from the
        link text; the image's alt text is asserted to equal that title.
        """
        comic_url = cls.get_url_from_archive_element(link)
        title = link.string
        year, month, day = map(int, cls.comic_link_re.match(comic_url).groups())
        comic_div = soup.find('div', id='comic')
        img = comic_div.find('img')
        # Sanity check that the archive link and the page describe the same comic.
        assert img['alt'] == title
        return dict(
            title=title,
            day=day,
            month=month,
            year=year,
            img=[img['src']],
        )


class CalvinAndHobbes(GenericComic):
    """Class to retrieve Calvin and Hobbes comics."""


class AbstruseGoose(GenericListableComic):
    """Retriever for the Abstruse Goose comics, listed from the site's archive page."""
    name = 'abstruse'
    long_name = 'Abstruse Goose'
    url = 'http://abstrusegoose.com'
    get_url_from_archive_element = get_href
    # A comic URL is the site URL followed by digits only; the digits are
    # used as the comic number.
    comic_url_re = re.compile('^%s/([0-9]*)$' % url)
    # Pattern used to pick the comic image out of the page by its src.
    comic_img_re = re.compile('^%s/strips/.*' % url)

    @classmethod
    def get_archive_elements(cls):
        """Return the archive page's ``<a>`` tags whose href matches ``comic_url_re``."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive'))
        return archive_soup.find_all('a', href=cls.comic_url_re)

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Build the info dict for a single comic.

        soup is the parsed comic page and archive_elt is the archive link
        that led to it. The number is parsed from the comic URL, the title
        is the link text and the image is the first ``<img>`` whose src
        matches ``comic_img_re``.
        """
        url_match = cls.comic_url_re.match(cls.get_url_from_archive_element(archive_elt))
        info = {'num': int(url_match.groups()[0])}
        info['title'] = archive_elt.string
        strip_img = soup.find('img', src=cls.comic_img_re)
        info['img'] = [strip_img['src']]
        return info


class PhDComics(GenericNavigableComic):
    """Class to retrieve PHD Comics."""


class PoorlyDrawnLines(GenericListableComic):
    """Retriever for the Poorly Drawn Lines comics, listed from the site's archive page."""
    # Also on http://pdlcomics.tumblr.com
    name = 'poorlydrawn'
    long_name = 'Poorly Drawn Lines'
    url = 'https://www.poorlydrawnlines.com'
    _categories = ('POORLYDRAWN', )
    get_url_from_archive_element = get_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for a single comic.

        soup is the parsed comic page; link is not used here. At most one
        image is expected in the post. Its src fills 'img' and its title
        attribute fills 'title', which is an empty string when there is no
        image or no title attribute.
        """
        post = soup.find('div', class_='post')
        images = post.find_all('img')
        assert len(images) <= 1
        sources = [image['src'] for image in images]
        if images:
            title = images[0].get('title', "")
        else:
            title = ""
        return {'img': sources, 'title': title}

    @classmethod
    def get_archive_elements(cls):
        """Return the archive page's comic links, in reverse document order.

        Only ``<a>`` tags whose href starts with ``<url>/comic/`` followed
        by at least one character are kept.
        """
        archive_url = urljoin_wrapper(cls.url, 'archive')
        comic_href_re = re.compile('^%s/comic/.' % cls.url)
        archive_soup = get_soup_at_url(archive_url)
        return reversed(archive_soup.find_all('a', href=comic_href_re))


class LoadingComics(GenericNavigableComic):
    """Class to retrieve Loading Artist comics."""
    name = 'loadingartist'

        }


class PerryBibleFellowship(GenericListableComic):  # Is now navigable too
    """Retriever for the Perry Bible Fellowship comics, listed from the home page."""
    name = 'pbf'
    long_name = 'Perry Bible Fellowship'
    url = 'http://pbfcomics.com'
    get_url_from_archive_element = join_cls_url_to_href

    @classmethod
    def get_archive_elements(cls):
        """Return the links inside the 'all_thumbnails' div, in reverse document order."""
        thumbnail_links = (
            get_soup_at_url(cls.url)
            .find('div', id='all_thumbnails')
            .find_all('a')
        )
        return reversed(thumbnail_links)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for a single comic.

        soup is the parsed comic page; link is not used here. The name and
        the image URL are read from the page's og:title and og:image meta
        tags, and exactly one og:image tag is expected.
        """
        title_meta = soup.find('meta', property='og:title')
        name = title_meta['content']
        image_metas = soup.find_all('meta', property='og:image')
        assert len(image_metas) == 1
        return {
            'name': name,
            'img': [meta['content'] for meta in image_metas],
        }



		@@ 1387-1413 (lines=27) @@
1384
1385		class ButterSafe(GenericListableComic):
1386		"""Class to retrieve Butter Safe comics."""
1387		name = 'butter'
1388		long_name = 'ButterSafe'
1389		url = 'http://buttersafe.com'
1390		get_url_from_archive_element = get_href
1391		comic_link_re = re.compile('^%s/([0-9]*)/([0-9]*)/([0-9]*)/.*' % url)
1392
1393		@classmethod
1394		def get_archive_elements(cls):
1395		archive_url = urljoin_wrapper(cls.url, 'archive/')
1396		return reversed(get_soup_at_url(archive_url).find_all('a', href=cls.comic_link_re))
1397
1398		@classmethod
1399		def get_comic_info(cls, soup, link):
1400		"""Get information about a particular comics."""
1401		url = cls.get_url_from_archive_element(link)
1402		title = link.string
1403		year, month, day = [int(s) for s in cls.comic_link_re.match(url).groups()]
1404		img = soup.find('div', id='comic').find('img')
1405		assert img['alt'] == title
1406		return {
1407		'title': title,
1408		'day': day,
1409		'month': month,
1410		'year': year,
1411		'img': [img['src']],
1412		}
1413
1414
1415		class CalvinAndHobbes(GenericComic):
1416		"""Class to retrieve Calvin and Hobbes comics."""
		@@ 1453-1474 (lines=22) @@
1450
1451		class AbstruseGoose(GenericListableComic):
1452		"""Class to retrieve AbstruseGoose Comics."""
1453		name = 'abstruse'
1454		long_name = 'Abstruse Goose'
1455		url = 'http://abstrusegoose.com'
1456		get_url_from_archive_element = get_href
1457		comic_url_re = re.compile('^%s/([0-9]*)$' % url)
1458		comic_img_re = re.compile('^%s/strips/.*' % url)
1459
1460		@classmethod
1461		def get_archive_elements(cls):
1462		archive_url = urljoin_wrapper(cls.url, 'archive')
1463		return get_soup_at_url(archive_url).find_all('a', href=cls.comic_url_re)
1464
1465		@classmethod
1466		def get_comic_info(cls, soup, archive_elt):
1467		comic_url = cls.get_url_from_archive_element(archive_elt)
1468		num = int(cls.comic_url_re.match(comic_url).groups()[0])
1469		return {
1470		'num': num,
1471		'title': archive_elt.string,
1472		'img': [soup.find('img', src=cls.comic_img_re)['src']]
1473		}
1474
1475
1476		class PhDComics(GenericNavigableComic):
1477		"""Class to retrieve PHD Comics."""
		@@ 2063-2086 (lines=24) @@
2060
2061		class PoorlyDrawnLines(GenericListableComic):
2062		"""Class to retrieve Poorly Drawn Lines comics."""
2063		# Also on http://pdlcomics.tumblr.com
2064		name = 'poorlydrawn'
2065		long_name = 'Poorly Drawn Lines'
2066		url = 'https://www.poorlydrawnlines.com'
2067		_categories = ('POORLYDRAWN', )
2068		get_url_from_archive_element = get_href
2069
2070		@classmethod
2071		def get_comic_info(cls, soup, link):
2072		"""Get information about a particular comics."""
2073		imgs = soup.find('div', class_='post').find_all('img')
2074		assert len(imgs) <= 1
2075		return {
2076		'img': [i['src'] for i in imgs],
2077		'title': imgs[0].get('title', "") if imgs else "",
2078		}
2079
2080		@classmethod
2081		def get_archive_elements(cls):
2082		archive_url = urljoin_wrapper(cls.url, 'archive')
2083		url_re = re.compile('^%s/comic/.' % cls.url)
2084		return reversed(get_soup_at_url(archive_url).find_all('a', href=url_re))
2085
2086
2087		class LoadingComics(GenericNavigableComic):
2088		"""Class to retrieve Loading Artist comics."""
2089		name = 'loadingartist'
		@@ 1048-1069 (lines=22) @@
1045		}
1046
1047
1048		class PerryBibleFellowship(GenericListableComic): # Is now navigable too
1049		"""Class to retrieve Perry Bible Fellowship comics."""
1050		name = 'pbf'
1051		long_name = 'Perry Bible Fellowship'
1052		url = 'http://pbfcomics.com'
1053		get_url_from_archive_element = join_cls_url_to_href
1054
1055		@classmethod
1056		def get_archive_elements(cls):
1057		soup = get_soup_at_url(cls.url)
1058		thumbnails = soup.find('div', id='all_thumbnails')
1059		return reversed(thumbnails.find_all('a'))
1060
1061		@classmethod
1062		def get_comic_info(cls, soup, link):
1063		"""Get information about a particular comics."""
1064		name = soup.find('meta', property='og:title')['content']
1065		imgs = soup.find_all('meta', property='og:image')
1066		assert len(imgs) == 1
1067		return {
1068		'name': name,
1069		'img': [i['content'] for i in imgs],
1070		}
1071
1072

SylvainDe / ComicBookMaker

Code Duplication Length = 22-27 lines in 4 locations

comics.py 4 locations