Code Duplication - SylvainDe/ComicBookMaker - Measure and Improve Code Quality continuously with Scrutinizer

Code Duplication Length = 22-27 lines in 2 locations

comics.py 2 locations




class DinosaurComics(GenericListableComic):
    """Class to retrieve Dinosaur Comics comics."""
    name = 'dinosaur'
    long_name = 'Dinosaur Comics'
    url = 'http://www.qwantz.com'
    get_url_from_archive_element = get_href
    comic_link_re = re.compile('^%s/index.php\\?comic=([0-9]*)$' % url)

    @classmethod
    def get_archive_elements(cls):
        archive_url = urljoin_wrapper(cls.url, 'archive.php')
        # first link is random -> skip it
        return reversed(get_soup_at_url(archive_url).find_all('a', href=cls.comic_link_re)[1:])

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics."""
        url = cls.get_url_from_archive_element(link)
        num = int(cls.comic_link_re.match(url).groups()[0])
        date_str = link.string
        text = link.next_sibling.string
        day = string_to_date(remove_st_nd_rd_th_from_date(date_str), "%B %d, %Y")
        comic_img_re = re.compile('^%s/comics/' % cls.url)
        img = soup.find('img', src=comic_img_re)
        return {
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [img.get('src')],
            'title': img.get('title'),
            'text': text,


class CalvinAndHobbes(GenericComic):
    """Class to retrieve Calvin and Hobbes comics."""
    # Also on http://www.gocomics.com/calvinandhobbes/
    name = 'calvin'
    long_name = 'Calvin and Hobbes'
    # This is not through any official webpage but eh...
    url = 'http://marcel-oehler.marcellosendos.ch/comics/ch/'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generator to get the next comic. Implementation of GenericComic's abstract method."""
        last_date = get_date_for_comic(
            last_comic) if last_comic else date(1985, 11, 1)
        link_re = re.compile('^([0-9]*)/([0-9]*)/')
        img_re = re.compile('')
        for link in get_soup_at_url(cls.url).find_all('a', href=link_re):
            url = link['href']
            year, month = link_re.match(url).groups()
            if date(int(year), int(month), 1) + timedelta(days=31) >= last_date:
                img_re = re.compile('^%s%s([0-9]*)' % (year, month))
                month_url = urljoin_wrapper(cls.url, url)
                for img in get_soup_at_url(month_url).find_all('img', src=img_re):
                    img_src = img['src']
                    day = int(img_re.match(img_src).groups()[0])
                    comic_date = date(int(year), int(month), day)
                    if comic_date > last_date:
                        yield {

		@@ 1327-1353 (lines=27) @@
1324
1325
1326		class DinosaurComics(GenericListableComic):
1327		"""Class to retrieve Dinosaur Comics comics."""
1328		name = 'dinosaur'
1329		long_name = 'Dinosaur Comics'
1330		url = 'http://www.qwantz.com'
1331		get_url_from_archive_element = get_href
1332		comic_link_re = re.compile('^%s/index.php\\?comic=([0-9]*)$' % url)
1333
1334		@classmethod
1335		def get_archive_elements(cls):
1336		archive_url = urljoin_wrapper(cls.url, 'archive.php')
1337		# first link is random -> skip it
1338		return reversed(get_soup_at_url(archive_url).find_all('a', href=cls.comic_link_re)[1:])
1339
1340		@classmethod
1341		def get_comic_info(cls, soup, link):
1342		"""Get information about a particular comics."""
1343		url = cls.get_url_from_archive_element(link)
1344		num = int(cls.comic_link_re.match(url).groups()[0])
1345		date_str = link.string
1346		text = link.next_sibling.string
1347		day = string_to_date(remove_st_nd_rd_th_from_date(date_str), "%B %d, %Y")
1348		comic_img_re = re.compile('^%s/comics/' % cls.url)
1349		img = soup.find('img', src=comic_img_re)
1350		return {
1351		'month': day.month,
1352		'year': day.year,
1353		'day': day.day,
1354		'img': [img.get('src')],
1355		'title': img.get('title'),
1356		'text': text,
		@@ 1393-1414 (lines=22) @@
1390
1391		class CalvinAndHobbes(GenericComic):
1392		"""Class to retrieve Calvin and Hobbes comics."""
1393		# Also on http://www.gocomics.com/calvinandhobbes/
1394		name = 'calvin'
1395		long_name = 'Calvin and Hobbes'
1396		# This is not through any official webpage but eh...
1397		url = 'http://marcel-oehler.marcellosendos.ch/comics/ch/'
1398
1399		@classmethod
1400		def get_next_comic(cls, last_comic):
1401		"""Generator to get the next comic. Implementation of GenericComic's abstract method."""
1402		last_date = get_date_for_comic(
1403		last_comic) if last_comic else date(1985, 11, 1)
1404		link_re = re.compile('^([0-9]*)/([0-9]*)/')
1405		img_re = re.compile('')
1406		for link in get_soup_at_url(cls.url).find_all('a', href=link_re):
1407		url = link['href']
1408		year, month = link_re.match(url).groups()
1409		if date(int(year), int(month), 1) + timedelta(days=31) >= last_date:
1410		img_re = re.compile('^%s%s([0-9]*)' % (year, month))
1411		month_url = urljoin_wrapper(cls.url, url)
1412		for img in get_soup_at_url(month_url).find_all('img', src=img_re):
1413		img_src = img['src']
1414		day = int(img_re.match(img_src).groups()[0])
1415		comic_date = date(int(year), int(month), day)
1416		if comic_date > last_date:
1417		yield {

SylvainDe / ComicBookMaker

Code Duplication Length = 22-27 lines in 2 locations

comics.py 2 locations