Code Duplication - SylvainDe/ComicBookMaker - Measure and Improve Code Quality continuously with Scrutinizer

Code Duplication Length = 22-27 lines in 2 locations

comics.py 2 locations


    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect the details of one comic from its page and archive link.

        `link` is the archive element of the comic: its string holds the
        publication date and the string of the node following it is used
        as the comic text. `soup` is queried for the 'img' element whose
        'src' is matched against '^<class url>/comics/'.
        """
        comic_url = cls.get_url_from_archive_element(link)
        # The first captured group of the link pattern is used as the number.
        number = int(cls.comic_link_re.match(comic_url).group(1))
        raw_date = link.string
        caption = link.next_sibling.string
        # Presumably strips ordinal suffixes ("1st", "2nd", ...) so that the
        # date fits the "%B %d, %Y" format -- confirm against the helper.
        cleaned_date = remove_st_nd_rd_th_from_date(raw_date)
        published = string_to_date(cleaned_date, "%B %d, %Y")
        picture = soup.find('img', src=re.compile('^%s/comics/' % cls.url))
        return {
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [picture.get('src')],
            'title': picture.get('title'),
            'text': caption,
            'num': number,
        }


class ButterSafe(GenericListableComic):
    """Retriever for the Butter Safe comics."""
    name = 'butter'
    long_name = 'ButterSafe'
    url = 'http://buttersafe.com'
    get_url_from_archive_element = get_href
    # Comic links are the site url followed by three numeric path segments.
    comic_link_re = re.compile('^%s/([0-9]*)/([0-9]*)/([0-9]*)/.*' % url)

    @classmethod
    def get_archive_elements(cls):
        """Return the comic links of the archive page, last found first."""
        archive_page = get_soup_at_url(urljoin_wrapper(cls.url, 'archive/'))
        comic_links = archive_page.find_all('a', href=cls.comic_link_re)
        return reversed(comic_links)

        for link in get_soup_at_url(cls.url).find_all('a', href=link_re):
            url = link['href']
            year, month = link_re.match(url).groups()
            if date(int(year), int(month), 1) + timedelta(days=31) >= last_date:
                img_re = re.compile('^%s%s([0-9]*)' % (year, month))
                month_url = urljoin_wrapper(cls.url, url)
                for img in get_soup_at_url(month_url).find_all('img', src=img_re):
                    img_src = img['src']
                    day = int(img_re.match(img_src).groups()[0])
                    comic_date = date(int(year), int(month), day)
                    if comic_date > last_date:
                        yield {
                            'url': month_url,
                            'year': int(year),
                            'month': int(month),
                            'day': int(day),
                            'img': ['%s%s/%s/%s' % (cls.url, year, month, img_src)],
                        }
                        last_date = comic_date


class AbstruseGoose(GenericListableComic):
    """Class to retrieve AbstruseGoose Comics."""
    name = 'abstruse'
    long_name = 'Abstruse Goose'
    url = 'http://abstrusegoose.com'
    get_url_from_archive_element = get_href
    comic_url_re = re.compile('^%s/([0-9]*)$' % url)

		@@ 1386-1412 (lines=27) @@
1383		@classmethod
1384		def get_comic_info(cls, soup, link):
1385		"""Get information about a particular comics."""
1386		url = cls.get_url_from_archive_element(link)
1387		num = int(cls.comic_link_re.match(url).groups()[0])
1388		date_str = link.string
1389		text = link.next_sibling.string
1390		day = string_to_date(remove_st_nd_rd_th_from_date(date_str), "%B %d, %Y")
1391		comic_img_re = re.compile('^%s/comics/' % cls.url)
1392		img = soup.find('img', src=comic_img_re)
1393		return {
1394		'month': day.month,
1395		'year': day.year,
1396		'day': day.day,
1397		'img': [img.get('src')],
1398		'title': img.get('title'),
1399		'text': text,
1400		'num': num,
1401		}
1402
1403
1404		class ButterSafe(GenericListableComic):
1405		"""Class to retrieve Butter Safe comics."""
1406		name = 'butter'
1407		long_name = 'ButterSafe'
1408		url = 'http://buttersafe.com'
1409		get_url_from_archive_element = get_href
1410		comic_link_re = re.compile('^%s/([0-9]*)/([0-9]*)/([0-9]*)/.*' % url)
1411
1412		@classmethod
1413		def get_archive_elements(cls):
1414		archive_url = urljoin_wrapper(cls.url, 'archive/')
1415		return reversed(get_soup_at_url(archive_url).find_all('a', href=cls.comic_link_re))
		@@ 1452-1473 (lines=22) @@
1449		for link in get_soup_at_url(cls.url).find_all('a', href=link_re):
1450		url = link['href']
1451		year, month = link_re.match(url).groups()
1452		if date(int(year), int(month), 1) + timedelta(days=31) >= last_date:
1453		img_re = re.compile('^%s%s([0-9]*)' % (year, month))
1454		month_url = urljoin_wrapper(cls.url, url)
1455		for img in get_soup_at_url(month_url).find_all('img', src=img_re):
1456		img_src = img['src']
1457		day = int(img_re.match(img_src).groups()[0])
1458		comic_date = date(int(year), int(month), day)
1459		if comic_date > last_date:
1460		yield {
1461		'url': month_url,
1462		'year': int(year),
1463		'month': int(month),
1464		'day': int(day),
1465		'img': ['%s%s/%s/%s' % (cls.url, year, month, img_src)],
1466		}
1467		last_date = comic_date
1468
1469
1470		class AbstruseGoose(GenericListableComic):
1471		"""Class to retrieve AbstruseGoose Comics."""
1472		name = 'abstruse'
1473		long_name = 'Abstruse Goose'
1474		url = 'http://abstrusegoose.com'
1475		get_url_from_archive_element = get_href
1476		comic_url_re = re.compile('^%s/([0-9]*)$' % url)

SylvainDe / ComicBookMaker

Code Duplication Length = 22-27 lines in 2 locations

comics.py 2 locations