Code Duplication - SylvainDe/ComicBookMaker - Measure and Improve Code Quality continuously with Scrutinizer

Code Duplication Length = 19-20 lines in 2 locations

comics.py 2 locations


    long_name = 'Fat Awesome'
    url = 'http://fatawesome.com/comics'
    get_navi_link = get_a_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://fatawesome.com/shortbus/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics."""
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        description = soup.find('meta', attrs={'name': 'description'})['content']
        tags_prop = soup.find('meta', property='article:tag')
        tags = tags_prop['content'] if tags_prop else ""
        date_str = soup.find('meta', property='article:published_time')['content'][:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        imgs = soup.find_all('img', attrs={'data-recalc-dims': "1"})
        assert len(imgs) == 1
        return {
            'title': title,
            'description': description,
            'tags': tags,
            'alt': "".join(i['alt'] for i in imgs),
            'img': [i['src'].rsplit('?', 1)[0] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,

        comic_url = cls.get_url_from_link(link)
        return {
            'title': soup.find('h2', id='titleheader').string,
            'title2': soup.find('div', id='subtext').string,
            'alt': img.get('title'),
            'img': [urljoin_wrapper(comic_url, img['src'].strip())],
            'num': int(comic_url.split('/')[-1]),
        }


class InvisibleBread(GenericListableComic):
    """Retriever for the Invisible Bread comics."""
    # Also on http://www.gocomics.com/invisible-bread
    name = 'invisiblebread'
    long_name = 'Invisible Bread'
    url = 'http://invisiblebread.com'

    @classmethod
    def get_archive_elements(cls):
        """Return the archive page's title cells, in reversed order.

        Fetches the 'archives/' page relative to ``cls.url`` and collects
        every ``td`` element carrying the ``archive-title`` class.
        """
        archive_page = get_soup_at_url(urljoin_wrapper(cls.url, 'archives/'))
        title_cells = archive_page.find_all('td', class_='archive-title')
        return reversed(title_cells)

    @classmethod
    def get_url_from_archive_element(cls, td):
        """Return the ``href`` of the ``a`` element found inside ``td``."""
        anchor = td.find('a')
        return anchor['href']

		@@ 2223-2242 (lines=20) @@
2220		long_name = 'Fat Awesome'
2221		url = 'http://fatawesome.com/comics'
2222		get_navi_link = get_a_rel_next
2223		get_first_comic_link = simulate_first_link
2224		first_url = 'http://fatawesome.com/shortbus/'
2225
2226		@classmethod
2227		def get_comic_info(cls, soup, link):
2228		"""Get information about a particular comics."""
2229		title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
2230		description = soup.find('meta', attrs={'name': 'description'})['content']
2231		tags_prop = soup.find('meta', property='article:tag')
2232		tags = tags_prop['content'] if tags_prop else ""
2233		date_str = soup.find('meta', property='article:published_time')['content'][:10]
2234		day = string_to_date(date_str, "%Y-%m-%d")
2235		imgs = soup.find_all('img', attrs={'data-recalc-dims': "1"})
2236		assert len(imgs) == 1
2237		return {
2238		'title': title,
2239		'description': description,
2240		'tags': tags,
2241		'alt': "".join(i['alt'] for i in imgs),
2242		'img': [i['src'].rsplit('?', 1)[0] for i in imgs],
2243		'month': day.month,
2244		'year': day.year,
2245		'day': day.day,
		@@ 1914-1932 (lines=19) @@
1911		comic_url = cls.get_url_from_link(link)
1912		return {
1913		'title': soup.find('h2', id='titleheader').string,
1914		'title2': soup.find('div', id='subtext').string,
1915		'alt': img.get('title'),
1916		'img': [urljoin_wrapper(comic_url, img['src'].strip())],
1917		'num': int(comic_url.split('/')[-1]),
1918		}
1919
1920
1921		class InvisibleBread(GenericListableComic):
1922		"""Class to retrieve Invisible Bread comics."""
1923		# Also on http://www.gocomics.com/invisible-bread
1924		name = 'invisiblebread'
1925		long_name = 'Invisible Bread'
1926		url = 'http://invisiblebread.com'
1927
1928		@classmethod
1929		def get_archive_elements(cls):
1930		archive_url = urljoin_wrapper(cls.url, 'archives/')
1931		return reversed(get_soup_at_url(archive_url).find_all('td', class_='archive-title'))
1932
1933		@classmethod
1934		def get_url_from_archive_element(cls, td):
1935		return td.find('a')['href']

SylvainDe / ComicBookMaker

Code Duplication Length = 19-20 lines in 2 locations

comics.py 2 locations