Code Duplication - SylvainDe/ComicBookMaker - Measure and Improve Code Quality continuously with Scrutinizer

Code Duplication Length = 22-26 lines in 8 locations

comics.py 8 locations


            'day': day.day,
        }


class LinsEditions(GenericNavigableComic):
    """Retriever for the L.I.N.S. Editions comics.

    Title, images and publication date are all read from the ``<meta>``
    tags of the comic page.
    """
    # Also on http://linscomics.tumblr.com
    # Now on https://warandpeas.com
    name = 'lins'
    long_name = 'L.I.N.S. Editions'
    url = 'https://linsedition.com'
    _categories = ('LINS', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'https://linsedition.com/2011/09/07/l-i-n-s/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract the description of one comic from its parsed page.

        Returns a dict with the keys 'title', 'img' (list of image URLs),
        'month', 'year' and 'day'.
        """
        title_meta = soup.find('meta', property='og:title')
        comic_title = title_meta['content']
        image_metas = soup.find_all('meta', property='og:image')
        time_meta = soup.find('meta', property='article:published_time')
        # Only the first ten characters (the YYYY-MM-DD part) are parsed.
        published = string_to_date(time_meta['content'][:10], "%Y-%m-%d")
        image_urls = [meta['content'] for meta in image_metas]
        return {
            'title': comic_title,
            'img': image_urls,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }


            'prefix': '%d-' % num,
        }


class Mercworks(GenericNavigableComic):
    """Retriever for the Mercworks comics.

    All the information is taken from the ``<meta>`` tags of the page;
    the description is optional.
    """
    # Also on http://mercworks.tumblr.com
    name = 'mercworks'
    long_name = 'Mercworks'
    url = 'http://mercworks.net'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract the description of one comic from its parsed page.

        Returns a dict with the keys 'img' (list of image URLs), 'title',
        'desc' (empty string when the page has no og:description tag),
        'day', 'month' and 'year'.
        """
        title_meta = soup.find('meta', property='og:title')
        comic_title = title_meta['content']
        desc_meta = soup.find('meta', property='og:description')
        if desc_meta:
            description = desc_meta['content']
        else:
            description = ""
        time_meta = soup.find('meta', property='article:published_time')
        # Only the first ten characters (the YYYY-MM-DD part) are parsed.
        published = string_to_date(time_meta['content'][:10], "%Y-%m-%d")
        image_urls = [
            meta['content']
            for meta in soup.find_all('meta', property='og:image')
        ]
        return {
            'img': image_urls,
            'title': comic_title,
            'desc': description,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }


        return []


class ExtraFabulousComics(GenericNavigableComic):
    """Class to retrieve Extra Fabulous Comics."""
    name = 'efc'
    long_name = 'Extra Fabulous Comics'
    url = 'http://extrafabulouscomics.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Images are the ``<img>`` tags whose ``src`` starts with the site's
        ``/wp-content/uploads/`` directory; the title and the publication
        date are read from the page's ``<meta>`` tags.

        Returns a dict with the keys 'title', 'img' (list of image URLs),
        'month', 'year', 'day' and 'prefix' (the title followed by '-').
        """
        # The URL is escaped so that its regex metacharacters (the dots in
        # the host name) only match themselves instead of any character.
        img_src_re = re.compile(
            '^%s/wp-content/uploads/' % re.escape(cls.url))
        imgs = soup.find_all('img', src=img_src_re)
        title = soup.find('meta', property='og:title')['content']
        # Only the first ten characters (the YYYY-MM-DD part) are parsed.
        date_str = soup.find(
            'meta', property='article:published_time')['content'][:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        return {
            'title': title,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'prefix': title + '-'
        }



            'img': [i['src'] for i in imgs],
        }


class MarketoonistComics(GenericNavigableComic):
    """Retriever for the Marketoonist cartoons.

    Images, publication date and title are all read from the ``<meta>``
    tags of the cartoon page.
    """
    name = 'marketoonist'
    long_name = 'Marketoonist'
    url = 'https://marketoonist.com/cartoons'
    get_first_comic_link = simulate_first_link
    get_navi_link = get_link_rel_next
    first_url = 'https://marketoonist.com/2002/10/the-8-types-of-brand-managers-2.html'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract the description of one cartoon from its parsed page.

        Returns a dict with the keys 'img' (list of image URLs), 'day',
        'month', 'year' and 'title'.
        """
        image_metas = soup.find_all('meta', property='og:image')
        time_meta = soup.find('meta', property='article:published_time')
        # Only the first ten characters (the YYYY-MM-DD part) are parsed.
        published = string_to_date(time_meta['content'][:10], "%Y-%m-%d")
        title_meta = soup.find('meta', property='og:title')
        comic_title = title_meta['content']
        image_urls = [meta['content'] for meta in image_metas]
        return {
            'img': image_urls,
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'title': comic_title,
        }


            'author': author,
        }


class Penmen(GenericNavigableComic):
    """Retriever for the Penmen comics.

    Information is gathered from the page title, the images of the
    'entry-content' block, the shortlink, the tag links and the <time> tag.
    """
    name = 'penmen'
    long_name = 'Penmen'
    url = 'http://penmen.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://penmen.com/index.php/2016/09/12/penmen-announces-grin-big-brand-clothing/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract the description of one comic from its parsed page.

        Returns a dict with the keys 'title', 'short_url', 'img' (list of
        image URLs), 'tags' (space-separated tag names), 'month', 'year'
        and 'day'.
        """
        page_title = soup.find('title').string
        content_div = soup.find('div', class_='entry-content')
        pictures = content_div.find_all('img')
        shortlink = soup.find('link', rel='shortlink')['href']
        tag_links = soup.find_all('a', rel='tag')
        tag_names = ' '.join([anchor.string for anchor in tag_links])
        # Only the first ten characters (the YYYY-MM-DD part) are parsed.
        timestamp = soup.find('time')['datetime']
        published = string_to_date(timestamp[:10], "%Y-%m-%d")
        return {
            'title': page_title,
            'short_url': shortlink,
            'img': [picture['src'] for picture in pictures],
            'tags': tag_names,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }


            'img': [i['content'] for i in imgs if i['content'] not in skip_imgs],
        }


class SafelyEndangered(GenericNavigableComic):
    """Retriever for the Safely Endangered comics.

    Title and date come from the post header, images from the 'comic' div.
    """
    # Also on http://tumblr.safelyendangered.com
    name = 'endangered'
    long_name = 'Safely Endangered'
    url = 'http://www.safelyendangered.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.safelyendangered.com/comic/ignored/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract the description of one comic from its parsed page.

        Returns a dict with the keys 'day', 'month', 'year', 'img' (list of
        image URLs), 'title' and 'alt' (alt text of the first image).
        """
        post_title = soup.find('h2', class_='post-title').string
        date_text = soup.find('span', class_='post-date').string
        published = string_to_date(date_text, '%B %d, %Y')
        comic_div = soup.find('div', id='comic')
        pictures = comic_div.find_all('img')
        first_alt = pictures[0]['alt']
        # Sanity check: no image may have an alt text differing from its title.
        assert not any(pic['alt'] != pic['title'] for pic in pictures)
        sources = [pic['src'] for pic in pictures]
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': sources,
            'title': post_title,
            'alt': first_alt,
        }


        }


class GenericLeMondeBlog(GenericNavigableComic):
    """Common base for the comics hosted on Le Monde blogs.

    first_url is left as NotImplemented here.
    """
    _categories = ('LEMONDE', 'FRANCAIS')
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract the description of one comic from its parsed page.

        Returns a dict with the keys 'title', 'url2' (the shortlink of the
        page), 'img' (list of converted image URLs), 'month', 'year' and
        'day'.
        """
        shortlink = soup.find('link', rel='shortlink')['href']
        title_meta = soup.find('meta', property='og:title')
        post_title = title_meta['content']
        date_text = soup.find("span", class_="entry-date").string
        # The date is parsed with the "fr_FR.utf8" locale argument.
        published = string_to_date(date_text, "%d %B %Y", "fr_FR.utf8")
        image_metas = soup.find_all('meta', property='og:image')
        image_urls = [
            convert_iri_to_plain_ascii_uri(meta['content'])
            for meta in image_metas
        ]
        return {
            'title': post_title,
            'url2': shortlink,
            'img': image_urls,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }



            'author': author,
        }


class MyExtraLife(GenericNavigableComic):
    """Retriever for the My Extra Life comics."""
    name = 'extralife'
    long_name = 'My Extra Life'
    url = 'http://www.myextralife.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'first comic' navigation link found on the home page."""
        home_page = get_soup_at_url(cls.url)
        return home_page.find('a', class_='comic_nav_link first_comic_link')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract the description of one comic from its parsed page.

        Returns a dict with the keys 'title', 'img' (list of the non-empty
        image URLs), 'day', 'month' and 'year'.
        """
        comic_title = soup.find("h1", class_="comic_title").string
        date_text = soup.find("span", class_="comic_date").string
        published = string_to_date(date_text, "%B %d, %Y")
        pictures = soup.find_all("img", class_="comic")
        # Sanity check: alt text, title attribute and comic title all agree.
        assert not any(
            not (pic['alt'] == pic['title'] == comic_title)
            for pic in pictures)
        sources = [pic['src'] for pic in pictures if pic["src"]]
        return {
            'title': comic_title,
            'img': sources,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }


		@@ 2353-2377 (lines=25) @@
2350		'day': day.day,
2351		}
2352
2353
2354		class LinsEditions(GenericNavigableComic):
2355		"""Class to retrieve L.I.N.S. Editions comics."""
2356		# Also on http://linscomics.tumblr.com
2357		# Now on https://warandpeas.com
2358		name = 'lins'
2359		long_name = 'L.I.N.S. Editions'
2360		url = 'https://linsedition.com'
2361		_categories = ('LINS', )
2362		get_navi_link = get_link_rel_next
2363		get_first_comic_link = simulate_first_link
2364		first_url = 'https://linsedition.com/2011/09/07/l-i-n-s/'
2365
2366		@classmethod
2367		def get_comic_info(cls, soup, link):
2368		"""Get information about a particular comics."""
2369		title = soup.find('meta', property='og:title')['content']
2370		imgs = soup.find_all('meta', property='og:image')
2371		date_str = soup.find('meta', property='article:published_time')['content'][:10]
2372		day = string_to_date(date_str, "%Y-%m-%d")
2373		return {
2374		'title': title,
2375		'img': [i['content'] for i in imgs],
2376		'month': day.month,
2377		'year': day.year,
2378		'day': day.day,
2379		}
2380
		@@ 1049-1073 (lines=25) @@
1046		'prefix': '%d-' % num,
1047		}
1048
1049
1050		class Mercworks(GenericNavigableComic):
1051		"""Class to retrieve Mercworks comics."""
1052		# Also on http://mercworks.tumblr.com
1053		name = 'mercworks'
1054		long_name = 'Mercworks'
1055		url = 'http://mercworks.net'
1056		get_first_comic_link = get_a_comicnavbase_comicnavfirst
1057		get_navi_link = get_link_rel_next
1058
1059		@classmethod
1060		def get_comic_info(cls, soup, link):
1061		"""Get information about a particular comics."""
1062		title = soup.find('meta', property='og:title')['content']
1063		metadesc = soup.find('meta', property='og:description')
1064		desc = metadesc['content'] if metadesc else ""
1065		date_str = soup.find('meta', property='article:published_time')['content'][:10]
1066		day = string_to_date(date_str, "%Y-%m-%d")
1067		imgs = soup.find_all('meta', property='og:image')
1068		return {
1069		'img': [i['content'] for i in imgs],
1070		'title': title,
1071		'desc': desc,
1072		'day': day.day,
1073		'month': day.month,
1074		'year': day.year
1075		}
1076
		@@ 357-379 (lines=23) @@
354		return []
355
356
357		class ExtraFabulousComics(GenericNavigableComic):
358		"""Class to retrieve Extra Fabulous Comics."""
359		name = 'efc'
360		long_name = 'Extra Fabulous Comics'
361		url = 'http://extrafabulouscomics.com'
362		get_first_comic_link = get_a_navi_navifirst
363		get_navi_link = get_link_rel_next
364
365		@classmethod
366		def get_comic_info(cls, soup, link):
367		"""Get information about a particular comics."""
368		img_src_re = re.compile('^%s/wp-content/uploads/' % cls.url)
369		imgs = soup.find_all('img', src=img_src_re)
370		title = soup.find('meta', property='og:title')['content']
371		date_str = soup.find('meta', property='article:published_time')['content'][:10]
372		day = string_to_date(date_str, "%Y-%m-%d")
373		return {
374		'title': title,
375		'img': [i['src'] for i in imgs],
376		'month': day.month,
377		'year': day.year,
378		'day': day.day,
379		'prefix': title + '-'
380		}
381
382
		@@ 3294-3315 (lines=22) @@
3291		'img': [i['src'] for i in imgs],
3292		}
3293
3294
3295		class MarketoonistComics(GenericNavigableComic):
3296		"""Class to retrieve Marketoonist Comics."""
3297		name = 'marketoonist'
3298		long_name = 'Marketoonist'
3299		url = 'https://marketoonist.com/cartoons'
3300		get_first_comic_link = simulate_first_link
3301		get_navi_link = get_link_rel_next
3302		first_url = 'https://marketoonist.com/2002/10/the-8-types-of-brand-managers-2.html'
3303
3304		@classmethod
3305		def get_comic_info(cls, soup, link):
3306		"""Get information about a particular comics."""
3307		imgs = soup.find_all('meta', property='og:image')
3308		date_str = soup.find('meta', property='article:published_time')['content'][:10]
3309		day = string_to_date(date_str, "%Y-%m-%d")
3310		title = soup.find('meta', property='og:title')['content']
3311		return {
3312		'img': [i['content'] for i in imgs],
3313		'day': day.day,
3314		'month': day.month,
3315		'year': day.year,
3316		'title': title,
3317		}
3318
		@@ 1894-1919 (lines=26) @@
1891		'author': author,
1892		}
1893
1894
1895		class Penmen(GenericNavigableComic):
1896		"""Class to retrieve Penmen comics."""
1897		name = 'penmen'
1898		long_name = 'Penmen'
1899		url = 'http://penmen.com'
1900		get_navi_link = get_link_rel_next
1901		get_first_comic_link = simulate_first_link
1902		first_url = 'http://penmen.com/index.php/2016/09/12/penmen-announces-grin-big-brand-clothing/'
1903
1904		@classmethod
1905		def get_comic_info(cls, soup, link):
1906		"""Get information about a particular comics."""
1907		title = soup.find('title').string
1908		imgs = soup.find('div', class_='entry-content').find_all('img')
1909		short_url = soup.find('link', rel='shortlink')['href']
1910		tags = ' '.join(t.string for t in soup.find_all('a', rel='tag'))
1911		date_str = soup.find('time')['datetime'][:10]
1912		day = string_to_date(date_str, "%Y-%m-%d")
1913		return {
1914		'title': title,
1915		'short_url': short_url,
1916		'img': [i['src'] for i in imgs],
1917		'tags': tags,
1918		'month': day.month,
1919		'year': day.year,
1920		'day': day.day,
1921		}
1922
		@@ 1835-1860 (lines=26) @@
1832		'img': [i['content'] for i in imgs if i['content'] not in skip_imgs],
1833		}
1834
1835
1836		class SafelyEndangered(GenericNavigableComic):
1837		"""Class to retrieve Safely Endangered comics."""
1838		# Also on http://tumblr.safelyendangered.com
1839		name = 'endangered'
1840		long_name = 'Safely Endangered'
1841		url = 'http://www.safelyendangered.com'
1842		get_navi_link = get_link_rel_next
1843		get_first_comic_link = simulate_first_link
1844		first_url = 'http://www.safelyendangered.com/comic/ignored/'
1845
1846		@classmethod
1847		def get_comic_info(cls, soup, link):
1848		"""Get information about a particular comics."""
1849		title = soup.find('h2', class_='post-title').string
1850		date_str = soup.find('span', class_='post-date').string
1851		day = string_to_date(date_str, '%B %d, %Y')
1852		imgs = soup.find('div', id='comic').find_all('img')
1853		alt = imgs[0]['alt']
1854		assert all(i['alt'] == i['title'] for i in imgs)
1855		return {
1856		'day': day.day,
1857		'month': day.month,
1858		'year': day.year,
1859		'img': [i['src'] for i in imgs],
1860		'title': title,
1861		'alt': alt,
1862		}
1863
		@@ 383-404 (lines=22) @@
380		}
381
382
383		class GenericLeMondeBlog(GenericNavigableComic):
384		"""Generic class to retrieve comics from Le Monde blogs."""
385		_categories = ('LEMONDE', 'FRANCAIS')
386		get_navi_link = get_link_rel_next
387		get_first_comic_link = simulate_first_link
388		first_url = NotImplemented
389
390		@classmethod
391		def get_comic_info(cls, soup, link):
392		"""Get information about a particular comics."""
393		url2 = soup.find('link', rel='shortlink')['href']
394		title = soup.find('meta', property='og:title')['content']
395		date_str = soup.find("span", class_="entry-date").string
396		day = string_to_date(date_str, "%d %B %Y", "fr_FR.utf8")
397		imgs = soup.find_all('meta', property='og:image')
398		return {
399		'title': title,
400		'url2': url2,
401		'img': [convert_iri_to_plain_ascii_uri(i['content']) for i in imgs],
402		'month': day.month,
403		'year': day.year,
404		'day': day.day,
405		}
406
407
		@@ 954-979 (lines=26) @@
951		'author': author,
952		}
953
954
955		class MyExtraLife(GenericNavigableComic):
956		"""Class to retrieve My Extra Life comics."""
957		name = 'extralife'
958		long_name = 'My Extra Life'
959		url = 'http://www.myextralife.com'
960		get_navi_link = get_link_rel_next
961
962		@classmethod
963		def get_first_comic_link(cls):
964		"""Get link to first comics."""
965		return get_soup_at_url(cls.url).find('a', class_='comic_nav_link first_comic_link')
966
967		@classmethod
968		def get_comic_info(cls, soup, link):
969		"""Get information about a particular comics."""
970		title = soup.find("h1", class_="comic_title").string
971		date_str = soup.find("span", class_="comic_date").string
972		day = string_to_date(date_str, "%B %d, %Y")
973		imgs = soup.find_all("img", class_="comic")
974		assert all(i['alt'] == i['title'] == title for i in imgs)
975		return {
976		'title': title,
977		'img': [i['src'] for i in imgs if i["src"]],
978		'day': day.day,
979		'month': day.month,
980		'year': day.year
981		}
982

SylvainDe / ComicBookMaker

Code Duplication Length = 22-26 lines in 8 locations

comics.py 8 locations