Code Duplication - SylvainDe/ComicBookMaker - Measure and Improve Code Quality continuously with Scrutinizer

Code Duplication Length = 22-26 lines in 8 locations

comics.py 8 locations


    _categories = ('DELETED', )


class ExtraFabulousComics(GenericNavigableComic):
    """Class to retrieve Extra Fabulous Comics.

    Navigation is delegated to ``get_link_rel_next`` and the first comic
    is resolved through ``simulate_first_link`` using ``first_url``.
    """
    # Also on https://extrafabulouscomics.tumblr.com
    name = 'efc'
    long_name = 'Extra Fabulous Comics'
    url = 'http://extrafabulouscomics.com'
    _categories = ('EFC', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://extrafabulouscomics.com/comic/buttfly/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        Args:
            soup: parsed page of the comic; it is queried with ``find`` and
                ``find_all`` (presumably a BeautifulSoup object - confirm
                against the caller).
            link: navigation link that led to the page (unused here).

        Returns:
            A dict with the keys 'title', 'img' (list of image URLs),
            'month', 'year', 'day' and 'prefix' (the title followed by '-').
        """
        # The site URL is escaped so that its dots are matched literally
        # instead of acting as "any character" wildcards in the pattern.
        img_src_re = re.compile('^%s/wp-content/uploads/' % re.escape(cls.url))
        imgs = soup.find_all('img', src=img_src_re)
        title = soup.find('meta', property='og:title')['content']
        # Only the first 10 characters (YYYY-MM-DD) of the timestamp are parsed.
        date_str = soup.find('meta', property='article:published_time')['content'][:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        return {
            'title': title,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'prefix': title + '-'
        }



        }


class LinsEditions(GenericNavigableComic):
    """Retrieve the L.I.N.S. Editions comics."""
    # Also on https://linscomics.tumblr.com
    # Now on https://warandpeas.com
    name = 'lins'
    long_name = 'L.I.N.S. Editions'
    url = 'https://linsedition.com'
    _categories = ('LINS', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'https://linsedition.com/2011/09/07/l-i-n-s/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect the metadata of one comic from its page.

        The title, the images and the publication date are all read from
        the page's ``<meta>`` tags. Returns a dict with the keys 'title',
        'img', 'month', 'year' and 'day'. ``link`` is not used.
        """
        og_title = soup.find('meta', property='og:title')['content']
        og_images = soup.find_all('meta', property='og:image')
        published = soup.find('meta', property='article:published_time')['content']
        # Keep only the leading YYYY-MM-DD part of the timestamp.
        date = string_to_date(published[:10], "%Y-%m-%d")
        info = {'title': og_title}
        info['img'] = [meta['content'] for meta in og_images]
        info['month'] = date.month
        info['year'] = date.year
        info['day'] = date.day
        return info



        }


class Mercworks(GenericNavigableComic):
    """Retrieve the Mercworks comics."""
    # Also on http://mercworks.tumblr.com
    name = 'mercworks'
    long_name = 'Mercworks'
    url = 'http://mercworks.net'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect the metadata of one comic from its page.

        Returns a dict with the keys 'img', 'title', 'desc', 'day', 'month'
        and 'year'. 'desc' is the empty string when the page has no
        og:description meta tag. ``link`` is not used.
        """
        og_title = soup.find('meta', property='og:title')['content']
        og_desc = soup.find('meta', property='og:description')
        if og_desc:
            description = og_desc['content']
        else:
            description = ""
        published = soup.find('meta', property='article:published_time')['content']
        # Keep only the leading YYYY-MM-DD part of the timestamp.
        date = string_to_date(published[:10], "%Y-%m-%d")
        og_images = soup.find_all('meta', property='og:image')
        image_urls = [meta['content'] for meta in og_images]
        return {
            'img': image_urls,
            'title': og_title,
            'desc': description,
            'day': date.day,
            'month': date.month,
            'year': date.year,
        }



        }


class MarketoonistComics(GenericNavigableComic):
    """Retrieve the Marketoonist comics."""
    name = 'marketoonist'
    long_name = 'Marketoonist'
    url = 'https://marketoonist.com/cartoons'
    get_first_comic_link = simulate_first_link
    get_navi_link = get_link_rel_next
    first_url = 'https://marketoonist.com/2002/10/the-8-types-of-brand-managers-2.html'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect the metadata of one comic from its page.

        Everything is read from the page's ``<meta>`` tags. Returns a dict
        with the keys 'img', 'day', 'month', 'year' and 'title'.
        ``link`` is not used.
        """
        image_urls = [
            meta['content']
            for meta in soup.find_all('meta', property='og:image')
        ]
        published_meta = soup.find('meta', property='article:published_time')
        # Keep only the leading YYYY-MM-DD part of the timestamp.
        iso_day = published_meta['content'][:10]
        date = string_to_date(iso_day, "%Y-%m-%d")
        title_meta = soup.find('meta', property='og:title')
        return {
            'img': image_urls,
            'day': date.day,
            'month': date.month,
            'year': date.year,
            'title': title_meta['content'],
        }



        }


class GenericLeMondeBlog(GenericNavigableComic):
    """Generic base for comics hosted on Le Monde blogs.

    ``first_url`` is left as ``NotImplemented`` here.
    """
    _categories = ('LEMONDE', 'FRANCAIS')
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect the metadata of one blog post from its page.

        Returns a dict with the keys 'title', 'url2' (the shortlink of the
        post), 'img', 'month', 'year' and 'day'. ``link`` is not used.
        """
        shortlink = soup.find('link', rel='shortlink')['href']
        og_title = soup.find('meta', property='og:title')['content']
        entry_date = soup.find("span", class_="entry-date")
        # The date is parsed as "day month-name year" with the
        # "fr_FR.utf8" argument passed through to string_to_date.
        date = string_to_date(entry_date.string, "%d %B %Y", "fr_FR.utf8")
        og_images = soup.find_all('meta', property='og:image')
        image_urls = []
        for meta in og_images:
            image_urls.append(convert_iri_to_plain_ascii_uri(meta['content']))
        return {
            'title': og_title,
            'url2': shortlink,
            'img': image_urls,
            'month': date.month,
            'year': date.year,
            'day': date.day,
        }



        }


class SafelyEndangered(GenericNavigableComic):
    """Retrieve the Safely Endangered comics."""
    # Also on http://tumblr.safelyendangered.com
    name = 'endangered'
    long_name = 'Safely Endangered'
    url = 'http://www.safelyendangered.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.safelyendangered.com/comic/ignored/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect the metadata of one comic from its page.

        Returns a dict with the keys 'day', 'month', 'year', 'img', 'title'
        and 'alt' (the alt text of the first comic image). ``link`` is not
        used.
        """
        heading = soup.find('h2', class_='post-title')
        post_title = heading.string
        post_date = soup.find('span', class_='post-date').string
        date = string_to_date(post_date, '%B %d, %Y')
        comic_div = soup.find('div', id='comic')
        pictures = comic_div.find_all('img')
        first_alt = pictures[0]['alt']
        # Every image is expected to carry the same text as alt and title.
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        sources = [pic['src'] for pic in pictures]
        return {
            'day': date.day,
            'month': date.month,
            'year': date.year,
            'img': sources,
            'title': post_title,
            'alt': first_alt,
        }



        }


class MyExtraLife(GenericNavigableComic):
    """Retrieve the My Extra Life comics."""
    name = 'extralife'
    long_name = 'My Extra Life'
    url = 'http://www.myextralife.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'first comic' navigation link found on the home page."""
        home_page = get_soup_at_url(cls.url)
        return home_page.find('a', class_='comic_nav_link first_comic_link')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect the metadata of one comic from its page.

        Returns a dict with the keys 'title', 'img', 'day', 'month' and
        'year'. Images with an empty 'src' are left out of 'img'.
        ``link`` is not used.
        """
        comic_title = soup.find("h1", class_="comic_title").string
        comic_date = soup.find("span", class_="comic_date").string
        date = string_to_date(comic_date, "%B %d, %Y")
        pictures = soup.find_all("img", class_="comic")
        # Alt text and title of every image are expected to equal the title.
        assert all(pic['alt'] == pic['title'] == comic_title for pic in pictures)
        sources = [pic['src'] for pic in pictures if pic["src"]]
        return {
            'title': comic_title,
            'img': sources,
            'day': date.day,
            'month': date.month,
            'year': date.year,
        }



        }


class JuliasDrawings(GenericListableComic):
    """Retrieve Julia's Drawings."""
    name = 'julia'
    long_name = "Julia's Drawings"
    url = 'https://drawings.jvns.ca'
    get_url_from_archive_element = get_href

    @classmethod
    def get_archive_elements(cls):
        """Return the first link of each post on the home page, in reverse page order."""
        home_page = get_soup_at_url(cls.url)
        posts = home_page.find_all('article', class_='li post')
        anchors = []
        for post in reversed(posts):
            anchors.append(post.find('a'))
        return anchors

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Collect the metadata of one drawing from its page.

        Returns a dict with the keys 'title', 'img', 'month', 'year' and
        'day'. Image sources are joined with the site URL through
        ``urljoin_wrapper``. ``archive_elt`` is not used.
        """
        published = soup.find('meta', property='og:article:published_time')['content']
        # Keep only the leading YYYY-MM-DD part of the timestamp.
        date = string_to_date(published[:10], "%Y-%m-%d")
        post_title = soup.find('h3', class_='p-post-title').string
        content = soup.find('section', class_='post-content')
        image_urls = [
            urljoin_wrapper(cls.url, picture['src'])
            for picture in content.find_all('img')
        ]
        return {
            'title': post_title,
            'img': image_urls,
            'month': date.month,
            'year': date.year,
            'day': date.day,
        }



		@@ 389-414 (lines=26) @@
386		_categories = ('DELETED', )
387
388
389		class ExtraFabulousComics(GenericNavigableComic):
390		"""Class to retrieve Extra Fabulous Comics."""
391		# Also on https://extrafabulouscomics.tumblr.com
392		name = 'efc'
393		long_name = 'Extra Fabulous Comics'
394		url = 'http://extrafabulouscomics.com'
395		_categories = ('EFC', )
396		get_navi_link = get_link_rel_next
397		get_first_comic_link = simulate_first_link
398		first_url = 'http://extrafabulouscomics.com/comic/buttfly/'
399
400		@classmethod
401		def get_comic_info(cls, soup, link):
402		"""Get information about a particular comics."""
403		img_src_re = re.compile('^%s/wp-content/uploads/' % cls.url)
404		imgs = soup.find_all('img', src=img_src_re)
405		title = soup.find('meta', property='og:title')['content']
406		date_str = soup.find('meta', property='article:published_time')['content'][:10]
407		day = string_to_date(date_str, "%Y-%m-%d")
408		return {
409		'title': title,
410		'img': [i['src'] for i in imgs],
411		'month': day.month,
412		'year': day.year,
413		'day': day.day,
414		'prefix': title + '-'
415		}
416
417
		@@ 2395-2419 (lines=25) @@
2392		}
2393
2394
2395		class LinsEditions(GenericNavigableComic):
2396		"""Class to retrieve L.I.N.S. Editions comics."""
2397		# Also on https://linscomics.tumblr.com
2398		# Now on https://warandpeas.com
2399		name = 'lins'
2400		long_name = 'L.I.N.S. Editions'
2401		url = 'https://linsedition.com'
2402		_categories = ('LINS', )
2403		get_navi_link = get_link_rel_next
2404		get_first_comic_link = simulate_first_link
2405		first_url = 'https://linsedition.com/2011/09/07/l-i-n-s/'
2406
2407		@classmethod
2408		def get_comic_info(cls, soup, link):
2409		"""Get information about a particular comics."""
2410		title = soup.find('meta', property='og:title')['content']
2411		imgs = soup.find_all('meta', property='og:image')
2412		date_str = soup.find('meta', property='article:published_time')['content'][:10]
2413		day = string_to_date(date_str, "%Y-%m-%d")
2414		return {
2415		'title': title,
2416		'img': [i['content'] for i in imgs],
2417		'month': day.month,
2418		'year': day.year,
2419		'day': day.day,
2420		}
2421
2422
		@@ 1076-1100 (lines=25) @@
1073		}
1074
1075
1076		class Mercworks(GenericNavigableComic):
1077		"""Class to retrieve Mercworks comics."""
1078		# Also on http://mercworks.tumblr.com
1079		name = 'mercworks'
1080		long_name = 'Mercworks'
1081		url = 'http://mercworks.net'
1082		get_first_comic_link = get_a_comicnavbase_comicnavfirst
1083		get_navi_link = get_link_rel_next
1084
1085		@classmethod
1086		def get_comic_info(cls, soup, link):
1087		"""Get information about a particular comics."""
1088		title = soup.find('meta', property='og:title')['content']
1089		metadesc = soup.find('meta', property='og:description')
1090		desc = metadesc['content'] if metadesc else ""
1091		date_str = soup.find('meta', property='article:published_time')['content'][:10]
1092		day = string_to_date(date_str, "%Y-%m-%d")
1093		imgs = soup.find_all('meta', property='og:image')
1094		return {
1095		'img': [i['content'] for i in imgs],
1096		'title': title,
1097		'desc': desc,
1098		'day': day.day,
1099		'month': day.month,
1100		'year': day.year
1101		}
1102
1103
		@@ 3402-3423 (lines=22) @@
3399		}
3400
3401
3402		class MarketoonistComics(GenericNavigableComic):
3403		"""Class to retrieve Marketoonist Comics."""
3404		name = 'marketoonist'
3405		long_name = 'Marketoonist'
3406		url = 'https://marketoonist.com/cartoons'
3407		get_first_comic_link = simulate_first_link
3408		get_navi_link = get_link_rel_next
3409		first_url = 'https://marketoonist.com/2002/10/the-8-types-of-brand-managers-2.html'
3410
3411		@classmethod
3412		def get_comic_info(cls, soup, link):
3413		"""Get information about a particular comics."""
3414		imgs = soup.find_all('meta', property='og:image')
3415		date_str = soup.find('meta', property='article:published_time')['content'][:10]
3416		day = string_to_date(date_str, "%Y-%m-%d")
3417		title = soup.find('meta', property='og:title')['content']
3418		return {
3419		'img': [i['content'] for i in imgs],
3420		'day': day.day,
3421		'month': day.month,
3422		'year': day.year,
3423		'title': title,
3424		}
3425
3426
		@@ 418-439 (lines=22) @@
415		}
416
417
418		class GenericLeMondeBlog(GenericNavigableComic):
419		"""Generic class to retrieve comics from Le Monde blogs."""
420		_categories = ('LEMONDE', 'FRANCAIS')
421		get_navi_link = get_link_rel_next
422		get_first_comic_link = simulate_first_link
423		first_url = NotImplemented
424
425		@classmethod
426		def get_comic_info(cls, soup, link):
427		"""Get information about a particular comics."""
428		url2 = soup.find('link', rel='shortlink')['href']
429		title = soup.find('meta', property='og:title')['content']
430		date_str = soup.find("span", class_="entry-date").string
431		day = string_to_date(date_str, "%d %B %Y", "fr_FR.utf8")
432		imgs = soup.find_all('meta', property='og:image')
433		return {
434		'title': title,
435		'url2': url2,
436		'img': [convert_iri_to_plain_ascii_uri(i['content']) for i in imgs],
437		'month': day.month,
438		'year': day.year,
439		'day': day.day,
440		}
441
442
		@@ 1826-1851 (lines=26) @@
1823		}
1824
1825
1826		class SafelyEndangered(GenericNavigableComic):
1827		"""Class to retrieve Safely Endangered comics."""
1828		# Also on http://tumblr.safelyendangered.com
1829		name = 'endangered'
1830		long_name = 'Safely Endangered'
1831		url = 'http://www.safelyendangered.com'
1832		get_navi_link = get_link_rel_next
1833		get_first_comic_link = simulate_first_link
1834		first_url = 'http://www.safelyendangered.com/comic/ignored/'
1835
1836		@classmethod
1837		def get_comic_info(cls, soup, link):
1838		"""Get information about a particular comics."""
1839		title = soup.find('h2', class_='post-title').string
1840		date_str = soup.find('span', class_='post-date').string
1841		day = string_to_date(date_str, '%B %d, %Y')
1842		imgs = soup.find('div', id='comic').find_all('img')
1843		alt = imgs[0]['alt']
1844		assert all(i['alt'] == i['title'] for i in imgs)
1845		return {
1846		'day': day.day,
1847		'month': day.month,
1848		'year': day.year,
1849		'img': [i['src'] for i in imgs],
1850		'title': title,
1851		'alt': alt,
1852		}
1853
1854
		@@ 988-1013 (lines=26) @@
985		}
986
987
988		class MyExtraLife(GenericNavigableComic):
989		"""Class to retrieve My Extra Life comics."""
990		name = 'extralife'
991		long_name = 'My Extra Life'
992		url = 'http://www.myextralife.com'
993		get_navi_link = get_link_rel_next
994
995		@classmethod
996		def get_first_comic_link(cls):
997		"""Get link to first comics."""
998		return get_soup_at_url(cls.url).find('a', class_='comic_nav_link first_comic_link')
999
1000		@classmethod
1001		def get_comic_info(cls, soup, link):
1002		"""Get information about a particular comics."""
1003		title = soup.find("h1", class_="comic_title").string
1004		date_str = soup.find("span", class_="comic_date").string
1005		day = string_to_date(date_str, "%B %d, %Y")
1006		imgs = soup.find_all("img", class_="comic")
1007		assert all(i['alt'] == i['title'] == title for i in imgs)
1008		return {
1009		'title': title,
1010		'img': [i['src'] for i in imgs if i["src"]],
1011		'day': day.day,
1012		'month': day.month,
1013		'year': day.year
1014		}
1015
1016
		@@ 2296-2320 (lines=25) @@
2293		}
2294
2295
2296		class JuliasDrawings(GenericListableComic):
2297		"""Class to retrieve Julia's Drawings."""
2298		name = 'julia'
2299		long_name = "Julia's Drawings"
2300		url = 'https://drawings.jvns.ca'
2301		get_url_from_archive_element = get_href
2302
2303		@classmethod
2304		def get_archive_elements(cls):
2305		articles = get_soup_at_url(cls.url).find_all('article', class_='li post')
2306		return [art.find('a') for art in reversed(articles)]
2307
2308		@classmethod
2309		def get_comic_info(cls, soup, archive_elt):
2310		"""Get information about a particular comics."""
2311		date_str = soup.find('meta', property='og:article:published_time')['content'][:10]
2312		day = string_to_date(date_str, "%Y-%m-%d")
2313		title = soup.find('h3', class_='p-post-title').string
2314		imgs = soup.find('section', class_='post-content').find_all('img')
2315		return {
2316		'title': title,
2317		'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
2318		'month': day.month,
2319		'year': day.year,
2320		'day': day.day,
2321		}
2322
2323

SylvainDe / ComicBookMaker

Code Duplication Length = 22-26 lines in 8 locations

comics.py 8 locations