Code Duplication - SylvainDe/ComicBookMaker - Measure and Improve Code Quality continuously with Scrutinizer

Code Duplication Length = 22-26 lines in 8 locations

comics.py 8 locations


    _categories = ('DELETED', )


class ExtraFabulousComics(GenericNavigableComic):
    """Class to retrieve Extra Fabulous Comics."""
    # Also on https://extrafabulouscomics.tumblr.com
    name = 'efc'
    long_name = 'Extra Fabulous Comics'
    url = 'http://extrafabulouscomics.com'
    _categories = ('EFC', )
    # Navigation helpers are defined elsewhere in the file; first_url is
    # presumably what simulate_first_link relies on -- confirm against it.
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://extrafabulouscomics.com/comic/buttfly/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The images are the <img> tags whose src starts with the
        wp-content/uploads path of the site; the title and the date are
        read from the 'og:title' and 'article:published_time' meta tags.
        The link argument is not used here.

        Returns a dict with the keys 'title', 'img', 'month', 'year',
        'day' and 'prefix'.
        """
        # Escape the base url: it is a literal, and its '.' characters
        # would otherwise match any character in the image source.
        img_src_re = re.compile('^%s/wp-content/uploads/' % re.escape(cls.url))
        imgs = soup.find_all('img', src=img_src_re)
        title = soup.find('meta', property='og:title')['content']
        # Only the first 10 characters of the timestamp are parsed as a date.
        date_str = soup.find('meta', property='article:published_time')['content'][:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        return {
            'title': title,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'prefix': title + '-'
        }



        }


class LinsEditions(GenericNavigableComic):
    """Retriever for the L.I.N.S. Editions comics."""
    # Also on https://linscomics.tumblr.com
    # Now on https://warandpeas.com
    name = 'lins'
    long_name = 'L.I.N.S. Editions'
    url = 'https://linsedition.com'
    _categories = ('LINS', )
    first_url = 'https://linsedition.com/2011/09/07/l-i-n-s/'
    get_first_comic_link = simulate_first_link
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dict (title, images, date) for a comic page.

        Everything is read from the meta tags of the page; the link
        argument is not used.
        """
        title_tag = soup.find('meta', property='og:title')
        comic_title = title_tag['content']
        image_tags = soup.find_all('meta', property='og:image')
        published_tag = soup.find('meta', property='article:published_time')
        # Only the first 10 characters of the timestamp are parsed.
        published = string_to_date(published_tag['content'][:10], "%Y-%m-%d")
        image_urls = [tag['content'] for tag in image_tags]
        return dict([
            ('title', comic_title),
            ('img', image_urls),
            ('month', published.month),
            ('year', published.year),
            ('day', published.day),
        ])



        }


class Mercworks(GenericNavigableComic):
    """Retriever for the Mercworks comics."""
    # Also on http://mercworks.tumblr.com
    name = 'mercworks'
    long_name = 'Mercworks'
    url = 'http://mercworks.net'
    get_navi_link = get_link_rel_next
    get_first_comic_link = get_a_comicnavbase_comicnavfirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dict for a comic page.

        Title, description, date and images all come from meta tags;
        the description falls back to an empty string when its tag
        is missing. The link argument is not used.
        """
        comic_title = soup.find('meta', property='og:title')['content']
        description_tag = soup.find('meta', property='og:description')
        if description_tag:
            description = description_tag['content']
        else:
            description = ""
        published_tag = soup.find('meta', property='article:published_time')
        # Only the first 10 characters of the timestamp are parsed.
        published = string_to_date(published_tag['content'][:10], "%Y-%m-%d")
        image_tags = soup.find_all('meta', property='og:image')
        image_urls = [tag['content'] for tag in image_tags]
        return {
            'img': image_urls,
            'title': comic_title,
            'desc': description,
            'day': published.day,
            'month': published.month,
            'year': published.year,
        }



        }


class MarketoonistComics(GenericNavigableComic):
    """Retriever for the Marketoonist comics."""
    name = 'marketoonist'
    long_name = 'Marketoonist'
    url = 'https://marketoonist.com/cartoons'
    first_url = 'https://marketoonist.com/2002/10/the-8-types-of-brand-managers-2.html'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dict (images, date, title) for a comic page.

        All values are read from the meta tags of the page; the link
        argument is not used.
        """
        image_tags = soup.find_all('meta', property='og:image')
        published_tag = soup.find('meta', property='article:published_time')
        # Only the first 10 characters of the timestamp are parsed.
        timestamp = published_tag['content']
        published = string_to_date(timestamp[:10], "%Y-%m-%d")
        title_tag = soup.find('meta', property='og:title')
        comic_title = title_tag['content']
        info = {'img': [tag['content'] for tag in image_tags]}
        info['day'] = published.day
        info['month'] = published.month
        info['year'] = published.year
        info['title'] = comic_title
        return info



        }


class PeterLauris(GenericNavigableComic):
    """Retriever for the Peter Lauris comics."""
    name = 'peterlauris'
    long_name = 'Peter Lauris'
    url = 'http://peterlauris.com/comics'
    first_url = 'http://peterlauris.com/comics/just-in-case/'
    get_first_comic_link = simulate_first_link
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dict (title, images, date) for a comic page.

        The title comes from the 'twitter:title' meta tag, the date and
        the images from the 'article:published_time' and 'og:image' meta
        tags. The link argument is not used.
        """
        title_tag = soup.find('meta', attrs={'name': 'twitter:title'})
        comic_title = title_tag['content']
        published_tag = soup.find('meta', property='article:published_time')
        # Only the first 10 characters of the timestamp are parsed.
        published = string_to_date(published_tag['content'][:10], "%Y-%m-%d")
        image_urls = []
        for tag in soup.find_all('meta', property='og:image'):
            image_urls.append(tag['content'])
        return {
            'title': comic_title,
            'img': image_urls,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }



        }


class GenericLeMondeBlog(GenericNavigableComic):
    """Common base for the comics hosted on Le Monde blogs."""
    _categories = ('LEMONDE', 'FRANCAIS')
    # Left as NotImplemented here, alongside simulate_first_link.
    first_url = NotImplemented
    get_first_comic_link = simulate_first_link
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dict for a blog post.

        The short link, title and images are read from the page head;
        the date is the text of the 'entry-date' span, parsed with the
        "fr_FR.utf8" locale argument. The link argument is not used.
        """
        shortlink_tag = soup.find('link', rel='shortlink')
        short_url = shortlink_tag['href']
        post_title = soup.find('meta', property='og:title')['content']
        date_span = soup.find("span", class_="entry-date")
        posted = string_to_date(date_span.string, "%d %B %Y", "fr_FR.utf8")
        image_tags = soup.find_all('meta', property='og:image')
        image_urls = [
            convert_iri_to_plain_ascii_uri(tag['content'])
            for tag in image_tags
        ]
        return {
            'title': post_title,
            'url2': short_url,
            'img': image_urls,
            'month': posted.month,
            'year': posted.year,
            'day': posted.day,
        }



        }


class MyExtraLife(GenericNavigableComic):
    """Retriever for the My Extra Life comics."""
    name = 'extralife'
    long_name = 'My Extra Life'
    url = 'http://www.myextralife.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'first comic' navigation link found on the main page."""
        home_page = get_soup_at_url(cls.url)
        return home_page.find('a', class_='comic_nav_link first_comic_link')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dict (title, images, date) for a comic page.

        The title and the date are the text of the 'comic_title' heading
        and of the 'comic_date' span. The link argument is not used.
        """
        comic_title = soup.find("h1", class_="comic_title").string
        date_text = soup.find("span", class_="comic_date").string
        posted = string_to_date(date_text, "%B %d, %Y")
        comic_imgs = soup.find_all("img", class_="comic")
        # Sanity check: alt and title of every image repeat the comic title.
        assert all(img['alt'] == img['title'] == comic_title for img in comic_imgs)
        # Images with an empty src are left out.
        sources = [img['src'] for img in comic_imgs if img["src"]]
        return {
            'title': comic_title,
            'img': sources,
            'day': posted.day,
            'month': posted.month,
            'year': posted.year,
        }



        }


class JuliasDrawings(GenericListableComic):
    """Retriever for Julia's Drawings."""
    name = 'julia'
    long_name = "Julia's Drawings"
    url = 'https://drawings.jvns.ca'
    get_url_from_archive_element = get_href

    @classmethod
    def get_archive_elements(cls):
        """Return the first link of each post of the main page, in reversed page order."""
        main_page = get_soup_at_url(cls.url)
        posts = main_page.find_all('article', class_='li post')
        links = []
        for post in reversed(posts):
            links.append(post.find('a'))
        return links

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Build the information dict (title, images, date) for a drawing page.

        Image sources are joined with the site url. The archive_elt
        argument is not used.
        """
        published_tag = soup.find('meta', property='og:article:published_time')
        # Only the first 10 characters of the timestamp are parsed.
        published = string_to_date(published_tag['content'][:10], "%Y-%m-%d")
        post_title = soup.find('h3', class_='p-post-title').string
        content_section = soup.find('section', class_='post-content')
        image_tags = content_section.find_all('img')
        image_urls = [urljoin_wrapper(cls.url, tag['src']) for tag in image_tags]
        return {
            'title': post_title,
            'img': image_urls,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }



		@@ 405-430 (lines=26) @@
402		_categories = ('DELETED', )
403
404
405		class ExtraFabulousComics(GenericNavigableComic):
406		"""Class to retrieve Extra Fabulous Comics."""
407		# Also on https://extrafabulouscomics.tumblr.com
408		name = 'efc'
409		long_name = 'Extra Fabulous Comics'
410		url = 'http://extrafabulouscomics.com'
411		_categories = ('EFC', )
412		get_navi_link = get_link_rel_next
413		get_first_comic_link = simulate_first_link
414		first_url = 'http://extrafabulouscomics.com/comic/buttfly/'
415
416		@classmethod
417		def get_comic_info(cls, soup, link):
418		"""Get information about a particular comics."""
419		img_src_re = re.compile('^%s/wp-content/uploads/' % cls.url)
420		imgs = soup.find_all('img', src=img_src_re)
421		title = soup.find('meta', property='og:title')['content']
422		date_str = soup.find('meta', property='article:published_time')['content'][:10]
423		day = string_to_date(date_str, "%Y-%m-%d")
424		return {
425		'title': title,
426		'img': [i['src'] for i in imgs],
427		'month': day.month,
428		'year': day.year,
429		'day': day.day,
430		'prefix': title + '-'
431		}
432
433
		@@ 2469-2493 (lines=25) @@
2466		}
2467
2468
2469		class LinsEditions(GenericNavigableComic):
2470		"""Class to retrieve L.I.N.S. Editions comics."""
2471		# Also on https://linscomics.tumblr.com
2472		# Now on https://warandpeas.com
2473		name = 'lins'
2474		long_name = 'L.I.N.S. Editions'
2475		url = 'https://linsedition.com'
2476		_categories = ('LINS', )
2477		get_navi_link = get_link_rel_next
2478		get_first_comic_link = simulate_first_link
2479		first_url = 'https://linsedition.com/2011/09/07/l-i-n-s/'
2480
2481		@classmethod
2482		def get_comic_info(cls, soup, link):
2483		"""Get information about a particular comics."""
2484		title = soup.find('meta', property='og:title')['content']
2485		imgs = soup.find_all('meta', property='og:image')
2486		date_str = soup.find('meta', property='article:published_time')['content'][:10]
2487		day = string_to_date(date_str, "%Y-%m-%d")
2488		return {
2489		'title': title,
2490		'img': [i['content'] for i in imgs],
2491		'month': day.month,
2492		'year': day.year,
2493		'day': day.day,
2494		}
2495
2496
		@@ 1092-1116 (lines=25) @@
1089		}
1090
1091
1092		class Mercworks(GenericNavigableComic):
1093		"""Class to retrieve Mercworks comics."""
1094		# Also on http://mercworks.tumblr.com
1095		name = 'mercworks'
1096		long_name = 'Mercworks'
1097		url = 'http://mercworks.net'
1098		get_first_comic_link = get_a_comicnavbase_comicnavfirst
1099		get_navi_link = get_link_rel_next
1100
1101		@classmethod
1102		def get_comic_info(cls, soup, link):
1103		"""Get information about a particular comics."""
1104		title = soup.find('meta', property='og:title')['content']
1105		metadesc = soup.find('meta', property='og:description')
1106		desc = metadesc['content'] if metadesc else ""
1107		date_str = soup.find('meta', property='article:published_time')['content'][:10]
1108		day = string_to_date(date_str, "%Y-%m-%d")
1109		imgs = soup.find_all('meta', property='og:image')
1110		return {
1111		'img': [i['content'] for i in imgs],
1112		'title': title,
1113		'desc': desc,
1114		'day': day.day,
1115		'month': day.month,
1116		'year': day.year
1117		}
1118
1119
		@@ 3504-3525 (lines=22) @@
3501		}
3502
3503
3504		class MarketoonistComics(GenericNavigableComic):
3505		"""Class to retrieve Marketoonist Comics."""
3506		name = 'marketoonist'
3507		long_name = 'Marketoonist'
3508		url = 'https://marketoonist.com/cartoons'
3509		get_first_comic_link = simulate_first_link
3510		get_navi_link = get_link_rel_next
3511		first_url = 'https://marketoonist.com/2002/10/the-8-types-of-brand-managers-2.html'
3512
3513		@classmethod
3514		def get_comic_info(cls, soup, link):
3515		"""Get information about a particular comics."""
3516		imgs = soup.find_all('meta', property='og:image')
3517		date_str = soup.find('meta', property='article:published_time')['content'][:10]
3518		day = string_to_date(date_str, "%Y-%m-%d")
3519		title = soup.find('meta', property='og:title')['content']
3520		return {
3521		'img': [i['content'] for i in imgs],
3522		'day': day.day,
3523		'month': day.month,
3524		'year': day.year,
3525		'title': title,
3526		}
3527
3528
		@@ 2345-2366 (lines=22) @@
2342		}
2343
2344
2345		class PeterLauris(GenericNavigableComic):
2346		"""Class to retrieve Peter Lauris comics."""
2347		name = 'peterlauris'
2348		long_name = 'Peter Lauris'
2349		url = 'http://peterlauris.com/comics'
2350		get_navi_link = get_a_rel_next
2351		get_first_comic_link = simulate_first_link
2352		first_url = 'http://peterlauris.com/comics/just-in-case/'
2353
2354		@classmethod
2355		def get_comic_info(cls, soup, link):
2356		"""Get information about a particular comics."""
2357		title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
2358		date_str = soup.find('meta', property='article:published_time')['content'][:10]
2359		day = string_to_date(date_str, "%Y-%m-%d")
2360		imgs = soup.find_all('meta', property='og:image')
2361		return {
2362		'title': title,
2363		'img': [i['content'] for i in imgs],
2364		'month': day.month,
2365		'year': day.year,
2366		'day': day.day,
2367		}
2368
2369
		@@ 434-455 (lines=22) @@
431		}
432
433
434		class GenericLeMondeBlog(GenericNavigableComic):
435		"""Generic class to retrieve comics from Le Monde blogs."""
436		_categories = ('LEMONDE', 'FRANCAIS')
437		get_navi_link = get_link_rel_next
438		get_first_comic_link = simulate_first_link
439		first_url = NotImplemented
440
441		@classmethod
442		def get_comic_info(cls, soup, link):
443		"""Get information about a particular comics."""
444		url2 = soup.find('link', rel='shortlink')['href']
445		title = soup.find('meta', property='og:title')['content']
446		date_str = soup.find("span", class_="entry-date").string
447		day = string_to_date(date_str, "%d %B %Y", "fr_FR.utf8")
448		imgs = soup.find_all('meta', property='og:image')
449		return {
450		'title': title,
451		'url2': url2,
452		'img': [convert_iri_to_plain_ascii_uri(i['content']) for i in imgs],
453		'month': day.month,
454		'year': day.year,
455		'day': day.day,
456		}
457
458
		@@ 1004-1029 (lines=26) @@
1001		}
1002
1003
1004		class MyExtraLife(GenericNavigableComic):
1005		"""Class to retrieve My Extra Life comics."""
1006		name = 'extralife'
1007		long_name = 'My Extra Life'
1008		url = 'http://www.myextralife.com'
1009		get_navi_link = get_link_rel_next
1010
1011		@classmethod
1012		def get_first_comic_link(cls):
1013		"""Get link to first comics."""
1014		return get_soup_at_url(cls.url).find('a', class_='comic_nav_link first_comic_link')
1015
1016		@classmethod
1017		def get_comic_info(cls, soup, link):
1018		"""Get information about a particular comics."""
1019		title = soup.find("h1", class_="comic_title").string
1020		date_str = soup.find("span", class_="comic_date").string
1021		day = string_to_date(date_str, "%B %d, %Y")
1022		imgs = soup.find_all("img", class_="comic")
1023		assert all(i['alt'] == i['title'] == title for i in imgs)
1024		return {
1025		'title': title,
1026		'img': [i['src'] for i in imgs if i["src"]],
1027		'day': day.day,
1028		'month': day.month,
1029		'year': day.year
1030		}
1031
1032
		@@ 2370-2394 (lines=25) @@
2367		}
2368
2369
2370		class JuliasDrawings(GenericListableComic):
2371		"""Class to retrieve Julia's Drawings."""
2372		name = 'julia'
2373		long_name = "Julia's Drawings"
2374		url = 'https://drawings.jvns.ca'
2375		get_url_from_archive_element = get_href
2376
2377		@classmethod
2378		def get_archive_elements(cls):
2379		articles = get_soup_at_url(cls.url).find_all('article', class_='li post')
2380		return [art.find('a') for art in reversed(articles)]
2381
2382		@classmethod
2383		def get_comic_info(cls, soup, archive_elt):
2384		"""Get information about a particular comics."""
2385		date_str = soup.find('meta', property='og:article:published_time')['content'][:10]
2386		day = string_to_date(date_str, "%Y-%m-%d")
2387		title = soup.find('h3', class_='p-post-title').string
2388		imgs = soup.find('section', class_='post-content').find_all('img')
2389		return {
2390		'title': title,
2391		'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
2392		'month': day.month,
2393		'year': day.year,
2394		'day': day.day,
2395		}
2396
2397

SylvainDe / ComicBookMaker

Code Duplication Length = 22-26 lines in 8 locations

comics.py 8 locations