Code Duplication - SylvainDe/ComicBookMaker - Measure and Improve Code Quality continuously with Scrutinizer

Code Duplication Length = 22-26 lines in 8 locations

comics.py 8 locations


        }


class LinsEditions(GenericNavigableComic):
    """Retrieve the comics published by L.I.N.S. Editions.

    Title, images and publication date are all read from <meta> tags
    of the comic page.
    """
    # Also published on https://linscomics.tumblr.com
    # Now on https://warandpeas.com
    name = 'lins'
    long_name = 'L.I.N.S. Editions'
    url = 'https://linsedition.com'
    _categories = ('LINS', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'https://linsedition.com/2011/09/07/l-i-n-s/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dictionary for one comic page.

        `soup` is the parsed comic page; `link` is not used here.
        The result has the keys 'title', 'img' (one entry per og:image
        meta tag), 'month', 'year' and 'day'.
        """
        og_title = soup.find('meta', property='og:title')
        comic_title = og_title['content']
        og_images = soup.find_all('meta', property='og:image')
        published = soup.find('meta', property='article:published_time')
        # Only the first 10 characters (the "%Y-%m-%d" part) are parsed.
        pub_date = string_to_date(published['content'][:10], "%Y-%m-%d")
        image_urls = [meta['content'] for meta in og_images]
        return {
            'title': comic_title,
            'img': image_urls,
            'month': pub_date.month,
            'year': pub_date.year,
            'day': pub_date.day,
        }



        }


class Mercworks(GenericNavigableComic):
    """Retrieve the Mercworks comics.

    Title, optional description, images and publication date are read
    from <meta> tags of the comic page.
    """
    # Also published on http://mercworks.tumblr.com
    name = 'mercworks'
    long_name = 'Mercworks'
    url = 'http://mercworks.net'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dictionary for one comic page.

        `soup` is the parsed comic page; `link` is not used here.
        The result has the keys 'img' (one entry per og:image meta
        tag), 'title', 'desc', 'day', 'month' and 'year'.
        """
        og_title = soup.find('meta', property='og:title')
        comic_title = og_title['content']
        og_desc = soup.find('meta', property='og:description')
        # The description tag may be missing: use an empty string then.
        if og_desc:
            description = og_desc['content']
        else:
            description = ""
        published = soup.find('meta', property='article:published_time')
        # Only the first 10 characters (the "%Y-%m-%d" part) are parsed.
        pub_date = string_to_date(published['content'][:10], "%Y-%m-%d")
        og_images = soup.find_all('meta', property='og:image')
        image_urls = [meta['content'] for meta in og_images]
        return {
            'img': image_urls,
            'title': comic_title,
            'desc': description,
            'day': pub_date.day,
            'month': pub_date.month,
            'year': pub_date.year
        }



    _categories = ('DELETED', )


class ExtraFabulousComics(GenericNavigableComic):
    """Class to retrieve Extra Fabulous Comics.

    Images are the <img> tags whose source lives under the site's
    wp-content/uploads/ directory; title and publication date are read
    from <meta> tags of the comic page.
    """
    # Also on https://extrafabulouscomics.tumblr.com
    name = 'efc'
    long_name = 'Extra Fabulous Comics'
    url = 'http://extrafabulouscomics.com'
    _categories = ('EFC', )
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        `soup` is the parsed comic page; `link` is not used here.
        The result has the keys 'title', 'img', 'month', 'year', 'day'
        and 'prefix' (the title followed by a dash).
        """
        # The URL is escaped so that its '.' characters are matched
        # literally instead of acting as regex wildcards.
        img_src_re = re.compile('^%s/wp-content/uploads/' % re.escape(cls.url))
        imgs = soup.find_all('img', src=img_src_re)
        title = soup.find('meta', property='og:title')['content']
        # Only the first 10 characters (the "%Y-%m-%d" part) are parsed.
        date_str = soup.find('meta', property='article:published_time')['content'][:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        return {
            'title': title,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'prefix': title + '-'
        }



        }


class MarketoonistComics(GenericNavigableComic):
    """Retrieve the Marketoonist comics.

    Images, publication date and title are all read from <meta> tags
    of the comic page.
    """
    name = 'marketoonist'
    long_name = 'Marketoonist'
    url = 'https://marketoonist.com/cartoons'
    get_first_comic_link = simulate_first_link
    get_navi_link = get_link_rel_next
    first_url = 'https://marketoonist.com/2002/10/the-8-types-of-brand-managers-2.html'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dictionary for one comic page.

        `soup` is the parsed comic page; `link` is not used here.
        The result has the keys 'img' (one entry per og:image meta
        tag), 'day', 'month', 'year' and 'title'.
        """
        og_images = soup.find_all('meta', property='og:image')
        published = soup.find('meta', property='article:published_time')
        # Only the first 10 characters (the "%Y-%m-%d" part) are parsed.
        pub_date = string_to_date(published['content'][:10], "%Y-%m-%d")
        og_title = soup.find('meta', property='og:title')
        comic_title = og_title['content']
        image_urls = [meta['content'] for meta in og_images]
        return {
            'img': image_urls,
            'day': pub_date.day,
            'month': pub_date.month,
            'year': pub_date.year,
            'title': comic_title,
        }



        }


class GenericLeMondeBlog(GenericNavigableComic):
    """Common base for the comics hosted on Le Monde blogs.

    The short link, title and images come from <link>/<meta> tags; the
    date comes from the "entry-date" span of the page.
    """
    _categories = ('LEMONDE', 'FRANCAIS')
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    # Placeholder value; presumably each concrete blog class overrides it.
    first_url = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dictionary for one comic page.

        `soup` is the parsed comic page; `link` is not used here.
        The result has the keys 'title', 'url2' (the page's shortlink),
        'img', 'month', 'year' and 'day'.
        """
        shortlink = soup.find('link', rel='shortlink')
        short_url = shortlink['href']
        og_title = soup.find('meta', property='og:title')
        comic_title = og_title['content']
        date_span = soup.find("span", class_="entry-date")
        # The date text is parsed as "%d %B %Y" with the fr_FR.utf8 locale.
        pub_date = string_to_date(date_span.string, "%d %B %Y", "fr_FR.utf8")
        og_images = soup.find_all('meta', property='og:image')
        # Each image address goes through convert_iri_to_plain_ascii_uri.
        image_urls = [
            convert_iri_to_plain_ascii_uri(meta['content'])
            for meta in og_images
        ]
        return {
            'title': comic_title,
            'url2': short_url,
            'img': image_urls,
            'month': pub_date.month,
            'year': pub_date.year,
            'day': pub_date.day,
        }



        }


class SafelyEndangered(GenericNavigableComic):
    """Retrieve the Safely Endangered comics.

    Title and date come from the post header elements; the images are
    the <img> tags found inside the div whose id is "comic".
    """
    # Also published on http://tumblr.safelyendangered.com
    name = 'endangered'
    long_name = 'Safely Endangered'
    url = 'http://www.safelyendangered.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.safelyendangered.com/comic/ignored/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dictionary for one comic page.

        `soup` is the parsed comic page; `link` is not used here.
        The result has the keys 'day', 'month', 'year', 'img', 'title'
        and 'alt' (the alt text of the first image).
        """
        comic_title = soup.find('h2', class_='post-title').string
        date_text = soup.find('span', class_='post-date').string
        pub_date = string_to_date(date_text, '%B %d, %Y')
        comic_div = soup.find('div', id='comic')
        pictures = comic_div.find_all('img')
        # The first image provides the alt text; at least one is expected.
        first_alt = pictures[0]['alt']
        # Sanity check: every image carries the same text as alt and title.
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        image_urls = [pic['src'] for pic in pictures]
        return {
            'day': pub_date.day,
            'month': pub_date.month,
            'year': pub_date.year,
            'img': image_urls,
            'title': comic_title,
            'alt': first_alt,
        }



        }


class MyExtraLife(GenericNavigableComic):
    """Retrieve the My Extra Life comics.

    Title, date and images are read from dedicated elements of the
    comic page (classes "comic_title", "comic_date" and "comic").
    """
    name = 'extralife'
    long_name = 'My Extra Life'
    url = 'http://www.myextralife.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the "first comic" navigation link of the home page."""
        home_page = get_soup_at_url(cls.url)
        return home_page.find('a', class_='comic_nav_link first_comic_link')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dictionary for one comic page.

        `soup` is the parsed comic page; `link` is not used here.
        The result has the keys 'title', 'img' (images with an empty
        src are left out), 'day', 'month' and 'year'.
        """
        comic_title = soup.find("h1", class_="comic_title").string
        date_text = soup.find("span", class_="comic_date").string
        pub_date = string_to_date(date_text, "%B %d, %Y")
        pictures = soup.find_all("img", class_="comic")
        # Sanity check: alt and title of every image equal the page title.
        assert all(
            pic['alt'] == pic['title'] == comic_title for pic in pictures
        )
        image_urls = [pic['src'] for pic in pictures if pic["src"]]
        return {
            'title': comic_title,
            'img': image_urls,
            'day': pub_date.day,
            'month': pub_date.month,
            'year': pub_date.year
        }



        }


class JuliasDrawings(GenericListableComic):
    """Retrieve Julia's Drawings.

    The comics are listed from the <article> elements of the home page;
    each comic page then provides its date, title and images.
    """
    name = 'julia'
    long_name = "Julia's Drawings"
    url = 'https://drawings.jvns.ca'
    get_url_from_archive_element = get_href

    @classmethod
    def get_archive_elements(cls):
        """Return the first link of each home page article, last article first.

        The articles are the <article class="li post"> elements of the
        home page; they are walked in reverse page order (presumably
        so that the oldest comic comes first - to be confirmed).
        """
        home_page = get_soup_at_url(cls.url)
        articles = home_page.find_all('article', class_='li post')
        links = []
        for article in reversed(articles):
            links.append(article.find('a'))
        return links

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Build the information dictionary for one comic page.

        `soup` is the parsed comic page; `archive_elt` is not used here.
        The result has the keys 'title', 'img' (image sources joined
        with the site URL), 'month', 'year' and 'day'.
        """
        published = soup.find('meta', property='og:article:published_time')
        # Only the first 10 characters (the "%Y-%m-%d" part) are parsed.
        pub_date = string_to_date(published['content'][:10], "%Y-%m-%d")
        comic_title = soup.find('h3', class_='p-post-title').string
        content = soup.find('section', class_='post-content')
        pictures = content.find_all('img')
        image_urls = [urljoin_wrapper(cls.url, pic['src']) for pic in pictures]
        return {
            'title': comic_title,
            'img': image_urls,
            'month': pub_date.month,
            'year': pub_date.year,
            'day': pub_date.day,
        }



		@@ 2429-2453 (lines=25) @@
2426		}
2427
2428
2429		class LinsEditions(GenericNavigableComic):
2430		"""Class to retrieve L.I.N.S. Editions comics."""
2431		# Also on https://linscomics.tumblr.com
2432		# Now on https://warandpeas.com
2433		name = 'lins'
2434		long_name = 'L.I.N.S. Editions'
2435		url = 'https://linsedition.com'
2436		_categories = ('LINS', )
2437		get_navi_link = get_link_rel_next
2438		get_first_comic_link = simulate_first_link
2439		first_url = 'https://linsedition.com/2011/09/07/l-i-n-s/'
2440
2441		@classmethod
2442		def get_comic_info(cls, soup, link):
2443		"""Get information about a particular comics."""
2444		title = soup.find('meta', property='og:title')['content']
2445		imgs = soup.find_all('meta', property='og:image')
2446		date_str = soup.find('meta', property='article:published_time')['content'][:10]
2447		day = string_to_date(date_str, "%Y-%m-%d")
2448		return {
2449		'title': title,
2450		'img': [i['content'] for i in imgs],
2451		'month': day.month,
2452		'year': day.year,
2453		'day': day.day,
2454		}
2455
2456
		@@ 1073-1097 (lines=25) @@
1070		}
1071
1072
1073		class Mercworks(GenericNavigableComic):
1074		"""Class to retrieve Mercworks comics."""
1075		# Also on http://mercworks.tumblr.com
1076		name = 'mercworks'
1077		long_name = 'Mercworks'
1078		url = 'http://mercworks.net'
1079		get_first_comic_link = get_a_comicnavbase_comicnavfirst
1080		get_navi_link = get_link_rel_next
1081
1082		@classmethod
1083		def get_comic_info(cls, soup, link):
1084		"""Get information about a particular comics."""
1085		title = soup.find('meta', property='og:title')['content']
1086		metadesc = soup.find('meta', property='og:description')
1087		desc = metadesc['content'] if metadesc else ""
1088		date_str = soup.find('meta', property='article:published_time')['content'][:10]
1089		day = string_to_date(date_str, "%Y-%m-%d")
1090		imgs = soup.find_all('meta', property='og:image')
1091		return {
1092		'img': [i['content'] for i in imgs],
1093		'title': title,
1094		'desc': desc,
1095		'day': day.day,
1096		'month': day.month,
1097		'year': day.year
1098		}
1099
1100
		@@ 387-411 (lines=25) @@
384		_categories = ('DELETED', )
385
386
387		class ExtraFabulousComics(GenericNavigableComic):
388		"""Class to retrieve Extra Fabulous Comics."""
389		# Also on https://extrafabulouscomics.tumblr.com
390		name = 'efc'
391		long_name = 'Extra Fabulous Comics'
392		url = 'http://extrafabulouscomics.com'
393		_categories = ('EFC', )
394		get_first_comic_link = get_a_navi_navifirst
395		get_navi_link = get_link_rel_next
396
397		@classmethod
398		def get_comic_info(cls, soup, link):
399		"""Get information about a particular comics."""
400		img_src_re = re.compile('^%s/wp-content/uploads/' % cls.url)
401		imgs = soup.find_all('img', src=img_src_re)
402		title = soup.find('meta', property='og:title')['content']
403		date_str = soup.find('meta', property='article:published_time')['content'][:10]
404		day = string_to_date(date_str, "%Y-%m-%d")
405		return {
406		'title': title,
407		'img': [i['src'] for i in imgs],
408		'month': day.month,
409		'year': day.year,
410		'day': day.day,
411		'prefix': title + '-'
412		}
413
414
		@@ 3425-3446 (lines=22) @@
3422		}
3423
3424
3425		class MarketoonistComics(GenericNavigableComic):
3426		"""Class to retrieve Marketoonist Comics."""
3427		name = 'marketoonist'
3428		long_name = 'Marketoonist'
3429		url = 'https://marketoonist.com/cartoons'
3430		get_first_comic_link = simulate_first_link
3431		get_navi_link = get_link_rel_next
3432		first_url = 'https://marketoonist.com/2002/10/the-8-types-of-brand-managers-2.html'
3433
3434		@classmethod
3435		def get_comic_info(cls, soup, link):
3436		"""Get information about a particular comics."""
3437		imgs = soup.find_all('meta', property='og:image')
3438		date_str = soup.find('meta', property='article:published_time')['content'][:10]
3439		day = string_to_date(date_str, "%Y-%m-%d")
3440		title = soup.find('meta', property='og:title')['content']
3441		return {
3442		'img': [i['content'] for i in imgs],
3443		'day': day.day,
3444		'month': day.month,
3445		'year': day.year,
3446		'title': title,
3447		}
3448
3449
		@@ 415-436 (lines=22) @@
412		}
413
414
415		class GenericLeMondeBlog(GenericNavigableComic):
416		"""Generic class to retrieve comics from Le Monde blogs."""
417		_categories = ('LEMONDE', 'FRANCAIS')
418		get_navi_link = get_link_rel_next
419		get_first_comic_link = simulate_first_link
420		first_url = NotImplemented
421
422		@classmethod
423		def get_comic_info(cls, soup, link):
424		"""Get information about a particular comics."""
425		url2 = soup.find('link', rel='shortlink')['href']
426		title = soup.find('meta', property='og:title')['content']
427		date_str = soup.find("span", class_="entry-date").string
428		day = string_to_date(date_str, "%d %B %Y", "fr_FR.utf8")
429		imgs = soup.find_all('meta', property='og:image')
430		return {
431		'title': title,
432		'url2': url2,
433		'img': [convert_iri_to_plain_ascii_uri(i['content']) for i in imgs],
434		'month': day.month,
435		'year': day.year,
436		'day': day.day,
437		}
438
439
		@@ 1859-1884 (lines=26) @@
1856		}
1857
1858
1859		class SafelyEndangered(GenericNavigableComic):
1860		"""Class to retrieve Safely Endangered comics."""
1861		# Also on http://tumblr.safelyendangered.com
1862		name = 'endangered'
1863		long_name = 'Safely Endangered'
1864		url = 'http://www.safelyendangered.com'
1865		get_navi_link = get_link_rel_next
1866		get_first_comic_link = simulate_first_link
1867		first_url = 'http://www.safelyendangered.com/comic/ignored/'
1868
1869		@classmethod
1870		def get_comic_info(cls, soup, link):
1871		"""Get information about a particular comics."""
1872		title = soup.find('h2', class_='post-title').string
1873		date_str = soup.find('span', class_='post-date').string
1874		day = string_to_date(date_str, '%B %d, %Y')
1875		imgs = soup.find('div', id='comic').find_all('img')
1876		alt = imgs[0]['alt']
1877		assert all(i['alt'] == i['title'] for i in imgs)
1878		return {
1879		'day': day.day,
1880		'month': day.month,
1881		'year': day.year,
1882		'img': [i['src'] for i in imgs],
1883		'title': title,
1884		'alt': alt,
1885		}
1886
1887
		@@ 985-1010 (lines=26) @@
982		}
983
984
985		class MyExtraLife(GenericNavigableComic):
986		"""Class to retrieve My Extra Life comics."""
987		name = 'extralife'
988		long_name = 'My Extra Life'
989		url = 'http://www.myextralife.com'
990		get_navi_link = get_link_rel_next
991
992		@classmethod
993		def get_first_comic_link(cls):
994		"""Get link to first comics."""
995		return get_soup_at_url(cls.url).find('a', class_='comic_nav_link first_comic_link')
996
997		@classmethod
998		def get_comic_info(cls, soup, link):
999		"""Get information about a particular comics."""
1000		title = soup.find("h1", class_="comic_title").string
1001		date_str = soup.find("span", class_="comic_date").string
1002		day = string_to_date(date_str, "%B %d, %Y")
1003		imgs = soup.find_all("img", class_="comic")
1004		assert all(i['alt'] == i['title'] == title for i in imgs)
1005		return {
1006		'title': title,
1007		'img': [i['src'] for i in imgs if i["src"]],
1008		'day': day.day,
1009		'month': day.month,
1010		'year': day.year
1011		}
1012
1013
		@@ 2330-2354 (lines=25) @@
2327		}
2328
2329
2330		class JuliasDrawings(GenericListableComic):
2331		"""Class to retrieve Julia's Drawings."""
2332		name = 'julia'
2333		long_name = "Julia's Drawings"
2334		url = 'https://drawings.jvns.ca'
2335		get_url_from_archive_element = get_href
2336
2337		@classmethod
2338		def get_archive_elements(cls):
2339		articles = get_soup_at_url(cls.url).find_all('article', class_='li post')
2340		return [art.find('a') for art in reversed(articles)]
2341
2342		@classmethod
2343		def get_comic_info(cls, soup, archive_elt):
2344		"""Get information about a particular comics."""
2345		date_str = soup.find('meta', property='og:article:published_time')['content'][:10]
2346		day = string_to_date(date_str, "%Y-%m-%d")
2347		title = soup.find('h3', class_='p-post-title').string
2348		imgs = soup.find('section', class_='post-content').find_all('img')
2349		return {
2350		'title': title,
2351		'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
2352		'month': day.month,
2353		'year': day.year,
2354		'day': day.day,
2355		}
2356
2357

SylvainDe / ComicBookMaker

Code Duplication Length = 22-26 lines in 8 locations

comics.py 8 locations