Code Duplication - SylvainDe/ComicBookMaker - Measure and Improve Code Quality continuously with Scrutinizer

Code Duplication Length = 22-26 lines in 7 locations

comics.py 7 locations


    # Now on https://warandpeas.com
    name = 'lins'
    long_name = 'L.I.N.S. Editions'
    url = 'https://linsedition.com'
    _categories = ('LINS', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'https://linsedition.com/2011/09/07/l-i-n-s/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect title, image URLs and publication date for one comic page.

        Everything is read from the page's <meta> tags: og:title, every
        og:image, and article:published_time.
        """
        title_tag = soup.find('meta', property='og:title')
        comic_title = title_tag['content']
        image_tags = soup.find_all('meta', property='og:image')
        published_tag = soup.find('meta', property='article:published_time')
        # Only the leading "YYYY-MM-DD" part of the timestamp is parsed.
        published = string_to_date(published_tag['content'][:10], "%Y-%m-%d")
        info = {}
        info['title'] = comic_title
        info['img'] = [tag['content'] for tag in image_tags]
        info['month'] = published.month
        info['year'] = published.year
        info['day'] = published.day
        return info


class ThorsThundershack(GenericNavigableComic):
    """Class to retrieve Thor's Thundershack comics."""
    # Also on http://tapastic.com/series/Thors-Thundershac
    name = 'thor'
    long_name = 'Thor\'s Thundershack'
    url = 'http://www.thorsthundershack.com'

        }


class Mercworks(GenericNavigableComic):
    """Class to retrieve Mercworks comics."""
    # Also on http://mercworks.tumblr.com
    name = 'mercworks'
    long_name = 'Mercworks'
    url = 'http://mercworks.net'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for one comic page from its <meta> tags.

        The description is taken from og:description and falls back to an
        empty string when that tag is absent.
        """
        comic_title = soup.find('meta', property='og:title')['content']
        desc_tag = soup.find('meta', property='og:description')
        if desc_tag:
            description = desc_tag['content']
        else:
            description = ""
        published_tag = soup.find('meta', property='article:published_time')
        # Only the leading "YYYY-MM-DD" part of the timestamp is parsed.
        published = string_to_date(published_tag['content'][:10], "%Y-%m-%d")
        image_tags = soup.find_all('meta', property='og:image')
        info = {}
        info['img'] = [tag['content'] for tag in image_tags]
        info['title'] = comic_title
        info['desc'] = description
        info['day'] = published.day
        info['month'] = published.month
        info['year'] = published.year
        return info



        return []


class ExtraFabulousComics(GenericNavigableComic):
    """Class to retrieve Extra Fabulous Comics."""
    name = 'efc'
    long_name = 'Extra Fabulous Comics'
    url = 'http://extrafabulouscomics.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Images are the <img> tags whose src starts with the site's
        wp-content/uploads path; title and date come from the og:title and
        article:published_time <meta> tags.

        Returns a dict with keys 'title', 'img', 'month', 'year', 'day' and
        'prefix' (the title followed by '-').
        """
        # Escape the base URL so that its '.' characters match literally
        # instead of acting as regex wildcards.
        img_src_re = re.compile('^%s/wp-content/uploads/' % re.escape(cls.url))
        imgs = soup.find_all('img', src=img_src_re)
        title = soup.find('meta', property='og:title')['content']
        # Only the leading "YYYY-MM-DD" part of the timestamp is parsed.
        date_str = soup.find('meta', property='article:published_time')['content'][:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        return {
            'title': title,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'prefix': title + '-'
        }



    long_name = 'Marketoonist'
    url = 'https://marketoonist.com/cartoons'
    get_first_comic_link = simulate_first_link
    get_navi_link = get_link_rel_next
    first_url = 'https://marketoonist.com/2002/10/the-8-types-of-brand-managers-2.html'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect image URLs, publication date and title for one cartoon page.

        Everything is read from the page's <meta> tags: every og:image,
        article:published_time, and og:title.
        """
        image_tags = soup.find_all('meta', property='og:image')
        published_tag = soup.find('meta', property='article:published_time')
        # Only the leading "YYYY-MM-DD" part of the timestamp is parsed.
        published = string_to_date(published_tag['content'][:10], "%Y-%m-%d")
        title_tag = soup.find('meta', property='og:title')
        info = {}
        info['img'] = [tag['content'] for tag in image_tags]
        info['day'] = published.day
        info['month'] = published.month
        info['year'] = published.year
        info['title'] = title_tag['content']
        return info


class ConsoliaComics(GenericNavigableComic):
    """Class to retrieve Consolia comics."""
    name = 'consolia'
    long_name = 'consolia'
    url = 'https://consolia-comic.com'
    get_url_from_link = join_cls_url_to_href

        }


class SafelyEndangered(GenericNavigableComic):
    """Class to retrieve Safely Endangered comics."""
    # Also on http://tumblr.safelyendangered.com
    name = 'endangered'
    long_name = 'Safely Endangered'
    url = 'http://www.safelyendangered.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.safelyendangered.com/comic/ignored/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for one comic page.

        Title and date come from the post header elements; images are all
        <img> tags inside the div with id "comic". The alt text is taken
        from the first image.
        """
        comic_title = soup.find('h2', class_='post-title').string
        date_text = soup.find('span', class_='post-date').string
        published = string_to_date(date_text, '%B %d, %Y')
        comic_div = soup.find('div', id='comic')
        images = comic_div.find_all('img')
        first_alt = images[0]['alt']
        # Sanity check: every image is expected to carry identical alt/title.
        assert all(img['alt'] == img['title'] for img in images)
        info = {}
        info['day'] = published.day
        info['month'] = published.month
        info['year'] = published.year
        info['img'] = [img['src'] for img in images]
        info['title'] = comic_title
        info['alt'] = first_alt
        return info



        }


class GenericLeMondeBlog(GenericNavigableComic):
    """Generic class to retrieve comics from Le Monde blogs."""
    _categories = ('LEMONDE', 'FRANCAIS')
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for one blog post.

        Reads the shortlink, the og:title, the "entry-date" span (parsed as
        a French-locale date) and every og:image, whose URLs are passed
        through convert_iri_to_plain_ascii_uri.
        """
        shortlink = soup.find('link', rel='shortlink')['href']
        post_title = soup.find('meta', property='og:title')['content']
        date_text = soup.find("span", class_="entry-date").string
        published = string_to_date(date_text, "%d %B %Y", "fr_FR.utf8")
        image_tags = soup.find_all('meta', property='og:image')
        image_urls = [
            convert_iri_to_plain_ascii_uri(tag['content'])
            for tag in image_tags
        ]
        info = {}
        info['title'] = post_title
        info['url2'] = shortlink
        info['img'] = image_urls
        info['month'] = published.month
        info['year'] = published.year
        info['day'] = published.day
        return info



        }


class MyExtraLife(GenericNavigableComic):
    """Class to retrieve My Extra Life comics."""
    name = 'extralife'
    long_name = 'My Extra Life'
    url = 'http://www.myextralife.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the "first comic" navigation link found on the home page."""
        home_page = get_soup_at_url(cls.url)
        return home_page.find('a', class_='comic_nav_link first_comic_link')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for one comic page.

        Title and date come from the "comic_title" heading and "comic_date"
        span; images are the <img> tags with class "comic". Images with an
        empty src are left out of the result.
        """
        comic_title = soup.find("h1", class_="comic_title").string
        date_text = soup.find("span", class_="comic_date").string
        published = string_to_date(date_text, "%B %d, %Y")
        images = soup.find_all("img", class_="comic")
        # Sanity check: alt and title of every image should equal the title.
        assert all(
            img['alt'] == img['title'] == comic_title for img in images
        )
        info = {}
        info['title'] = comic_title
        info['img'] = [img['src'] for img in images if img["src"]]
        info['day'] = published.day
        info['month'] = published.month
        info['year'] = published.year
        return info



		@@ 2285-2309 (lines=25) @@
2282		# Now on https://warandpeas.com
2283		name = 'lins'
2284		long_name = 'L.I.N.S. Editions'
2285		url = 'https://linsedition.com'
2286		_categories = ('LINS', )
2287		get_navi_link = get_link_rel_next
2288		get_first_comic_link = simulate_first_link
2289		first_url = 'https://linsedition.com/2011/09/07/l-i-n-s/'
2290
2291		@classmethod
2292		def get_comic_info(cls, soup, link):
2293		"""Get information about a particular comics."""
2294		title = soup.find('meta', property='og:title')['content']
2295		imgs = soup.find_all('meta', property='og:image')
2296		date_str = soup.find('meta', property='article:published_time')['content'][:10]
2297		day = string_to_date(date_str, "%Y-%m-%d")
2298		return {
2299		'title': title,
2300		'img': [i['content'] for i in imgs],
2301		'month': day.month,
2302		'year': day.year,
2303		'day': day.day,
2304		}
2305
2306
2307		class ThorsThundershack(GenericNavigableComic):
2308		"""Class to retrieve Thor's Thundershack comics."""
2309		# Also on http://tapastic.com/series/Thors-Thundershac
2310		name = 'thor'
2311		long_name = 'Thor\'s Thundershack'
2312		url = 'http://www.thorsthundershack.com'
		@@ 1020-1044 (lines=25) @@
1017		}
1018
1019
1020		class Mercworks(GenericNavigableComic):
1021		"""Class to retrieve Mercworks comics."""
1022		# Also on http://mercworks.tumblr.com
1023		name = 'mercworks'
1024		long_name = 'Mercworks'
1025		url = 'http://mercworks.net'
1026		get_first_comic_link = get_a_comicnavbase_comicnavfirst
1027		get_navi_link = get_link_rel_next
1028
1029		@classmethod
1030		def get_comic_info(cls, soup, link):
1031		"""Get information about a particular comics."""
1032		title = soup.find('meta', property='og:title')['content']
1033		metadesc = soup.find('meta', property='og:description')
1034		desc = metadesc['content'] if metadesc else ""
1035		date_str = soup.find('meta', property='article:published_time')['content'][:10]
1036		day = string_to_date(date_str, "%Y-%m-%d")
1037		imgs = soup.find_all('meta', property='og:image')
1038		return {
1039		'img': [i['content'] for i in imgs],
1040		'title': title,
1041		'desc': desc,
1042		'day': day.day,
1043		'month': day.month,
1044		'year': day.year
1045		}
1046
1047
		@@ 355-377 (lines=23) @@
352		return []
353
354
355		class ExtraFabulousComics(GenericNavigableComic):
356		"""Class to retrieve Extra Fabulous Comics."""
357		name = 'efc'
358		long_name = 'Extra Fabulous Comics'
359		url = 'http://extrafabulouscomics.com'
360		get_first_comic_link = get_a_navi_navifirst
361		get_navi_link = get_link_rel_next
362
363		@classmethod
364		def get_comic_info(cls, soup, link):
365		"""Get information about a particular comics."""
366		img_src_re = re.compile('^%s/wp-content/uploads/' % cls.url)
367		imgs = soup.find_all('img', src=img_src_re)
368		title = soup.find('meta', property='og:title')['content']
369		date_str = soup.find('meta', property='article:published_time')['content'][:10]
370		day = string_to_date(date_str, "%Y-%m-%d")
371		return {
372		'title': title,
373		'img': [i['src'] for i in imgs],
374		'month': day.month,
375		'year': day.year,
376		'day': day.day,
377		'prefix': title + '-'
378		}
379
380
		@@ 3194-3215 (lines=22) @@
3191		long_name = 'Marketoonist'
3192		url = 'https://marketoonist.com/cartoons'
3193		get_first_comic_link = simulate_first_link
3194		get_navi_link = get_link_rel_next
3195		first_url = 'https://marketoonist.com/2002/10/the-8-types-of-brand-managers-2.html'
3196
3197		@classmethod
3198		def get_comic_info(cls, soup, link):
3199		"""Get information about a particular comics."""
3200		imgs = soup.find_all('meta', property='og:image')
3201		date_str = soup.find('meta', property='article:published_time')['content'][:10]
3202		day = string_to_date(date_str, "%Y-%m-%d")
3203		title = soup.find('meta', property='og:title')['content']
3204		return {
3205		'img': [i['content'] for i in imgs],
3206		'day': day.day,
3207		'month': day.month,
3208		'year': day.year,
3209		'title': title,
3210		}
3211
3212
3213		class ConsoliaComics(GenericNavigableComic):
3214		"""Class to retrieve Consolia comics."""
3215		name = 'consolia'
3216		long_name = 'consolia'
3217		url = 'https://consolia-comic.com'
3218		get_url_from_link = join_cls_url_to_href
		@@ 1806-1831 (lines=26) @@
1803		}
1804
1805
1806		class SafelyEndangered(GenericNavigableComic):
1807		"""Class to retrieve Safely Endangered comics."""
1808		# Also on http://tumblr.safelyendangered.com
1809		name = 'endangered'
1810		long_name = 'Safely Endangered'
1811		url = 'http://www.safelyendangered.com'
1812		get_navi_link = get_link_rel_next
1813		get_first_comic_link = simulate_first_link
1814		first_url = 'http://www.safelyendangered.com/comic/ignored/'
1815
1816		@classmethod
1817		def get_comic_info(cls, soup, link):
1818		"""Get information about a particular comics."""
1819		title = soup.find('h2', class_='post-title').string
1820		date_str = soup.find('span', class_='post-date').string
1821		day = string_to_date(date_str, '%B %d, %Y')
1822		imgs = soup.find('div', id='comic').find_all('img')
1823		alt = imgs[0]['alt']
1824		assert all(i['alt'] == i['title'] for i in imgs)
1825		return {
1826		'day': day.day,
1827		'month': day.month,
1828		'year': day.year,
1829		'img': [i['src'] for i in imgs],
1830		'title': title,
1831		'alt': alt,
1832		}
1833
1834
		@@ 381-402 (lines=22) @@
378		}
379
380
381		class GenericLeMondeBlog(GenericNavigableComic):
382		"""Generic class to retrieve comics from Le Monde blogs."""
383		_categories = ('LEMONDE', 'FRANCAIS')
384		get_navi_link = get_link_rel_next
385		get_first_comic_link = simulate_first_link
386		first_url = NotImplemented
387
388		@classmethod
389		def get_comic_info(cls, soup, link):
390		"""Get information about a particular comics."""
391		url2 = soup.find('link', rel='shortlink')['href']
392		title = soup.find('meta', property='og:title')['content']
393		date_str = soup.find("span", class_="entry-date").string
394		day = string_to_date(date_str, "%d %B %Y", "fr_FR.utf8")
395		imgs = soup.find_all('meta', property='og:image')
396		return {
397		'title': title,
398		'url2': url2,
399		'img': [convert_iri_to_plain_ascii_uri(i['content']) for i in imgs],
400		'month': day.month,
401		'year': day.year,
402		'day': day.day,
403		}
404
405
		@@ 925-950 (lines=26) @@
922		}
923
924
925		class MyExtraLife(GenericNavigableComic):
926		"""Class to retrieve My Extra Life comics."""
927		name = 'extralife'
928		long_name = 'My Extra Life'
929		url = 'http://www.myextralife.com'
930		get_navi_link = get_link_rel_next
931
932		@classmethod
933		def get_first_comic_link(cls):
934		"""Get link to first comics."""
935		return get_soup_at_url(cls.url).find('a', class_='comic_nav_link first_comic_link')
936
937		@classmethod
938		def get_comic_info(cls, soup, link):
939		"""Get information about a particular comics."""
940		title = soup.find("h1", class_="comic_title").string
941		date_str = soup.find("span", class_="comic_date").string
942		day = string_to_date(date_str, "%B %d, %Y")
943		imgs = soup.find_all("img", class_="comic")
944		assert all(i['alt'] == i['title'] == title for i in imgs)
945		return {
946		'title': title,
947		'img': [i['src'] for i in imgs if i["src"]],
948		'day': day.day,
949		'month': day.month,
950		'year': day.year
951		}
952
953

SylvainDe / ComicBookMaker

Code Duplication Length = 22-26 lines in 7 locations

comics.py 7 locations