Code Duplication - SylvainDe/ComicBookMaker - Measure and Improve Code Quality continuously with Scrutinizer

Code Duplication Length = 22-26 lines in 7 locations

comics.py 7 locations


        return []


class ExtraFabulousComics(GenericNavigableComic):
    """Class to retrieve Extra Fabulous Comics."""
    name = 'efc'
    long_name = 'Extra Fabulous Comics'
    url = 'http://extrafabulouscomics.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Args:
            soup: parsed comic page (presumably a BeautifulSoup document;
                only its find/find_all methods are used here).
            link: not used by this implementation.

        Returns:
            A dict with the keys 'title', 'img' (list of image URLs),
            'month', 'year', 'day' and 'prefix' (the title followed by '-').
        """
        # Escape the site URL before embedding it in the pattern so that the
        # dots of the host name only match literal dots.
        img_src_re = re.compile('^%s/wp-content/uploads/' % re.escape(cls.url))
        imgs = soup.find_all('img', src=img_src_re)
        title = soup.find('meta', property='og:title')['content']
        # Only the first 10 characters are kept: they are parsed below with
        # the "%Y-%m-%d" format.
        date_str = soup.find('meta', property='article:published_time')['content'][:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        return {
            'title': title,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'prefix': title + '-',
        }



        }


class GenericLeMondeBlog(GenericNavigableComic):
    """Common base for the comics hosted on Le Monde blogs.

    first_url is left as NotImplemented here; presumably each concrete
    blog class supplies its own value.
    """
    _categories = ('LEMONDE', 'FRANCAIS')
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary for the comic page held in soup.

        The link argument is not used. The result carries the keys
        'title', 'url2' (the page's shortlink), 'img', 'month', 'year'
        and 'day'.
        """
        shortlink_tag = soup.find('link', rel='shortlink')
        short_url = shortlink_tag['href']
        title_tag = soup.find('meta', property='og:title')
        page_title = title_tag['content']
        date_tag = soup.find("span", class_="entry-date")
        # The third argument looks like a locale name, presumably so that
        # French month names can be parsed - to be confirmed in string_to_date.
        published = string_to_date(date_tag.string, "%d %B %Y", "fr_FR.utf8")
        image_tags = soup.find_all('meta', property='og:image')
        image_urls = [
            convert_iri_to_plain_ascii_uri(tag['content'])
            for tag in image_tags
        ]
        return {
            'title': page_title,
            'url2': short_url,
            'img': image_urls,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }



        }


class LinsEditions(GenericNavigableComic):
    """Retriever for the L.I.N.S. Editions comics."""
    # Also on http://linscomics.tumblr.com
    # Now on https://warandpeas.com
    name = 'lins'
    long_name = 'L.I.N.S. Editions'
    url = 'https://linsedition.com'
    _categories = ('LINS', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'https://linsedition.com/2011/09/07/l-i-n-s/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary for the comic page held in soup.

        Title, images and publication date all come from the page's
        <meta> tags; the link argument is not used.
        """
        title_tag = soup.find('meta', property='og:title')
        page_title = title_tag['content']
        image_tags = soup.find_all('meta', property='og:image')
        published_tag = soup.find('meta', property='article:published_time')
        # Keep the 10 leading characters only, which is what the
        # "%Y-%m-%d" format below consumes.
        iso_date = published_tag['content'][:10]
        published = string_to_date(iso_date, "%Y-%m-%d")
        image_urls = [tag['content'] for tag in image_tags]
        return {
            'title': page_title,
            'img': image_urls,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }



        }


class Mercworks(GenericNavigableComic):
    """Retriever for the Mercworks comics."""
    # Also on http://mercworks.tumblr.com
    name = 'mercworks'
    long_name = 'Mercworks'
    url = 'http://mercworks.net'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary for the comic page held in soup.

        The description falls back to an empty string when the page has
        no og:description <meta> tag. The link argument is not used.
        """
        title_tag = soup.find('meta', property='og:title')
        page_title = title_tag['content']
        desc_tag = soup.find('meta', property='og:description')
        if desc_tag:
            description = desc_tag['content']
        else:
            description = ""
        published_tag = soup.find('meta', property='article:published_time')
        # Keep the 10 leading characters only, which is what the
        # "%Y-%m-%d" format below consumes.
        iso_date = published_tag['content'][:10]
        published = string_to_date(iso_date, "%Y-%m-%d")
        image_tags = soup.find_all('meta', property='og:image')
        image_urls = [tag['content'] for tag in image_tags]
        return {
            'img': image_urls,
            'title': page_title,
            'desc': description,
            'day': published.day,
            'month': published.month,
            'year': published.year,
        }



        }


class MarketoonistComics(GenericNavigableComic):
    """Retriever for the Marketoonist comics."""
    name = 'marketoonist'
    long_name = 'Marketoonist'
    url = 'https://marketoonist.com/cartoons'
    get_first_comic_link = simulate_first_link
    get_navi_link = get_link_rel_next
    first_url = 'https://marketoonist.com/2002/10/the-8-types-of-brand-managers-2.html'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary for the comic page held in soup.

        Images, publication date and title all come from the page's
        <meta> tags; the link argument is not used.
        """
        image_tags = soup.find_all('meta', property='og:image')
        published_tag = soup.find('meta', property='article:published_time')
        # Keep the 10 leading characters only, which is what the
        # "%Y-%m-%d" format below consumes.
        iso_date = published_tag['content'][:10]
        published = string_to_date(iso_date, "%Y-%m-%d")
        title_tag = soup.find('meta', property='og:title')
        page_title = title_tag['content']
        image_urls = [tag['content'] for tag in image_tags]
        return {
            'img': image_urls,
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'title': page_title,
        }



        }


class Penmen(GenericNavigableComic):
    """Retriever for the Penmen comics."""
    name = 'penmen'
    long_name = 'Penmen'
    url = 'http://penmen.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://penmen.com/index.php/2016/09/12/penmen-announces-grin-big-brand-clothing/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary for the comic page held in soup.

        Images are the <img> elements found inside the 'entry-content'
        div; tags are the texts of the rel='tag' anchors joined with
        spaces. The link argument is not used.
        """
        page_title = soup.find('title').string
        content_div = soup.find('div', class_='entry-content')
        image_tags = content_div.find_all('img')
        shortlink_tag = soup.find('link', rel='shortlink')
        shortlink = shortlink_tag['href']
        tag_anchors = soup.find_all('a', rel='tag')
        joined_tags = ' '.join(anchor.string for anchor in tag_anchors)
        # Keep the 10 leading characters of the datetime attribute only,
        # which is what the "%Y-%m-%d" format below consumes.
        iso_date = soup.find('time')['datetime'][:10]
        published = string_to_date(iso_date, "%Y-%m-%d")
        image_urls = [tag['src'] for tag in image_tags]
        return {
            'title': page_title,
            'short_url': shortlink,
            'img': image_urls,
            'tags': joined_tags,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }



        }


class SafelyEndangered(GenericNavigableComic):
    """Retriever for the Safely Endangered comics."""
    # Also on http://tumblr.safelyendangered.com
    name = 'endangered'
    long_name = 'Safely Endangered'
    url = 'http://www.safelyendangered.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.safelyendangered.com/comic/ignored/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary for the comic page held in soup.

        Images are the <img> elements found inside the div whose id is
        'comic'; the 'alt' entry is the alt text of the first of them.
        The link argument is not used.
        """
        title_tag = soup.find('h2', class_='post-title')
        page_title = title_tag.string
        date_tag = soup.find('span', class_='post-date')
        published = string_to_date(date_tag.string, '%B %d, %Y')
        comic_div = soup.find('div', id='comic')
        image_tags = comic_div.find_all('img')
        first_alt = image_tags[0]['alt']
        # Sanity check: every image carries the same text as alt and title.
        assert all(tag['alt'] == tag['title'] for tag in image_tags)
        image_urls = [tag['src'] for tag in image_tags]
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': image_urls,
            'title': page_title,
            'alt': first_alt,
        }



		@@ 355-377 (lines=23) @@
352		return []
353
354
355		class ExtraFabulousComics(GenericNavigableComic):
356		"""Class to retrieve Extra Fabulous Comics."""
357		name = 'efc'
358		long_name = 'Extra Fabulous Comics'
359		url = 'http://extrafabulouscomics.com'
360		get_first_comic_link = get_a_navi_navifirst
361		get_navi_link = get_link_rel_next
362
363		@classmethod
364		def get_comic_info(cls, soup, link):
365		"""Get information about a particular comics."""
366		img_src_re = re.compile('^%s/wp-content/uploads/' % cls.url)
367		imgs = soup.find_all('img', src=img_src_re)
368		title = soup.find('meta', property='og:title')['content']
369		date_str = soup.find('meta', property='article:published_time')['content'][:10]
370		day = string_to_date(date_str, "%Y-%m-%d")
371		return {
372		'title': title,
373		'img': [i['src'] for i in imgs],
374		'month': day.month,
375		'year': day.year,
376		'day': day.day,
377		'prefix': title + '-'
378		}
379
380
		@@ 381-402 (lines=22) @@
378		}
379
380
381		class GenericLeMondeBlog(GenericNavigableComic):
382		"""Generic class to retrieve comics from Le Monde blogs."""
383		_categories = ('LEMONDE', 'FRANCAIS')
384		get_navi_link = get_link_rel_next
385		get_first_comic_link = simulate_first_link
386		first_url = NotImplemented
387
388		@classmethod
389		def get_comic_info(cls, soup, link):
390		"""Get information about a particular comics."""
391		url2 = soup.find('link', rel='shortlink')['href']
392		title = soup.find('meta', property='og:title')['content']
393		date_str = soup.find("span", class_="entry-date").string
394		day = string_to_date(date_str, "%d %B %Y", "fr_FR.utf8")
395		imgs = soup.find_all('meta', property='og:image')
396		return {
397		'title': title,
398		'url2': url2,
399		'img': [convert_iri_to_plain_ascii_uri(i['content']) for i in imgs],
400		'month': day.month,
401		'year': day.year,
402		'day': day.day,
403		}
404
405
		@@ 2351-2375 (lines=25) @@
2348		}
2349
2350
2351		class LinsEditions(GenericNavigableComic):
2352		"""Class to retrieve L.I.N.S. Editions comics."""
2353		# Also on http://linscomics.tumblr.com
2354		# Now on https://warandpeas.com
2355		name = 'lins'
2356		long_name = 'L.I.N.S. Editions'
2357		url = 'https://linsedition.com'
2358		_categories = ('LINS', )
2359		get_navi_link = get_link_rel_next
2360		get_first_comic_link = simulate_first_link
2361		first_url = 'https://linsedition.com/2011/09/07/l-i-n-s/'
2362
2363		@classmethod
2364		def get_comic_info(cls, soup, link):
2365		"""Get information about a particular comics."""
2366		title = soup.find('meta', property='og:title')['content']
2367		imgs = soup.find_all('meta', property='og:image')
2368		date_str = soup.find('meta', property='article:published_time')['content'][:10]
2369		day = string_to_date(date_str, "%Y-%m-%d")
2370		return {
2371		'title': title,
2372		'img': [i['content'] for i in imgs],
2373		'month': day.month,
2374		'year': day.year,
2375		'day': day.day,
2376		}
2377
2378
		@@ 1047-1071 (lines=25) @@
1044		}
1045
1046
1047		class Mercworks(GenericNavigableComic):
1048		"""Class to retrieve Mercworks comics."""
1049		# Also on http://mercworks.tumblr.com
1050		name = 'mercworks'
1051		long_name = 'Mercworks'
1052		url = 'http://mercworks.net'
1053		get_first_comic_link = get_a_comicnavbase_comicnavfirst
1054		get_navi_link = get_link_rel_next
1055
1056		@classmethod
1057		def get_comic_info(cls, soup, link):
1058		"""Get information about a particular comics."""
1059		title = soup.find('meta', property='og:title')['content']
1060		metadesc = soup.find('meta', property='og:description')
1061		desc = metadesc['content'] if metadesc else ""
1062		date_str = soup.find('meta', property='article:published_time')['content'][:10]
1063		day = string_to_date(date_str, "%Y-%m-%d")
1064		imgs = soup.find_all('meta', property='og:image')
1065		return {
1066		'img': [i['content'] for i in imgs],
1067		'title': title,
1068		'desc': desc,
1069		'day': day.day,
1070		'month': day.month,
1071		'year': day.year
1072		}
1073
1074
		@@ 3292-3313 (lines=22) @@
3289		}
3290
3291
3292		class MarketoonistComics(GenericNavigableComic):
3293		"""Class to retrieve Marketoonist Comics."""
3294		name = 'marketoonist'
3295		long_name = 'Marketoonist'
3296		url = 'https://marketoonist.com/cartoons'
3297		get_first_comic_link = simulate_first_link
3298		get_navi_link = get_link_rel_next
3299		first_url = 'https://marketoonist.com/2002/10/the-8-types-of-brand-managers-2.html'
3300
3301		@classmethod
3302		def get_comic_info(cls, soup, link):
3303		"""Get information about a particular comics."""
3304		imgs = soup.find_all('meta', property='og:image')
3305		date_str = soup.find('meta', property='article:published_time')['content'][:10]
3306		day = string_to_date(date_str, "%Y-%m-%d")
3307		title = soup.find('meta', property='og:title')['content']
3308		return {
3309		'img': [i['content'] for i in imgs],
3310		'day': day.day,
3311		'month': day.month,
3312		'year': day.year,
3313		'title': title,
3314		}
3315
3316
		@@ 1892-1917 (lines=26) @@
1889		}
1890
1891
1892		class Penmen(GenericNavigableComic):
1893		"""Class to retrieve Penmen comics."""
1894		name = 'penmen'
1895		long_name = 'Penmen'
1896		url = 'http://penmen.com'
1897		get_navi_link = get_link_rel_next
1898		get_first_comic_link = simulate_first_link
1899		first_url = 'http://penmen.com/index.php/2016/09/12/penmen-announces-grin-big-brand-clothing/'
1900
1901		@classmethod
1902		def get_comic_info(cls, soup, link):
1903		"""Get information about a particular comics."""
1904		title = soup.find('title').string
1905		imgs = soup.find('div', class_='entry-content').find_all('img')
1906		short_url = soup.find('link', rel='shortlink')['href']
1907		tags = ' '.join(t.string for t in soup.find_all('a', rel='tag'))
1908		date_str = soup.find('time')['datetime'][:10]
1909		day = string_to_date(date_str, "%Y-%m-%d")
1910		return {
1911		'title': title,
1912		'short_url': short_url,
1913		'img': [i['src'] for i in imgs],
1914		'tags': tags,
1915		'month': day.month,
1916		'year': day.year,
1917		'day': day.day,
1918		}
1919
1920
		@@ 1833-1858 (lines=26) @@
1830		}
1831
1832
1833		class SafelyEndangered(GenericNavigableComic):
1834		"""Class to retrieve Safely Endangered comics."""
1835		# Also on http://tumblr.safelyendangered.com
1836		name = 'endangered'
1837		long_name = 'Safely Endangered'
1838		url = 'http://www.safelyendangered.com'
1839		get_navi_link = get_link_rel_next
1840		get_first_comic_link = simulate_first_link
1841		first_url = 'http://www.safelyendangered.com/comic/ignored/'
1842
1843		@classmethod
1844		def get_comic_info(cls, soup, link):
1845		"""Get information about a particular comics."""
1846		title = soup.find('h2', class_='post-title').string
1847		date_str = soup.find('span', class_='post-date').string
1848		day = string_to_date(date_str, '%B %d, %Y')
1849		imgs = soup.find('div', id='comic').find_all('img')
1850		alt = imgs[0]['alt']
1851		assert all(i['alt'] == i['title'] for i in imgs)
1852		return {
1853		'day': day.day,
1854		'month': day.month,
1855		'year': day.year,
1856		'img': [i['src'] for i in imgs],
1857		'title': title,
1858		'alt': alt,
1859		}
1860
1861

SylvainDe / ComicBookMaker

Code Duplication Length = 22-26 lines in 7 locations

comics.py 7 locations