Code Duplication - SylvainDe/ComicBookMaker - Measure and Improve Code Quality continuously with Scrutinizer

Code Duplication Length = 23-27 lines in 8 locations

comics.py 8 locations


        }


class ToonHole(GenericNavigableComic):
    """Retrieve the Toon Hole comics."""
    # Also on http://tapastic.com/series/TOONHOLE
    name = 'toonhole'
    long_name = 'Toon Hole'
    url = 'http://www.toonhole.com'
    # Navigation is delegated to helpers defined elsewhere in the module.
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_a_comicnavbase_comicnavnext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the description of the comic found on a parsed page.

        The date string is the first child of the 'entry-meta' div and is
        parsed with the "%B %d, %Y" format.  The images are every <img>
        inside the div whose id is 'comic'.  The title is the alt text of
        the first image (its title attribute is asserted to be identical);
        a page without any image gets an empty title.

        `soup` is the parsed page; `link` is not used by this method.
        Returns a dict with keys 'title', 'month', 'year', 'day' and 'img'.
        """
        meta_block = soup.find('div', class_='entry-meta')
        published = string_to_date(meta_block.contents[0].strip(), "%B %d, %Y")
        pictures = soup.find('div', id='comic').find_all('img')
        title = ""
        if pictures:
            first = pictures[0]
            title = first['alt']
            # Sanity check: alt and title are expected to carry the same text.
            assert first['title'] == title
        return {
            'title': title,
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [convert_iri_to_plain_ascii_uri(p['src']) for p in pictures],
        }



        }


class ImogenQuest(GenericNavigableComic):
    """Class to retrieve Imogen Quest comics."""
    # Also on http://imoquest.tumblr.com
    name = 'imogen'
    long_name = 'Imogen Quest'
    url = 'http://imogenquest.net'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Reads the post title (h2 'post-title'), the author (link inside the
        'post-author' span), the date ('post-date' span, "%B %d, %Y" format)
        and every <img> inside the 'comicpane' div.  Each image is asserted
        to have identical alt and title attributes.

        `soup` is the parsed page; `link` is not used by this method.
        Returns a dict with keys 'day', 'month', 'year', 'img', 'title',
        'title2' and 'author'.  'title2' is the title attribute of the first
        image, or an empty string when the page has no image.
        """
        title = soup.find('h2', class_='post-title').string
        author = soup.find("span", class_="post-author").find("a").string
        date_str = soup.find('span', class_='post-date').string
        day = string_to_date(date_str, '%B %d, %Y')
        imgs = soup.find('div', class_='comicpane').find_all('img')
        assert all(i['alt'] == i['title'] for i in imgs)
        # find_all() yields an empty list when the pane holds no image; fall
        # back to an empty secondary title instead of raising IndexError.
        title2 = imgs[0]['title'] if imgs else ""
        return {
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [i['src'] for i in imgs],
            'title': title,
            'title2': title2,
            'author': author,
        }



        }


class GerbilWithAJetpack(GenericNavigableComic):
    """Class to retrieve GerbilWithAJetpack comics."""
    name = 'gerbil'
    long_name = 'Gerbil With A Jetpack'
    url = 'http://gerbilwithajetpack.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Reads the post title (h2 'post-title'), the author (link inside the
        'post-author' span), the date ('post-date' span, "%B %d, %Y" format)
        and every <img> inside the div whose id is 'comic'.  All images are
        asserted to share one text for both their alt and title attributes.

        `soup` is the parsed page; `link` is not used by this method.
        Returns a dict with keys 'img', 'title', 'alt', 'author', 'day',
        'month' and 'year'.  'alt' is the alt text of the first image, or an
        empty string when the page has no image.
        """
        title = soup.find('h2', class_='post-title').string
        author = soup.find("span", class_="post-author").find("a").string
        date_str = soup.find("span", class_="post-date").string
        day = string_to_date(date_str, "%B %d, %Y")
        imgs = soup.find("div", id="comic").find_all("img")
        # find_all() yields an empty list when the div holds no image; fall
        # back to an empty alt text instead of raising IndexError.
        alt = imgs[0]['alt'] if imgs else ""
        assert all(i['alt'] == i['title'] == alt for i in imgs)
        return {
            'img': [i['src'] for i in imgs],
            'title': title,
            'alt': alt,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
        }



        }


class Penmen(GenericComicNotWorking, GenericNavigableComic):
    """Retrieve the Penmen comics."""
    name = 'penmen'
    long_name = 'Penmen'
    url = 'http://penmen.com'
    get_navi_link = get_link_rel_next
    # The first comic is not looked up on the site: first_url is used
    # (presumably by simulate_first_link - defined elsewhere, to confirm).
    get_first_comic_link = simulate_first_link
    first_url = 'http://penmen.com/index.php/2016/09/12/penmen-announces-grin-big-brand-clothing/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the description of the comic found on a parsed page.

        The title comes from the <title> element, the images from the
        'entry-content' div, the short URL from the rel='shortlink' link and
        the tags from the text of every rel='tag' anchor (space separated).
        The date is the first ten characters of the <time> element's
        'datetime' attribute, parsed with the "%Y-%m-%d" format.

        `soup` is the parsed page; `link` is not used by this method.
        Returns a dict with keys 'title', 'short_url', 'img', 'tags',
        'month', 'year' and 'day'.
        """
        title = soup.find('title').string
        content = soup.find('div', class_='entry-content')
        pictures = content.find_all('img')
        short_url = soup.find('link', rel='shortlink')['href']
        tag_anchors = soup.find_all('a', rel='tag')
        tags = ' '.join(anchor.string for anchor in tag_anchors)
        timestamp = soup.find('time')['datetime']
        # Only the leading YYYY-MM-DD part of the timestamp is needed.
        published = string_to_date(timestamp[:10], "%Y-%m-%d")
        return {
            'title': title,
            'short_url': short_url,
            'img': [picture['src'] for picture in pictures],
            'tags': tags,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }



        }


class SafelyEndangered(GenericNavigableComic):
    """Class to retrieve Safely Endangered comics."""
    # Also on http://tumblr.safelyendangered.com
    name = 'endangered'
    long_name = 'Safely Endangered'
    url = 'http://www.safelyendangered.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.safelyendangered.com/comic/ignored/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Reads the post title (h2 'post-title'), the date ('post-date' span,
        '%B %d, %Y' format) and every <img> inside the div whose id is
        'comic'.  Each image is asserted to have identical alt and title
        attributes.

        `soup` is the parsed page; `link` is not used by this method.
        Returns a dict with keys 'day', 'month', 'year', 'img', 'title' and
        'alt'.  'alt' is the alt text of the first image, or an empty string
        when the page has no image.
        """
        title = soup.find('h2', class_='post-title').string
        date_str = soup.find('span', class_='post-date').string
        day = string_to_date(date_str, '%B %d, %Y')
        imgs = soup.find('div', id='comic').find_all('img')
        # find_all() yields an empty list when the div holds no image; fall
        # back to an empty alt text instead of raising IndexError.
        alt = imgs[0]['alt'] if imgs else ""
        assert all(i['alt'] == i['title'] for i in imgs)
        return {
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [i['src'] for i in imgs],
            'title': title,
            'alt': alt,
        }



    _categories = ('DELETED', )


class ExtraFabulousComics(GenericNavigableComic):
    """Class to retrieve Extra Fabulous Comics."""
    # Also on https://extrafabulouscomics.tumblr.com
    name = 'efc'
    long_name = 'Extra Fabulous Comics'
    url = 'http://extrafabulouscomics.com'
    _categories = ('EFC', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://extrafabulouscomics.com/comic/buttfly/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The images are every <img> whose src starts with
        "<cls.url>/wp-content/uploads/".  The title is the content of the
        'og:title' meta element and the date is the first ten characters of
        the 'article:published_time' meta content, parsed as "%Y-%m-%d".

        `soup` is the parsed page; `link` is not used by this method.
        Returns a dict with keys 'title', 'img', 'month', 'year', 'day' and
        'prefix' (the title followed by '-').
        """
        # The URL is escaped so that its dots only match literal dots and not
        # any character, which would let images from look-alike hosts through.
        img_src_re = re.compile('^%s/wp-content/uploads/' % re.escape(cls.url))
        imgs = soup.find_all('img', src=img_src_re)
        title = soup.find('meta', property='og:title')['content']
        date_str = soup.find('meta', property='article:published_time')['content'][:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        return {
            'title': title,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'prefix': title + '-',
        }



    url = 'http://english.bouletcorp.com'


class AmazingSuperPowers(GenericNavigableComic):
    """Retrieve the Amazing Super Powers comics."""
    name = 'asp'
    long_name = 'Amazing Super Powers'
    url = 'http://www.amazingsuperpowers.com'
    # Navigation is delegated to helpers defined elsewhere in the module.
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the description of the comic found on a parsed page.

        The author is the text of the link inside the 'post-author' span,
        the date comes from the 'post-date' span ("%B %d, %Y" format) and
        the images are every <img> inside the div whose id is 'comic'.  The
        title is the space-separated concatenation of the images' title
        attributes; each image is asserted to have identical alt and title.

        `soup` is the parsed page; `link` is not used by this method.
        Returns a dict with keys 'title', 'author', 'img', 'day', 'month'
        and 'year'.
        """
        author_link = soup.find("span", class_="post-author").find("a")
        author = author_link.string
        date_text = soup.find('span', class_='post-date').string
        published = string_to_date(date_text, "%B %d, %Y")
        pictures = soup.find('div', id='comic').find_all('img')
        title = ' '.join(picture['title'] for picture in pictures)
        # Sanity check: alt and title are expected to carry the same text.
        assert all(picture['alt'] == picture['title'] for picture in pictures)
        return {
            'title': title,
            'author': author,
            'img': [picture['src'] for picture in pictures],
            'day': published.day,
            'month': published.month,
            'year': published.year,
        }



        }


class GenericLeMondeBlog(GenericNavigableComic):
    """Shared base for the comics hosted on Le Monde blogs."""
    _categories = ('LEMONDE', 'FRANCAIS')
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    # Placeholder value here; presumably each concrete blog class sets it.
    first_url = NotImplemented
    # Format handed to string_to_date; read through cls in get_comic_info.
    date_format = "%d %B %Y"

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the description of the comic found on a parsed page.

        The secondary URL is the href of the rel='shortlink' link, the title
        the content of the 'og:title' meta element, and the images the
        content of every 'og:image' meta element.  The date text of the
        'entry-date' span is parsed with cls.date_format, passing
        "fr_FR.utf8" as third argument to string_to_date (presumably the
        locale used for month names - to confirm against that helper).

        `soup` is the parsed page; `link` is not used by this method.
        Returns a dict with keys 'title', 'url2', 'img', 'month', 'year'
        and 'day'.
        """
        shortlink = soup.find('link', rel='shortlink')
        url2 = shortlink['href']
        title_meta = soup.find('meta', property='og:title')
        title = title_meta['content']
        date_text = soup.find("span", class_="entry-date").string
        published = string_to_date(date_text, cls.date_format, "fr_FR.utf8")
        image_metas = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'url2': url2,
            'img': [convert_iri_to_plain_ascii_uri(meta['content']) for meta in image_metas],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }



		@@ 1259-1285 (lines=27) @@
1256		}
1257
1258
1259		class ToonHole(GenericNavigableComic):
1260		"""Class to retrieve Toon Holes comics."""
1261		# Also on http://tapastic.com/series/TOONHOLE
1262		name = 'toonhole'
1263		long_name = 'Toon Hole'
1264		url = 'http://www.toonhole.com'
1265		get_first_comic_link = get_a_comicnavbase_comicnavfirst
1266		get_navi_link = get_a_comicnavbase_comicnavnext
1267
1268		@classmethod
1269		def get_comic_info(cls, soup, link):
1270		"""Get information about a particular comics."""
1271		date_str = soup.find('div', class_='entry-meta').contents[0].strip()
1272		day = string_to_date(date_str, "%B %d, %Y")
1273		imgs = soup.find('div', id='comic').find_all('img')
1274		if imgs:
1275		img = imgs[0]
1276		title = img['alt']
1277		assert img['title'] == title
1278		else:
1279		title = ""
1280		return {
1281		'title': title,
1282		'month': day.month,
1283		'year': day.year,
1284		'day': day.day,
1285		'img': [convert_iri_to_plain_ascii_uri(i['src']) for i in imgs],
1286		}
1287
1288
		@@ 1000-1026 (lines=27) @@
997		}
998
999
1000		class ImogenQuest(GenericNavigableComic):
1001		"""Class to retrieve Imogen Quest comics."""
1002		# Also on http://imoquest.tumblr.com
1003		name = 'imogen'
1004		long_name = 'Imogen Quest'
1005		url = 'http://imogenquest.net'
1006		get_first_comic_link = get_div_navfirst_a
1007		get_navi_link = get_a_rel_next
1008
1009		@classmethod
1010		def get_comic_info(cls, soup, link):
1011		"""Get information about a particular comics."""
1012		title = soup.find('h2', class_='post-title').string
1013		author = soup.find("span", class_="post-author").find("a").string
1014		date_str = soup.find('span', class_='post-date').string
1015		day = string_to_date(date_str, '%B %d, %Y')
1016		imgs = soup.find('div', class_='comicpane').find_all('img')
1017		assert all(i['alt'] == i['title'] for i in imgs)
1018		title2 = imgs[0]['title']
1019		return {
1020		'day': day.day,
1021		'month': day.month,
1022		'year': day.year,
1023		'img': [i['src'] for i in imgs],
1024		'title': title,
1025		'title2': title2,
1026		'author': author,
1027		}
1028
1029
		@@ 2606-2631 (lines=26) @@
2603		}
2604
2605
2606		class GerbilWithAJetpack(GenericNavigableComic):
2607		"""Class to retrieve GerbilWithAJetpack comics."""
2608		name = 'gerbil'
2609		long_name = 'Gerbil With A Jetpack'
2610		url = 'http://gerbilwithajetpack.com'
2611		get_first_comic_link = get_a_navi_navifirst
2612		get_navi_link = get_a_rel_next
2613
2614		@classmethod
2615		def get_comic_info(cls, soup, link):
2616		"""Get information about a particular comics."""
2617		title = soup.find('h2', class_='post-title').string
2618		author = soup.find("span", class_="post-author").find("a").string
2619		date_str = soup.find("span", class_="post-date").string
2620		day = string_to_date(date_str, "%B %d, %Y")
2621		imgs = soup.find("div", id="comic").find_all("img")
2622		alt = imgs[0]['alt']
2623		assert all(i['alt'] == i['title'] == alt for i in imgs)
2624		return {
2625		'img': [i['src'] for i in imgs],
2626		'title': title,
2627		'alt': alt,
2628		'author': author,
2629		'day': day.day,
2630		'month': day.month,
2631		'year': day.year
2632		}
2633
2634
		@@ 1961-1986 (lines=26) @@
1958		}
1959
1960
1961		class Penmen(GenericComicNotWorking, GenericNavigableComic):
1962		"""Class to retrieve Penmen comics."""
1963		name = 'penmen'
1964		long_name = 'Penmen'
1965		url = 'http://penmen.com'
1966		get_navi_link = get_link_rel_next
1967		get_first_comic_link = simulate_first_link
1968		first_url = 'http://penmen.com/index.php/2016/09/12/penmen-announces-grin-big-brand-clothing/'
1969
1970		@classmethod
1971		def get_comic_info(cls, soup, link):
1972		"""Get information about a particular comics."""
1973		title = soup.find('title').string
1974		imgs = soup.find('div', class_='entry-content').find_all('img')
1975		short_url = soup.find('link', rel='shortlink')['href']
1976		tags = ' '.join(t.string for t in soup.find_all('a', rel='tag'))
1977		date_str = soup.find('time')['datetime'][:10]
1978		day = string_to_date(date_str, "%Y-%m-%d")
1979		return {
1980		'title': title,
1981		'short_url': short_url,
1982		'img': [i['src'] for i in imgs],
1983		'tags': tags,
1984		'month': day.month,
1985		'year': day.year,
1986		'day': day.day,
1987		}
1988
1989
		@@ 1902-1927 (lines=26) @@
1899		}
1900
1901
1902		class SafelyEndangered(GenericNavigableComic):
1903		"""Class to retrieve Safely Endangered comics."""
1904		# Also on http://tumblr.safelyendangered.com
1905		name = 'endangered'
1906		long_name = 'Safely Endangered'
1907		url = 'http://www.safelyendangered.com'
1908		get_navi_link = get_link_rel_next
1909		get_first_comic_link = simulate_first_link
1910		first_url = 'http://www.safelyendangered.com/comic/ignored/'
1911
1912		@classmethod
1913		def get_comic_info(cls, soup, link):
1914		"""Get information about a particular comics."""
1915		title = soup.find('h2', class_='post-title').string
1916		date_str = soup.find('span', class_='post-date').string
1917		day = string_to_date(date_str, '%B %d, %Y')
1918		imgs = soup.find('div', id='comic').find_all('img')
1919		alt = imgs[0]['alt']
1920		assert all(i['alt'] == i['title'] for i in imgs)
1921		return {
1922		'day': day.day,
1923		'month': day.month,
1924		'year': day.year,
1925		'img': [i['src'] for i in imgs],
1926		'title': title,
1927		'alt': alt,
1928		}
1929
1930
		@@ 405-430 (lines=26) @@
402		_categories = ('DELETED', )
403
404
405		class ExtraFabulousComics(GenericNavigableComic):
406		"""Class to retrieve Extra Fabulous Comics."""
407		# Also on https://extrafabulouscomics.tumblr.com
408		name = 'efc'
409		long_name = 'Extra Fabulous Comics'
410		url = 'http://extrafabulouscomics.com'
411		_categories = ('EFC', )
412		get_navi_link = get_link_rel_next
413		get_first_comic_link = simulate_first_link
414		first_url = 'http://extrafabulouscomics.com/comic/buttfly/'
415
416		@classmethod
417		def get_comic_info(cls, soup, link):
418		"""Get information about a particular comics."""
419		img_src_re = re.compile('^%s/wp-content/uploads/' % cls.url)
420		imgs = soup.find_all('img', src=img_src_re)
421		title = soup.find('meta', property='og:title')['content']
422		date_str = soup.find('meta', property='article:published_time')['content'][:10]
423		day = string_to_date(date_str, "%Y-%m-%d")
424		return {
425		'title': title,
426		'img': [i['src'] for i in imgs],
427		'month': day.month,
428		'year': day.year,
429		'day': day.day,
430		'prefix': title + '-'
431		}
432
433
		@@ 1232-1255 (lines=24) @@
1229		url = 'http://english.bouletcorp.com'
1230
1231
1232		class AmazingSuperPowers(GenericNavigableComic):
1233		"""Class to retrieve Amazing Super Powers comics."""
1234		name = 'asp'
1235		long_name = 'Amazing Super Powers'
1236		url = 'http://www.amazingsuperpowers.com'
1237		get_first_comic_link = get_a_navi_navifirst
1238		get_navi_link = get_a_navi_navinext
1239
1240		@classmethod
1241		def get_comic_info(cls, soup, link):
1242		"""Get information about a particular comics."""
1243		author = soup.find("span", class_="post-author").find("a").string
1244		date_str = soup.find('span', class_='post-date').string
1245		day = string_to_date(date_str, "%B %d, %Y")
1246		imgs = soup.find('div', id='comic').find_all('img')
1247		title = ' '.join(i['title'] for i in imgs)
1248		assert all(i['alt'] == i['title'] for i in imgs)
1249		return {
1250		'title': title,
1251		'author': author,
1252		'img': [img['src'] for img in imgs],
1253		'day': day.day,
1254		'month': day.month,
1255		'year': day.year
1256		}
1257
1258
		@@ 434-456 (lines=23) @@
431		}
432
433
434		class GenericLeMondeBlog(GenericNavigableComic):
435		"""Generic class to retrieve comics from Le Monde blogs."""
436		_categories = ('LEMONDE', 'FRANCAIS')
437		get_navi_link = get_link_rel_next
438		get_first_comic_link = simulate_first_link
439		first_url = NotImplemented
440		date_format = "%d %B %Y"
441
442		@classmethod
443		def get_comic_info(cls, soup, link):
444		"""Get information about a particular comics."""
445		url2 = soup.find('link', rel='shortlink')['href']
446		title = soup.find('meta', property='og:title')['content']
447		date_str = soup.find("span", class_="entry-date").string
448		day = string_to_date(date_str, cls.date_format, "fr_FR.utf8")
449		imgs = soup.find_all('meta', property='og:image')
450		return {
451		'title': title,
452		'url2': url2,
453		'img': [convert_iri_to_plain_ascii_uri(i['content']) for i in imgs],
454		'month': day.month,
455		'year': day.year,
456		'day': day.day,
457		}
458
459

SylvainDe / ComicBookMaker

Code Duplication Length = 23-27 lines in 8 locations

comics.py 8 locations