Code Duplication - SylvainDe/ComicBookMaker - Measure and Improve Code Quality continuously with Scrutinizer

Code Duplication Length = 23-27 lines in 8 locations

comics.py 8 locations


        }


class ToonHole(GenericNavigableComic):
    """Retrieve the Toon Hole comics (http://www.toonhole.com)."""
    # Also on http://tapastic.com/series/TOONHOLE
    name = 'toonhole'
    long_name = 'Toon Hole'
    url = 'http://www.toonhole.com'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_a_comicnavbase_comicnavnext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary describing one comic page.

        `soup` is the parsed page, queried through find/find_all.
        `link` is not used by this implementation.

        Returns a dict holding the 'title', the publication date split
        into 'month', 'year' and 'day', and the image URLs under 'img'.
        """
        # The date is the first child of the 'entry-meta' block.
        meta = soup.find('div', class_='entry-meta')
        published = string_to_date(meta.contents[0].strip(), "%B %d, %Y")
        images = soup.find('div', id='comic').find_all('img')
        # A page without any image gets an empty title.
        title = ""
        if images:
            first = images[0]
            title = first['alt']
            # The first image is expected to carry the same text twice.
            assert first['title'] == title
        return {
            'title': title,
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [
                convert_iri_to_plain_ascii_uri(image['src'])
                for image in images
            ],
        }



        }


class ImogenQuest(GenericNavigableComic):
    """Retrieve the Imogen Quest comics (http://imogenquest.net)."""
    # Also on http://imoquest.tumblr.com
    name = 'imogen'
    long_name = 'Imogen Quest'
    url = 'http://imogenquest.net'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary describing one comic page.

        `soup` is the parsed page, queried through find/find_all.
        `link` is not used by this implementation.

        Returns a dict with the date ('day', 'month', 'year'), the image
        URLs ('img'), the post title ('title'), the title attribute of
        the first image ('title2') and the post author ('author').
        """
        post_title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        post_author = author_span.find("a").string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, '%B %d, %Y')
        pane = soup.find('div', class_='comicpane')
        images = pane.find_all('img')
        # Every image is expected to have identical alt and title texts.
        assert not any(image['alt'] != image['title'] for image in images)
        first_image_title = images[0]['title']
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [image['src'] for image in images],
            'title': post_title,
            'title2': first_image_title,
            'author': post_author,
        }



        }


class GerbilWithAJetpack(GenericNavigableComic):
    """Retrieve the Gerbil With A Jetpack comics."""
    name = 'gerbil'
    long_name = 'Gerbil With A Jetpack'
    url = 'http://gerbilwithajetpack.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary describing one comic page.

        `soup` is the parsed page, queried through find/find_all.
        `link` is not used by this implementation.

        Returns a dict with the image URLs ('img'), the post title
        ('title'), the alt text of the first image ('alt'), the post
        author ('author') and the date ('day', 'month', 'year').
        """
        post_title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        post_author = author_span.find("a").string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        images = soup.find("div", id="comic").find_all("img")
        hover_text = images[0]['alt']
        # All images must share one text, both as alt and as title.
        for image in images:
            assert image['alt'] == image['title'] == hover_text
        return {
            'img': [image['src'] for image in images],
            'title': post_title,
            'alt': hover_text,
            'author': post_author,
            'day': published.day,
            'month': published.month,
            'year': published.year,
        }



        }


class Penmen(GenericComicNotWorking, GenericNavigableComic):
    """Retrieve the Penmen comics (http://penmen.com)."""
    name = 'penmen'
    long_name = 'Penmen'
    url = 'http://penmen.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://penmen.com/index.php/2016/09/12/penmen-announces-grin-big-brand-clothing/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary describing one comic page.

        `soup` is the parsed page, queried through find/find_all.
        `link` is not used by this implementation.

        Returns a dict with the page 'title', the 'short_url', the image
        URLs ('img'), the space-separated 'tags' and the publication date
        ('month', 'year', 'day').
        """
        page_title = soup.find('title').string
        content = soup.find('div', class_='entry-content')
        images = content.find_all('img')
        shortlink = soup.find('link', rel='shortlink')['href']
        tag_links = soup.find_all('a', rel='tag')
        joined_tags = ' '.join([anchor.string for anchor in tag_links])
        # Only the leading YYYY-MM-DD part of the datetime attribute is used.
        timestamp = soup.find('time')['datetime']
        published = string_to_date(timestamp[:10], "%Y-%m-%d")
        return {
            'title': page_title,
            'short_url': shortlink,
            'img': [image['src'] for image in images],
            'tags': joined_tags,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }



        }


class SafelyEndangered(GenericNavigableComic):
    """Retrieve the Safely Endangered comics."""
    # Also on http://tumblr.safelyendangered.com
    name = 'endangered'
    long_name = 'Safely Endangered'
    url = 'http://www.safelyendangered.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.safelyendangered.com/comic/ignored/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary describing one comic page.

        `soup` is the parsed page, queried through find/find_all.
        `link` is not used by this implementation.

        Returns a dict with the date ('day', 'month', 'year'), the image
        URLs ('img'), the post title ('title') and the alt text of the
        first image ('alt').
        """
        post_title = soup.find('h2', class_='post-title').string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, '%B %d, %Y')
        comic_div = soup.find('div', id='comic')
        images = comic_div.find_all('img')
        first_alt = images[0]['alt']
        # Every image is expected to have identical alt and title texts.
        assert not any(image['alt'] != image['title'] for image in images)
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [image['src'] for image in images],
            'title': post_title,
            'alt': first_alt,
        }



    _categories = ('DELETED', )


class ExtraFabulousComics(GenericNavigableComic):
    """Class to retrieve Extra Fabulous Comics."""
    # Also on https://extrafabulouscomics.tumblr.com
    name = 'efc'
    long_name = 'Extra Fabulous Comics'
    url = 'http://extrafabulouscomics.com'
    _categories = ('EFC', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://extrafabulouscomics.com/comic/buttfly/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        `soup` is the parsed page, queried through find/find_all.
        `link` is not used by this implementation.

        Returns a dict with the 'title' (from the og:title meta tag), the
        image URLs ('img'), the publication date ('month', 'year', 'day')
        and a 'prefix' made of the title followed by a dash.
        """
        # Escape the site URL before embedding it in the pattern: otherwise
        # the dots of the host name act as wildcards and the pattern could
        # match images hosted on look-alike addresses.
        img_src_re = re.compile(
            '^%s/wp-content/uploads/' % re.escape(cls.url))
        imgs = soup.find_all('img', src=img_src_re)
        title = soup.find('meta', property='og:title')['content']
        # Only the leading YYYY-MM-DD part of the timestamp is parsed.
        published = soup.find('meta', property='article:published_time')
        date_str = published['content'][:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        return {
            'title': title,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'prefix': title + '-'
        }



    url = 'http://english.bouletcorp.com'


class AmazingSuperPowers(GenericNavigableComic):
    """Retrieve the Amazing Super Powers comics."""
    name = 'asp'
    long_name = 'Amazing Super Powers'
    url = 'http://www.amazingsuperpowers.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary describing one comic page.

        `soup` is the parsed page, queried through find/find_all.
        `link` is not used by this implementation.

        Returns a dict with the 'title' (title attributes of all images
        joined with spaces), the post 'author', the image URLs ('img')
        and the date ('day', 'month', 'year').
        """
        author_span = soup.find("span", class_="post-author")
        post_author = author_span.find("a").string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, "%B %d, %Y")
        images = soup.find('div', id='comic').find_all('img')
        joined_title = ' '.join([image['title'] for image in images])
        # Every image is expected to have identical alt and title texts.
        assert not any(image['alt'] != image['title'] for image in images)
        return {
            'title': joined_title,
            'author': post_author,
            'img': [image['src'] for image in images],
            'day': published.day,
            'month': published.month,
            'year': published.year,
        }



        }


class GenericLeMondeBlog(GenericNavigableComic):
    """Common base for comics published on Le Monde blogs."""
    _categories = ('LEMONDE', 'FRANCAIS')
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    # Placeholder value; presumably each concrete blog sets its own URL.
    first_url = NotImplemented
    # Format handed to string_to_date when parsing the entry date.
    date_format = "%d %B %Y"

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary describing one blog entry.

        `soup` is the parsed page, queried through find/find_all.
        `link` is not used by this implementation.

        Returns a dict with the 'title' (og:title meta tag), the short
        link ('url2'), the image URLs taken from the og:image meta tags
        ('img') and the publication date ('month', 'year', 'day').
        """
        shortlink = soup.find('link', rel='shortlink')['href']
        og_title = soup.find('meta', property='og:title')['content']
        raw_date = soup.find("span", class_="entry-date").string
        # The date text is parsed with the "fr_FR.utf8" locale argument.
        published = string_to_date(raw_date, cls.date_format, "fr_FR.utf8")
        image_metas = soup.find_all('meta', property='og:image')
        return {
            'title': og_title,
            'url2': shortlink,
            'img': [
                convert_iri_to_plain_ascii_uri(meta['content'])
                for meta in image_metas
            ],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }



		@@ 1260-1286 (lines=27) @@
1257		}
1258
1259
1260		class ToonHole(GenericNavigableComic):
1261		"""Class to retrieve Toon Holes comics."""
1262		# Also on http://tapastic.com/series/TOONHOLE
1263		name = 'toonhole'
1264		long_name = 'Toon Hole'
1265		url = 'http://www.toonhole.com'
1266		get_first_comic_link = get_a_comicnavbase_comicnavfirst
1267		get_navi_link = get_a_comicnavbase_comicnavnext
1268
1269		@classmethod
1270		def get_comic_info(cls, soup, link):
1271		"""Get information about a particular comics."""
1272		date_str = soup.find('div', class_='entry-meta').contents[0].strip()
1273		day = string_to_date(date_str, "%B %d, %Y")
1274		imgs = soup.find('div', id='comic').find_all('img')
1275		if imgs:
1276		img = imgs[0]
1277		title = img['alt']
1278		assert img['title'] == title
1279		else:
1280		title = ""
1281		return {
1282		'title': title,
1283		'month': day.month,
1284		'year': day.year,
1285		'day': day.day,
1286		'img': [convert_iri_to_plain_ascii_uri(i['src']) for i in imgs],
1287		}
1288
1289
		@@ 1001-1027 (lines=27) @@
998		}
999
1000
1001		class ImogenQuest(GenericNavigableComic):
1002		"""Class to retrieve Imogen Quest comics."""
1003		# Also on http://imoquest.tumblr.com
1004		name = 'imogen'
1005		long_name = 'Imogen Quest'
1006		url = 'http://imogenquest.net'
1007		get_first_comic_link = get_div_navfirst_a
1008		get_navi_link = get_a_rel_next
1009
1010		@classmethod
1011		def get_comic_info(cls, soup, link):
1012		"""Get information about a particular comics."""
1013		title = soup.find('h2', class_='post-title').string
1014		author = soup.find("span", class_="post-author").find("a").string
1015		date_str = soup.find('span', class_='post-date').string
1016		day = string_to_date(date_str, '%B %d, %Y')
1017		imgs = soup.find('div', class_='comicpane').find_all('img')
1018		assert all(i['alt'] == i['title'] for i in imgs)
1019		title2 = imgs[0]['title']
1020		return {
1021		'day': day.day,
1022		'month': day.month,
1023		'year': day.year,
1024		'img': [i['src'] for i in imgs],
1025		'title': title,
1026		'title2': title2,
1027		'author': author,
1028		}
1029
1030
		@@ 2607-2632 (lines=26) @@
2604		}
2605
2606
2607		class GerbilWithAJetpack(GenericNavigableComic):
2608		"""Class to retrieve GerbilWithAJetpack comics."""
2609		name = 'gerbil'
2610		long_name = 'Gerbil With A Jetpack'
2611		url = 'http://gerbilwithajetpack.com'
2612		get_first_comic_link = get_a_navi_navifirst
2613		get_navi_link = get_a_rel_next
2614
2615		@classmethod
2616		def get_comic_info(cls, soup, link):
2617		"""Get information about a particular comics."""
2618		title = soup.find('h2', class_='post-title').string
2619		author = soup.find("span", class_="post-author").find("a").string
2620		date_str = soup.find("span", class_="post-date").string
2621		day = string_to_date(date_str, "%B %d, %Y")
2622		imgs = soup.find("div", id="comic").find_all("img")
2623		alt = imgs[0]['alt']
2624		assert all(i['alt'] == i['title'] == alt for i in imgs)
2625		return {
2626		'img': [i['src'] for i in imgs],
2627		'title': title,
2628		'alt': alt,
2629		'author': author,
2630		'day': day.day,
2631		'month': day.month,
2632		'year': day.year
2633		}
2634
2635
		@@ 1962-1987 (lines=26) @@
1959		}
1960
1961
1962		class Penmen(GenericComicNotWorking, GenericNavigableComic):
1963		"""Class to retrieve Penmen comics."""
1964		name = 'penmen'
1965		long_name = 'Penmen'
1966		url = 'http://penmen.com'
1967		get_navi_link = get_link_rel_next
1968		get_first_comic_link = simulate_first_link
1969		first_url = 'http://penmen.com/index.php/2016/09/12/penmen-announces-grin-big-brand-clothing/'
1970
1971		@classmethod
1972		def get_comic_info(cls, soup, link):
1973		"""Get information about a particular comics."""
1974		title = soup.find('title').string
1975		imgs = soup.find('div', class_='entry-content').find_all('img')
1976		short_url = soup.find('link', rel='shortlink')['href']
1977		tags = ' '.join(t.string for t in soup.find_all('a', rel='tag'))
1978		date_str = soup.find('time')['datetime'][:10]
1979		day = string_to_date(date_str, "%Y-%m-%d")
1980		return {
1981		'title': title,
1982		'short_url': short_url,
1983		'img': [i['src'] for i in imgs],
1984		'tags': tags,
1985		'month': day.month,
1986		'year': day.year,
1987		'day': day.day,
1988		}
1989
1990
		@@ 1903-1928 (lines=26) @@
1900		}
1901
1902
1903		class SafelyEndangered(GenericNavigableComic):
1904		"""Class to retrieve Safely Endangered comics."""
1905		# Also on http://tumblr.safelyendangered.com
1906		name = 'endangered'
1907		long_name = 'Safely Endangered'
1908		url = 'http://www.safelyendangered.com'
1909		get_navi_link = get_link_rel_next
1910		get_first_comic_link = simulate_first_link
1911		first_url = 'http://www.safelyendangered.com/comic/ignored/'
1912
1913		@classmethod
1914		def get_comic_info(cls, soup, link):
1915		"""Get information about a particular comics."""
1916		title = soup.find('h2', class_='post-title').string
1917		date_str = soup.find('span', class_='post-date').string
1918		day = string_to_date(date_str, '%B %d, %Y')
1919		imgs = soup.find('div', id='comic').find_all('img')
1920		alt = imgs[0]['alt']
1921		assert all(i['alt'] == i['title'] for i in imgs)
1922		return {
1923		'day': day.day,
1924		'month': day.month,
1925		'year': day.year,
1926		'img': [i['src'] for i in imgs],
1927		'title': title,
1928		'alt': alt,
1929		}
1930
1931
		@@ 406-431 (lines=26) @@
403		_categories = ('DELETED', )
404
405
406		class ExtraFabulousComics(GenericNavigableComic):
407		"""Class to retrieve Extra Fabulous Comics."""
408		# Also on https://extrafabulouscomics.tumblr.com
409		name = 'efc'
410		long_name = 'Extra Fabulous Comics'
411		url = 'http://extrafabulouscomics.com'
412		_categories = ('EFC', )
413		get_navi_link = get_link_rel_next
414		get_first_comic_link = simulate_first_link
415		first_url = 'http://extrafabulouscomics.com/comic/buttfly/'
416
417		@classmethod
418		def get_comic_info(cls, soup, link):
419		"""Get information about a particular comics."""
420		img_src_re = re.compile('^%s/wp-content/uploads/' % cls.url)
421		imgs = soup.find_all('img', src=img_src_re)
422		title = soup.find('meta', property='og:title')['content']
423		date_str = soup.find('meta', property='article:published_time')['content'][:10]
424		day = string_to_date(date_str, "%Y-%m-%d")
425		return {
426		'title': title,
427		'img': [i['src'] for i in imgs],
428		'month': day.month,
429		'year': day.year,
430		'day': day.day,
431		'prefix': title + '-'
432		}
433
434
		@@ 1233-1256 (lines=24) @@
1230		url = 'http://english.bouletcorp.com'
1231
1232
1233		class AmazingSuperPowers(GenericNavigableComic):
1234		"""Class to retrieve Amazing Super Powers comics."""
1235		name = 'asp'
1236		long_name = 'Amazing Super Powers'
1237		url = 'http://www.amazingsuperpowers.com'
1238		get_first_comic_link = get_a_navi_navifirst
1239		get_navi_link = get_a_navi_navinext
1240
1241		@classmethod
1242		def get_comic_info(cls, soup, link):
1243		"""Get information about a particular comics."""
1244		author = soup.find("span", class_="post-author").find("a").string
1245		date_str = soup.find('span', class_='post-date').string
1246		day = string_to_date(date_str, "%B %d, %Y")
1247		imgs = soup.find('div', id='comic').find_all('img')
1248		title = ' '.join(i['title'] for i in imgs)
1249		assert all(i['alt'] == i['title'] for i in imgs)
1250		return {
1251		'title': title,
1252		'author': author,
1253		'img': [img['src'] for img in imgs],
1254		'day': day.day,
1255		'month': day.month,
1256		'year': day.year
1257		}
1258
1259
		@@ 435-457 (lines=23) @@
432		}
433
434
435		class GenericLeMondeBlog(GenericNavigableComic):
436		"""Generic class to retrieve comics from Le Monde blogs."""
437		_categories = ('LEMONDE', 'FRANCAIS')
438		get_navi_link = get_link_rel_next
439		get_first_comic_link = simulate_first_link
440		first_url = NotImplemented
441		date_format = "%d %B %Y"
442
443		@classmethod
444		def get_comic_info(cls, soup, link):
445		"""Get information about a particular comics."""
446		url2 = soup.find('link', rel='shortlink')['href']
447		title = soup.find('meta', property='og:title')['content']
448		date_str = soup.find("span", class_="entry-date").string
449		day = string_to_date(date_str, cls.date_format, "fr_FR.utf8")
450		imgs = soup.find_all('meta', property='og:image')
451		return {
452		'title': title,
453		'url2': url2,
454		'img': [convert_iri_to_plain_ascii_uri(i['content']) for i in imgs],
455		'month': day.month,
456		'year': day.year,
457		'day': day.day,
458		}
459
460

SylvainDe / ComicBookMaker

Code Duplication Length = 23-27 lines in 8 locations

comics.py 8 locations