Code Duplication - SylvainDe/ComicBookMaker - Measure and Improve Code Quality continuously with Scrutinizer

Code Duplication Length = 22-26 lines in 7 locations

comics.py 7 locations


            'day': day.day,
        }


class LinsEditions(GenericNavigableComic):
    """Retrieve the L.I.N.S. Editions comics."""
    # Also on https://linscomics.tumblr.com
    # Now on https://warandpeas.com
    name = 'lins'
    long_name = 'L.I.N.S. Editions'
    url = 'https://linsedition.com'
    _categories = ('LINS', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'https://linsedition.com/2011/09/07/l-i-n-s/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect title, images and publication date of one comic page.

        Everything is read from the page's <meta> tags: 'og:title',
        every 'og:image' and 'article:published_time'. `link` is not used.
        """
        comic_title = soup.find('meta', property='og:title')['content']
        image_metas = soup.find_all('meta', property='og:image')
        published = soup.find('meta', property='article:published_time')['content']
        # Only the leading "YYYY-MM-DD" part of the timestamp is parsed.
        date = string_to_date(published[:10], "%Y-%m-%d")
        info = {'title': comic_title}
        info['img'] = [meta['content'] for meta in image_metas]
        info['month'] = date.month
        info['year'] = date.year
        info['day'] = date.day
        return info


            'img': [i['content'] for i in imgs],
        }


class Mercworks(GenericNavigableComic):
    """Retrieve the Mercworks comics."""
    # Also on http://mercworks.tumblr.com
    name = 'mercworks'
    long_name = 'Mercworks'
    url = 'http://mercworks.net'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect images, title, description and date of one comic page.

        All values come from the page's <meta> tags; the description
        falls back to an empty string when there is no 'og:description'
        tag. `link` is not used.
        """
        comic_title = soup.find('meta', property='og:title')['content']
        desc_meta = soup.find('meta', property='og:description')
        if desc_meta:
            description = desc_meta['content']
        else:
            description = ""
        published = soup.find('meta', property='article:published_time')['content']
        # Only the leading "YYYY-MM-DD" part of the timestamp is parsed.
        date = string_to_date(published[:10], "%Y-%m-%d")
        image_metas = soup.find_all('meta', property='og:image')
        info = {'img': [meta['content'] for meta in image_metas]}
        info['title'] = comic_title
        info['desc'] = description
        info['day'] = date.day
        info['month'] = date.month
        info['year'] = date.year
        return info


        return []


class ExtraFabulousComics(GenericNavigableComic):
    """Class to retrieve Extra Fabulous Comics."""
    # Also on https://extrafabulouscomics.tumblr.com
    name = 'efc'
    long_name = 'Extra Fabulous Comics'
    url = 'http://extrafabulouscomics.com'
    _categories = ('EFC', )
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        Args:
            soup: parsed comic page, queried through find()/find_all()
                (presumably a BeautifulSoup document - confirm with caller).
            link: navigation link for the page; not used here.

        Returns:
            dict with the keys 'title', 'img' (list of image URLs),
            'month', 'year', 'day' and 'prefix' (the title followed by '-').
        """
        # The site URL is escaped so that its dots only match literal dots
        # instead of acting as regex wildcards.
        img_src_re = re.compile(
            '^%s/wp-content/uploads/' % re.escape(cls.url))
        # Keep only the <img> tags whose source lives in the uploads folder.
        imgs = soup.find_all('img', src=img_src_re)
        title = soup.find('meta', property='og:title')['content']
        # Only the leading "YYYY-MM-DD" part of the timestamp is parsed.
        date_str = soup.find('meta', property='article:published_time')['content'][:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        return {
            'title': title,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'prefix': title + '-'
        }



    def get_comic_info(cls, soup, link):
        """Return the title and image sources of one comic page.

        The title is taken from the 'title' entry of `link`; the images
        are every <img> tag of `soup` whose id is 'comicimg'.
        """
        comic_title = link['title']
        sources = []
        for image in soup.find_all('img', id='comicimg'):
            sources.append(image['src'])
        return dict(title=comic_title, img=sources)


class OffTheLeashDog(GenericNavigableComic):
    """Class to retrieve Off The Leash Dog comics."""
    # Also on http://rupertfawcettsdoggyblog.tumblr.com
    # Also on http://www.rupertfawcettcartoons.com
    name = 'offtheleash'
    long_name = 'Off The Leash Dog'
    url = 'http://offtheleashdogcartoons.com'
    _categories = ('FAWCETT', )
    get_navi_link = get_a_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://offtheleashdogcartoons.com/uncategorized/can-i-help-you/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics."""
        print(link)
        title = soup.find("h1", class_="entry-title").string
        imgs = soup.find('div', class_='entry-content').find_all('img')

        }


class GenericLeMondeBlog(GenericNavigableComic):
    """Base class for comics published on Le Monde blogs."""
    _categories = ('LEMONDE', 'FRANCAIS')
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect title, short link, images and date of one blog post.

        The short link comes from the <link rel="shortlink"> tag, the
        title and images from the 'og:' <meta> tags, and the date from
        the 'entry-date' span. `link` is not used.
        """
        short_url = soup.find('link', rel='shortlink')['href']
        post_title = soup.find('meta', property='og:title')['content']
        date_text = soup.find("span", class_="entry-date").string
        # The date text is parsed as "%d %B %Y" with the "fr_FR.utf8" locale.
        date = string_to_date(date_text, "%d %B %Y", "fr_FR.utf8")
        image_metas = soup.find_all('meta', property='og:image')
        info = {'title': post_title, 'url2': short_url}
        info['img'] = [
            convert_iri_to_plain_ascii_uri(meta['content'])
            for meta in image_metas
        ]
        info['month'] = date.month
        info['year'] = date.year
        info['day'] = date.day
        return info



        }


class MyExtraLife(GenericNavigableComic):
    """Retrieve the My Extra Life comics."""
    name = 'extralife'
    long_name = 'My Extra Life'
    url = 'http://www.myextralife.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the first-comic link found on the page at `cls.url`."""
        home_page = get_soup_at_url(cls.url)
        return home_page.find('a', class_='comic_nav_link first_comic_link')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect title, images and date of one comic page.

        The title comes from the 'comic_title' heading, the date from the
        'comic_date' span and the images from the <img> tags of class
        'comic'. Images with an empty 'src' are left out. `link` is not
        used.
        """
        heading = soup.find("h1", class_="comic_title")
        comic_title = heading.string
        date_text = soup.find("span", class_="comic_date").string
        date = string_to_date(date_text, "%B %d, %Y")
        comic_imgs = soup.find_all("img", class_="comic")
        # Sanity check: alt text and title of every image equal the title.
        assert all(
            img['alt'] == img['title'] == comic_title for img in comic_imgs)
        sources = [img['src'] for img in comic_imgs if img["src"]]
        return dict(
            title=comic_title,
            img=sources,
            day=date.day,
            month=date.month,
            year=date.year,
        )



        }


class JuliasDrawings(GenericListableComic):
    """Retrieve Julia's Drawings."""
    name = 'julia'
    long_name = "Julia's Drawings"
    url = 'https://drawings.jvns.ca'
    get_url_from_archive_element = get_href

    @classmethod
    def get_archive_elements(cls):
        """Return the first <a> of every listed post, in reversed page order."""
        home_page = get_soup_at_url(cls.url)
        posts = home_page.find_all('article', class_='li post')
        return [post.find('a') for post in reversed(posts)]

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Collect title, images and publication date of one drawing page.

        The date comes from the 'og:article:published_time' <meta> tag,
        the title from the 'p-post-title' heading and the images from the
        'post-content' section. `archive_elt` is not used.
        """
        published = soup.find('meta', property='og:article:published_time')['content']
        # Only the leading "YYYY-MM-DD" part of the timestamp is parsed.
        date = string_to_date(published[:10], "%Y-%m-%d")
        post_title = soup.find('h3', class_='p-post-title').string
        content = soup.find('section', class_='post-content')
        # Image sources are joined with the site URL via urljoin_wrapper.
        sources = [
            urljoin_wrapper(cls.url, img['src'])
            for img in content.find_all('img')
        ]
        return dict(
            title=post_title,
            img=sources,
            month=date.month,
            year=date.year,
            day=date.day,
        )



		@@ 2402-2426 (lines=25) @@
2399		'day': day.day,
2400		}
2401
2402
2403		class LinsEditions(GenericNavigableComic):
2404		"""Class to retrieve L.I.N.S. Editions comics."""
2405		# Also on https://linscomics.tumblr.com
2406		# Now on https://warandpeas.com
2407		name = 'lins'
2408		long_name = 'L.I.N.S. Editions'
2409		url = 'https://linsedition.com'
2410		_categories = ('LINS', )
2411		get_navi_link = get_link_rel_next
2412		get_first_comic_link = simulate_first_link
2413		first_url = 'https://linsedition.com/2011/09/07/l-i-n-s/'
2414
2415		@classmethod
2416		def get_comic_info(cls, soup, link):
2417		"""Get information about a particular comics."""
2418		title = soup.find('meta', property='og:title')['content']
2419		imgs = soup.find_all('meta', property='og:image')
2420		date_str = soup.find('meta', property='article:published_time')['content'][:10]
2421		day = string_to_date(date_str, "%Y-%m-%d")
2422		return {
2423		'title': title,
2424		'img': [i['content'] for i in imgs],
2425		'month': day.month,
2426		'year': day.year,
2427		'day': day.day,
2428		}
2429
		@@ 1046-1070 (lines=25) @@
1043		'img': [i['content'] for i in imgs],
1044		}
1045
1046
1047		class Mercworks(GenericNavigableComic):
1048		"""Class to retrieve Mercworks comics."""
1049		# Also on http://mercworks.tumblr.com
1050		name = 'mercworks'
1051		long_name = 'Mercworks'
1052		url = 'http://mercworks.net'
1053		get_first_comic_link = get_a_comicnavbase_comicnavfirst
1054		get_navi_link = get_link_rel_next
1055
1056		@classmethod
1057		def get_comic_info(cls, soup, link):
1058		"""Get information about a particular comics."""
1059		title = soup.find('meta', property='og:title')['content']
1060		metadesc = soup.find('meta', property='og:description')
1061		desc = metadesc['content'] if metadesc else ""
1062		date_str = soup.find('meta', property='article:published_time')['content'][:10]
1063		day = string_to_date(date_str, "%Y-%m-%d")
1064		imgs = soup.find_all('meta', property='og:image')
1065		return {
1066		'img': [i['content'] for i in imgs],
1067		'title': title,
1068		'desc': desc,
1069		'day': day.day,
1070		'month': day.month,
1071		'year': day.year
1072		}
1073
		@@ 360-384 (lines=25) @@
357		return []
358
359
360		class ExtraFabulousComics(GenericNavigableComic):
361		"""Class to retrieve Extra Fabulous Comics."""
362		# Also on https://extrafabulouscomics.tumblr.com
363		name = 'efc'
364		long_name = 'Extra Fabulous Comics'
365		url = 'http://extrafabulouscomics.com'
366		_categories = ('EFC', )
367		get_first_comic_link = get_a_navi_navifirst
368		get_navi_link = get_link_rel_next
369
370		@classmethod
371		def get_comic_info(cls, soup, link):
372		"""Get information about a particular comics."""
373		img_src_re = re.compile('^%s/wp-content/uploads/' % cls.url)
374		imgs = soup.find_all('img', src=img_src_re)
375		title = soup.find('meta', property='og:title')['content']
376		date_str = soup.find('meta', property='article:published_time')['content'][:10]
377		day = string_to_date(date_str, "%Y-%m-%d")
378		return {
379		'title': title,
380		'img': [i['src'] for i in imgs],
381		'month': day.month,
382		'year': day.year,
383		'day': day.day,
384		'prefix': title + '-'
385		}
386
387
		@@ 3368-3389 (lines=22) @@
3365		def get_comic_info(cls, soup, link):
3366		"""Get information about a particular comics."""
3367		title = link['title']
3368		imgs = soup.find_all('img', id='comicimg')
3369		return {
3370		'title': title,
3371		'img': [i['src'] for i in imgs],
3372		}
3373
3374
3375		class OffTheLeashDog(GenericNavigableComic):
3376		"""Class to retrieve Off The Leash Dog comics."""
3377		# Also on http://rupertfawcettsdoggyblog.tumblr.com
3378		# Also on http://www.rupertfawcettcartoons.com
3379		name = 'offtheleash'
3380		long_name = 'Off The Leash Dog'
3381		url = 'http://offtheleashdogcartoons.com'
3382		_categories = ('FAWCETT', )
3383		get_navi_link = get_a_rel_next
3384		get_first_comic_link = simulate_first_link
3385		first_url = 'http://offtheleashdogcartoons.com/uncategorized/can-i-help-you/'
3386
3387		@classmethod
3388		def get_comic_info(cls, soup, link):
3389		"""Get information about a particular comics."""
3390		print(link)
3391		title = soup.find("h1", class_="entry-title").string
3392		imgs = soup.find('div', class_='entry-content').find_all('img')
		@@ 388-409 (lines=22) @@
385		}
386
387
388		class GenericLeMondeBlog(GenericNavigableComic):
389		"""Generic class to retrieve comics from Le Monde blogs."""
390		_categories = ('LEMONDE', 'FRANCAIS')
391		get_navi_link = get_link_rel_next
392		get_first_comic_link = simulate_first_link
393		first_url = NotImplemented
394
395		@classmethod
396		def get_comic_info(cls, soup, link):
397		"""Get information about a particular comics."""
398		url2 = soup.find('link', rel='shortlink')['href']
399		title = soup.find('meta', property='og:title')['content']
400		date_str = soup.find("span", class_="entry-date").string
401		day = string_to_date(date_str, "%d %B %Y", "fr_FR.utf8")
402		imgs = soup.find_all('meta', property='og:image')
403		return {
404		'title': title,
405		'url2': url2,
406		'img': [convert_iri_to_plain_ascii_uri(i['content']) for i in imgs],
407		'month': day.month,
408		'year': day.year,
409		'day': day.day,
410		}
411
412
		@@ 958-983 (lines=26) @@
955		}
956
957
958		class MyExtraLife(GenericNavigableComic):
959		"""Class to retrieve My Extra Life comics."""
960		name = 'extralife'
961		long_name = 'My Extra Life'
962		url = 'http://www.myextralife.com'
963		get_navi_link = get_link_rel_next
964
965		@classmethod
966		def get_first_comic_link(cls):
967		"""Get link to first comics."""
968		return get_soup_at_url(cls.url).find('a', class_='comic_nav_link first_comic_link')
969
970		@classmethod
971		def get_comic_info(cls, soup, link):
972		"""Get information about a particular comics."""
973		title = soup.find("h1", class_="comic_title").string
974		date_str = soup.find("span", class_="comic_date").string
975		day = string_to_date(date_str, "%B %d, %Y")
976		imgs = soup.find_all("img", class_="comic")
977		assert all(i['alt'] == i['title'] == title for i in imgs)
978		return {
979		'title': title,
980		'img': [i['src'] for i in imgs if i["src"]],
981		'day': day.day,
982		'month': day.month,
983		'year': day.year
984		}
985
986
		@@ 2304-2328 (lines=25) @@
2301		}
2302
2303
2304		class JuliasDrawings(GenericListableComic):
2305		"""Class to retrieve Julia's Drawings."""
2306		name = 'julia'
2307		long_name = "Julia's Drawings"
2308		url = 'https://drawings.jvns.ca'
2309		get_url_from_archive_element = get_href
2310
2311		@classmethod
2312		def get_archive_elements(cls):
2313		articles = get_soup_at_url(cls.url).find_all('article', class_='li post')
2314		return [art.find('a') for art in reversed(articles)]
2315
2316		@classmethod
2317		def get_comic_info(cls, soup, archive_elt):
2318		"""Get information about a particular comics."""
2319		date_str = soup.find('meta', property='og:article:published_time')['content'][:10]
2320		day = string_to_date(date_str, "%Y-%m-%d")
2321		title = soup.find('h3', class_='p-post-title').string
2322		imgs = soup.find('section', class_='post-content').find_all('img')
2323		return {
2324		'title': title,
2325		'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
2326		'month': day.month,
2327		'year': day.year,
2328		'day': day.day,
2329		}
2330
2331

SylvainDe / ComicBookMaker

Code Duplication Length = 22-26 lines in 7 locations

comics.py 7 locations