Code Duplication - SylvainDe/ComicBookMaker - Measure and Improve Code Quality continuously with Scrutinizer

Code Duplication Length = 22-26 lines in 5 locations

comics.py 5 locations


        }


class MyExtraLife(GenericNavigableComic):
    """Scraper for the My Extra Life webcomic."""
    name = 'extralife'
    long_name = 'My Extra Life'
    url = 'http://www.myextralife.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the first-comic navigation anchor from the page at `cls.url`."""
        home_page = get_soup_at_url(cls.url)
        return home_page.find('a', class_='comic_nav_link first_comic_link')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, image sources and publication date from a comic page.

        `soup` is the parsed comic page; `link` is unused here.
        Returns a dict with keys 'title', 'img', 'day', 'month' and 'year'.
        """
        comic_title = soup.find("h1", class_="comic_title").string
        raw_date = soup.find("span", class_="comic_date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        comic_imgs = soup.find_all("img", class_="comic")
        # Sanity check: every image must carry the comic title as both its
        # alt text and its title attribute.
        assert all(
            img['alt'] == img['title'] == comic_title for img in comic_imgs)
        # Images with an empty 'src' attribute are dropped.
        sources = [img['src'] for img in comic_imgs if img["src"]]
        info = {
            'title': comic_title,
            'img': sources,
            'day': published.day,
            'month': published.month,
            'year': published.year,
        }
        return info



        }


class LinsEditions(GenericNavigableComic):
    """Scraper for the L.I.N.S. Editions webcomic."""
    # Mirrored on http://linscomics.tumblr.com
    # The comic has since moved to https://warandpeas.com
    name = 'lins'
    long_name = 'L.I.N.S. Editions'
    url = 'https://linsedition.com'
    _categories = ('LINS', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'https://linsedition.com/2011/09/07/l-i-n-s/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, images and publication date from the page's meta tags.

        `soup` is the parsed comic page; `link` is unused here.
        Returns a dict with keys 'title', 'img', 'month', 'year' and 'day'.
        """
        comic_title = soup.find('meta', property='og:title')['content']
        image_metas = soup.find_all('meta', property='og:image')
        published_meta = soup.find('meta', property='article:published_time')
        # Only the first 10 characters are kept, then parsed as "%Y-%m-%d".
        iso_date = published_meta['content'][:10]
        published = string_to_date(iso_date, "%Y-%m-%d")
        image_urls = [meta['content'] for meta in image_metas]
        info = {
            'title': comic_title,
            'img': image_urls,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
        return info



        return []


class ExtraFabulousComics(GenericNavigableComic):
    """Class to retrieve Extra Fabulous Comics."""
    name = 'efc'
    long_name = 'Extra Fabulous Comics'
    url = 'http://extrafabulouscomics.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        `soup` is the parsed comic page; `link` is unused here.
        Returns a dict with keys 'title', 'img', 'month', 'year', 'day'
        and 'prefix' (the title followed by a dash).
        """
        # The site URL is escaped so that its dots are matched literally;
        # unescaped, '.' would match any character and let through image
        # URLs from look-alike hosts.
        img_src_re = re.compile(
            '^%s/wp-content/uploads/' % re.escape(cls.url))
        imgs = soup.find_all('img', src=img_src_re)
        title = soup.find('meta', property='og:title')['content']
        # Only the first 10 characters are kept, then parsed as "%Y-%m-%d".
        date_str = soup.find('meta', property='article:published_time')['content'][:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        return {
            'title': title,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'prefix': title + '-'
        }



        }


class GenericLeMondeBlog(GenericNavigableComic):
    """Shared base for comics hosted on Le Monde blogs."""
    _categories = ('LEMONDE', 'FRANCAIS')
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    # Placeholder value; no concrete first URL is set on this generic class.
    first_url = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, short link, images and date from a blog post page.

        `soup` is the parsed post page; `link` is unused here.
        Returns a dict with keys 'title', 'url2', 'img', 'month', 'year'
        and 'day'.
        """
        short_url = soup.find('link', rel='shortlink')['href']
        post_title = soup.find('meta', property='og:title')['content']
        raw_date = soup.find("span", class_="entry-date").string
        # The date is parsed with the "fr_FR.utf8" locale argument.
        published = string_to_date(raw_date, "%d %B %Y", "fr_FR.utf8")
        image_metas = soup.find_all('meta', property='og:image')
        image_urls = [
            convert_iri_to_plain_ascii_uri(meta['content'])
            for meta in image_metas
        ]
        info = {
            'title': post_title,
            'url2': short_url,
            'img': image_urls,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
        return info



        }


class SafelyEndangered(GenericNavigableComic):
    """Scraper for the Safely Endangered webcomic."""
    # Mirrored on http://tumblr.safelyendangered.com
    name = 'endangered'
    long_name = 'Safely Endangered'
    url = 'http://www.safelyendangered.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.safelyendangered.com/comic/ignored/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract date, images, title and alt text from a comic page.

        `soup` is the parsed comic page; `link` is unused here.
        Returns a dict with keys 'day', 'month', 'year', 'img', 'title'
        and 'alt'.
        """
        comic_title = soup.find('h2', class_='post-title').string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, '%B %d, %Y')
        comic_div = soup.find('div', id='comic')
        comic_imgs = comic_div.find_all('img')
        # The alt text of the first image is used for the whole comic.
        first_alt = comic_imgs[0]['alt']
        # Sanity check: each image's alt text must equal its title attribute.
        assert all(img['alt'] == img['title'] for img in comic_imgs)
        sources = [img['src'] for img in comic_imgs]
        info = {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': sources,
            'title': comic_title,
            'alt': first_alt,
        }
        return info



		@@ 911-936 (lines=26) @@
908		}
909
910
911		class MyExtraLife(GenericNavigableComic):
912		"""Class to retrieve My Extra Life comics."""
913		name = 'extralife'
914		long_name = 'My Extra Life'
915		url = 'http://www.myextralife.com'
916		get_navi_link = get_link_rel_next
917
918		@classmethod
919		def get_first_comic_link(cls):
920		"""Get link to first comics."""
921		return get_soup_at_url(cls.url).find('a', class_='comic_nav_link first_comic_link')
922
923		@classmethod
924		def get_comic_info(cls, soup, link):
925		"""Get information about a particular comics."""
926		title = soup.find("h1", class_="comic_title").string
927		date_str = soup.find("span", class_="comic_date").string
928		day = string_to_date(date_str, "%B %d, %Y")
929		imgs = soup.find_all("img", class_="comic")
930		assert all(i['alt'] == i['title'] == title for i in imgs)
931		return {
932		'title': title,
933		'img': [i['src'] for i in imgs if i["src"]],
934		'day': day.day,
935		'month': day.month,
936		'year': day.year
937		}
938
939
		@@ 2274-2298 (lines=25) @@
2271		}
2272
2273
2274		class LinsEditions(GenericNavigableComic):
2275		"""Class to retrieve L.I.N.S. Editions comics."""
2276		# Also on http://linscomics.tumblr.com
2277		# Now on https://warandpeas.com
2278		name = 'lins'
2279		long_name = 'L.I.N.S. Editions'
2280		url = 'https://linsedition.com'
2281		_categories = ('LINS', )
2282		get_navi_link = get_link_rel_next
2283		get_first_comic_link = simulate_first_link
2284		first_url = 'https://linsedition.com/2011/09/07/l-i-n-s/'
2285
2286		@classmethod
2287		def get_comic_info(cls, soup, link):
2288		"""Get information about a particular comics."""
2289		title = soup.find('meta', property='og:title')['content']
2290		imgs = soup.find_all('meta', property='og:image')
2291		date_str = soup.find('meta', property='article:published_time')['content'][:10]
2292		day = string_to_date(date_str, "%Y-%m-%d")
2293		return {
2294		'title': title,
2295		'img': [i['content'] for i in imgs],
2296		'month': day.month,
2297		'year': day.year,
2298		'day': day.day,
2299		}
2300
2301
		@@ 341-363 (lines=23) @@
338		return []
339
340
341		class ExtraFabulousComics(GenericNavigableComic):
342		"""Class to retrieve Extra Fabulous Comics."""
343		name = 'efc'
344		long_name = 'Extra Fabulous Comics'
345		url = 'http://extrafabulouscomics.com'
346		get_first_comic_link = get_a_navi_navifirst
347		get_navi_link = get_link_rel_next
348
349		@classmethod
350		def get_comic_info(cls, soup, link):
351		"""Get information about a particular comics."""
352		img_src_re = re.compile('^%s/wp-content/uploads/' % cls.url)
353		imgs = soup.find_all('img', src=img_src_re)
354		title = soup.find('meta', property='og:title')['content']
355		date_str = soup.find('meta', property='article:published_time')['content'][:10]
356		day = string_to_date(date_str, "%Y-%m-%d")
357		return {
358		'title': title,
359		'img': [i['src'] for i in imgs],
360		'month': day.month,
361		'year': day.year,
362		'day': day.day,
363		'prefix': title + '-'
364		}
365
366
		@@ 367-388 (lines=22) @@
364		}
365
366
367		class GenericLeMondeBlog(GenericNavigableComic):
368		"""Generic class to retrieve comics from Le Monde blogs."""
369		_categories = ('LEMONDE', 'FRANCAIS')
370		get_navi_link = get_link_rel_next
371		get_first_comic_link = simulate_first_link
372		first_url = NotImplemented
373
374		@classmethod
375		def get_comic_info(cls, soup, link):
376		"""Get information about a particular comics."""
377		url2 = soup.find('link', rel='shortlink')['href']
378		title = soup.find('meta', property='og:title')['content']
379		date_str = soup.find("span", class_="entry-date").string
380		day = string_to_date(date_str, "%d %B %Y", "fr_FR.utf8")
381		imgs = soup.find_all('meta', property='og:image')
382		return {
383		'title': title,
384		'url2': url2,
385		'img': [convert_iri_to_plain_ascii_uri(i['content']) for i in imgs],
386		'month': day.month,
387		'year': day.year,
388		'day': day.day,
389		}
390
391
		@@ 1801-1826 (lines=26) @@
1798		}
1799
1800
1801		class SafelyEndangered(GenericNavigableComic):
1802		"""Class to retrieve Safely Endangered comics."""
1803		# Also on http://tumblr.safelyendangered.com
1804		name = 'endangered'
1805		long_name = 'Safely Endangered'
1806		url = 'http://www.safelyendangered.com'
1807		get_navi_link = get_link_rel_next
1808		get_first_comic_link = simulate_first_link
1809		first_url = 'http://www.safelyendangered.com/comic/ignored/'
1810
1811		@classmethod
1812		def get_comic_info(cls, soup, link):
1813		"""Get information about a particular comics."""
1814		title = soup.find('h2', class_='post-title').string
1815		date_str = soup.find('span', class_='post-date').string
1816		day = string_to_date(date_str, '%B %d, %Y')
1817		imgs = soup.find('div', id='comic').find_all('img')
1818		alt = imgs[0]['alt']
1819		assert all(i['alt'] == i['title'] for i in imgs)
1820		return {
1821		'day': day.day,
1822		'month': day.month,
1823		'year': day.year,
1824		'img': [i['src'] for i in imgs],
1825		'title': title,
1826		'alt': alt,
1827		}
1828
1829

SylvainDe / ComicBookMaker

Code Duplication Length = 22-26 lines in 5 locations

comics.py 5 locations