Code Duplication - SylvainDe/ComicBookMaker - Measure and Improve Code Quality continuously with Scrutinizer

Code Duplication Length = 22-26 lines in 6 locations

comics.py 6 locations


            'title': title,
            'author': author,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }


class LinsEditions(GenericNavigableComic):
    """Class to retrieve L.I.N.S. Editions comics."""
    # Also on http://linscomics.tumblr.com
    # Now on https://warandpeas.com
    name = 'lins'
    long_name = 'L.I.N.S. Editions'
    url = 'https://linsedition.com'
    _categories = ('LINS', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'https://linsedition.com/2011/09/07/l-i-n-s/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics."""
        title = soup.find('meta', property='og:title')['content']
        imgs = soup.find_all('meta', property='og:image')
        date_str = soup.find('meta', property='article:published_time')['content'][:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        return {
            'title': title,
            'img': [i['content'] for i in imgs],

    _categories = ('EMPTY', )

    @classmethod
    def get_next_comic(cls, last_comic):
        """Return no comics: this comic source is considered empty.

        `last_comic` is accepted but not read here. A message is emitted
        through `cls.log` and an empty list is returned.
        """
        no_comics = []
        cls.log("comic is considered as empty - returning no comic")
        return no_comics


class ExtraFabulousComics(GenericNavigableComic):
    """Class to retrieve Extra Fabulous Comics."""
    name = 'efc'
    long_name = 'Extra Fabulous Comics'
    url = 'http://extrafabulouscomics.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics."""
        img_src_re = re.compile('^%s/wp-content/uploads/' % cls.url)
        imgs = soup.find_all('img', src=img_src_re)
        title = soup.find('meta', property='og:title')['content']
        date_str = soup.find('meta', property='article:published_time')['content'][:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        return {
            'title': title,
            'img': [i['src'] for i in imgs],
            'month': day.month,

            'title': title,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'prefix': title + '-'
        }


class GenericLeMondeBlog(GenericNavigableComic):
    """Generic class to retrieve comics from Le Monde blogs."""
    _categories = ('LEMONDE', 'FRANCAIS')
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics."""
        url2 = soup.find('link', rel='shortlink')['href']
        title = soup.find('meta', property='og:title')['content']
        date_str = soup.find("span", class_="entry-date").string
        day = string_to_date(date_str, "%d %B %Y", "fr_FR.utf8")
        imgs = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'url2': url2,
            'img': [convert_iri_to_plain_ascii_uri(i['content']) for i in imgs],

            'title': title,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [i['content'] for i in imgs if i['content'] not in skip_imgs],
        }


class SafelyEndangered(GenericNavigableComic):
    """Class to retrieve Safely Endangered comics."""
    # Also on http://tumblr.safelyendangered.com
    name = 'endangered'
    long_name = 'Safely Endangered'
    url = 'http://www.safelyendangered.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.safelyendangered.com/comic/ignored/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics."""
        title = soup.find('h2', class_='post-title').string
        date_str = soup.find('span', class_='post-date').string
        day = string_to_date(date_str, '%B %d, %Y')
        imgs = soup.find('div', id='comic').find_all('img')
        alt = imgs[0]['alt']
        assert all(i['alt'] == i['title'] for i in imgs)
        return {
            'day': day.day,
            'month': day.month,
            'year': day.year,

            'img': [i['src'] for i in imgs],
            'title': title,
            'author': author,
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }


class MyExtraLife(GenericNavigableComic):
    """Class to retrieve My Extra Life comics."""
    name = 'extralife'
    long_name = 'My Extra Life'
    url = 'http://www.myextralife.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the link to the first comic.

        The page at `cls.url` is loaded with `get_soup_at_url`, then searched
        for an `a` element whose class is 'comic_nav_link first_comic_link';
        the result of that `find` call is returned unchanged.
        """
        home_page = get_soup_at_url(cls.url)
        return home_page.find('a', class_='comic_nav_link first_comic_link')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics."""
        title = soup.find("h1", class_="comic_title").string
        date_str = soup.find("span", class_="comic_date").string
        day = string_to_date(date_str, "%B %d, %Y")
        imgs = soup.find_all("img", class_="comic")
        assert all(i['alt'] == i['title'] == title for i in imgs)
        return {
            'title': title,
            'img': [i['src'] for i in imgs if i["src"]],

        """Get information about a particular comics."""
        title = link['title']
        imgs = soup.find_all('img', id='comicimg')
        return {
            'title': title,
            'img': [i['src'] for i in imgs],
        }


class MarketoonistComics(GenericNavigableComic):
    """Class to retrieve Marketoonist Comics."""
    name = 'marketoonist'
    long_name = 'Marketoonist'
    url = 'https://marketoonist.com/cartoons'
    get_first_comic_link = simulate_first_link
    get_navi_link = get_link_rel_next
    first_url = 'https://marketoonist.com/2002/10/the-8-types-of-brand-managers-2.html'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics."""
        imgs = soup.find_all('meta', property='og:image')
        date_str = soup.find('meta', property='article:published_time')['content'][:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        title = soup.find('meta', property='og:title')['content']
        return {
            'img': [i['content'] for i in imgs],
            'day': day.day,

		@@ 2282-2306 (lines=25) @@
2279		'title': title,
2280		'author': author,
2281		'img': [i['src'] for i in imgs],
2282		'month': day.month,
2283		'year': day.year,
2284		'day': day.day,
2285		}
2286
2287
2288		class LinsEditions(GenericNavigableComic):
2289		"""Class to retrieve L.I.N.S. Editions comics."""
2290		# Also on http://linscomics.tumblr.com
2291		# Now on https://warandpeas.com
2292		name = 'lins'
2293		long_name = 'L.I.N.S. Editions'
2294		url = 'https://linsedition.com'
2295		_categories = ('LINS', )
2296		get_navi_link = get_link_rel_next
2297		get_first_comic_link = simulate_first_link
2298		first_url = 'https://linsedition.com/2011/09/07/l-i-n-s/'
2299
2300		@classmethod
2301		def get_comic_info(cls, soup, link):
2302		"""Get information about a particular comics."""
2303		title = soup.find('meta', property='og:title')['content']
2304		imgs = soup.find_all('meta', property='og:image')
2305		date_str = soup.find('meta', property='article:published_time')['content'][:10]
2306		day = string_to_date(date_str, "%Y-%m-%d")
2307		return {
2308		'title': title,
2309		'img': [i['content'] for i in imgs],
		@@ 349-371 (lines=23) @@
346		_categories = ('EMPTY', )
347
348		@classmethod
349		def get_next_comic(cls, last_comic):
350		"""Implementation of get_next_comic returning no comics."""
351		cls.log("comic is considered as empty - returning no comic")
352		return []
353
354
355		class ExtraFabulousComics(GenericNavigableComic):
356		"""Class to retrieve Extra Fabulous Comics."""
357		name = 'efc'
358		long_name = 'Extra Fabulous Comics'
359		url = 'http://extrafabulouscomics.com'
360		get_first_comic_link = get_a_navi_navifirst
361		get_navi_link = get_link_rel_next
362
363		@classmethod
364		def get_comic_info(cls, soup, link):
365		"""Get information about a particular comics."""
366		img_src_re = re.compile('^%s/wp-content/uploads/' % cls.url)
367		imgs = soup.find_all('img', src=img_src_re)
368		title = soup.find('meta', property='og:title')['content']
369		date_str = soup.find('meta', property='article:published_time')['content'][:10]
370		day = string_to_date(date_str, "%Y-%m-%d")
371		return {
372		'title': title,
373		'img': [i['src'] for i in imgs],
374		'month': day.month,
		@@ 375-396 (lines=22) @@
372		'title': title,
373		'img': [i['src'] for i in imgs],
374		'month': day.month,
375		'year': day.year,
376		'day': day.day,
377		'prefix': title + '-'
378		}
379
380
381		class GenericLeMondeBlog(GenericNavigableComic):
382		"""Generic class to retrieve comics from Le Monde blogs."""
383		_categories = ('LEMONDE', 'FRANCAIS')
384		get_navi_link = get_link_rel_next
385		get_first_comic_link = simulate_first_link
386		first_url = NotImplemented
387
388		@classmethod
389		def get_comic_info(cls, soup, link):
390		"""Get information about a particular comics."""
391		url2 = soup.find('link', rel='shortlink')['href']
392		title = soup.find('meta', property='og:title')['content']
393		date_str = soup.find("span", class_="entry-date").string
394		day = string_to_date(date_str, "%d %B %Y", "fr_FR.utf8")
395		imgs = soup.find_all('meta', property='og:image')
396		return {
397		'title': title,
398		'url2': url2,
399		'img': [convert_iri_to_plain_ascii_uri(i['content']) for i in imgs],
		@@ 1809-1834 (lines=26) @@
1806		'title': title,
1807		'author': author,
1808		'day': day.day,
1809		'month': day.month,
1810		'year': day.year,
1811		'img': [i['content'] for i in imgs if i['content'] not in skip_imgs],
1812		}
1813
1814
1815		class SafelyEndangered(GenericNavigableComic):
1816		"""Class to retrieve Safely Endangered comics."""
1817		# Also on http://tumblr.safelyendangered.com
1818		name = 'endangered'
1819		long_name = 'Safely Endangered'
1820		url = 'http://www.safelyendangered.com'
1821		get_navi_link = get_link_rel_next
1822		get_first_comic_link = simulate_first_link
1823		first_url = 'http://www.safelyendangered.com/comic/ignored/'
1824
1825		@classmethod
1826		def get_comic_info(cls, soup, link):
1827		"""Get information about a particular comics."""
1828		title = soup.find('h2', class_='post-title').string
1829		date_str = soup.find('span', class_='post-date').string
1830		day = string_to_date(date_str, '%B %d, %Y')
1831		imgs = soup.find('div', id='comic').find_all('img')
1832		alt = imgs[0]['alt']
1833		assert all(i['alt'] == i['title'] for i in imgs)
1834		return {
1835		'day': day.day,
1836		'month': day.month,
1837		'year': day.year,
		@@ 919-944 (lines=26) @@
916		'img': [i['src'] for i in imgs],
917		'title': title,
918		'author': author,
919		'month': day.month,
920		'year': day.year,
921		'day': day.day,
922		}
923
924
925		class MyExtraLife(GenericNavigableComic):
926		"""Class to retrieve My Extra Life comics."""
927		name = 'extralife'
928		long_name = 'My Extra Life'
929		url = 'http://www.myextralife.com'
930		get_navi_link = get_link_rel_next
931
932		@classmethod
933		def get_first_comic_link(cls):
934		"""Get link to first comics."""
935		return get_soup_at_url(cls.url).find('a', class_='comic_nav_link first_comic_link')
936
937		@classmethod
938		def get_comic_info(cls, soup, link):
939		"""Get information about a particular comics."""
940		title = soup.find("h1", class_="comic_title").string
941		date_str = soup.find("span", class_="comic_date").string
942		day = string_to_date(date_str, "%B %d, %Y")
943		imgs = soup.find_all("img", class_="comic")
944		assert all(i['alt'] == i['title'] == title for i in imgs)
945		return {
946		'title': title,
947		'img': [i['src'] for i in imgs if i["src"]],
		@@ 3191-3212 (lines=22) @@
3188		"""Get information about a particular comics."""
3189		title = link['title']
3190		imgs = soup.find_all('img', id='comicimg')
3191		return {
3192		'title': title,
3193		'img': [i['src'] for i in imgs],
3194		}
3195
3196
3197		class MarketoonistComics(GenericNavigableComic):
3198		"""Class to retrieve Marketoonist Comics."""
3199		name = 'marketoonist'
3200		long_name = 'Marketoonist'
3201		url = 'https://marketoonist.com/cartoons'
3202		get_first_comic_link = simulate_first_link
3203		get_navi_link = get_link_rel_next
3204		first_url = 'https://marketoonist.com/2002/10/the-8-types-of-brand-managers-2.html'
3205
3206		@classmethod
3207		def get_comic_info(cls, soup, link):
3208		"""Get information about a particular comics."""
3209		imgs = soup.find_all('meta', property='og:image')
3210		date_str = soup.find('meta', property='article:published_time')['content'][:10]
3211		day = string_to_date(date_str, "%Y-%m-%d")
3212		title = soup.find('meta', property='og:title')['content']
3213		return {
3214		'img': [i['content'] for i in imgs],
3215		'day': day.day,

SylvainDe / ComicBookMaker

Code Duplication Length = 22-26 lines in 6 locations

comics.py 6 locations