Code Duplication - SylvainDe/ComicBookMaker - Measure and Improve Code Quality continuously with Scrutinizer

Code Duplication Length = 22-26 lines in 7 locations

comics.py 7 locations


        return []


class ExtraFabulousComics(GenericNavigableComic):
    """Class to retrieve Extra Fabulous Comics."""
    name = 'efc'
    long_name = 'Extra Fabulous Comics'
    url = 'http://extrafabulouscomics.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The title comes from the 'og:title' meta tag, the date from the first
        10 characters of the 'article:published_time' meta tag, and the images
        are every <img> whose src starts with the site's upload directory.

        soup is presumably the parsed page (BeautifulSoup-like object, to be
        confirmed against the caller); link is not used here.

        Returns a dict with the keys 'title', 'img', 'month', 'year', 'day'
        and 'prefix'.
        """
        # Escape the site URL before embedding it in the pattern: otherwise
        # the dots of the host name act as "any character" wildcards.
        img_src_re = re.compile('^%s/wp-content/uploads/' % re.escape(cls.url))
        imgs = soup.find_all('img', src=img_src_re)
        title = soup.find('meta', property='og:title')['content']
        # Only the leading YYYY-MM-DD part of the timestamp is needed.
        date_str = soup.find('meta', property='article:published_time')['content'][:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        return {
            'title': title,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'prefix': title + '-',
        }



        }


class GenericLeMondeBlog(GenericNavigableComic):
    """Base class shared by the comics published on Le Monde blogs."""
    _categories = ('LEMONDE', 'FRANCAIS')
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    # Placeholder value; no concrete first URL is set in this class.
    first_url = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect the metadata describing one comic page.

        Returns a dict with the keys 'title', 'url2', 'img', 'month',
        'year' and 'day'.
        """
        shortlink_tag = soup.find('link', rel='shortlink')
        url2 = shortlink_tag['href']
        title_tag = soup.find('meta', property='og:title')
        title = title_tag['content']
        date_tag = soup.find("span", class_="entry-date")
        # The date text is parsed with a French locale ("%d %B %Y").
        day = string_to_date(date_tag.string, "%d %B %Y", "fr_FR.utf8")
        image_urls = [
            convert_iri_to_plain_ascii_uri(meta['content'])
            for meta in soup.find_all('meta', property='og:image')
        ]
        return {
            'title': title,
            'url2': url2,
            'img': image_urls,
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }



        }


class LinsEditions(GenericNavigableComic):
    """Class to retrieve L.I.N.S. Editions comics."""
    # Also on http://linscomics.tumblr.com
    # Now on https://warandpeas.com
    name = 'lins'
    long_name = 'L.I.N.S. Editions'
    url = 'https://linsedition.com'
    _categories = ('LINS', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'https://linsedition.com/2011/09/07/l-i-n-s/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect the metadata describing one comic page.

        Returns a dict with the keys 'title', 'img', 'month', 'year'
        and 'day'.
        """
        title_tag = soup.find('meta', property='og:title')
        title = title_tag['content']
        image_metas = soup.find_all('meta', property='og:image')
        published = soup.find('meta', property='article:published_time')['content']
        # Keep only the leading YYYY-MM-DD part of the timestamp.
        day = string_to_date(published[:10], "%Y-%m-%d")
        image_urls = [meta['content'] for meta in image_metas]
        return {
            'title': title,
            'img': image_urls,
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }



        }


class Mercworks(GenericNavigableComic):
    """Class to retrieve Mercworks comics."""
    # Also on http://mercworks.tumblr.com
    name = 'mercworks'
    long_name = 'Mercworks'
    url = 'http://mercworks.net'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect the metadata describing one comic page.

        Returns a dict with the keys 'img', 'title', 'desc', 'day',
        'month' and 'year'.
        """
        title_tag = soup.find('meta', property='og:title')
        title = title_tag['content']
        # The description meta tag is optional: fall back to an empty string.
        metadesc = soup.find('meta', property='og:description')
        if metadesc:
            desc = metadesc['content']
        else:
            desc = ""
        published = soup.find('meta', property='article:published_time')['content']
        # Keep only the leading YYYY-MM-DD part of the timestamp.
        day = string_to_date(published[:10], "%Y-%m-%d")
        image_urls = [
            meta['content']
            for meta in soup.find_all('meta', property='og:image')
        ]
        return {
            'img': image_urls,
            'title': title,
            'desc': desc,
            'day': day.day,
            'month': day.month,
            'year': day.year,
        }



        }


class MarketoonistComics(GenericNavigableComic):
    """Class to retrieve Marketoonist Comics."""
    name = 'marketoonist'
    long_name = 'Marketoonist'
    url = 'https://marketoonist.com/cartoons'
    get_first_comic_link = simulate_first_link
    get_navi_link = get_link_rel_next
    first_url = 'https://marketoonist.com/2002/10/the-8-types-of-brand-managers-2.html'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect the metadata describing one comic page.

        Returns a dict with the keys 'img', 'day', 'month', 'year'
        and 'title'.
        """
        image_metas = soup.find_all('meta', property='og:image')
        published = soup.find('meta', property='article:published_time')['content']
        # Keep only the leading YYYY-MM-DD part of the timestamp.
        day = string_to_date(published[:10], "%Y-%m-%d")
        title_tag = soup.find('meta', property='og:title')
        title = title_tag['content']
        image_urls = [meta['content'] for meta in image_metas]
        return {
            'img': image_urls,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'title': title,
        }



        }


class Penmen(GenericNavigableComic):
    """Class to retrieve Penmen comics."""
    name = 'penmen'
    long_name = 'Penmen'
    url = 'http://penmen.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://penmen.com/index.php/2016/09/12/penmen-announces-grin-big-brand-clothing/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect the metadata describing one comic page.

        Returns a dict with the keys 'title', 'short_url', 'img', 'tags',
        'month', 'year' and 'day'.
        """
        title = soup.find('title').string
        # Only the images located inside the post content are kept.
        content_div = soup.find('div', class_='entry-content')
        image_tags = content_div.find_all('img')
        short_url = soup.find('link', rel='shortlink')['href']
        # Tags are flattened into a single space-separated string.
        tag_names = [anchor.string for anchor in soup.find_all('a', rel='tag')]
        tags = ' '.join(tag_names)
        timestamp = soup.find('time')['datetime']
        # Keep only the leading YYYY-MM-DD part of the timestamp.
        day = string_to_date(timestamp[:10], "%Y-%m-%d")
        image_urls = [tag['src'] for tag in image_tags]
        return {
            'title': title,
            'short_url': short_url,
            'img': image_urls,
            'tags': tags,
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }



        }


class SafelyEndangered(GenericNavigableComic):
    """Class to retrieve Safely Endangered comics."""
    # Also on http://tumblr.safelyendangered.com
    name = 'endangered'
    long_name = 'Safely Endangered'
    url = 'http://www.safelyendangered.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.safelyendangered.com/comic/ignored/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect the metadata describing one comic page.

        Returns a dict with the keys 'day', 'month', 'year', 'img',
        'title' and 'alt'.
        """
        title_tag = soup.find('h2', class_='post-title')
        title = title_tag.string
        date_tag = soup.find('span', class_='post-date')
        day = string_to_date(date_tag.string, '%B %d, %Y')
        comic_div = soup.find('div', id='comic')
        image_tags = comic_div.find_all('img')
        # The alt text of the first image is reported for the whole comic.
        alt = image_tags[0]['alt']
        # Sanity check: every image carries the same text as alt and title.
        assert all(tag['alt'] == tag['title'] for tag in image_tags)
        image_urls = [tag['src'] for tag in image_tags]
        return {
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': image_urls,
            'title': title,
            'alt': alt,
        }



		@@ 355-377 (lines=23) @@
352		return []
353
354
355		class ExtraFabulousComics(GenericNavigableComic):
356		"""Class to retrieve Extra Fabulous Comics."""
357		name = 'efc'
358		long_name = 'Extra Fabulous Comics'
359		url = 'http://extrafabulouscomics.com'
360		get_first_comic_link = get_a_navi_navifirst
361		get_navi_link = get_link_rel_next
362
363		@classmethod
364		def get_comic_info(cls, soup, link):
365		"""Get information about a particular comics."""
366		img_src_re = re.compile('^%s/wp-content/uploads/' % cls.url)
367		imgs = soup.find_all('img', src=img_src_re)
368		title = soup.find('meta', property='og:title')['content']
369		date_str = soup.find('meta', property='article:published_time')['content'][:10]
370		day = string_to_date(date_str, "%Y-%m-%d")
371		return {
372		'title': title,
373		'img': [i['src'] for i in imgs],
374		'month': day.month,
375		'year': day.year,
376		'day': day.day,
377		'prefix': title + '-'
378		}
379
380
		@@ 381-402 (lines=22) @@
378		}
379
380
381		class GenericLeMondeBlog(GenericNavigableComic):
382		"""Generic class to retrieve comics from Le Monde blogs."""
383		_categories = ('LEMONDE', 'FRANCAIS')
384		get_navi_link = get_link_rel_next
385		get_first_comic_link = simulate_first_link
386		first_url = NotImplemented
387
388		@classmethod
389		def get_comic_info(cls, soup, link):
390		"""Get information about a particular comics."""
391		url2 = soup.find('link', rel='shortlink')['href']
392		title = soup.find('meta', property='og:title')['content']
393		date_str = soup.find("span", class_="entry-date").string
394		day = string_to_date(date_str, "%d %B %Y", "fr_FR.utf8")
395		imgs = soup.find_all('meta', property='og:image')
396		return {
397		'title': title,
398		'url2': url2,
399		'img': [convert_iri_to_plain_ascii_uri(i['content']) for i in imgs],
400		'month': day.month,
401		'year': day.year,
402		'day': day.day,
403		}
404
405
		@@ 2325-2349 (lines=25) @@
2322		}
2323
2324
2325		class LinsEditions(GenericNavigableComic):
2326		"""Class to retrieve L.I.N.S. Editions comics."""
2327		# Also on http://linscomics.tumblr.com
2328		# Now on https://warandpeas.com
2329		name = 'lins'
2330		long_name = 'L.I.N.S. Editions'
2331		url = 'https://linsedition.com'
2332		_categories = ('LINS', )
2333		get_navi_link = get_link_rel_next
2334		get_first_comic_link = simulate_first_link
2335		first_url = 'https://linsedition.com/2011/09/07/l-i-n-s/'
2336
2337		@classmethod
2338		def get_comic_info(cls, soup, link):
2339		"""Get information about a particular comics."""
2340		title = soup.find('meta', property='og:title')['content']
2341		imgs = soup.find_all('meta', property='og:image')
2342		date_str = soup.find('meta', property='article:published_time')['content'][:10]
2343		day = string_to_date(date_str, "%Y-%m-%d")
2344		return {
2345		'title': title,
2346		'img': [i['content'] for i in imgs],
2347		'month': day.month,
2348		'year': day.year,
2349		'day': day.day,
2350		}
2351
2352
		@@ 1021-1045 (lines=25) @@
1018		}
1019
1020
1021		class Mercworks(GenericNavigableComic):
1022		"""Class to retrieve Mercworks comics."""
1023		# Also on http://mercworks.tumblr.com
1024		name = 'mercworks'
1025		long_name = 'Mercworks'
1026		url = 'http://mercworks.net'
1027		get_first_comic_link = get_a_comicnavbase_comicnavfirst
1028		get_navi_link = get_link_rel_next
1029
1030		@classmethod
1031		def get_comic_info(cls, soup, link):
1032		"""Get information about a particular comics."""
1033		title = soup.find('meta', property='og:title')['content']
1034		metadesc = soup.find('meta', property='og:description')
1035		desc = metadesc['content'] if metadesc else ""
1036		date_str = soup.find('meta', property='article:published_time')['content'][:10]
1037		day = string_to_date(date_str, "%Y-%m-%d")
1038		imgs = soup.find_all('meta', property='og:image')
1039		return {
1040		'img': [i['content'] for i in imgs],
1041		'title': title,
1042		'desc': desc,
1043		'day': day.day,
1044		'month': day.month,
1045		'year': day.year
1046		}
1047
1048
		@@ 3266-3287 (lines=22) @@
3263		}
3264
3265
3266		class MarketoonistComics(GenericNavigableComic):
3267		"""Class to retrieve Marketoonist Comics."""
3268		name = 'marketoonist'
3269		long_name = 'Marketoonist'
3270		url = 'https://marketoonist.com/cartoons'
3271		get_first_comic_link = simulate_first_link
3272		get_navi_link = get_link_rel_next
3273		first_url = 'https://marketoonist.com/2002/10/the-8-types-of-brand-managers-2.html'
3274
3275		@classmethod
3276		def get_comic_info(cls, soup, link):
3277		"""Get information about a particular comics."""
3278		imgs = soup.find_all('meta', property='og:image')
3279		date_str = soup.find('meta', property='article:published_time')['content'][:10]
3280		day = string_to_date(date_str, "%Y-%m-%d")
3281		title = soup.find('meta', property='og:title')['content']
3282		return {
3283		'img': [i['content'] for i in imgs],
3284		'day': day.day,
3285		'month': day.month,
3286		'year': day.year,
3287		'title': title,
3288		}
3289
3290
		@@ 1866-1891 (lines=26) @@
1863		}
1864
1865
1866		class Penmen(GenericNavigableComic):
1867		"""Class to retrieve Penmen comics."""
1868		name = 'penmen'
1869		long_name = 'Penmen'
1870		url = 'http://penmen.com'
1871		get_navi_link = get_link_rel_next
1872		get_first_comic_link = simulate_first_link
1873		first_url = 'http://penmen.com/index.php/2016/09/12/penmen-announces-grin-big-brand-clothing/'
1874
1875		@classmethod
1876		def get_comic_info(cls, soup, link):
1877		"""Get information about a particular comics."""
1878		title = soup.find('title').string
1879		imgs = soup.find('div', class_='entry-content').find_all('img')
1880		short_url = soup.find('link', rel='shortlink')['href']
1881		tags = ' '.join(t.string for t in soup.find_all('a', rel='tag'))
1882		date_str = soup.find('time')['datetime'][:10]
1883		day = string_to_date(date_str, "%Y-%m-%d")
1884		return {
1885		'title': title,
1886		'short_url': short_url,
1887		'img': [i['src'] for i in imgs],
1888		'tags': tags,
1889		'month': day.month,
1890		'year': day.year,
1891		'day': day.day,
1892		}
1893
1894
		@@ 1807-1832 (lines=26) @@
1804		}
1805
1806
1807		class SafelyEndangered(GenericNavigableComic):
1808		"""Class to retrieve Safely Endangered comics."""
1809		# Also on http://tumblr.safelyendangered.com
1810		name = 'endangered'
1811		long_name = 'Safely Endangered'
1812		url = 'http://www.safelyendangered.com'
1813		get_navi_link = get_link_rel_next
1814		get_first_comic_link = simulate_first_link
1815		first_url = 'http://www.safelyendangered.com/comic/ignored/'
1816
1817		@classmethod
1818		def get_comic_info(cls, soup, link):
1819		"""Get information about a particular comics."""
1820		title = soup.find('h2', class_='post-title').string
1821		date_str = soup.find('span', class_='post-date').string
1822		day = string_to_date(date_str, '%B %d, %Y')
1823		imgs = soup.find('div', id='comic').find_all('img')
1824		alt = imgs[0]['alt']
1825		assert all(i['alt'] == i['title'] for i in imgs)
1826		return {
1827		'day': day.day,
1828		'month': day.month,
1829		'year': day.year,
1830		'img': [i['src'] for i in imgs],
1831		'title': title,
1832		'alt': alt,
1833		}
1834
1835

SylvainDe / ComicBookMaker

Code Duplication Length = 22-26 lines in 7 locations

comics.py 7 locations