Code Duplication - SylvainDe/ComicBookMaker - Measure and Improve Code Quality continuously with Scrutinizer

Code Duplication Length = 22-26 lines in 7 locations

comics.py 7 locations


        return []


class ExtraFabulousComics(GenericNavigableComic):
    """Class to retrieve Extra Fabulous Comics."""
    name = 'efc'
    long_name = 'Extra Fabulous Comics'
    url = 'http://extrafabulouscomics.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Args:
            soup: parsed comic page, queried through ``find``/``find_all``
                (presumably a BeautifulSoup object -- confirm with caller).
            link: not used by this implementation.

        Returns:
            A dict with the keys 'title', 'img' (list of image ``src``
            values), 'month', 'year', 'day' and 'prefix' (title + '-').
        """
        # Escape the site URL before embedding it in the pattern: left raw,
        # the '.' in the domain would match any character, so images hosted
        # on a look-alike host could slip through the filter.
        img_src_re = re.compile('^%s/wp-content/uploads/' % re.escape(cls.url))
        imgs = soup.find_all('img', src=img_src_re)
        title = soup.find('meta', property='og:title')['content']
        # Only the first 10 characters are kept so the value fits "%Y-%m-%d".
        date_str = soup.find('meta', property='article:published_time')['content'][:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        return {
            'title': title,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'prefix': title + '-',
        }



        }


class GenericLeMondeBlog(GenericNavigableComic):
    """Generic class to retrieve comics from Le Monde blogs.

    ``first_url`` is left as ``NotImplemented`` here; presumably concrete
    blog classes provide the real value (confirm against subclasses).
    """
    _categories = ('LEMONDE', 'FRANCAIS')
    first_url = NotImplemented
    get_first_comic_link = simulate_first_link
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dictionary for one blog post.

        The short link, the og:title, the text of the "entry-date" span and
        every og:image meta are read from ``soup``; ``link`` is not used.
        Returns a dict with keys 'title', 'url2', 'img', 'month', 'year'
        and 'day'.
        """
        shortlink_tag = soup.find('link', rel='shortlink')
        url2 = shortlink_tag['href']
        title_tag = soup.find('meta', property='og:title')
        title = title_tag['content']
        date_span = soup.find("span", class_="entry-date")
        # Third argument looks like a locale name for the French month names
        # -- NOTE(review): confirm against string_to_date.
        day = string_to_date(date_span.string, "%d %B %Y", "fr_FR.utf8")
        image_metas = soup.find_all('meta', property='og:image')
        # Filled key by key in the same order as the fields are reported.
        info = {'title': title, 'url2': url2}
        info['img'] = [
            convert_iri_to_plain_ascii_uri(meta['content'])
            for meta in image_metas
        ]
        info['month'] = day.month
        info['year'] = day.year
        info['day'] = day.day
        return info



        }


class LinsEditions(GenericNavigableComic):
    """Class to retrieve L.I.N.S. Editions comics."""
    # Other locations of the same comic:
    #  - also on http://linscomics.tumblr.com
    #  - now on https://warandpeas.com
    name = 'lins'
    long_name = 'L.I.N.S. Editions'
    url = 'https://linsedition.com'
    first_url = 'https://linsedition.com/2011/09/07/l-i-n-s/'
    _categories = ('LINS', )
    get_first_comic_link = simulate_first_link
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dictionary for one comic page.

        Title, images and publication date all come from ``<meta>`` tags
        (og:title, og:image, article:published_time); ``link`` is not used.
        Returns a dict with keys 'title', 'img', 'month', 'year', 'day'.
        """
        title_meta = soup.find('meta', property='og:title')
        title = title_meta['content']
        image_metas = soup.find_all('meta', property='og:image')
        published_meta = soup.find('meta', property='article:published_time')
        # The first 10 characters are what gets parsed as "%Y-%m-%d".
        published = published_meta['content'][:10]
        day = string_to_date(published, "%Y-%m-%d")
        info = {'title': title}
        info['img'] = [meta['content'] for meta in image_metas]
        info['month'] = day.month
        info['year'] = day.year
        info['day'] = day.day
        return info



        }


class Mercworks(GenericNavigableComic):
    """Class to retrieve Mercworks comics."""
    # The comic is also published on http://mercworks.tumblr.com
    name = 'mercworks'
    long_name = 'Mercworks'
    url = 'http://mercworks.net'
    get_navi_link = get_link_rel_next
    get_first_comic_link = get_a_comicnavbase_comicnavfirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dictionary for one comic page.

        Reads og:title, og:description (optional: an empty string is used
        when the tag is absent), article:published_time and og:image metas
        from ``soup``; ``link`` is not used.
        Returns a dict with keys 'img', 'title', 'desc', 'day', 'month',
        'year'.
        """
        title_meta = soup.find('meta', property='og:title')
        title = title_meta['content']
        metadesc = soup.find('meta', property='og:description')
        if metadesc:
            desc = metadesc['content']
        else:
            desc = ""
        published_meta = soup.find('meta', property='article:published_time')
        # The first 10 characters are what gets parsed as "%Y-%m-%d".
        day = string_to_date(published_meta['content'][:10], "%Y-%m-%d")
        image_metas = soup.find_all('meta', property='og:image')
        info = {'img': [meta['content'] for meta in image_metas]}
        info['title'] = title
        info['desc'] = desc
        info['day'] = day.day
        info['month'] = day.month
        info['year'] = day.year
        return info



        }


class MarketoonistComics(GenericNavigableComic):
    """Class to retrieve Marketoonist Comics."""
    name = 'marketoonist'
    long_name = 'Marketoonist'
    url = 'https://marketoonist.com/cartoons'
    first_url = 'https://marketoonist.com/2002/10/the-8-types-of-brand-managers-2.html'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dictionary for one cartoon page.

        Images, publication date and title are taken from the og:image,
        article:published_time and og:title metas; ``link`` is not used.
        Returns a dict with keys 'img', 'day', 'month', 'year', 'title'.
        """
        image_metas = soup.find_all('meta', property='og:image')
        published_meta = soup.find('meta', property='article:published_time')
        # The first 10 characters are what gets parsed as "%Y-%m-%d".
        published = published_meta['content'][:10]
        day = string_to_date(published, "%Y-%m-%d")
        title_meta = soup.find('meta', property='og:title')
        title = title_meta['content']
        info = {'img': [meta['content'] for meta in image_metas]}
        info['day'] = day.day
        info['month'] = day.month
        info['year'] = day.year
        info['title'] = title
        return info



        }


class Penmen(GenericNavigableComic):
    """Class to retrieve Penmen comics."""
    name = 'penmen'
    long_name = 'Penmen'
    url = 'http://penmen.com'
    first_url = 'http://penmen.com/index.php/2016/09/12/penmen-announces-grin-big-brand-clothing/'
    get_first_comic_link = simulate_first_link
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dictionary for one post.

        Uses the page <title>, the <img> tags inside the "entry-content"
        div, the shortlink, the rel="tag" anchors (joined with spaces) and
        the ``datetime`` attribute of the first <time> tag; ``link`` is not
        used.
        Returns a dict with keys 'title', 'short_url', 'img', 'tags',
        'month', 'year', 'day'.
        """
        title_tag = soup.find('title')
        title = title_tag.string
        content_div = soup.find('div', class_='entry-content')
        images = content_div.find_all('img')
        shortlink_tag = soup.find('link', rel='shortlink')
        short_url = shortlink_tag['href']
        tag_names = [anchor.string for anchor in soup.find_all('a', rel='tag')]
        tags = ' '.join(tag_names)
        time_tag = soup.find('time')
        # The first 10 characters are what gets parsed as "%Y-%m-%d".
        day = string_to_date(time_tag['datetime'][:10], "%Y-%m-%d")
        info = {'title': title, 'short_url': short_url}
        info['img'] = [image['src'] for image in images]
        info['tags'] = tags
        info['month'] = day.month
        info['year'] = day.year
        info['day'] = day.day
        return info



        }


class SafelyEndangered(GenericNavigableComic):
    """Class to retrieve Safely Endangered comics."""
    # The comic is also published on http://tumblr.safelyendangered.com
    name = 'endangered'
    long_name = 'Safely Endangered'
    url = 'http://www.safelyendangered.com'
    first_url = 'http://www.safelyendangered.com/comic/ignored/'
    get_first_comic_link = simulate_first_link
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dictionary for one comic page.

        Title and date come from the "post-title" heading and "post-date"
        span; images are the <img> tags inside the div with id "comic".
        The alt text reported is that of the first image, and every image
        is asserted to have identical 'alt' and 'title' attributes.
        ``link`` is not used.
        Returns a dict with keys 'day', 'month', 'year', 'img', 'title',
        'alt'.
        """
        heading = soup.find('h2', class_='post-title')
        title = heading.string
        date_span = soup.find('span', class_='post-date')
        day = string_to_date(date_span.string, '%B %d, %Y')
        comic_div = soup.find('div', id='comic')
        images = comic_div.find_all('img')
        first_image = images[0]
        alt = first_image['alt']
        # Sanity check kept after the alt lookup, as in the original order.
        assert all(image['alt'] == image['title'] for image in images)
        info = {'day': day.day, 'month': day.month, 'year': day.year}
        info['img'] = [image['src'] for image in images]
        info['title'] = title
        info['alt'] = alt
        return info



		@@ 355-377 (lines=23) @@
352		return []
353
354
355		class ExtraFabulousComics(GenericNavigableComic):
356		"""Class to retrieve Extra Fabulous Comics."""
357		name = 'efc'
358		long_name = 'Extra Fabulous Comics'
359		url = 'http://extrafabulouscomics.com'
360		get_first_comic_link = get_a_navi_navifirst
361		get_navi_link = get_link_rel_next
362
363		@classmethod
364		def get_comic_info(cls, soup, link):
365		"""Get information about a particular comics."""
366		img_src_re = re.compile('^%s/wp-content/uploads/' % cls.url)
367		imgs = soup.find_all('img', src=img_src_re)
368		title = soup.find('meta', property='og:title')['content']
369		date_str = soup.find('meta', property='article:published_time')['content'][:10]
370		day = string_to_date(date_str, "%Y-%m-%d")
371		return {
372		'title': title,
373		'img': [i['src'] for i in imgs],
374		'month': day.month,
375		'year': day.year,
376		'day': day.day,
377		'prefix': title + '-'
378		}
379
380
		@@ 381-402 (lines=22) @@
378		}
379
380
381		class GenericLeMondeBlog(GenericNavigableComic):
382		"""Generic class to retrieve comics from Le Monde blogs."""
383		_categories = ('LEMONDE', 'FRANCAIS')
384		get_navi_link = get_link_rel_next
385		get_first_comic_link = simulate_first_link
386		first_url = NotImplemented
387
388		@classmethod
389		def get_comic_info(cls, soup, link):
390		"""Get information about a particular comics."""
391		url2 = soup.find('link', rel='shortlink')['href']
392		title = soup.find('meta', property='og:title')['content']
393		date_str = soup.find("span", class_="entry-date").string
394		day = string_to_date(date_str, "%d %B %Y", "fr_FR.utf8")
395		imgs = soup.find_all('meta', property='og:image')
396		return {
397		'title': title,
398		'url2': url2,
399		'img': [convert_iri_to_plain_ascii_uri(i['content']) for i in imgs],
400		'month': day.month,
401		'year': day.year,
402		'day': day.day,
403		}
404
405
		@@ 2320-2344 (lines=25) @@
2317		}
2318
2319
2320		class LinsEditions(GenericNavigableComic):
2321		"""Class to retrieve L.I.N.S. Editions comics."""
2322		# Also on http://linscomics.tumblr.com
2323		# Now on https://warandpeas.com
2324		name = 'lins'
2325		long_name = 'L.I.N.S. Editions'
2326		url = 'https://linsedition.com'
2327		_categories = ('LINS', )
2328		get_navi_link = get_link_rel_next
2329		get_first_comic_link = simulate_first_link
2330		first_url = 'https://linsedition.com/2011/09/07/l-i-n-s/'
2331
2332		@classmethod
2333		def get_comic_info(cls, soup, link):
2334		"""Get information about a particular comics."""
2335		title = soup.find('meta', property='og:title')['content']
2336		imgs = soup.find_all('meta', property='og:image')
2337		date_str = soup.find('meta', property='article:published_time')['content'][:10]
2338		day = string_to_date(date_str, "%Y-%m-%d")
2339		return {
2340		'title': title,
2341		'img': [i['content'] for i in imgs],
2342		'month': day.month,
2343		'year': day.year,
2344		'day': day.day,
2345		}
2346
2347
		@@ 1016-1040 (lines=25) @@
1013		}
1014
1015
1016		class Mercworks(GenericNavigableComic):
1017		"""Class to retrieve Mercworks comics."""
1018		# Also on http://mercworks.tumblr.com
1019		name = 'mercworks'
1020		long_name = 'Mercworks'
1021		url = 'http://mercworks.net'
1022		get_first_comic_link = get_a_comicnavbase_comicnavfirst
1023		get_navi_link = get_link_rel_next
1024
1025		@classmethod
1026		def get_comic_info(cls, soup, link):
1027		"""Get information about a particular comics."""
1028		title = soup.find('meta', property='og:title')['content']
1029		metadesc = soup.find('meta', property='og:description')
1030		desc = metadesc['content'] if metadesc else ""
1031		date_str = soup.find('meta', property='article:published_time')['content'][:10]
1032		day = string_to_date(date_str, "%Y-%m-%d")
1033		imgs = soup.find_all('meta', property='og:image')
1034		return {
1035		'img': [i['content'] for i in imgs],
1036		'title': title,
1037		'desc': desc,
1038		'day': day.day,
1039		'month': day.month,
1040		'year': day.year
1041		}
1042
1043
		@@ 3261-3282 (lines=22) @@
3258		}
3259
3260
3261		class MarketoonistComics(GenericNavigableComic):
3262		"""Class to retrieve Marketoonist Comics."""
3263		name = 'marketoonist'
3264		long_name = 'Marketoonist'
3265		url = 'https://marketoonist.com/cartoons'
3266		get_first_comic_link = simulate_first_link
3267		get_navi_link = get_link_rel_next
3268		first_url = 'https://marketoonist.com/2002/10/the-8-types-of-brand-managers-2.html'
3269
3270		@classmethod
3271		def get_comic_info(cls, soup, link):
3272		"""Get information about a particular comics."""
3273		imgs = soup.find_all('meta', property='og:image')
3274		date_str = soup.find('meta', property='article:published_time')['content'][:10]
3275		day = string_to_date(date_str, "%Y-%m-%d")
3276		title = soup.find('meta', property='og:title')['content']
3277		return {
3278		'img': [i['content'] for i in imgs],
3279		'day': day.day,
3280		'month': day.month,
3281		'year': day.year,
3282		'title': title,
3283		}
3284
3285
		@@ 1861-1886 (lines=26) @@
1858		}
1859
1860
1861		class Penmen(GenericNavigableComic):
1862		"""Class to retrieve Penmen comics."""
1863		name = 'penmen'
1864		long_name = 'Penmen'
1865		url = 'http://penmen.com'
1866		get_navi_link = get_link_rel_next
1867		get_first_comic_link = simulate_first_link
1868		first_url = 'http://penmen.com/index.php/2016/09/12/penmen-announces-grin-big-brand-clothing/'
1869
1870		@classmethod
1871		def get_comic_info(cls, soup, link):
1872		"""Get information about a particular comics."""
1873		title = soup.find('title').string
1874		imgs = soup.find('div', class_='entry-content').find_all('img')
1875		short_url = soup.find('link', rel='shortlink')['href']
1876		tags = ' '.join(t.string for t in soup.find_all('a', rel='tag'))
1877		date_str = soup.find('time')['datetime'][:10]
1878		day = string_to_date(date_str, "%Y-%m-%d")
1879		return {
1880		'title': title,
1881		'short_url': short_url,
1882		'img': [i['src'] for i in imgs],
1883		'tags': tags,
1884		'month': day.month,
1885		'year': day.year,
1886		'day': day.day,
1887		}
1888
1889
		@@ 1802-1827 (lines=26) @@
1799		}
1800
1801
1802		class SafelyEndangered(GenericNavigableComic):
1803		"""Class to retrieve Safely Endangered comics."""
1804		# Also on http://tumblr.safelyendangered.com
1805		name = 'endangered'
1806		long_name = 'Safely Endangered'
1807		url = 'http://www.safelyendangered.com'
1808		get_navi_link = get_link_rel_next
1809		get_first_comic_link = simulate_first_link
1810		first_url = 'http://www.safelyendangered.com/comic/ignored/'
1811
1812		@classmethod
1813		def get_comic_info(cls, soup, link):
1814		"""Get information about a particular comics."""
1815		title = soup.find('h2', class_='post-title').string
1816		date_str = soup.find('span', class_='post-date').string
1817		day = string_to_date(date_str, '%B %d, %Y')
1818		imgs = soup.find('div', id='comic').find_all('img')
1819		alt = imgs[0]['alt']
1820		assert all(i['alt'] == i['title'] for i in imgs)
1821		return {
1822		'day': day.day,
1823		'month': day.month,
1824		'year': day.year,
1825		'img': [i['src'] for i in imgs],
1826		'title': title,
1827		'alt': alt,
1828		}
1829
1830

SylvainDe / ComicBookMaker

Code Duplication Length = 22-26 lines in 7 locations

comics.py 7 locations