Code Duplication - SylvainDe/ComicBookMaker - Measure and Improve Code Quality continuously with Scrutinizer

Code Duplication Length = 22-26 lines in 8 locations

comics.py 8 locations


        return []


class ExtraFabulousComics(GenericNavigableComic):
    """Class to retrieve Extra Fabulous Comics."""
    name = 'efc'
    long_name = 'Extra Fabulous Comics'
    url = 'http://extrafabulouscomics.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Collects every <img> tag whose src starts with the site's
        wp-content/uploads/ path, the og:title meta content and the
        publication date (first 10 characters of the
        article:published_time meta content, parsed as YYYY-MM-DD).
        The link argument is not used here.

        Returns a dict with the keys 'title', 'img', 'month', 'year',
        'day' and 'prefix' (the title followed by '-').
        """
        # The URL is escaped so that its dots are matched literally instead
        # of being interpreted as "any character" by the regular expression.
        img_src_re = re.compile(
            '^%s/wp-content/uploads/' % re.escape(cls.url))
        imgs = soup.find_all('img', src=img_src_re)
        title = soup.find('meta', property='og:title')['content']
        # Only the leading date part of the timestamp is needed.
        date_str = soup.find('meta', property='article:published_time')['content'][:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        return {
            'title': title,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'prefix': title + '-'
        }



        }


class GenericLeMondeBlog(GenericNavigableComic):
    """Common base for the comics hosted on Le Monde blogs."""
    _categories = ('LEMONDE', 'FRANCAIS')
    first_url = NotImplemented  # presumably set by concrete subclasses
    get_first_comic_link = simulate_first_link
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for the comic page held in soup.

        The result holds the og:title, the shortlink href (as 'url2'),
        the og:image urls converted with convert_iri_to_plain_ascii_uri
        and the date read from the "entry-date" span.
        """
        shortlink_tag = soup.find('link', rel='shortlink')
        short_url = shortlink_tag['href']
        title_tag = soup.find('meta', property='og:title')
        page_title = title_tag['content']
        date_span = soup.find('span', class_='entry-date')
        # Third argument is presumably the locale used to parse the
        # month name - confirm against string_to_date.
        published = string_to_date(date_span.string, '%d %B %Y', 'fr_FR.utf8')
        image_metas = soup.find_all('meta', property='og:image')
        image_urls = []
        for meta in image_metas:
            image_urls.append(convert_iri_to_plain_ascii_uri(meta['content']))
        info = {}
        info['title'] = page_title
        info['url2'] = short_url
        info['img'] = image_urls
        info['month'] = published.month
        info['year'] = published.year
        info['day'] = published.day
        return info



            'day': day.day,
        }


class LinsEditions(GenericNavigableComic):
    """Retrieve the comics of L.I.N.S. Editions."""
    # Also on http://linscomics.tumblr.com
    # Now on https://warandpeas.com
    name = 'lins'
    long_name = 'L.I.N.S. Editions'
    _categories = ('LINS', )
    url = 'https://linsedition.com'
    first_url = 'https://linsedition.com/2011/09/07/l-i-n-s/'
    get_first_comic_link = simulate_first_link
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, image urls and publication date of a comic page.

        Everything is read from the page's meta tags: og:title, og:image
        and the first 10 characters of article:published_time.
        """
        title_meta = soup.find('meta', property='og:title')
        page_title = title_meta['content']
        image_metas = soup.find_all('meta', property='og:image')
        published_meta = soup.find('meta', property='article:published_time')
        iso_date = published_meta['content'][:10]  # keep the date part only
        published = string_to_date(iso_date, '%Y-%m-%d')
        image_urls = [meta['content'] for meta in image_metas]
        return dict(
            title=page_title,
            img=image_urls,
            month=published.month,
            year=published.year,
            day=published.day,
        )


            'prefix': '%d-' % num,
        }


class Mercworks(GenericNavigableComic):
    """Retrieve the Mercworks comics."""
    # Also on http://mercworks.tumblr.com
    name = 'mercworks'
    long_name = 'Mercworks'
    url = 'http://mercworks.net'
    get_navi_link = get_link_rel_next
    get_first_comic_link = get_a_comicnavbase_comicnavfirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return images, title, description and date of a comic page.

        The description comes from the og:description meta tag and is
        the empty string when that lookup gives a falsy result.
        """
        title_meta = soup.find('meta', property='og:title')
        page_title = title_meta['content']
        desc_meta = soup.find('meta', property='og:description')
        if desc_meta:
            description = desc_meta['content']
        else:
            description = ""
        published_meta = soup.find('meta', property='article:published_time')
        iso_date = published_meta['content'][:10]  # keep the date part only
        published = string_to_date(iso_date, '%Y-%m-%d')
        image_metas = soup.find_all('meta', property='og:image')
        info = {'img': [meta['content'] for meta in image_metas]}
        info['title'] = page_title
        info['desc'] = description
        info['day'] = published.day
        info['month'] = published.month
        info['year'] = published.year
        return info


            'img': [i['src'] for i in imgs],
        }


class MarketoonistComics(GenericNavigableComic):
    """Retrieve the Marketoonist comics."""
    name = 'marketoonist'
    long_name = 'Marketoonist'
    url = 'https://marketoonist.com/cartoons'
    first_url = 'https://marketoonist.com/2002/10/the-8-types-of-brand-managers-2.html'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return image urls, publication date and title of a comic page.

        All values are taken from meta tags (og:image,
        article:published_time and og:title).
        """
        image_metas = soup.find_all('meta', property='og:image')
        published_meta = soup.find('meta', property='article:published_time')
        iso_date = published_meta['content'][:10]  # keep the date part only
        published = string_to_date(iso_date, '%Y-%m-%d')
        title_meta = soup.find('meta', property='og:title')
        page_title = title_meta['content']
        image_urls = [meta['content'] for meta in image_metas]
        return dict(
            img=image_urls,
            day=published.day,
            month=published.month,
            year=published.year,
            title=page_title,
        )


            'author': author,
        }


class Penmen(GenericNavigableComic):
    """Retrieve the Penmen comics."""
    name = 'penmen'
    long_name = 'Penmen'
    url = 'http://penmen.com'
    first_url = 'http://penmen.com/index.php/2016/09/12/penmen-announces-grin-big-brand-clothing/'
    get_first_comic_link = simulate_first_link
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, short url, images, tags and date of a comic page.

        Images are the <img> tags found in the "entry-content" div and
        the tags are the texts of the rel="tag" anchors joined by spaces.
        """
        page_title = soup.find('title').string
        content_div = soup.find('div', class_='entry-content')
        image_tags = content_div.find_all('img')
        shortlink_tag = soup.find('link', rel='shortlink')
        short_url = shortlink_tag['href']
        tag_anchors = soup.find_all('a', rel='tag')
        tag_names = [anchor.string for anchor in tag_anchors]
        joined_tags = ' '.join(tag_names)
        time_tag = soup.find('time')
        iso_date = time_tag['datetime'][:10]  # keep the date part only
        published = string_to_date(iso_date, '%Y-%m-%d')
        info = {}
        info['title'] = page_title
        info['short_url'] = short_url
        info['img'] = [image['src'] for image in image_tags]
        info['tags'] = joined_tags
        info['month'] = published.month
        info['year'] = published.year
        info['day'] = published.day
        return info


            'img': [i['content'] for i in imgs if i['content'] not in skip_imgs],
        }


class SafelyEndangered(GenericNavigableComic):
    """Retrieve the Safely Endangered comics."""
    # Also on http://tumblr.safelyendangered.com
    name = 'endangered'
    long_name = 'Safely Endangered'
    url = 'http://www.safelyendangered.com'
    first_url = 'http://www.safelyendangered.com/comic/ignored/'
    get_first_comic_link = simulate_first_link
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return date, images, title and alt text of a comic page.

        The alt text is the one of the first image found in the div
        whose id is "comic".
        """
        title_tag = soup.find('h2', class_='post-title')
        page_title = title_tag.string
        date_tag = soup.find('span', class_='post-date')
        published = string_to_date(date_tag.string, '%B %d, %Y')
        comic_div = soup.find('div', id='comic')
        image_tags = comic_div.find_all('img')
        first_alt = image_tags[0]['alt']
        # Sanity check: each image carries the same text as alt and title.
        assert all(image['alt'] == image['title'] for image in image_tags)
        info = {
            'day': published.day,
            'month': published.month,
            'year': published.year,
        }
        info['img'] = [image['src'] for image in image_tags]
        info['title'] = page_title
        info['alt'] = first_alt
        return info


        }


class MyExtraLife(GenericNavigableComic):
    """Retrieve the My Extra Life comics."""
    name = 'extralife'
    long_name = 'My Extra Life'
    url = 'http://www.myextralife.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the "first comic" navigation link found on the home page."""
        home_page = get_soup_at_url(cls.url)
        first_link = home_page.find(
            'a', class_='comic_nav_link first_comic_link')
        return first_link

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, image urls and date of a comic page.

        Images with an empty src are left out of the result.
        """
        title_tag = soup.find('h1', class_='comic_title')
        page_title = title_tag.string
        date_tag = soup.find('span', class_='comic_date')
        published = string_to_date(date_tag.string, '%B %d, %Y')
        image_tags = soup.find_all('img', class_='comic')
        # Sanity check: alt and title of each image both equal the title.
        assert all(
            image['alt'] == image['title'] == page_title
            for image in image_tags)
        image_urls = [image['src'] for image in image_tags if image['src']]
        return dict(
            title=page_title,
            img=image_urls,
            day=published.day,
            month=published.month,
            year=published.year,
        )



		@@ 355-377 (lines=23) @@
352		return []
353
354
355		class ExtraFabulousComics(GenericNavigableComic):
356		"""Class to retrieve Extra Fabulous Comics."""
357		name = 'efc'
358		long_name = 'Extra Fabulous Comics'
359		url = 'http://extrafabulouscomics.com'
360		get_first_comic_link = get_a_navi_navifirst
361		get_navi_link = get_link_rel_next
362
363		@classmethod
364		def get_comic_info(cls, soup, link):
365		"""Get information about a particular comics."""
366		img_src_re = re.compile('^%s/wp-content/uploads/' % cls.url)
367		imgs = soup.find_all('img', src=img_src_re)
368		title = soup.find('meta', property='og:title')['content']
369		date_str = soup.find('meta', property='article:published_time')['content'][:10]
370		day = string_to_date(date_str, "%Y-%m-%d")
371		return {
372		'title': title,
373		'img': [i['src'] for i in imgs],
374		'month': day.month,
375		'year': day.year,
376		'day': day.day,
377		'prefix': title + '-'
378		}
379
380
		@@ 381-402 (lines=22) @@
378		}
379
380
381		class GenericLeMondeBlog(GenericNavigableComic):
382		"""Generic class to retrieve comics from Le Monde blogs."""
383		_categories = ('LEMONDE', 'FRANCAIS')
384		get_navi_link = get_link_rel_next
385		get_first_comic_link = simulate_first_link
386		first_url = NotImplemented
387
388		@classmethod
389		def get_comic_info(cls, soup, link):
390		"""Get information about a particular comics."""
391		url2 = soup.find('link', rel='shortlink')['href']
392		title = soup.find('meta', property='og:title')['content']
393		date_str = soup.find("span", class_="entry-date").string
394		day = string_to_date(date_str, "%d %B %Y", "fr_FR.utf8")
395		imgs = soup.find_all('meta', property='og:image')
396		return {
397		'title': title,
398		'url2': url2,
399		'img': [convert_iri_to_plain_ascii_uri(i['content']) for i in imgs],
400		'month': day.month,
401		'year': day.year,
402		'day': day.day,
403		}
404
405
		@@ 2320-2344 (lines=25) @@
2317		'day': day.day,
2318		}
2319
2320
2321		class LinsEditions(GenericNavigableComic):
2322		"""Class to retrieve L.I.N.S. Editions comics."""
2323		# Also on http://linscomics.tumblr.com
2324		# Now on https://warandpeas.com
2325		name = 'lins'
2326		long_name = 'L.I.N.S. Editions'
2327		url = 'https://linsedition.com'
2328		_categories = ('LINS', )
2329		get_navi_link = get_link_rel_next
2330		get_first_comic_link = simulate_first_link
2331		first_url = 'https://linsedition.com/2011/09/07/l-i-n-s/'
2332
2333		@classmethod
2334		def get_comic_info(cls, soup, link):
2335		"""Get information about a particular comics."""
2336		title = soup.find('meta', property='og:title')['content']
2337		imgs = soup.find_all('meta', property='og:image')
2338		date_str = soup.find('meta', property='article:published_time')['content'][:10]
2339		day = string_to_date(date_str, "%Y-%m-%d")
2340		return {
2341		'title': title,
2342		'img': [i['content'] for i in imgs],
2343		'month': day.month,
2344		'year': day.year,
2345		'day': day.day,
2346		}
2347
		@@ 1016-1040 (lines=25) @@
1013		'prefix': '%d-' % num,
1014		}
1015
1016
1017		class Mercworks(GenericNavigableComic):
1018		"""Class to retrieve Mercworks comics."""
1019		# Also on http://mercworks.tumblr.com
1020		name = 'mercworks'
1021		long_name = 'Mercworks'
1022		url = 'http://mercworks.net'
1023		get_first_comic_link = get_a_comicnavbase_comicnavfirst
1024		get_navi_link = get_link_rel_next
1025
1026		@classmethod
1027		def get_comic_info(cls, soup, link):
1028		"""Get information about a particular comics."""
1029		title = soup.find('meta', property='og:title')['content']
1030		metadesc = soup.find('meta', property='og:description')
1031		desc = metadesc['content'] if metadesc else ""
1032		date_str = soup.find('meta', property='article:published_time')['content'][:10]
1033		day = string_to_date(date_str, "%Y-%m-%d")
1034		imgs = soup.find_all('meta', property='og:image')
1035		return {
1036		'img': [i['content'] for i in imgs],
1037		'title': title,
1038		'desc': desc,
1039		'day': day.day,
1040		'month': day.month,
1041		'year': day.year
1042		}
1043
		@@ 3261-3282 (lines=22) @@
3258		'img': [i['src'] for i in imgs],
3259		}
3260
3261
3262		class MarketoonistComics(GenericNavigableComic):
3263		"""Class to retrieve Marketoonist Comics."""
3264		name = 'marketoonist'
3265		long_name = 'Marketoonist'
3266		url = 'https://marketoonist.com/cartoons'
3267		get_first_comic_link = simulate_first_link
3268		get_navi_link = get_link_rel_next
3269		first_url = 'https://marketoonist.com/2002/10/the-8-types-of-brand-managers-2.html'
3270
3271		@classmethod
3272		def get_comic_info(cls, soup, link):
3273		"""Get information about a particular comics."""
3274		imgs = soup.find_all('meta', property='og:image')
3275		date_str = soup.find('meta', property='article:published_time')['content'][:10]
3276		day = string_to_date(date_str, "%Y-%m-%d")
3277		title = soup.find('meta', property='og:title')['content']
3278		return {
3279		'img': [i['content'] for i in imgs],
3280		'day': day.day,
3281		'month': day.month,
3282		'year': day.year,
3283		'title': title,
3284		}
3285
		@@ 1861-1886 (lines=26) @@
1858		'author': author,
1859		}
1860
1861
1862		class Penmen(GenericNavigableComic):
1863		"""Class to retrieve Penmen comics."""
1864		name = 'penmen'
1865		long_name = 'Penmen'
1866		url = 'http://penmen.com'
1867		get_navi_link = get_link_rel_next
1868		get_first_comic_link = simulate_first_link
1869		first_url = 'http://penmen.com/index.php/2016/09/12/penmen-announces-grin-big-brand-clothing/'
1870
1871		@classmethod
1872		def get_comic_info(cls, soup, link):
1873		"""Get information about a particular comics."""
1874		title = soup.find('title').string
1875		imgs = soup.find('div', class_='entry-content').find_all('img')
1876		short_url = soup.find('link', rel='shortlink')['href']
1877		tags = ' '.join(t.string for t in soup.find_all('a', rel='tag'))
1878		date_str = soup.find('time')['datetime'][:10]
1879		day = string_to_date(date_str, "%Y-%m-%d")
1880		return {
1881		'title': title,
1882		'short_url': short_url,
1883		'img': [i['src'] for i in imgs],
1884		'tags': tags,
1885		'month': day.month,
1886		'year': day.year,
1887		'day': day.day,
1888		}
1889
		@@ 1802-1827 (lines=26) @@
1799		'img': [i['content'] for i in imgs if i['content'] not in skip_imgs],
1800		}
1801
1802
1803		class SafelyEndangered(GenericNavigableComic):
1804		"""Class to retrieve Safely Endangered comics."""
1805		# Also on http://tumblr.safelyendangered.com
1806		name = 'endangered'
1807		long_name = 'Safely Endangered'
1808		url = 'http://www.safelyendangered.com'
1809		get_navi_link = get_link_rel_next
1810		get_first_comic_link = simulate_first_link
1811		first_url = 'http://www.safelyendangered.com/comic/ignored/'
1812
1813		@classmethod
1814		def get_comic_info(cls, soup, link):
1815		"""Get information about a particular comics."""
1816		title = soup.find('h2', class_='post-title').string
1817		date_str = soup.find('span', class_='post-date').string
1818		day = string_to_date(date_str, '%B %d, %Y')
1819		imgs = soup.find('div', id='comic').find_all('img')
1820		alt = imgs[0]['alt']
1821		assert all(i['alt'] == i['title'] for i in imgs)
1822		return {
1823		'day': day.day,
1824		'month': day.month,
1825		'year': day.year,
1826		'img': [i['src'] for i in imgs],
1827		'title': title,
1828		'alt': alt,
1829		}
1830
		@@ 922-947 (lines=26) @@
919		}
920
921
922		class MyExtraLife(GenericNavigableComic):
923		"""Class to retrieve My Extra Life comics."""
924		name = 'extralife'
925		long_name = 'My Extra Life'
926		url = 'http://www.myextralife.com'
927		get_navi_link = get_link_rel_next
928
929		@classmethod
930		def get_first_comic_link(cls):
931		"""Get link to first comics."""
932		return get_soup_at_url(cls.url).find('a', class_='comic_nav_link first_comic_link')
933
934		@classmethod
935		def get_comic_info(cls, soup, link):
936		"""Get information about a particular comics."""
937		title = soup.find("h1", class_="comic_title").string
938		date_str = soup.find("span", class_="comic_date").string
939		day = string_to_date(date_str, "%B %d, %Y")
940		imgs = soup.find_all("img", class_="comic")
941		assert all(i['alt'] == i['title'] == title for i in imgs)
942		return {
943		'title': title,
944		'img': [i['src'] for i in imgs if i["src"]],
945		'day': day.day,
946		'month': day.month,
947		'year': day.year
948		}
949
950

SylvainDe / ComicBookMaker

Code Duplication Length = 22-26 lines in 8 locations

comics.py 8 locations