Code Duplication - SylvainDe/ComicBookMaker - Measure and Improve Code Quality continuously with Scrutinizer

Code Duplication Length = 22-26 lines in 7 locations

comics.py 7 locations




class LinsEditions(GenericNavigableComic):
    """Comic scraper for L.I.N.S. Editions (https://linsedition.com).

    First-comic lookup and navigation are delegated to the shared
    ``simulate_first_link`` / ``get_link_rel_next`` helpers; the comic data
    itself is read from the page's ``<meta>`` tags.
    """
    # Also on https://linscomics.tumblr.com
    # Now on https://warandpeas.com
    name = 'lins'
    long_name = 'L.I.N.S. Editions'
    url = 'https://linsedition.com'
    _categories = ('LINS', )
    first_url = 'https://linsedition.com/2011/09/07/l-i-n-s/'
    get_first_comic_link = simulate_first_link
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title, images, date) for one comic page.

        ``soup`` is the parsed comic page; ``link`` is accepted but not
        used by this implementation.
        """
        og_title = soup.find('meta', property='og:title')['content']
        og_images = soup.find_all('meta', property='og:image')
        published = soup.find('meta', property='article:published_time')
        # Only the first 10 characters (YYYY-MM-DD) of the timestamp are parsed.
        pub_date = string_to_date(published['content'][:10], "%Y-%m-%d")
        image_urls = [meta['content'] for meta in og_images]
        return {
            'title': og_title,
            'img': image_urls,
            'month': pub_date.month,
            'year': pub_date.year,
            'day': pub_date.day,
        }


class ThorsThundershack(GenericNavigableComic):

            'img': [i['content'] for i in imgs],
        }


class Mercworks(GenericNavigableComic):
    """Comic scraper for Mercworks (http://mercworks.net).

    First-comic lookup and navigation are delegated to the shared
    ``get_a_comicnavbase_comicnavfirst`` / ``get_link_rel_next`` helpers.
    """
    # Also on http://mercworks.tumblr.com
    name = 'mercworks'
    long_name = 'Mercworks'
    url = 'http://mercworks.net'
    get_navi_link = get_link_rel_next
    get_first_comic_link = get_a_comicnavbase_comicnavfirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (images, title, description, date) for a page.

        ``soup`` is the parsed comic page; ``link`` is accepted but not
        used by this implementation.
        """
        og_title = soup.find('meta', property='og:title')['content']
        og_desc = soup.find('meta', property='og:description')
        # The description tag is optional: fall back to an empty string.
        if og_desc:
            description = og_desc['content']
        else:
            description = ""
        published = soup.find('meta', property='article:published_time')
        # Only the first 10 characters (YYYY-MM-DD) of the timestamp are parsed.
        pub_date = string_to_date(published['content'][:10], "%Y-%m-%d")
        og_images = soup.find_all('meta', property='og:image')
        image_urls = [meta['content'] for meta in og_images]
        return {
            'img': image_urls,
            'title': og_title,
            'desc': description,
            'day': pub_date.day,
            'month': pub_date.month,
            'year': pub_date.year
        }



    @classmethod
    def get_first_comic_link(cls):
        """Return the ``<a class="first">`` element of the page at ``cls.url``."""
        landing_page = get_soup_at_url(cls.url)
        return landing_page.find('a', class_='first')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the navigation anchor found in ``last_soup``.

        Looks for ``<a class="next">`` when ``next_`` is truthy and for
        ``<a class="prev">`` otherwise.
        """
        if next_:
            wanted_class = 'next'
        else:
            wanted_class = 'prev'
        return last_soup.find('a', class_=wanted_class)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title, images, date) for one comic page.

        The title and images come from Open Graph ``<meta>`` tags and the
        date from the ``datetime`` attribute of the first ``<time>`` tag.
        ``link`` is accepted but not used by this implementation.
        """
        og_title = soup.find('meta', property='og:title')['content']
        timestamp = soup.find('time')["datetime"]
        pub_date = string_to_date(timestamp, "%Y-%m-%d")
        og_images = soup.find_all('meta', property='og:image')
        image_urls = [meta['content'] for meta in og_images]
        return {
            'title': og_title,
            'img': image_urls,
            'day': pub_date.day,
            'month': pub_date.month,
            'year': pub_date.year,
        }


class TuMourrasMoinsBete(GenericNavigableComic):

            'prefix': title + '-'
        }


class GenericLeMondeBlog(GenericNavigableComic):
    """Shared base class for comics hosted on Le Monde blogs.

    ``first_url`` is left as ``NotImplemented`` here; presumably concrete
    subclasses provide it.
    """
    _categories = ('LEMONDE', 'FRANCAIS')
    first_url = NotImplemented
    get_first_comic_link = simulate_first_link
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title, short URL, images, date) for a page.

        ``soup`` is the parsed comic page; ``link`` is accepted but not
        used by this implementation.
        """
        short_url = soup.find('link', rel='shortlink')['href']
        og_title = soup.find('meta', property='og:title')['content']
        entry_date = soup.find("span", class_="entry-date").string
        # NOTE(review): the third argument looks like a locale name used to
        # parse the French month names - confirm against string_to_date.
        pub_date = string_to_date(entry_date, "%d %B %Y", "fr_FR.utf8")
        og_images = soup.find_all('meta', property='og:image')
        image_urls = [
            convert_iri_to_plain_ascii_uri(meta['content'])
            for meta in og_images
        ]
        return {
            'title': og_title,
            'url2': short_url,
            'img': image_urls,
            'month': pub_date.month,
            'year': pub_date.year,
            'day': pub_date.day,
        }




class SafelyEndangered(GenericNavigableComic):
    """Comic scraper for Safely Endangered (http://www.safelyendangered.com).

    First-comic lookup and navigation are delegated to the shared
    ``simulate_first_link`` / ``get_link_rel_next`` helpers.
    """
    # Also on http://tumblr.safelyendangered.com
    name = 'endangered'
    long_name = 'Safely Endangered'
    url = 'http://www.safelyendangered.com'
    first_url = 'http://www.safelyendangered.com/comic/ignored/'
    get_first_comic_link = simulate_first_link
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (date, images, title, alt text) for a page.

        ``soup`` is the parsed comic page; ``link`` is accepted but not
        used by this implementation.
        """
        heading = soup.find('h2', class_='post-title').string
        posted_on = soup.find('span', class_='post-date').string
        pub_date = string_to_date(posted_on, '%B %d, %Y')
        comic_div = soup.find('div', id='comic')
        pictures = comic_div.find_all('img')
        # The alt text of the first image is the one reported.
        alt_text = pictures[0]['alt']
        # Sanity check: every image carries the same text in alt and title.
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        sources = [pic['src'] for pic in pictures]
        return {
            'day': pub_date.day,
            'month': pub_date.month,
            'year': pub_date.year,
            'img': sources,
            'title': heading,
            'alt': alt_text,
        }


class PicturesInBoxes(GenericNavigableComic):

            'author': author,
        }


class MyExtraLife(GenericNavigableComic):
    """Comic scraper for My Extra Life (http://www.myextralife.com).

    Navigation is delegated to the shared ``get_link_rel_next`` helper.
    """
    name = 'extralife'
    long_name = 'My Extra Life'
    url = 'http://www.myextralife.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the first-comic navigation anchor of the page at ``cls.url``."""
        landing_page = get_soup_at_url(cls.url)
        return landing_page.find('a', class_='comic_nav_link first_comic_link')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title, images, date) for one comic page.

        ``soup`` is the parsed comic page; ``link`` is accepted but not
        used by this implementation.
        """
        heading = soup.find("h1", class_="comic_title").string
        posted_on = soup.find("span", class_="comic_date").string
        pub_date = string_to_date(posted_on, "%B %d, %Y")
        pictures = soup.find_all("img", class_="comic")
        # Sanity check: alt and title of every image both equal the page title.
        assert all(pic['alt'] == pic['title'] == heading for pic in pictures)
        # Images with an empty src attribute are left out.
        sources = [pic['src'] for pic in pictures if pic["src"]]
        return {
            'title': heading,
            'img': sources,
            'day': pub_date.day,
            'month': pub_date.month,
            'year': pub_date.year
        }




class JuliasDrawings(GenericListableComic):
    """Comic scraper for Julia's Drawings (https://drawings.jvns.ca).

    Comics are listed from the page at ``url``; each archive element is
    turned into a comic URL by the shared ``get_href`` helper.
    """
    name = 'julia'
    long_name = "Julia's Drawings"
    url = 'https://drawings.jvns.ca'
    get_url_from_archive_element = get_href

    @classmethod
    def get_archive_elements(cls):
        """Return one ``<a>`` element per listed post, in reversed page order."""
        index_page = get_soup_at_url(cls.url)
        posts = index_page.find_all('article', class_='li post')
        return [post.find('a') for post in reversed(posts)]

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Build the info dict (title, images, date) for one comic page.

        ``soup`` is the parsed comic page; ``archive_elt`` is accepted but
        not used by this implementation.
        """
        published = soup.find('meta', property='og:article:published_time')
        # Only the first 10 characters (YYYY-MM-DD) of the timestamp are parsed.
        pub_date = string_to_date(published['content'][:10], "%Y-%m-%d")
        heading = soup.find('h3', class_='p-post-title').string
        content = soup.find('section', class_='post-content')
        pictures = content.find_all('img')
        # Image sources are joined onto the site URL.
        sources = [urljoin_wrapper(cls.url, pic['src']) for pic in pictures]
        return {
            'title': heading,
            'img': sources,
            'month': pub_date.month,
            'year': pub_date.year,
            'day': pub_date.day,
        }


class AnythingComic(GenericListableComic):

		@@ 2429-2453 (lines=25) @@
2426
2427
2428		class LinsEditions(GenericNavigableComic):
2429		"""Class to retrieve L.I.N.S. Editions comics."""
2430		# Also on https://linscomics.tumblr.com
2431		# Now on https://warandpeas.com
2432		name = 'lins'
2433		long_name = 'L.I.N.S. Editions'
2434		url = 'https://linsedition.com'
2435		_categories = ('LINS', )
2436		get_navi_link = get_link_rel_next
2437		get_first_comic_link = simulate_first_link
2438		first_url = 'https://linsedition.com/2011/09/07/l-i-n-s/'
2439
2440		@classmethod
2441		def get_comic_info(cls, soup, link):
2442		"""Get information about a particular comics."""
2443		title = soup.find('meta', property='og:title')['content']
2444		imgs = soup.find_all('meta', property='og:image')
2445		date_str = soup.find('meta', property='article:published_time')['content'][:10]
2446		day = string_to_date(date_str, "%Y-%m-%d")
2447		return {
2448		'title': title,
2449		'img': [i['content'] for i in imgs],
2450		'month': day.month,
2451		'year': day.year,
2452		'day': day.day,
2453		}
2454
2455
2456		class ThorsThundershack(GenericNavigableComic):
		@@ 1073-1097 (lines=25) @@
1070		'img': [i['content'] for i in imgs],
1071		}
1072
1073
1074		class Mercworks(GenericNavigableComic):
1075		"""Class to retrieve Mercworks comics."""
1076		# Also on http://mercworks.tumblr.com
1077		name = 'mercworks'
1078		long_name = 'Mercworks'
1079		url = 'http://mercworks.net'
1080		get_first_comic_link = get_a_comicnavbase_comicnavfirst
1081		get_navi_link = get_link_rel_next
1082
1083		@classmethod
1084		def get_comic_info(cls, soup, link):
1085		"""Get information about a particular comics."""
1086		title = soup.find('meta', property='og:title')['content']
1087		metadesc = soup.find('meta', property='og:description')
1088		desc = metadesc['content'] if metadesc else ""
1089		date_str = soup.find('meta', property='article:published_time')['content'][:10]
1090		day = string_to_date(date_str, "%Y-%m-%d")
1091		imgs = soup.find_all('meta', property='og:image')
1092		return {
1093		'img': [i['content'] for i in imgs],
1094		'title': title,
1095		'desc': desc,
1096		'day': day.day,
1097		'month': day.month,
1098		'year': day.year
1099		}
1100
		@@ 3425-3446 (lines=22) @@
3422
3423		@classmethod
3424		def get_first_comic_link(cls):
3425		"""Get link to first comics."""
3426		return get_soup_at_url(cls.url).find('a', class_='first')
3427
3428		@classmethod
3429		def get_navi_link(cls, last_soup, next_):
3430		"""Get link to next or previous comic."""
3431		return last_soup.find('a', class_='next' if next_ else 'prev')
3432
3433		@classmethod
3434		def get_comic_info(cls, soup, link):
3435		"""Get information about a particular comics."""
3436		title = soup.find('meta', property='og:title')['content']
3437		date_str = soup.find('time')["datetime"]
3438		day = string_to_date(date_str, "%Y-%m-%d")
3439		imgs = soup.find_all('meta', property='og:image')
3440		return {
3441		'title': title,
3442		'img': [i['content'] for i in imgs],
3443		'day': day.day,
3444		'month': day.month,
3445		'year': day.year,
3446		}
3447
3448
3449		class TuMourrasMoinsBete(GenericNavigableComic):
		@@ 415-436 (lines=22) @@
412		'prefix': title + '-'
413		}
414
415
416		class GenericLeMondeBlog(GenericNavigableComic):
417		"""Generic class to retrieve comics from Le Monde blogs."""
418		_categories = ('LEMONDE', 'FRANCAIS')
419		get_navi_link = get_link_rel_next
420		get_first_comic_link = simulate_first_link
421		first_url = NotImplemented
422
423		@classmethod
424		def get_comic_info(cls, soup, link):
425		"""Get information about a particular comics."""
426		url2 = soup.find('link', rel='shortlink')['href']
427		title = soup.find('meta', property='og:title')['content']
428		date_str = soup.find("span", class_="entry-date").string
429		day = string_to_date(date_str, "%d %B %Y", "fr_FR.utf8")
430		imgs = soup.find_all('meta', property='og:image')
431		return {
432		'title': title,
433		'url2': url2,
434		'img': [convert_iri_to_plain_ascii_uri(i['content']) for i in imgs],
435		'month': day.month,
436		'year': day.year,
437		'day': day.day,
438		}
439
		@@ 1859-1884 (lines=26) @@
1856
1857
1858		class SafelyEndangered(GenericNavigableComic):
1859		"""Class to retrieve Safely Endangered comics."""
1860		# Also on http://tumblr.safelyendangered.com
1861		name = 'endangered'
1862		long_name = 'Safely Endangered'
1863		url = 'http://www.safelyendangered.com'
1864		get_navi_link = get_link_rel_next
1865		get_first_comic_link = simulate_first_link
1866		first_url = 'http://www.safelyendangered.com/comic/ignored/'
1867
1868		@classmethod
1869		def get_comic_info(cls, soup, link):
1870		"""Get information about a particular comics."""
1871		title = soup.find('h2', class_='post-title').string
1872		date_str = soup.find('span', class_='post-date').string
1873		day = string_to_date(date_str, '%B %d, %Y')
1874		imgs = soup.find('div', id='comic').find_all('img')
1875		alt = imgs[0]['alt']
1876		assert all(i['alt'] == i['title'] for i in imgs)
1877		return {
1878		'day': day.day,
1879		'month': day.month,
1880		'year': day.year,
1881		'img': [i['src'] for i in imgs],
1882		'title': title,
1883		'alt': alt,
1884		}
1885
1886
1887		class PicturesInBoxes(GenericNavigableComic):
		@@ 985-1010 (lines=26) @@
982		'author': author,
983		}
984
985
986		class MyExtraLife(GenericNavigableComic):
987		"""Class to retrieve My Extra Life comics."""
988		name = 'extralife'
989		long_name = 'My Extra Life'
990		url = 'http://www.myextralife.com'
991		get_navi_link = get_link_rel_next
992
993		@classmethod
994		def get_first_comic_link(cls):
995		"""Get link to first comics."""
996		return get_soup_at_url(cls.url).find('a', class_='comic_nav_link first_comic_link')
997
998		@classmethod
999		def get_comic_info(cls, soup, link):
1000		"""Get information about a particular comics."""
1001		title = soup.find("h1", class_="comic_title").string
1002		date_str = soup.find("span", class_="comic_date").string
1003		day = string_to_date(date_str, "%B %d, %Y")
1004		imgs = soup.find_all("img", class_="comic")
1005		assert all(i['alt'] == i['title'] == title for i in imgs)
1006		return {
1007		'title': title,
1008		'img': [i['src'] for i in imgs if i["src"]],
1009		'day': day.day,
1010		'month': day.month,
1011		'year': day.year
1012		}
1013
		@@ 2330-2354 (lines=25) @@
2327
2328
2329		class JuliasDrawings(GenericListableComic):
2330		"""Class to retrieve Julia's Drawings."""
2331		name = 'julia'
2332		long_name = "Julia's Drawings"
2333		url = 'https://drawings.jvns.ca'
2334		get_url_from_archive_element = get_href
2335
2336		@classmethod
2337		def get_archive_elements(cls):
2338		articles = get_soup_at_url(cls.url).find_all('article', class_='li post')
2339		return [art.find('a') for art in reversed(articles)]
2340
2341		@classmethod
2342		def get_comic_info(cls, soup, archive_elt):
2343		"""Get information about a particular comics."""
2344		date_str = soup.find('meta', property='og:article:published_time')['content'][:10]
2345		day = string_to_date(date_str, "%Y-%m-%d")
2346		title = soup.find('h3', class_='p-post-title').string
2347		imgs = soup.find('section', class_='post-content').find_all('img')
2348		return {
2349		'title': title,
2350		'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
2351		'month': day.month,
2352		'year': day.year,
2353		'day': day.day,
2354		}
2355
2356
2357		class AnythingComic(GenericListableComic):

SylvainDe / ComicBookMaker

Code Duplication Length = 22-26 lines in 7 locations

comics.py 7 locations