Code Duplication - SylvainDe/ComicBookMaker - Measure and Improve Code Quality continuously with Scrutinizer

Code Duplication Length = 22-26 lines in 6 locations

comics.py 6 locations



class MyExtraLife(GenericNavigableComic):
    """Retrieve the My Extra Life comics.

    Moving between comics is delegated to ``get_link_rel_next``; the first
    comic is looked up from the page served at ``url``.
    """
    name = 'extralife'
    long_name = 'My Extra Life'
    url = 'http://www.myextralife.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the link to the first comic, read from the page at `cls.url`."""
        landing_page = get_soup_at_url(cls.url)
        return landing_page.find('a', class_='comic_nav_link first_comic_link')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the information dictionary for the comic page `soup`.

        The result holds the title, the list of non-empty image sources and
        the publication date split into 'day', 'month' and 'year'.
        `link` is not used by this implementation.
        """
        comic_title = soup.find("h1", class_="comic_title").string
        raw_date = soup.find("span", class_="comic_date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find_all("img", class_="comic")
        # Sanity check: every image is expected to be labelled with the title.
        assert all(pic['alt'] == pic['title'] == comic_title for pic in pictures)
        # Images with an empty 'src' attribute are left out.
        sources = [pic['src'] for pic in pictures if pic["src"]]
        return {
            'title': comic_title,
            'img': sources,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }


class SaturdayMorningBreakfastCereal(GenericNavigableComic):
    """Class to retrieve Saturday Morning Breakfast Cereal comics."""

        }


class GenericLeMondeBlog(GenericNavigableComic):
    """Generic retrieval logic for comics published on Le Monde blogs.

    `first_url` is only a NotImplemented placeholder here; presumably each
    concrete blog class supplies its own value (to be confirmed against the
    subclasses).
    """
    _categories = ('LEMONDE', 'FRANCAIS')
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the information dictionary for the blog post `soup`.

        The result holds the title, the short link ('url2'), the image URIs
        taken from the 'og:image' meta tags and the publication date split
        into 'month', 'year' and 'day'. `link` is not used here.
        """
        short_link = soup.find('link', rel='shortlink')['href']
        post_title = soup.find('meta', property='og:title')['content']
        raw_date = soup.find("span", class_="entry-date").string
        # Dates are parsed with the French locale passed to string_to_date.
        published = string_to_date(raw_date, "%d %B %Y", "fr_FR.utf8")
        image_metas = soup.find_all('meta', property='og:image')
        image_uris = [
            convert_iri_to_plain_ascii_uri(meta['content'])
            for meta in image_metas
        ]
        return {
            'title': post_title,
            'url2': short_link,
            'img': image_uris,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }




class LonnieMillsap(GenericNavigableComic):
    """Retrieve the comics of Lonnie Millsap."""
    name = 'millsap'
    long_name = 'Lonnie Millsap'
    url = 'http://www.lonniemillsap.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.lonniemillsap.com/?p=42'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the information dictionary for the comic page `soup`.

        The result holds the title, the author, the image sources found in
        the post entry and the publication date split into 'month', 'year'
        and 'day'. `link` is not used by this implementation.
        """
        post_title = soup.find('h2', class_='post-title').string
        # Author, date and images are all looked up inside the post content.
        content = soup.find('div', class_='post-content')
        author_span = content.find("span", class_="post-author")
        author_name = author_span.find("a").string
        raw_date = content.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        entry = content.find("div", class_="entry")
        pictures = entry.find_all("img")
        return {
            'title': post_title,
            'author': author_name,
            'img': [pic['src'] for pic in pictures],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }


class LinsEditions(GenericNavigableComic):
    """Class to retrieve L.I.N.S. Editions comics."""

    def get_nav(cls, soup):
        """Return the navigation entries found in `soup`.

        All elements with class 'cnav' are collected; the first five are
        asserted to be equal to the remaining ones. Entries without an
        'href' attribute are replaced by None in the returned list.
        """
        nav_elements = soup.find_all(class_='cnav')
        first_bar = nav_elements[:5]
        second_bar = nav_elements[5:]
        assert first_bar == second_bar
        # Expected order: begin, prev, archive, next_, end
        return [
            elt if elt.get('href') is not None else None
            for elt in first_bar
        ]

    @classmethod
    def get_first_comic_link(cls):
        """Return the first navigation entry of the page at `cls.url`."""
        landing_page = get_soup_at_url(cls.url)
        navigation = cls.get_nav(landing_page)
        return navigation[0]

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the navigation entry leading to the next or previous comic.

        Index 3 of the navigation list is used when `next_` is true,
        index 1 otherwise.
        """
        navigation = cls.get_nav(last_soup)
        if next_:
            return navigation[3]
        return navigation[1]

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the information dictionary for the comic page `soup`.

        The title comes from the 'title' attribute of `link`; the images are
        the 'img' elements whose id is 'comicimg'.
        """
        comic_title = link['title']
        pictures = soup.find_all('img', id='comicimg')
        sources = [pic['src'] for pic in pictures]
        return {
            'title': comic_title,
            'img': sources,
        }



class Mercworks(GenericNavigableComic):
    """Retrieve the Mercworks comics."""
    # Also on http://mercworks.tumblr.com
    name = 'mercworks'
    long_name = 'Mercworks'
    url = 'http://mercworks.net'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the information dictionary for the comic page `soup`.

        Everything is read from meta tags: title, optional description
        (empty string when the tag is absent), images and publication date
        split into 'day', 'month' and 'year'. `link` is not used here.
        """
        comic_title = soup.find('meta', property='og:title')['content']
        description_tag = soup.find('meta', property='og:description')
        if description_tag:
            description = description_tag['content']
        else:
            description = ""
        published_meta = soup.find('meta', property='article:published_time')
        # Only the first 10 characters are kept, matching the "%Y-%m-%d" format.
        raw_date = published_meta['content'][:10]
        published = string_to_date(raw_date, "%Y-%m-%d")
        image_metas = soup.find_all('meta', property='og:image')
        return {
            'img': [meta['content'] for meta in image_metas],
            'title': comic_title,
            'desc': description,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }


class BerkeleyMews(GenericListableComic):
    """Class to retrieve Berkeley Mews comics."""

        }


class JuliasDrawings(GenericListableComic):
    """Retrieve Julia's Drawings."""
    name = 'julia'
    long_name = "Julia's Drawings"
    url = 'https://drawings.jvns.ca'
    get_url_from_archive_element = get_href

    @classmethod
    def get_archive_elements(cls):
        """Return one link per post listed at `cls.url`, in reversed page order."""
        listing = get_soup_at_url(cls.url)
        posts = listing.find_all('article', class_='li post')
        return [post.find('a') for post in reversed(posts)]

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Return the information dictionary for the drawing page `soup`.

        The result holds the title, the image URLs (joined with `cls.url`)
        and the publication date split into 'month', 'year' and 'day'.
        `archive_elt` is not used by this implementation.
        """
        published_meta = soup.find('meta', property='og:article:published_time')
        # Only the first 10 characters are kept, matching the "%Y-%m-%d" format.
        raw_date = published_meta['content'][:10]
        published = string_to_date(raw_date, "%Y-%m-%d")
        post_title = soup.find('h3', class_='p-post-title').string
        content = soup.find('section', class_='post-content')
        pictures = content.find_all('img')
        image_urls = [urljoin_wrapper(cls.url, pic['src']) for pic in pictures]
        return {
            'title': post_title,
            'img': image_urls,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }



		@@ 958-983 (lines=26) @@
955
956		class MyExtraLife(GenericNavigableComic):
957		"""Class to retrieve My Extra Life comics."""
958		name = 'extralife'
959		long_name = 'My Extra Life'
960		url = 'http://www.myextralife.com'
961		get_navi_link = get_link_rel_next
962
963		@classmethod
964		def get_first_comic_link(cls):
965		"""Get link to first comics."""
966		return get_soup_at_url(cls.url).find('a', class_='comic_nav_link first_comic_link')
967
968		@classmethod
969		def get_comic_info(cls, soup, link):
970		"""Get information about a particular comics."""
971		title = soup.find("h1", class_="comic_title").string
972		date_str = soup.find("span", class_="comic_date").string
973		day = string_to_date(date_str, "%B %d, %Y")
974		imgs = soup.find_all("img", class_="comic")
975		assert all(i['alt'] == i['title'] == title for i in imgs)
976		return {
977		'title': title,
978		'img': [i['src'] for i in imgs if i["src"]],
979		'day': day.day,
980		'month': day.month,
981		'year': day.year
982		}
983
984
985		class SaturdayMorningBreakfastCereal(GenericNavigableComic):
986		"""Class to retrieve Saturday Morning Breakfast Cereal comics."""
		@@ 386-407 (lines=22) @@
383		}
384
385
386		class GenericLeMondeBlog(GenericNavigableComic):
387		"""Generic class to retrieve comics from Le Monde blogs."""
388		_categories = ('LEMONDE', 'FRANCAIS')
389		get_navi_link = get_link_rel_next
390		get_first_comic_link = simulate_first_link
391		first_url = NotImplemented
392
393		@classmethod
394		def get_comic_info(cls, soup, link):
395		"""Get information about a particular comics."""
396		url2 = soup.find('link', rel='shortlink')['href']
397		title = soup.find('meta', property='og:title')['content']
398		date_str = soup.find("span", class_="entry-date").string
399		day = string_to_date(date_str, "%d %B %Y", "fr_FR.utf8")
400		imgs = soup.find_all('meta', property='og:image')
401		return {
402		'title': title,
403		'url2': url2,
404		'img': [convert_iri_to_plain_ascii_uri(i['content']) for i in imgs],
405		'month': day.month,
406		'year': day.year,
407		'day': day.day,
408		}
409
410
		@@ 2374-2398 (lines=25) @@
2371
2372		class LonnieMillsap(GenericNavigableComic):
2373		"""Class to retrieve Lonnie Millsap's comics."""
2374		name = 'millsap'
2375		long_name = 'Lonnie Millsap'
2376		url = 'http://www.lonniemillsap.com'
2377		get_navi_link = get_link_rel_next
2378		get_first_comic_link = simulate_first_link
2379		first_url = 'http://www.lonniemillsap.com/?p=42'
2380
2381		@classmethod
2382		def get_comic_info(cls, soup, link):
2383		"""Get information about a particular comics."""
2384		title = soup.find('h2', class_='post-title').string
2385		post = soup.find('div', class_='post-content')
2386		author = post.find("span", class_="post-author").find("a").string
2387		date_str = post.find("span", class_="post-date").string
2388		day = string_to_date(date_str, "%B %d, %Y")
2389		imgs = post.find("div", class_="entry").find_all("img")
2390		return {
2391		'title': title,
2392		'author': author,
2393		'img': [i['src'] for i in imgs],
2394		'month': day.month,
2395		'year': day.year,
2396		'day': day.day,
2397		}
2398
2399
2400		class LinsEditions(GenericNavigableComic):
2401		"""Class to retrieve L.I.N.S. Editions comics."""
		@@ 3315-3336 (lines=22) @@
3312		def get_nav(cls, soup):
3313		"""Get the navigation elements from soup object."""
3314		cnav = soup.find_all(class_='cnav')
3315		nav1, nav2 = cnav[:5], cnav[5:]
3316		assert nav1 == nav2
3317		# begin, prev, archive, next_, end = nav1
3318		return [None if i.get('href') is None else i for i in nav1]
3319
3320		@classmethod
3321		def get_first_comic_link(cls):
3322		"""Get link to first comics."""
3323		return cls.get_nav(get_soup_at_url(cls.url))[0]
3324
3325		@classmethod
3326		def get_navi_link(cls, last_soup, next_):
3327		"""Get link to next or previous comic."""
3328		return cls.get_nav(last_soup)[3 if next_ else 1]
3329
3330		@classmethod
3331		def get_comic_info(cls, soup, link):
3332		"""Get information about a particular comics."""
3333		title = link['title']
3334		imgs = soup.find_all('img', id='comicimg')
3335		return {
3336		'title': title,
3337		'img': [i['src'] for i in imgs],
3338		}
3339
		@@ 1046-1070 (lines=25) @@
1043
1044		class Mercworks(GenericNavigableComic):
1045		"""Class to retrieve Mercworks comics."""
1046		# Also on http://mercworks.tumblr.com
1047		name = 'mercworks'
1048		long_name = 'Mercworks'
1049		url = 'http://mercworks.net'
1050		get_first_comic_link = get_a_comicnavbase_comicnavfirst
1051		get_navi_link = get_link_rel_next
1052
1053		@classmethod
1054		def get_comic_info(cls, soup, link):
1055		"""Get information about a particular comics."""
1056		title = soup.find('meta', property='og:title')['content']
1057		metadesc = soup.find('meta', property='og:description')
1058		desc = metadesc['content'] if metadesc else ""
1059		date_str = soup.find('meta', property='article:published_time')['content'][:10]
1060		day = string_to_date(date_str, "%Y-%m-%d")
1061		imgs = soup.find_all('meta', property='og:image')
1062		return {
1063		'img': [i['content'] for i in imgs],
1064		'title': title,
1065		'desc': desc,
1066		'day': day.day,
1067		'month': day.month,
1068		'year': day.year
1069		}
1070
1071
1072		class BerkeleyMews(GenericListableComic):
1073		"""Class to retrieve Berkeley Mews comics."""
		@@ 2301-2325 (lines=25) @@
2298		}
2299
2300
2301		class JuliasDrawings(GenericListableComic):
2302		"""Class to retrieve Julia's Drawings."""
2303		name = 'julia'
2304		long_name = "Julia's Drawings"
2305		url = 'https://drawings.jvns.ca'
2306		get_url_from_archive_element = get_href
2307
2308		@classmethod
2309		def get_archive_elements(cls):
2310		articles = get_soup_at_url(cls.url).find_all('article', class_='li post')
2311		return [art.find('a') for art in reversed(articles)]
2312
2313		@classmethod
2314		def get_comic_info(cls, soup, archive_elt):
2315		"""Get information about a particular comics."""
2316		date_str = soup.find('meta', property='og:article:published_time')['content'][:10]
2317		day = string_to_date(date_str, "%Y-%m-%d")
2318		title = soup.find('h3', class_='p-post-title').string
2319		imgs = soup.find('section', class_='post-content').find_all('img')
2320		return {
2321		'title': title,
2322		'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
2323		'month': day.month,
2324		'year': day.year,
2325		'day': day.day,
2326		}
2327
2328

SylvainDe / ComicBookMaker

Code Duplication Length = 22-26 lines in 6 locations

comics.py 6 locations