Code Duplication - SylvainDe/ComicBookMaker - Measure and Improve Code Quality continuously with Scrutinizer

Code Duplication Length = 23-27 lines in 8 locations

comics.py 8 locations


    url = 'http://theawkwardyeti.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract the title, date and image URLs from a comic page.

        `soup` is searched for the 'post-title' heading, the 'post-date'
        span and the images inside the div whose id is 'comic'.
        `link` is not used by this implementation.

        Returns a dict with the keys 'img', 'title', 'day', 'month', 'year'.
        """
        heading = soup.find('h2', class_='post-title')
        comic_title = heading.string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        comic_div = soup.find("div", id="comic")
        images = comic_div.find_all("img")
        # Only the first image is required to have matching alt and title.
        if images:
            assert images[0]['alt'] == images[0]['title']
        info = {}
        info['img'] = [image['src'] for image in images]
        info['title'] = comic_title
        info['day'] = published.day
        info['month'] = published.month
        info['year'] = published.year
        return info


class PleasantThoughts(GenericNavigableComic):
    """Class to retrieve Pleasant Thoughts comics."""
    name = 'pleasant'
    long_name = 'Pleasant Thoughts'
    url = 'http://pleasant-thoughts.com'
    # Navigation is delegated to helpers defined outside this class.
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Looks inside the 'post-content' div of `soup` for the 'post-title'
        heading and for the images of the 'entry' div. `link` is not used.

        Returns a dict with the keys 'title' and 'img' (list of image URLs).
        """
        # Single definition, kept as a classmethod: a second undecorated
        # definition would shadow this one and break calls made on the class.
        post = soup.find('div', class_='post-content')
        title = post.find('h2', class_='post-title').string
        imgs = post.find("div", class_="entry").find_all("img")
        return {
            'title': title,
            'img': [i['src'] for i in imgs],
        }


class MisterAndMe(GenericNavigableComic):
    """Class to retrieve Mister & Me Comics."""
    # Also on http://www.gocomics.com/mister-and-me
    # Also on https://tapastic.com/series/Mister-and-Me
    name = 'mister'
    long_name = 'Mister & Me'
    url = 'http://www.mister-and-me.com'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics."""
        title = soup.find('h2', class_='post-title').string
        author = soup.find("span", class_="post-author").find("a").string
        date_str = soup.find("span", class_="post-date").string
        day = string_to_date(date_str, "%B %d, %Y")
        imgs = soup.find("div", id="comic").find_all("img")
        assert all(i['alt'] == i['title'] for i in imgs)
        assert len(imgs) <= 1

    @classmethod
    def get_first_comic_link(cls):
        """Return a mapping whose 'href' is the hard-coded first comic URL."""
        first_link = dict(href='http://www.lonniemillsap.com/?p=42')
        return first_link

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect title, author, date and image URLs from a comic page.

        The title comes from the 'post-title' heading of `soup`; author,
        date and images are read from inside the 'post-content' div.
        `link` is not used by this implementation.

        Returns a dict with the keys 'title', 'author', 'img', 'month',
        'year' and 'day'.
        """
        comic_title = soup.find('h2', class_='post-title').string
        content = soup.find('div', class_='post-content')
        author_span = content.find("span", class_="post-author")
        author_name = author_span.find("a").string
        raw_date = content.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        entry = content.find("div", class_="entry")
        sources = [image['src'] for image in entry.find_all("img")]
        return {
            'title': comic_title,
            'author': author_name,
            'img': sources,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }


class LinsEditions(GenericNavigableComic):
    """Class to retrieve L.I.N.S. Editions comics."""
    # Also on http://linscomics.tumblr.com
    name = 'lins'
    long_name = 'L.I.N.S. Editions'
    url = 'https://linsedition.com'
    get_navi_link = get_link_rel_next

        day = string_to_date(remove_st_nd_rd_th_from_date(date_str), "%B %d, %Y")
        author = soup.find('span', class_='post-author').string
        div = soup.find('div', id='comic')
        imgs = div.find_all('img') if div else []
        title = imgs[0]['title'] if imgs else ""
        assert all(i['title'] == i['alt'] == title for i in imgs)
        return {
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [i['src'] for i in imgs],
            'title': title,
            'author': author,
        }


class DepressedAlien(GenericNavigableComic):
    """Class to retrieve Depressed Alien Comics."""
    name = 'depressedalien'
    long_name = 'Depressed Alien'
    url = 'http://depressedalien.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the parent element of the 'beginArrow' image found at cls.url."""
        home_page = get_soup_at_url(cls.url)
        begin_arrow = home_page.find('img', attrs={'name': 'beginArrow'})
        return begin_arrow.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the parent element of a navigation arrow image.

        The image named 'rightArrow' is looked up when `next_` is truthy,
        the one named 'leftArrow' otherwise.
        """
        if next_:
            arrow_name = 'rightArrow'
        else:
            arrow_name = 'leftArrow'
        arrow = last_soup.find('img', attrs={'name': arrow_name})
        return arrow.parent

    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect title, author, date, alt text and image URLs of a comic.

        Reads the 'post-title' heading, the link inside the 'post-author'
        span, the 'post-date' span and the images of the div whose id is
        'comic'. `link` is not used by this implementation.

        Returns a dict with the keys 'img', 'title', 'alt', 'author',
        'day', 'month' and 'year'.
        """
        comic_title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author_name = author_span.find("a").string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        images = soup.find("div", id="comic").find_all("img")
        # Every image is expected to carry the same text as alt and title.
        for image in images:
            assert image['alt'] == image['title']
        if images:
            alt_text = images[0]['alt']
        else:
            alt_text = ""
        info = {}
        info['img'] = [image['src'] for image in images]
        info['title'] = comic_title
        info['alt'] = alt_text
        info['author'] = author_name
        info['day'] = published.day
        info['month'] = published.month
        info['year'] = published.year
        return info


class PlanC(GenericNavigableComic):
    """Class to retrieve Plan C comics."""
    name = 'planc'
    long_name = 'Plan C'
    url = 'http://www.plancomic.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    long_name = 'Completely Serious Comics'
    url = 'http://completelyseriouscomics.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect date, image URLs, title, alt text and author of a comic.

        Reads the 'post-title' heading, the second child of the
        'post-author' span, the 'post-date' span and the images of the
        'comicpane' div. At least one image is expected, and all images are
        expected to share one text as both title and alt.
        `link` is not used by this implementation.

        Returns a dict with the keys 'month', 'year', 'day', 'img',
        'title', 'alt' and 'author'.
        """
        comic_title = soup.find('h2', class_='post-title').string
        author_span = soup.find('span', class_='post-author')
        author_name = author_span.contents[1].string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, '%B %d, %Y')
        pane = soup.find('div', class_='comicpane')
        images = pane.find_all('img')
        assert images
        hover_text = images[0]['title']
        for image in images:
            assert image['title'] == image['alt'] == hover_text
        sources = [image['src'] for image in images]
        return {
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': sources,
            'title': comic_title,
            'alt': hover_text,
            'author': author_name,
        }


class PoorlyDrawnLines(GenericListableComic):
    """Class to retrieve Poorly Drawn Lines comics."""
    # Also on http://pdlcomics.tumblr.com

        if div:
            img = div.find('img')
            img_src = [img['src']]
            alt = img['alt']
            assert alt == img['title']
            title = soup.find('meta', property='og:title')['content']
        else:
            img_src = []
            alt = ''
            title = ''
        return {
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': img_src,
            'title': title,
            'alt': alt,
            'tags': ' '.join(t.string for t in soup.find('div', class_='postmeta').find_all('a', rel='tag')),
        }


class WarehouseComic(GenericNavigableComic):
    """Class to retrieve Warehouse Comic comics."""
    name = 'warehouse'
    long_name = 'Warehouse Comic'
    url = 'http://warehousecomic.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod


class GenericBouletCorp(GenericNavigableComic):
    """Generic class to retrieve BouletCorp comics in different languages.

    This class reads `cls.url` but does not define it here.
    NOTE(review): presumably each language-specific subclass sets `url` -
    confirm against the subclasses.
    """
    # Also on http://bouletcorp.tumblr.com
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        Fetches the page at cls.url and returns the first <a> element found
        inside the div whose id is 'centered_nav'.
        """
        return get_soup_at_url(cls.url).find('div', id='centered_nav').find_all('a')[0]

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The date is parsed from the comic URL, which must start with
        cls.url followed by three '/'-separated numeric components read as
        year, month and day. Images are taken from the 'storycontent' div
        inside the div whose id is 'notes'.

        Returns a dict with the keys 'img', 'title', 'texts', 'year',
        'month' and 'day'.
        """
        url = cls.get_url_from_link(link)
        # cls.url must match literally: escape it so that regex
        # metacharacters it contains (such as the dots of the host name)
        # are not interpreted as wildcards.
        date_re = re.compile('^%s/([0-9]*)/([0-9]*)/([0-9]*)/' % re.escape(cls.url))
        year, month, day = [int(s) for s in date_re.match(url).groups()]
        imgs = soup.find('div', id='notes').find('div', class_='storycontent').find_all('img')
        # Join the non-empty title attributes of the images.
        texts = '  '.join(t for t in (i.get('title') for i in imgs) if t)
        title = soup.find('title').string
        return {
            # Images without a src attribute are left out.
            'img': [convert_iri_to_plain_ascii_uri(i['src']) for i in imgs if i.get('src') is not None],
            'title': title,
            'texts': texts,
            'year': year,
            'month': month,
            'day': day,
        }


		@@ 2432-2458 (lines=27) @@
2429		url = 'http://theawkwardyeti.com'
2430		get_first_comic_link = get_a_navi_navifirst
2431		get_navi_link = get_link_rel_next
2432
2433		@classmethod
2434		def get_comic_info(cls, soup, link):
2435		"""Get information about a particular comics."""
2436		title = soup.find('h2', class_='post-title').string
2437		date_str = soup.find("span", class_="post-date").string
2438		day = string_to_date(date_str, "%B %d, %Y")
2439		imgs = soup.find("div", id="comic").find_all("img")
2440		assert all(idx > 0 or i['alt'] == i['title'] for idx, i in enumerate(imgs))
2441		return {
2442		'img': [i['src'] for i in imgs],
2443		'title': title,
2444		'day': day.day,
2445		'month': day.month,
2446		'year': day.year
2447		}
2448
2449
2450		class PleasantThoughts(GenericNavigableComic):
2451		"""Class to retrieve Pleasant Thoughts comics."""
2452		name = 'pleasant'
2453		long_name = 'Pleasant Thoughts'
2454		url = 'http://pleasant-thoughts.com'
2455		get_first_comic_link = get_a_comicnavbase_comicnavfirst
2456		get_navi_link = get_link_rel_next
2457
2458		@classmethod
2459		def get_comic_info(cls, soup, link):
2460		"""Get information about a particular comics."""
2461		post = soup.find('div', class_='post-content')
		@@ 2462-2486 (lines=25) @@
2459		def get_comic_info(cls, soup, link):
2460		"""Get information about a particular comics."""
2461		post = soup.find('div', class_='post-content')
2462		title = post.find('h2', class_='post-title').string
2463		imgs = post.find("div", class_="entry").find_all("img")
2464		return {
2465		'title': title,
2466		'img': [i['src'] for i in imgs],
2467		}
2468
2469
2470		class MisterAndMe(GenericNavigableComic):
2471		"""Class to retrieve Mister & Me Comics."""
2472		# Also on http://www.gocomics.com/mister-and-me
2473		# Also on https://tapastic.com/series/Mister-and-Me
2474		name = 'mister'
2475		long_name = 'Mister & Me'
2476		url = 'http://www.mister-and-me.com'
2477		get_first_comic_link = get_a_comicnavbase_comicnavfirst
2478		get_navi_link = get_link_rel_next
2479
2480		@classmethod
2481		def get_comic_info(cls, soup, link):
2482		"""Get information about a particular comics."""
2483		title = soup.find('h2', class_='post-title').string
2484		author = soup.find("span", class_="post-author").find("a").string
2485		date_str = soup.find("span", class_="post-date").string
2486		day = string_to_date(date_str, "%B %d, %Y")
2487		imgs = soup.find("div", id="comic").find_all("img")
2488		assert all(i['alt'] == i['title'] for i in imgs)
2489		assert len(imgs) <= 1
		@@ 2242-2266 (lines=25) @@
2239		@classmethod
2240		def get_first_comic_link(cls):
2241		"""Get link to first comics."""
2242		return {'href': 'http://www.lonniemillsap.com/?p=42'}
2243
2244		@classmethod
2245		def get_comic_info(cls, soup, link):
2246		"""Get information about a particular comics."""
2247		title = soup.find('h2', class_='post-title').string
2248		post = soup.find('div', class_='post-content')
2249		author = post.find("span", class_="post-author").find("a").string
2250		date_str = post.find("span", class_="post-date").string
2251		day = string_to_date(date_str, "%B %d, %Y")
2252		imgs = post.find("div", class_="entry").find_all("img")
2253		return {
2254		'title': title,
2255		'author': author,
2256		'img': [i['src'] for i in imgs],
2257		'month': day.month,
2258		'year': day.year,
2259		'day': day.day,
2260		}
2261
2262
2263		class LinsEditions(GenericNavigableComic):
2264		"""Class to retrieve L.I.N.S. Editions comics."""
2265		# Also on http://linscomics.tumblr.com
2266		name = 'lins'
2267		long_name = 'L.I.N.S. Editions'
2268		url = 'https://linsedition.com'
2269		get_navi_link = get_link_rel_next
		@@ 2046-2070 (lines=25) @@
2043		day = string_to_date(remove_st_nd_rd_th_from_date(date_str), "%B %d, %Y")
2044		author = soup.find('span', class_='post-author').string
2045		div = soup.find('div', id='comic')
2046		imgs = div.find_all('img') if div else []
2047		title = imgs[0]['title'] if imgs else ""
2048		assert all(i['title'] == i['alt'] == title for i in imgs)
2049		return {
2050		'month': day.month,
2051		'year': day.year,
2052		'day': day.day,
2053		'img': [i['src'] for i in imgs],
2054		'title': title,
2055		'author': author,
2056		}
2057
2058
2059		class DepressedAlien(GenericNavigableComic):
2060		"""Class to retrieve Depressed Alien Comics."""
2061		name = 'depressedalien'
2062		long_name = 'Depressed Alien'
2063		url = 'http://depressedalien.com'
2064		get_url_from_link = join_cls_url_to_href
2065
2066		@classmethod
2067		def get_first_comic_link(cls):
2068		"""Get link to first comics."""
2069		return get_soup_at_url(cls.url).find('img', attrs={'name': 'beginArrow'}).parent
2070
2071		@classmethod
2072		def get_navi_link(cls, last_soup, next_):
2073		return last_soup.find('img', attrs={'name': 'rightArrow' if next_ else 'leftArrow'}).parent
		@@ 2572-2595 (lines=24) @@
2569		get_navi_link = get_link_rel_next
2570
2571		@classmethod
2572		def get_comic_info(cls, soup, link):
2573		"""Get information about a particular comics."""
2574		title = soup.find('h2', class_='post-title').string
2575		author = soup.find("span", class_="post-author").find("a").string
2576		date_str = soup.find("span", class_="post-date").string
2577		day = string_to_date(date_str, "%B %d, %Y")
2578		imgs = soup.find("div", id="comic").find_all("img")
2579		assert all(i['alt'] == i['title'] for i in imgs)
2580		alt = imgs[0]['alt'] if imgs else ""
2581		return {
2582		'img': [i['src'] for i in imgs],
2583		'title': title,
2584		'alt': alt,
2585		'author': author,
2586		'day': day.day,
2587		'month': day.month,
2588		'year': day.year
2589		}
2590
2591
2592		class PlanC(GenericNavigableComic):
2593		"""Class to retrieve Plan C comics."""
2594		name = 'planc'
2595		long_name = 'Plan C'
2596		url = 'http://www.plancomic.com'
2597		get_first_comic_link = get_a_navi_navifirst
2598		get_navi_link = get_a_navi_comicnavnext_navinext
		@@ 1951-1974 (lines=24) @@
1948		long_name = 'Completely Serious Comics'
1949		url = 'http://completelyseriouscomics.com'
1950		get_first_comic_link = get_a_navi_navifirst
1951		get_navi_link = get_a_navi_navinext
1952
1953		@classmethod
1954		def get_comic_info(cls, soup, link):
1955		"""Get information about a particular comics."""
1956		title = soup.find('h2', class_='post-title').string
1957		author = soup.find('span', class_='post-author').contents[1].string
1958		date_str = soup.find('span', class_='post-date').string
1959		day = string_to_date(date_str, '%B %d, %Y')
1960		imgs = soup.find('div', class_='comicpane').find_all('img')
1961		assert imgs
1962		alt = imgs[0]['title']
1963		assert all(i['title'] == i['alt'] == alt for i in imgs)
1964		return {
1965		'month': day.month,
1966		'year': day.year,
1967		'day': day.day,
1968		'img': [i['src'] for i in imgs],
1969		'title': title,
1970		'alt': alt,
1971		'author': author,
1972		}
1973
1974
1975		class PoorlyDrawnLines(GenericListableComic):
1976		"""Class to retrieve Poorly Drawn Lines comics."""
1977		# Also on http://pdlcomics.tumblr.com
		@@ 1639-1662 (lines=24) @@
1636		if div:
1637		img = div.find('img')
1638		img_src = [img['src']]
1639		alt = img['alt']
1640		assert alt == img['title']
1641		title = soup.find('meta', property='og:title')['content']
1642		else:
1643		img_src = []
1644		alt = ''
1645		title = ''
1646		return {
1647		'month': day.month,
1648		'year': day.year,
1649		'day': day.day,
1650		'img': img_src,
1651		'title': title,
1652		'alt': alt,
1653		'tags': ' '.join(t.string for t in soup.find('div', class_='postmeta').find_all('a', rel='tag')),
1654		}
1655
1656
1657		class WarehouseComic(GenericNavigableComic):
1658		"""Class to retrieve Warehouse Comic comics."""
1659		name = 'warehouse'
1660		long_name = 'Warehouse Comic'
1661		url = 'http://warehousecomic.com'
1662		get_first_comic_link = get_a_navi_navifirst
1663		get_navi_link = get_link_rel_next
1664
1665		@classmethod
		@@ 1072-1094 (lines=23) @@
1069
1070		class GenericBouletCorp(GenericNavigableComic):
1071		"""Generic class to retrieve BouletCorp comics in different languages."""
1072		# Also on http://bouletcorp.tumblr.com
1073		get_navi_link = get_link_rel_next
1074
1075		@classmethod
1076		def get_first_comic_link(cls):
1077		"""Get link to first comics."""
1078		return get_soup_at_url(cls.url).find('div', id='centered_nav').find_all('a')[0]
1079
1080		@classmethod
1081		def get_comic_info(cls, soup, link):
1082		"""Get information about a particular comics."""
1083		url = cls.get_url_from_link(link)
1084		date_re = re.compile('^%s/([0-9]*)/([0-9]*)/([0-9]*)/' % cls.url)
1085		year, month, day = [int(s) for s in date_re.match(url).groups()]
1086		imgs = soup.find('div', id='notes').find('div', class_='storycontent').find_all('img')
1087		texts = '  '.join(t for t in (i.get('title') for i in imgs) if t)
1088		title = soup.find('title').string
1089		return {
1090		'img': [convert_iri_to_plain_ascii_uri(i['src']) for i in imgs if i.get('src') is not None],
1091		'title': title,
1092		'texts': texts,
1093		'year': year,
1094		'month': month,
1095		'day': day,
1096		}
1097

SylvainDe / ComicBookMaker

Code Duplication Length = 23-27 lines in 8 locations

comics.py 8 locations