Code Duplication - SylvainDe/ComicBookMaker - Measure and Improve Code Quality continuously with Scrutinizer

Code Duplication Length = 22-29 lines in 16 locations

comics.py 16 locations


    _categories = ('TUNEYTOONS', )


class CompletelySeriousComics(GenericNavigableComic):
    """Scraper for the Completely Serious Comics webcomic."""
    name = 'completelyserious'
    long_name = 'Completely Serious Comics'
    url = 'http://completelyseriouscomics.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (date, images, title, alt, author) for a page.

        `soup` is the parsed comic page; `link` is not used by this scraper.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find('span', class_='post-author')
        # The author is read from the second child node of the author span.
        author = author_span.contents[1].string
        posted = string_to_date(
            soup.find('span', class_='post-date').string, '%B %d, %Y')
        pane = soup.find('div', class_='comicpane')
        images = pane.find_all('img')
        # A page without any image is treated as a scraping error.
        assert images
        alt = images[0]['title']
        # Every image must carry the same text in both 'title' and 'alt'.
        assert all(
            img['title'] == img['alt'] == alt for img in images)
        return {
            'month': posted.month,
            'year': posted.year,
            'day': posted.day,
            'img': [img['src'] for img in images],
            'title': title,
            'alt': alt,
            'author': author,
        }



        }


class Penmen(GenericNavigableComic):
    """Scraper for the Penmen webcomic."""
    name = 'penmen'
    long_name = 'Penmen'
    url = 'http://penmen.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://penmen.com/index.php/2016/09/12/penmen-announces-grin-big-brand-clothing/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title, short url, images, tags, date) for a page.

        `soup` is the parsed comic page; `link` is not used by this scraper.
        """
        page_title = soup.find('title').string
        content = soup.find('div', class_='entry-content')
        images = content.find_all('img')
        shortlink = soup.find('link', rel='shortlink')['href']
        # Tags are flattened into a single space-separated string.
        tag_names = [anchor.string for anchor in soup.find_all('a', rel='tag')]
        tags = ' '.join(tag_names)
        # Only the first 10 characters of the datetime attribute are parsed.
        stamp = soup.find('time')['datetime'][:10]
        posted = string_to_date(stamp, "%Y-%m-%d")
        return {
            'title': page_title,
            'short_url': shortlink,
            'img': [image['src'] for image in images],
            'tags': tags,
            'month': posted.month,
            'year': posted.year,
            'day': posted.day,
        }



        }


class SafelyEndangered(GenericNavigableComic):
    """Class to retrieve Safely Endangered comics."""
    # Also on http://tumblr.safelyendangered.com
    name = 'endangered'
    long_name = 'Safely Endangered'
    url = 'http://www.safelyendangered.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.safelyendangered.com/comic/ignored/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with the post date ('day', 'month', 'year'), the image
        URLs ('img'), the post 'title' and the first image's 'alt' text.
        A page without a comic block or without images yields an empty image
        list and an empty alt text instead of raising.  `link` is unused.
        """
        title = soup.find('h2', class_='post-title').string
        date_str = soup.find('span', class_='post-date').string
        day = string_to_date(date_str, '%B %d, %Y')
        # Some pages may have no comic block at all: treat as "no images".
        comic = soup.find('div', id='comic')
        imgs = comic.find_all('img') if comic else []
        alt = imgs[0]['alt'] if imgs else ""
        # Sanity check: each image carries the same text as alt and title.
        assert all(i['alt'] == i['title'] for i in imgs)
        return {
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [i['src'] for i in imgs],
            'title': title,
            'alt': alt,
        }



        }


class ChuckleADuck(GenericNavigableComic):
    """Scraper for the Chuckle-A-Duck webcomic."""
    name = 'chuckleaduck'
    long_name = 'Chuckle-A-duck'
    url = 'http://chuckleaduck.com'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (date, images, title, author) for a page.

        The title is taken from the first image's 'title' attribute and is
        empty when the page has no comic image.  `link` is unused.
        """
        raw_date = soup.find('span', class_='post-date').string
        # The date text is passed through remove_st_nd_rd_th_from_date
        # before being parsed.
        cleaned_date = remove_st_nd_rd_th_from_date(raw_date)
        posted = string_to_date(cleaned_date, "%B %d, %Y")
        author = soup.find('span', class_='post-author').string
        comic_div = soup.find('div', id='comic')
        if comic_div:
            images = comic_div.find_all('img')
        else:
            images = []
        if images:
            title = images[0]['title']
        else:
            title = ""
        # Every image must carry the same text in both 'title' and 'alt'.
        assert all(
            img['title'] == img['alt'] == title for img in images)
        return {
            'month': posted.month,
            'year': posted.year,
            'day': posted.day,
            'img': [img['src'] for img in images],
            'title': title,
            'author': author,
        }



        }


class LinsEditions(GenericNavigableComic):
    """Scraper for the L.I.N.S. Editions webcomic."""
    # Also on https://linscomics.tumblr.com
    # Now on https://warandpeas.com
    name = 'lins'
    long_name = 'L.I.N.S. Editions'
    url = 'https://linsedition.com'
    _categories = ('LINS', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'https://linsedition.com/2011/09/07/l-i-n-s/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Read title, images and publication date from the page's meta tags.

        `soup` is the parsed comic page; `link` is not used by this scraper.
        """
        title = soup.find('meta', property='og:title')['content']
        image_metas = soup.find_all('meta', property='og:image')
        published_meta = soup.find('meta', property='article:published_time')
        # Only the first 10 characters of the timestamp are parsed.
        stamp = published_meta['content'][:10]
        posted = string_to_date(stamp, "%Y-%m-%d")
        return {
            'title': title,
            'img': [meta['content'] for meta in image_metas],
            'month': posted.month,
            'year': posted.year,
            'day': posted.day,
        }



        }


class Mercworks(GenericNavigableComic):
    """Scraper for the Mercworks webcomic."""
    # Also on http://mercworks.tumblr.com
    name = 'mercworks'
    long_name = 'Mercworks'
    url = 'http://mercworks.net'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Read images, title, description and date from the page's meta tags.

        The description is empty when the page has no og:description tag.
        `link` is not used by this scraper.
        """
        title = soup.find('meta', property='og:title')['content']
        description_meta = soup.find('meta', property='og:description')
        if description_meta:
            desc = description_meta['content']
        else:
            desc = ""
        published_meta = soup.find('meta', property='article:published_time')
        # Only the first 10 characters of the timestamp are parsed.
        posted = string_to_date(published_meta['content'][:10], "%Y-%m-%d")
        image_metas = soup.find_all('meta', property='og:image')
        return {
            'img': [meta['content'] for meta in image_metas],
            'title': title,
            'desc': desc,
            'day': posted.day,
            'month': posted.month,
            'year': posted.year,
        }



        return []


class ExtraFabulousComics(GenericNavigableComic):
    """Class to retrieve Extra Fabulous Comics."""
    # Also on https://extrafabulouscomics.tumblr.com
    name = 'efc'
    long_name = 'Extra Fabulous Comics'
    url = 'http://extrafabulouscomics.com'
    _categories = ('EFC', )
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with the 'title' (from the og:title meta tag), the
        'img' sources of every image hosted under this site's
        /wp-content/uploads/ directory, the publication date ('month',
        'year', 'day') and a 'prefix' made of the title followed by '-'.
        `link` is not used by this scraper.
        """
        # Escape the base URL so that its dots only match literal dots.
        img_src_re = re.compile(
            '^%s/wp-content/uploads/' % re.escape(cls.url))
        imgs = soup.find_all('img', src=img_src_re)
        title = soup.find('meta', property='og:title')['content']
        # Only the first 10 characters of the timestamp are parsed.
        date_str = soup.find('meta', property='article:published_time')['content'][:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        return {
            'title': title,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'prefix': title + '-'
        }



        """Get information about a particular comics."""
        title = link['title']
        imgs = soup.find_all('img', id='comicimg')
        return {
            'title': title,
            'img': [i['src'] for i in imgs],
        }


class OffTheLeashDog(GenericNavigableComic):
    """Class to retrieve Off The Leash Dog comics."""
    # Also on http://rupertfawcettsdoggyblog.tumblr.com
    # Also on http://www.rupertfawcettcartoons.com
    name = 'offtheleash'
    long_name = 'Off The Leash Dog'
    url = 'http://offtheleashdogcartoons.com'
    _categories = ('FAWCETT', )
    get_navi_link = get_a_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://offtheleashdogcartoons.com/uncategorized/can-i-help-you/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics."""
        print(link)
        title = soup.find("h1", class_="entry-title").string
        imgs = soup.find('div', class_='entry-content').find_all('img')
        return {

        }


class GenericLeMondeBlog(GenericNavigableComic):
    """Shared scraper logic for comics hosted on Le Monde blogs."""
    _categories = ('LEMONDE', 'FRANCAIS')
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    # Placeholder value; presumably overridden by concrete subclasses.
    first_url = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title, short url, images, date) for a page.

        The date text is parsed with the "fr_FR.utf8" locale argument and the
        image URLs go through convert_iri_to_plain_ascii_uri.  `link` is
        not used here.
        """
        shortlink = soup.find('link', rel='shortlink')['href']
        title = soup.find('meta', property='og:title')['content']
        date_text = soup.find("span", class_="entry-date").string
        posted = string_to_date(date_text, "%d %B %Y", "fr_FR.utf8")
        image_metas = soup.find_all('meta', property='og:image')
        image_urls = [
            convert_iri_to_plain_ascii_uri(meta['content'])
            for meta in image_metas
        ]
        return {
            'title': title,
            'url2': shortlink,
            'img': image_urls,
            'month': posted.month,
            'year': posted.year,
            'day': posted.day,
        }



        }


class MyExtraLife(GenericNavigableComic):
    """Scraper for the My Extra Life webcomic."""
    name = 'extralife'
    long_name = 'My Extra Life'
    url = 'http://www.myextralife.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'first comic' navigation anchor found on the home page."""
        home = get_soup_at_url(cls.url)
        return home.find('a', class_='comic_nav_link first_comic_link')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title, images, date) for a page.

        Images with an empty 'src' are left out of the result.  `link` is
        not used by this scraper.
        """
        title = soup.find("h1", class_="comic_title").string
        date_text = soup.find("span", class_="comic_date").string
        posted = string_to_date(date_text, "%B %d, %Y")
        images = soup.find_all("img", class_="comic")
        # Each image must repeat the comic title as both alt and title text.
        assert all(
            img['alt'] == img['title'] == title for img in images)
        return {
            'title': title,
            'img': [img['src'] for img in images if img["src"]],
            'day': posted.day,
            'month': posted.month,
            'year': posted.year,
        }





class JuliasDrawings(GenericListableComic):
    """Scraper for Julia's Drawings."""
    name = 'julia'
    long_name = "Julia's Drawings"
    url = 'https://drawings.jvns.ca'
    get_url_from_archive_element = get_href

    @classmethod
    def get_archive_elements(cls):
        """Return the first anchor of each post article, in reversed page order."""
        home = get_soup_at_url(cls.url)
        anchors = []
        for article in reversed(home.find_all('article', class_='li post')):
            anchors.append(article.find('a'))
        return anchors

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Build the info dict (title, images, date) for a page.

        Image sources are joined onto the site URL with urljoin_wrapper.
        `archive_elt` is not used by this scraper.
        """
        published_meta = soup.find('meta', property='og:article:published_time')
        # Only the first 10 characters of the timestamp are parsed.
        posted = string_to_date(published_meta['content'][:10], "%Y-%m-%d")
        title = soup.find('h3', class_='p-post-title').string
        post_body = soup.find('section', class_='post-content')
        images = post_body.find_all('img')
        return {
            'title': title,
            'img': [urljoin_wrapper(cls.url, img['src']) for img in images],
            'month': posted.month,
            'year': posted.year,
            'day': posted.day,
        }


class AnythingComic(GenericListableComic):

        }


class TalesOfAbsurdity(GenericNavigableComic):
    """Scraper for the Tales Of Absurdity webcomic."""
    # Also on http://tapastic.com/series/Tales-Of-Absurdity
    # Also on http://talesofabsurdity.tumblr.com
    name = 'absurdity'
    long_name = 'Tales of Absurdity'
    url = 'http://talesofabsurdity.com'
    _categories = ('ABSURDITY', )
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (images, title, alt, author, date) for a page.

        The alt text comes from the first image and is empty when the comic
        block holds no image.  `link` is not used by this scraper.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        date_text = soup.find("span", class_="post-date").string
        posted = string_to_date(date_text, "%B %d, %Y")
        comic_div = soup.find("div", id="comic")
        images = comic_div.find_all("img")
        # Each image must carry the same text in 'alt' and 'title'.
        assert all(img['alt'] == img['title'] for img in images)
        if images:
            alt = images[0]['alt']
        else:
            alt = ""
        return {
            'img': [img['src'] for img in images],
            'title': title,
            'alt': alt,
            'author': author,
            'day': posted.day,
            'month': posted.month,
            'year': posted.year,
        }



        }


class MisterAndMe(GenericNavigableComic):
    """Scraper for the Mister & Me webcomic."""
    # Also on http://www.gocomics.com/mister-and-me
    # Also on https://tapastic.com/series/Mister-and-Me
    name = 'mister'
    long_name = 'Mister & Me'
    url = 'http://www.mister-and-me.com'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (images, title, alt, author, date) for a page.

        At most one image is expected per page; the alt text is empty when
        there is none.  `link` is not used by this scraper.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        date_text = soup.find("span", class_="post-date").string
        posted = string_to_date(date_text, "%B %d, %Y")
        comic_div = soup.find("div", id="comic")
        images = comic_div.find_all("img")
        # Each image must carry the same text in 'alt' and 'title'.
        assert all(img['alt'] == img['title'] for img in images)
        assert len(images) <= 1
        if images:
            alt = images[0]['alt']
        else:
            alt = ""
        return {
            'img': [img['src'] for img in images],
            'title': title,
            'alt': alt,
            'author': author,
            'day': posted.day,
            'month': posted.month,
            'year': posted.year,
        }



        }


class Optipess(GenericNavigableComic):
    """Scraper for the Optipess webcomic."""
    name = 'optipess'
    long_name = 'Optipess'
    url = 'http://www.optipess.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title, alt, author, images, date) for a page.

        A page without a comic block yields no image and an empty alt text.
        `link` is not used by this scraper.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        comic_div = soup.find('div', id='comic')
        if comic_div:
            images = comic_div.find_all('img')
        else:
            images = []
        if images:
            alt = images[0]['title']
        else:
            alt = ""
        # Every image must carry the same text in both 'alt' and 'title'.
        assert all(
            img['alt'] == img['title'] == alt for img in images)
        date_text = soup.find('span', class_='post-date').string
        posted = string_to_date(date_text, "%B %d, %Y")
        return {
            'title': title,
            'alt': alt,
            'author': author,
            'img': [img['src'] for img in images],
            'month': posted.month,
            'year': posted.year,
            'day': posted.day,
        }



        }


class BiterComics(GenericNavigableComic):
    """Scraper for the Biter Comics webcomic."""
    name = "biter"
    long_name = "Biter Comics"
    url = "http://www.bitercomics.com"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (images, title, alt, author, date) for a page.

        Exactly one image is expected in the comic block.  `link` is not
        used by this scraper.
        """
        title = soup.find("h1", class_="entry-title").string
        author_span = soup.find("span", class_="author vcard")
        author = author_span.find("a").string
        date_text = soup.find("span", class_="entry-date").string
        posted = string_to_date(date_text, "%B %d, %Y")
        comic_div = soup.find("div", id="comic")
        images = comic_div.find_all("img")
        # Each image must carry the same text in 'alt' and 'title'.
        assert all(img['alt'] == img['title'] for img in images)
        assert len(images) == 1
        alt = images[0]['alt']
        return {
            'img': [img['src'] for img in images],
            'title': title,
            'alt': alt,
            'author': author,
            'day': posted.day,
            'month': posted.month,
            'year': posted.year,
        }



        }


class EndlessOrigami(GenericEmptyComic, GenericNavigableComic):
    """Scraper for the Endless Origami webcomic."""
    name = "origami"
    long_name = "Endless Origami"
    url = "http://endlessorigami.com"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (images, title, alt, author, date) for a page.

        The alt text comes from the first image and is empty when the comic
        block holds no image.  `link` is not used by this scraper.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        date_text = soup.find("span", class_="post-date").string
        posted = string_to_date(date_text, "%B %d, %Y")
        comic_div = soup.find("div", id="comic")
        images = comic_div.find_all("img")
        # Each image must carry the same text in 'alt' and 'title'.
        assert all(img['alt'] == img['title'] for img in images)
        if images:
            alt = images[0]['alt']
        else:
            alt = ""
        return {
            'img': [img['src'] for img in images],
            'title': title,
            'alt': alt,
            'author': author,
            'day': posted.day,
            'month': posted.month,
            'year': posted.year,
        }



		@@ 2006-2032 (lines=27) @@
2003		_categories = ('TUNEYTOONS', )
2004
2005
2006		class CompletelySeriousComics(GenericNavigableComic):
2007		"""Class to retrieve Completely Serious comics."""
2008		name = 'completelyserious'
2009		long_name = 'Completely Serious Comics'
2010		url = 'http://completelyseriouscomics.com'
2011		get_first_comic_link = get_a_navi_navifirst
2012		get_navi_link = get_a_navi_navinext
2013
2014		@classmethod
2015		def get_comic_info(cls, soup, link):
2016		"""Get information about a particular comics."""
2017		title = soup.find('h2', class_='post-title').string
2018		author = soup.find('span', class_='post-author').contents[1].string
2019		date_str = soup.find('span', class_='post-date').string
2020		day = string_to_date(date_str, '%B %d, %Y')
2021		imgs = soup.find('div', class_='comicpane').find_all('img')
2022		assert imgs
2023		alt = imgs[0]['title']
2024		assert all(i['title'] == i['alt'] == alt for i in imgs)
2025		return {
2026		'month': day.month,
2027		'year': day.year,
2028		'day': day.day,
2029		'img': [i['src'] for i in imgs],
2030		'title': title,
2031		'alt': alt,
2032		'author': author,
2033		}
2034
2035
		@@ 1891-1916 (lines=26) @@
1888		}
1889
1890
1891		class Penmen(GenericNavigableComic):
1892		"""Class to retrieve Penmen comics."""
1893		name = 'penmen'
1894		long_name = 'Penmen'
1895		url = 'http://penmen.com'
1896		get_navi_link = get_link_rel_next
1897		get_first_comic_link = simulate_first_link
1898		first_url = 'http://penmen.com/index.php/2016/09/12/penmen-announces-grin-big-brand-clothing/'
1899
1900		@classmethod
1901		def get_comic_info(cls, soup, link):
1902		"""Get information about a particular comics."""
1903		title = soup.find('title').string
1904		imgs = soup.find('div', class_='entry-content').find_all('img')
1905		short_url = soup.find('link', rel='shortlink')['href']
1906		tags = ' '.join(t.string for t in soup.find_all('a', rel='tag'))
1907		date_str = soup.find('time')['datetime'][:10]
1908		day = string_to_date(date_str, "%Y-%m-%d")
1909		return {
1910		'title': title,
1911		'short_url': short_url,
1912		'img': [i['src'] for i in imgs],
1913		'tags': tags,
1914		'month': day.month,
1915		'year': day.year,
1916		'day': day.day,
1917		}
1918
1919
		@@ 1832-1857 (lines=26) @@
1829		}
1830
1831
1832		class SafelyEndangered(GenericNavigableComic):
1833		"""Class to retrieve Safely Endangered comics."""
1834		# Also on http://tumblr.safelyendangered.com
1835		name = 'endangered'
1836		long_name = 'Safely Endangered'
1837		url = 'http://www.safelyendangered.com'
1838		get_navi_link = get_link_rel_next
1839		get_first_comic_link = simulate_first_link
1840		first_url = 'http://www.safelyendangered.com/comic/ignored/'
1841
1842		@classmethod
1843		def get_comic_info(cls, soup, link):
1844		"""Get information about a particular comics."""
1845		title = soup.find('h2', class_='post-title').string
1846		date_str = soup.find('span', class_='post-date').string
1847		day = string_to_date(date_str, '%B %d, %Y')
1848		imgs = soup.find('div', id='comic').find_all('img')
1849		alt = imgs[0]['alt']
1850		assert all(i['alt'] == i['title'] for i in imgs)
1851		return {
1852		'day': day.day,
1853		'month': day.month,
1854		'year': day.year,
1855		'img': [i['src'] for i in imgs],
1856		'title': title,
1857		'alt': alt,
1858		}
1859
1860
		@@ 2094-2118 (lines=25) @@
2091		}
2092
2093
2094		class ChuckleADuck(GenericNavigableComic):
2095		"""Class to retrieve Chuckle-A-Duck comics."""
2096		name = 'chuckleaduck'
2097		long_name = 'Chuckle-A-duck'
2098		url = 'http://chuckleaduck.com'
2099		get_first_comic_link = get_div_navfirst_a
2100		get_navi_link = get_link_rel_next
2101
2102		@classmethod
2103		def get_comic_info(cls, soup, link):
2104		"""Get information about a particular comics."""
2105		date_str = soup.find('span', class_='post-date').string
2106		day = string_to_date(remove_st_nd_rd_th_from_date(date_str), "%B %d, %Y")
2107		author = soup.find('span', class_='post-author').string
2108		div = soup.find('div', id='comic')
2109		imgs = div.find_all('img') if div else []
2110		title = imgs[0]['title'] if imgs else ""
2111		assert all(i['title'] == i['alt'] == title for i in imgs)
2112		return {
2113		'month': day.month,
2114		'year': day.year,
2115		'day': day.day,
2116		'img': [i['src'] for i in imgs],
2117		'title': title,
2118		'author': author,
2119		}
2120
2121
		@@ 2402-2426 (lines=25) @@
2399		}
2400
2401
2402		class LinsEditions(GenericNavigableComic):
2403		"""Class to retrieve L.I.N.S. Editions comics."""
2404		# Also on https://linscomics.tumblr.com
2405		# Now on https://warandpeas.com
2406		name = 'lins'
2407		long_name = 'L.I.N.S. Editions'
2408		url = 'https://linsedition.com'
2409		_categories = ('LINS', )
2410		get_navi_link = get_link_rel_next
2411		get_first_comic_link = simulate_first_link
2412		first_url = 'https://linsedition.com/2011/09/07/l-i-n-s/'
2413
2414		@classmethod
2415		def get_comic_info(cls, soup, link):
2416		"""Get information about a particular comics."""
2417		title = soup.find('meta', property='og:title')['content']
2418		imgs = soup.find_all('meta', property='og:image')
2419		date_str = soup.find('meta', property='article:published_time')['content'][:10]
2420		day = string_to_date(date_str, "%Y-%m-%d")
2421		return {
2422		'title': title,
2423		'img': [i['content'] for i in imgs],
2424		'month': day.month,
2425		'year': day.year,
2426		'day': day.day,
2427		}
2428
2429
		@@ 1046-1070 (lines=25) @@
1043		}
1044
1045
1046		class Mercworks(GenericNavigableComic):
1047		"""Class to retrieve Mercworks comics."""
1048		# Also on http://mercworks.tumblr.com
1049		name = 'mercworks'
1050		long_name = 'Mercworks'
1051		url = 'http://mercworks.net'
1052		get_first_comic_link = get_a_comicnavbase_comicnavfirst
1053		get_navi_link = get_link_rel_next
1054
1055		@classmethod
1056		def get_comic_info(cls, soup, link):
1057		"""Get information about a particular comics."""
1058		title = soup.find('meta', property='og:title')['content']
1059		metadesc = soup.find('meta', property='og:description')
1060		desc = metadesc['content'] if metadesc else ""
1061		date_str = soup.find('meta', property='article:published_time')['content'][:10]
1062		day = string_to_date(date_str, "%Y-%m-%d")
1063		imgs = soup.find_all('meta', property='og:image')
1064		return {
1065		'img': [i['content'] for i in imgs],
1066		'title': title,
1067		'desc': desc,
1068		'day': day.day,
1069		'month': day.month,
1070		'year': day.year
1071		}
1072
1073
		@@ 360-384 (lines=25) @@
357		return []
358
359
360		class ExtraFabulousComics(GenericNavigableComic):
361		"""Class to retrieve Extra Fabulous Comics."""
362		# Also on https://extrafabulouscomics.tumblr.com
363		name = 'efc'
364		long_name = 'Extra Fabulous Comics'
365		url = 'http://extrafabulouscomics.com'
366		_categories = ('EFC', )
367		get_first_comic_link = get_a_navi_navifirst
368		get_navi_link = get_link_rel_next
369
370		@classmethod
371		def get_comic_info(cls, soup, link):
372		"""Get information about a particular comics."""
373		img_src_re = re.compile('^%s/wp-content/uploads/' % cls.url)
374		imgs = soup.find_all('img', src=img_src_re)
375		title = soup.find('meta', property='og:title')['content']
376		date_str = soup.find('meta', property='article:published_time')['content'][:10]
377		day = string_to_date(date_str, "%Y-%m-%d")
378		return {
379		'title': title,
380		'img': [i['src'] for i in imgs],
381		'month': day.month,
382		'year': day.year,
383		'day': day.day,
384		'prefix': title + '-'
385		}
386
387
		@@ 3368-3389 (lines=22) @@
3365		"""Get information about a particular comics."""
3366		title = link['title']
3367		imgs = soup.find_all('img', id='comicimg')
3368		return {
3369		'title': title,
3370		'img': [i['src'] for i in imgs],
3371		}
3372
3373
3374		class OffTheLeashDog(GenericNavigableComic):
3375		"""Class to retrieve Off The Leash Dog comics."""
3376		# Also on http://rupertfawcettsdoggyblog.tumblr.com
3377		# Also on http://www.rupertfawcettcartoons.com
3378		name = 'offtheleash'
3379		long_name = 'Off The Leash Dog'
3380		url = 'http://offtheleashdogcartoons.com'
3381		_categories = ('FAWCETT', )
3382		get_navi_link = get_a_rel_next
3383		get_first_comic_link = simulate_first_link
3384		first_url = 'http://offtheleashdogcartoons.com/uncategorized/can-i-help-you/'
3385
3386		@classmethod
3387		def get_comic_info(cls, soup, link):
3388		"""Get information about a particular comics."""
3389		print(link)
3390		title = soup.find("h1", class_="entry-title").string
3391		imgs = soup.find('div', class_='entry-content').find_all('img')
3392		return {
		@@ 388-409 (lines=22) @@
385		}
386
387
388		class GenericLeMondeBlog(GenericNavigableComic):
389		"""Generic class to retrieve comics from Le Monde blogs."""
390		_categories = ('LEMONDE', 'FRANCAIS')
391		get_navi_link = get_link_rel_next
392		get_first_comic_link = simulate_first_link
393		first_url = NotImplemented
394
395		@classmethod
396		def get_comic_info(cls, soup, link):
397		"""Get information about a particular comics."""
398		url2 = soup.find('link', rel='shortlink')['href']
399		title = soup.find('meta', property='og:title')['content']
400		date_str = soup.find("span", class_="entry-date").string
401		day = string_to_date(date_str, "%d %B %Y", "fr_FR.utf8")
402		imgs = soup.find_all('meta', property='og:image')
403		return {
404		'title': title,
405		'url2': url2,
406		'img': [convert_iri_to_plain_ascii_uri(i['content']) for i in imgs],
407		'month': day.month,
408		'year': day.year,
409		'day': day.day,
410		}
411
412
		@@ 958-983 (lines=26) @@
955		}
956
957
958		class MyExtraLife(GenericNavigableComic):
959		"""Class to retrieve My Extra Life comics."""
960		name = 'extralife'
961		long_name = 'My Extra Life'
962		url = 'http://www.myextralife.com'
963		get_navi_link = get_link_rel_next
964
965		@classmethod
966		def get_first_comic_link(cls):
967		"""Get link to first comics."""
968		return get_soup_at_url(cls.url).find('a', class_='comic_nav_link first_comic_link')
969
970		@classmethod
971		def get_comic_info(cls, soup, link):
972		"""Get information about a particular comics."""
973		title = soup.find("h1", class_="comic_title").string
974		date_str = soup.find("span", class_="comic_date").string
975		day = string_to_date(date_str, "%B %d, %Y")
976		imgs = soup.find_all("img", class_="comic")
977		assert all(i['alt'] == i['title'] == title for i in imgs)
978		return {
979		'title': title,
980		'img': [i['src'] for i in imgs if i["src"]],
981		'day': day.day,
982		'month': day.month,
983		'year': day.year
984		}
985
986
		@@ 2304-2328 (lines=25) @@
2301
2302
2303		class JuliasDrawings(GenericListableComic):
2304		"""Class to retrieve Julia's Drawings."""
2305		name = 'julia'
2306		long_name = "Julia's Drawings"
2307		url = 'https://drawings.jvns.ca'
2308		get_url_from_archive_element = get_href
2309
2310		@classmethod
2311		def get_archive_elements(cls):
2312		articles = get_soup_at_url(cls.url).find_all('article', class_='li post')
2313		return [art.find('a') for art in reversed(articles)]
2314
2315		@classmethod
2316		def get_comic_info(cls, soup, archive_elt):
2317		"""Get information about a particular comics."""
2318		date_str = soup.find('meta', property='og:article:published_time')['content'][:10]
2319		day = string_to_date(date_str, "%Y-%m-%d")
2320		title = soup.find('h3', class_='p-post-title').string
2321		imgs = soup.find('section', class_='post-content').find_all('img')
2322		return {
2323		'title': title,
2324		'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
2325		'month': day.month,
2326		'year': day.year,
2327		'day': day.day,
2328		}
2329
2330
2331		class AnythingComic(GenericListableComic):
		@@ 2673-2701 (lines=29) @@
2670		}
2671
2672
2673		class TalesOfAbsurdity(GenericNavigableComic):
2674		"""Class to retrieve Tales Of Absurdity comics."""
2675		# Also on http://tapastic.com/series/Tales-Of-Absurdity
2676		# Also on http://talesofabsurdity.tumblr.com
2677		name = 'absurdity'
2678		long_name = 'Tales of Absurdity'
2679		url = 'http://talesofabsurdity.com'
2680		_categories = ('ABSURDITY', )
2681		get_first_comic_link = get_a_navi_navifirst
2682		get_navi_link = get_a_navi_comicnavnext_navinext
2683
2684		@classmethod
2685		def get_comic_info(cls, soup, link):
2686		"""Get information about a particular comics."""
2687		title = soup.find('h2', class_='post-title').string
2688		author = soup.find("span", class_="post-author").find("a").string
2689		date_str = soup.find("span", class_="post-date").string
2690		day = string_to_date(date_str, "%B %d, %Y")
2691		imgs = soup.find("div", id="comic").find_all("img")
2692		assert all(i['alt'] == i['title'] for i in imgs)
2693		alt = imgs[0]['alt'] if imgs else ""
2694		return {
2695		'img': [i['src'] for i in imgs],
2696		'title': title,
2697		'alt': alt,
2698		'author': author,
2699		'day': day.day,
2700		'month': day.month,
2701		'year': day.year
2702		}
2703
2704
		@@ 2611-2639 (lines=29) @@
2608		}
2609
2610
2611		class MisterAndMe(GenericNavigableComic):
2612		"""Class to retrieve Mister & Me Comics."""
2613		# Also on http://www.gocomics.com/mister-and-me
2614		# Also on https://tapastic.com/series/Mister-and-Me
2615		name = 'mister'
2616		long_name = 'Mister & Me'
2617		url = 'http://www.mister-and-me.com'
2618		get_first_comic_link = get_a_comicnavbase_comicnavfirst
2619		get_navi_link = get_link_rel_next
2620
2621		@classmethod
2622		def get_comic_info(cls, soup, link):
2623		"""Get information about a particular comics."""
2624		title = soup.find('h2', class_='post-title').string
2625		author = soup.find("span", class_="post-author").find("a").string
2626		date_str = soup.find("span", class_="post-date").string
2627		day = string_to_date(date_str, "%B %d, %Y")
2628		imgs = soup.find("div", id="comic").find_all("img")
2629		assert all(i['alt'] == i['title'] for i in imgs)
2630		assert len(imgs) <= 1
2631		alt = imgs[0]['alt'] if imgs else ""
2632		return {
2633		'img': [i['src'] for i in imgs],
2634		'title': title,
2635		'alt': alt,
2636		'author': author,
2637		'day': day.day,
2638		'month': day.month,
2639		'year': day.year
2640		}
2641
2642
		@@ 2897-2923 (lines=27) @@
2894		}
2895
2896
2897		class Optipess(GenericNavigableComic):
2898		"""Class to retrieve Optipess comics."""
2899		name = 'optipess'
2900		long_name = 'Optipess'
2901		url = 'http://www.optipess.com'
2902		get_first_comic_link = get_a_navi_navifirst
2903		get_navi_link = get_link_rel_next
2904
2905		@classmethod
2906		def get_comic_info(cls, soup, link):
2907		"""Get information about a particular comics."""
2908		title = soup.find('h2', class_='post-title').string
2909		author = soup.find("span", class_="post-author").find("a").string
2910		comic = soup.find('div', id='comic')
2911		imgs = comic.find_all('img') if comic else []
2912		alt = imgs[0]['title'] if imgs else ""
2913		assert all(i['alt'] == i['title'] == alt for i in imgs)
2914		date_str = soup.find('span', class_='post-date').string
2915		day = string_to_date(date_str, "%B %d, %Y")
2916		return {
2917		'title': title,
2918		'alt': alt,
2919		'author': author,
2920		'img': [i['src'] for i in imgs],
2921		'month': day.month,
2922		'year': day.year,
2923		'day': day.day,
2924		}
2925
2926
		@@ 2532-2558 (lines=27) @@
2529		}
2530
2531
2532		class BiterComics(GenericNavigableComic):
2533		"""Class to retrieve Biter Comics."""
2534		name = "biter"
2535		long_name = "Biter Comics"
2536		url = "http://www.bitercomics.com"
2537		get_first_comic_link = get_a_navi_navifirst
2538		get_navi_link = get_link_rel_next
2539
2540		@classmethod
2541		def get_comic_info(cls, soup, link):
2542		"""Get information about a particular comics."""
2543		title = soup.find("h1", class_="entry-title").string
2544		author = soup.find("span", class_="author vcard").find("a").string
2545		date_str = soup.find("span", class_="entry-date").string
2546		day = string_to_date(date_str, "%B %d, %Y")
2547		imgs = soup.find("div", id="comic").find_all("img")
2548		assert all(i['alt'] == i['title'] for i in imgs)
2549		assert len(imgs) == 1
2550		alt = imgs[0]['alt']
2551		return {
2552		'img': [i['src'] for i in imgs],
2553		'title': title,
2554		'alt': alt,
2555		'author': author,
2556		'day': day.day,
2557		'month': day.month,
2558		'year': day.year
2559		}
2560
2561
		@@ 2705-2730 (lines=26) @@
2702		}
2703
2704
2705		class EndlessOrigami(GenericEmptyComic, GenericNavigableComic):
2706		"""Class to retrieve Endless Origami Comics."""
2707		name = "origami"
2708		long_name = "Endless Origami"
2709		url = "http://endlessorigami.com"
2710		get_first_comic_link = get_a_navi_navifirst
2711		get_navi_link = get_link_rel_next
2712
2713		@classmethod
2714		def get_comic_info(cls, soup, link):
2715		"""Get information about a particular comics."""
2716		title = soup.find('h2', class_='post-title').string
2717		author = soup.find("span", class_="post-author").find("a").string
2718		date_str = soup.find("span", class_="post-date").string
2719		day = string_to_date(date_str, "%B %d, %Y")
2720		imgs = soup.find("div", id="comic").find_all("img")
2721		assert all(i['alt'] == i['title'] for i in imgs)
2722		alt = imgs[0]['alt'] if imgs else ""
2723		return {
2724		'img': [i['src'] for i in imgs],
2725		'title': title,
2726		'alt': alt,
2727		'author': author,
2728		'day': day.day,
2729		'month': day.month,
2730		'year': day.year
2731		}
2732
2733

SylvainDe / ComicBookMaker

Code Duplication Length = 22-29 lines in 16 locations

comics.py 16 locations