Code Duplication - SylvainDe/ComicBookMaker - Measure and Improve Code Quality continuously with Scrutinizer

Code Duplication Length = 23-28 lines in 12 locations

comics.py 12 locations


        """Get information about a particular comics."""
        title = soup.find('meta', property='og:title')['content']
        author = soup.find('meta', attrs={'name': 'shareaholic:article_author_name'})['content']
        date_str = soup.find('meta', attrs={'name': 'shareaholic:article_published_time'})['content']
        date_str = date_str[:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        imgs = soup.find_all('meta', property='og:image')
        skip_imgs = {
            'http://respawncomic.com/wp-content/uploads/2016/03/site/HAROLD2.png',
            'http://respawncomic.com/wp-content/uploads/2016/03/site/DEVA.png'
        }
        return {
            'title': title,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [i['content'] for i in imgs if i['content'] not in skip_imgs],
        }


class SafelyEndangered(GenericNavigableComic):
    """Class to retrieve Safely Endangered comics."""
    # Also on http://tumblr.safelyendangered.com
    name = 'endangered'
    long_name = 'Safely Endangered'
    url = 'http://www.safelyendangered.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.safelyendangered.com/comic/ignored/'

    @classmethod

    first_url = 'http://www.safelyendangered.com/comic/ignored/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The title is read from the 'post-title' heading, the date from the
        'post-date' span (parsed with the format '%B %d, %Y') and the images
        from the element with id 'comic'.

        Args:
            soup: object exposing find() for the comic page (presumably a
                parsed HTML document - confirm against the caller).
            link: not used by this implementation.

        Returns:
            A dict with the keys 'day', 'month', 'year', 'img', 'title' and
            'alt'. 'alt' is the 'alt' attribute of the first image, or an
            empty string when no image was found.

        Raises:
            AssertionError: if an image has different 'alt' and 'title'
                attributes.
        """
        title = soup.find('h2', class_='post-title').string
        date_str = soup.find('span', class_='post-date').string
        day = string_to_date(date_str, '%B %d, %Y')
        imgs = soup.find('div', id='comic').find_all('img')
        # A page without any image must not fail with IndexError: fall back
        # to an empty alt text instead.
        alt = imgs[0]['alt'] if imgs else ""
        assert all(i['alt'] == i['title'] for i in imgs)
        return {
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [i['src'] for i in imgs],
            'title': title,
            'alt': alt,
        }


class PicturesInBoxes(GenericNavigableComic):
    """Class to retrieve Pictures In Boxes comics."""
    # Also on http://picturesinboxescomic.tumblr.com
    name = 'picturesinboxes'
    long_name = 'Pictures in Boxes'
    url = 'http://www.picturesinboxes.com'
    get_navi_link = get_a_navi_navinext
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.picturesinboxes.com/2013/10/26/tetris/'

    @classmethod
    def get_comic_info(cls, soup, link):

    url = "http://lastplacecomics.com"
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect title, author, date, image and alt text of a comic.

        At most one image is expected in the element with id 'comic', and
        its 'alt' and 'title' attributes must be equal; both expectations
        are checked with assertions. `link` is not used.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        comic_div = soup.find("div", id="comic")
        images = comic_div.find_all("img")
        for image in images:
            # 'alt' and 'title' are expected to carry the same text.
            assert image['alt'] == image['title']
        assert len(images) <= 1
        alt = ""
        if images:
            alt = images[0]['alt']
        sources = [image['src'] for image in images]
        return {
            'img': sources,
            'title': title,
            'alt': alt,
            'author': author,
            'day': published.day,
            'month': published.month,
            'year': published.year,
        }


class TalesOfAbsurdity(GenericNavigableComic):
    """Class to retrieve Tales Of Absurdity comics."""
    # Also on http://tapastic.com/series/Tales-Of-Absurdity
    # Also on http://talesofabsurdity.tumblr.com
    name = 'absurdity'
    long_name = 'Tales of Absurdity'
    url = 'http://talesofabsurdity.com'
    _categories = ('ABSURDITY', )

    name = 'unearthed'
    long_name = 'Unearthed Comics'
    url = 'http://unearthedcomics.com'
    _categories = ('UNEARTHED', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://unearthedcomics.com/comics/world-with-turn-signals/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Args:
            soup: object exposing find() for the comic page (presumably a
                parsed HTML document - confirm against the caller).
            link: not used by this implementation.

        Returns:
            A dict with the keys 'title', 'description', 'url2', 'img',
            'month', 'year' and 'day'. 'title' is an empty string when the
            page has neither an <h1> nor an <h2>; 'description' is None when
            the page has no 'og:description' meta element.
        """
        short_url = soup.find('link', rel='shortlink')['href']
        title_elt = soup.find('h1') or soup.find('h2')
        title = title_elt.string if title_elt else ""
        # find() gives back the <meta> element itself (or None): the text of
        # the description is stored in its 'content' attribute.
        desc_elt = soup.find('meta', property='og:description')
        desc = desc_elt['content'] if desc_elt is not None else None
        date_str = soup.find('time', class_='published updated hidden')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        post = soup.find('div', class_="entry content entry-content type-portfolio")
        imgs = post.find_all('img')
        return {
            'title': title,
            'description': desc,
            'url2': short_url,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }


class Optipess(GenericNavigableComic):
    """Class to retrieve Optipess comics."""
    name = 'optipess'

    url = 'http://www.mister-and-me.com'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect title, author, date, image and alt text of a comic.

        At most one image is expected in the element with id 'comic', and
        its 'alt' and 'title' attributes must be equal; both expectations
        are checked with assertions. `link` is not used.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        comic_div = soup.find("div", id="comic")
        images = comic_div.find_all("img")
        for image in images:
            # 'alt' and 'title' are expected to carry the same text.
            assert image['alt'] == image['title']
        assert len(images) <= 1
        alt = ""
        if images:
            alt = images[0]['alt']
        sources = [image['src'] for image in images]
        return {
            'img': sources,
            'title': title,
            'alt': alt,
            'author': author,
            'day': published.day,
            'month': published.month,
            'year': published.year,
        }


class LastPlaceComics(GenericNavigableComic):
    """Class to retrieve Last Place Comics."""
    name = 'lastplace'
    long_name = 'Last Place Comics'
    url = "http://lastplacecomics.com"
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

        return None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict of a comic from its 'itemprop' annotated markup.

        The title comes from the 'description' meta element, the description
        from the 'articleBody' div, and every image source is passed together
        with cls.url to urljoin_wrapper. 'alt' and 'title' of each image must
        be equal (checked with an assertion). `link` is not used.
        """
        meta_description = soup.find('meta', attrs={'name': 'description'})
        title = meta_description["content"]
        body = soup.find('div', itemprop='articleBody')
        description = body.text
        author_span = soup.find('span', itemprop='author copyrightHolder')
        author = author_span.string
        images = soup.find_all('img', itemprop='image')
        for image in images:
            assert image['title'] == image['alt']
        alt = ""
        if images:
            alt = images[0]['alt']
        published_elt = soup.find('time', itemprop='datePublished')
        published = string_to_date(published_elt["datetime"], "%Y-%m-%d %H:%M:%S")
        sources = [urljoin_wrapper(cls.url, image['src']) for image in images]
        return {
            'img': sources,
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'author': author,
            'title': title,
            'alt': alt,
            'description': description,
        }


class GerbilWithAJetpack(GenericNavigableComic):
    """Class to retrieve GerbilWithAJetpack comics."""
    name = 'gerbil'
    long_name = 'Gerbil With A Jetpack'
    url = 'http://gerbilwithajetpack.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_rel_next

        return get_soup_at_url(cls.url).find('a', title="First")

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Find the <a> element titled 'Next' (if next_ is true) or 'Previous'."""
        wanted_title = 'Previous'
        if next_:
            wanted_title = 'Next'
        return last_soup.find('a', title=wanted_title)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract the title, the publication date and the image sources.

        The images are looked up in the 'comic' div with the filters alt=''
        and title=''. `link` is not used.
        """
        title = soup.find('h1').string
        date_span = soup.find('span', class_='date')
        raw_date = date_span.string.strip()
        published = string_to_date(raw_date, "%B %d, %Y")
        comic_div = soup.find('div', class_='comic')
        images = comic_div.find_all('img', alt='', title='')
        sources = [image['src'] for image in images]
        return {
            'title': title,
            'img': sources,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }


class ChuckleADuck(GenericNavigableComic):
    """Class to retrieve Chuckle-A-Duck comics."""
    name = 'chuckleaduck'
    long_name = 'Chuckle-A-duck'
    url = 'http://chuckleaduck.com'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next


    long_name = "Every Day Blues"
    url = "http://everydayblues.net"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect title, author, date and image of a comic.

        The date string is parsed with the format "%d. %B %Y" and the
        "de_DE.utf8" argument given to string_to_date. At most one image is
        expected, and its 'alt' and 'title' attributes must both equal the
        comic title (checked with assertions). `link` is not used.
        """
        title = soup.find("h2", class_="post-title").string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%d. %B %Y", "de_DE.utf8")
        comic_div = soup.find("div", id="comic")
        images = comic_div.find_all("img")
        for image in images:
            assert image['alt'] == image['title'] == title
        assert len(images) <= 1
        sources = [image['src'] for image in images]
        return {
            'img': sources,
            'title': title,
            'author': author,
            'day': published.day,
            'month': published.month,
            'year': published.year,
        }


class BiterComics(GenericNavigableComic):
    """Class to retrieve Biter Comics."""
    name = "biter"
    long_name = "Biter Comics"
    url = "http://www.bitercomics.com"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next


            'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
            'title': title,
        }


class DiscoBleach(GenericEmptyComic):
    """Class for Disco Bleach Comics.

    Based on GenericEmptyComic because retrieval does not work anymore.
    """
    url = "http://discobleach.com"
    long_name = "Disco Bleach"
    name = "discobleach"


class TubeyToons(GenericEmptyComic):  # Does not work anymore
    """Class to retrieve TubeyToons comics.

    Based on GenericEmptyComic because retrieval does not work anymore.
    """
    # Also on http://tapastic.com/series/Tubey-Toons
    # Also on http://tubeytoons.tumblr.com
    name = 'tubeytoons'
    long_name = 'Tubey Toons'
    url = 'http://tubeytoons.com'
    # 'TUNEYTOONS' was a misspelling of the comic name; it is kept next to
    # the corrected spelling so that the old category name keeps working.
    _categories = ('TUBEYTOONS', 'TUNEYTOONS')


class CompletelySeriousComics(GenericNavigableComic):
    """Class to retrieve Completely Serious comics."""
    name = 'completelyserious'
    long_name = 'Completely Serious Comics'
    url = 'http://completelyseriouscomics.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics."""

        return {
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'title': title,
            'title2': title2,
            'description': description,
            'tags': tags,
            'img': [i['src'] for i in imgs],
            'alt': ' '.join(i['alt'] for i in imgs),
        }

    @classmethod
    def get_url_from_archive_element(cls, tr):
        """Get the comic URL from a row of the archive table.

        Args:
            tr: row element expected to contain exactly three <td> cells.

        Returns:
            The 'href' of the first <a> found in the second cell.

        Raises:
            ValueError: if the row does not contain exactly three cells.
        """
        # Unpacking (rather than indexing) keeps the check on the cell count;
        # only the second cell is needed.
        _, link_cell, _ = tr.find_all('td')
        return link_cell.find('a')['href']

    @classmethod
    def get_archive_elements(cls):
        """Return the <tr> rows of the archive page's <tbody>, reversed.

        The address of the archive page is obtained by passing cls.url and
        'archive-2' to urljoin_wrapper.
        """
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive-2'))
        rows = archive_soup.find('tbody').find_all('tr')
        return reversed(rows)


class HappleTea(GenericNavigableComic):
    """Class to retrieve Happle Tea Comics."""
    name = 'happletea'
    long_name = 'Happle Tea'
    url = 'http://www.happletea.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod

        title = soup.find('meta', property='og:title')['content']
        imgs = soup.find('div', class_='entry-content').find_all('img')
        title2 = ' '.join(i.get('title', '') for i in imgs)
        return {
            'title': title,
            'title2': title2,
            'description': desc,
            'img': [urljoin_wrapper(cls.url, convert_iri_to_plain_ascii_uri(i['src'])) for i in imgs],
        }


class CommitStripFr(GenericCommitStrip):
    """French edition of Commit Strip."""
    _categories = ("FRANCAIS", )
    first_url = "http://www.commitstrip.com/fr/2012/02/22/interview/"
    url = "http://www.commitstrip.com/fr"
    long_name = "Commit Strip (Fr)"
    name = "commit_fr"


class CommitStripEn(GenericCommitStrip):
    """English edition of Commit Strip."""
    first_url = "http://www.commitstrip.com/en/2012/02/22/interview/"
    url = "http://www.commitstrip.com/en"
    long_name = "Commit Strip (En)"
    name = "commit_en"


class GenericBoumerie(GenericNavigableComic):
    """Generic class to retrieve Boumeries comics in different languages."""
    get_first_comic_link = get_a_navi_navifirst

    def get_next_comic(cls, last_comic):
        """Log that the comic is treated as empty and return an empty list.

        `last_comic` is ignored.
        """
        cls.log("comic is considered as empty - returning no comic")
        no_comics = []
        return no_comics


class ExtraFabulousComics(GenericNavigableComic):
    """Class to retrieve Extra Fabulous Comics."""
    name = 'efc'
    long_name = 'Extra Fabulous Comics'
    url = 'http://extrafabulouscomics.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics."""
        img_src_re = re.compile('^%s/wp-content/uploads/' % cls.url)
        imgs = soup.find_all('img', src=img_src_re)
        title = soup.find('meta', property='og:title')['content']
        date_str = soup.find('meta', property='article:published_time')['content'][:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        return {
            'title': title,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'prefix': title + '-'

		@@ 1781-1806 (lines=26) @@
1778		"""Get information about a particular comics."""
1779		title = soup.find('meta', property='og:title')['content']
1780		author = soup.find('meta', attrs={'name': 'shareaholic:article_author_name'})['content']
1781		date_str = soup.find('meta', attrs={'name': 'shareaholic:article_published_time'})['content']
1782		date_str = date_str[:10]
1783		day = string_to_date(date_str, "%Y-%m-%d")
1784		imgs = soup.find_all('meta', property='og:image')
1785		skip_imgs = {
1786		'http://respawncomic.com/wp-content/uploads/2016/03/site/HAROLD2.png',
1787		'http://respawncomic.com/wp-content/uploads/2016/03/site/DEVA.png'
1788		}
1789		return {
1790		'title': title,
1791		'author': author,
1792		'day': day.day,
1793		'month': day.month,
1794		'year': day.year,
1795		'img': [i['content'] for i in imgs if i['content'] not in skip_imgs],
1796		}
1797
1798
1799		class SafelyEndangered(GenericNavigableComic):
1800		"""Class to retrieve Safely Endangered comics."""
1801		# Also on http://tumblr.safelyendangered.com
1802		name = 'endangered'
1803		long_name = 'Safely Endangered'
1804		url = 'http://www.safelyendangered.com'
1805		get_navi_link = get_link_rel_next
1806		get_first_comic_link = simulate_first_link
1807		first_url = 'http://www.safelyendangered.com/comic/ignored/'
1808
1809		@classmethod
		@@ 1810-1836 (lines=27) @@
1807		first_url = 'http://www.safelyendangered.com/comic/ignored/'
1808
1809		@classmethod
1810		def get_comic_info(cls, soup, link):
1811		"""Get information about a particular comics."""
1812		title = soup.find('h2', class_='post-title').string
1813		date_str = soup.find('span', class_='post-date').string
1814		day = string_to_date(date_str, '%B %d, %Y')
1815		imgs = soup.find('div', id='comic').find_all('img')
1816		alt = imgs[0]['alt']
1817		assert all(i['alt'] == i['title'] for i in imgs)
1818		return {
1819		'day': day.day,
1820		'month': day.month,
1821		'year': day.year,
1822		'img': [i['src'] for i in imgs],
1823		'title': title,
1824		'alt': alt,
1825		}
1826
1827
1828		class PicturesInBoxes(GenericNavigableComic):
1829		"""Class to retrieve Pictures In Boxes comics."""
1830		# Also on http://picturesinboxescomic.tumblr.com
1831		name = 'picturesinboxes'
1832		long_name = 'Pictures in Boxes'
1833		url = 'http://www.picturesinboxes.com'
1834		get_navi_link = get_a_navi_navinext
1835		get_first_comic_link = simulate_first_link
1836		first_url = 'http://www.picturesinboxes.com/2013/10/26/tetris/'
1837
1838		@classmethod
1839		def get_comic_info(cls, soup, link):
		@@ 2518-2545 (lines=28) @@
2515		url = "http://lastplacecomics.com"
2516		get_first_comic_link = get_a_comicnavbase_comicnavfirst
2517		get_navi_link = get_link_rel_next
2518
2519		@classmethod
2520		def get_comic_info(cls, soup, link):
2521		"""Get information about a particular comics."""
2522		title = soup.find('h2', class_='post-title').string
2523		author = soup.find("span", class_="post-author").find("a").string
2524		date_str = soup.find("span", class_="post-date").string
2525		day = string_to_date(date_str, "%B %d, %Y")
2526		imgs = soup.find("div", id="comic").find_all("img")
2527		assert all(i['alt'] == i['title'] for i in imgs)
2528		assert len(imgs) <= 1
2529		alt = imgs[0]['alt'] if imgs else ""
2530		return {
2531		'img': [i['src'] for i in imgs],
2532		'title': title,
2533		'alt': alt,
2534		'author': author,
2535		'day': day.day,
2536		'month': day.month,
2537		'year': day.year
2538		}
2539
2540
2541		class TalesOfAbsurdity(GenericNavigableComic):
2542		"""Class to retrieve Tales Of Absurdity comics."""
2543		# Also on http://tapastic.com/series/Tales-Of-Absurdity
2544		# Also on http://talesofabsurdity.tumblr.com
2545		name = 'absurdity'
2546		long_name = 'Tales of Absurdity'
2547		url = 'http://talesofabsurdity.com'
2548		_categories = ('ABSURDITY', )
		@@ 2738-2764 (lines=27) @@
2735		name = 'unearthed'
2736		long_name = 'Unearthed Comics'
2737		url = 'http://unearthedcomics.com'
2738		_categories = ('UNEARTHED', )
2739		get_navi_link = get_link_rel_next
2740		get_first_comic_link = simulate_first_link
2741		first_url = 'http://unearthedcomics.com/comics/world-with-turn-signals/'
2742
2743		@classmethod
2744		def get_comic_info(cls, soup, link):
2745		"""Get information about a particular comics."""
2746		short_url = soup.find('link', rel='shortlink')['href']
2747		title_elt = soup.find('h1') or soup.find('h2')
2748		title = title_elt.string if title_elt else ""
2749		desc = soup.find('meta', property='og:description')
2750		date_str = soup.find('time', class_='published updated hidden')['datetime']
2751		day = string_to_date(date_str, "%Y-%m-%d")
2752		post = soup.find('div', class_="entry content entry-content type-portfolio")
2753		imgs = post.find_all('img')
2754		return {
2755		'title': title,
2756		'description': desc,
2757		'url2': short_url,
2758		'img': [i['src'] for i in imgs],
2759		'month': day.month,
2760		'year': day.year,
2761		'day': day.day,
2762		}
2763
2764
2765		class Optipess(GenericNavigableComic):
2766		"""Class to retrieve Optipess comics."""
2767		name = 'optipess'
		@@ 2488-2514 (lines=27) @@
2485		url = 'http://www.mister-and-me.com'
2486		get_first_comic_link = get_a_comicnavbase_comicnavfirst
2487		get_navi_link = get_link_rel_next
2488
2489		@classmethod
2490		def get_comic_info(cls, soup, link):
2491		"""Get information about a particular comics."""
2492		title = soup.find('h2', class_='post-title').string
2493		author = soup.find("span", class_="post-author").find("a").string
2494		date_str = soup.find("span", class_="post-date").string
2495		day = string_to_date(date_str, "%B %d, %Y")
2496		imgs = soup.find("div", id="comic").find_all("img")
2497		assert all(i['alt'] == i['title'] for i in imgs)
2498		assert len(imgs) <= 1
2499		alt = imgs[0]['alt'] if imgs else ""
2500		return {
2501		'img': [i['src'] for i in imgs],
2502		'title': title,
2503		'alt': alt,
2504		'author': author,
2505		'day': day.day,
2506		'month': day.month,
2507		'year': day.year
2508		}
2509
2510
2511		class LastPlaceComics(GenericNavigableComic):
2512		"""Class to retrieve Last Place Comics."""
2513		name = 'lastplace'
2514		long_name = 'Last Place Comics'
2515		url = "http://lastplacecomics.com"
2516		get_first_comic_link = get_a_comicnavbase_comicnavfirst
2517		get_navi_link = get_link_rel_next
		@@ 2321-2346 (lines=26) @@
2318		return None
2319
2320		@classmethod
2321		def get_comic_info(cls, soup, link):
2322		"""Get information about a particular comics."""
2323		title = soup.find('meta', attrs={'name': 'description'})["content"]
2324		description = soup.find('div', itemprop='articleBody').text
2325		author = soup.find('span', itemprop='author copyrightHolder').string
2326		imgs = soup.find_all('img', itemprop='image')
2327		assert all(i['title'] == i['alt'] for i in imgs)
2328		alt = imgs[0]['alt'] if imgs else ""
2329		date_str = soup.find('time', itemprop='datePublished')["datetime"]
2330		day = string_to_date(date_str, "%Y-%m-%d %H:%M:%S")
2331		return {
2332		'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
2333		'month': day.month,
2334		'year': day.year,
2335		'day': day.day,
2336		'author': author,
2337		'title': title,
2338		'alt': alt,
2339		'description': description,
2340		}
2341
2342
2343		class GerbilWithAJetpack(GenericNavigableComic):
2344		"""Class to retrieve GerbilWithAJetpack comics."""
2345		name = 'gerbil'
2346		long_name = 'Gerbil With A Jetpack'
2347		url = 'http://gerbilwithajetpack.com'
2348		get_first_comic_link = get_a_navi_navifirst
2349		get_navi_link = get_a_rel_next
		@@ 2019-2043 (lines=25) @@
2016		return get_soup_at_url(cls.url).find('a', title="First")
2017
2018		@classmethod
2019		def get_navi_link(cls, last_soup, next_):
2020		"""Get link to next or previous comic."""
2021		return last_soup.find('a', title='Next' if next_ else 'Previous')
2022
2023		@classmethod
2024		def get_comic_info(cls, soup, link):
2025		"""Get information about a particular comics."""
2026		title = soup.find('h1').string
2027		date_str = soup.find('span', class_='date').string.strip()
2028		day = string_to_date(date_str, "%B %d, %Y")
2029		imgs = soup.find('div', class_='comic').find_all('img', alt='', title='')
2030		return {
2031		'title': title,
2032		'img': [i['src'] for i in imgs],
2033		'month': day.month,
2034		'year': day.year,
2035		'day': day.day,
2036		}
2037
2038
2039		class ChuckleADuck(GenericNavigableComic):
2040		"""Class to retrieve Chuckle-A-Duck comics."""
2041		name = 'chuckleaduck'
2042		long_name = 'Chuckle-A-duck'
2043		url = 'http://chuckleaduck.com'
2044		get_first_comic_link = get_div_navfirst_a
2045		get_navi_link = get_link_rel_next
2046
		@@ 2378-2404 (lines=27) @@
2375		long_name = "Every Day Blues"
2376		url = "http://everydayblues.net"
2377		get_first_comic_link = get_a_navi_navifirst
2378		get_navi_link = get_link_rel_next
2379
2380		@classmethod
2381		def get_comic_info(cls, soup, link):
2382		"""Get information about a particular comics."""
2383		title = soup.find("h2", class_="post-title").string
2384		author = soup.find("span", class_="post-author").find("a").string
2385		date_str = soup.find("span", class_="post-date").string
2386		day = string_to_date(date_str, "%d. %B %Y", "de_DE.utf8")
2387		imgs = soup.find("div", id="comic").find_all("img")
2388		assert all(i['alt'] == i['title'] == title for i in imgs)
2389		assert len(imgs) <= 1
2390		return {
2391		'img': [i['src'] for i in imgs],
2392		'title': title,
2393		'author': author,
2394		'day': day.day,
2395		'month': day.month,
2396		'year': day.year
2397		}
2398
2399
2400		class BiterComics(GenericNavigableComic):
2401		"""Class to retrieve Biter Comics."""
2402		name = "biter"
2403		long_name = "Biter Comics"
2404		url = "http://www.bitercomics.com"
2405		get_first_comic_link = get_a_navi_navifirst
2406		get_navi_link = get_link_rel_next
2407
		@@ 1932-1958 (lines=27) @@
1929		'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
1930		'title': title,
1931		}
1932
1933
1934		class DiscoBleach(GenericEmptyComic): # Does not work anymore
1935		"""Class to retrieve Disco Bleach Comics."""
1936		name = 'discobleach'
1937		long_name = 'Disco Bleach'
1938		url = 'http://discobleach.com'
1939
1940
1941		class TubeyToons(GenericEmptyComic): # Does not work anymore
1942		"""Class to retrieve TubeyToons comics."""
1943		# Also on http://tapastic.com/series/Tubey-Toons
1944		# Also on http://tubeytoons.tumblr.com
1945		name = 'tubeytoons'
1946		long_name = 'Tubey Toons'
1947		url = 'http://tubeytoons.com'
1948		_categories = ('TUNEYTOONS', )
1949
1950
1951		class CompletelySeriousComics(GenericNavigableComic):
1952		"""Class to retrieve Completely Serious comics."""
1953		name = 'completelyserious'
1954		long_name = 'Completely Serious Comics'
1955		url = 'http://completelyseriouscomics.com'
1956		get_first_comic_link = get_a_navi_navifirst
1957		get_navi_link = get_a_navi_navinext
1958
1959		@classmethod
1960		def get_comic_info(cls, soup, link):
1961		"""Get information about a particular comics."""
		@@ 2119-2144 (lines=26) @@
2116		return {
2117		'day': day.day,
2118		'month': day.month,
2119		'year': day.year,
2120		'title': title,
2121		'title2': title2,
2122		'description': description,
2123		'tags': tags,
2124		'img': [i['src'] for i in imgs],
2125		'alt': ' '.join(i['alt'] for i in imgs),
2126		}
2127
2128		@classmethod
2129		def get_url_from_archive_element(cls, tr):
2130		_, td2, td3 = tr.find_all('td')
2131		return td2.find('a')['href']
2132
2133		@classmethod
2134		def get_archive_elements(cls):
2135		archive_url = urljoin_wrapper(cls.url, 'archive-2')
2136		return reversed(get_soup_at_url(archive_url).find('tbody').find_all('tr'))
2137
2138
2139		class HappleTea(GenericNavigableComic):
2140		"""Class to retrieve Happle Tea Comics."""
2141		name = 'happletea'
2142		long_name = 'Happle Tea'
2143		url = 'http://www.happletea.com'
2144		get_first_comic_link = get_a_navi_navifirst
2145		get_navi_link = get_link_rel_next
2146
2147		@classmethod
		@@ 2659-2683 (lines=25) @@
2656		title = soup.find('meta', property='og:title')['content']
2657		imgs = soup.find('div', class_='entry-content').find_all('img')
2658		title2 = ' '.join(i.get('title', '') for i in imgs)
2659		return {
2660		'title': title,
2661		'title2': title2,
2662		'description': desc,
2663		'img': [urljoin_wrapper(cls.url, convert_iri_to_plain_ascii_uri(i['src'])) for i in imgs],
2664		}
2665
2666
2667		class CommitStripFr(GenericCommitStrip):
2668		"""Class to retrieve Commit Strips in French."""
2669		name = 'commit_fr'
2670		long_name = 'Commit Strip (Fr)'
2671		url = 'http://www.commitstrip.com/fr'
2672		_categories = ('FRANCAIS', )
2673		first_url = 'http://www.commitstrip.com/fr/2012/02/22/interview/'
2674
2675
2676		class CommitStripEn(GenericCommitStrip):
2677		"""Class to retrieve Commit Strips in English."""
2678		name = 'commit_en'
2679		long_name = 'Commit Strip (En)'
2680		url = 'http://www.commitstrip.com/en'
2681		first_url = 'http://www.commitstrip.com/en/2012/02/22/interview/'
2682
2683
2684		class GenericBoumerie(GenericNavigableComic):
2685		"""Generic class to retrieve Boumeries comics in different languages."""
2686		get_first_comic_link = get_a_navi_navifirst
		@@ 338-360 (lines=23) @@
335		def get_next_comic(cls, last_comic):
336		"""Implementation of get_next_comic returning no comics."""
337		cls.log("comic is considered as empty - returning no comic")
338		return []
339
340
341		class ExtraFabulousComics(GenericNavigableComic):
342		"""Class to retrieve Extra Fabulous Comics."""
343		name = 'efc'
344		long_name = 'Extra Fabulous Comics'
345		url = 'http://extrafabulouscomics.com'
346		get_first_comic_link = get_a_navi_navifirst
347		get_navi_link = get_link_rel_next
348
349		@classmethod
350		def get_comic_info(cls, soup, link):
351		"""Get information about a particular comics."""
352		img_src_re = re.compile('^%s/wp-content/uploads/' % cls.url)
353		imgs = soup.find_all('img', src=img_src_re)
354		title = soup.find('meta', property='og:title')['content']
355		date_str = soup.find('meta', property='article:published_time')['content'][:10]
356		day = string_to_date(date_str, "%Y-%m-%d")
357		return {
358		'title': title,
359		'img': [i['src'] for i in imgs],
360		'month': day.month,
361		'year': day.year,
362		'day': day.day,
363		'prefix': title + '-'

SylvainDe / ComicBookMaker

Code Duplication Length = 23-28 lines in 12 locations

comics.py 12 locations