Code Duplication - SylvainDe/ComicBookMaker - Measure and Improve Code Quality continuously with Scrutinizer

Code Duplication Length = 23-28 lines in 12 locations

comics.py 12 locations


    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, author, publication date and image URLs of a comic.

        Every value is read from the `content` attribute of <meta> tags found
        in `soup`; `link` is not used. The og:image entries equal to one of
        the two hard-coded URLs below are left out of the 'img' list.
        """
        def meta_content(**criteria):
            # `content` attribute of the <meta> tag matching the criteria.
            return soup.find('meta', **criteria)['content']

        title = meta_content(property='og:title')
        author = meta_content(attrs={'name': 'shareaholic:article_author_name'})
        published = meta_content(attrs={'name': 'shareaholic:article_published_time'})
        # Only the first 10 characters (the "%Y-%m-%d" part) are parsed.
        date = string_to_date(published[:10], "%Y-%m-%d")
        candidates = (tag['content'] for tag in soup.find_all('meta', property='og:image'))
        ignored = {
            'http://respawncomic.com/wp-content/uploads/2016/03/site/HAROLD2.png',
            'http://respawncomic.com/wp-content/uploads/2016/03/site/DEVA.png'
        }
        return {
            'title': title,
            'author': author,
            'day': date.day,
            'month': date.month,
            'year': date.year,
            'img': [src for src in candidates if src not in ignored],
        }


class SafelyEndangered(GenericNavigableComic):
    """Class to retrieve Safely Endangered comics."""
    # Also on http://tumblr.safelyendangered.com
    name = 'endangered'
    long_name = 'Safely Endangered'
    url = 'http://www.safelyendangered.com'
    # Navigation is delegated to shared helpers; first_url is declared next to
    # simulate_first_link (presumably the page it starts from - confirm).
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.safelyendangered.com/comic/ignored/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Reads the title from the 'post-title' <h2>, the date from the
        'post-date' <span> (format '%B %d, %Y') and the images from the
        <div id="comic">. `link` is not used.

        Returns a dict with keys 'day', 'month', 'year', 'img' (list of image
        sources), 'title' and 'alt' (alt text of the first image, or "" when
        the page has no image).
        """
        title = soup.find('h2', class_='post-title').string
        date_str = soup.find('span', class_='post-date').string
        day = string_to_date(date_str, '%B %d, %Y')
        imgs = soup.find('div', id='comic').find_all('img')
        assert all(i['alt'] == i['title'] for i in imgs)
        # Guard against pages without any image instead of raising IndexError,
        # the same way the other scrapers in this file compute `alt`.
        alt = imgs[0]['alt'] if imgs else ""
        return {
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [i['src'] for i in imgs],
            'title': title,
            'alt': alt,
        }


class PicturesInBoxes(GenericNavigableComic):
    """Class to retrieve Pictures In Boxes comics."""
    # Also on http://picturesinboxescomic.tumblr.com
    name = 'picturesinboxes'
    long_name = 'Pictures in Boxes'
    url = 'http://www.picturesinboxes.com'
    get_navi_link = get_a_navi_navinext
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.picturesinboxes.com/2013/10/26/tetris/'


    # NOTE(review): the statements below re-assign name, long_name, url and
    # both navigation hooks with Last Place Comics values, overriding the
    # Pictures In Boxes values set above. This looks like two class bodies run
    # together - confirm against the original file before relying on them.
    """Class to retrieve Last Place Comics."""
    name = 'lastplace'
    long_name = 'Last Place Comics'
    url = "http://lastplacecomics.com"
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Reads title, author and date (format "%B %d, %Y") from the post
        header elements and the images from the <div id="comic">. `link` is
        not used.

        Returns a dict with keys 'img', 'title', 'alt', 'author', 'day',
        'month' and 'year'.
        """
        title = soup.find('h2', class_='post-title').string
        author = soup.find("span", class_="post-author").find("a").string
        date_str = soup.find("span", class_="post-date").string
        day = string_to_date(date_str, "%B %d, %Y")
        imgs = soup.find("div", id="comic").find_all("img")
        # Sanity checks on the page layout: alt and title agree on every
        # image and there is at most one image.
        assert all(i['alt'] == i['title'] for i in imgs)
        assert len(imgs) <= 1
        # Empty alt text when the page has no image.
        alt = imgs[0]['alt'] if imgs else ""
        return {
            'img': [i['src'] for i in imgs],
            'title': title,
            'alt': alt,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year
        }


class TalesOfAbsurdity(GenericNavigableComic):
    """Class to retrieve Tales Of Absurdity comics."""
    # Also on http://tapastic.com/series/Tales-Of-Absurdity
    # Also on http://talesofabsurdity.tumblr.com
    name = 'absurdity'

    # NOTE(review): the statements below re-assign `name` and declare
    # Unearthed Comics values inside this class. This looks like two class
    # bodies run together - confirm against the original file.
    """Class to retrieve Unearthed comics."""
    # Also on http://tapastic.com/series/UnearthedComics
    # Also on http://unearthedcomics.tumblr.com
    name = 'unearthed'
    long_name = 'Unearthed Comics'
    url = 'http://unearthedcomics.com'
    _categories = ('UNEARTHED', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://unearthedcomics.com/comics/world-with-turn-signals/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with keys 'title', 'description', 'url2' (the
        shortlink href), 'img', 'month', 'year' and 'day'. `link` is not used.
        """
        short_url = soup.find('link', rel='shortlink')['href']
        # The title is taken from the first <h1>, else the first <h2>, else "".
        title_elt = soup.find('h1') or soup.find('h2')
        title = title_elt.string if title_elt else ""
        # NOTE(review): unlike the other lookups here, this keeps the result
        # of find() itself rather than its 'content' attribute - confirm this
        # is intended for the 'description' value.
        desc = soup.find('meta', property='og:description')
        date_str = soup.find('time', class_='published updated hidden')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        post = soup.find('div', class_="entry content entry-content type-portfolio")
        imgs = post.find_all('img')
        return {
            'title': title,
            'description': desc,
            'url2': short_url,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }



    # Also on https://tapastic.com/series/Mister-and-Me
    name = 'mister'
    long_name = 'Mister & Me'
    url = 'http://www.mister-and-me.com'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (images, title, alt, author, date) of a comic.

        The values come from the post header elements and from the images
        inside the <div id="comic"> of `soup`; `link` is not used.
        """
        heading = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        writer = author_span.find("a").string
        posted = string_to_date(
            soup.find("span", class_="post-date").string, "%B %d, %Y")
        pictures = soup.find("div", id="comic").find_all("img")
        # Layout checks: alt and title agree, and there is at most one image.
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        assert len(pictures) <= 1
        if pictures:
            alt_text = pictures[0]['alt']
        else:
            alt_text = ""
        sources = [pic['src'] for pic in pictures]
        return {
            'img': sources,
            'title': heading,
            'alt': alt_text,
            'author': writer,
            'day': posted.day,
            'month': posted.month,
            'year': posted.year
        }


class LastPlaceComics(GenericNavigableComic):
    """Class to retrieve Last Place Comics."""
    name = 'lastplace'
    long_name = 'Last Place Comics'

        for link in last_soup.find_all('a', rel='next' if next_ else 'prev'):
            if link['href'] != '/comic':
                return link
        return None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect images, date, author, title, alt text and description.

        The values are read from the description <meta> tag and from the
        itemprop-annotated elements of `soup`; image sources are joined with
        `cls.url`. `link` is not used.
        """
        headline = soup.find('meta', attrs={'name': 'description'})["content"]
        body_text = soup.find('div', itemprop='articleBody').text
        creator = soup.find('span', itemprop='author copyrightHolder').string
        pictures = soup.find_all('img', itemprop='image')
        # Layout check: title and alt agree on every image.
        assert all(pic['title'] == pic['alt'] for pic in pictures)
        if pictures:
            alt_text = pictures[0]['alt']
        else:
            alt_text = ""
        stamp = soup.find('time', itemprop='datePublished')["datetime"]
        published = string_to_date(stamp, "%Y-%m-%d %H:%M:%S")
        sources = [urljoin_wrapper(cls.url, pic['src']) for pic in pictures]
        return {
            'img': sources,
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'author': creator,
            'title': headline,
            'alt': alt_text,
            'description': body_text,
        }


class GerbilWithAJetpack(GenericNavigableComic):
    """Class to retrieve GerbilWithAJetpack comics."""
    name = 'gerbil'
    long_name = 'Gerbil With A Jetpack'
    # NOTE(review): `cls.url` is read below but no `url` is assigned in this
    # class body - presumably provided elsewhere; confirm.

    # Declared as a classmethod like the other hooks of this class: the
    # function takes `cls` and reads `cls.url`.
    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        Fetches the page at `cls.url` and returns its <a title="First">
        element.
        """
        return get_soup_at_url(cls.url).find('a', title="First")

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns the <a> of `last_soup` titled 'Next' when `next_` is true,
        'Previous' otherwise.
        """
        return last_soup.find('a', title='Next' if next_ else 'Previous')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with keys 'title', 'img', 'month', 'year' and 'day'.
        `link` is not used.
        """
        title = soup.find('h1').string
        date_str = soup.find('span', class_='date').string.strip()
        day = string_to_date(date_str, "%B %d, %Y")
        # Only images whose alt and title are both empty are selected.
        imgs = soup.find('div', class_='comic').find_all('img', alt='', title='')
        return {
            'title': title,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }


class ChuckleADuck(GenericNavigableComic):
    """Class to retrieve Chuckle-A-Duck comics."""
    name = 'chuckleaduck'
    long_name = 'Chuckle-A-duck'
    url = 'http://chuckleaduck.com'
    # The first-comic lookup is delegated to a helper defined outside this class.
    get_first_comic_link = get_div_navfirst_a

class EveryDayBlues(GenericNavigableComic):
    """Class to retrieve EveryDayBlues Comics."""
    name = "blues"
    long_name = "Every Day Blues"
    url = "http://everydayblues.net"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (images, title, author, date) of a comic page.

        The date text is parsed with the "%d. %B %Y" format and the
        "de_DE.utf8" locale argument. `link` is not used.
        """
        heading = soup.find("h2", class_="post-title").string
        author_span = soup.find("span", class_="post-author")
        writer = author_span.find("a").string
        date_text = soup.find("span", class_="post-date").string
        posted = string_to_date(date_text, "%d. %B %Y", "de_DE.utf8")
        comic_div = soup.find("div", id="comic")
        pictures = comic_div.find_all("img")
        # Layout checks: alt, title and the post title all agree, and there
        # is at most one image.
        assert all(pic['alt'] == pic['title'] == heading for pic in pictures)
        assert len(pictures) <= 1
        sources = [pic['src'] for pic in pictures]
        return {
            'img': sources,
            'title': heading,
            'author': writer,
            'day': posted.day,
            'month': posted.month,
            'year': posted.year
        }


class BiterComics(GenericNavigableComic):
    """Class to retrieve Biter Comics."""
    name = "biter"
    long_name = "Biter Comics"
    url = "http://www.bitercomics.com"

            'year': day.year,
            'day': day.day,
            'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
            'title': title,
        }


class DiscoBleach(GenericEmptyComic):  # Does not work anymore
    """Class to retrieve Disco Bleach Comics."""
    # Only identification attributes are declared; no scraping hooks are
    # defined in this class.
    name = 'discobleach'
    long_name = 'Disco Bleach'
    url = 'http://discobleach.com'


class TubeyToons(GenericEmptyComic):  # Does not work anymore
    """Class to retrieve TubeyToons comics."""
    # Also on http://tapastic.com/series/Tubey-Toons
    # Also on http://tubeytoons.tumblr.com
    name = 'tubeytoons'
    long_name = 'Tubey Toons'
    url = 'http://tubeytoons.com'
    # NOTE(review): 'TUNEYTOONS' differs by one letter from the comic name
    # ('TUBEY') - possibly a typo; confirm before changing, since the value
    # may be referenced elsewhere.
    _categories = ('TUNEYTOONS', )


class CompletelySeriousComics(GenericNavigableComic):
    """Class to retrieve Completely Serious comics."""
    name = 'completelyserious'
    long_name = 'Completely Serious Comics'
    url = 'http://completelyseriouscomics.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod

        tags = ' '.join(t['content'] for t in soup.find_all('meta', property='article:tag'))
        imgs = soup.find('div', class_='entry-content').find_all('img')
        return {
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'title': title,
            'title2': title2,
            'description': description,
            'tags': tags,
            'img': [i['src'] for i in imgs],
            'alt': ' '.join(i['alt'] for i in imgs),
        }

    @classmethod
    def get_url_from_archive_element(cls, tr):
        """Return the href of the link found in the second cell of `tr`.

        `tr` must contain exactly three <td> cells; any other count makes the
        unpacking below raise ValueError.
        """
        # Only the middle cell is needed; the other two are discarded.
        _, td2, _ = tr.find_all('td')
        return td2.find('a')['href']

    @classmethod
    def get_archive_elements(cls):
        """Return the archive rows in reverse order.

        The rows are the <tr> elements of the <tbody> found on the page
        obtained by joining `cls.url` with 'archive-2'.
        """
        archive_page = get_soup_at_url(urljoin_wrapper(cls.url, 'archive-2'))
        table_body = archive_page.find('tbody')
        rows = table_body.find_all('tr')
        return reversed(rows)


class HappleTea(GenericNavigableComic):
    """Class to retrieve Happle Tea Comics."""
    name = 'happletea'
    long_name = 'Happle Tea'
    url = 'http://www.happletea.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    # Declared as a classmethod like the other get_comic_info implementations
    # in this file: the function takes `cls` and reads `cls.url`.
    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with keys 'title' (og:title), 'title2' (the title
        attributes of the images joined by spaces, "" for images without
        one), 'description' (og:description) and 'img'. `link` is not used.
        """
        desc = soup.find('meta', property='og:description')['content']
        title = soup.find('meta', property='og:title')['content']
        imgs = soup.find('div', class_='entry-content').find_all('img')
        title2 = ' '.join(i.get('title', '') for i in imgs)
        return {
            'title': title,
            'title2': title2,
            'description': desc,
            # Each source is converted with convert_iri_to_plain_ascii_uri and
            # then joined with the site URL.
            'img': [urljoin_wrapper(cls.url, convert_iri_to_plain_ascii_uri(i['src'])) for i in imgs],
        }


class CommitStripFr(GenericCommitStrip):
    """Class to retrieve Commit Strips in French."""
    # Only site-specific attributes are declared here; no method is defined
    # in this class.
    name = 'commit_fr'
    long_name = 'Commit Strip (Fr)'
    url = 'http://www.commitstrip.com/fr'
    _categories = ('FRANCAIS', )
    first_url = 'http://www.commitstrip.com/fr/2012/02/22/interview/'


class CommitStripEn(GenericCommitStrip):
    """Class to retrieve Commit Strips in English."""
    # Only site-specific attributes are declared here; no method is defined
    # in this class.
    name = 'commit_en'
    long_name = 'Commit Strip (En)'
    url = 'http://www.commitstrip.com/en'
    first_url = 'http://www.commitstrip.com/en/2012/02/22/interview/'



    def get_next_comic(cls, last_comic):
        """Log that the comic is treated as empty and return an empty list.

        `last_comic` is accepted but not used.
        """
        message = "comic is considered as empty - returning no comic"
        cls.log(message)
        no_comics = []
        return no_comics


class ExtraFabulousComics(GenericNavigableComic):
    """Class to retrieve Extra Fabulous Comics."""
    name = 'efc'
    long_name = 'Extra Fabulous Comics'
    url = 'http://extrafabulouscomics.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics."""
        img_src_re = re.compile('^%s/wp-content/uploads/' % cls.url)
        imgs = soup.find_all('img', src=img_src_re)
        title = soup.find('meta', property='og:title')['content']
        date_str = soup.find('meta', property='article:published_time')['content'][:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        return {
            'title': title,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'prefix': title + '-'

		@@ 1781-1806 (lines=26) @@
1778		@classmethod
1779		def get_comic_info(cls, soup, link):
1780		"""Get information about a particular comics."""
1781		title = soup.find('meta', property='og:title')['content']
1782		author = soup.find('meta', attrs={'name': 'shareaholic:article_author_name'})['content']
1783		date_str = soup.find('meta', attrs={'name': 'shareaholic:article_published_time'})['content']
1784		date_str = date_str[:10]
1785		day = string_to_date(date_str, "%Y-%m-%d")
1786		imgs = soup.find_all('meta', property='og:image')
1787		skip_imgs = {
1788		'http://respawncomic.com/wp-content/uploads/2016/03/site/HAROLD2.png',
1789		'http://respawncomic.com/wp-content/uploads/2016/03/site/DEVA.png'
1790		}
1791		return {
1792		'title': title,
1793		'author': author,
1794		'day': day.day,
1795		'month': day.month,
1796		'year': day.year,
1797		'img': [i['content'] for i in imgs if i['content'] not in skip_imgs],
1798		}
1799
1800
1801		class SafelyEndangered(GenericNavigableComic):
1802		"""Class to retrieve Safely Endangered comics."""
1803		# Also on http://tumblr.safelyendangered.com
1804		name = 'endangered'
1805		long_name = 'Safely Endangered'
1806		url = 'http://www.safelyendangered.com'
1807		get_navi_link = get_link_rel_next
1808		get_first_comic_link = simulate_first_link
1809		first_url = 'http://www.safelyendangered.com/comic/ignored/'
		@@ 1810-1836 (lines=27) @@
1807		get_navi_link = get_link_rel_next
1808		get_first_comic_link = simulate_first_link
1809		first_url = 'http://www.safelyendangered.com/comic/ignored/'
1810
1811		@classmethod
1812		def get_comic_info(cls, soup, link):
1813		"""Get information about a particular comics."""
1814		title = soup.find('h2', class_='post-title').string
1815		date_str = soup.find('span', class_='post-date').string
1816		day = string_to_date(date_str, '%B %d, %Y')
1817		imgs = soup.find('div', id='comic').find_all('img')
1818		alt = imgs[0]['alt']
1819		assert all(i['alt'] == i['title'] for i in imgs)
1820		return {
1821		'day': day.day,
1822		'month': day.month,
1823		'year': day.year,
1824		'img': [i['src'] for i in imgs],
1825		'title': title,
1826		'alt': alt,
1827		}
1828
1829
1830		class PicturesInBoxes(GenericNavigableComic):
1831		"""Class to retrieve Pictures In Boxes comics."""
1832		# Also on http://picturesinboxescomic.tumblr.com
1833		name = 'picturesinboxes'
1834		long_name = 'Pictures in Boxes'
1835		url = 'http://www.picturesinboxes.com'
1836		get_navi_link = get_a_navi_navinext
1837		get_first_comic_link = simulate_first_link
1838		first_url = 'http://www.picturesinboxes.com/2013/10/26/tetris/'
1839
		@@ 2518-2545 (lines=28) @@
2515		"""Class to retrieve Last Place Comics."""
2516		name = 'lastplace'
2517		long_name = 'Last Place Comics'
2518		url = "http://lastplacecomics.com"
2519		get_first_comic_link = get_a_comicnavbase_comicnavfirst
2520		get_navi_link = get_link_rel_next
2521
2522		@classmethod
2523		def get_comic_info(cls, soup, link):
2524		"""Get information about a particular comics."""
2525		title = soup.find('h2', class_='post-title').string
2526		author = soup.find("span", class_="post-author").find("a").string
2527		date_str = soup.find("span", class_="post-date").string
2528		day = string_to_date(date_str, "%B %d, %Y")
2529		imgs = soup.find("div", id="comic").find_all("img")
2530		assert all(i['alt'] == i['title'] for i in imgs)
2531		assert len(imgs) <= 1
2532		alt = imgs[0]['alt'] if imgs else ""
2533		return {
2534		'img': [i['src'] for i in imgs],
2535		'title': title,
2536		'alt': alt,
2537		'author': author,
2538		'day': day.day,
2539		'month': day.month,
2540		'year': day.year
2541		}
2542
2543
2544		class TalesOfAbsurdity(GenericNavigableComic):
2545		"""Class to retrieve Tales Of Absurdity comics."""
2546		# Also on http://tapastic.com/series/Tales-Of-Absurdity
2547		# Also on http://talesofabsurdity.tumblr.com
2548		name = 'absurdity'
		@@ 2738-2764 (lines=27) @@
2735		"""Class to retrieve Unearthed comics."""
2736		# Also on http://tapastic.com/series/UnearthedComics
2737		# Also on http://unearthedcomics.tumblr.com
2738		name = 'unearthed'
2739		long_name = 'Unearthed Comics'
2740		url = 'http://unearthedcomics.com'
2741		_categories = ('UNEARTHED', )
2742		get_navi_link = get_link_rel_next
2743		get_first_comic_link = simulate_first_link
2744		first_url = 'http://unearthedcomics.com/comics/world-with-turn-signals/'
2745
2746		@classmethod
2747		def get_comic_info(cls, soup, link):
2748		"""Get information about a particular comics."""
2749		short_url = soup.find('link', rel='shortlink')['href']
2750		title_elt = soup.find('h1') or soup.find('h2')
2751		title = title_elt.string if title_elt else ""
2752		desc = soup.find('meta', property='og:description')
2753		date_str = soup.find('time', class_='published updated hidden')['datetime']
2754		day = string_to_date(date_str, "%Y-%m-%d")
2755		post = soup.find('div', class_="entry content entry-content type-portfolio")
2756		imgs = post.find_all('img')
2757		return {
2758		'title': title,
2759		'description': desc,
2760		'url2': short_url,
2761		'img': [i['src'] for i in imgs],
2762		'month': day.month,
2763		'year': day.year,
2764		'day': day.day,
2765		}
2766
2767
		@@ 2488-2514 (lines=27) @@
2485		# Also on https://tapastic.com/series/Mister-and-Me
2486		name = 'mister'
2487		long_name = 'Mister & Me'
2488		url = 'http://www.mister-and-me.com'
2489		get_first_comic_link = get_a_comicnavbase_comicnavfirst
2490		get_navi_link = get_link_rel_next
2491
2492		@classmethod
2493		def get_comic_info(cls, soup, link):
2494		"""Get information about a particular comics."""
2495		title = soup.find('h2', class_='post-title').string
2496		author = soup.find("span", class_="post-author").find("a").string
2497		date_str = soup.find("span", class_="post-date").string
2498		day = string_to_date(date_str, "%B %d, %Y")
2499		imgs = soup.find("div", id="comic").find_all("img")
2500		assert all(i['alt'] == i['title'] for i in imgs)
2501		assert len(imgs) <= 1
2502		alt = imgs[0]['alt'] if imgs else ""
2503		return {
2504		'img': [i['src'] for i in imgs],
2505		'title': title,
2506		'alt': alt,
2507		'author': author,
2508		'day': day.day,
2509		'month': day.month,
2510		'year': day.year
2511		}
2512
2513
2514		class LastPlaceComics(GenericNavigableComic):
2515		"""Class to retrieve Last Place Comics."""
2516		name = 'lastplace'
2517		long_name = 'Last Place Comics'
		@@ 2321-2346 (lines=26) @@
2318		for link in last_soup.find_all('a', rel='next' if next_ else 'prev'):
2319		if link['href'] != '/comic':
2320		return link
2321		return None
2322
2323		@classmethod
2324		def get_comic_info(cls, soup, link):
2325		"""Get information about a particular comics."""
2326		title = soup.find('meta', attrs={'name': 'description'})["content"]
2327		description = soup.find('div', itemprop='articleBody').text
2328		author = soup.find('span', itemprop='author copyrightHolder').string
2329		imgs = soup.find_all('img', itemprop='image')
2330		assert all(i['title'] == i['alt'] for i in imgs)
2331		alt = imgs[0]['alt'] if imgs else ""
2332		date_str = soup.find('time', itemprop='datePublished')["datetime"]
2333		day = string_to_date(date_str, "%Y-%m-%d %H:%M:%S")
2334		return {
2335		'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
2336		'month': day.month,
2337		'year': day.year,
2338		'day': day.day,
2339		'author': author,
2340		'title': title,
2341		'alt': alt,
2342		'description': description,
2343		}
2344
2345
2346		class GerbilWithAJetpack(GenericNavigableComic):
2347		"""Class to retrieve GerbilWithAJetpack comics."""
2348		name = 'gerbil'
2349		long_name = 'Gerbil With A Jetpack'
		@@ 2019-2043 (lines=25) @@
2016		def get_first_comic_link(cls):
2017		"""Get link to first comics."""
2018		return get_soup_at_url(cls.url).find('a', title="First")
2019
2020		@classmethod
2021		def get_navi_link(cls, last_soup, next_):
2022		"""Get link to next or previous comic."""
2023		return last_soup.find('a', title='Next' if next_ else 'Previous')
2024
2025		@classmethod
2026		def get_comic_info(cls, soup, link):
2027		"""Get information about a particular comics."""
2028		title = soup.find('h1').string
2029		date_str = soup.find('span', class_='date').string.strip()
2030		day = string_to_date(date_str, "%B %d, %Y")
2031		imgs = soup.find('div', class_='comic').find_all('img', alt='', title='')
2032		return {
2033		'title': title,
2034		'img': [i['src'] for i in imgs],
2035		'month': day.month,
2036		'year': day.year,
2037		'day': day.day,
2038		}
2039
2040
2041		class ChuckleADuck(GenericNavigableComic):
2042		"""Class to retrieve Chuckle-A-Duck comics."""
2043		name = 'chuckleaduck'
2044		long_name = 'Chuckle-A-duck'
2045		url = 'http://chuckleaduck.com'
2046		get_first_comic_link = get_div_navfirst_a
		@@ 2378-2404 (lines=27) @@
2375		class EveryDayBlues(GenericNavigableComic):
2376		"""Class to retrieve EveryDayBlues Comics."""
2377		name = "blues"
2378		long_name = "Every Day Blues"
2379		url = "http://everydayblues.net"
2380		get_first_comic_link = get_a_navi_navifirst
2381		get_navi_link = get_link_rel_next
2382
2383		@classmethod
2384		def get_comic_info(cls, soup, link):
2385		"""Get information about a particular comics."""
2386		title = soup.find("h2", class_="post-title").string
2387		author = soup.find("span", class_="post-author").find("a").string
2388		date_str = soup.find("span", class_="post-date").string
2389		day = string_to_date(date_str, "%d. %B %Y", "de_DE.utf8")
2390		imgs = soup.find("div", id="comic").find_all("img")
2391		assert all(i['alt'] == i['title'] == title for i in imgs)
2392		assert len(imgs) <= 1
2393		return {
2394		'img': [i['src'] for i in imgs],
2395		'title': title,
2396		'author': author,
2397		'day': day.day,
2398		'month': day.month,
2399		'year': day.year
2400		}
2401
2402
2403		class BiterComics(GenericNavigableComic):
2404		"""Class to retrieve Biter Comics."""
2405		name = "biter"
2406		long_name = "Biter Comics"
2407		url = "http://www.bitercomics.com"
		@@ 1932-1958 (lines=27) @@
1929		'year': day.year,
1930		'day': day.day,
1931		'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
1932		'title': title,
1933		}
1934
1935
1936		class DiscoBleach(GenericEmptyComic): # Does not work anymore
1937		"""Class to retrieve Disco Bleach Comics."""
1938		name = 'discobleach'
1939		long_name = 'Disco Bleach'
1940		url = 'http://discobleach.com'
1941
1942
1943		class TubeyToons(GenericEmptyComic): # Does not work anymore
1944		"""Class to retrieve TubeyToons comics."""
1945		# Also on http://tapastic.com/series/Tubey-Toons
1946		# Also on http://tubeytoons.tumblr.com
1947		name = 'tubeytoons'
1948		long_name = 'Tubey Toons'
1949		url = 'http://tubeytoons.com'
1950		_categories = ('TUNEYTOONS', )
1951
1952
1953		class CompletelySeriousComics(GenericNavigableComic):
1954		"""Class to retrieve Completely Serious comics."""
1955		name = 'completelyserious'
1956		long_name = 'Completely Serious Comics'
1957		url = 'http://completelyseriouscomics.com'
1958		get_first_comic_link = get_a_navi_navifirst
1959		get_navi_link = get_a_navi_navinext
1960
1961		@classmethod
		@@ 2119-2144 (lines=26) @@
2116		tags = ' '.join(t['content'] for t in soup.find_all('meta', property='article:tag'))
2117		imgs = soup.find('div', class_='entry-content').find_all('img')
2118		return {
2119		'day': day.day,
2120		'month': day.month,
2121		'year': day.year,
2122		'title': title,
2123		'title2': title2,
2124		'description': description,
2125		'tags': tags,
2126		'img': [i['src'] for i in imgs],
2127		'alt': ' '.join(i['alt'] for i in imgs),
2128		}
2129
2130		@classmethod
2131		def get_url_from_archive_element(cls, tr):
2132		_, td2, td3 = tr.find_all('td')
2133		return td2.find('a')['href']
2134
2135		@classmethod
2136		def get_archive_elements(cls):
2137		archive_url = urljoin_wrapper(cls.url, 'archive-2')
2138		return reversed(get_soup_at_url(archive_url).find('tbody').find_all('tr'))
2139
2140
2141		class HappleTea(GenericNavigableComic):
2142		"""Class to retrieve Happle Tea Comics."""
2143		name = 'happletea'
2144		long_name = 'Happle Tea'
2145		url = 'http://www.happletea.com'
2146		get_first_comic_link = get_a_navi_navifirst
2147		get_navi_link = get_link_rel_next
		@@ 2659-2683 (lines=25) @@
2656		def get_comic_info(cls, soup, link):
2657		"""Get information about a particular comics."""
2658		desc = soup.find('meta', property='og:description')['content']
2659		title = soup.find('meta', property='og:title')['content']
2660		imgs = soup.find('div', class_='entry-content').find_all('img')
2661		title2 = ' '.join(i.get('title', '') for i in imgs)
2662		return {
2663		'title': title,
2664		'title2': title2,
2665		'description': desc,
2666		'img': [urljoin_wrapper(cls.url, convert_iri_to_plain_ascii_uri(i['src'])) for i in imgs],
2667		}
2668
2669
2670		class CommitStripFr(GenericCommitStrip):
2671		"""Class to retrieve Commit Strips in French."""
2672		name = 'commit_fr'
2673		long_name = 'Commit Strip (Fr)'
2674		url = 'http://www.commitstrip.com/fr'
2675		_categories = ('FRANCAIS', )
2676		first_url = 'http://www.commitstrip.com/fr/2012/02/22/interview/'
2677
2678
2679		class CommitStripEn(GenericCommitStrip):
2680		"""Class to retrieve Commit Strips in English."""
2681		name = 'commit_en'
2682		long_name = 'Commit Strip (En)'
2683		url = 'http://www.commitstrip.com/en'
2684		first_url = 'http://www.commitstrip.com/en/2012/02/22/interview/'
2685
2686
		@@ 338-360 (lines=23) @@
335		def get_next_comic(cls, last_comic):
336		"""Implementation of get_next_comic returning no comics."""
337		cls.log("comic is considered as empty - returning no comic")
338		return []
339
340
341		class ExtraFabulousComics(GenericNavigableComic):
342		"""Class to retrieve Extra Fabulous Comics."""
343		name = 'efc'
344		long_name = 'Extra Fabulous Comics'
345		url = 'http://extrafabulouscomics.com'
346		get_first_comic_link = get_a_navi_navifirst
347		get_navi_link = get_link_rel_next
348
349		@classmethod
350		def get_comic_info(cls, soup, link):
351		"""Get information about a particular comics."""
352		img_src_re = re.compile('^%s/wp-content/uploads/' % cls.url)
353		imgs = soup.find_all('img', src=img_src_re)
354		title = soup.find('meta', property='og:title')['content']
355		date_str = soup.find('meta', property='article:published_time')['content'][:10]
356		day = string_to_date(date_str, "%Y-%m-%d")
357		return {
358		'title': title,
359		'img': [i['src'] for i in imgs],
360		'month': day.month,
361		'year': day.year,
362		'day': day.day,
363		'prefix': title + '-'

SylvainDe / ComicBookMaker

Code Duplication Length = 23-28 lines in 12 locations

comics.py 12 locations