Code Duplication - SylvainDe/ComicBookMaker - Measure and Improve Code Quality continuously with Scrutinizer

Code Duplication Length = 23-28 lines in 12 locations

comics.py 12 locations


    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic record from the <meta> tags of the page.

        The title and the images are read from the Open Graph tags, the
        author and the publication date from the shareaholic tags. The
        two image URLs listed in `ignored` are left out of the result.
        `link` is not used.
        """
        def named_meta(name):
            # Content of the first <meta name=...> tag of the page.
            return soup.find('meta', attrs={'name': name})['content']

        ignored = {
            'http://respawncomic.com/wp-content/uploads/2016/03/site/HAROLD2.png',
            'http://respawncomic.com/wp-content/uploads/2016/03/site/DEVA.png'
        }
        title = soup.find('meta', property='og:title')['content']
        author = named_meta('shareaholic:article_author_name')
        # Only the first 10 characters are parsed as the %Y-%m-%d date.
        published = string_to_date(
            named_meta('shareaholic:article_published_time')[:10], "%Y-%m-%d")
        img_urls = []
        for tag in soup.find_all('meta', property='og:image'):
            src = tag['content']
            if src not in ignored:
                img_urls.append(src)
        return {
            'title': title,
            'author': author,
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': img_urls,
        }


class SafelyEndangered(GenericNavigableComic):
    """Class to retrieve Safely Endangered comics."""
    # Also on http://tumblr.safelyendangered.com
    name = 'endangered'
    long_name = 'Safely Endangered'
    url = 'http://www.safelyendangered.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.safelyendangered.com/comic/ignored/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with the title, the publication date split into
        day/month/year, the image URLs found in the div with id 'comic'
        and the alt text of the first image (empty string when the page
        has no image). `link` is not used.
        """
        title = soup.find('h2', class_='post-title').string
        date_str = soup.find('span', class_='post-date').string
        day = string_to_date(date_str, '%B %d, %Y')
        imgs = soup.find('div', id='comic').find_all('img')
        assert all(i['alt'] == i['title'] for i in imgs)
        # A page without image must not abort the retrieval with an IndexError
        alt = imgs[0]['alt'] if imgs else ""
        return {
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [i['src'] for i in imgs],
            'title': title,
            'alt': alt,
        }


class PicturesInBoxes(GenericNavigableComic):
    """Class to retrieve Pictures In Boxes comics."""
    # Also on http://picturesinboxescomic.tumblr.com
    name = 'picturesinboxes'
    long_name = 'Pictures in Boxes'
    url = 'http://www.picturesinboxes.com'
    # Navigation is delegated to helpers defined elsewhere in the module.
    get_navi_link = get_a_navi_navinext
    get_first_comic_link = simulate_first_link
    # NOTE(review): presumably the page simulate_first_link starts from - confirm.
    first_url = 'http://www.picturesinboxes.com/2013/10/26/tetris/'


class LastPlaceComics(GenericNavigableComic):
    """Retrieve the comics published on Last Place Comics."""
    name = 'lastplace'
    long_name = 'Last Place Comics'
    url = "http://lastplacecomics.com"
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, author, date, image and alt text from a comic page.

        At most one image is expected in the div with id 'comic' and its
        alt and title attributes are expected to match; both expectations
        are checked with assertions. `link` is not used.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        published = string_to_date(
            soup.find("span", class_="post-date").string, "%B %d, %Y")
        comic_div = soup.find("div", id="comic")
        imgs = comic_div.find_all("img")
        assert all(img['alt'] == img['title'] for img in imgs)
        assert len(imgs) <= 1
        # Pages without image get an empty alt text.
        alt = ""
        if imgs:
            alt = imgs[0]['alt']
        return {
            'img': [img['src'] for img in imgs],
            'title': title,
            'alt': alt,
            'author': author,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }


class TalesOfAbsurdity(GenericNavigableComic):
    """Class to retrieve Tales Of Absurdity comics."""
    # Also on http://tapastic.com/series/Tales-Of-Absurdity
    # Also on http://talesofabsurdity.tumblr.com

class UnearthedComics(GenericNavigableComic):
    """Class to retrieve Unearthed comics."""
    # Also on http://tapastic.com/series/UnearthedComics
    # Also on http://unearthedcomics.tumblr.com
    name = 'unearthed'
    long_name = 'Unearthed Comics'
    url = 'http://unearthedcomics.com'
    _categories = ('UNEARTHED', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://unearthedcomics.com/comics/world-with-turn-signals/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with the title (empty string when the page has
        neither h1 nor h2), the og:description text (empty string when
        the tag is missing), the shortlink as 'url2', the image URLs of
        the post and the publication date split into day/month/year.
        `link` is not used.
        """
        short_url = soup.find('link', rel='shortlink')['href']
        title_elt = soup.find('h1') or soup.find('h2')
        title = title_elt.string if title_elt else ""
        # Keep the text of the description, not the <meta> tag object itself
        desc_elt = soup.find('meta', property='og:description')
        desc = desc_elt['content'] if desc_elt is not None else ""
        date_str = soup.find('time', class_='published updated hidden')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        post = soup.find('div', class_="entry content entry-content type-portfolio")
        imgs = post.find_all('img')
        return {
            'title': title,
            'description': desc,
            'url2': short_url,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }


    # Also on http://www.gocomics.com/mister-and-me
    # Also on https://tapastic.com/series/Mister-and-Me
    name = 'mister'
    long_name = 'Mister & Me'
    url = 'http://www.mister-and-me.com'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect title, author, date, image and alt text of a comic page.

        The div with id 'comic' is expected to hold at most one image
        whose alt and title attributes match; both expectations are
        checked with assertions. `link` is not used.
        """
        title = soup.find('h2', class_='post-title').string
        author_link = soup.find("span", class_="post-author").find("a")
        author = author_link.string
        date_span = soup.find("span", class_="post-date")
        published = string_to_date(date_span.string, "%B %d, %Y")
        imgs = soup.find("div", id="comic").find_all("img")
        assert all(img['alt'] == img['title'] for img in imgs)
        assert len(imgs) <= 1
        # No image on the page means no alt text either.
        if imgs:
            alt = imgs[0]['alt']
        else:
            alt = ""
        img_urls = [img['src'] for img in imgs]
        return {
            'img': img_urls,
            'title': title,
            'alt': alt,
            'author': author,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }


class LastPlaceComics(GenericNavigableComic):
    """Class to retrieve Last Place Comics."""
    name = 'lastplace'

        """Get link to next or previous comic."""
        for link in last_soup.find_all('a', rel='next' if next_ else 'prev'):
            if link['href'] != '/comic':
                return link
        return None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Read the comic data from the itemprop-annotated page.

        The title comes from the 'description' meta tag and the
        description from the text of the articleBody div. Image sources
        are joined with `cls.url`. The alt text is the one of the first
        image, or an empty string when there is none. `link` is not used.
        """
        title = soup.find('meta', attrs={'name': 'description'})["content"]
        body = soup.find('div', itemprop='articleBody')
        description = body.text
        author = soup.find('span', itemprop='author copyrightHolder').string
        imgs = soup.find_all('img', itemprop='image')
        assert all(img['title'] == img['alt'] for img in imgs)
        alt = ""
        if imgs:
            alt = imgs[0]['alt']
        time_elt = soup.find('time', itemprop='datePublished')
        published = string_to_date(time_elt["datetime"], "%Y-%m-%d %H:%M:%S")
        img_urls = []
        for img in imgs:
            img_urls.append(urljoin_wrapper(cls.url, img['src']))
        return {
            'img': img_urls,
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'author': author,
            'title': title,
            'alt': alt,
            'description': description,
        }


class GerbilWithAJetpack(GenericNavigableComic):
    """Class to retrieve GerbilWithAJetpack comics."""
    name = 'gerbil'

    def get_first_comic_link(cls):
        """Look up the 'a' element titled "First" on the page at `cls.url`."""
        main_page = get_soup_at_url(cls.url)
        return main_page.find('a', title="First")

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Find the 'a' element titled 'Next' (or 'Previous' when `next_` is falsy)."""
        if next_:
            wanted_title = 'Next'
        else:
            wanted_title = 'Previous'
        return last_soup.find('a', title=wanted_title)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract the title, the date and the image URLs of a comic page.

        Images are searched in the div of class 'comic' with the
        alt='' and title='' filters. `link` is not used.
        """
        title = soup.find('h1').string
        raw_date = soup.find('span', class_='date').string
        published = string_to_date(raw_date.strip(), "%B %d, %Y")
        comic_div = soup.find('div', class_='comic')
        img_urls = [img['src']
                    for img in comic_div.find_all('img', alt='', title='')]
        return {
            'title': title,
            'img': img_urls,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }


class ChuckleADuck(GenericNavigableComic):
    """Class to retrieve Chuckle-A-Duck comics."""
    name = 'chuckleaduck'
    long_name = 'Chuckle-A-duck'
    url = 'http://chuckleaduck.com'
    # Lookup of the first comic is delegated to a helper defined elsewhere in the module.
    get_first_comic_link = get_div_navfirst_a


class EveryDayBlues(GenericNavigableComic):
    """Retrieve the comics published on Every Day Blues."""
    name = "blues"
    long_name = "Every Day Blues"
    url = "http://everydayblues.net"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, author, date and image of a comic page.

        The date is parsed with the "%d. %B %Y" format and the
        "de_DE.utf8" locale argument. At most one image is expected and
        its alt and title attributes are expected to equal the page
        title; both expectations are checked with assertions. `link` is
        not used.
        """
        title = soup.find("h2", class_="post-title").string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        date_span = soup.find("span", class_="post-date")
        published = string_to_date(date_span.string, "%d. %B %Y", "de_DE.utf8")
        comic_div = soup.find("div", id="comic")
        imgs = comic_div.find_all("img")
        assert all(img['alt'] == img['title'] == title for img in imgs)
        assert len(imgs) <= 1
        img_urls = [img['src'] for img in imgs]
        return {
            'img': img_urls,
            'title': title,
            'author': author,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }


class BiterComics(GenericNavigableComic):
    """Class to retrieve Biter Comics."""
    name = "biter"
    long_name = "Biter Comics"

            'year': day.year,
            'day': day.day,
            'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
            'title': title,
        }


class DiscoBleach(GenericEmptyComic):  # Does not work anymore
    """Class to retrieve Disco Bleach Comics.

    Retrieval is flagged as no longer working, which is presumably why
    the class derives from GenericEmptyComic (to be confirmed).
    """
    name = 'discobleach'
    long_name = 'Disco Bleach'
    url = 'http://discobleach.com'


class TubeyToons(GenericEmptyComic):  # Does not work anymore
    """Class to retrieve TubeyToons comics.

    Retrieval is flagged as no longer working, which is presumably why
    the class derives from GenericEmptyComic (to be confirmed).
    """
    # Also on http://tapastic.com/series/Tubey-Toons
    # Also on http://tubeytoons.tumblr.com
    name = 'tubeytoons'
    long_name = 'Tubey Toons'
    url = 'http://tubeytoons.com'
    # NOTE(review): 'TUNEYTOONS' looks like a typo for 'TUBEYTOONS' - confirm
    # before renaming, the value may already be relied upon elsewhere.
    _categories = ('TUNEYTOONS', )


class CompletelySeriousComics(GenericNavigableComic):
    """Class to retrieve Completely Serious comics."""
    name = 'completelyserious'
    long_name = 'Completely Serious Comics'
    url = 'http://completelyseriouscomics.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod

        tags = ' '.join(t['content'] for t in soup.find_all('meta', property='article:tag'))
        imgs = soup.find('div', class_='entry-content').find_all('img')
        return {
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'title': title,
            'title2': title2,
            'description': description,
            'tags': tags,
            'img': [i['src'] for i in imgs],
            'alt': ' '.join(i['alt'] for i in imgs),
        }

    @classmethod
    def get_url_from_archive_element(cls, tr):
        """Get the comic url from a row of the archive table.

        The row must hold exactly three 'td' cells (the unpacking raises
        ValueError otherwise). The url is the href of the first link
        found in the second cell.
        """
        # Only the middle cell is needed; the other two are discarded.
        _, link_cell, _ = tr.find_all('td')
        return link_cell.find('a')['href']

    @classmethod
    def get_archive_elements(cls):
        """Return the 'tr' rows of the archive page's tbody, in reverse order.

        The archive page is the 'archive-2' path joined with `cls.url`.
        """
        archive_page = get_soup_at_url(urljoin_wrapper(cls.url, 'archive-2'))
        rows = archive_page.find('tbody').find_all('tr')
        return reversed(rows)


class HappleTea(GenericNavigableComic):
    """Retrieve the comics published on Happle Tea."""
    name = 'happletea'
    long_name = 'Happle Tea'
    url = 'http://www.happletea.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, description, image URLs and image titles of a page.

        'title2' is the space-joined title attributes of the images found
        in the div of class 'entry-content' (an image without title
        contributes an empty string). Image sources go through
        convert_iri_to_plain_ascii_uri and are joined with `cls.url`.
        `link` is not used.
        """
        desc = soup.find('meta', property='og:description')['content']
        title = soup.find('meta', property='og:title')['content']
        content_div = soup.find('div', class_='entry-content')
        imgs = content_div.find_all('img')
        hover_texts = [img.get('title', '') for img in imgs]
        img_urls = []
        for img in imgs:
            ascii_src = convert_iri_to_plain_ascii_uri(img['src'])
            img_urls.append(urljoin_wrapper(cls.url, ascii_src))
        return {
            'title': title,
            'title2': ' '.join(hover_texts),
            'description': desc,
            'img': img_urls,
        }


class CommitStripFr(GenericCommitStrip):
    """Class to retrieve Commit Strips in French.

    Only site-specific settings are defined here; everything else comes
    from GenericCommitStrip.
    """
    name = 'commit_fr'
    long_name = 'Commit Strip (Fr)'
    url = 'http://www.commitstrip.com/fr'
    _categories = ('FRANCAIS', )
    # NOTE(review): presumably the page from which the retrieval starts - confirm.
    first_url = 'http://www.commitstrip.com/fr/2012/02/22/interview/'


class CommitStripEn(GenericCommitStrip):
    """Class to retrieve Commit Strips in English.

    Only site-specific settings are defined here; everything else comes
    from GenericCommitStrip.
    """
    name = 'commit_en'
    long_name = 'Commit Strip (En)'
    url = 'http://www.commitstrip.com/en'
    # NOTE(review): presumably the page from which the retrieval starts - confirm.
    first_url = 'http://www.commitstrip.com/en/2012/02/22/interview/'


    def get_next_comic(cls, last_comic):
        """Log that the comic is treated as empty and return an empty list."""
        no_comics = []
        cls.log("comic is considered as empty - returning no comic")
        return no_comics


class ExtraFabulousComics(GenericNavigableComic):
    """Class to retrieve Extra Fabulous Comics."""
    name = 'efc'
    long_name = 'Extra Fabulous Comics'
    url = 'http://extrafabulouscomics.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics."""
        img_src_re = re.compile('^%s/wp-content/uploads/' % cls.url)
        imgs = soup.find_all('img', src=img_src_re)
        title = soup.find('meta', property='og:title')['content']
        date_str = soup.find('meta', property='article:published_time')['content'][:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        return {
            'title': title,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'prefix': title + '-'

		@@ 1781-1806 (lines=26) @@
1778		@classmethod
1779		def get_comic_info(cls, soup, link):
1780		"""Get information about a particular comics."""
1781		title = soup.find('meta', property='og:title')['content']
1782		author = soup.find('meta', attrs={'name': 'shareaholic:article_author_name'})['content']
1783		date_str = soup.find('meta', attrs={'name': 'shareaholic:article_published_time'})['content']
1784		date_str = date_str[:10]
1785		day = string_to_date(date_str, "%Y-%m-%d")
1786		imgs = soup.find_all('meta', property='og:image')
1787		skip_imgs = {
1788		'http://respawncomic.com/wp-content/uploads/2016/03/site/HAROLD2.png',
1789		'http://respawncomic.com/wp-content/uploads/2016/03/site/DEVA.png'
1790		}
1791		return {
1792		'title': title,
1793		'author': author,
1794		'day': day.day,
1795		'month': day.month,
1796		'year': day.year,
1797		'img': [i['content'] for i in imgs if i['content'] not in skip_imgs],
1798		}
1799
1800
1801		class SafelyEndangered(GenericNavigableComic):
1802		"""Class to retrieve Safely Endangered comics."""
1803		# Also on http://tumblr.safelyendangered.com
1804		name = 'endangered'
1805		long_name = 'Safely Endangered'
1806		url = 'http://www.safelyendangered.com'
1807		get_navi_link = get_link_rel_next
1808		get_first_comic_link = simulate_first_link
1809		first_url = 'http://www.safelyendangered.com/comic/ignored/'
		@@ 1810-1836 (lines=27) @@
1807		get_navi_link = get_link_rel_next
1808		get_first_comic_link = simulate_first_link
1809		first_url = 'http://www.safelyendangered.com/comic/ignored/'
1810
1811		@classmethod
1812		def get_comic_info(cls, soup, link):
1813		"""Get information about a particular comics."""
1814		title = soup.find('h2', class_='post-title').string
1815		date_str = soup.find('span', class_='post-date').string
1816		day = string_to_date(date_str, '%B %d, %Y')
1817		imgs = soup.find('div', id='comic').find_all('img')
1818		alt = imgs[0]['alt']
1819		assert all(i['alt'] == i['title'] for i in imgs)
1820		return {
1821		'day': day.day,
1822		'month': day.month,
1823		'year': day.year,
1824		'img': [i['src'] for i in imgs],
1825		'title': title,
1826		'alt': alt,
1827		}
1828
1829
1830		class PicturesInBoxes(GenericNavigableComic):
1831		"""Class to retrieve Pictures In Boxes comics."""
1832		# Also on http://picturesinboxescomic.tumblr.com
1833		name = 'picturesinboxes'
1834		long_name = 'Pictures in Boxes'
1835		url = 'http://www.picturesinboxes.com'
1836		get_navi_link = get_a_navi_navinext
1837		get_first_comic_link = simulate_first_link
1838		first_url = 'http://www.picturesinboxes.com/2013/10/26/tetris/'
1839
		@@ 2518-2545 (lines=28) @@
2515		class LastPlaceComics(GenericNavigableComic):
2516		"""Class to retrieve Last Place Comics."""
2517		name = 'lastplace'
2518		long_name = 'Last Place Comics'
2519		url = "http://lastplacecomics.com"
2520		get_first_comic_link = get_a_comicnavbase_comicnavfirst
2521		get_navi_link = get_link_rel_next
2522
2523		@classmethod
2524		def get_comic_info(cls, soup, link):
2525		"""Get information about a particular comics."""
2526		title = soup.find('h2', class_='post-title').string
2527		author = soup.find("span", class_="post-author").find("a").string
2528		date_str = soup.find("span", class_="post-date").string
2529		day = string_to_date(date_str, "%B %d, %Y")
2530		imgs = soup.find("div", id="comic").find_all("img")
2531		assert all(i['alt'] == i['title'] for i in imgs)
2532		assert len(imgs) <= 1
2533		alt = imgs[0]['alt'] if imgs else ""
2534		return {
2535		'img': [i['src'] for i in imgs],
2536		'title': title,
2537		'alt': alt,
2538		'author': author,
2539		'day': day.day,
2540		'month': day.month,
2541		'year': day.year
2542		}
2543
2544
2545		class TalesOfAbsurdity(GenericNavigableComic):
2546		"""Class to retrieve Tales Of Absurdity comics."""
2547		# Also on http://tapastic.com/series/Tales-Of-Absurdity
2548		# Also on http://talesofabsurdity.tumblr.com
		@@ 2738-2764 (lines=27) @@
2735		class UnearthedComics(GenericNavigableComic):
2736		"""Class to retrieve Unearthed comics."""
2737		# Also on http://tapastic.com/series/UnearthedComics
2738		# Also on http://unearthedcomics.tumblr.com
2739		name = 'unearthed'
2740		long_name = 'Unearthed Comics'
2741		url = 'http://unearthedcomics.com'
2742		_categories = ('UNEARTHED', )
2743		get_navi_link = get_link_rel_next
2744		get_first_comic_link = simulate_first_link
2745		first_url = 'http://unearthedcomics.com/comics/world-with-turn-signals/'
2746
2747		@classmethod
2748		def get_comic_info(cls, soup, link):
2749		"""Get information about a particular comics."""
2750		short_url = soup.find('link', rel='shortlink')['href']
2751		title_elt = soup.find('h1') or soup.find('h2')
2752		title = title_elt.string if title_elt else ""
2753		desc = soup.find('meta', property='og:description')
2754		date_str = soup.find('time', class_='published updated hidden')['datetime']
2755		day = string_to_date(date_str, "%Y-%m-%d")
2756		post = soup.find('div', class_="entry content entry-content type-portfolio")
2757		imgs = post.find_all('img')
2758		return {
2759		'title': title,
2760		'description': desc,
2761		'url2': short_url,
2762		'img': [i['src'] for i in imgs],
2763		'month': day.month,
2764		'year': day.year,
2765		'day': day.day,
2766		}
2767
		@@ 2488-2514 (lines=27) @@
2485		# Also on http://www.gocomics.com/mister-and-me
2486		# Also on https://tapastic.com/series/Mister-and-Me
2487		name = 'mister'
2488		long_name = 'Mister & Me'
2489		url = 'http://www.mister-and-me.com'
2490		get_first_comic_link = get_a_comicnavbase_comicnavfirst
2491		get_navi_link = get_link_rel_next
2492
2493		@classmethod
2494		def get_comic_info(cls, soup, link):
2495		"""Get information about a particular comics."""
2496		title = soup.find('h2', class_='post-title').string
2497		author = soup.find("span", class_="post-author").find("a").string
2498		date_str = soup.find("span", class_="post-date").string
2499		day = string_to_date(date_str, "%B %d, %Y")
2500		imgs = soup.find("div", id="comic").find_all("img")
2501		assert all(i['alt'] == i['title'] for i in imgs)
2502		assert len(imgs) <= 1
2503		alt = imgs[0]['alt'] if imgs else ""
2504		return {
2505		'img': [i['src'] for i in imgs],
2506		'title': title,
2507		'alt': alt,
2508		'author': author,
2509		'day': day.day,
2510		'month': day.month,
2511		'year': day.year
2512		}
2513
2514
2515		class LastPlaceComics(GenericNavigableComic):
2516		"""Class to retrieve Last Place Comics."""
2517		name = 'lastplace'
		@@ 2321-2346 (lines=26) @@
2318		"""Get link to next or previous comic."""
2319		for link in last_soup.find_all('a', rel='next' if next_ else 'prev'):
2320		if link['href'] != '/comic':
2321		return link
2322		return None
2323
2324		@classmethod
2325		def get_comic_info(cls, soup, link):
2326		"""Get information about a particular comics."""
2327		title = soup.find('meta', attrs={'name': 'description'})["content"]
2328		description = soup.find('div', itemprop='articleBody').text
2329		author = soup.find('span', itemprop='author copyrightHolder').string
2330		imgs = soup.find_all('img', itemprop='image')
2331		assert all(i['title'] == i['alt'] for i in imgs)
2332		alt = imgs[0]['alt'] if imgs else ""
2333		date_str = soup.find('time', itemprop='datePublished')["datetime"]
2334		day = string_to_date(date_str, "%Y-%m-%d %H:%M:%S")
2335		return {
2336		'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
2337		'month': day.month,
2338		'year': day.year,
2339		'day': day.day,
2340		'author': author,
2341		'title': title,
2342		'alt': alt,
2343		'description': description,
2344		}
2345
2346
2347		class GerbilWithAJetpack(GenericNavigableComic):
2348		"""Class to retrieve GerbilWithAJetpack comics."""
2349		name = 'gerbil'
		@@ 2019-2043 (lines=25) @@
2016		def get_first_comic_link(cls):
2017		"""Get link to first comics."""
2018		return get_soup_at_url(cls.url).find('a', title="First")
2019
2020		@classmethod
2021		def get_navi_link(cls, last_soup, next_):
2022		"""Get link to next or previous comic."""
2023		return last_soup.find('a', title='Next' if next_ else 'Previous')
2024
2025		@classmethod
2026		def get_comic_info(cls, soup, link):
2027		"""Get information about a particular comics."""
2028		title = soup.find('h1').string
2029		date_str = soup.find('span', class_='date').string.strip()
2030		day = string_to_date(date_str, "%B %d, %Y")
2031		imgs = soup.find('div', class_='comic').find_all('img', alt='', title='')
2032		return {
2033		'title': title,
2034		'img': [i['src'] for i in imgs],
2035		'month': day.month,
2036		'year': day.year,
2037		'day': day.day,
2038		}
2039
2040
2041		class ChuckleADuck(GenericNavigableComic):
2042		"""Class to retrieve Chuckle-A-Duck comics."""
2043		name = 'chuckleaduck'
2044		long_name = 'Chuckle-A-duck'
2045		url = 'http://chuckleaduck.com'
2046		get_first_comic_link = get_div_navfirst_a
		@@ 2378-2404 (lines=27) @@
2375
2376		class EveryDayBlues(GenericNavigableComic):
2377		"""Class to retrieve EveryDayBlues Comics."""
2378		name = "blues"
2379		long_name = "Every Day Blues"
2380		url = "http://everydayblues.net"
2381		get_first_comic_link = get_a_navi_navifirst
2382		get_navi_link = get_link_rel_next
2383
2384		@classmethod
2385		def get_comic_info(cls, soup, link):
2386		"""Get information about a particular comics."""
2387		title = soup.find("h2", class_="post-title").string
2388		author = soup.find("span", class_="post-author").find("a").string
2389		date_str = soup.find("span", class_="post-date").string
2390		day = string_to_date(date_str, "%d. %B %Y", "de_DE.utf8")
2391		imgs = soup.find("div", id="comic").find_all("img")
2392		assert all(i['alt'] == i['title'] == title for i in imgs)
2393		assert len(imgs) <= 1
2394		return {
2395		'img': [i['src'] for i in imgs],
2396		'title': title,
2397		'author': author,
2398		'day': day.day,
2399		'month': day.month,
2400		'year': day.year
2401		}
2402
2403
2404		class BiterComics(GenericNavigableComic):
2405		"""Class to retrieve Biter Comics."""
2406		name = "biter"
2407		long_name = "Biter Comics"
		@@ 1932-1958 (lines=27) @@
1929		'year': day.year,
1930		'day': day.day,
1931		'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
1932		'title': title,
1933		}
1934
1935
1936		class DiscoBleach(GenericEmptyComic): # Does not work anymore
1937		"""Class to retrieve Disco Bleach Comics."""
1938		name = 'discobleach'
1939		long_name = 'Disco Bleach'
1940		url = 'http://discobleach.com'
1941
1942
1943		class TubeyToons(GenericEmptyComic): # Does not work anymore
1944		"""Class to retrieve TubeyToons comics."""
1945		# Also on http://tapastic.com/series/Tubey-Toons
1946		# Also on http://tubeytoons.tumblr.com
1947		name = 'tubeytoons'
1948		long_name = 'Tubey Toons'
1949		url = 'http://tubeytoons.com'
1950		_categories = ('TUNEYTOONS', )
1951
1952
1953		class CompletelySeriousComics(GenericNavigableComic):
1954		"""Class to retrieve Completely Serious comics."""
1955		name = 'completelyserious'
1956		long_name = 'Completely Serious Comics'
1957		url = 'http://completelyseriouscomics.com'
1958		get_first_comic_link = get_a_navi_navifirst
1959		get_navi_link = get_a_navi_navinext
1960
1961		@classmethod
		@@ 2119-2144 (lines=26) @@
2116		tags = ' '.join(t['content'] for t in soup.find_all('meta', property='article:tag'))
2117		imgs = soup.find('div', class_='entry-content').find_all('img')
2118		return {
2119		'day': day.day,
2120		'month': day.month,
2121		'year': day.year,
2122		'title': title,
2123		'title2': title2,
2124		'description': description,
2125		'tags': tags,
2126		'img': [i['src'] for i in imgs],
2127		'alt': ' '.join(i['alt'] for i in imgs),
2128		}
2129
2130		@classmethod
2131		def get_url_from_archive_element(cls, tr):
2132		_, td2, td3 = tr.find_all('td')
2133		return td2.find('a')['href']
2134
2135		@classmethod
2136		def get_archive_elements(cls):
2137		archive_url = urljoin_wrapper(cls.url, 'archive-2')
2138		return reversed(get_soup_at_url(archive_url).find('tbody').find_all('tr'))
2139
2140
2141		class HappleTea(GenericNavigableComic):
2142		"""Class to retrieve Happle Tea Comics."""
2143		name = 'happletea'
2144		long_name = 'Happle Tea'
2145		url = 'http://www.happletea.com'
2146		get_first_comic_link = get_a_navi_navifirst
2147		get_navi_link = get_link_rel_next
		@@ 2659-2683 (lines=25) @@
2656		@classmethod
2657		def get_comic_info(cls, soup, link):
2658		"""Get information about a particular comics."""
2659		desc = soup.find('meta', property='og:description')['content']
2660		title = soup.find('meta', property='og:title')['content']
2661		imgs = soup.find('div', class_='entry-content').find_all('img')
2662		title2 = ' '.join(i.get('title', '') for i in imgs)
2663		return {
2664		'title': title,
2665		'title2': title2,
2666		'description': desc,
2667		'img': [urljoin_wrapper(cls.url, convert_iri_to_plain_ascii_uri(i['src'])) for i in imgs],
2668		}
2669
2670
2671		class CommitStripFr(GenericCommitStrip):
2672		"""Class to retrieve Commit Strips in French."""
2673		name = 'commit_fr'
2674		long_name = 'Commit Strip (Fr)'
2675		url = 'http://www.commitstrip.com/fr'
2676		_categories = ('FRANCAIS', )
2677		first_url = 'http://www.commitstrip.com/fr/2012/02/22/interview/'
2678
2679
2680		class CommitStripEn(GenericCommitStrip):
2681		"""Class to retrieve Commit Strips in English."""
2682		name = 'commit_en'
2683		long_name = 'Commit Strip (En)'
2684		url = 'http://www.commitstrip.com/en'
2685		first_url = 'http://www.commitstrip.com/en/2012/02/22/interview/'
2686
		@@ 338-360 (lines=23) @@
335		def get_next_comic(cls, last_comic):
336		"""Implementation of get_next_comic returning no comics."""
337		cls.log("comic is considered as empty - returning no comic")
338		return []
339
340
341		class ExtraFabulousComics(GenericNavigableComic):
342		"""Class to retrieve Extra Fabulous Comics."""
343		name = 'efc'
344		long_name = 'Extra Fabulous Comics'
345		url = 'http://extrafabulouscomics.com'
346		get_first_comic_link = get_a_navi_navifirst
347		get_navi_link = get_link_rel_next
348
349		@classmethod
350		def get_comic_info(cls, soup, link):
351		"""Get information about a particular comics."""
352		img_src_re = re.compile('^%s/wp-content/uploads/' % cls.url)
353		imgs = soup.find_all('img', src=img_src_re)
354		title = soup.find('meta', property='og:title')['content']
355		date_str = soup.find('meta', property='article:published_time')['content'][:10]
356		day = string_to_date(date_str, "%Y-%m-%d")
357		return {
358		'title': title,
359		'img': [i['src'] for i in imgs],
360		'month': day.month,
361		'year': day.year,
362		'day': day.day,
363		'prefix': title + '-'

SylvainDe / ComicBookMaker

Code Duplication Length = 23-28 lines in 12 locations

comics.py 12 locations