Code Duplication - SylvainDe/ComicBookMaker - Measure and Improve Code Quality continuously with Scrutinizer

Code Duplication Length = 24-27 lines in 13 locations

comics.py 13 locations


    name = 'unearthed'
    long_name = 'Unearthed Comics'
    url = 'http://unearthedcomics.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://unearthedcomics.com/comics/world-with-turn-signals/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Reads the short link, the title, the Open Graph description, the
        publication date and the images from the parsed page ``soup``.
        ``link`` is not used here.

        Returns a dict with the keys 'title', 'description', 'url2', 'img',
        'month', 'year' and 'day'.
        """
        short_url = soup.find('link', rel='shortlink')['href']
        # The title is taken from the first h1, or from the first h2 when
        # there is no h1; it is empty when neither is present.
        title_elt = soup.find('h1') or soup.find('h2')
        title = title_elt.string if title_elt else ""
        # Store the text carried by the meta tag rather than the tag object
        # itself; the tag is optional, hence the empty-string fallback.
        desc = soup.find('meta', property='og:description')
        description = desc['content'] if desc else ''
        date_str = soup.find('time', class_='published updated hidden')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        post = soup.find('div', class_="entry content entry-content type-portfolio")
        imgs = post.find_all('img')
        return {
            'title': title,
            'description': description,
            'url2': short_url,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }


class Optipess(GenericNavigableComic):
    """Class to retrieve Optipess comics."""
    name = 'optipess'
    long_name = 'Optipess'

    # Also on https://tapastic.com/series/Mister-and-Me
    name = 'mister'
    long_name = 'Mister & Me'
    url = 'http://www.mister-and-me.com'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract the details of one comic from its parsed page.

        The title, author and date come from the post header elements; the
        images come from the ``div`` with id ``comic``. At most one image
        is accepted and its alt and title texts must be equal. ``link`` is
        not used here.

        Returns a dict with the keys 'img', 'title', 'alt', 'author',
        'day', 'month' and 'year'.
        """
        post_title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        writer = author_span.find("a").string
        date_span = soup.find("span", class_="post-date")
        published = string_to_date(date_span.string, "%B %d, %Y")
        comic_div = soup.find("div", id="comic")
        pictures = comic_div.find_all("img")
        # Sanity checks on the page layout.
        for pic in pictures:
            assert pic['alt'] == pic['title']
        assert len(pictures) <= 1
        if pictures:
            hover_text = pictures[0]['alt']
        else:
            hover_text = ""
        info = {
            'img': [pic['src'] for pic in pictures],
            'title': post_title,
            'alt': hover_text,
            'author': writer,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
        return info


class LastPlaceComics(GenericNavigableComic):
    """Class to retrieve Last Place Comics."""
    name = 'lastplace'
    long_name = 'LastPlaceComics'

    # Also on http://tapastic.com/series/Tales-Of-Absurdity
    # Also on http://talesofabsurdity.tumblr.com
    name = 'absurdity'
    long_name = 'Tales of Absurdity'
    url = 'http://talesofabsurdity.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect title, author, date and images of a single comic page.

        Every image found in the ``div`` with id ``comic`` must carry the
        same text in its alt and title attributes; the alt text of the
        first image (or an empty string when there is none) is returned as
        'alt'. ``link`` is not used here.
        """
        heading = soup.find('h2', class_='post-title')
        comic_title = heading.string
        comic_author = soup.find("span", class_="post-author").find("a").string
        raw_date = soup.find("span", class_="post-date").string
        when = string_to_date(raw_date, "%B %d, %Y")
        images = soup.find("div", id="comic").find_all("img")
        for image in images:
            assert image['alt'] == image['title']
        alt_text = "" if not images else images[0]['alt']
        sources = []
        for image in images:
            sources.append(image['src'])
        return {
            'img': sources,
            'title': comic_title,
            'alt': alt_text,
            'author': comic_author,
            'day': when.day,
            'month': when.month,
            'year': when.year
        }


class EndlessOrigami(GenericNavigableComic):
    """Class to retrieve Endless Origami Comics."""
    name = "origami"

        """Get link to next or previous comic."""
        for link in last_soup.find_all('a', rel='next' if next_ else 'prev'):
            if link['href'] != '/comic':
                return link
        return None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dict for one comic page.

        The title is the content of the ``description`` meta tag; the
        description, author, images and date are located through their
        ``itemprop`` attributes. Image sources are joined with the class
        url. ``link`` is not used here.
        """
        meta_description = soup.find('meta', attrs={'name': 'description'})
        comic_title = meta_description["content"]
        body_text = soup.find('div', itemprop='articleBody').text
        creator = soup.find('span', itemprop='author copyrightHolder').string
        pictures = soup.find_all('img', itemprop='image')
        # Each image is expected to repeat its title text as alt text.
        for pic in pictures:
            assert pic['title'] == pic['alt']
        if pictures:
            hover_text = pictures[0]['alt']
        else:
            hover_text = ""
        published_str = soup.find('time', itemprop='datePublished')["datetime"]
        published = string_to_date(published_str, "%Y-%m-%d %H:%M:%S")
        absolute_sources = []
        for pic in pictures:
            absolute_sources.append(urljoin_wrapper(cls.url, pic['src']))
        return {
            'img': absolute_sources,
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'author': creator,
            'title': comic_title,
            'alt': hover_text,
            'description': body_text,
        }


class GerbilWithAJetpack(GenericNavigableComic):
    """Class to retrieve GerbilWithAJetpack comics."""
    name = 'gerbil'

    first_url = 'http://respawncomic.com/comic/c0001/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Read the comic details from the meta tags of the parsed page.

        Title, author, publication time and images are all taken from meta
        tags. Only the first ten characters of the publication time are
        parsed, as a ``%Y-%m-%d`` date. Two fixed image urls are left out
        of the returned image list. ``link`` is not used here.
        """
        comic_title = soup.find('meta', property='og:title')['content']
        author_meta = soup.find('meta', attrs={'name': 'shareaholic:article_author_name'})
        creator = author_meta['content']
        time_meta = soup.find('meta', attrs={'name': 'shareaholic:article_published_time'})
        published = string_to_date(time_meta['content'][:10], "%Y-%m-%d")
        image_metas = soup.find_all('meta', property='og:image')
        # Urls that are never kept in the result.
        ignored = {
            'http://respawncomic.com/wp-content/uploads/2016/03/site/HAROLD2.png',
            'http://respawncomic.com/wp-content/uploads/2016/03/site/DEVA.png'
        }
        kept = []
        for meta in image_metas:
            if meta['content'] not in ignored:
                kept.append(meta['content'])
        return {
            'title': comic_title,
            'author': creator,
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': kept,
        }


class SafelyEndangered(GenericNavigableComic):
    """Class to retrieve Safely Endangered comics."""
    # Also on http://tumblr.safelyendangered.com
    name = 'endangered'
    long_name = 'Safely Endangered'
    url = 'http://www.safelyendangered.com'
    get_navi_link = get_link_rel_next


    @classmethod
    def get_first_comic_link(cls):
        """Return the anchor titled "First" found on the page at the class url."""
        home_soup = get_soup_at_url(cls.url)
        first_anchor = home_soup.find('a', title="First")
        return first_anchor

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the navigation anchor of the page, looked up by its title.

        The anchor titled 'Next' is searched when ``next_`` is true, the
        one titled 'Previous' otherwise.
        """
        if next_:
            wanted_title = 'Next'
        else:
            wanted_title = 'Previous'
        return last_soup.find('a', title=wanted_title)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Gather title, date and image sources of one comic page.

        The title is the string of the first ``h1``; the date is the
        stripped text of the ``span`` with class ``date``. Only images with
        empty alt and title attributes inside the ``div`` with class
        ``comic`` are kept. ``link`` is not used here.
        """
        comic_title = soup.find('h1').string
        date_span = soup.find('span', class_='date')
        published = string_to_date(date_span.string.strip(), "%B %d, %Y")
        comic_div = soup.find('div', class_='comic')
        pictures = comic_div.find_all('img', alt='', title='')
        sources = []
        for pic in pictures:
            sources.append(pic['src'])
        return {
            'title': comic_title,
            'img': sources,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }


class ChuckleADuck(GenericNavigableComic):
    """Class to retrieve Chuckle-A-Duck comics."""
    name = 'chuckleaduck'
    long_name = 'Chuckle-A-duck'

        assert all(i['title'] == i['alt'] == title for i in imgs)
        return {
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
            'title': title,
        }


class DiscoBleach(GenericEmptyComic):  # Does not work anymore
    """Class to retrieve Disco Bleach Comics.

    Derives from GenericEmptyComic and is flagged as no longer working;
    only the identifying attributes are declared here.
    """
    name = 'discobleach'
    long_name = 'Disco Bleach'
    url = 'http://discobleach.com'


class TubeyToons(GenericEmptyComic):  # Does not work anymore
    """Class to retrieve TubeyToons comics.

    Derives from GenericEmptyComic and is flagged as no longer working;
    only the identifying attributes are declared here.
    """
    # Also on http://tapastic.com/series/Tubey-Toons
    # Also on http://tubeytoons.tumblr.com
    name = 'tubeytoons'
    long_name = 'Tubey Toons'
    url = 'http://tubeytoons.com'


class CompletelySeriousComics(GenericNavigableComic):
    """Class to retrieve Completely Serious comics."""
    name = 'completelyserious'
    long_name = 'Completely Serious Comics'
    url = 'http://completelyseriouscomics.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    long_name = 'Safely Endangered'
    url = 'http://www.safelyendangered.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.safelyendangered.com/comic/ignored/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Reads the post title, the post date and the images found in the
        ``div`` with id ``comic`` from the parsed page ``soup``. ``link``
        is not used here.

        Returns a dict with the keys 'day', 'month', 'year', 'img', 'title'
        and 'alt'.
        """
        title = soup.find('h2', class_='post-title').string
        date_str = soup.find('span', class_='post-date').string
        day = string_to_date(date_str, '%B %d, %Y')
        imgs = soup.find('div', id='comic').find_all('img')
        # A page without any image must not raise IndexError: fall back to
        # an empty alt text, as other get_comic_info methods in this file do.
        alt = imgs[0]['alt'] if imgs else ""
        # Sanity check: every image repeats its alt text as title text.
        assert all(i['alt'] == i['title'] for i in imgs)
        return {
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [i['src'] for i in imgs],
            'title': title,
            'alt': alt,
        }


class PicturesInBoxes(GenericNavigableComic):
    """Class to retrieve Pictures In Boxes comics."""
    # Also on http://picturesinboxescomic.tumblr.com
    name = 'picturesinboxes'
    long_name = 'Pictures in Boxes'
    url = 'http://www.picturesinboxes.com'
    get_navi_link = get_a_navi_navinext
    get_first_comic_link = simulate_first_link



class JustSayEh(GenericNavigableComic):
    """Class to retrieve Just Say Eh comics."""
    # Also on http://tapastic.com/series/Just-Say-Eh
    name = 'justsayeh'
    long_name = 'Just Say Eh'
    url = 'http://www.justsayeh.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Reads the post title and the images found in the ``div`` with id
        ``comic`` from the parsed page ``soup``. ``link`` is not used here.

        Returns a dict with the keys 'img', 'title' and 'alt'.
        """
        title = soup.find('h2', class_='post-title').string
        imgs = soup.find("div", id="comic").find_all("img")
        # Sanity check: every image repeats its alt text as title text.
        assert all(i['alt'] == i['title'] for i in imgs)
        # A page without any image must not raise IndexError: fall back to
        # an empty alt text, as other get_comic_info methods in this file do.
        alt = imgs[0]['alt'] if imgs else ""
        return {
            'img': [i['src'] for i in imgs],
            'title': title,
            'alt': alt,
        }


class MouseBearComedy(GenericNavigableComic):
    """Class to retrieve Mouse Bear Comedy comics."""
    # Also on http://mousebearcomedy.tumblr.com
    name = 'mousebear'
    long_name = 'Mouse Bear Comedy'
    url = 'http://www.mousebearcomedy.com'



class BouletCorp(GenericBouletCorp):
    """Class to retrieve BouletCorp comics.

    Only the identifying attributes are declared here; everything else is
    inherited from GenericBouletCorp.
    """
    name = 'boulet'
    long_name = 'Boulet Corp'
    url = 'http://www.bouletcorp.com'


class BouletCorpEn(GenericBouletCorp):
    """Class to retrieve EnglishBouletCorp comics.

    Only the identifying attributes are declared here; everything else is
    inherited from GenericBouletCorp.
    """
    name = 'boulet_en'
    long_name = 'Boulet Corp English'
    url = 'http://english.bouletcorp.com'


class AmazingSuperPowers(GenericNavigableComic):
    """Class to retrieve Amazing Super Powers comics."""
    name = 'asp'
    long_name = 'Amazing Super Powers'
    url = 'http://www.amazingsuperpowers.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics."""
        author = soup.find("span", class_="post-author").find("a").string
        date_str = soup.find('span', class_='post-date').string
        day = string_to_date(date_str, "%B %d, %Y")


class EveryDayBlues(GenericNavigableComic):
    """Class to retrieve EveryDayBlues Comics."""
    name = "blues"
    long_name = "Every Day Blues"
    url = "http://everydayblues.net"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, author, date and image of one comic page.

        The post date is parsed with the "%d. %B %Y" format and the
        "de_DE.utf8" locale argument. At most one image is accepted in the
        ``div`` with id ``comic`` and its alt and title texts must both
        equal the post title. ``link`` is not used here.
        """
        post_title = soup.find("h2", class_="post-title").string
        author_span = soup.find("span", class_="post-author")
        writer = author_span.find("a").string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%d. %B %Y", "de_DE.utf8")
        pictures = soup.find("div", id="comic").find_all("img")
        # Sanity checks on the page layout.
        for pic in pictures:
            assert pic['alt'] == pic['title'] == post_title
        assert len(pictures) <= 1
        sources = []
        for pic in pictures:
            sources.append(pic['src'])
        return {
            'img': sources,
            'title': post_title,
            'author': writer,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }


class BiterComics(GenericNavigableComic):
    """Class to retrieve Biter Comics."""
    name = "biter"
    long_name = "Biter Comics"

        title2 = soup.find('meta', property='og:title')['content']
        desc = soup.find('meta', property='og:description')
        description = desc['content'] if desc else ''
        tags = ' '.join(t['content'] for t in soup.find_all('meta', property='article:tag'))
        imgs = soup.find('div', class_='entry-content').find_all('img')
        return {
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'title': title,
            'title2': title2,
            'description': description,
            'tags': tags,
            'img': [i['src'] for i in imgs],
            'alt': ' '.join(i['alt'] for i in imgs),
        }

    @classmethod
    def get_url_from_archive_element(cls, tr):
        """Get the comic url from a row of the archive table.

        The row ``tr`` must hold exactly three ``td`` cells (any other
        count makes the unpacking raise ValueError); the url is the
        ``href`` of the first link in the second cell.
        """
        # Only the second cell is needed; the other two are unpacked solely
        # to enforce the three-cell layout.
        _, link_cell, _ = tr.find_all('td')
        return link_cell.find('a')['href']

    @classmethod
    def get_archive_elements(cls):
        """Return the rows of the archive table, in reversed order.

        The archive page is the 'archive-2' path joined with the class url;
        the rows are the ``tr`` elements of its first ``tbody``.
        """
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive-2'))
        table_body = archive_soup.find('tbody')
        rows = table_body.find_all('tr')
        return reversed(rows)


class HappleTea(GenericNavigableComic):
    """Class to retrieve Happle Tea Comics."""
    name = 'happletea'
    long_name = 'Happle Tea'

class GerbilWithAJetpack(GenericNavigableComic):
    """Class to retrieve GerbilWithAJetpack comics."""
    name = 'gerbil'
    long_name = 'Gerbil With A Jetpack'
    url = 'http://gerbilwithajetpack.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Reads the post title, author, date and the images found in the
        ``div`` with id ``comic`` from the parsed page ``soup``. ``link``
        is not used here.

        Returns a dict with the keys 'img', 'title', 'alt', 'author',
        'day', 'month' and 'year'.
        """
        title = soup.find('h2', class_='post-title').string
        author = soup.find("span", class_="post-author").find("a").string
        date_str = soup.find("span", class_="post-date").string
        day = string_to_date(date_str, "%B %d, %Y")
        imgs = soup.find("div", id="comic").find_all("img")
        # A page without any image must not raise IndexError: fall back to
        # an empty alt text, as other get_comic_info methods in this file do.
        alt = imgs[0]['alt'] if imgs else ""
        # Sanity check: all images share the same alt and title text.
        assert all(i['alt'] == i['title'] == alt for i in imgs)
        return {
            'img': [i['src'] for i in imgs],
            'title': title,
            'alt': alt,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year
        }


class EveryDayBlues(GenericNavigableComic):
    """Class to retrieve EveryDayBlues Comics."""

		@@ 2734-2760 (lines=27) @@
2731		name = 'unearthed'
2732		long_name = 'Unearthed Comics'
2733		url = 'http://unearthedcomics.com'
2734		get_navi_link = get_link_rel_next
2735		get_first_comic_link = simulate_first_link
2736		first_url = 'http://unearthedcomics.com/comics/world-with-turn-signals/'
2737
2738		@classmethod
2739		def get_comic_info(cls, soup, link):
2740		"""Get information about a particular comics."""
2741		short_url = soup.find('link', rel='shortlink')['href']
2742		title_elt = soup.find('h1') or soup.find('h2')
2743		title = title_elt.string if title_elt else ""
2744		desc = soup.find('meta', property='og:description')
2745		date_str = soup.find('time', class_='published updated hidden')['datetime']
2746		day = string_to_date(date_str, "%Y-%m-%d")
2747		post = soup.find('div', class_="entry content entry-content type-portfolio")
2748		imgs = post.find_all('img')
2749		return {
2750		'title': title,
2751		'description': desc,
2752		'url2': short_url,
2753		'img': [i['src'] for i in imgs],
2754		'month': day.month,
2755		'year': day.year,
2756		'day': day.day,
2757		}
2758
2759
2760		class Optipess(GenericNavigableComic):
2761		"""Class to retrieve Optipess comics."""
2762		name = 'optipess'
2763		long_name = 'Optipess'
		@@ 2484-2510 (lines=27) @@
2481		# Also on https://tapastic.com/series/Mister-and-Me
2482		name = 'mister'
2483		long_name = 'Mister & Me'
2484		url = 'http://www.mister-and-me.com'
2485		get_first_comic_link = get_a_comicnavbase_comicnavfirst
2486		get_navi_link = get_link_rel_next
2487
2488		@classmethod
2489		def get_comic_info(cls, soup, link):
2490		"""Get information about a particular comics."""
2491		title = soup.find('h2', class_='post-title').string
2492		author = soup.find("span", class_="post-author").find("a").string
2493		date_str = soup.find("span", class_="post-date").string
2494		day = string_to_date(date_str, "%B %d, %Y")
2495		imgs = soup.find("div", id="comic").find_all("img")
2496		assert all(i['alt'] == i['title'] for i in imgs)
2497		assert len(imgs) <= 1
2498		alt = imgs[0]['alt'] if imgs else ""
2499		return {
2500		'img': [i['src'] for i in imgs],
2501		'title': title,
2502		'alt': alt,
2503		'author': author,
2504		'day': day.day,
2505		'month': day.month,
2506		'year': day.year
2507		}
2508
2509
2510		class LastPlaceComics(GenericNavigableComic):
2511		"""Class to retrieve Last Place Comics."""
2512		name = 'lastplace'
2513		long_name = 'LastPlaceComics'
		@@ 2545-2570 (lines=26) @@
2542		# Also on http://tapastic.com/series/Tales-Of-Absurdity
2543		# Also on http://talesofabsurdity.tumblr.com
2544		name = 'absurdity'
2545		long_name = 'Tales of Absurdity'
2546		url = 'http://talesofabsurdity.com'
2547		get_first_comic_link = get_a_navi_navifirst
2548		get_navi_link = get_a_navi_comicnavnext_navinext
2549
2550		@classmethod
2551		def get_comic_info(cls, soup, link):
2552		"""Get information about a particular comics."""
2553		title = soup.find('h2', class_='post-title').string
2554		author = soup.find("span", class_="post-author").find("a").string
2555		date_str = soup.find("span", class_="post-date").string
2556		day = string_to_date(date_str, "%B %d, %Y")
2557		imgs = soup.find("div", id="comic").find_all("img")
2558		assert all(i['alt'] == i['title'] for i in imgs)
2559		alt = imgs[0]['alt'] if imgs else ""
2560		return {
2561		'img': [i['src'] for i in imgs],
2562		'title': title,
2563		'alt': alt,
2564		'author': author,
2565		'day': day.day,
2566		'month': day.month,
2567		'year': day.year
2568		}
2569
2570
2571		class EndlessOrigami(GenericNavigableComic):
2572		"""Class to retrieve Endless Origami Comics."""
2573		name = "origami"
		@@ 2317-2342 (lines=26) @@
2314		"""Get link to next or previous comic."""
2315		for link in last_soup.find_all('a', rel='next' if next_ else 'prev'):
2316		if link['href'] != '/comic':
2317		return link
2318		return None
2319
2320		@classmethod
2321		def get_comic_info(cls, soup, link):
2322		"""Get information about a particular comics."""
2323		title = soup.find('meta', attrs={'name': 'description'})["content"]
2324		description = soup.find('div', itemprop='articleBody').text
2325		author = soup.find('span', itemprop='author copyrightHolder').string
2326		imgs = soup.find_all('img', itemprop='image')
2327		assert all(i['title'] == i['alt'] for i in imgs)
2328		alt = imgs[0]['alt'] if imgs else ""
2329		date_str = soup.find('time', itemprop='datePublished')["datetime"]
2330		day = string_to_date(date_str, "%Y-%m-%d %H:%M:%S")
2331		return {
2332		'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
2333		'month': day.month,
2334		'year': day.year,
2335		'day': day.day,
2336		'author': author,
2337		'title': title,
2338		'alt': alt,
2339		'description': description,
2340		}
2341
2342
2343		class GerbilWithAJetpack(GenericNavigableComic):
2344		"""Class to retrieve GerbilWithAJetpack comics."""
2345		name = 'gerbil'
		@@ 1781-1806 (lines=26) @@
1778		first_url = 'http://respawncomic.com/comic/c0001/'
1779
1780		@classmethod
1781		def get_comic_info(cls, soup, link):
1782		"""Get information about a particular comics."""
1783		title = soup.find('meta', property='og:title')['content']
1784		author = soup.find('meta', attrs={'name': 'shareaholic:article_author_name'})['content']
1785		date_str = soup.find('meta', attrs={'name': 'shareaholic:article_published_time'})['content']
1786		date_str = date_str[:10]
1787		day = string_to_date(date_str, "%Y-%m-%d")
1788		imgs = soup.find_all('meta', property='og:image')
1789		skip_imgs = {
1790		'http://respawncomic.com/wp-content/uploads/2016/03/site/HAROLD2.png',
1791		'http://respawncomic.com/wp-content/uploads/2016/03/site/DEVA.png'
1792		}
1793		return {
1794		'title': title,
1795		'author': author,
1796		'day': day.day,
1797		'month': day.month,
1798		'year': day.year,
1799		'img': [i['content'] for i in imgs if i['content'] not in skip_imgs],
1800		}
1801
1802
1803		class SafelyEndangered(GenericNavigableComic):
1804		"""Class to retrieve Safely Endangered comics."""
1805		# Also on http://tumblr.safelyendangered.com
1806		name = 'endangered'
1807		long_name = 'Safely Endangered'
1808		url = 'http://www.safelyendangered.com'
1809		get_navi_link = get_link_rel_next
		@@ 2017-2041 (lines=25) @@
2014
2015		@classmethod
2016		def get_first_comic_link(cls):
2017		"""Get link to first comics."""
2018		return get_soup_at_url(cls.url).find('a', title="First")
2019
2020		@classmethod
2021		def get_navi_link(cls, last_soup, next_):
2022		"""Get link to next or previous comic."""
2023		return last_soup.find('a', title='Next' if next_ else 'Previous')
2024
2025		@classmethod
2026		def get_comic_info(cls, soup, link):
2027		"""Get information about a particular comics."""
2028		title = soup.find('h1').string
2029		date_str = soup.find('span', class_='date').string.strip()
2030		day = string_to_date(date_str, "%B %d, %Y")
2031		imgs = soup.find('div', class_='comic').find_all('img', alt='', title='')
2032		return {
2033		'title': title,
2034		'img': [i['src'] for i in imgs],
2035		'month': day.month,
2036		'year': day.year,
2037		'day': day.day,
2038		}
2039
2040
2041		class ChuckleADuck(GenericNavigableComic):
2042		"""Class to retrieve Chuckle-A-Duck comics."""
2043		name = 'chuckleaduck'
2044		long_name = 'Chuckle-A-duck'
		@@ 1931-1957 (lines=27) @@
1928		assert all(i['title'] == i['alt'] == title for i in imgs)
1929		return {
1930		'month': day.month,
1931		'year': day.year,
1932		'day': day.day,
1933		'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
1934		'title': title,
1935		}
1936
1937
1938		class DiscoBleach(GenericEmptyComic): # Does not work anymore
1939		"""Class to retrieve Disco Bleach Comics."""
1940		name = 'discobleach'
1941		long_name = 'Disco Bleach'
1942		url = 'http://discobleach.com'
1943
1944
1945		class TubeyToons(GenericEmptyComic): # Does not work anymore
1946		"""Class to retrieve TubeyToons comics."""
1947		# Also on http://tapastic.com/series/Tubey-Toons
1948		# Also on http://tubeytoons.tumblr.com
1949		name = 'tubeytoons'
1950		long_name = 'Tubey Toons'
1951		url = 'http://tubeytoons.com'
1952
1953
1954		class CompletelySeriousComics(GenericNavigableComic):
1955		"""Class to retrieve Completely Serious comics."""
1956		name = 'completelyserious'
1957		long_name = 'Completely Serious Comics'
1958		url = 'http://completelyseriouscomics.com'
1959		get_first_comic_link = get_a_navi_navifirst
1960		get_navi_link = get_a_navi_navinext
		@@ 1810-1836 (lines=27) @@
1807		long_name = 'Safely Endangered'
1808		url = 'http://www.safelyendangered.com'
1809		get_navi_link = get_link_rel_next
1810		get_first_comic_link = simulate_first_link
1811		first_url = 'http://www.safelyendangered.com/comic/ignored/'
1812
1813		@classmethod
1814		def get_comic_info(cls, soup, link):
1815		"""Get information about a particular comics."""
1816		title = soup.find('h2', class_='post-title').string
1817		date_str = soup.find('span', class_='post-date').string
1818		day = string_to_date(date_str, '%B %d, %Y')
1819		imgs = soup.find('div', id='comic').find_all('img')
1820		alt = imgs[0]['alt']
1821		assert all(i['alt'] == i['title'] for i in imgs)
1822		return {
1823		'day': day.day,
1824		'month': day.month,
1825		'year': day.year,
1826		'img': [i['src'] for i in imgs],
1827		'title': title,
1828		'alt': alt,
1829		}
1830
1831
1832		class PicturesInBoxes(GenericNavigableComic):
1833		"""Class to retrieve Pictures In Boxes comics."""
1834		# Also on http://picturesinboxescomic.tumblr.com
1835		name = 'picturesinboxes'
1836		long_name = 'Pictures in Boxes'
1837		url = 'http://www.picturesinboxes.com'
1838		get_navi_link = get_a_navi_navinext
1839		get_first_comic_link = simulate_first_link
		@@ 1699-1723 (lines=25) @@
1696
1697
1698		class JustSayEh(GenericNavigableComic):
1699		"""Class to retrieve Just Say Eh comics."""
1700		# Also on http//tapastic.com/series/Just-Say-Eh
1701		name = 'justsayeh'
1702		long_name = 'Just Say Eh'
1703		url = 'http://www.justsayeh.com'
1704		get_first_comic_link = get_a_navi_navifirst
1705		get_navi_link = get_a_navi_comicnavnext_navinext
1706
1707		@classmethod
1708		def get_comic_info(cls, soup, link):
1709		"""Get information about a particular comics."""
1710		title = soup.find('h2', class_='post-title').string
1711		imgs = soup.find("div", id="comic").find_all("img")
1712		assert all(i['alt'] == i['title'] for i in imgs)
1713		alt = imgs[0]['alt']
1714		return {
1715		'img': [i['src'] for i in imgs],
1716		'title': title,
1717		'alt': alt,
1718		}
1719
1720
1721		class MouseBearComedy(GenericNavigableComic):
1722		"""Class to retrieve Mouse Bear Comedy comics."""
1723		# Also on http://mousebearcomedy.tumblr.com
1724		name = 'mousebear'
1725		long_name = 'Mouse Bear Comedy'
1726		url = 'http://www.mousebearcomedy.com'
		@@ 1114-1137 (lines=24) @@
1111
1112
1113		class BouletCorp(GenericBouletCorp):
1114		"""Class to retrieve BouletCorp comics."""
1115		name = 'boulet'
1116		long_name = 'Boulet Corp'
1117		url = 'http://www.bouletcorp.com'
1118
1119
1120		class BouletCorpEn(GenericBouletCorp):
1121		"""Class to retrieve EnglishBouletCorp comics."""
1122		name = 'boulet_en'
1123		long_name = 'Boulet Corp English'
1124		url = 'http://english.bouletcorp.com'
1125
1126
1127		class AmazingSuperPowers(GenericNavigableComic):
1128		"""Class to retrieve Amazing Super Powers comics."""
1129		name = 'asp'
1130		long_name = 'Amazing Super Powers'
1131		url = 'http://www.amazingsuperpowers.com'
1132		get_first_comic_link = get_a_navi_navifirst
1133		get_navi_link = get_a_navi_navinext
1134
1135		@classmethod
1136		def get_comic_info(cls, soup, link):
1137		"""Get information about a particular comics."""
1138		author = soup.find("span", class_="post-author").find("a").string
1139		date_str = soup.find('span', class_='post-date').string
1140		day = string_to_date(date_str, "%B %d, %Y")
		@@ 2374-2400 (lines=27) @@
2371
2372		class EveryDayBlues(GenericNavigableComic):
2373		"""Class to retrieve EveryDayBlues Comics."""
2374		name = "blues"
2375		long_name = "Every Day Blues"
2376		url = "http://everydayblues.net"
2377		get_first_comic_link = get_a_navi_navifirst
2378		get_navi_link = get_link_rel_next
2379
2380		@classmethod
2381		def get_comic_info(cls, soup, link):
2382		"""Get information about a particular comics."""
2383		title = soup.find("h2", class_="post-title").string
2384		author = soup.find("span", class_="post-author").find("a").string
2385		date_str = soup.find("span", class_="post-date").string
2386		day = string_to_date(date_str, "%d. %B %Y", "de_DE.utf8")
2387		imgs = soup.find("div", id="comic").find_all("img")
2388		assert all(i['alt'] == i['title'] == title for i in imgs)
2389		assert len(imgs) <= 1
2390		return {
2391		'img': [i['src'] for i in imgs],
2392		'title': title,
2393		'author': author,
2394		'day': day.day,
2395		'month': day.month,
2396		'year': day.year
2397		}
2398
2399
2400		class BiterComics(GenericNavigableComic):
2401		"""Class to retrieve Biter Comics."""
2402		name = "biter"
2403		long_name = "Biter Comics"
		@@ 2116-2141 (lines=26) @@
2113		title2 = soup.find('meta', property='og:title')['content']
2114		desc = soup.find('meta', property='og:description')
2115		description = desc['content'] if desc else ''
2116		tags = ' '.join(t['content'] for t in soup.find_all('meta', property='article:tag'))
2117		imgs = soup.find('div', class_='entry-content').find_all('img')
2118		return {
2119		'day': day.day,
2120		'month': day.month,
2121		'year': day.year,
2122		'title': title,
2123		'title2': title2,
2124		'description': description,
2125		'tags': tags,
2126		'img': [i['src'] for i in imgs],
2127		'alt': ' '.join(i['alt'] for i in imgs),
2128		}
2129
2130		@classmethod
2131		def get_url_from_archive_element(cls, tr):
2132		_, td2, td3 = tr.find_all('td')
2133		return td2.find('a')['href']
2134
2135		@classmethod
2136		def get_archive_elements(cls):
2137		archive_url = urljoin_wrapper(cls.url, 'archive-2')
2138		return reversed(get_soup_at_url(archive_url).find('tbody').find_all('tr'))
2139
2140
2141		class HappleTea(GenericNavigableComic):
2142		"""Class to retrieve Happle Tea Comics."""
2143		name = 'happletea'
2144		long_name = 'Happle Tea'
		@@ 2346-2370 (lines=25) @@
2343		class GerbilWithAJetpack(GenericNavigableComic):
2344		"""Class to retrieve GerbilWithAJetpack comics."""
2345		name = 'gerbil'
2346		long_name = 'Gerbil With A Jetpack'
2347		url = 'http://gerbilwithajetpack.com'
2348		get_first_comic_link = get_a_navi_navifirst
2349		get_navi_link = get_a_rel_next
2350
2351		@classmethod
2352		def get_comic_info(cls, soup, link):
2353		"""Get information about a particular comics."""
2354		title = soup.find('h2', class_='post-title').string
2355		author = soup.find("span", class_="post-author").find("a").string
2356		date_str = soup.find("span", class_="post-date").string
2357		day = string_to_date(date_str, "%B %d, %Y")
2358		imgs = soup.find("div", id="comic").find_all("img")
2359		alt = imgs[0]['alt']
2360		assert all(i['alt'] == i['title'] == alt for i in imgs)
2361		return {
2362		'img': [i['src'] for i in imgs],
2363		'title': title,
2364		'alt': alt,
2365		'author': author,
2366		'day': day.day,
2367		'month': day.month,
2368		'year': day.year
2369		}
2370
2371
2372		class EveryDayBlues(GenericNavigableComic):
2373		"""Class to retrieve EveryDayBlues Comics."""

SylvainDe / ComicBookMaker

Code Duplication Length = 24-27 lines in 13 locations

comics.py 13 locations