Code Duplication - SylvainDe/ComicBookMaker - Measure and Improve Code Quality continuously with Scrutinizer

Code Duplication Length = 21-27 lines in 19 locations

comics.py 19 locations


        }


class OneOneOneOneComic(GenericComicNotWorking, GenericNavigableComic):
    """Comic definition and page parser for 1111 Comics."""
    # Also on http://comics1111.tumblr.com
    # Also on https://tapastic.com/series/1111-Comics
    name = '1111'
    long_name = '1111 Comics'
    url = 'http://www.1111comics.me'
    _categories = ('ONEONEONEONE', )
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect title, publication date and image URLs for one comic page.

        `soup` is the parsed page that gets searched; `link` is accepted
        but not used by this implementation.
        """
        title_anchor = soup.find('h1', class_='comic-title').find('a')
        comic_title = title_anchor.string
        meta_header = soup.find('header', class_='comic-meta entry-meta')
        published = string_to_date(meta_header.find('a').string, "%B %d, %Y")
        og_tags = soup.find_all('meta', property='og:image')
        info = {
            'title': comic_title,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
        # Image URLs are taken from the og:image meta tags of the page.
        info['img'] = [tag['content'] for tag in og_tags]
        return info



        }


class TheGentlemanArmchair(GenericNavigableComic):
    """Comic definition and page parser for The Gentleman Armchair."""
    name = 'gentlemanarmchair'
    long_name = 'The Gentleman Armchair'
    url = 'http://thegentlemansarmchair.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect title, author, date and image URLs for one comic page.

        `soup` is the parsed page that gets searched; `link` is accepted
        but not used by this implementation.
        """
        heading = soup.find('h2', class_='post-title')
        comic_title = heading.string
        author_span = soup.find("span", class_="post-author")
        writer = author_span.find("a").string
        date_span = soup.find('span', class_='post-date')
        published = string_to_date(date_span.string, "%B %d, %Y")
        comic_div = soup.find('div', id='comic')
        # Every <img> inside the div with id "comic" counts as a panel.
        sources = []
        for picture in comic_div.find_all('img'):
            sources.append(picture['src'])
        return {
            'img': sources,
            'title': comic_title,
            'author': writer,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }



        }


class AngryAtNothing(GenericDeletedComic, GenericNavigableComic):
    """Comic definition and page parser for Angry at Nothing."""
    # Also on http://tapastic.com/series/Comics-yeah-definitely-comics-
    # Also on http://angryatnothing.tumblr.com
    name = 'angry'
    long_name = 'Angry At Nothing'
    url = 'http://www.angryatnothing.net'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, date parts and image URLs found on a comic page.

        `soup` is the parsed page that gets searched; `link` is accepted
        but not used by this implementation.
        """
        heading_anchor = soup.find('h1', class_='comic-title').find('a')
        heading_text = heading_anchor.string
        date_anchor = soup.find('header', class_='comic-meta entry-meta').find('a')
        when = string_to_date(date_anchor.string, "%B %d, %Y")
        og_images = soup.find_all('meta', property='og:image')
        result = {
            'title': heading_text,
            'month': when.month,
            'year': when.year,
            'day': when.day,
        }
        # Image URLs come from the "content" attribute of og:image tags.
        result['img'] = [meta['content'] for meta in og_images]
        return result


        }


class PenelopeBagieu(GenericNavigableComic):
    """Comic definition and page parser for Penelope Bagieu's blog."""
    name = 'bagieu'
    long_name = 'Ma vie est tout a fait fascinante (Bagieu)'
    url = 'http://www.penelope-jolicoeur.com'
    _categories = ('FRANCAIS', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.penelope-jolicoeur.com/2007/02/ma-vie-mon-oeuv.html'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, date parts and image URLs found on a blog entry.

        `soup` is the parsed page that gets searched; `link` is accepted
        but not used by this implementation.
        """
        date_header = soup.find('h2', class_='date-header')
        # The date text is parsed with a French locale argument.
        posted = string_to_date(date_header.string, "%A %d %B %Y", "fr_FR.utf8")
        entry_body = soup.find('div', class_='entry-body')
        pictures = entry_body.find_all('img')
        entry_title = soup.find('h3', class_='entry-header').string
        sources = [picture['src'] for picture in pictures]
        return {
            'title': entry_title,
            'img': sources,
            'month': posted.month,
            'year': posted.year,
            'day': posted.day,
        }



        }


class PicturesInBoxes(GenericNavigableComic):
    """Comic definition and page parser for Pictures In Boxes."""
    # Also on https://picturesinboxescomic.tumblr.com
    name = 'picturesinboxes'
    long_name = 'Pictures in Boxes'
    url = 'http://www.picturesinboxes.com'
    get_navi_link = get_a_navi_navinext
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.picturesinboxes.com/2013/10/26/tetris/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return date parts, image URLs, title and author for a comic page.

        `soup` is the parsed page that gets searched; `link` is accepted
        but not used by this implementation.
        """
        heading = soup.find('h2', class_='post-title')
        title = heading.string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        posted = string_to_date(
            soup.find('span', class_='post-date').string, '%B %d, %Y')
        pane = soup.find('div', class_='comicpane')
        imgs = pane.find_all('img')
        # Sanity checks: at least one image, each labelled with the title.
        assert imgs
        for picture in imgs:
            assert picture['title'] == picture['alt'] == title
        info = {
            'day': posted.day,
            'month': posted.month,
            'year': posted.year,
        }
        info['img'] = [picture['src'] for picture in imgs]
        info['title'] = title
        info['author'] = author
        return info



    first_url = 'http://www.commitstrip.com/en/2012/02/22/interview/'


class GenericBoumerie(GenericNavigableComic):
    """Shared base for the Boumeries comics in their different languages.

    `date_format` and `lang` are placeholders here (NotImplemented); they
    are passed to string_to_date when a page is parsed.
    """
    # Also on http://boumeries.tumblr.com
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next
    date_format = NotImplemented
    lang = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return short URL, images, title, author and date of a comic page.

        `soup` is the parsed page that gets searched; `link` is accepted
        but not used by this implementation.
        """
        heading = soup.find('h2', class_='post-title')
        comic_title = heading.string
        shortlink_tag = soup.find('link', rel='shortlink')
        shortlink = shortlink_tag['href']
        writer = soup.find("span", class_="post-author").find("a").string
        date_text = soup.find('span', class_='post-date').string
        # Format and locale are supplied by the class attributes.
        posted = string_to_date(date_text, cls.date_format, cls.lang)
        pictures = soup.find('div', id='comic').find_all('img')
        # Sanity check: alt text and title attribute agree on every image.
        for picture in pictures:
            assert picture['alt'] == picture['title']
        sources = [picture['src'] for picture in pictures]
        return {
            'short_url': shortlink,
            'img': sources,
            'title': comic_title,
            'author': writer,
            'month': posted.month,
            'year': posted.year,
            'day': posted.day,
        }


        }


class TheAwkwardYeti(GenericNavigableComic):
    """Comic definition and page parser for The Awkward Yeti."""
    # Also on http://www.gocomics.com/the-awkward-yeti
    # Also on http://larstheyeti.tumblr.com
    # Also on https://tapastic.com/series/TheAwkwardYeti
    name = 'yeti'
    long_name = 'The Awkward Yeti'
    url = 'http://theawkwardyeti.com'
    _categories = ('YETI', )
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return image URLs, title and date parts found on a comic page.

        `soup` is the parsed page that gets searched; `link` is accepted
        but not used by this implementation.
        """
        heading = soup.find('h2', class_='post-title')
        comic_title = heading.string
        date_span = soup.find("span", class_="post-date")
        posted = string_to_date(date_span.string, "%B %d, %Y")
        pictures = soup.find("div", id="comic").find_all("img")
        # Only the first image (position 0) is required to have matching
        # alt and title attributes; later images are not checked.
        for position, picture in enumerate(pictures):
            assert position > 0 or picture['alt'] == picture['title']
        sources = [picture['src'] for picture in pictures]
        return {
            'img': sources,
            'title': comic_title,
            'day': posted.day,
            'month': posted.month,
            'year': posted.year
        }



        }


class GerbilWithAJetpack(GenericNavigableComic):
    """Class to retrieve GerbilWithAJetpack comics."""
    name = 'gerbil'
    long_name = 'Gerbil With A Jetpack'
    url = 'http://gerbilwithajetpack.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        `soup` is the parsed page that gets searched; `link` is accepted
        but not used by this implementation.

        Returns a dict with the image URLs ('img'), the 'title', the shared
        alt text of the images ('alt', empty string when the page holds no
        image), the 'author' and the 'day' / 'month' / 'year' of the post.
        """
        title = soup.find('h2', class_='post-title').string
        author = soup.find("span", class_="post-author").find("a").string
        date_str = soup.find("span", class_="post-date").string
        day = string_to_date(date_str, "%B %d, %Y")
        imgs = soup.find("div", id="comic").find_all("img")
        # A page whose comic div contains no <img> yields an empty list;
        # indexing it unconditionally raised IndexError. Fall back to an
        # empty alt text so such a page still produces an (image-less) entry.
        alt = imgs[0]['alt'] if imgs else ''
        # Sanity check: every image carries the same alt and title text.
        assert all(i['alt'] == i['title'] == alt for i in imgs)
        return {
            'img': [i['src'] for i in imgs],
            'title': title,
            'alt': alt,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year
        }



        }


class EveryDayBlues(GenericDeletedComic, GenericNavigableComic):
    """Comic definition and page parser for Every Day Blues."""
    name = "blues"
    long_name = "Every Day Blues"
    url = "http://everydayblues.net"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return image URLs, title, author and date parts of a comic page.

        `soup` is the parsed page that gets searched; `link` is accepted
        but not used by this implementation.
        """
        heading = soup.find("h2", class_="post-title")
        title = heading.string
        author_span = soup.find("span", class_="post-author")
        writer = author_span.find("a").string
        date_text = soup.find("span", class_="post-date").string
        # The date text is parsed with a German locale argument.
        posted = string_to_date(date_text, "%d. %B %Y", "de_DE.utf8")
        imgs = soup.find("div", id="comic").find_all("img")
        # Sanity checks: images are labelled with the title, at most one image.
        for picture in imgs:
            assert picture['alt'] == picture['title'] == title
        assert len(imgs) <= 1, imgs
        sources = [picture['src'] for picture in imgs]
        return {
            'img': sources,
            'title': title,
            'author': writer,
            'day': posted.day,
            'month': posted.month,
            'year': posted.year
        }



        }


class MouseBearComedy(GenericComicNotWorking):  # Website has changed
    """Comic definition and page parser for Mouse Bear Comedy."""
    # Also on http://mousebearcomedy.tumblr.com
    name = 'mousebear'
    long_name = 'Mouse Bear Comedy'
    url = 'http://www.mousebearcomedy.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return date parts, image URLs, title and author of a comic page.

        `soup` is the parsed page that gets searched; `link` is accepted
        but not used by this implementation.
        """
        heading = soup.find('h2', class_='post-title')
        title = heading.string
        author_span = soup.find("span", class_="post-author")
        writer = author_span.find("a").string
        date_span = soup.find("span", class_="post-date")
        posted = string_to_date(date_span.string, '%B %d, %Y')
        pictures = soup.find("div", id="comic").find_all("img")
        # Sanity check: every image is labelled with the post title.
        for picture in pictures:
            assert picture['alt'] == picture['title'] == title
        info = {
            'day': posted.day,
            'month': posted.month,
            'year': posted.year,
        }
        info['img'] = [picture['src'] for picture in pictures]
        info['title'] = title
        info['author'] = writer
        return info



    url = 'http://english.bouletcorp.com'


class AmazingSuperPowers(GenericNavigableComic):
    """Comic definition and page parser for Amazing Super Powers."""
    name = 'asp'
    long_name = 'Amazing Super Powers'
    url = 'http://www.amazingsuperpowers.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, author, image URLs and date parts of a comic page.

        `soup` is the parsed page that gets searched; `link` is accepted
        but not used by this implementation.
        """
        author_span = soup.find("span", class_="post-author")
        writer = author_span.find("a").string
        date_span = soup.find('span', class_='post-date')
        posted = string_to_date(date_span.string, "%B %d, %Y")
        pictures = soup.find('div', id='comic').find_all('img')
        # The title is assembled from the title attributes of the images.
        title_parts = [picture['title'] for picture in pictures]
        joined_title = ' '.join(title_parts)
        # Sanity check: alt text and title attribute agree on every image.
        for picture in pictures:
            assert picture['alt'] == picture['title']
        sources = [picture['src'] for picture in pictures]
        return {
            'title': joined_title,
            'author': writer,
            'img': sources,
            'day': posted.day,
            'month': posted.month,
            'year': posted.year
        }



        }


class BiterComics(GenericNavigableComic):
    """Comic definition and page parser for Biter Comics."""
    name = "biter"
    long_name = "Biter Comics"
    url = "http://www.bitercomics.com"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return image URL, title, alt text, author and date of a comic page.

        `soup` is the parsed page that gets searched; `link` is accepted
        but not used by this implementation.
        """
        heading = soup.find("h1", class_="entry-title")
        comic_title = heading.string
        vcard = soup.find("span", class_="author vcard")
        writer = vcard.find("a").string
        date_span = soup.find("span", class_="entry-date")
        posted = string_to_date(date_span.string, "%B %d, %Y")
        imgs = soup.find("div", id="comic").find_all("img")
        # Sanity checks: alt matches title, and exactly one image is present.
        for picture in imgs:
            assert picture['alt'] == picture['title']
        assert len(imgs) == 1, imgs
        alt_text = imgs[0]['alt']
        sources = [picture['src'] for picture in imgs]
        return {
            'img': sources,
            'title': comic_title,
            'alt': alt_text,
            'author': writer,
            'day': posted.day,
            'month': posted.month,
            'year': posted.year
        }



        return reversed(get_soup_at_url(archive_url).find('tbody').find_all('tr'))


class HappleTea(GenericNavigableComic):
    """Comic definition and page parser for Happle Tea."""
    name = 'happletea'
    long_name = 'Happle Tea'
    url = 'http://www.happletea.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, images, alt text, date parts and author of a page.

        `soup` is the parsed page that gets searched; `link` is accepted
        but not used by this implementation.
        """
        pictures = soup.find('div', id='comic').find_all('img')
        # Title, author and date are all looked up inside the post content.
        post = soup.find('div', class_='post-content')
        heading = post.find('h2', class_='post-title')
        comic_title = heading.string
        writer = post.find('a', rel='author').string
        date_span = post.find('span', class_='post-date')
        posted = string_to_date(date_span.string, "%B %d, %Y")
        # Sanity check: alt text and title attribute agree on every image.
        for picture in pictures:
            assert picture['alt'] == picture['title']
        sources = [picture['src'] for picture in pictures]
        # Alt texts of all images are concatenated without a separator.
        alt_text = ''.join([picture['alt'] for picture in pictures])
        return {
            'title': comic_title,
            'img': sources,
            'alt': alt_text,
            'month': posted.month,
            'year': posted.year,
            'day': posted.day,
            'author': writer,
        }



        }


class LonnieMillsap(GenericNavigableComic):
    """Comic definition and page parser for Lonnie Millsap's comics."""
    name = 'millsap'
    long_name = 'Lonnie Millsap'
    url = 'http://www.lonniemillsap.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.lonniemillsap.com/?p=42'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, author, image URLs and date parts of a comic page.

        `soup` is the parsed page that gets searched; `link` is accepted
        but not used by this implementation.
        """
        heading = soup.find('h2', class_='post-title')
        comic_title = heading.string
        # Author, date and images are looked up inside the post content.
        post = soup.find('div', class_='post-content')
        author_span = post.find("span", class_="post-author")
        writer = author_span.find("a").string
        date_span = post.find("span", class_="post-date")
        posted = string_to_date(date_span.string, "%B %d, %Y")
        entry = post.find("div", class_="entry")
        sources = []
        for picture in entry.find_all("img"):
            sources.append(picture['src'])
        return {
            'title': comic_title,
            'author': writer,
            'img': sources,
            'month': posted.month,
            'year': posted.year,
            'day': posted.day,
        }



        }


class Ubertool(GenericNavigableComic):
    """Comic definition and page parser for Ubertool."""
    # Also on https://ubertool.tumblr.com
    # Also on https://tapastic.com/series/ubertool
    name = 'ubertool'
    long_name = 'Ubertool'
    url = 'http://ubertoolcomic.com'
    _categories = ('UBERTOOL', )
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_a_comicnavbase_comicnavnext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return image URLs, title and date parts found on a comic page.

        `soup` is the parsed page that gets searched; `link` is accepted
        but not used by this implementation.
        """
        heading = soup.find('h2', class_='post-title')
        comic_title = heading.string
        date_span = soup.find('span', class_='post-date')
        posted = string_to_date(date_span.string, "%B %d, %Y")
        comic_div = soup.find('div', id='comic')
        sources = []
        for picture in comic_div.find_all('img'):
            sources.append(picture['src'])
        return {
            'img': sources,
            'title': comic_title,
            'month': posted.month,
            'year': posted.year,
            'day': posted.day,
        }



        }


class Octopuns(GenericBlogspotComic):
    """Comic definition and page parser for Octopuns."""
    # Also on http://octopuns.tumblr.com
    name = 'octopuns'
    long_name = 'Octopuns'
    url = 'http://www.octopuns.net'  # or http://octopuns.blogspot.fr/
    first_url = 'http://octopuns.blogspot.com/2010/12/17122010-always-read-label.html'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return image URLs, title and date parts found on a blog post.

        `soup` is the parsed page that gets searched; `link` is accepted
        but not used by this implementation.
        """
        heading = soup.find('h3', class_='post-title entry-title')
        post_title = heading.string
        date_header = soup.find('h2', class_='date-header')
        posted = string_to_date(date_header.string, "%A, %B %d, %Y")
        # Image URLs come from the href of <link rel="image_src"> tags.
        sources = []
        for link_tag in soup.find_all('link', rel='image_src'):
            sources.append(link_tag['href'])
        return {
            'img': sources,
            'title': post_title,
            'day': posted.day,
            'month': posted.month,
            'year': posted.year,
        }



        }


class ManVersusManatee(GenericNavigableComic):
    """Comic definition and page parser for Man Versus Manatee."""
    url = 'http://manvsmanatee.com'
    name = 'manvsmanatee'
    long_name = 'Man Versus Manatee'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_a_comicnavbase_comicnavnext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return image URLs, title and date parts found on a comic page.

        `soup` is the parsed page that gets searched; `link` is accepted
        but not used by this implementation.
        """
        heading = soup.find('h2', class_='post-title')
        comic_title = heading.string
        comic_div = soup.find('div', id='comic')
        pictures = comic_div.find_all('img')
        date_span = soup.find('span', class_='post-date')
        posted = string_to_date(date_span.string, "%B %d, %Y")
        sources = [picture['src'] for picture in pictures]
        return {
            'img': sources,
            'title': comic_title,
            'month': posted.month,
            'year': posted.year,
            'day': posted.day,
        }



        }


class PlanC(GenericNavigableComic):
    """Comic definition and page parser for Plan C."""
    name = 'planc'
    long_name = 'Plan C'
    url = 'http://www.plancomic.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, image URLs and date parts found on a comic page.

        `soup` is the parsed page that gets searched; `link` is accepted
        but not used by this implementation.
        """
        heading = soup.find('h2', class_='post-title')
        comic_title = heading.string
        date_span = soup.find("span", class_="post-date")
        posted = string_to_date(date_span.string, "%B %d, %Y")
        comic_div = soup.find('div', id='comic')
        sources = []
        for picture in comic_div.find_all('img'):
            sources.append(picture['src'])
        return {
            'title': comic_title,
            'img': sources,
            'month': posted.month,
            'year': posted.year,
            'day': posted.day,
        }



        }


class WarehouseComic(GenericNavigableComic):
    """Comic definition and page parser for Warehouse Comic."""
    name = 'warehouse'
    long_name = 'Warehouse Comic'
    url = 'http://warehousecomic.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return image URLs, title and date parts found on a comic page.

        `soup` is the parsed page that gets searched; `link` is accepted
        but not used by this implementation.
        """
        heading = soup.find('h2', class_='post-title')
        comic_title = heading.string
        date_span = soup.find('span', class_='post-date')
        posted = string_to_date(date_span.string, "%B %d, %Y")
        pictures = soup.find('div', id='comic').find_all('img')
        sources = [picture['src'] for picture in pictures]
        return {
            'img': sources,
            'title': comic_title,
            'day': posted.day,
            'month': posted.month,
            'year': posted.year,
        }



		@@ 720-743 (lines=24) @@
717		}
718
719
720		class OneOneOneOneComic(GenericComicNotWorking, GenericNavigableComic):
721		"""Class to retrieve 1111 Comics."""
722		# Also on http://comics1111.tumblr.com
723		# Also on https://tapastic.com/series/1111-Comics
724		name = '1111'
725		long_name = '1111 Comics'
726		url = 'http://www.1111comics.me'
727		_categories = ('ONEONEONEONE', )
728		get_first_comic_link = get_div_navfirst_a
729		get_navi_link = get_link_rel_next
730
731		@classmethod
732		def get_comic_info(cls, soup, link):
733		"""Get information about a particular comics."""
734		title = soup.find('h1', class_='comic-title').find('a').string
735		date_str = soup.find('header', class_='comic-meta entry-meta').find('a').string
736		day = string_to_date(date_str, "%B %d, %Y")
737		imgs = soup.find_all('meta', property='og:image')
738		return {
739		'title': title,
740		'month': day.month,
741		'year': day.year,
742		'day': day.day,
743		'img': [i['content'] for i in imgs],
744		}
745
746
		@@ 948-970 (lines=23) @@
945		}
946
947
948		class TheGentlemanArmchair(GenericNavigableComic):
949		"""Class to retrieve The Gentleman Armchair comics."""
950		name = 'gentlemanarmchair'
951		long_name = 'The Gentleman Armchair'
952		url = 'http://thegentlemansarmchair.com'
953		get_first_comic_link = get_a_navi_navifirst
954		get_navi_link = get_link_rel_next
955
956		@classmethod
957		def get_comic_info(cls, soup, link):
958		"""Get information about a particular comics."""
959		title = soup.find('h2', class_='post-title').string
960		author = soup.find("span", class_="post-author").find("a").string
961		date_str = soup.find('span', class_='post-date').string
962		day = string_to_date(date_str, "%B %d, %Y")
963		imgs = soup.find('div', id='comic').find_all('img')
964		return {
965		'img': [i['src'] for i in imgs],
966		'title': title,
967		'author': author,
968		'month': day.month,
969		'year': day.year,
970		'day': day.day,
971		}
972
973
		@@ 747-768 (lines=22) @@
744		}
745
746
747		class AngryAtNothing(GenericDeletedComic, GenericNavigableComic):
748		"""Class to retrieve Angry at Nothing comics."""
749		# Also on http://tapastic.com/series/Comics-yeah-definitely-comics-
750		# Also on http://angryatnothing.tumblr.com
751		name = 'angry'
752		long_name = 'Angry At Nothing'
753		url = 'http://www.angryatnothing.net'
754		get_first_comic_link = get_div_navfirst_a
755		get_navi_link = get_a_rel_next
756
757		@classmethod
758		def get_comic_info(cls, soup, link):
759		"""Get information about a particular comics."""
760		title = soup.find('h1', class_='comic-title').find('a').string
761		date_str = soup.find('header', class_='comic-meta entry-meta').find('a').string
762		day = string_to_date(date_str, "%B %d, %Y")
763		imgs = soup.find_all('meta', property='og:image')
764		return {
765		'title': title,
766		'month': day.month,
767		'year': day.year,
768		'day': day.day,
769		'img': [i['content'] for i in imgs],
770		}
771
		@@ 694-716 (lines=23) @@
691		}
692
693
694		class PenelopeBagieu(GenericNavigableComic):
695		"""Class to retrieve comics from Penelope Bagieu's blog."""
696		name = 'bagieu'
697		long_name = 'Ma vie est tout a fait fascinante (Bagieu)'
698		url = 'http://www.penelope-jolicoeur.com'
699		_categories = ('FRANCAIS', )
700		get_navi_link = get_link_rel_next
701		get_first_comic_link = simulate_first_link
702		first_url = 'http://www.penelope-jolicoeur.com/2007/02/ma-vie-mon-oeuv.html'
703
704		@classmethod
705		def get_comic_info(cls, soup, link):
706		"""Get information about a particular comics."""
707		date_str = soup.find('h2', class_='date-header').string
708		day = string_to_date(date_str, "%A %d %B %Y", "fr_FR.utf8")
709		imgs = soup.find('div', class_='entry-body').find_all('img')
710		title = soup.find('h3', class_='entry-header').string
711		return {
712		'title': title,
713		'img': [i['src'] for i in imgs],
714		'month': day.month,
715		'year': day.year,
716		'day': day.day,
717		}
718
719
		@@ 1908-1934 (lines=27) @@
1905		}
1906
1907
1908		class PicturesInBoxes(GenericNavigableComic):
1909		"""Class to retrieve Pictures In Boxes comics."""
1910		# Also on https://picturesinboxescomic.tumblr.com
1911		name = 'picturesinboxes'
1912		long_name = 'Pictures in Boxes'
1913		url = 'http://www.picturesinboxes.com'
1914		get_navi_link = get_a_navi_navinext
1915		get_first_comic_link = simulate_first_link
1916		first_url = 'http://www.picturesinboxes.com/2013/10/26/tetris/'
1917
1918		@classmethod
1919		def get_comic_info(cls, soup, link):
1920		"""Get information about a particular comics."""
1921		title = soup.find('h2', class_='post-title').string
1922		author = soup.find("span", class_="post-author").find("a").string
1923		date_str = soup.find('span', class_='post-date').string
1924		day = string_to_date(date_str, '%B %d, %Y')
1925		imgs = soup.find('div', class_='comicpane').find_all('img')
1926		assert imgs
1927		assert all(i['title'] == i['alt'] == title for i in imgs)
1928		return {
1929		'day': day.day,
1930		'month': day.month,
1931		'year': day.year,
1932		'img': [i['src'] for i in imgs],
1933		'title': title,
1934		'author': author,
1935		}
1936
1937
		@@ 2895-2919 (lines=25) @@
2892		first_url = 'http://www.commitstrip.com/en/2012/02/22/interview/'
2893
2894
2895		class GenericBoumerie(GenericNavigableComic):
2896		"""Generic class to retrieve Boumeries comics in different languages."""
2897		# Also on http://boumeries.tumblr.com
2898		get_first_comic_link = get_a_navi_navifirst
2899		get_navi_link = get_link_rel_next
2900		date_format = NotImplemented
2901		lang = NotImplemented
2902
2903		@classmethod
2904		def get_comic_info(cls, soup, link):
2905		"""Get information about a particular comics."""
2906		title = soup.find('h2', class_='post-title').string
2907		short_url = soup.find('link', rel='shortlink')['href']
2908		author = soup.find("span", class_="post-author").find("a").string
2909		date_str = soup.find('span', class_='post-date').string
2910		day = string_to_date(date_str, cls.date_format, cls.lang)
2911		imgs = soup.find('div', id='comic').find_all('img')
2912		assert all(i['alt'] == i['title'] for i in imgs)
2913		return {
2914		'short_url': short_url,
2915		'img': [i['src'] for i in imgs],
2916		'title': title,
2917		'author': author,
2918		'month': day.month,
2919		'year': day.year,
2920		'day': day.day,
2921		}
2922
		@@ 2641-2666 (lines=26) @@
2638		}
2639
2640
2641		class TheAwkwardYeti(GenericNavigableComic):
2642		"""Class to retrieve The Awkward Yeti comics."""
2643		# Also on http://www.gocomics.com/the-awkward-yeti
2644		# Also on http://larstheyeti.tumblr.com
2645		# Also on https://tapastic.com/series/TheAwkwardYeti
2646		name = 'yeti'
2647		long_name = 'The Awkward Yeti'
2648		url = 'http://theawkwardyeti.com'
2649		_categories = ('YETI', )
2650		get_first_comic_link = get_a_navi_navifirst
2651		get_navi_link = get_link_rel_next
2652
2653		@classmethod
2654		def get_comic_info(cls, soup, link):
2655		"""Get information about a particular comics."""
2656		title = soup.find('h2', class_='post-title').string
2657		date_str = soup.find("span", class_="post-date").string
2658		day = string_to_date(date_str, "%B %d, %Y")
2659		imgs = soup.find("div", id="comic").find_all("img")
2660		assert all(idx > 0 or i['alt'] == i['title'] for idx, i in enumerate(imgs))
2661		return {
2662		'img': [i['src'] for i in imgs],
2663		'title': title,
2664		'day': day.day,
2665		'month': day.month,
2666		'year': day.year
2667		}
2668
2669
		@@ 2554-2579 (lines=26) @@
2551		}
2552
2553
2554		class GerbilWithAJetpack(GenericNavigableComic):
2555		"""Class to retrieve GerbilWithAJetpack comics."""
2556		name = 'gerbil'
2557		long_name = 'Gerbil With A Jetpack'
2558		url = 'http://gerbilwithajetpack.com'
2559		get_first_comic_link = get_a_navi_navifirst
2560		get_navi_link = get_a_rel_next
2561
2562		@classmethod
2563		def get_comic_info(cls, soup, link):
2564		"""Get information about a particular comics."""
2565		title = soup.find('h2', class_='post-title').string
2566		author = soup.find("span", class_="post-author").find("a").string
2567		date_str = soup.find("span", class_="post-date").string
2568		day = string_to_date(date_str, "%B %d, %Y")
2569		imgs = soup.find("div", id="comic").find_all("img")
2570		alt = imgs[0]['alt']
2571		assert all(i['alt'] == i['title'] == alt for i in imgs)
2572		return {
2573		'img': [i['src'] for i in imgs],
2574		'title': title,
2575		'alt': alt,
2576		'author': author,
2577		'day': day.day,
2578		'month': day.month,
2579		'year': day.year
2580		}
2581
2582
		@@ 2583-2607 (lines=25) @@
2580		}
2581
2582
2583		class EveryDayBlues(GenericDeletedComic, GenericNavigableComic):
2584		"""Class to retrieve EveryDayBlues Comics."""
2585		name = "blues"
2586		long_name = "Every Day Blues"
2587		url = "http://everydayblues.net"
2588		get_first_comic_link = get_a_navi_navifirst
2589		get_navi_link = get_link_rel_next
2590
2591		@classmethod
2592		def get_comic_info(cls, soup, link):
2593		"""Get information about a particular comics."""
2594		title = soup.find("h2", class_="post-title").string
2595		author = soup.find("span", class_="post-author").find("a").string
2596		date_str = soup.find("span", class_="post-date").string
2597		day = string_to_date(date_str, "%d. %B %Y", "de_DE.utf8")
2598		imgs = soup.find("div", id="comic").find_all("img")
2599		assert all(i['alt'] == i['title'] == title for i in imgs)
2600		assert len(imgs) <= 1, imgs
2601		return {
2602		'img': [i['src'] for i in imgs],
2603		'title': title,
2604		'author': author,
2605		'day': day.day,
2606		'month': day.month,
2607		'year': day.year
2608		}
2609
2610
		@@ 1796-1820 (lines=25) @@
1793		}
1794
1795
1796		class MouseBearComedy(GenericComicNotWorking): # Website has changed
1797		"""Class to retrieve Mouse Bear Comedy comics."""
1798		# Also on http://mousebearcomedy.tumblr.com
1799		name = 'mousebear'
1800		long_name = 'Mouse Bear Comedy'
1801		url = 'http://www.mousebearcomedy.com'
1802		get_first_comic_link = get_a_navi_navifirst
1803		get_navi_link = get_a_navi_comicnavnext_navinext
1804
1805		@classmethod
1806		def get_comic_info(cls, soup, link):
1807		"""Get information about a particular comics."""
1808		title = soup.find('h2', class_='post-title').string
1809		author = soup.find("span", class_="post-author").find("a").string
1810		date_str = soup.find("span", class_="post-date").string
1811		day = string_to_date(date_str, '%B %d, %Y')
1812		imgs = soup.find("div", id="comic").find_all("img")
1813		assert all(i['alt'] == i['title'] == title for i in imgs)
1814		return {
1815		'day': day.day,
1816		'month': day.month,
1817		'year': day.year,
1818		'img': [i['src'] for i in imgs],
1819		'title': title,
1820		'author': author,
1821		}
1822
1823
		@@ 1206-1229 (lines=24) @@
1203		url = 'http://english.bouletcorp.com'
1204
1205
1206		class AmazingSuperPowers(GenericNavigableComic):
1207		"""Class to retrieve Amazing Super Powers comics."""
1208		name = 'asp'
1209		long_name = 'Amazing Super Powers'
1210		url = 'http://www.amazingsuperpowers.com'
1211		get_first_comic_link = get_a_navi_navifirst
1212		get_navi_link = get_a_navi_navinext
1213
1214		@classmethod
1215		def get_comic_info(cls, soup, link):
1216		"""Get information about a particular comics."""
1217		author = soup.find("span", class_="post-author").find("a").string
1218		date_str = soup.find('span', class_='post-date').string
1219		day = string_to_date(date_str, "%B %d, %Y")
1220		imgs = soup.find('div', id='comic').find_all('img')
1221		title = ' '.join(i['title'] for i in imgs)
1222		assert all(i['alt'] == i['title'] for i in imgs)
1223		return {
1224		'title': title,
1225		'author': author,
1226		'img': [img['src'] for img in imgs],
1227		'day': day.day,
1228		'month': day.month,
1229		'year': day.year
1230		}
1231
1232
		@@ 2611-2637 (lines=27) @@
2608		}
2609
2610
class BiterComics(GenericNavigableComic):
    """Scraper for the Biter Comics webcomic.

    Navigation is delegated to ``get_a_navi_navifirst`` (first comic) and
    ``get_link_rel_next`` (next comic).
    """
    name = "biter"
    long_name = "Biter Comics"
    url = "http://www.bitercomics.com"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for one comic page.

        Exactly one image is expected inside the ``div`` with id ``comic``;
        its ``alt`` attribute is returned under the ``'alt'`` key.
        ``link`` is not used.
        """
        title = soup.find("h1", class_="entry-title").string
        author_span = soup.find("span", class_="author vcard")
        author = author_span.find("a").string
        raw_date = soup.find("span", class_="entry-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        comic_div = soup.find("div", id="comic")
        images = comic_div.find_all("img")
        # Sanity checks on the page layout: alt mirrors title, single image.
        assert all(img['alt'] == img['title'] for img in images)
        assert len(images) == 1, images
        only_image = images[0]
        alt_text = only_image['alt']
        info = {
            'img': [img['src'] for img in images],
            'title': title,
            'alt': alt_text,
            'author': author,
            'day': published.day,
            'month': published.month,
            'year': published.year,
        }
        return info
2639
2640
		@@ 2264-2289 (lines=26) @@
2261		return reversed(get_soup_at_url(archive_url).find('tbody').find_all('tr'))
2262
2263
class HappleTea(GenericNavigableComic):
    """Scraper for the Happle Tea webcomic.

    Navigation is delegated to ``get_a_navi_navifirst`` (first comic) and
    ``get_link_rel_next`` (next comic).
    """
    name = 'happletea'
    long_name = 'Happle Tea'
    url = 'http://www.happletea.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for one comic page.

        Images come from the ``div`` with id ``comic``; title, author and
        date are read from the ``post-content`` div. The ``'alt'`` value is
        the images' ``alt`` attributes concatenated without a separator.
        ``link`` is not used.
        """
        comic_div = soup.find('div', id='comic')
        images = comic_div.find_all('img')
        post_content = soup.find('div', class_='post-content')
        title = post_content.find('h2', class_='post-title').string
        author = post_content.find('a', rel='author').string
        raw_date = post_content.find('span', class_='post-date').string
        published = string_to_date(raw_date, "%B %d, %Y")
        # Every image is expected to carry the same text in alt and title.
        assert all(img['alt'] == img['title'] for img in images)
        info = {
            'title': title,
            'img': [img['src'] for img in images],
            'alt': ''.join(img['alt'] for img in images),
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'author': author,
        }
        return info
2291
2292
		@@ 2445-2469 (lines=25) @@
2442		}
2443
2444
class LonnieMillsap(GenericNavigableComic):
    """Scraper for Lonnie Millsap's comics.

    The first comic is obtained through ``simulate_first_link`` together
    with the ``first_url`` attribute; the next comic through
    ``get_link_rel_next``.
    """
    name = 'millsap'
    long_name = 'Lonnie Millsap'
    url = 'http://www.lonniemillsap.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.lonniemillsap.com/?p=42'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title, author, images, date) for one page.

        Author, date and images are looked up inside the ``post-content``
        div; images are those within its ``entry`` div. ``link`` is not used.
        """
        title = soup.find('h2', class_='post-title').string
        post_content = soup.find('div', class_='post-content')
        author_span = post_content.find("span", class_="post-author")
        author = author_span.find("a").string
        raw_date = post_content.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        entry_div = post_content.find("div", class_="entry")
        images = entry_div.find_all("img")
        info = {
            'title': title,
            'author': author,
            'img': [img['src'] for img in images],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
        return info
2471
2472
		@@ 3306-3329 (lines=24) @@
3303		}
3304
3305
class Ubertool(GenericNavigableComic):
    """Scraper for the Ubertool webcomic.

    Navigation is delegated to ``get_a_comicnavbase_comicnavfirst`` and
    ``get_a_comicnavbase_comicnavnext``.
    """
    # Also on https://ubertool.tumblr.com
    # Also on https://tapastic.com/series/ubertool
    name = 'ubertool'
    long_name = 'Ubertool'
    url = 'http://ubertoolcomic.com'
    _categories = ('UBERTOOL', )
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_a_comicnavbase_comicnavnext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (images, title, date) for one comic page.

        Images are those inside the ``div`` with id ``comic``.
        ``link`` is not used.
        """
        title = soup.find('h2', class_='post-title').string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, "%B %d, %Y")
        comic_div = soup.find('div', id='comic')
        images = comic_div.find_all('img')
        info = {
            'img': [img['src'] for img in images],
            'title': title,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
        return info
3331
3332
		@@ 3607-3627 (lines=21) @@
3604		}
3605
3606
class Octopuns(GenericBlogspotComic):
    """Scraper for the Octopuns webcomic (built on GenericBlogspotComic)."""
    # Also on http://octopuns.tumblr.com
    name = 'octopuns'
    long_name = 'Octopuns'
    url = 'http://www.octopuns.net'  # or http://octopuns.blogspot.fr/
    first_url = 'http://octopuns.blogspot.com/2010/12/17122010-always-read-label.html'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (images, title, date) for one comic page.

        Image URLs are taken from the ``href`` of every ``<link>`` element
        with ``rel="image_src"``; the date header is parsed with the
        ``"%A, %B %d, %Y"`` format. ``link`` is not used.
        """
        title = soup.find('h3', class_='post-title entry-title').string
        raw_date = soup.find('h2', class_='date-header').string
        published = string_to_date(raw_date, "%A, %B %d, %Y")
        image_links = soup.find_all('link', rel='image_src')
        info = {
            'img': [tag['href'] for tag in image_links],
            'title': title,
            'day': published.day,
            'month': published.month,
            'year': published.year,
        }
        return info
3629
3630
		@@ 3263-3283 (lines=21) @@
3260		}
3261
3262
class ManVersusManatee(GenericNavigableComic):
    """Scraper for the Man Versus Manatee webcomic.

    Navigation is delegated to ``get_a_comicnavbase_comicnavfirst`` and
    ``get_a_comicnavbase_comicnavnext``.
    """
    url = 'http://manvsmanatee.com'
    name = 'manvsmanatee'
    long_name = 'Man Versus Manatee'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_a_comicnavbase_comicnavnext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (images, title, date) for one comic page.

        Images are those inside the ``div`` with id ``comic``.
        ``link`` is not used.
        """
        title = soup.find('h2', class_='post-title').string
        comic_div = soup.find('div', id='comic')
        images = comic_div.find_all('img')
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, "%B %d, %Y")
        info = {
            'img': [img['src'] for img in images],
            'title': title,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
        return info
3285
3286
		@@ 2813-2833 (lines=21) @@
2810		}
2811
2812
class PlanC(GenericNavigableComic):
    """Scraper for the Plan C webcomic.

    Navigation is delegated to ``get_a_navi_navifirst`` (first comic) and
    ``get_a_navi_comicnavnext_navinext`` (next comic).
    """
    name = 'planc'
    long_name = 'Plan C'
    url = 'http://www.plancomic.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title, images, date) for one comic page.

        Images are those inside the ``div`` with id ``comic``.
        ``link`` is not used.
        """
        title = soup.find('h2', class_='post-title').string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        comic_div = soup.find('div', id='comic')
        images = comic_div.find_all('img')
        info = {
            'title': title,
            'img': [img['src'] for img in images],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
        return info
2835
2836
		@@ 1749-1769 (lines=21) @@
1746		}
1747
1748
class WarehouseComic(GenericNavigableComic):
    """Scraper for the Warehouse Comic webcomic.

    Navigation is delegated to ``get_a_navi_navifirst`` (first comic) and
    ``get_link_rel_next`` (next comic).
    """
    name = 'warehouse'
    long_name = 'Warehouse Comic'
    url = 'http://warehousecomic.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (images, title, date) for one comic page.

        Images are those inside the ``div`` with id ``comic``.
        ``link`` is not used.
        """
        title = soup.find('h2', class_='post-title').string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, "%B %d, %Y")
        comic_div = soup.find('div', id='comic')
        images = comic_div.find_all('img')
        info = {
            'img': [img['src'] for img in images],
            'title': title,
            'day': published.day,
            'month': published.month,
            'year': published.year,
        }
        return info
1771
1772

SylvainDe / ComicBookMaker

Code Duplication Length = 21-27 lines in 19 locations

comics.py 19 locations