Code Duplication - SylvainDe/ComicBookMaker - Measure and Improve Code Quality continuously with Scrutinizer

Code Duplication Length = 21-28 lines in 14 locations

comics.py 14 locations


        }


class SafelyEndangered(GenericNavigableComic):
    """Class to retrieve Safely Endangered comics."""
    # Also on http://tumblr.safelyendangered.com
    name = 'endangered'
    long_name = 'Safely Endangered'
    url = 'http://www.safelyendangered.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.safelyendangered.com/comic/ignored/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The title comes from the 'post-title' heading, the date from the
        'post-date' span (parsed as '%B %d, %Y') and the images from the
        'comic' div. `link` is not used by this implementation.

        Returns a dict with keys 'day', 'month', 'year', 'img', 'title' and
        'alt'. 'alt' is the alt text of the first image, or "" when the
        comic div holds no image.
        """
        title = soup.find('h2', class_='post-title').string
        date_str = soup.find('span', class_='post-date').string
        day = string_to_date(date_str, '%B %d, %Y')
        imgs = soup.find('div', id='comic').find_all('img')
        # Guard the index: a page without image must yield an empty alt text
        # (as the sibling scrapers do) instead of raising IndexError.
        alt = imgs[0]['alt'] if imgs else ""
        assert all(i['alt'] == i['title'] for i in imgs)
        return {
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [i['src'] for i in imgs],
            'title': title,
            'alt': alt,
        }



        }


class PicturesInBoxes(GenericNavigableComic):
    """Scraper for the Pictures In Boxes webcomic."""
    # Also on http://picturesinboxescomic.tumblr.com
    name = 'picturesinboxes'
    long_name = 'Pictures in Boxes'
    url = 'http://www.picturesinboxes.com'
    get_navi_link = get_a_navi_navinext
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.picturesinboxes.com/2013/10/26/tetris/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract the metadata of one comic page.

        `link` is not used. Returns a dict with keys 'day', 'month', 'year',
        'img', 'title' and 'author'.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        date_text = soup.find('span', class_='post-date').string
        date = string_to_date(date_text, '%B %d, %Y')
        pane = soup.find('div', class_='comicpane')
        images = pane.find_all('img')
        # Sanity checks: at least one image, each one labelled with the title.
        assert images
        assert all(
            img['title'] == img['alt'] == title for img in images)
        return dict(
            day=date.day,
            month=date.month,
            year=date.year,
            img=[img['src'] for img in images],
            title=title,
            author=author,
        )



        }


class TalesOfAbsurdity(GenericNavigableComic):
    """Scraper for the Tales Of Absurdity webcomic."""
    # Also on http://tapastic.com/series/Tales-Of-Absurdity
    # Also on http://talesofabsurdity.tumblr.com
    name = 'absurdity'
    long_name = 'Tales of Absurdity'
    url = 'http://talesofabsurdity.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract the metadata of one comic page.

        `link` is not used. Returns a dict with keys 'img', 'title', 'alt',
        'author', 'day', 'month' and 'year'; 'alt' is "" when the comic div
        holds no image.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        date_text = soup.find("span", class_="post-date").string
        date = string_to_date(date_text, "%B %d, %Y")
        comic_div = soup.find("div", id="comic")
        images = comic_div.find_all("img")
        # Sanity check: alt and title are expected to match on every image.
        assert all(img['alt'] == img['title'] for img in images)
        if images:
            alt = images[0]['alt']
        else:
            alt = ""
        return dict(
            img=[img['src'] for img in images],
            title=title,
            alt=alt,
            author=author,
            day=date.day,
            month=date.month,
            year=date.year,
        )



        }


class Optipess(GenericNavigableComic):
    """Scraper for the Optipess webcomic."""
    name = 'optipess'
    long_name = 'Optipess'
    url = 'http://www.optipess.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract the metadata of one comic page.

        `link` is not used. Returns a dict with keys 'title', 'alt',
        'author', 'img', 'month', 'year' and 'day'. A page with no comic div
        or no image gives an empty 'img' list and an empty 'alt'.
        """
        title = soup.find('h2', class_='post-title').string
        author = soup.find("span", class_="post-author").find("a").string
        comic_div = soup.find('div', id='comic')
        if comic_div:
            images = comic_div.find_all('img')
        else:
            images = []
        if images:
            alt = images[0]['title']
        else:
            alt = ""
        # Sanity check: every image carries the same alt/title text.
        assert all(img['alt'] == img['title'] == alt for img in images)
        date_text = soup.find('span', class_='post-date').string
        date = string_to_date(date_text, "%B %d, %Y")
        return dict(
            title=title,
            alt=alt,
            author=author,
            img=[img['src'] for img in images],
            month=date.month,
            year=date.year,
            day=date.day,
        )



        }


class LastPlaceComics(GenericNavigableComic):
    """Scraper for the Last Place Comics webcomic."""
    name = 'lastplace'
    long_name = 'Last Place Comics'
    url = "http://lastplacecomics.com"
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract the metadata of one comic page.

        `link` is not used. Returns a dict with keys 'img', 'title', 'alt',
        'author', 'day', 'month' and 'year'; 'alt' is "" when the comic div
        holds no image.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        date_text = soup.find("span", class_="post-date").string
        date = string_to_date(date_text, "%B %d, %Y")
        images = soup.find("div", id="comic").find_all("img")
        # Sanity checks: matching alt/title and at most one image per page.
        assert all(img['alt'] == img['title'] for img in images)
        assert len(images) <= 1
        if images:
            alt = images[0]['alt']
        else:
            alt = ""
        return dict(
            img=[img['src'] for img in images],
            title=title,
            alt=alt,
            author=author,
            day=date.day,
            month=date.month,
            year=date.year,
        )



        }


class GerbilWithAJetpack(GenericNavigableComic):
    """Class to retrieve GerbilWithAJetpack comics."""
    name = 'gerbil'
    long_name = 'Gerbil With A Jetpack'
    url = 'http://gerbilwithajetpack.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The title comes from the 'post-title' heading, the author from the
        link inside the 'post-author' span, the date from the 'post-date'
        span (parsed as '%B %d, %Y') and the images from the 'comic' div.
        `link` is not used by this implementation.

        Returns a dict with keys 'img', 'title', 'alt', 'author', 'day',
        'month' and 'year'. 'alt' is the alt text of the first image, or ""
        when the comic div holds no image.
        """
        title = soup.find('h2', class_='post-title').string
        author = soup.find("span", class_="post-author").find("a").string
        date_str = soup.find("span", class_="post-date").string
        day = string_to_date(date_str, "%B %d, %Y")
        imgs = soup.find("div", id="comic").find_all("img")
        # Guard the index: a page without image must yield an empty alt text
        # (as the sibling scrapers do) instead of raising IndexError.
        alt = imgs[0]['alt'] if imgs else ""
        assert all(i['alt'] == i['title'] == alt for i in imgs)
        return {
            'img': [i['src'] for i in imgs],
            'title': title,
            'alt': alt,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year
        }



        }


class ChuckleADuck(GenericNavigableComic):
    """Scraper for the Chuckle-A-Duck webcomic."""
    name = 'chuckleaduck'
    long_name = 'Chuckle-A-duck'
    url = 'http://chuckleaduck.com'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract the metadata of one comic page.

        `link` is not used. Returns a dict with keys 'month', 'year', 'day',
        'img', 'title' and 'author'. The title is taken from the first
        image's title attribute and is "" when there is no image.
        """
        raw_date = soup.find('span', class_='post-date').string
        # The date text is passed through remove_st_nd_rd_th_from_date
        # before being parsed.
        cleaned_date = remove_st_nd_rd_th_from_date(raw_date)
        date = string_to_date(cleaned_date, "%B %d, %Y")
        author = soup.find('span', class_='post-author').string
        comic_div = soup.find('div', id='comic')
        if comic_div:
            images = comic_div.find_all('img')
        else:
            images = []
        if images:
            title = images[0]['title']
        else:
            title = ""
        # Sanity check: every image carries the same title/alt text.
        assert all(img['title'] == img['alt'] == title for img in images)
        return dict(
            month=date.month,
            year=date.year,
            day=date.day,
            img=[img['src'] for img in images],
            title=title,
            author=author,
        )



        }


class BiterComics(GenericNavigableComic):
    """Scraper for the Biter Comics webcomic."""
    name = "biter"
    long_name = "Biter Comics"
    url = "http://www.bitercomics.com"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract the metadata of one comic page.

        `link` is not used. Returns a dict with keys 'img', 'title', 'alt',
        'author', 'day', 'month' and 'year'.
        """
        title = soup.find("h1", class_="entry-title").string
        author_span = soup.find("span", class_="author vcard")
        author = author_span.find("a").string
        date_text = soup.find("span", class_="entry-date").string
        date = string_to_date(date_text, "%B %d, %Y")
        images = soup.find("div", id="comic").find_all("img")
        # Sanity checks: matching alt/title and exactly one image per page.
        assert all(img['alt'] == img['title'] for img in images)
        assert len(images) == 1
        alt = images[0]['alt']
        return dict(
            img=[img['src'] for img in images],
            title=title,
            alt=alt,
            author=author,
            day=date.day,
            month=date.month,
            year=date.year,
        )



    url = 'http://tubeytoons.com'


class CompletelySeriousComics(GenericNavigableComic):
    """Scraper for the Completely Serious Comics webcomic."""
    name = 'completelyserious'
    long_name = 'Completely Serious Comics'
    url = 'http://completelyseriouscomics.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract the metadata of one comic page.

        `link` is not used. Returns a dict with keys 'month', 'year', 'day',
        'img', 'title', 'alt' and 'author'.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find('span', class_='post-author')
        # The author is read from the second child node of the span.
        author = author_span.contents[1].string
        date_text = soup.find('span', class_='post-date').string
        date = string_to_date(date_text, '%B %d, %Y')
        pane = soup.find('div', class_='comicpane')
        images = pane.find_all('img')
        assert images
        alt = images[0]['title']
        # Sanity check: every image carries the same title/alt text.
        assert all(img['title'] == img['alt'] == alt for img in images)
        return dict(
            month=date.month,
            year=date.year,
            day=date.day,
            img=[img['src'] for img in images],
            title=title,
            alt=alt,
            author=author,
        )



        return reversed(get_soup_at_url(archive_url).find('tbody').find_all('tr'))


class HappleTea(GenericNavigableComic):
    """Scraper for the Happle Tea webcomic."""
    name = 'happletea'
    long_name = 'Happle Tea'
    url = 'http://www.happletea.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract the metadata of one comic page.

        `link` is not used. Returns a dict with keys 'title', 'img', 'alt',
        'month', 'year', 'day' and 'author'; 'alt' is the concatenation of
        the alt texts of all images.
        """
        comic_div = soup.find('div', id='comic')
        images = comic_div.find_all('img')
        # Title, author and date are all looked up inside the post content.
        post = soup.find('div', class_='post-content')
        title = post.find('h2', class_='post-title').string
        author = post.find('a', rel='author').string
        date_text = post.find('span', class_='post-date').string
        date = string_to_date(date_text, "%B %d, %Y")
        assert all(img['alt'] == img['title'] for img in images)
        return dict(
            title=title,
            img=[img['src'] for img in images],
            alt=''.join([img['alt'] for img in images]),
            month=date.month,
            year=date.year,
            day=date.day,
            author=author,
        )



    first_url = 'http://www.commitstrip.com/en/2012/02/22/interview/'


class GenericBoumerie(GenericNavigableComic):
    """Shared scraper for the Boumeries comics in different languages.

    `date_format` and `lang` are placeholders here; get_comic_info passes
    them to string_to_date.
    """
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next
    date_format = NotImplemented
    lang = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract the metadata of one comic page.

        `link` is not used. Returns a dict with keys 'short_url', 'img',
        'title', 'author', 'month', 'year' and 'day'.
        """
        title = soup.find('h2', class_='post-title').string
        shortlink_tag = soup.find('link', rel='shortlink')
        short_url = shortlink_tag['href']
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        date_text = soup.find('span', class_='post-date').string
        # Date format and locale come from the class attributes.
        date = string_to_date(date_text, cls.date_format, cls.lang)
        images = soup.find('div', id='comic').find_all('img')
        assert all(img['alt'] == img['title'] for img in images)
        return dict(
            short_url=short_url,
            img=[img['src'] for img in images],
            title=title,
            author=author,
            month=date.month,
            year=date.year,
            day=date.day,
        )



        }


class LinsEditions(GenericNavigableComic):
    """Scraper for the L.I.N.S. Editions webcomic."""
    # Also on http://linscomics.tumblr.com
    name = 'lins'
    long_name = 'L.I.N.S. Editions'
    url = 'https://linsedition.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'https://linsedition.com/2011/09/07/l-i-n-s/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract the metadata of one comic page from its meta tags.

        `link` is not used. Returns a dict with keys 'title', 'img',
        'month', 'year' and 'day'.
        """
        title = soup.find('meta', property='og:title')['content']
        image_metas = soup.find_all('meta', property='og:image')
        published = soup.find('meta', property='article:published_time')
        # Only the first 10 characters are kept and parsed as '%Y-%m-%d'.
        date_text = published['content'][:10]
        date = string_to_date(date_text, "%Y-%m-%d")
        return dict(
            title=title,
            img=[meta['content'] for meta in image_metas],
            month=date.month,
            year=date.year,
            day=date.day,
        )



        return []


class ExtraFabulousComics(GenericNavigableComic):
    """Class to retrieve Extra Fabulous Comics."""
    name = 'efc'
    long_name = 'Extra Fabulous Comics'
    url = 'http://extrafabulouscomics.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Images are the 'img' tags whose 'src' starts with
        '<cls.url>/wp-content/uploads/'. The title and the date come from
        the 'og:title' and 'article:published_time' meta tags. `link` is not
        used by this implementation.

        Returns a dict with keys 'title', 'img', 'month', 'year', 'day' and
        'prefix' (the title followed by '-').
        """
        # Escape the URL so that its '.' is matched literally rather than as
        # the regex "any character" wildcard.
        img_src_re = re.compile('^%s/wp-content/uploads/' % re.escape(cls.url))
        imgs = soup.find_all('img', src=img_src_re)
        title = soup.find('meta', property='og:title')['content']
        # Only the first 10 characters are kept and parsed as '%Y-%m-%d'.
        date_str = soup.find('meta', property='article:published_time')['content'][:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        return {
            'title': title,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'prefix': title + '-'
        }



        }


class GenericLeMondeBlog(GenericNavigableComic):
    """Shared scraper for comics hosted on Le Monde blogs.

    `first_url` is a placeholder here.
    """
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract the metadata of one comic page.

        `link` is not used. Returns a dict with keys 'title', 'url2', 'img',
        'month', 'year' and 'day'.
        """
        shortlink_tag = soup.find('link', rel='shortlink')
        url2 = shortlink_tag['href']
        title = soup.find('meta', property='og:title')['content']
        date_text = soup.find("span", class_="entry-date").string
        # The date is parsed as '%d %B %Y' with the "fr_FR.utf8" locale.
        date = string_to_date(date_text, "%d %B %Y", "fr_FR.utf8")
        image_metas = soup.find_all('meta', property='og:image')
        return dict(
            title=title,
            url2=url2,
            # Each image address goes through convert_iri_to_plain_ascii_uri.
            img=[convert_iri_to_plain_ascii_uri(meta['content'])
                 for meta in image_metas],
            month=date.month,
            year=date.year,
            day=date.day,
        )



		@@ 1781-1806 (lines=26) @@
1778		}
1779
1780
1781		class SafelyEndangered(GenericNavigableComic):
1782		"""Class to retrieve Safely Endangered comics."""
1783		# Also on http://tumblr.safelyendangered.com
1784		name = 'endangered'
1785		long_name = 'Safely Endangered'
1786		url = 'http://www.safelyendangered.com'
1787		get_navi_link = get_link_rel_next
1788		get_first_comic_link = simulate_first_link
1789		first_url = 'http://www.safelyendangered.com/comic/ignored/'
1790
1791		@classmethod
1792		def get_comic_info(cls, soup, link):
1793		"""Get information about a particular comics."""
1794		title = soup.find('h2', class_='post-title').string
1795		date_str = soup.find('span', class_='post-date').string
1796		day = string_to_date(date_str, '%B %d, %Y')
1797		imgs = soup.find('div', id='comic').find_all('img')
1798		alt = imgs[0]['alt']
1799		assert all(i['alt'] == i['title'] for i in imgs)
1800		return {
1801		'day': day.day,
1802		'month': day.month,
1803		'year': day.year,
1804		'img': [i['src'] for i in imgs],
1805		'title': title,
1806		'alt': alt,
1807		}
1808
1809
		@@ 1810-1836 (lines=27) @@
1807		}
1808
1809
1810		class PicturesInBoxes(GenericNavigableComic):
1811		"""Class to retrieve Pictures In Boxes comics."""
1812		# Also on http://picturesinboxescomic.tumblr.com
1813		name = 'picturesinboxes'
1814		long_name = 'Pictures in Boxes'
1815		url = 'http://www.picturesinboxes.com'
1816		get_navi_link = get_a_navi_navinext
1817		get_first_comic_link = simulate_first_link
1818		first_url = 'http://www.picturesinboxes.com/2013/10/26/tetris/'
1819
1820		@classmethod
1821		def get_comic_info(cls, soup, link):
1822		"""Get information about a particular comics."""
1823		title = soup.find('h2', class_='post-title').string
1824		author = soup.find("span", class_="post-author").find("a").string
1825		date_str = soup.find('span', class_='post-date').string
1826		day = string_to_date(date_str, '%B %d, %Y')
1827		imgs = soup.find('div', class_='comicpane').find_all('img')
1828		assert imgs
1829		assert all(i['title'] == i['alt'] == title for i in imgs)
1830		return {
1831		'day': day.day,
1832		'month': day.month,
1833		'year': day.year,
1834		'img': [i['src'] for i in imgs],
1835		'title': title,
1836		'author': author,
1837		}
1838
1839
		@@ 2518-2545 (lines=28) @@
2515		}
2516
2517
2518		class TalesOfAbsurdity(GenericNavigableComic):
2519		"""Class to retrieve Tales Of Absurdity comics."""
2520		# Also on http://tapastic.com/series/Tales-Of-Absurdity
2521		# Also on http://talesofabsurdity.tumblr.com
2522		name = 'absurdity'
2523		long_name = 'Tales of Absurdity'
2524		url = 'http://talesofabsurdity.com'
2525		get_first_comic_link = get_a_navi_navifirst
2526		get_navi_link = get_a_navi_comicnavnext_navinext
2527
2528		@classmethod
2529		def get_comic_info(cls, soup, link):
2530		"""Get information about a particular comics."""
2531		title = soup.find('h2', class_='post-title').string
2532		author = soup.find("span", class_="post-author").find("a").string
2533		date_str = soup.find("span", class_="post-date").string
2534		day = string_to_date(date_str, "%B %d, %Y")
2535		imgs = soup.find("div", id="comic").find_all("img")
2536		assert all(i['alt'] == i['title'] for i in imgs)
2537		alt = imgs[0]['alt'] if imgs else ""
2538		return {
2539		'img': [i['src'] for i in imgs],
2540		'title': title,
2541		'alt': alt,
2542		'author': author,
2543		'day': day.day,
2544		'month': day.month,
2545		'year': day.year
2546		}
2547
2548
		@@ 2738-2764 (lines=27) @@
2735		}
2736
2737
2738		class Optipess(GenericNavigableComic):
2739		"""Class to retrieve Optipess comics."""
2740		name = 'optipess'
2741		long_name = 'Optipess'
2742		url = 'http://www.optipess.com'
2743		get_first_comic_link = get_a_navi_navifirst
2744		get_navi_link = get_link_rel_next
2745
2746		@classmethod
2747		def get_comic_info(cls, soup, link):
2748		"""Get information about a particular comics."""
2749		title = soup.find('h2', class_='post-title').string
2750		author = soup.find("span", class_="post-author").find("a").string
2751		comic = soup.find('div', id='comic')
2752		imgs = comic.find_all('img') if comic else []
2753		alt = imgs[0]['title'] if imgs else ""
2754		assert all(i['alt'] == i['title'] == alt for i in imgs)
2755		date_str = soup.find('span', class_='post-date').string
2756		day = string_to_date(date_str, "%B %d, %Y")
2757		return {
2758		'title': title,
2759		'alt': alt,
2760		'author': author,
2761		'img': [i['src'] for i in imgs],
2762		'month': day.month,
2763		'year': day.year,
2764		'day': day.day,
2765		}
2766
2767
		@@ 2488-2514 (lines=27) @@
2485		}
2486
2487
2488		class LastPlaceComics(GenericNavigableComic):
2489		"""Class to retrieve Last Place Comics."""
2490		name = 'lastplace'
2491		long_name = 'Last Place Comics'
2492		url = "http://lastplacecomics.com"
2493		get_first_comic_link = get_a_comicnavbase_comicnavfirst
2494		get_navi_link = get_link_rel_next
2495
2496		@classmethod
2497		def get_comic_info(cls, soup, link):
2498		"""Get information about a particular comics."""
2499		title = soup.find('h2', class_='post-title').string
2500		author = soup.find("span", class_="post-author").find("a").string
2501		date_str = soup.find("span", class_="post-date").string
2502		day = string_to_date(date_str, "%B %d, %Y")
2503		imgs = soup.find("div", id="comic").find_all("img")
2504		assert all(i['alt'] == i['title'] for i in imgs)
2505		assert len(imgs) <= 1
2506		alt = imgs[0]['alt'] if imgs else ""
2507		return {
2508		'img': [i['src'] for i in imgs],
2509		'title': title,
2510		'alt': alt,
2511		'author': author,
2512		'day': day.day,
2513		'month': day.month,
2514		'year': day.year
2515		}
2516
2517
		@@ 2321-2346 (lines=26) @@
2318		}
2319
2320
2321		class GerbilWithAJetpack(GenericNavigableComic):
2322		"""Class to retrieve GerbilWithAJetpack comics."""
2323		name = 'gerbil'
2324		long_name = 'Gerbil With A Jetpack'
2325		url = 'http://gerbilwithajetpack.com'
2326		get_first_comic_link = get_a_navi_navifirst
2327		get_navi_link = get_a_rel_next
2328
2329		@classmethod
2330		def get_comic_info(cls, soup, link):
2331		"""Get information about a particular comics."""
2332		title = soup.find('h2', class_='post-title').string
2333		author = soup.find("span", class_="post-author").find("a").string
2334		date_str = soup.find("span", class_="post-date").string
2335		day = string_to_date(date_str, "%B %d, %Y")
2336		imgs = soup.find("div", id="comic").find_all("img")
2337		alt = imgs[0]['alt']
2338		assert all(i['alt'] == i['title'] == alt for i in imgs)
2339		return {
2340		'img': [i['src'] for i in imgs],
2341		'title': title,
2342		'alt': alt,
2343		'author': author,
2344		'day': day.day,
2345		'month': day.month,
2346		'year': day.year
2347		}
2348
2349
		@@ 2019-2043 (lines=25) @@
2016		}
2017
2018
2019		class ChuckleADuck(GenericNavigableComic):
2020		"""Class to retrieve Chuckle-A-Duck comics."""
2021		name = 'chuckleaduck'
2022		long_name = 'Chuckle-A-duck'
2023		url = 'http://chuckleaduck.com'
2024		get_first_comic_link = get_div_navfirst_a
2025		get_navi_link = get_link_rel_next
2026
2027		@classmethod
2028		def get_comic_info(cls, soup, link):
2029		"""Get information about a particular comics."""
2030		date_str = soup.find('span', class_='post-date').string
2031		day = string_to_date(remove_st_nd_rd_th_from_date(date_str), "%B %d, %Y")
2032		author = soup.find('span', class_='post-author').string
2033		div = soup.find('div', id='comic')
2034		imgs = div.find_all('img') if div else []
2035		title = imgs[0]['title'] if imgs else ""
2036		assert all(i['title'] == i['alt'] == title for i in imgs)
2037		return {
2038		'month': day.month,
2039		'year': day.year,
2040		'day': day.day,
2041		'img': [i['src'] for i in imgs],
2042		'title': title,
2043		'author': author,
2044		}
2045
2046
		@@ 2378-2404 (lines=27) @@
2375		}
2376
2377
2378		class BiterComics(GenericNavigableComic):
2379		"""Class to retrieve Biter Comics."""
2380		name = "biter"
2381		long_name = "Biter Comics"
2382		url = "http://www.bitercomics.com"
2383		get_first_comic_link = get_a_navi_navifirst
2384		get_navi_link = get_link_rel_next
2385
2386		@classmethod
2387		def get_comic_info(cls, soup, link):
2388		"""Get information about a particular comics."""
2389		title = soup.find("h1", class_="entry-title").string
2390		author = soup.find("span", class_="author vcard").find("a").string
2391		date_str = soup.find("span", class_="entry-date").string
2392		day = string_to_date(date_str, "%B %d, %Y")
2393		imgs = soup.find("div", id="comic").find_all("img")
2394		assert all(i['alt'] == i['title'] for i in imgs)
2395		assert len(imgs) == 1
2396		alt = imgs[0]['alt']
2397		return {
2398		'img': [i['src'] for i in imgs],
2399		'title': title,
2400		'alt': alt,
2401		'author': author,
2402		'day': day.day,
2403		'month': day.month,
2404		'year': day.year
2405		}
2406
2407
		@@ 1932-1958 (lines=27) @@
1929		url = 'http://tubeytoons.com'
1930
1931
1932		class CompletelySeriousComics(GenericNavigableComic):
1933		"""Class to retrieve Completely Serious comics."""
1934		name = 'completelyserious'
1935		long_name = 'Completely Serious Comics'
1936		url = 'http://completelyseriouscomics.com'
1937		get_first_comic_link = get_a_navi_navifirst
1938		get_navi_link = get_a_navi_navinext
1939
1940		@classmethod
1941		def get_comic_info(cls, soup, link):
1942		"""Get information about a particular comics."""
1943		title = soup.find('h2', class_='post-title').string
1944		author = soup.find('span', class_='post-author').contents[1].string
1945		date_str = soup.find('span', class_='post-date').string
1946		day = string_to_date(date_str, '%B %d, %Y')
1947		imgs = soup.find('div', class_='comicpane').find_all('img')
1948		assert imgs
1949		alt = imgs[0]['title']
1950		assert all(i['title'] == i['alt'] == alt for i in imgs)
1951		return {
1952		'month': day.month,
1953		'year': day.year,
1954		'day': day.day,
1955		'img': [i['src'] for i in imgs],
1956		'title': title,
1957		'alt': alt,
1958		'author': author,
1959		}
1960
1961
		@@ 2119-2144 (lines=26) @@
2116		return reversed(get_soup_at_url(archive_url).find('tbody').find_all('tr'))
2117
2118
2119		class HappleTea(GenericNavigableComic):
2120		"""Class to retrieve Happle Tea Comics."""
2121		name = 'happletea'
2122		long_name = 'Happle Tea'
2123		url = 'http://www.happletea.com'
2124		get_first_comic_link = get_a_navi_navifirst
2125		get_navi_link = get_link_rel_next
2126
2127		@classmethod
2128		def get_comic_info(cls, soup, link):
2129		"""Get information about a particular comics."""
2130		imgs = soup.find('div', id='comic').find_all('img')
2131		post = soup.find('div', class_='post-content')
2132		title = post.find('h2', class_='post-title').string
2133		author = post.find('a', rel='author').string
2134		date_str = post.find('span', class_='post-date').string
2135		day = string_to_date(date_str, "%B %d, %Y")
2136		assert all(i['alt'] == i['title'] for i in imgs)
2137		return {
2138		'title': title,
2139		'img': [i['src'] for i in imgs],
2140		'alt': ''.join(i['alt'] for i in imgs),
2141		'month': day.month,
2142		'year': day.year,
2143		'day': day.day,
2144		'author': author,
2145		}
2146
2147
		@@ 2659-2683 (lines=25) @@
2656		first_url = 'http://www.commitstrip.com/en/2012/02/22/interview/'
2657
2658
2659		class GenericBoumerie(GenericNavigableComic):
2660		"""Generic class to retrieve Boumeries comics in different languages."""
2661		get_first_comic_link = get_a_navi_navifirst
2662		get_navi_link = get_link_rel_next
2663		date_format = NotImplemented
2664		lang = NotImplemented
2665
2666		@classmethod
2667		def get_comic_info(cls, soup, link):
2668		"""Get information about a particular comics."""
2669		title = soup.find('h2', class_='post-title').string
2670		short_url = soup.find('link', rel='shortlink')['href']
2671		author = soup.find("span", class_="post-author").find("a").string
2672		date_str = soup.find('span', class_='post-date').string
2673		day = string_to_date(date_str, cls.date_format, cls.lang)
2674		imgs = soup.find('div', id='comic').find_all('img')
2675		assert all(i['alt'] == i['title'] for i in imgs)
2676		return {
2677		'short_url': short_url,
2678		'img': [i['src'] for i in imgs],
2679		'title': title,
2680		'author': author,
2681		'month': day.month,
2682		'year': day.year,
2683		'day': day.day,
2684		}
2685
2686
		@@ 2251-2273 (lines=23) @@
2248		}
2249
2250
2251		class LinsEditions(GenericNavigableComic):
2252		"""Class to retrieve L.I.N.S. Editions comics."""
2253		# Also on http://linscomics.tumblr.com
2254		name = 'lins'
2255		long_name = 'L.I.N.S. Editions'
2256		url = 'https://linsedition.com'
2257		get_navi_link = get_link_rel_next
2258		get_first_comic_link = simulate_first_link
2259		first_url = 'https://linsedition.com/2011/09/07/l-i-n-s/'
2260
2261		@classmethod
2262		def get_comic_info(cls, soup, link):
2263		"""Get information about a particular comics."""
2264		title = soup.find('meta', property='og:title')['content']
2265		imgs = soup.find_all('meta', property='og:image')
2266		date_str = soup.find('meta', property='article:published_time')['content'][:10]
2267		day = string_to_date(date_str, "%Y-%m-%d")
2268		return {
2269		'title': title,
2270		'img': [i['content'] for i in imgs],
2271		'month': day.month,
2272		'year': day.year,
2273		'day': day.day,
2274		}
2275
2276
		@@ 338-360 (lines=23) @@
335		return []
336
337
338		class ExtraFabulousComics(GenericNavigableComic):
339		"""Class to retrieve Extra Fabulous Comics."""
340		name = 'efc'
341		long_name = 'Extra Fabulous Comics'
342		url = 'http://extrafabulouscomics.com'
343		get_first_comic_link = get_a_navi_navifirst
344		get_navi_link = get_link_rel_next
345
346		@classmethod
347		def get_comic_info(cls, soup, link):
348		"""Get information about a particular comics."""
349		img_src_re = re.compile('^%s/wp-content/uploads/' % cls.url)
350		imgs = soup.find_all('img', src=img_src_re)
351		title = soup.find('meta', property='og:title')['content']
352		date_str = soup.find('meta', property='article:published_time')['content'][:10]
353		day = string_to_date(date_str, "%Y-%m-%d")
354		return {
355		'title': title,
356		'img': [i['src'] for i in imgs],
357		'month': day.month,
358		'year': day.year,
359		'day': day.day,
360		'prefix': title + '-'
361		}
362
363
		@@ 364-384 (lines=21) @@
361		}
362
363
364		class GenericLeMondeBlog(GenericNavigableComic):
365		"""Generic class to retrieve comics from Le Monde blogs."""
366		get_navi_link = get_link_rel_next
367		get_first_comic_link = simulate_first_link
368		first_url = NotImplemented
369
370		@classmethod
371		def get_comic_info(cls, soup, link):
372		"""Get information about a particular comics."""
373		url2 = soup.find('link', rel='shortlink')['href']
374		title = soup.find('meta', property='og:title')['content']
375		date_str = soup.find("span", class_="entry-date").string
376		day = string_to_date(date_str, "%d %B %Y", "fr_FR.utf8")
377		imgs = soup.find_all('meta', property='og:image')
378		return {
379		'title': title,
380		'url2': url2,
381		'img': [convert_iri_to_plain_ascii_uri(i['content']) for i in imgs],
382		'month': day.month,
383		'year': day.year,
384		'day': day.day,
385		}
386
387

SylvainDe / ComicBookMaker

Code Duplication Length = 21-28 lines in 14 locations

comics.py 14 locations