Code Duplication - SylvainDe/ComicBookMaker - Measure and Improve Code Quality continuously with Scrutinizer

Code Duplication Length = 22-29 lines in 16 locations

comics.py 16 locations


    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title, author, images, date) for one comic.

        `soup` is the parsed comic page (presumably a BeautifulSoup
        document - TODO confirm); `link` is not used by this implementation.
        The author, the date and the images are all looked up inside the
        'post-content' div, the title in the page's 'post-title' heading.
        """
        heading = soup.find('h2', class_='post-title')
        title = heading.string
        content = soup.find('div', class_='post-content')
        author_span = content.find("span", class_="post-author")
        author = author_span.find("a").string
        posted = content.find("span", class_="post-date").string
        # The date text is handed to string_to_date with the "%B %d, %Y" format.
        day = string_to_date(posted, "%B %d, %Y")
        entry = content.find("div", class_="entry")
        pictures = entry.find_all("img")
        return {
            'title': title,
            'author': author,
            'img': [pic['src'] for pic in pictures],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }


class LinsEditions(GenericNavigableComic):
    """Class to retrieve L.I.N.S. Editions comics."""
    # Also on http://linscomics.tumblr.com
    # Now on https://warandpeas.com
    # Identifiers and base URL of the comic.
    name = 'lins'
    long_name = 'L.I.N.S. Editions'
    url = 'https://linsedition.com'
    _categories = ('LINS', )
    # Navigation is delegated to helpers defined elsewhere in the file.
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    # Presumably the address simulate_first_link starts from - TODO confirm.
    first_url = 'https://linsedition.com/2011/09/07/l-i-n-s/'


        }


class Mercworks(GenericNavigableComic):
    """Retriever for the Mercworks comics."""
    # Also on http://mercworks.tumblr.com
    name = 'mercworks'
    long_name = 'Mercworks'
    url = 'http://mercworks.net'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict of one comic from the page's <meta> tags.

        Title, optional description, publication date and images are all
        read from 'og:*' / 'article:published_time' meta properties.
        `link` is not used by this implementation.
        """
        og_title = soup.find('meta', property='og:title')
        title = og_title['content']
        og_desc = soup.find('meta', property='og:description')
        # The description tag is optional: default to an empty string.
        if og_desc:
            desc = og_desc['content']
        else:
            desc = ""
        published = soup.find('meta', property='article:published_time')
        # Only the first 10 characters (the "%Y-%m-%d" part) are parsed.
        day = string_to_date(published['content'][:10], "%Y-%m-%d")
        og_images = soup.find_all('meta', property='og:image')
        return {
            'img': [meta['content'] for meta in og_images],
            'title': title,
            'desc': desc,
            'day': day.day,
            'month': day.month,
            'year': day.year
        }



        return []


class ExtraFabulousComics(GenericNavigableComic):
    """Class to retrieve Extra Fabulous Comics."""
    name = 'efc'
    long_name = 'Extra Fabulous Comics'
    url = 'http://extrafabulouscomics.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Images are the <img> tags whose 'src' starts with the site's
        '/wp-content/uploads/' path; title and date come from the
        'og:title' and 'article:published_time' meta tags.
        `link` is not used by this implementation.

        Returns a dict with the keys 'title', 'img', 'month', 'year',
        'day' and 'prefix' (the title followed by '-').
        """
        # The site URL is escaped so that its dots only match literal dots
        # instead of acting as regex wildcards.
        img_src_re = re.compile(
            '^%s/wp-content/uploads/' % re.escape(cls.url))
        imgs = soup.find_all('img', src=img_src_re)
        title = soup.find('meta', property='og:title')['content']
        # Only the first 10 characters (the "%Y-%m-%d" part) are parsed.
        date_str = soup.find('meta', property='article:published_time')['content'][:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        return {
            'title': title,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'prefix': title + '-'
        }



        """Get link to first comics."""
        return cls.get_nav(get_soup_at_url(cls.url))[0]

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Pick the next (or previous) comic link out of the navigation.

        The entry at index 3 of `cls.get_nav(last_soup)` is returned when
        `next_` is true, the entry at index 1 otherwise.
        """
        nav_links = cls.get_nav(last_soup)
        position = 3 if next_ else 1
        return nav_links[position]

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title and images) for one comic.

        The title is the 'title' attribute of `link`; the images are the
        'src' of every <img> tag of `soup` whose id is 'comicimg'.
        """
        info = {'title': link['title']}
        pictures = soup.find_all('img', id='comicimg')
        info['img'] = [pic['src'] for pic in pictures]
        return info


class MarketoonistComics(GenericNavigableComic):
    """Class to retrieve Marketoonist Comics."""
    # Identifiers and base URL of the comic.
    name = 'marketoonist'
    long_name = 'Marketoonist'
    url = 'https://marketoonist.com/cartoons'
    # Navigation is delegated to helpers defined elsewhere in the file.
    get_first_comic_link = simulate_first_link
    get_navi_link = get_link_rel_next
    # Presumably the address simulate_first_link starts from - TODO confirm.
    first_url = 'https://marketoonist.com/2002/10/the-8-types-of-brand-managers-2.html'


        }


class SafelyEndangered(GenericNavigableComic):
    """Class to retrieve Safely Endangered comics."""
    # Also on http://tumblr.safelyendangered.com
    name = 'endangered'
    long_name = 'Safely Endangered'
    url = 'http://www.safelyendangered.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.safelyendangered.com/comic/ignored/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Title and date are read from the 'post-title' heading and the
        'post-date' span; images are the <img> tags of the 'comic' div.
        `link` is not used by this implementation.

        Returns a dict with the keys 'day', 'month', 'year', 'img',
        'title' and 'alt'. 'alt' is the alt text of the first image, or
        an empty string when the comic div holds no image.
        """
        title = soup.find('h2', class_='post-title').string
        date_str = soup.find('span', class_='post-date').string
        day = string_to_date(date_str, '%B %d, %Y')
        imgs = soup.find('div', id='comic').find_all('img')
        # A page without any image must not raise IndexError: fall back to
        # an empty alt text, as the similar comic classes of this file do.
        alt = imgs[0]['alt'] if imgs else ""
        # Sanity check: every image is expected to repeat its alt as title.
        assert all(i['alt'] == i['title'] for i in imgs)
        return {
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [i['src'] for i in imgs],
            'title': title,
            'alt': alt,
        }



        }


class GenericLeMondeBlog(GenericNavigableComic):
    """Generic retriever for comics hosted on Le Monde blogs."""
    _categories = ('LEMONDE', 'FRANCAIS')
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title, short link, images, date) of a post.

        The date text of the 'entry-date' span is parsed with the
        "%d %B %Y" format and the "fr_FR.utf8" locale argument; image
        addresses go through convert_iri_to_plain_ascii_uri.
        `link` is not used by this implementation.
        """
        shortlink = soup.find('link', rel='shortlink')
        url2 = shortlink['href']
        og_title = soup.find('meta', property='og:title')
        title = og_title['content']
        posted = soup.find("span", class_="entry-date").string
        day = string_to_date(posted, "%d %B %Y", "fr_FR.utf8")
        og_images = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'url2': url2,
            'img': [
                convert_iri_to_plain_ascii_uri(meta['content'])
                for meta in og_images
            ],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }



        }


class MyExtraLife(GenericNavigableComic):
    """Retriever for the My Extra Life comics."""
    name = 'extralife'
    long_name = 'My Extra Life'
    url = 'http://www.myextralife.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'first comic' navigation anchor of the home page."""
        home = get_soup_at_url(cls.url)
        return home.find('a', class_='comic_nav_link first_comic_link')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title, images, date) for one comic.

        Images with an empty 'src' are left out of the result.
        `link` is not used by this implementation.
        """
        title = soup.find("h1", class_="comic_title").string
        posted = soup.find("span", class_="comic_date").string
        day = string_to_date(posted, "%B %d, %Y")
        pictures = soup.find_all("img", class_="comic")
        # Sanity check: alt and title of every image repeat the comic title.
        assert all(
            pic['alt'] == pic['title'] == title for pic in pictures)
        sources = [pic['src'] for pic in pictures if pic["src"]]
        return {
            'title': title,
            'img': sources,
            'day': day.day,
            'month': day.month,
            'year': day.year
        }



        }


class TalesOfAbsurdity(GenericNavigableComic):
    """Retriever for the Tales Of Absurdity comics."""
    # Also on http://tapastic.com/series/Tales-Of-Absurdity
    # Also on http://talesofabsurdity.tumblr.com
    name = 'absurdity'
    long_name = 'Tales of Absurdity'
    url = 'http://talesofabsurdity.com'
    _categories = ('ABSURDITY', )
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (images, title, alt, author, date) of a comic.

        The alt text is taken from the first image of the 'comic' div and
        is an empty string when that div holds no image.
        `link` is not used by this implementation.
        """
        title = soup.find('h2', class_='post-title').string
        author_link = soup.find("span", class_="post-author").find("a")
        author = author_link.string
        posted = soup.find("span", class_="post-date").string
        day = string_to_date(posted, "%B %d, %Y")
        comic_div = soup.find("div", id="comic")
        pictures = comic_div.find_all("img")
        # Sanity check: every image repeats its alt text as title.
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        if pictures:
            alt = pictures[0]['alt']
        else:
            alt = ""
        return {
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'alt': alt,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year
        }



        }


class MisterAndMe(GenericNavigableComic):
    """Retriever for the Mister & Me comics."""
    # Also on http://www.gocomics.com/mister-and-me
    # Also on https://tapastic.com/series/Mister-and-Me
    name = 'mister'
    long_name = 'Mister & Me'
    url = 'http://www.mister-and-me.com'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (images, title, alt, author, date) of a comic.

        At most one image is expected in the 'comic' div; its alt text is
        reported, or an empty string when there is no image.
        `link` is not used by this implementation.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        posted = soup.find("span", class_="post-date").string
        day = string_to_date(posted, "%B %d, %Y")
        pictures = soup.find("div", id="comic").find_all("img")
        # Sanity checks: alt equals title on each image, one image at most.
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        assert len(pictures) <= 1
        alt = ""
        if pictures:
            alt = pictures[0]['alt']
        return {
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'alt': alt,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year
        }



        }


class Optipess(GenericNavigableComic):
    """Retriever for the Optipess comics."""
    name = 'optipess'
    long_name = 'Optipess'
    url = 'http://www.optipess.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title, alt, author, images, date) of a comic.

        A page without a 'comic' div, or without image in it, yields an
        empty image list and an empty alt text.
        `link` is not used by this implementation.
        """
        title = soup.find('h2', class_='post-title').string
        author_link = soup.find("span", class_="post-author").find("a")
        author = author_link.string
        comic_div = soup.find('div', id='comic')
        pictures = []
        if comic_div:
            pictures = comic_div.find_all('img')
        # The alt text is read from the 'title' attribute of the first image.
        alt = ""
        if pictures:
            alt = pictures[0]['title']
        # Sanity check: alt and title of every image equal that text.
        assert all(pic['alt'] == pic['title'] == alt for pic in pictures)
        posted = soup.find('span', class_='post-date').string
        day = string_to_date(posted, "%B %d, %Y")
        return {
            'title': title,
            'alt': alt,
            'author': author,
            'img': [pic['src'] for pic in pictures],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }



        }


class LastPlaceComics(GenericNavigableComic):
    """Retriever for the Last Place Comics."""
    name = 'lastplace'
    long_name = 'Last Place Comics'
    url = "http://lastplacecomics.com"
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (images, title, alt, author, date) of a comic.

        At most one image is expected in the 'comic' div; without any
        image the alt text is an empty string.
        `link` is not used by this implementation.
        """
        heading = soup.find('h2', class_='post-title')
        title = heading.string
        author = soup.find("span", class_="post-author").find("a").string
        date_span = soup.find("span", class_="post-date")
        day = string_to_date(date_span.string, "%B %d, %Y")
        comic_div = soup.find("div", id="comic")
        pictures = comic_div.find_all("img")
        # Sanity checks: alt equals title on each image, one image at most.
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        assert len(pictures) <= 1
        if pictures:
            alt = pictures[0]['alt']
        else:
            alt = ""
        return {
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'alt': alt,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year
        }



        }


class BiterComics(GenericNavigableComic):
    """Retriever for the Biter Comics."""
    name = "biter"
    long_name = "Biter Comics"
    url = "http://www.bitercomics.com"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (images, title, alt, author, date) of a comic.

        Exactly one image is expected in the 'comic' div; its alt text is
        reported under 'alt'. `link` is not used by this implementation.
        """
        title = soup.find("h1", class_="entry-title").string
        vcard = soup.find("span", class_="author vcard")
        author = vcard.find("a").string
        posted = soup.find("span", class_="entry-date").string
        day = string_to_date(posted, "%B %d, %Y")
        pictures = soup.find("div", id="comic").find_all("img")
        # Sanity checks: alt equals title on each image, exactly one image.
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        assert len(pictures) == 1
        first_picture = pictures[0]
        alt = first_picture['alt']
        return {
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'alt': alt,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year
        }



    _categories = ('TUNEYTOONS', )


class CompletelySeriousComics(GenericNavigableComic):
    """Retriever for the Completely Serious comics."""
    name = 'completelyserious'
    long_name = 'Completely Serious Comics'
    url = 'http://completelyseriouscomics.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (date, images, title, alt, author) of a comic.

        At least one image is expected in the 'comicpane' div; the alt
        text is the 'title' attribute of the first one.
        `link` is not used by this implementation.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find('span', class_='post-author')
        # The author is the second child node of the 'post-author' span.
        author = author_span.contents[1].string
        posted = soup.find('span', class_='post-date').string
        day = string_to_date(posted, '%B %d, %Y')
        pane = soup.find('div', class_='comicpane')
        pictures = pane.find_all('img')
        assert pictures
        alt = pictures[0]['title']
        # Sanity check: title and alt of every image equal that text.
        assert all(pic['title'] == pic['alt'] == alt for pic in pictures)
        return {
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'alt': alt,
            'author': author,
        }



        }


class EndlessOrigami(GenericEmptyComic, GenericNavigableComic):
    """Retriever for the Endless Origami comics."""
    name = "origami"
    long_name = "Endless Origami"
    url = "http://endlessorigami.com"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (images, title, alt, author, date) of a comic.

        The alt text comes from the first image of the 'comic' div and is
        an empty string when there is none.
        `link` is not used by this implementation.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        date_span = soup.find("span", class_="post-date")
        day = string_to_date(date_span.string, "%B %d, %Y")
        pictures = soup.find("div", id="comic").find_all("img")
        # Sanity check: every image repeats its alt text as title.
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        alt = ""
        if pictures:
            alt = pictures[0]['alt']
        return {
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'alt': alt,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year
        }



        }


class Penmen(GenericNavigableComic):
    """Retriever for the Penmen comics."""
    name = 'penmen'
    long_name = 'Penmen'
    url = 'http://penmen.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://penmen.com/index.php/2016/09/12/penmen-announces-grin-big-brand-clothing/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title, short url, images, tags, date).

        Tags are the texts of the rel='tag' anchors joined with spaces;
        the date is the first 10 characters of the <time> 'datetime'
        attribute. `link` is not used by this implementation.
        """
        title = soup.find('title').string
        content = soup.find('div', class_='entry-content')
        pictures = content.find_all('img')
        shortlink = soup.find('link', rel='shortlink')
        short_url = shortlink['href']
        tag_links = soup.find_all('a', rel='tag')
        tags = ' '.join(anchor.string for anchor in tag_links)
        stamp = soup.find('time')['datetime']
        day = string_to_date(stamp[:10], "%Y-%m-%d")
        return {
            'title': title,
            'short_url': short_url,
            'img': [pic['src'] for pic in pictures],
            'tags': tags,
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }



        }


class ChuckleADuck(GenericNavigableComic):
    """Retriever for the Chuckle-A-Duck comics."""
    name = 'chuckleaduck'
    long_name = 'Chuckle-A-duck'
    url = 'http://chuckleaduck.com'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (date, images, title, author) of a comic.

        The title is the 'title' attribute of the first image of the
        'comic' div; it is an empty string when the div is missing or
        holds no image. `link` is not used by this implementation.
        """
        posted = soup.find('span', class_='post-date').string
        # The date text goes through remove_st_nd_rd_th_from_date before
        # being parsed with the "%B %d, %Y" format.
        cleaned = remove_st_nd_rd_th_from_date(posted)
        day = string_to_date(cleaned, "%B %d, %Y")
        author = soup.find('span', class_='post-author').string
        comic_div = soup.find('div', id='comic')
        pictures = []
        if comic_div:
            pictures = comic_div.find_all('img')
        title = ""
        if pictures:
            title = pictures[0]['title']
        # Sanity check: title and alt of every image equal that text.
        assert all(
            pic['title'] == pic['alt'] == title for pic in pictures)
        return {
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'author': author,
        }



		@@ 2285-2309 (lines=25) @@
2282		@classmethod
2283		def get_comic_info(cls, soup, link):
2284		"""Get information about a particular comics."""
2285		title = soup.find('h2', class_='post-title').string
2286		post = soup.find('div', class_='post-content')
2287		author = post.find("span", class_="post-author").find("a").string
2288		date_str = post.find("span", class_="post-date").string
2289		day = string_to_date(date_str, "%B %d, %Y")
2290		imgs = post.find("div", class_="entry").find_all("img")
2291		return {
2292		'title': title,
2293		'author': author,
2294		'img': [i['src'] for i in imgs],
2295		'month': day.month,
2296		'year': day.year,
2297		'day': day.day,
2298		}
2299
2300
2301		class LinsEditions(GenericNavigableComic):
2302		"""Class to retrieve L.I.N.S. Editions comics."""
2303		# Also on http://linscomics.tumblr.com
2304		# Now on https://warandpeas.com
2305		name = 'lins'
2306		long_name = 'L.I.N.S. Editions'
2307		url = 'https://linsedition.com'
2308		_categories = ('LINS', )
2309		get_navi_link = get_link_rel_next
2310		get_first_comic_link = simulate_first_link
2311		first_url = 'https://linsedition.com/2011/09/07/l-i-n-s/'
2312
		@@ 1020-1044 (lines=25) @@
1017		}
1018
1019
1020		class Mercworks(GenericNavigableComic):
1021		"""Class to retrieve Mercworks comics."""
1022		# Also on http://mercworks.tumblr.com
1023		name = 'mercworks'
1024		long_name = 'Mercworks'
1025		url = 'http://mercworks.net'
1026		get_first_comic_link = get_a_comicnavbase_comicnavfirst
1027		get_navi_link = get_link_rel_next
1028
1029		@classmethod
1030		def get_comic_info(cls, soup, link):
1031		"""Get information about a particular comics."""
1032		title = soup.find('meta', property='og:title')['content']
1033		metadesc = soup.find('meta', property='og:description')
1034		desc = metadesc['content'] if metadesc else ""
1035		date_str = soup.find('meta', property='article:published_time')['content'][:10]
1036		day = string_to_date(date_str, "%Y-%m-%d")
1037		imgs = soup.find_all('meta', property='og:image')
1038		return {
1039		'img': [i['content'] for i in imgs],
1040		'title': title,
1041		'desc': desc,
1042		'day': day.day,
1043		'month': day.month,
1044		'year': day.year
1045		}
1046
1047
		@@ 355-377 (lines=23) @@
352		return []
353
354
355		class ExtraFabulousComics(GenericNavigableComic):
356		"""Class to retrieve Extra Fabulous Comics."""
357		name = 'efc'
358		long_name = 'Extra Fabulous Comics'
359		url = 'http://extrafabulouscomics.com'
360		get_first_comic_link = get_a_navi_navifirst
361		get_navi_link = get_link_rel_next
362
363		@classmethod
364		def get_comic_info(cls, soup, link):
365		"""Get information about a particular comics."""
366		img_src_re = re.compile('^%s/wp-content/uploads/' % cls.url)
367		imgs = soup.find_all('img', src=img_src_re)
368		title = soup.find('meta', property='og:title')['content']
369		date_str = soup.find('meta', property='article:published_time')['content'][:10]
370		day = string_to_date(date_str, "%Y-%m-%d")
371		return {
372		'title': title,
373		'img': [i['src'] for i in imgs],
374		'month': day.month,
375		'year': day.year,
376		'day': day.day,
377		'prefix': title + '-'
378		}
379
380
		@@ 3194-3215 (lines=22) @@
3191		"""Get link to first comics."""
3192		return cls.get_nav(get_soup_at_url(cls.url))[0]
3193
3194		@classmethod
3195		def get_navi_link(cls, last_soup, next_):
3196		"""Get link to next or previous comic."""
3197		return cls.get_nav(last_soup)[3 if next_ else 1]
3198
3199		@classmethod
3200		def get_comic_info(cls, soup, link):
3201		"""Get information about a particular comics."""
3202		title = link['title']
3203		imgs = soup.find_all('img', id='comicimg')
3204		return {
3205		'title': title,
3206		'img': [i['src'] for i in imgs],
3207		}
3208
3209
3210		class MarketoonistComics(GenericNavigableComic):
3211		"""Class to retrieve Marketoonist Comics."""
3212		name = 'marketoonist'
3213		long_name = 'Marketoonist'
3214		url = 'https://marketoonist.com/cartoons'
3215		get_first_comic_link = simulate_first_link
3216		get_navi_link = get_link_rel_next
3217		first_url = 'https://marketoonist.com/2002/10/the-8-types-of-brand-managers-2.html'
3218
		@@ 1806-1831 (lines=26) @@
1803		}
1804
1805
1806		class SafelyEndangered(GenericNavigableComic):
1807		"""Class to retrieve Safely Endangered comics."""
1808		# Also on http://tumblr.safelyendangered.com
1809		name = 'endangered'
1810		long_name = 'Safely Endangered'
1811		url = 'http://www.safelyendangered.com'
1812		get_navi_link = get_link_rel_next
1813		get_first_comic_link = simulate_first_link
1814		first_url = 'http://www.safelyendangered.com/comic/ignored/'
1815
1816		@classmethod
1817		def get_comic_info(cls, soup, link):
1818		"""Get information about a particular comics."""
1819		title = soup.find('h2', class_='post-title').string
1820		date_str = soup.find('span', class_='post-date').string
1821		day = string_to_date(date_str, '%B %d, %Y')
1822		imgs = soup.find('div', id='comic').find_all('img')
1823		alt = imgs[0]['alt']
1824		assert all(i['alt'] == i['title'] for i in imgs)
1825		return {
1826		'day': day.day,
1827		'month': day.month,
1828		'year': day.year,
1829		'img': [i['src'] for i in imgs],
1830		'title': title,
1831		'alt': alt,
1832		}
1833
1834
		@@ 381-402 (lines=22) @@
378		}
379
380
381		class GenericLeMondeBlog(GenericNavigableComic):
382		"""Generic class to retrieve comics from Le Monde blogs."""
383		_categories = ('LEMONDE', 'FRANCAIS')
384		get_navi_link = get_link_rel_next
385		get_first_comic_link = simulate_first_link
386		first_url = NotImplemented
387
388		@classmethod
389		def get_comic_info(cls, soup, link):
390		"""Get information about a particular comics."""
391		url2 = soup.find('link', rel='shortlink')['href']
392		title = soup.find('meta', property='og:title')['content']
393		date_str = soup.find("span", class_="entry-date").string
394		day = string_to_date(date_str, "%d %B %Y", "fr_FR.utf8")
395		imgs = soup.find_all('meta', property='og:image')
396		return {
397		'title': title,
398		'url2': url2,
399		'img': [convert_iri_to_plain_ascii_uri(i['content']) for i in imgs],
400		'month': day.month,
401		'year': day.year,
402		'day': day.day,
403		}
404
405
		@@ 925-950 (lines=26) @@
922		}
923
924
925		class MyExtraLife(GenericNavigableComic):
926		"""Class to retrieve My Extra Life comics."""
927		name = 'extralife'
928		long_name = 'My Extra Life'
929		url = 'http://www.myextralife.com'
930		get_navi_link = get_link_rel_next
931
932		@classmethod
933		def get_first_comic_link(cls):
934		"""Get link to first comics."""
935		return get_soup_at_url(cls.url).find('a', class_='comic_nav_link first_comic_link')
936
937		@classmethod
938		def get_comic_info(cls, soup, link):
939		"""Get information about a particular comics."""
940		title = soup.find("h1", class_="comic_title").string
941		date_str = soup.find("span", class_="comic_date").string
942		day = string_to_date(date_str, "%B %d, %Y")
943		imgs = soup.find_all("img", class_="comic")
944		assert all(i['alt'] == i['title'] == title for i in imgs)
945		return {
946		'title': title,
947		'img': [i['src'] for i in imgs if i["src"]],
948		'day': day.day,
949		'month': day.month,
950		'year': day.year
951		}
952
953
		@@ 2572-2600 (lines=29) @@
2569		}
2570
2571
2572		class TalesOfAbsurdity(GenericNavigableComic):
2573		"""Class to retrieve Tales Of Absurdity comics."""
2574		# Also on http://tapastic.com/series/Tales-Of-Absurdity
2575		# Also on http://talesofabsurdity.tumblr.com
2576		name = 'absurdity'
2577		long_name = 'Tales of Absurdity'
2578		url = 'http://talesofabsurdity.com'
2579		_categories = ('ABSURDITY', )
2580		get_first_comic_link = get_a_navi_navifirst
2581		get_navi_link = get_a_navi_comicnavnext_navinext
2582
2583		@classmethod
2584		def get_comic_info(cls, soup, link):
2585		"""Get information about a particular comics."""
2586		title = soup.find('h2', class_='post-title').string
2587		author = soup.find("span", class_="post-author").find("a").string
2588		date_str = soup.find("span", class_="post-date").string
2589		day = string_to_date(date_str, "%B %d, %Y")
2590		imgs = soup.find("div", id="comic").find_all("img")
2591		assert all(i['alt'] == i['title'] for i in imgs)
2592		alt = imgs[0]['alt'] if imgs else ""
2593		return {
2594		'img': [i['src'] for i in imgs],
2595		'title': title,
2596		'alt': alt,
2597		'author': author,
2598		'day': day.day,
2599		'month': day.month,
2600		'year': day.year
2601		}
2602
2603
		@@ 2510-2538 (lines=29) @@
2507		}
2508
2509
2510		class MisterAndMe(GenericNavigableComic):
2511		"""Class to retrieve Mister & Me Comics."""
2512		# Also on http://www.gocomics.com/mister-and-me
2513		# Also on https://tapastic.com/series/Mister-and-Me
2514		name = 'mister'
2515		long_name = 'Mister & Me'
2516		url = 'http://www.mister-and-me.com'
2517		get_first_comic_link = get_a_comicnavbase_comicnavfirst
2518		get_navi_link = get_link_rel_next
2519
2520		@classmethod
2521		def get_comic_info(cls, soup, link):
2522		"""Get information about a particular comics."""
2523		title = soup.find('h2', class_='post-title').string
2524		author = soup.find("span", class_="post-author").find("a").string
2525		date_str = soup.find("span", class_="post-date").string
2526		day = string_to_date(date_str, "%B %d, %Y")
2527		imgs = soup.find("div", id="comic").find_all("img")
2528		assert all(i['alt'] == i['title'] for i in imgs)
2529		assert len(imgs) <= 1
2530		alt = imgs[0]['alt'] if imgs else ""
2531		return {
2532		'img': [i['src'] for i in imgs],
2533		'title': title,
2534		'alt': alt,
2535		'author': author,
2536		'day': day.day,
2537		'month': day.month,
2538		'year': day.year
2539		}
2540
2541
		@@ 2796-2822 (lines=27) @@
2793		}
2794
2795
2796		class Optipess(GenericNavigableComic):
2797		"""Class to retrieve Optipess comics."""
2798		name = 'optipess'
2799		long_name = 'Optipess'
2800		url = 'http://www.optipess.com'
2801		get_first_comic_link = get_a_navi_navifirst
2802		get_navi_link = get_link_rel_next
2803
2804		@classmethod
2805		def get_comic_info(cls, soup, link):
2806		"""Get information about a particular comics."""
2807		title = soup.find('h2', class_='post-title').string
2808		author = soup.find("span", class_="post-author").find("a").string
2809		comic = soup.find('div', id='comic')
2810		imgs = comic.find_all('img') if comic else []
2811		alt = imgs[0]['title'] if imgs else ""
2812		assert all(i['alt'] == i['title'] == alt for i in imgs)
2813		date_str = soup.find('span', class_='post-date').string
2814		day = string_to_date(date_str, "%B %d, %Y")
2815		return {
2816		'title': title,
2817		'alt': alt,
2818		'author': author,
2819		'img': [i['src'] for i in imgs],
2820		'month': day.month,
2821		'year': day.year,
2822		'day': day.day,
2823		}
2824
2825
		@@ 2542-2568 (lines=27) @@
2539		}
2540
2541
2542		class LastPlaceComics(GenericNavigableComic):
2543		"""Class to retrieve Last Place Comics."""
2544		name = 'lastplace'
2545		long_name = 'Last Place Comics'
2546		url = "http://lastplacecomics.com"
2547		get_first_comic_link = get_a_comicnavbase_comicnavfirst
2548		get_navi_link = get_link_rel_next
2549
2550		@classmethod
2551		def get_comic_info(cls, soup, link):
2552		"""Get information about a particular comics."""
2553		title = soup.find('h2', class_='post-title').string
2554		author = soup.find("span", class_="post-author").find("a").string
2555		date_str = soup.find("span", class_="post-date").string
2556		day = string_to_date(date_str, "%B %d, %Y")
2557		imgs = soup.find("div", id="comic").find_all("img")
2558		assert all(i['alt'] == i['title'] for i in imgs)
2559		assert len(imgs) <= 1
2560		alt = imgs[0]['alt'] if imgs else ""
2561		return {
2562		'img': [i['src'] for i in imgs],
2563		'title': title,
2564		'alt': alt,
2565		'author': author,
2566		'day': day.day,
2567		'month': day.month,
2568		'year': day.year
2569		}
2570
2571
		@@ 2431-2457 (lines=27) @@
2428		}
2429
2430
2431		class BiterComics(GenericNavigableComic):
2432		"""Class to retrieve Biter Comics."""
2433		name = "biter"
2434		long_name = "Biter Comics"
2435		url = "http://www.bitercomics.com"
2436		get_first_comic_link = get_a_navi_navifirst
2437		get_navi_link = get_link_rel_next
2438
2439		@classmethod
2440		def get_comic_info(cls, soup, link):
2441		"""Get information about a particular comics."""
2442		title = soup.find("h1", class_="entry-title").string
2443		author = soup.find("span", class_="author vcard").find("a").string
2444		date_str = soup.find("span", class_="entry-date").string
2445		day = string_to_date(date_str, "%B %d, %Y")
2446		imgs = soup.find("div", id="comic").find_all("img")
2447		assert all(i['alt'] == i['title'] for i in imgs)
2448		assert len(imgs) == 1
2449		alt = imgs[0]['alt']
2450		return {
2451		'img': [i['src'] for i in imgs],
2452		'title': title,
2453		'alt': alt,
2454		'author': author,
2455		'day': day.day,
2456		'month': day.month,
2457		'year': day.year
2458		}
2459
2460
		@@ 1980-2006 (lines=27) @@
1977		_categories = ('TUNEYTOONS', )
1978
1979
1980		class CompletelySeriousComics(GenericNavigableComic):
1981		"""Class to retrieve Completely Serious comics."""
1982		name = 'completelyserious'
1983		long_name = 'Completely Serious Comics'
1984		url = 'http://completelyseriouscomics.com'
1985		get_first_comic_link = get_a_navi_navifirst
1986		get_navi_link = get_a_navi_navinext
1987
1988		@classmethod
1989		def get_comic_info(cls, soup, link):
1990		"""Get information about a particular comics."""
1991		title = soup.find('h2', class_='post-title').string
1992		author = soup.find('span', class_='post-author').contents[1].string
1993		date_str = soup.find('span', class_='post-date').string
1994		day = string_to_date(date_str, '%B %d, %Y')
1995		imgs = soup.find('div', class_='comicpane').find_all('img')
1996		assert imgs
1997		alt = imgs[0]['title']
1998		assert all(i['title'] == i['alt'] == alt for i in imgs)
1999		return {
2000		'month': day.month,
2001		'year': day.year,
2002		'day': day.day,
2003		'img': [i['src'] for i in imgs],
2004		'title': title,
2005		'alt': alt,
2006		'author': author,
2007		}
2008
2009
		@@ 2604-2629 (lines=26) @@
2601		}
2602
2603
2604		class EndlessOrigami(GenericEmptyComic, GenericNavigableComic):
2605		"""Class to retrieve Endless Origami Comics."""
2606		name = "origami"
2607		long_name = "Endless Origami"
2608		url = "http://endlessorigami.com"
2609		get_first_comic_link = get_a_navi_navifirst
2610		get_navi_link = get_link_rel_next
2611
2612		@classmethod
2613		def get_comic_info(cls, soup, link):
2614		"""Get information about a particular comics."""
2615		title = soup.find('h2', class_='post-title').string
2616		author = soup.find("span", class_="post-author").find("a").string
2617		date_str = soup.find("span", class_="post-date").string
2618		day = string_to_date(date_str, "%B %d, %Y")
2619		imgs = soup.find("div", id="comic").find_all("img")
2620		assert all(i['alt'] == i['title'] for i in imgs)
2621		alt = imgs[0]['alt'] if imgs else ""
2622		return {
2623		'img': [i['src'] for i in imgs],
2624		'title': title,
2625		'alt': alt,
2626		'author': author,
2627		'day': day.day,
2628		'month': day.month,
2629		'year': day.year
2630		}
2631
2632
		@@ 1865-1890 (lines=26) @@
1862		}
1863
1864
1865		class Penmen(GenericNavigableComic):
1866		"""Class to retrieve Penmen comics."""
1867		name = 'penmen'
1868		long_name = 'Penmen'
1869		url = 'http://penmen.com'
1870		get_navi_link = get_link_rel_next
1871		get_first_comic_link = simulate_first_link
1872		first_url = 'http://penmen.com/index.php/2016/09/12/penmen-announces-grin-big-brand-clothing/'
1873
1874		@classmethod
1875		def get_comic_info(cls, soup, link):
1876		"""Get information about a particular comics."""
1877		title = soup.find('title').string
1878		imgs = soup.find('div', class_='entry-content').find_all('img')
1879		short_url = soup.find('link', rel='shortlink')['href']
1880		tags = ' '.join(t.string for t in soup.find_all('a', rel='tag'))
1881		date_str = soup.find('time')['datetime'][:10]
1882		day = string_to_date(date_str, "%Y-%m-%d")
1883		return {
1884		'title': title,
1885		'short_url': short_url,
1886		'img': [i['src'] for i in imgs],
1887		'tags': tags,
1888		'month': day.month,
1889		'year': day.year,
1890		'day': day.day,
1891		}
1892
1893
		@@ 2068-2092 (lines=25) @@
2065		}
2066
2067
2068		class ChuckleADuck(GenericNavigableComic):
2069		"""Class to retrieve Chuckle-A-Duck comics."""
2070		name = 'chuckleaduck'
2071		long_name = 'Chuckle-A-duck'
2072		url = 'http://chuckleaduck.com'
2073		get_first_comic_link = get_div_navfirst_a
2074		get_navi_link = get_link_rel_next
2075
2076		@classmethod
2077		def get_comic_info(cls, soup, link):
2078		"""Get information about a particular comics."""
2079		date_str = soup.find('span', class_='post-date').string
2080		day = string_to_date(remove_st_nd_rd_th_from_date(date_str), "%B %d, %Y")
2081		author = soup.find('span', class_='post-author').string
2082		div = soup.find('div', id='comic')
2083		imgs = div.find_all('img') if div else []
2084		title = imgs[0]['title'] if imgs else ""
2085		assert all(i['title'] == i['alt'] == title for i in imgs)
2086		return {
2087		'month': day.month,
2088		'year': day.year,
2089		'day': day.day,
2090		'img': [i['src'] for i in imgs],
2091		'title': title,
2092		'author': author,
2093		}
2094
2095

SylvainDe / ComicBookMaker

Code Duplication Length = 22-29 lines in 16 locations

comics.py 16 locations