Code Duplication    Length = 22-26 lines in 8 locations

comics.py 8 locations

@@ 389-414 (lines=26) @@
386
    _categories = ('DELETED', )
387
388
389
class ExtraFabulousComics(GenericNavigableComic):
    """Class to retrieve Extra Fabulous Comics."""
    # Also on https://extrafabulouscomics.tumblr.com
    name = 'efc'
    long_name = 'Extra Fabulous Comics'
    url = 'http://extrafabulouscomics.com'
    _categories = ('EFC', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://extrafabulouscomics.com/comic/buttfly/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Args:
            soup: parsed comic page exposing ``find``/``find_all``
                (presumably a BeautifulSoup document - confirm with caller).
            link: navigation link that led to the page; not used here.

        Returns:
            A dict with the keys 'title', 'img', 'month', 'year', 'day'
            and 'prefix'.
        """
        # The site URL is escaped so that its dots only match literal dots
        # instead of acting as regex wildcards.
        img_src_re = re.compile(
            '^%s/wp-content/uploads/' % re.escape(cls.url))
        imgs = soup.find_all('img', src=img_src_re)
        title = soup.find('meta', property='og:title')['content']
        # Only the leading YYYY-MM-DD part of the timestamp is parsed.
        date_str = soup.find(
            'meta', property='article:published_time')['content'][:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        return {
            'title': title,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'prefix': title + '-'
        }
416
417
@@ 2395-2419 (lines=25) @@
2392
        }
2393
2394
2395
class LinsEditions(GenericNavigableComic):
    """Retrieve the comics published by L.I.N.S. Editions."""
    # Also on https://linscomics.tumblr.com
    # Now on https://warandpeas.com
    name = 'lins'
    long_name = 'L.I.N.S. Editions'
    url = 'https://linsedition.com'
    _categories = ('LINS', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'https://linsedition.com/2011/09/07/l-i-n-s/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title, images, date) for one comic page.

        The values are read from the page's ``og:title``, ``og:image`` and
        ``article:published_time`` meta tags; ``link`` is not used.
        """
        title_meta = soup.find('meta', property='og:title')
        title = title_meta['content']
        image_metas = soup.find_all('meta', property='og:image')
        published_meta = soup.find('meta', property='article:published_time')
        # Keep only the leading YYYY-MM-DD part of the timestamp.
        published = published_meta['content'][:10]
        date = string_to_date(published, "%Y-%m-%d")
        return {
            'title': title,
            'img': [meta['content'] for meta in image_metas],
            'month': date.month,
            'year': date.year,
            'day': date.day,
        }
2421
2422
@@ 1076-1100 (lines=25) @@
1073
        }
1074
1075
1076
class Mercworks(GenericNavigableComic):
    """Retrieve the Mercworks comics."""
    # Also on http://mercworks.tumblr.com
    name = 'mercworks'
    long_name = 'Mercworks'
    url = 'http://mercworks.net'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for one comic page.

        Title, description, date and images come from the page's meta tags.
        The description falls back to an empty string when the
        ``og:description`` tag is absent. ``link`` is not used.
        """
        title = soup.find('meta', property='og:title')['content']
        description_meta = soup.find('meta', property='og:description')
        if description_meta:
            description = description_meta['content']
        else:
            description = ""
        published_meta = soup.find('meta', property='article:published_time')
        # Keep only the leading YYYY-MM-DD part of the timestamp.
        date = string_to_date(published_meta['content'][:10], "%Y-%m-%d")
        image_metas = soup.find_all('meta', property='og:image')
        return {
            'img': [meta['content'] for meta in image_metas],
            'title': title,
            'desc': description,
            'day': date.day,
            'month': date.month,
            'year': date.year
        }
1102
1103
@@ 3402-3423 (lines=22) @@
3399
        }
3400
3401
3402
class MarketoonistComics(GenericNavigableComic):
    """Retrieve the Marketoonist comics."""
    name = 'marketoonist'
    long_name = 'Marketoonist'
    url = 'https://marketoonist.com/cartoons'
    get_first_comic_link = simulate_first_link
    get_navi_link = get_link_rel_next
    first_url = 'https://marketoonist.com/2002/10/the-8-types-of-brand-managers-2.html'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (images, date, title) for one comic page.

        All values are read from the page's meta tags; ``link`` is not used.
        """
        image_metas = soup.find_all('meta', property='og:image')
        published_meta = soup.find('meta', property='article:published_time')
        # Keep only the leading YYYY-MM-DD part of the timestamp.
        published = published_meta['content'][:10]
        date = string_to_date(published, "%Y-%m-%d")
        title_meta = soup.find('meta', property='og:title')
        return {
            'img': [meta['content'] for meta in image_metas],
            'day': date.day,
            'month': date.month,
            'year': date.year,
            'title': title_meta['content'],
        }
3425
3426
@@ 418-439 (lines=22) @@
415
        }
416
417
418
class GenericLeMondeBlog(GenericNavigableComic):
    """Generic base for comics hosted on Le Monde blogs."""
    _categories = ('LEMONDE', 'FRANCAIS')
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for one blog post.

        Reads the short link, title and images from the page head and the
        date from the ``entry-date`` span, parsed as a French-locale date.
        ``link`` is not used.
        """
        shortlink = soup.find('link', rel='shortlink')
        short_url = shortlink['href']
        title = soup.find('meta', property='og:title')['content']
        date_span = soup.find("span", class_="entry-date")
        date = string_to_date(date_span.string, "%d %B %Y", "fr_FR.utf8")
        image_metas = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'url2': short_url,
            'img': [convert_iri_to_plain_ascii_uri(meta['content'])
                    for meta in image_metas],
            'month': date.month,
            'year': date.year,
            'day': date.day,
        }
441
442
@@ 1826-1851 (lines=26) @@
1823
        }
1824
1825
1826
class SafelyEndangered(GenericNavigableComic):
    """Retrieve the Safely Endangered comics."""
    # Also on http://tumblr.safelyendangered.com
    name = 'endangered'
    long_name = 'Safely Endangered'
    url = 'http://www.safelyendangered.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.safelyendangered.com/comic/ignored/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for one comic page.

        Title and date come from the post header, images from the ``comic``
        div. The alt text is taken from the first image. ``link`` is not used.
        """
        title = soup.find('h2', class_='post-title').string
        post_date = soup.find('span', class_='post-date').string
        date = string_to_date(post_date, '%B %d, %Y')
        comic_div = soup.find('div', id='comic')
        images = comic_div.find_all('img')
        alt_text = images[0]['alt']
        # Sanity check: every image's alt and title attributes agree.
        assert all(image['alt'] == image['title'] for image in images)
        return {
            'day': date.day,
            'month': date.month,
            'year': date.year,
            'img': [image['src'] for image in images],
            'title': title,
            'alt': alt_text,
        }
1853
1854
@@ 988-1013 (lines=26) @@
985
        }
986
987
988
class MyExtraLife(GenericNavigableComic):
    """Retrieve the My Extra Life comics."""
    name = 'extralife'
    long_name = 'My Extra Life'
    url = 'http://www.myextralife.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the "first comic" navigation link found on the home page."""
        home_page = get_soup_at_url(cls.url)
        return home_page.find('a', class_='comic_nav_link first_comic_link')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title, images, date) for one comic page.

        Images with an empty ``src`` are left out of the result. ``link`` is
        not used.
        """
        title = soup.find("h1", class_="comic_title").string
        comic_date = soup.find("span", class_="comic_date").string
        date = string_to_date(comic_date, "%B %d, %Y")
        images = soup.find_all("img", class_="comic")
        # Sanity check: alt and title of each image both repeat the title.
        assert all(
            image['alt'] == image['title'] == title for image in images)
        sources = [image['src'] for image in images if image["src"]]
        return {
            'title': title,
            'img': sources,
            'day': date.day,
            'month': date.month,
            'year': date.year
        }
1015
1016
@@ 2296-2320 (lines=25) @@
2293
        }
2294
2295
2296
class JuliasDrawings(GenericListableComic):
    """Retrieve Julia's Drawings."""
    name = 'julia'
    long_name = "Julia's Drawings"
    url = 'https://drawings.jvns.ca'
    get_url_from_archive_element = get_href

    @classmethod
    def get_archive_elements(cls):
        """Return the post links from the home page, in reversed page order."""
        home_page = get_soup_at_url(cls.url)
        articles = home_page.find_all('article', class_='li post')
        links = []
        for article in reversed(articles):
            links.append(article.find('a'))
        return links

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Build the info dict (title, images, date) for one drawing page.

        Image sources are joined with the site URL. ``archive_elt`` is not
        used.
        """
        published_meta = soup.find(
            'meta', property='og:article:published_time')
        # Keep only the leading YYYY-MM-DD part of the timestamp.
        date = string_to_date(published_meta['content'][:10], "%Y-%m-%d")
        title = soup.find('h3', class_='p-post-title').string
        content = soup.find('section', class_='post-content')
        images = content.find_all('img')
        return {
            'title': title,
            'img': [urljoin_wrapper(cls.url, image['src'])
                    for image in images],
            'month': date.month,
            'year': date.year,
            'day': date.day,
        }
2322
2323