Code Duplication    Length = 22-26 lines in 8 locations

comics.py 8 locations

@@ 2429-2453 (lines=25) @@
2426
        }
2427
2428
2429
class LinsEditions(GenericNavigableComic):
    """Retrieve comics from L.I.N.S. Editions."""
    # Also on https://linscomics.tumblr.com
    # Now on https://warandpeas.com
    name = 'lins'
    long_name = 'L.I.N.S. Editions'
    url = 'https://linsedition.com'
    _categories = ('LINS', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'https://linsedition.com/2011/09/07/l-i-n-s/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for one comic page from its <meta> tags."""
        title_tag = soup.find('meta', property='og:title')
        title = title_tag['content']
        image_tags = soup.find_all('meta', property='og:image')
        published_tag = soup.find('meta', property='article:published_time')
        # Only the first 10 characters of the timestamp are parsed as a date.
        published = string_to_date(published_tag['content'][:10], "%Y-%m-%d")
        image_urls = [tag['content'] for tag in image_tags]
        return {
            'title': title,
            'img': image_urls,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2455
2456
@@ 1073-1097 (lines=25) @@
1070
        }
1071
1072
1073
class Mercworks(GenericNavigableComic):
    """Retrieve comics from Mercworks."""
    # Also on http://mercworks.tumblr.com
    name = 'mercworks'
    long_name = 'Mercworks'
    url = 'http://mercworks.net'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for one comic page from its <meta> tags."""
        title_tag = soup.find('meta', property='og:title')
        title = title_tag['content']
        # The description tag is optional; fall back to an empty string.
        description_tag = soup.find('meta', property='og:description')
        if description_tag:
            desc = description_tag['content']
        else:
            desc = ""
        published_tag = soup.find('meta', property='article:published_time')
        # Only the first 10 characters of the timestamp are parsed as a date.
        published = string_to_date(published_tag['content'][:10], "%Y-%m-%d")
        image_tags = soup.find_all('meta', property='og:image')
        image_urls = [tag['content'] for tag in image_tags]
        return {
            'img': image_urls,
            'title': title,
            'desc': desc,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
1099
1100
@@ 387-411 (lines=25) @@
384
    _categories = ('DELETED', )
385
386
387
class ExtraFabulousComics(GenericNavigableComic):
    """Class to retrieve Extra Fabulous Comics."""
    # Also on https://extrafabulouscomics.tumblr.com
    name = 'efc'
    long_name = 'Extra Fabulous Comics'
    url = 'http://extrafabulouscomics.com'
    _categories = ('EFC', )
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        soup is the parsed comic page; link is not used here.

        Returns a dict with the keys 'title', 'img', 'month', 'year',
        'day' and 'prefix'. 'img' lists the src of every <img> whose src
        starts with "<cls.url>/wp-content/uploads/".

        Raises TypeError if the og:title or article:published_time <meta>
        tag is missing (subscripting None).
        """
        # Escape the site URL so that its dots only match literal dots:
        # unescaped, "extrafabulouscomics.com" would also match e.g.
        # "extrafabulouscomicsXcom".
        img_src_re = re.compile('^%s/wp-content/uploads/' % re.escape(cls.url))
        imgs = soup.find_all('img', src=img_src_re)
        title = soup.find('meta', property='og:title')['content']
        # Only the first 10 characters of the timestamp are parsed as a date.
        date_str = soup.find('meta', property='article:published_time')['content'][:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        return {
            'title': title,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'prefix': title + '-'
        }
413
414
@@ 3425-3446 (lines=22) @@
3422
        }
3423
3424
3425
class MarketoonistComics(GenericNavigableComic):
    """Retrieve comics from Marketoonist."""
    name = 'marketoonist'
    long_name = 'Marketoonist'
    url = 'https://marketoonist.com/cartoons'
    get_first_comic_link = simulate_first_link
    get_navi_link = get_link_rel_next
    first_url = 'https://marketoonist.com/2002/10/the-8-types-of-brand-managers-2.html'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for one comic page from its <meta> tags."""
        image_tags = soup.find_all('meta', property='og:image')
        published_tag = soup.find('meta', property='article:published_time')
        # Only the first 10 characters of the timestamp are parsed as a date.
        published = string_to_date(published_tag['content'][:10], "%Y-%m-%d")
        title_tag = soup.find('meta', property='og:title')
        title = title_tag['content']
        image_urls = [tag['content'] for tag in image_tags]
        return {
            'img': image_urls,
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'title': title,
        }
3448
3449
@@ 415-436 (lines=22) @@
412
        }
413
414
415
class GenericLeMondeBlog(GenericNavigableComic):
    """Generic retriever for comics hosted on Le Monde blogs."""
    _categories = ('LEMONDE', 'FRANCAIS')
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    # No value at this level; presumably set by concrete subclasses.
    first_url = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for one blog post."""
        shortlink_tag = soup.find('link', rel='shortlink')
        url2 = shortlink_tag['href']
        title_tag = soup.find('meta', property='og:title')
        title = title_tag['content']
        date_tag = soup.find("span", class_="entry-date")
        # The third argument looks like a locale name for the month names
        # (assumption, string_to_date is defined elsewhere).
        published = string_to_date(date_tag.string, "%d %B %Y", "fr_FR.utf8")
        image_tags = soup.find_all('meta', property='og:image')
        image_urls = [
            convert_iri_to_plain_ascii_uri(tag['content'])
            for tag in image_tags
        ]
        return {
            'title': title,
            'url2': url2,
            'img': image_urls,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
438
439
@@ 1859-1884 (lines=26) @@
1856
        }
1857
1858
1859
class SafelyEndangered(GenericNavigableComic):
    """Retrieve comics from Safely Endangered."""
    # Also on http://tumblr.safelyendangered.com
    name = 'endangered'
    long_name = 'Safely Endangered'
    url = 'http://www.safelyendangered.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.safelyendangered.com/comic/ignored/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for one comic page."""
        title_tag = soup.find('h2', class_='post-title')
        title = title_tag.string
        date_tag = soup.find('span', class_='post-date')
        published = string_to_date(date_tag.string, '%B %d, %Y')
        comic_div = soup.find('div', id='comic')
        images = comic_div.find_all('img')
        # The alt text of the first image is reported for the whole comic.
        alt = images[0]['alt']
        # Sanity check: every image carries the same text in alt and title.
        assert all(image['alt'] == image['title'] for image in images)
        image_urls = [image['src'] for image in images]
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': image_urls,
            'title': title,
            'alt': alt,
        }
1886
1887
@@ 985-1010 (lines=26) @@
982
        }
983
984
985
class MyExtraLife(GenericNavigableComic):
    """Retrieve comics from My Extra Life."""
    name = 'extralife'
    long_name = 'My Extra Life'
    url = 'http://www.myextralife.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'first comic' navigation link found on the home page."""
        home_page = get_soup_at_url(cls.url)
        return home_page.find('a', class_='comic_nav_link first_comic_link')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for one comic page."""
        title_tag = soup.find("h1", class_="comic_title")
        title = title_tag.string
        date_tag = soup.find("span", class_="comic_date")
        published = string_to_date(date_tag.string, "%B %d, %Y")
        images = soup.find_all("img", class_="comic")
        # Sanity check: alt and title of every image equal the page title.
        assert all(
            image['alt'] == image['title'] == title for image in images)
        # Images with an empty src are left out.
        image_urls = [image['src'] for image in images if image["src"]]
        return {
            'title': title,
            'img': image_urls,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
1012
1013
@@ 2330-2354 (lines=25) @@
2327
        }
2328
2329
2330
class JuliasDrawings(GenericListableComic):
    """Retrieve Julia's Drawings."""
    name = 'julia'
    long_name = "Julia's Drawings"
    url = 'https://drawings.jvns.ca'
    get_url_from_archive_element = get_href

    @classmethod
    def get_archive_elements(cls):
        """Return the first link of each post article, in reverse page order."""
        home_page = get_soup_at_url(cls.url)
        articles = home_page.find_all('article', class_='li post')
        links = [article.find('a') for article in articles]
        links.reverse()
        return links

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Build the info dict for one drawing page."""
        published_tag = soup.find('meta', property='og:article:published_time')
        # Only the first 10 characters of the timestamp are parsed as a date.
        published = string_to_date(published_tag['content'][:10], "%Y-%m-%d")
        title_tag = soup.find('h3', class_='p-post-title')
        title = title_tag.string
        content_section = soup.find('section', class_='post-content')
        images = content_section.find_all('img')
        # Each image src is joined onto the site URL.
        image_urls = [urljoin_wrapper(cls.url, image['src']) for image in images]
        return {
            'title': title,
            'img': image_urls,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2356
2357