Code Duplication    Length = 22-26 lines in 8 locations

comics.py 8 locations

@@ 405-430 (lines=26) @@
402
    _categories = ('DELETED', )
403
404
405
class ExtraFabulousComics(GenericNavigableComic):
    """Class to retrieve Extra Fabulous Comics.

    Navigation is delegated to ``get_link_rel_next`` and the first comic
    is obtained through ``simulate_first_link``.
    """
    # Also on https://extrafabulouscomics.tumblr.com
    name = 'efc'
    long_name = 'Extra Fabulous Comics'
    url = 'http://extrafabulouscomics.com'
    _categories = ('EFC', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://extrafabulouscomics.com/comic/buttfly/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        ``soup`` is the parsed comic page (queried with ``find`` and
        ``find_all``); ``link`` is not used by this implementation.

        Return a dict with the keys 'title', 'img' (list of image URLs
        hosted under ``<url>/wp-content/uploads/``), 'month', 'year',
        'day' and 'prefix' (the title followed by a dash).
        """
        # Escape the site URL: it contains dots which would otherwise
        # match any character instead of a literal '.'.
        img_src_re = re.compile(
            '^' + re.escape(cls.url) + '/wp-content/uploads/')
        imgs = soup.find_all('img', src=img_src_re)
        title = soup.find('meta', property='og:title')['content']
        # Only the leading YYYY-MM-DD part of the timestamp is needed.
        published = soup.find(
            'meta', property='article:published_time')['content']
        day = string_to_date(published[:10], "%Y-%m-%d")
        return {
            'title': title,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'prefix': title + '-'
        }
432
433
@@ 1092-1116 (lines=25) @@
1089
        }
1090
1091
1092
class Mercworks(GenericNavigableComic):
    """Retriever for the Mercworks comics."""
    # Also on http://mercworks.tumblr.com
    name = 'mercworks'
    long_name = 'Mercworks'
    url = 'http://mercworks.net'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract the data describing one comic from its page.

        ``soup`` is the parsed page; ``link`` is not used here.
        Return a dict with the keys 'img', 'title', 'desc', 'day',
        'month' and 'year'. 'desc' is an empty string when the page has
        no og:description meta tag.
        """
        comic_title = soup.find('meta', property='og:title')['content']
        desc_tag = soup.find('meta', property='og:description')
        if desc_tag:
            description = desc_tag['content']
        else:
            description = ""
        published = soup.find(
            'meta', property='article:published_time')['content']
        # Keep only the YYYY-MM-DD prefix of the timestamp.
        date = string_to_date(published[:10], "%Y-%m-%d")
        image_tags = soup.find_all('meta', property='og:image')
        info = {}
        info['img'] = [tag['content'] for tag in image_tags]
        info['title'] = comic_title
        info['desc'] = description
        info['day'] = date.day
        info['month'] = date.month
        info['year'] = date.year
        return info
1118
1119
@@ 3504-3525 (lines=22) @@
3501
        author = soup.find('a', rel='author').string
3502
        return {
3503
            'title': title,
3504
            'img': [i['src'] for i in [img]],
3505
            'day': day.day,
3506
            'month': day.month,
3507
            'year': day.year,
3508
            'author': author,
3509
        }
3510
3511
3512
class MarketoonistComics(GenericNavigableComic):
3513
    """Class to retrieve Marketoonist Comics."""
3514
    name = 'marketoonist'
3515
    long_name = 'Marketoonist'
3516
    url = 'https://marketoonist.com/cartoons'
3517
    get_first_comic_link = simulate_first_link
3518
    get_navi_link = get_link_rel_next
3519
    first_url = 'https://marketoonist.com/2002/10/the-8-types-of-brand-managers-2.html'
3520
3521
    @classmethod
3522
    def get_comic_info(cls, soup, link):
3523
        """Get information about a particular comics."""
3524
        imgs = soup.find_all('meta', property='og:image')
3525
        date_str = soup.find('meta', property='article:published_time')['content'][:10]
3526
        day = string_to_date(date_str, "%Y-%m-%d")
3527
        title = soup.find('meta', property='og:title')['content']
3528
        return {
@@ 2345-2366 (lines=22) @@
2342
        }
2343
2344
2345
class PeterLauris(GenericNavigableComic):
    """Retriever for the Peter Lauris comics."""
    name = 'peterlauris'
    long_name = 'Peter Lauris'
    url = 'http://peterlauris.com/comics'
    get_navi_link = get_a_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://peterlauris.com/comics/just-in-case/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract the data describing one comic from its page.

        ``soup`` is the parsed page; ``link`` is not used here.
        Return a dict with the keys 'title', 'img', 'month', 'year'
        and 'day'.
        """
        # The title is read from the twitter:title meta tag.
        title_tag = soup.find('meta', attrs={'name': 'twitter:title'})
        comic_title = title_tag['content']
        date_tag = soup.find('meta', property='article:published_time')
        # Keep only the YYYY-MM-DD prefix of the timestamp.
        date = string_to_date(date_tag['content'][:10], "%Y-%m-%d")
        image_urls = [
            tag['content']
            for tag in soup.find_all('meta', property='og:image')
        ]
        return {
            'title': comic_title,
            'img': image_urls,
            'month': date.month,
            'year': date.year,
            'day': date.day,
        }
2368
2369
@@ 434-455 (lines=22) @@
431
        }
432
433
434
class GenericLeMondeBlog(GenericNavigableComic):
    """Generic retriever for comics hosted on Le Monde blogs."""
    _categories = ('LEMONDE', 'FRANCAIS')
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    # Placeholder value; presumably overridden by concrete subclasses.
    first_url = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract the data describing one comic from its page.

        ``soup`` is the parsed page; ``link`` is not used here.
        Return a dict with the keys 'title', 'url2' (the page's
        shortlink), 'img', 'month', 'year' and 'day'.
        """
        short_url = soup.find('link', rel='shortlink')['href']
        comic_title = soup.find('meta', property='og:title')['content']
        # The date is written out in French (e.g. day, month name, year).
        raw_date = soup.find("span", class_="entry-date").string
        date = string_to_date(raw_date, "%d %B %Y", "fr_FR.utf8")
        image_tags = soup.find_all('meta', property='og:image')
        image_urls = []
        for tag in image_tags:
            image_urls.append(convert_iri_to_plain_ascii_uri(tag['content']))
        return {
            'title': comic_title,
            'url2': short_url,
            'img': image_urls,
            'month': date.month,
            'year': date.year,
            'day': date.day,
        }
457
458
@@ 1004-1029 (lines=26) @@
1001
        }
1002
1003
1004
class MyExtraLife(GenericNavigableComic):
    """Retriever for the My Extra Life comics."""
    name = 'extralife'
    long_name = 'My Extra Life'
    url = 'http://www.myextralife.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the link to the first comic, looked up on the home page."""
        home_page = get_soup_at_url(cls.url)
        return home_page.find('a', class_='comic_nav_link first_comic_link')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract the data describing one comic from its page.

        ``soup`` is the parsed page; ``link`` is not used here.
        Return a dict with the keys 'title', 'img', 'day', 'month'
        and 'year'. Images with an empty 'src' are left out.
        """
        comic_title = soup.find("h1", class_="comic_title").string
        raw_date = soup.find("span", class_="comic_date").string
        date = string_to_date(raw_date, "%B %d, %Y")
        comic_imgs = soup.find_all("img", class_="comic")
        # Sanity check: alt and title of every image repeat the comic title.
        assert all(
            img['alt'] == img['title'] == comic_title for img in comic_imgs)
        sources = [img['src'] for img in comic_imgs if img["src"]]
        return {
            'title': comic_title,
            'img': sources,
            'day': date.day,
            'month': date.month,
            'year': date.year
        }
1031
1032
@@ 2370-2394 (lines=25) @@
2367
        }
2368
2369
2370
class JuliasDrawings(GenericListableComic):
    """Retriever for Julia's Drawings."""
    name = 'julia'
    long_name = "Julia's Drawings"
    url = 'https://drawings.jvns.ca'
    get_url_from_archive_element = get_href

    @classmethod
    def get_archive_elements(cls):
        """Return the first 'a' tag of each post article, in reversed page order."""
        home_page = get_soup_at_url(cls.url)
        posts = home_page.find_all('article', class_='li post')
        anchors = []
        for post in reversed(posts):
            anchors.append(post.find('a'))
        return anchors

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Extract the data describing one comic from its page.

        ``soup`` is the parsed page; ``archive_elt`` is not used here.
        Return a dict with the keys 'title', 'img', 'month', 'year'
        and 'day'. Image sources are joined with the site URL.
        """
        date_tag = soup.find('meta', property='og:article:published_time')
        # Keep only the YYYY-MM-DD prefix of the timestamp.
        date = string_to_date(date_tag['content'][:10], "%Y-%m-%d")
        comic_title = soup.find('h3', class_='p-post-title').string
        content = soup.find('section', class_='post-content')
        image_urls = [
            urljoin_wrapper(cls.url, img['src'])
            for img in content.find_all('img')
        ]
        return {
            'title': comic_title,
            'img': image_urls,
            'month': date.month,
            'year': date.year,
            'day': date.day,
        }
2396
2397
@@ 2479-2501 (lines=23) @@
2476
    _categories = ('WARANDPEAS', 'LINS')
2477
2478
2479
class WarAndPeas(GenericNavigableComic):
    """Retriever for the War And Peas comics."""
    name = 'warandpeas'
    long_name = 'War And Peas'
    url = 'https://warandpeas.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'https://warandpeas.com/2011/11/07/565/'
    _categories = ('WARANDPEAS', 'LINS')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract the data describing one comic from its page.

        ``soup`` is the parsed page; ``link`` is not used here.
        Return a dict with the keys 'title', 'img', 'month', 'year'
        and 'day'.
        """
        comic_title = soup.find('meta', property='og:title')['content']
        image_tags = soup.find_all('meta', property='og:image')
        published = soup.find(
            'meta', property='article:published_time')['content']
        # Keep only the YYYY-MM-DD prefix of the timestamp.
        date = string_to_date(published[:10], "%Y-%m-%d")
        info = {
            'title': comic_title,
            'img': [tag['content'] for tag in image_tags],
        }
        info['month'] = date.month
        info['year'] = date.year
        info['day'] = date.day
        return info
2503
2504