Code Duplication    Length = 22-29 lines in 16 locations

comics.py 16 locations

@@ 2285-2309 (lines=25) @@
2282
    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, author, date and image URLs from a comic page.

        The title is read from the ``h2.post-title`` element; the author, the
        date and the images are looked up inside ``div.post-content``.  The
        date text is handed to ``string_to_date`` with the ``"%B %d, %Y"``
        format.  ``link`` is accepted but not used here.

        Returns a dict with the keys ``title``, ``author``, ``img`` (list of
        ``src`` attributes), ``month``, ``year`` and ``day``.
        """
        title = soup.find('h2', class_='post-title').string
        content = soup.find('div', class_='post-content')
        author_span = content.find("span", class_="post-author")
        author = author_span.find("a").string
        date_span = content.find("span", class_="post-date")
        published = string_to_date(date_span.string, "%B %d, %Y")
        entry = content.find("div", class_="entry")
        pictures = entry.find_all("img")
        return {
            'title': title,
            'author': author,
            'img': [picture['src'] for picture in pictures],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2299
2300
2301
class LinsEditions(GenericNavigableComic):
2302
    """Class to retrieve L.I.N.S. Editions comics."""
2303
    # Also on http://linscomics.tumblr.com
2304
    # Now on https://warandpeas.com
2305
    name = 'lins'
2306
    long_name = 'L.I.N.S. Editions'
2307
    url = 'https://linsedition.com'
2308
    _categories = ('LINS', )
2309
    get_navi_link = get_link_rel_next
2310
    get_first_comic_link = simulate_first_link
2311
    first_url = 'https://linsedition.com/2011/09/07/l-i-n-s/'
2312
@@ 1020-1044 (lines=25) @@
1017
        }
1018
1019
1020
class Mercworks(GenericNavigableComic):
    """Class to retrieve Mercworks comics."""
    # Also on http://mercworks.tumblr.com
    name = 'mercworks'
    long_name = 'Mercworks'
    url = 'http://mercworks.net'
    # Navigation is delegated to helpers defined elsewhere in the module.
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic description from the page's ``<meta>`` tags.

        Reads ``og:title``, the optional ``og:description``, the first ten
        characters of ``article:published_time`` (parsed as ``%Y-%m-%d``) and
        every ``og:image`` tag.  ``link`` is accepted but not used here.

        Returns a dict with the keys ``img``, ``title``, ``desc``, ``day``,
        ``month`` and ``year``.
        """
        title = soup.find('meta', property='og:title')['content']
        description_tag = soup.find('meta', property='og:description')
        # The description is optional: fall back to an empty string.
        if description_tag:
            desc = description_tag['content']
        else:
            desc = ""
        published_tag = soup.find('meta', property='article:published_time')
        iso_day = published_tag['content'][:10]
        published = string_to_date(iso_day, "%Y-%m-%d")
        image_tags = soup.find_all('meta', property='og:image')
        return {
            'img': [tag['content'] for tag in image_tags],
            'title': title,
            'desc': desc,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
1046
1047
@@ 355-377 (lines=23) @@
352
        return []
353
354
355
class ExtraFabulousComics(GenericNavigableComic):
    """Class to retrieve Extra Fabulous Comics."""
    name = 'efc'
    long_name = 'Extra Fabulous Comics'
    url = 'http://extrafabulouscomics.com'
    # Navigation is delegated to helpers defined elsewhere in the module.
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Images are the ``<img>`` tags whose ``src`` starts with
        ``<cls.url>/wp-content/uploads/``.  The title comes from the
        ``og:title`` meta tag and the date from the first ten characters of
        ``article:published_time`` (parsed as ``%Y-%m-%d``).  ``link`` is
        accepted but not used here.

        Returns a dict with the keys ``title``, ``img``, ``month``, ``year``,
        ``day`` and ``prefix`` (the title followed by ``-``).
        """
        # cls.url is a literal prefix, not a pattern: escape it so that the
        # dots in the host name only match real dots.
        img_src_re = re.compile('^%s/wp-content/uploads/' % re.escape(cls.url))
        imgs = soup.find_all('img', src=img_src_re)
        title = soup.find('meta', property='og:title')['content']
        date_str = soup.find('meta', property='article:published_time')['content'][:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        return {
            'title': title,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'prefix': title + '-'
        }
379
380
@@ 3194-3215 (lines=22) @@
3191
        """Get link to first comics."""
3192
        return cls.get_nav(get_soup_at_url(cls.url))[0]
3193
3194
    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the navigation entry for the next or previous comic.

        The result of ``cls.get_nav(last_soup)`` is indexed at position 3
        when ``next_`` is truthy and at position 1 otherwise.
        """
        nav = cls.get_nav(last_soup)
        if next_:
            return nav[3]
        return nav[1]
3198
3199
    @classmethod
    def get_comic_info(cls, soup, link):
        """Describe a comic by its link title and its ``comicimg`` images.

        The title is the ``title`` item of ``link``; the images are the
        ``src`` attributes of every ``<img id="comicimg">`` found in ``soup``.

        Returns a dict with the keys ``title`` and ``img``.
        """
        info = {'title': link['title']}
        pictures = soup.find_all('img', id='comicimg')
        info['img'] = [picture['src'] for picture in pictures]
        return info
3208
3209
3210
class MarketoonistComics(GenericNavigableComic):
3211
    """Class to retrieve Marketoonist Comics."""
3212
    name = 'marketoonist'
3213
    long_name = 'Marketoonist'
3214
    url = 'https://marketoonist.com/cartoons'
3215
    get_first_comic_link = simulate_first_link
3216
    get_navi_link = get_link_rel_next
3217
    first_url = 'https://marketoonist.com/2002/10/the-8-types-of-brand-managers-2.html'
3218
@@ 1806-1831 (lines=26) @@
1803
        }
1804
1805
1806
class SafelyEndangered(GenericNavigableComic):
    """Class to retrieve Safely Endangered comics."""
    # Also on http://tumblr.safelyendangered.com
    name = 'endangered'
    long_name = 'Safely Endangered'
    url = 'http://www.safelyendangered.com'
    get_navi_link = get_link_rel_next
    # NOTE(review): first_url is presumably what simulate_first_link reads
    # to locate the first comic - confirm against that helper.
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.safelyendangered.com/comic/ignored/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract date, images, title and alt text from a comic page.

        The title is read from ``h2.post-title``, the date from
        ``span.post-date`` (parsed as ``%B %d, %Y``) and the images from the
        ``div`` with id ``comic``.  The alt text is taken from the first
        image.  ``link`` is accepted but not used here.

        Returns a dict with the keys ``day``, ``month``, ``year``, ``img``,
        ``title`` and ``alt``.
        """
        title = soup.find('h2', class_='post-title').string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, '%B %d, %Y')
        comic_div = soup.find('div', id='comic')
        pictures = comic_div.find_all('img')
        first_alt = pictures[0]['alt']
        # Sanity check on the page layout: alt and title text must agree.
        for picture in pictures:
            assert picture['alt'] == picture['title']
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [picture['src'] for picture in pictures],
            'title': title,
            'alt': first_alt,
        }
1833
1834
@@ 381-402 (lines=22) @@
378
        }
379
380
381
class GenericLeMondeBlog(GenericNavigableComic):
    """Generic class to retrieve comics from Le Monde blogs."""
    _categories = ('LEMONDE', 'FRANCAIS')
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    # Placeholder value; no concrete URL is provided at this level.
    first_url = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, short link, date and image URLs from a blog post.

        The short link is the ``href`` of ``<link rel="shortlink">``, the
        title is the ``og:title`` meta content and the date is the text of
        ``span.entry-date``, parsed as ``%d %B %Y`` with ``fr_FR.utf8`` passed
        to ``string_to_date``.  Each ``og:image`` content is passed through
        ``convert_iri_to_plain_ascii_uri``.  ``link`` is accepted but not used
        here.

        Returns a dict with the keys ``title``, ``url2``, ``img``, ``month``,
        ``year`` and ``day``.
        """
        short_link = soup.find('link', rel='shortlink')['href']
        title = soup.find('meta', property='og:title')['content']
        date_span = soup.find("span", class_="entry-date")
        published = string_to_date(date_span.string, "%d %B %Y", "fr_FR.utf8")
        image_tags = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'url2': short_link,
            'img': [convert_iri_to_plain_ascii_uri(tag['content']) for tag in image_tags],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
404
405
@@ 925-950 (lines=26) @@
922
        }
923
924
925
class MyExtraLife(GenericNavigableComic):
    """Class to retrieve My Extra Life comics."""
    name = 'extralife'
    long_name = 'My Extra Life'
    url = 'http://www.myextralife.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the "first comic" anchor found on the page at ``cls.url``.

        The anchor is looked up by its class string
        ``comic_nav_link first_comic_link``.
        """
        home = get_soup_at_url(cls.url)
        return home.find('a', class_='comic_nav_link first_comic_link')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, image URLs and date from a comic page.

        The title is read from ``h1.comic_title``, the date from
        ``span.comic_date`` (parsed as ``%B %d, %Y``) and the images are the
        ``<img class="comic">`` tags.  Images with an empty ``src`` are left
        out of the result.  ``link`` is accepted but not used here.

        Returns a dict with the keys ``title``, ``img``, ``day``, ``month``
        and ``year``.
        """
        title = soup.find("h1", class_="comic_title").string
        raw_date = soup.find("span", class_="comic_date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find_all("img", class_="comic")
        # Sanity check on the page layout: alt, title attribute and page
        # title must all agree.
        for picture in pictures:
            assert picture['alt'] == picture['title'] == title
        return {
            'title': title,
            'img': [picture['src'] for picture in pictures if picture["src"]],
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
952
953
@@ 2572-2600 (lines=29) @@
2569
        }
2570
2571
2572
class TalesOfAbsurdity(GenericNavigableComic):
    """Class to retrieve Tales Of Absurdity comics."""
    # Also on http://tapastic.com/series/Tales-Of-Absurdity
    # Also on http://talesofabsurdity.tumblr.com
    name = 'absurdity'
    long_name = 'Tales of Absurdity'
    url = 'http://talesofabsurdity.com'
    _categories = ('ABSURDITY', )
    # Navigation is delegated to helpers defined elsewhere in the module.
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract images, title, alt text, author and date from a page.

        The title is read from ``h2.post-title``, the author from the anchor
        inside ``span.post-author``, the date from ``span.post-date`` (parsed
        as ``%B %d, %Y``) and the images from the ``div`` with id ``comic``.
        The alt text is the first image's ``alt``, or an empty string when
        there is no image.  ``link`` is accepted but not used here.

        Returns a dict with the keys ``img``, ``title``, ``alt``, ``author``,
        ``day``, ``month`` and ``year``.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find("div", id="comic").find_all("img")
        # Sanity check on the page layout: alt and title text must agree.
        for picture in pictures:
            assert picture['alt'] == picture['title']
        if pictures:
            alt = pictures[0]['alt']
        else:
            alt = ""
        return {
            'img': [picture['src'] for picture in pictures],
            'title': title,
            'alt': alt,
            'author': author,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2602
2603
@@ 2510-2538 (lines=29) @@
2507
        }
2508
2509
2510
class MisterAndMe(GenericNavigableComic):
    """Class to retrieve Mister & Me Comics."""
    # Also on http://www.gocomics.com/mister-and-me
    # Also on https://tapastic.com/series/Mister-and-Me
    name = 'mister'
    long_name = 'Mister & Me'
    url = 'http://www.mister-and-me.com'
    # Navigation is delegated to helpers defined elsewhere in the module.
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract images, title, alt text, author and date from a page.

        The title is read from ``h2.post-title``, the author from the anchor
        inside ``span.post-author``, the date from ``span.post-date`` (parsed
        as ``%B %d, %Y``) and the images from the ``div`` with id ``comic``.
        At most one image is expected.  The alt text is that image's ``alt``,
        or an empty string when there is no image.  ``link`` is accepted but
        not used here.

        Returns a dict with the keys ``img``, ``title``, ``alt``, ``author``,
        ``day``, ``month`` and ``year``.
        """
        title = soup.find('h2', class_='post-title').string
        author_link = soup.find("span", class_="post-author").find("a")
        author = author_link.string
        date_span = soup.find("span", class_="post-date")
        published = string_to_date(date_span.string, "%B %d, %Y")
        comic_div = soup.find("div", id="comic")
        pictures = comic_div.find_all("img")
        # Sanity checks on the page layout.
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        assert len(pictures) <= 1
        alt = pictures[0]['alt'] if pictures else ""
        sources = [pic['src'] for pic in pictures]
        return {
            'img': sources,
            'title': title,
            'alt': alt,
            'author': author,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2540
2541
@@ 2796-2822 (lines=27) @@
2793
        }
2794
2795
2796
class Optipess(GenericNavigableComic):
    """Class to retrieve Optipess comics."""
    name = 'optipess'
    long_name = 'Optipess'
    url = 'http://www.optipess.com'
    # Navigation is delegated to helpers defined elsewhere in the module.
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, alt text, author, image URLs and date from a page.

        The title is read from ``h2.post-title`` and the author from the
        anchor inside ``span.post-author``.  Images come from the ``div`` with
        id ``comic``; when that ``div`` is missing the image list is empty.
        The alt text is the first image's ``title`` attribute, or an empty
        string when there is no image.  The date is the text of
        ``span.post-date`` parsed as ``%B %d, %Y``.  ``link`` is accepted but
        not used here.

        Returns a dict with the keys ``title``, ``alt``, ``author``, ``img``,
        ``month``, ``year`` and ``day``.
        """
        title = soup.find('h2', class_='post-title').string
        author = soup.find("span", class_="post-author").find("a").string
        comic_div = soup.find('div', id='comic')
        if comic_div:
            pictures = comic_div.find_all('img')
        else:
            pictures = []
        if pictures:
            alt = pictures[0]['title']
        else:
            alt = ""
        # Sanity check on the page layout: every image carries the same
        # alt/title text.
        for picture in pictures:
            assert picture['alt'] == picture['title'] == alt
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, "%B %d, %Y")
        return {
            'title': title,
            'alt': alt,
            'author': author,
            'img': [picture['src'] for picture in pictures],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2824
2825
@@ 2542-2568 (lines=27) @@
2539
        }
2540
2541
2542
class LastPlaceComics(GenericNavigableComic):
    """Class to retrieve Last Place Comics."""
    name = 'lastplace'
    long_name = 'Last Place Comics'
    url = "http://lastplacecomics.com"
    # Navigation is delegated to helpers defined elsewhere in the module.
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract images, title, alt text, author and date from a page.

        The title is read from ``h2.post-title``, the author from the anchor
        inside ``span.post-author``, the date from ``span.post-date`` (parsed
        as ``%B %d, %Y``) and the images from the ``div`` with id ``comic``.
        At most one image is expected.  The alt text is that image's ``alt``,
        or an empty string when there is no image.  ``link`` is accepted but
        not used here.

        Returns a dict with the keys ``img``, ``title``, ``alt``, ``author``,
        ``day``, ``month`` and ``year``.
        """
        heading = soup.find('h2', class_='post-title')
        title = heading.string
        author = soup.find("span", class_="post-author").find("a").string
        raw_date = soup.find("span", class_="post-date").string
        when = string_to_date(raw_date, "%B %d, %Y")
        images = soup.find("div", id="comic").find_all("img")
        # Sanity checks on the page layout.
        for image in images:
            assert image['alt'] == image['title']
        assert len(images) <= 1
        if images:
            alt = images[0]['alt']
        else:
            alt = ""
        return {
            'img': [image['src'] for image in images],
            'title': title,
            'alt': alt,
            'author': author,
            'day': when.day,
            'month': when.month,
            'year': when.year
        }
2570
2571
@@ 2431-2457 (lines=27) @@
2428
        }
2429
2430
2431
class BiterComics(GenericNavigableComic):
    """Class to retrieve Biter Comics."""
    name = "biter"
    long_name = "Biter Comics"
    url = "http://www.bitercomics.com"
    # Navigation is delegated to helpers defined elsewhere in the module.
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract image, title, alt text, author and date from a page.

        The title is read from ``h1.entry-title``, the author from the anchor
        inside the span whose class string is ``author vcard``, the date from
        ``span.entry-date`` (parsed as ``%B %d, %Y``) and the images from the
        ``div`` with id ``comic``.  Exactly one image is expected and its
        ``alt`` is used as alt text.  ``link`` is accepted but not used here.

        Returns a dict with the keys ``img``, ``title``, ``alt``, ``author``,
        ``day``, ``month`` and ``year``.
        """
        title = soup.find("h1", class_="entry-title").string
        author_span = soup.find("span", class_="author vcard")
        author = author_span.find("a").string
        date_span = soup.find("span", class_="entry-date")
        published = string_to_date(date_span.string, "%B %d, %Y")
        comic_div = soup.find("div", id="comic")
        pictures = comic_div.find_all("img")
        # Sanity checks on the page layout.
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        assert len(pictures) == 1
        alt = pictures[0]['alt']
        return {
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'alt': alt,
            'author': author,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2459
2460
@@ 1980-2006 (lines=27) @@
1977
    _categories = ('TUNEYTOONS', )
1978
1979
1980
class CompletelySeriousComics(GenericNavigableComic):
    """Class to retrieve Completely Serious comics."""
    name = 'completelyserious'
    long_name = 'Completely Serious Comics'
    url = 'http://completelyseriouscomics.com'
    # Navigation is delegated to helpers defined elsewhere in the module.
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract date, image URLs, title, alt text and author from a page.

        The title is read from ``h2.post-title``, the author from the second
        child of ``span.post-author``, the date from ``span.post-date``
        (parsed as ``%B %d, %Y``) and the images from ``div.comicpane``.  At
        least one image is required; the alt text is the first image's
        ``title`` attribute.  ``link`` is accepted but not used here.

        Returns a dict with the keys ``month``, ``year``, ``day``, ``img``,
        ``title``, ``alt`` and ``author``.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find('span', class_='post-author')
        author = author_span.contents[1].string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, '%B %d, %Y')
        pane = soup.find('div', class_='comicpane')
        pictures = pane.find_all('img')
        assert pictures
        alt = pictures[0]['title']
        # Sanity check on the page layout: every image carries the same
        # title/alt text.
        for picture in pictures:
            assert picture['title'] == picture['alt'] == alt
        return {
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [picture['src'] for picture in pictures],
            'title': title,
            'alt': alt,
            'author': author,
        }
2008
2009
@@ 2604-2629 (lines=26) @@
2601
        }
2602
2603
2604
class EndlessOrigami(GenericEmptyComic, GenericNavigableComic):
    """Class to retrieve Endless Origami Comics."""
    name = "origami"
    long_name = "Endless Origami"
    url = "http://endlessorigami.com"
    # Navigation is delegated to helpers defined elsewhere in the module.
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract images, title, alt text, author and date from a page.

        The title is read from ``h2.post-title``, the author from the anchor
        inside ``span.post-author``, the date from ``span.post-date`` (parsed
        as ``%B %d, %Y``) and the images from the ``div`` with id ``comic``.
        The alt text is the first image's ``alt``, or an empty string when
        there is no image.  ``link`` is accepted but not used here.

        Returns a dict with the keys ``img``, ``title``, ``alt``, ``author``,
        ``day``, ``month`` and ``year``.
        """
        title = soup.find('h2', class_='post-title').string
        author_anchor = soup.find("span", class_="post-author").find("a")
        author = author_anchor.string
        date_text = soup.find("span", class_="post-date").string
        date = string_to_date(date_text, "%B %d, %Y")
        comic_imgs = soup.find("div", id="comic").find_all("img")
        # Sanity check on the page layout: alt and title text must agree.
        assert all(img['alt'] == img['title'] for img in comic_imgs)
        alt = comic_imgs[0]['alt'] if comic_imgs else ""
        srcs = [img['src'] for img in comic_imgs]
        return {
            'img': srcs,
            'title': title,
            'alt': alt,
            'author': author,
            'day': date.day,
            'month': date.month,
            'year': date.year
        }
2631
2632
@@ 1865-1890 (lines=26) @@
1862
        }
1863
1864
1865
class Penmen(GenericNavigableComic):
    """Class to retrieve Penmen comics."""
    name = 'penmen'
    long_name = 'Penmen'
    url = 'http://penmen.com'
    get_navi_link = get_link_rel_next
    # NOTE(review): first_url is presumably what simulate_first_link reads
    # to locate the first comic - confirm against that helper.
    get_first_comic_link = simulate_first_link
    first_url = 'http://penmen.com/index.php/2016/09/12/penmen-announces-grin-big-brand-clothing/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, short URL, image URLs, tags and date from a page.

        The title is the text of ``<title>``, the images are those inside
        ``div.entry-content`` and the short URL is the ``href`` of
        ``<link rel="shortlink">``.  Tags are the texts of all
        ``<a rel="tag">`` anchors joined with single spaces.  The date is the
        first ten characters of the ``datetime`` attribute of ``<time>``,
        parsed as ``%Y-%m-%d``.  ``link`` is accepted but not used here.

        Returns a dict with the keys ``title``, ``short_url``, ``img``,
        ``tags``, ``month``, ``year`` and ``day``.
        """
        title = soup.find('title').string
        entry = soup.find('div', class_='entry-content')
        pictures = entry.find_all('img')
        short_url = soup.find('link', rel='shortlink')['href']
        tag_anchors = soup.find_all('a', rel='tag')
        tags = ' '.join(anchor.string for anchor in tag_anchors)
        iso_day = soup.find('time')['datetime'][:10]
        published = string_to_date(iso_day, "%Y-%m-%d")
        return {
            'title': title,
            'short_url': short_url,
            'img': [picture['src'] for picture in pictures],
            'tags': tags,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
1892
1893
@@ 2068-2092 (lines=25) @@
2065
        }
2066
2067
2068
class ChuckleADuck(GenericNavigableComic):
    """Class to retrieve Chuckle-A-Duck comics."""
    name = 'chuckleaduck'
    long_name = 'Chuckle-A-duck'
    url = 'http://chuckleaduck.com'
    # Navigation is delegated to helpers defined elsewhere in the module.
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract date, image URLs, title and author from a comic page.

        The date is the text of ``span.post-date``, passed through
        ``remove_st_nd_rd_th_from_date`` and then parsed as ``%B %d, %Y``.
        The author is the text of ``span.post-author``.  Images come from the
        ``div`` with id ``comic``; when that ``div`` is missing the image list
        is empty.  The title is the first image's ``title`` attribute, or an
        empty string when there is no image.  ``link`` is accepted but not
        used here.

        Returns a dict with the keys ``month``, ``year``, ``day``, ``img``,
        ``title`` and ``author``.
        """
        raw_date = soup.find('span', class_='post-date').string
        cleaned_date = remove_st_nd_rd_th_from_date(raw_date)
        published = string_to_date(cleaned_date, "%B %d, %Y")
        author = soup.find('span', class_='post-author').string
        comic_div = soup.find('div', id='comic')
        if comic_div:
            pictures = comic_div.find_all('img')
        else:
            pictures = []
        if pictures:
            title = pictures[0]['title']
        else:
            title = ""
        # Sanity check on the page layout: every image carries the same
        # title/alt text.
        for picture in pictures:
            assert picture['title'] == picture['alt'] == title
        return {
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [picture['src'] for picture in pictures],
            'title': title,
            'author': author,
        }
2094
2095