Code Duplication    Length = 22-29 lines in 16 locations

comics.py 16 locations

@@ 2006-2032 (lines=27) @@
2003
    _categories = ('TUNEYTOONS', )
2004
2005
2006
class CompletelySeriousComics(GenericNavigableComic):
    """Retriever for the Completely Serious Comics webcomic."""

    name = 'completelyserious'
    long_name = 'Completely Serious Comics'
    url = 'http://completelyseriouscomics.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect title, author, date, images and alt text from a comic page.

        `soup` is the parsed page; `link` is accepted but not read here.
        Returns a dict with the keys month/year/day/img/title/alt/author.
        """
        post_title = soup.find('h2', class_='post-title').string
        author_span = soup.find('span', class_='post-author')
        # The author name is the second child node of the author span.
        post_author = author_span.contents[1].string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, '%B %d, %Y')
        comic_pane = soup.find('div', class_='comicpane')
        pictures = comic_pane.find_all('img')
        # At least one image is expected on every comic page.
        assert pictures
        alt_text = pictures[0]['title']
        # Every image must carry the same text in both 'title' and 'alt'.
        assert all(pic['title'] == pic['alt'] == alt_text for pic in pictures)
        return {
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [pic['src'] for pic in pictures],
            'title': post_title,
            'alt': alt_text,
            'author': post_author,
        }
2034
2035
@@ 1891-1916 (lines=26) @@
1888
        }
1889
1890
1891
class Penmen(GenericNavigableComic):
    """Retriever for the Penmen webcomic."""

    name = 'penmen'
    long_name = 'Penmen'
    url = 'http://penmen.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://penmen.com/index.php/2016/09/12/penmen-announces-grin-big-brand-clothing/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect title, short URL, images, tags and date from a comic page.

        `soup` is the parsed page; `link` is accepted but not read here.
        """
        page_title = soup.find('title').string
        content = soup.find('div', class_='entry-content')
        pictures = content.find_all('img')
        shortlink = soup.find('link', rel='shortlink')['href']
        # All tag anchors are flattened into one space-separated string.
        tag_names = [anchor.string for anchor in soup.find_all('a', rel='tag')]
        joined_tags = ' '.join(tag_names)
        # Only the first 10 characters (YYYY-MM-DD) of the datetime are parsed.
        iso_date = soup.find('time')['datetime'][:10]
        published = string_to_date(iso_date, "%Y-%m-%d")
        return {
            'title': page_title,
            'short_url': shortlink,
            'img': [pic['src'] for pic in pictures],
            'tags': joined_tags,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
1918
1919
@@ 1832-1857 (lines=26) @@
1829
        }
1830
1831
1832
class SafelyEndangered(GenericNavigableComic):
    """Class to retrieve Safely Endangered comics."""

    # Also on http://tumblr.safelyendangered.com
    name = 'endangered'
    long_name = 'Safely Endangered'
    url = 'http://www.safelyendangered.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.safelyendangered.com/comic/ignored/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        `soup` is the parsed comic page; `link` is accepted but not read here.
        Returns a dict with the keys day/month/year/img/title/alt. When the
        comic div holds no image, 'img' is empty and 'alt' is "".
        """
        title = soup.find('h2', class_='post-title').string
        date_str = soup.find('span', class_='post-date').string
        day = string_to_date(date_str, '%B %d, %Y')
        imgs = soup.find('div', id='comic').find_all('img')
        # Guard against a page without images: indexing an empty result would
        # raise IndexError. Other comic classes in this file use the same
        # empty-string fallback.
        alt = imgs[0]['alt'] if imgs else ""
        # Each image is expected to repeat its alt text in its title attribute.
        assert all(i['alt'] == i['title'] for i in imgs)
        return {
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [i['src'] for i in imgs],
            'title': title,
            'alt': alt,
        }
1859
1860
@@ 2094-2118 (lines=25) @@
2091
        }
2092
2093
2094
class ChuckleADuck(GenericNavigableComic):
    """Retriever for the Chuckle-A-Duck webcomic."""

    name = 'chuckleaduck'
    long_name = 'Chuckle-A-duck'
    url = 'http://chuckleaduck.com'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect date, author, images and title from a comic page.

        The title comes from the first image's 'title' attribute and is ""
        when the page has no comic div or no image. `link` is not read here.
        """
        raw_date = soup.find('span', class_='post-date').string
        cleaned_date = remove_st_nd_rd_th_from_date(raw_date)
        published = string_to_date(cleaned_date, "%B %d, %Y")
        post_author = soup.find('span', class_='post-author').string
        comic_div = soup.find('div', id='comic')
        if comic_div:
            pictures = comic_div.find_all('img')
        else:
            pictures = []
        if pictures:
            comic_title = pictures[0]['title']
        else:
            comic_title = ""
        # Every image must carry the same text in both 'title' and 'alt'.
        assert all(pic['title'] == pic['alt'] == comic_title for pic in pictures)
        return {
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [pic['src'] for pic in pictures],
            'title': comic_title,
            'author': post_author,
        }
2120
2121
@@ 2402-2426 (lines=25) @@
2399
        }
2400
2401
2402
class LinsEditions(GenericNavigableComic):
    """Retriever for the L.I.N.S. Editions webcomic."""

    # Also on https://linscomics.tumblr.com
    # Now on https://warandpeas.com
    name = 'lins'
    long_name = 'L.I.N.S. Editions'
    url = 'https://linsedition.com'
    _categories = ('LINS', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'https://linsedition.com/2011/09/07/l-i-n-s/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect title, images and date from the page's <meta> tags.

        `soup` is the parsed page; `link` is accepted but not read here.
        """
        og_title = soup.find('meta', property='og:title')['content']
        og_images = soup.find_all('meta', property='og:image')
        published_meta = soup.find('meta', property='article:published_time')
        # Only the first 10 characters (YYYY-MM-DD) of the timestamp are parsed.
        iso_date = published_meta['content'][:10]
        published = string_to_date(iso_date, "%Y-%m-%d")
        return {
            'title': og_title,
            'img': [meta['content'] for meta in og_images],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2428
2429
@@ 1046-1070 (lines=25) @@
1043
        }
1044
1045
1046
class Mercworks(GenericNavigableComic):
    """Retriever for the Mercworks webcomic."""

    # Also on http://mercworks.tumblr.com
    name = 'mercworks'
    long_name = 'Mercworks'
    url = 'http://mercworks.net'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect images, title, description and date from <meta> tags.

        The description is "" when the page has no og:description tag.
        `link` is accepted but not read here.
        """
        og_title = soup.find('meta', property='og:title')['content']
        desc_meta = soup.find('meta', property='og:description')
        if desc_meta:
            description = desc_meta['content']
        else:
            description = ""
        published_meta = soup.find('meta', property='article:published_time')
        # Only the first 10 characters (YYYY-MM-DD) of the timestamp are parsed.
        iso_date = published_meta['content'][:10]
        published = string_to_date(iso_date, "%Y-%m-%d")
        og_images = soup.find_all('meta', property='og:image')
        return {
            'img': [meta['content'] for meta in og_images],
            'title': og_title,
            'desc': description,
            'day': published.day,
            'month': published.month,
            'year': published.year,
        }
1072
1073
@@ 360-384 (lines=25) @@
357
        return []
358
359
360
class ExtraFabulousComics(GenericNavigableComic):
    """Class to retrieve Extra Fabulous Comics."""

    # Also on https://extrafabulouscomics.tumblr.com
    name = 'efc'
    long_name = 'Extra Fabulous Comics'
    url = 'http://extrafabulouscomics.com'
    _categories = ('EFC', )
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        `soup` is the parsed comic page; `link` is accepted but not read here.
        Images are the <img> tags whose src starts with the site's
        wp-content/uploads path. Returns a dict with the keys
        title/img/month/year/day/prefix, where prefix is the title plus '-'.
        """
        # Escape the URL so that its dots are matched literally rather than
        # as regex wildcards.
        img_src_re = re.compile('^%s/wp-content/uploads/' % re.escape(cls.url))
        imgs = soup.find_all('img', src=img_src_re)
        title = soup.find('meta', property='og:title')['content']
        # Only the first 10 characters (YYYY-MM-DD) of the timestamp are parsed.
        date_str = soup.find('meta', property='article:published_time')['content'][:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        return {
            'title': title,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'prefix': title + '-',
        }
386
387
@@ 3368-3389 (lines=22) @@
3365
        """Get information about a particular comics."""
3366
        title = link['title']
3367
        imgs = soup.find_all('img', id='comicimg')
3368
        return {
3369
            'title': title,
3370
            'img': [i['src'] for i in imgs],
3371
        }
3372
3373
3374
class OffTheLeashDog(GenericNavigableComic):
3375
    """Class to retrieve Off The Leash Dog comics."""
3376
    # Also on http://rupertfawcettsdoggyblog.tumblr.com
3377
    # Also on http://www.rupertfawcettcartoons.com
3378
    name = 'offtheleash'
3379
    long_name = 'Off The Leash Dog'
3380
    url = 'http://offtheleashdogcartoons.com'
3381
    _categories = ('FAWCETT', )
3382
    get_navi_link = get_a_rel_next
3383
    get_first_comic_link = simulate_first_link
3384
    first_url = 'http://offtheleashdogcartoons.com/uncategorized/can-i-help-you/'
3385
3386
    @classmethod
3387
    def get_comic_info(cls, soup, link):
3388
        """Get information about a particular comics."""
3389
        print(link)
3390
        title = soup.find("h1", class_="entry-title").string
3391
        imgs = soup.find('div', class_='entry-content').find_all('img')
3392
        return {
@@ 388-409 (lines=22) @@
385
        }
386
387
388
class GenericLeMondeBlog(GenericNavigableComic):
    """Shared retriever logic for comics hosted on Le Monde blogs."""

    _categories = ('LEMONDE', 'FRANCAIS')
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    # Placeholder value; presumably each concrete blog class sets it - confirm.
    first_url = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect title, short link, images and date from a blog post page.

        The date text is parsed with the "%d %B %Y" format and 'fr_FR.utf8'
        is handed to string_to_date as third argument (presumably a locale -
        confirm against that helper). `link` is accepted but not read here.
        """
        shortlink = soup.find('link', rel='shortlink')['href']
        og_title = soup.find('meta', property='og:title')['content']
        raw_date = soup.find("span", class_="entry-date").string
        published = string_to_date(raw_date, "%d %B %Y", "fr_FR.utf8")
        og_images = soup.find_all('meta', property='og:image')
        image_urls = [
            convert_iri_to_plain_ascii_uri(meta['content'])
            for meta in og_images
        ]
        return {
            'title': og_title,
            'url2': shortlink,
            'img': image_urls,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
411
412
@@ 958-983 (lines=26) @@
955
        }
956
957
958
class MyExtraLife(GenericNavigableComic):
    """Retriever for the My Extra Life webcomic."""

    name = 'extralife'
    long_name = 'My Extra Life'
    url = 'http://www.myextralife.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'first comic' anchor found on the site's home page."""
        home_page = get_soup_at_url(cls.url)
        return home_page.find('a', class_='comic_nav_link first_comic_link')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect title, images and date from a comic page.

        Images with an empty 'src' are left out of the result.
        `link` is accepted but not read here.
        """
        comic_title = soup.find("h1", class_="comic_title").string
        raw_date = soup.find("span", class_="comic_date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find_all("img", class_="comic")
        # Every image must carry the comic title in both 'alt' and 'title'.
        assert all(pic['alt'] == pic['title'] == comic_title for pic in pictures)
        sources = [pic['src'] for pic in pictures if pic["src"]]
        return {
            'title': comic_title,
            'img': sources,
            'day': published.day,
            'month': published.month,
            'year': published.year,
        }
985
986
@@ 2304-2328 (lines=25) @@
2301
2302
2303
class JuliasDrawings(GenericListableComic):
    """Retriever for Julia's Drawings."""

    name = 'julia'
    long_name = "Julia's Drawings"
    url = 'https://drawings.jvns.ca'
    get_url_from_archive_element = get_href

    @classmethod
    def get_archive_elements(cls):
        """Return the first anchor of each home-page article, in reverse page order."""
        home_page = get_soup_at_url(cls.url)
        anchors = [
            article.find('a')
            for article in home_page.find_all('article', class_='li post')
        ]
        # The page order is flipped before the anchors are handed back.
        anchors.reverse()
        return anchors

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Collect title, images and date from a drawing page.

        Image sources are combined with the site URL via urljoin_wrapper.
        `archive_elt` is accepted but not read here.
        """
        published_meta = soup.find('meta', property='og:article:published_time')
        # Only the first 10 characters (YYYY-MM-DD) of the timestamp are parsed.
        iso_date = published_meta['content'][:10]
        published = string_to_date(iso_date, "%Y-%m-%d")
        post_title = soup.find('h3', class_='p-post-title').string
        post_body = soup.find('section', class_='post-content')
        pictures = post_body.find_all('img')
        return {
            'title': post_title,
            'img': [urljoin_wrapper(cls.url, pic['src']) for pic in pictures],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2329
2330
2331
class AnythingComic(GenericListableComic):
@@ 2673-2701 (lines=29) @@
2670
        }
2671
2672
2673
class TalesOfAbsurdity(GenericNavigableComic):
    """Retriever for the Tales Of Absurdity webcomic."""

    # Also on http://tapastic.com/series/Tales-Of-Absurdity
    # Also on http://talesofabsurdity.tumblr.com
    name = 'absurdity'
    long_name = 'Tales of Absurdity'
    url = 'http://talesofabsurdity.com'
    _categories = ('ABSURDITY', )
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect images, title, alt text, author and date from a comic page.

        The alt text comes from the first image and is "" when the comic div
        holds no image. `link` is accepted but not read here.
        """
        post_title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        post_author = author_span.find("a").string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        comic_div = soup.find("div", id="comic")
        pictures = comic_div.find_all("img")
        # Each image is expected to repeat its alt text in its title attribute.
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        if pictures:
            alt_text = pictures[0]['alt']
        else:
            alt_text = ""
        return {
            'img': [pic['src'] for pic in pictures],
            'title': post_title,
            'alt': alt_text,
            'author': post_author,
            'day': published.day,
            'month': published.month,
            'year': published.year,
        }
2703
2704
@@ 2611-2639 (lines=29) @@
2608
        }
2609
2610
2611
class MisterAndMe(GenericNavigableComic):
    """Retriever for the Mister & Me webcomic."""

    # Also on http://www.gocomics.com/mister-and-me
    # Also on https://tapastic.com/series/Mister-and-Me
    name = 'mister'
    long_name = 'Mister & Me'
    url = 'http://www.mister-and-me.com'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect images, title, alt text, author and date from a comic page.

        At most one image is expected; the alt text is "" when there is none.
        `link` is accepted but not read here.
        """
        post_title = soup.find('h2', class_='post-title').string
        post_author = soup.find("span", class_="post-author").find("a").string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find("div", id="comic").find_all("img")
        # Each image is expected to repeat its alt text in its title attribute.
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        # A page holds zero or one comic image.
        assert len(pictures) <= 1
        alt_text = ""
        if pictures:
            alt_text = pictures[0]['alt']
        return {
            'img': [pic['src'] for pic in pictures],
            'title': post_title,
            'alt': alt_text,
            'author': post_author,
            'day': published.day,
            'month': published.month,
            'year': published.year,
        }
2641
2642
@@ 2897-2923 (lines=27) @@
2894
        }
2895
2896
2897
class Optipess(GenericNavigableComic):
    """Retriever for the Optipess webcomic."""

    name = 'optipess'
    long_name = 'Optipess'
    url = 'http://www.optipess.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect title, alt text, author, images and date from a comic page.

        Images and alt text are empty when the page has no comic div or the
        div holds no image. `link` is accepted but not read here.
        """
        post_title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        post_author = author_span.find("a").string
        comic_div = soup.find('div', id='comic')
        if comic_div:
            pictures = comic_div.find_all('img')
        else:
            pictures = []
        if pictures:
            alt_text = pictures[0]['title']
        else:
            alt_text = ""
        # Every image must carry the same text in both 'alt' and 'title'.
        assert all(pic['alt'] == pic['title'] == alt_text for pic in pictures)
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, "%B %d, %Y")
        return {
            'title': post_title,
            'alt': alt_text,
            'author': post_author,
            'img': [pic['src'] for pic in pictures],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2925
2926
@@ 2532-2558 (lines=27) @@
2529
        }
2530
2531
2532
class BiterComics(GenericNavigableComic):
    """Retriever for the Biter Comics webcomic."""

    name = "biter"
    long_name = "Biter Comics"
    url = "http://www.bitercomics.com"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect image, title, alt text, author and date from a comic page.

        Exactly one image is expected inside the comic div.
        `link` is accepted but not read here.
        """
        entry_title = soup.find("h1", class_="entry-title").string
        author_span = soup.find("span", class_="author vcard")
        entry_author = author_span.find("a").string
        raw_date = soup.find("span", class_="entry-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        comic_div = soup.find("div", id="comic")
        pictures = comic_div.find_all("img")
        # Each image is expected to repeat its alt text in its title attribute.
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        # A page holds exactly one comic image.
        assert len(pictures) == 1
        alt_text = pictures[0]['alt']
        return {
            'img': [pic['src'] for pic in pictures],
            'title': entry_title,
            'alt': alt_text,
            'author': entry_author,
            'day': published.day,
            'month': published.month,
            'year': published.year,
        }
2560
2561
@@ 2705-2730 (lines=26) @@
2702
        }
2703
2704
2705
class EndlessOrigami(GenericEmptyComic, GenericNavigableComic):
    """Retriever for the Endless Origami webcomic."""

    # NOTE(review): GenericEmptyComic is listed first among the bases;
    # its effect is not visible from this class - confirm in its definition.
    name = "origami"
    long_name = "Endless Origami"
    url = "http://endlessorigami.com"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect images, title, alt text, author and date from a comic page.

        The alt text comes from the first image and is "" when the comic div
        holds no image. `link` is accepted but not read here.
        """
        post_title = soup.find('h2', class_='post-title').string
        post_author = soup.find("span", class_="post-author").find("a").string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find("div", id="comic").find_all("img")
        # Each image is expected to repeat its alt text in its title attribute.
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        alt_text = pictures[0]['alt'] if pictures else ""
        sources = [pic['src'] for pic in pictures]
        return {
            'img': sources,
            'title': post_title,
            'alt': alt_text,
            'author': post_author,
            'day': published.day,
            'month': published.month,
            'year': published.year,
        }
2732
2733