Code Duplication    Length = 23-28 lines in 13 locations

comics.py 13 locations

@@ 1781-1806 (lines=26) @@
1778
        }
1779
1780
1781
class SafelyEndangered(GenericNavigableComic):
    """Class to retrieve Safely Endangered comics."""
    # Also on http://tumblr.safelyendangered.com
    name = 'endangered'
    long_name = 'Safely Endangered'
    url = 'http://www.safelyendangered.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.safelyendangered.com/comic/ignored/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The title is read from the ``h2.post-title`` element, the date
        from the ``span.post-date`` element (converted by
        ``string_to_date`` with the format ``'%B %d, %Y'``) and the images
        from the ``div#comic`` element of ``soup``.

        Returns a dict with the keys 'day', 'month', 'year', 'img' (list
        of the image sources), 'title' and 'alt'. 'alt' is the alt text of
        the first image, or an empty string when the page has no image.
        ``link`` is not used by this method.
        """
        title = soup.find('h2', class_='post-title').string
        date_str = soup.find('span', class_='post-date').string
        day = string_to_date(date_str, '%B %d, %Y')
        imgs = soup.find('div', id='comic').find_all('img')
        # A page without any image must not fail with an IndexError: use
        # an empty alt text in that case.
        alt = imgs[0]['alt'] if imgs else ""
        # Sanity check on the page layout: alt and title texts are
        # expected to be the same on every image.
        assert all(i['alt'] == i['title'] for i in imgs)
        return {
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [i['src'] for i in imgs],
            'title': title,
            'alt': alt,
        }
1808
1809
@@ 1810-1836 (lines=27) @@
1807
        }
1808
1809
1810
class PicturesInBoxes(GenericNavigableComic):
    """Retrieve the Pictures In Boxes comics."""
    # Also on http://picturesinboxescomic.tumblr.com
    name = 'picturesinboxes'
    long_name = 'Pictures in Boxes'
    url = 'http://www.picturesinboxes.com'
    get_navi_link = get_a_navi_navinext
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.picturesinboxes.com/2013/10/26/tetris/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract the details of one comic from its parsed page.

        Returns a dict with the keys 'day', 'month', 'year', 'img' (list
        of the image sources), 'title' and 'author'. ``link`` is not used
        by this method.
        """
        heading = soup.find('h2', class_='post-title')
        title = heading.string
        author_span = soup.find('span', class_='post-author')
        author = author_span.find('a').string
        date_text = soup.find('span', class_='post-date').string
        date = string_to_date(date_text, '%B %d, %Y')
        pane = soup.find('div', class_='comicpane')
        pictures = pane.find_all('img')
        # At least one image is expected, and each of them must carry the
        # post title both as title and as alt text.
        assert pictures
        assert all(pic['title'] == pic['alt'] == title for pic in pictures)
        return dict(
            day=date.day,
            month=date.month,
            year=date.year,
            img=[pic['src'] for pic in pictures],
            title=title,
            author=author,
        )
1838
1839
@@ 2518-2545 (lines=28) @@
2515
        }
2516
2517
2518
class TalesOfAbsurdity(GenericNavigableComic):
    """Retrieve the Tales Of Absurdity comics."""
    # Also on http://tapastic.com/series/Tales-Of-Absurdity
    # Also on http://talesofabsurdity.tumblr.com
    name = 'absurdity'
    long_name = 'Tales of Absurdity'
    url = 'http://talesofabsurdity.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract the details of one comic from its parsed page.

        Returns a dict with the keys 'img' (list of the image sources),
        'title', 'alt', 'author', 'day', 'month' and 'year'. 'alt' is the
        alt text of the first image, or an empty string when there is no
        image. ``link`` is not used by this method.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find('span', class_='post-author')
        author = author_span.find('a').string
        date_text = soup.find('span', class_='post-date').string
        date = string_to_date(date_text, '%B %d, %Y')
        pictures = soup.find('div', id='comic').find_all('img')
        # Alt and title texts are expected to match on every image.
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        if pictures:
            alt = pictures[0]['alt']
        else:
            alt = ""
        info = {}
        info['img'] = [pic['src'] for pic in pictures]
        info['title'] = title
        info['alt'] = alt
        info['author'] = author
        info['day'] = date.day
        info['month'] = date.month
        info['year'] = date.year
        return info
2547
2548
@@ 2738-2764 (lines=27) @@
2735
        }
2736
2737
2738
class Optipess(GenericNavigableComic):
    """Retrieve the Optipess comics."""
    name = 'optipess'
    long_name = 'Optipess'
    url = 'http://www.optipess.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract the details of one comic from its parsed page.

        Returns a dict with the keys 'title', 'alt', 'author', 'img' (list
        of the image sources), 'month', 'year' and 'day'. When the page
        has no ``div#comic`` element or no image in it, 'img' is empty and
        'alt' is an empty string. ``link`` is not used by this method.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find('span', class_='post-author')
        author = author_span.find('a').string
        comic_div = soup.find('div', id='comic')
        if comic_div:
            pictures = comic_div.find_all('img')
        else:
            pictures = []
        if pictures:
            alt = pictures[0]['title']
        else:
            alt = ""
        # Every image is expected to share the same alt and title text.
        assert all(pic['alt'] == pic['title'] == alt for pic in pictures)
        date_text = soup.find('span', class_='post-date').string
        date = string_to_date(date_text, '%B %d, %Y')
        return dict(
            title=title,
            alt=alt,
            author=author,
            img=[pic['src'] for pic in pictures],
            month=date.month,
            year=date.year,
            day=date.day,
        )
2766
2767
@@ 2488-2514 (lines=27) @@
2485
        }
2486
2487
2488
class LastPlaceComics(GenericNavigableComic):
    """Retrieve the Last Place Comics."""
    name = 'lastplace'
    long_name = 'Last Place Comics'
    url = "http://lastplacecomics.com"
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract the details of one comic from its parsed page.

        Returns a dict with the keys 'img' (list of the image sources),
        'title', 'alt', 'author', 'day', 'month' and 'year'. 'alt' is the
        alt text of the image, or an empty string when there is none.
        ``link`` is not used by this method.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find('span', class_='post-author')
        author = author_span.find('a').string
        date_text = soup.find('span', class_='post-date').string
        date = string_to_date(date_text, '%B %d, %Y')
        pictures = soup.find('div', id='comic').find_all('img')
        # Alt and title texts must match and a page holds at most one image.
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        assert len(pictures) <= 1
        if pictures:
            alt = pictures[0]['alt']
        else:
            alt = ""
        info = {}
        info['img'] = [pic['src'] for pic in pictures]
        info['title'] = title
        info['alt'] = alt
        info['author'] = author
        info['day'] = date.day
        info['month'] = date.month
        info['year'] = date.year
        return info
2516
2517
@@ 2321-2346 (lines=26) @@
2318
        }
2319
2320
2321
class GerbilWithAJetpack(GenericNavigableComic):
    """Class to retrieve GerbilWithAJetpack comics."""
    name = 'gerbil'
    long_name = 'Gerbil With A Jetpack'
    url = 'http://gerbilwithajetpack.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The title is read from the ``h2.post-title`` element, the author
        from the link inside ``span.post-author``, the date from
        ``span.post-date`` (converted by ``string_to_date`` with the format
        ``"%B %d, %Y"``) and the images from the ``div#comic`` element.

        Returns a dict with the keys 'img' (list of the image sources),
        'title', 'alt', 'author', 'day', 'month' and 'year'. 'alt' is the
        alt text of the first image, or an empty string when the page has
        no image. ``link`` is not used by this method.
        """
        title = soup.find('h2', class_='post-title').string
        author = soup.find("span", class_="post-author").find("a").string
        date_str = soup.find("span", class_="post-date").string
        day = string_to_date(date_str, "%B %d, %Y")
        imgs = soup.find("div", id="comic").find_all("img")
        # A page without any image must not fail with an IndexError: use
        # an empty alt text in that case.
        alt = imgs[0]['alt'] if imgs else ""
        # Sanity check on the page layout: every image is expected to
        # share the same alt and title text.
        assert all(i['alt'] == i['title'] == alt for i in imgs)
        return {
            'img': [i['src'] for i in imgs],
            'title': title,
            'alt': alt,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
2348
2349
@@ 2019-2043 (lines=25) @@
2016
        }
2017
2018
2019
class ChuckleADuck(GenericNavigableComic):
    """Retrieve the Chuckle-A-Duck comics."""
    name = 'chuckleaduck'
    long_name = 'Chuckle-A-duck'
    url = 'http://chuckleaduck.com'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract the details of one comic from its parsed page.

        Returns a dict with the keys 'month', 'year', 'day', 'img' (list
        of the image sources), 'title' and 'author'. The title is taken
        from the title text of the first image and is an empty string when
        the page has no ``div#comic`` element or no image in it. ``link``
        is not used by this method.
        """
        raw_date = soup.find('span', class_='post-date').string
        cleaned_date = remove_st_nd_rd_th_from_date(raw_date)
        date = string_to_date(cleaned_date, '%B %d, %Y')
        author = soup.find('span', class_='post-author').string
        comic_div = soup.find('div', id='comic')
        if comic_div:
            pictures = comic_div.find_all('img')
        else:
            pictures = []
        if pictures:
            title = pictures[0]['title']
        else:
            title = ""
        # Every image is expected to share the same title and alt text.
        assert all(pic['title'] == pic['alt'] == title for pic in pictures)
        return dict(
            month=date.month,
            year=date.year,
            day=date.day,
            img=[pic['src'] for pic in pictures],
            title=title,
            author=author,
        )
2045
2046
@@ 2378-2404 (lines=27) @@
2375
        }
2376
2377
2378
class BiterComics(GenericNavigableComic):
    """Retrieve the Biter Comics."""
    name = "biter"
    long_name = "Biter Comics"
    url = "http://www.bitercomics.com"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract the details of one comic from its parsed page.

        Returns a dict with the keys 'img' (list of the image sources),
        'title', 'alt', 'author', 'day', 'month' and 'year'. ``link`` is
        not used by this method.
        """
        title = soup.find('h1', class_='entry-title').string
        author_span = soup.find('span', class_='author vcard')
        author = author_span.find('a').string
        date_text = soup.find('span', class_='entry-date').string
        date = string_to_date(date_text, '%B %d, %Y')
        pictures = soup.find('div', id='comic').find_all('img')
        # Alt and title texts must match and exactly one image is expected.
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        assert len(pictures) == 1
        alt = pictures[0]['alt']
        info = {}
        info['img'] = [pic['src'] for pic in pictures]
        info['title'] = title
        info['alt'] = alt
        info['author'] = author
        info['day'] = date.day
        info['month'] = date.month
        info['year'] = date.year
        return info
2406
2407
@@ 1932-1958 (lines=27) @@
1929
    url = 'http://tubeytoons.com'
1930
1931
1932
class CompletelySeriousComics(GenericNavigableComic):
    """Retrieve the Completely Serious comics."""
    name = 'completelyserious'
    long_name = 'Completely Serious Comics'
    url = 'http://completelyseriouscomics.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract the details of one comic from its parsed page.

        Returns a dict with the keys 'month', 'year', 'day', 'img' (list
        of the image sources), 'title', 'alt' and 'author'. ``link`` is
        not used by this method.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find('span', class_='post-author')
        # The author is read from the second child of the author span.
        author = author_span.contents[1].string
        date_text = soup.find('span', class_='post-date').string
        date = string_to_date(date_text, '%B %d, %Y')
        pane = soup.find('div', class_='comicpane')
        pictures = pane.find_all('img')
        assert pictures
        alt = pictures[0]['title']
        # Every image is expected to share the same title and alt text.
        assert all(pic['title'] == pic['alt'] == alt for pic in pictures)
        return dict(
            month=date.month,
            year=date.year,
            day=date.day,
            img=[pic['src'] for pic in pictures],
            title=title,
            alt=alt,
            author=author,
        )
1960
1961
@@ 2119-2144 (lines=26) @@
2116
        return reversed(get_soup_at_url(archive_url).find('tbody').find_all('tr'))
2117
2118
2119
class HappleTea(GenericNavigableComic):
    """Retrieve the Happle Tea comics."""
    name = 'happletea'
    long_name = 'Happle Tea'
    url = 'http://www.happletea.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract the details of one comic from its parsed page.

        Returns a dict with the keys 'title', 'img' (list of the image
        sources), 'alt' (the alt texts of all images joined together),
        'month', 'year', 'day' and 'author'. ``link`` is not used by this
        method.
        """
        pictures = soup.find('div', id='comic').find_all('img')
        # Title, author and date are all looked up inside the post content.
        content = soup.find('div', class_='post-content')
        title = content.find('h2', class_='post-title').string
        author = content.find('a', rel='author').string
        date_text = content.find('span', class_='post-date').string
        date = string_to_date(date_text, '%B %d, %Y')
        # Alt and title texts are expected to match on every image.
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        info = {}
        info['title'] = title
        info['img'] = [pic['src'] for pic in pictures]
        info['alt'] = ''.join(pic['alt'] for pic in pictures)
        info['month'] = date.month
        info['year'] = date.year
        info['day'] = date.day
        info['author'] = author
        return info
2146
2147
@@ 2659-2683 (lines=25) @@
2656
    first_url = 'http://www.commitstrip.com/en/2012/02/22/interview/'
2657
2658
2659
class GenericBoumerie(GenericNavigableComic):
    """Generic base to retrieve the Boumeries comics in different languages."""
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next
    # Placeholders: both values are passed to string_to_date when a page is
    # parsed.
    date_format = NotImplemented
    lang = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract the details of one comic from its parsed page.

        The date is converted by ``string_to_date`` using the class
        attributes ``date_format`` and ``lang``.

        Returns a dict with the keys 'short_url', 'img' (list of the image
        sources), 'title', 'author', 'month', 'year' and 'day'. ``link``
        is not used by this method.
        """
        title = soup.find('h2', class_='post-title').string
        shortlink = soup.find('link', rel='shortlink')
        short_url = shortlink['href']
        author_span = soup.find('span', class_='post-author')
        author = author_span.find('a').string
        date_text = soup.find('span', class_='post-date').string
        date = string_to_date(date_text, cls.date_format, cls.lang)
        pictures = soup.find('div', id='comic').find_all('img')
        # Alt and title texts are expected to match on every image.
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        return dict(
            short_url=short_url,
            img=[pic['src'] for pic in pictures],
            title=title,
            author=author,
            month=date.month,
            year=date.year,
            day=date.day,
        )
2685
2686
@@ 2251-2273 (lines=23) @@
2248
        }
2249
2250
2251
class LinsEditions(GenericNavigableComic):
    """Retrieve the L.I.N.S. Editions comics."""
    # Also on http://linscomics.tumblr.com
    name = 'lins'
    long_name = 'L.I.N.S. Editions'
    url = 'https://linsedition.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'https://linsedition.com/2011/09/07/l-i-n-s/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract the details of one comic from the page's meta tags.

        Returns a dict with the keys 'title', 'img' (contents of the
        ``og:image`` meta tags), 'month', 'year' and 'day'. ``link`` is not
        used by this method.
        """
        title_tag = soup.find('meta', property='og:title')
        title = title_tag['content']
        image_tags = soup.find_all('meta', property='og:image')
        published = soup.find('meta', property='article:published_time')
        # Only the first 10 characters are kept: they are parsed as
        # year-month-day.
        date_text = published['content'][:10]
        date = string_to_date(date_text, '%Y-%m-%d')
        info = {}
        info['title'] = title
        info['img'] = [tag['content'] for tag in image_tags]
        info['month'] = date.month
        info['year'] = date.year
        info['day'] = date.day
        return info
2275
2276
@@ 338-360 (lines=23) @@
335
        return []
336
337
338
class ExtraFabulousComics(GenericNavigableComic):
    """Class to retrieve Extra Fabulous Comics."""
    name = 'efc'
    long_name = 'Extra Fabulous Comics'
    url = 'http://extrafabulouscomics.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The images are the ``img`` elements whose source starts with
        ``<cls.url>/wp-content/uploads/``. The title and the date are read
        from the ``og:title`` and ``article:published_time`` meta tags.

        Returns a dict with the keys 'title', 'img' (list of the image
        sources), 'month', 'year', 'day' and 'prefix' (the title followed
        by a dash). ``link`` is not used by this method.
        """
        # The url is escaped so that its dots only match literal dots and
        # not any character.
        img_src_re = re.compile(
            '^%s/wp-content/uploads/' % re.escape(cls.url))
        imgs = soup.find_all('img', src=img_src_re)
        title = soup.find('meta', property='og:title')['content']
        # Only the first 10 characters are kept: they are parsed as
        # year-month-day.
        date_str = soup.find('meta', property='article:published_time')['content'][:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        return {
            'title': title,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'prefix': title + '-'
        }
362
363