Code Duplication    Length = 24-27 lines in 13 locations

comics.py 13 locations

@@ 2734-2760 (lines=27) @@
2731
    name = 'unearthed'
2732
    long_name = 'Unearthed Comics'
2733
    url = 'http://unearthedcomics.com'
2734
    get_navi_link = get_link_rel_next
2735
    get_first_comic_link = simulate_first_link
2736
    first_url = 'http://unearthedcomics.com/comics/world-with-turn-signals/'
2737
2738
    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Reads the shortlink, the first <h1> (or else <h2>) heading, the
        og:description meta tag, the published <time> element and the
        images of the portfolio entry from the parsed page.

        soup is the parsed page; link is not used by this implementation.
        Returns a dict with the keys 'title', 'description', 'url2', 'img',
        'month', 'year' and 'day'.
        """
        short_url = soup.find('link', rel='shortlink')['href']
        title_elt = soup.find('h1') or soup.find('h2')
        title = title_elt.string if title_elt else ""
        desc = soup.find('meta', property='og:description')
        # Keep the text carried by the meta tag, not the tag object itself;
        # pages without the tag get an empty description.
        description = desc['content'] if desc else ""
        date_str = soup.find('time', class_='published updated hidden')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        post = soup.find('div', class_="entry content entry-content type-portfolio")
        imgs = post.find_all('img')
        return {
            'title': title,
            'description': description,
            'url2': short_url,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
2758
2759
2760
class Optipess(GenericNavigableComic):
2761
    """Class to retrieve Optipess comics."""
2762
    name = 'optipess'
2763
    long_name = 'Optipess'
@@ 2484-2510 (lines=27) @@
2481
    # Also on https://tapastic.com/series/Mister-and-Me
2482
    name = 'mister'
2483
    long_name = 'Mister & Me'
2484
    url = 'http://www.mister-and-me.com'
2485
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
2486
    get_navi_link = get_link_rel_next
2487
2488
    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect title, author, date, image and alt text of a comic page.

        soup is the parsed page; link is not used by this implementation.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        published = string_to_date(
            soup.find("span", class_="post-date").string, "%B %d, %Y")
        images = soup.find("div", id="comic").find_all("img")
        # Alt and title attributes are expected to hold the same text.
        assert all(img['alt'] == img['title'] for img in images)
        # No more than one image is expected per page.
        assert len(images) <= 1
        if images:
            alt = images[0]['alt']
        else:
            alt = ""
        sources = [img['src'] for img in images]
        return {
            'img': sources,
            'title': title,
            'alt': alt,
            'author': author,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2508
2509
2510
class LastPlaceComics(GenericNavigableComic):
2511
    """Class to retrieve Last Place Comics."""
2512
    name = 'lastplace'
2513
    long_name = 'LastPlaceComics'
@@ 2545-2570 (lines=26) @@
2542
    # Also on http://tapastic.com/series/Tales-Of-Absurdity
2543
    # Also on http://talesofabsurdity.tumblr.com
2544
    name = 'absurdity'
2545
    long_name = 'Tales of Absurdity'
2546
    url = 'http://talesofabsurdity.com'
2547
    get_first_comic_link = get_a_navi_navifirst
2548
    get_navi_link = get_a_navi_comicnavnext_navinext
2549
2550
    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, author, date, images and alt text from a comic page.

        soup is the parsed page; link is not used by this implementation.
        """
        title = soup.find('h2', class_='post-title').string
        author_link = soup.find("span", class_="post-author").find("a")
        author = author_link.string
        raw_date = soup.find("span", class_="post-date").string
        when = string_to_date(raw_date, "%B %d, %Y")
        comic_div = soup.find("div", id="comic")
        pictures = comic_div.find_all("img")
        # Every image is expected to repeat its alt text as its title.
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        alt = pictures[0]['alt'] if pictures else ""
        return {
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'alt': alt,
            'author': author,
            'day': when.day,
            'month': when.month,
            'year': when.year
        }
2569
2570
2571
class EndlessOrigami(GenericNavigableComic):
2572
    """Class to retrieve Endless Origami Comics."""
2573
    name = "origami"
@@ 2317-2342 (lines=26) @@
2314
        """Get link to next or previous comic."""
2315
        for link in last_soup.find_all('a', rel='next' if next_ else 'prev'):
2316
            if link['href'] != '/comic':
2317
                return link
2318
        return None
2319
2320
    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict of a comic page from its itemprop markup.

        soup is the parsed page; link is not used by this implementation.
        Image sources are joined with cls.url before being returned.
        """
        title = soup.find('meta', attrs={'name': 'description'})["content"]
        description = soup.find('div', itemprop='articleBody').text
        author = soup.find('span', itemprop='author copyrightHolder').string
        pictures = soup.find_all('img', itemprop='image')
        # Title and alt attributes are expected to match on every image.
        assert all(pic['title'] == pic['alt'] for pic in pictures)
        if pictures:
            alt = pictures[0]['alt']
        else:
            alt = ""
        time_elt = soup.find('time', itemprop='datePublished')
        published = string_to_date(time_elt["datetime"], "%Y-%m-%d %H:%M:%S")
        img_urls = [urljoin_wrapper(cls.url, pic['src']) for pic in pictures]
        return {
            'img': img_urls,
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'author': author,
            'title': title,
            'alt': alt,
            'description': description,
        }
2341
2342
2343
class GerbilWithAJetpack(GenericNavigableComic):
2344
    """Class to retrieve GerbilWithAJetpack comics."""
2345
    name = 'gerbil'
@@ 1781-1806 (lines=26) @@
1778
    first_url = 'http://respawncomic.com/comic/c0001/'
1779
1780
    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict of a comic page from its <meta> tags.

        soup is the parsed page; link is not used by this implementation.
        Only the first 10 characters of the published time are parsed
        (the "%Y-%m-%d" part).
        """
        title = soup.find('meta', property='og:title')['content']
        author_meta = soup.find('meta', attrs={'name': 'shareaholic:article_author_name'})
        author = author_meta['content']
        time_meta = soup.find('meta', attrs={'name': 'shareaholic:article_published_time'})
        published = string_to_date(time_meta['content'][:10], "%Y-%m-%d")
        image_metas = soup.find_all('meta', property='og:image')
        # og:image entries with these URLs are left out of the result.
        ignored = {
            'http://respawncomic.com/wp-content/uploads/2016/03/site/HAROLD2.png',
            'http://respawncomic.com/wp-content/uploads/2016/03/site/DEVA.png'
        }
        kept = [meta['content'] for meta in image_metas if meta['content'] not in ignored]
        return {
            'title': title,
            'author': author,
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': kept,
        }
1801
1802
1803
class SafelyEndangered(GenericNavigableComic):
1804
    """Class to retrieve Safely Endangered comics."""
1805
    # Also on http://tumblr.safelyendangered.com
1806
    name = 'endangered'
1807
    long_name = 'Safely Endangered'
1808
    url = 'http://www.safelyendangered.com'
1809
    get_navi_link = get_link_rel_next
@@ 2017-2041 (lines=25) @@
2014
2015
    @classmethod
    def get_first_comic_link(cls):
        """Return the <a title="First"> element found on the page at cls.url."""
        home_page = get_soup_at_url(cls.url)
        return home_page.find('a', title="First")
2019
2020
    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the 'Next' link of last_soup if next_ is true, else the 'Previous' one."""
        if next_:
            wanted_title = 'Next'
        else:
            wanted_title = 'Previous'
        return last_soup.find('a', title=wanted_title)
2024
2025
    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract the title, date and images of a comic page.

        soup is the parsed page; link is not used by this implementation.
        Only images whose alt and title attributes are both empty are kept.
        """
        title = soup.find('h1').string
        raw_date = soup.find('span', class_='date').string
        published = string_to_date(raw_date.strip(), "%B %d, %Y")
        comic_div = soup.find('div', class_='comic')
        pictures = comic_div.find_all('img', alt='', title='')
        return {
            'title': title,
            'img': [pic['src'] for pic in pictures],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2039
2040
2041
class ChuckleADuck(GenericNavigableComic):
2042
    """Class to retrieve Chuckle-A-Duck comics."""
2043
    name = 'chuckleaduck'
2044
    long_name = 'Chuckle-A-duck'
@@ 1931-1957 (lines=27) @@
1928
        assert all(i['title'] == i['alt'] == title for i in imgs)
1929
        return {
1930
            'month': day.month,
1931
            'year': day.year,
1932
            'day': day.day,
1933
            'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
1934
            'title': title,
1935
        }
1936
1937
1938
class DiscoBleach(GenericEmptyComic):  # Does not work anymore
    """Disco Bleach Comics, declared as an empty comic."""
    name = "discobleach"
    long_name = "Disco Bleach"
    url = "http://discobleach.com"
1943
1944
1945
class TubeyToons(GenericEmptyComic):  # Does not work anymore
    """TubeyToons comics, declared as an empty comic."""
    # Other places hosting the comic:
    #   http://tapastic.com/series/Tubey-Toons
    #   http://tubeytoons.tumblr.com
    name = "tubeytoons"
    long_name = "Tubey Toons"
    url = "http://tubeytoons.com"
1952
1953
1954
class CompletelySeriousComics(GenericNavigableComic):
1955
    """Class to retrieve Completely Serious comics."""
1956
    name = 'completelyserious'
1957
    long_name = 'Completely Serious Comics'
1958
    url = 'http://completelyseriouscomics.com'
1959
    get_first_comic_link = get_a_navi_navifirst
1960
    get_navi_link = get_a_navi_navinext
@@ 1810-1836 (lines=27) @@
1807
    long_name = 'Safely Endangered'
1808
    url = 'http://www.safelyendangered.com'
1809
    get_navi_link = get_link_rel_next
1810
    get_first_comic_link = simulate_first_link
1811
    first_url = 'http://www.safelyendangered.com/comic/ignored/'
1812
1813
    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        soup is the parsed page; link is not used by this implementation.
        Returns a dict with the keys 'day', 'month', 'year', 'img', 'title'
        and 'alt'. 'alt' is the alt text of the first image, or an empty
        string when the page has no image.
        """
        title = soup.find('h2', class_='post-title').string
        date_str = soup.find('span', class_='post-date').string
        day = string_to_date(date_str, '%B %d, %Y')
        imgs = soup.find('div', id='comic').find_all('img')
        # A page without any image must not raise IndexError; fall back to
        # an empty alt text as the sibling comic classes do.
        alt = imgs[0]['alt'] if imgs else ""
        # Alt and title attributes are expected to hold the same text.
        assert all(i['alt'] == i['title'] for i in imgs)
        return {
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [i['src'] for i in imgs],
            'title': title,
            'alt': alt,
        }
1830
1831
1832
class PicturesInBoxes(GenericNavigableComic):
1833
    """Class to retrieve Pictures In Boxes comics."""
1834
    # Also on http://picturesinboxescomic.tumblr.com
1835
    name = 'picturesinboxes'
1836
    long_name = 'Pictures in Boxes'
1837
    url = 'http://www.picturesinboxes.com'
1838
    get_navi_link = get_a_navi_navinext
1839
    get_first_comic_link = simulate_first_link
@@ 1699-1723 (lines=25) @@
1696
1697
1698
class JustSayEh(GenericNavigableComic):
    """Class to retrieve Just Say Eh comics."""
    # Also on http://tapastic.com/series/Just-Say-Eh
    name = 'justsayeh'
    long_name = 'Just Say Eh'
    url = 'http://www.justsayeh.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        soup is the parsed page; link is not used by this implementation.
        Returns a dict with the keys 'img', 'title' and 'alt'. 'alt' is the
        alt text of the first image, or an empty string when the page has
        no image.
        """
        title = soup.find('h2', class_='post-title').string
        imgs = soup.find("div", id="comic").find_all("img")
        # Alt and title attributes are expected to hold the same text.
        assert all(i['alt'] == i['title'] for i in imgs)
        # A page without any image must not raise IndexError.
        alt = imgs[0]['alt'] if imgs else ""
        return {
            'img': [i['src'] for i in imgs],
            'title': title,
            'alt': alt,
        }
1719
1720
1721
class MouseBearComedy(GenericNavigableComic):
1722
    """Class to retrieve Mouse Bear Comedy comics."""
1723
    # Also on http://mousebearcomedy.tumblr.com
1724
    name = 'mousebear'
1725
    long_name = 'Mouse Bear Comedy'
1726
    url = 'http://www.mousebearcomedy.com'
@@ 1114-1137 (lines=24) @@
1111
1112
1113
class BouletCorp(GenericBouletCorp):
    """BouletCorp comics, retrieved from http://www.bouletcorp.com."""
    name = "boulet"
    long_name = "Boulet Corp"
    url = "http://www.bouletcorp.com"
1118
1119
1120
class BouletCorpEn(GenericBouletCorp):
    """English BouletCorp comics, retrieved from http://english.bouletcorp.com."""
    name = "boulet_en"
    long_name = "Boulet Corp English"
    url = "http://english.bouletcorp.com"
1125
1126
1127
class AmazingSuperPowers(GenericNavigableComic):
1128
    """Class to retrieve Amazing Super Powers comics."""
1129
    name = 'asp'
1130
    long_name = 'Amazing Super Powers'
1131
    url = 'http://www.amazingsuperpowers.com'
1132
    get_first_comic_link = get_a_navi_navifirst
1133
    get_navi_link = get_a_navi_navinext
1134
1135
    @classmethod
1136
    def get_comic_info(cls, soup, link):
1137
        """Get information about a particular comics."""
1138
        author = soup.find("span", class_="post-author").find("a").string
1139
        date_str = soup.find('span', class_='post-date').string
1140
        day = string_to_date(date_str, "%B %d, %Y")
@@ 2374-2400 (lines=27) @@
2371
2372
class EveryDayBlues(GenericNavigableComic):
    """Retriever for the Every Day Blues comics."""
    name = "blues"
    long_name = "Every Day Blues"
    url = "http://everydayblues.net"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect title, author, date and image of a comic page.

        soup is the parsed page; link is not used by this implementation.
        """
        title = soup.find("h2", class_="post-title").string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        raw_date = soup.find("span", class_="post-date").string
        # Third argument is presumably the locale used for parsing - confirm
        # against string_to_date.
        published = string_to_date(raw_date, "%d. %B %Y", "de_DE.utf8")
        pictures = soup.find("div", id="comic").find_all("img")
        # Alt and title of every image are expected to equal the page title.
        assert all(pic['alt'] == pic['title'] == title for pic in pictures)
        # No more than one image is expected per page.
        assert len(pictures) <= 1
        return {
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'author': author,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2398
2399
2400
class BiterComics(GenericNavigableComic):
2401
    """Class to retrieve Biter Comics."""
2402
    name = "biter"
2403
    long_name = "Biter Comics"
@@ 2116-2141 (lines=26) @@
2113
        title2 = soup.find('meta', property='og:title')['content']
2114
        desc = soup.find('meta', property='og:description')
2115
        description = desc['content'] if desc else ''
2116
        tags = ' '.join(t['content'] for t in soup.find_all('meta', property='article:tag'))
2117
        imgs = soup.find('div', class_='entry-content').find_all('img')
2118
        return {
2119
            'day': day.day,
2120
            'month': day.month,
2121
            'year': day.year,
2122
            'title': title,
2123
            'title2': title2,
2124
            'description': description,
2125
            'tags': tags,
2126
            'img': [i['src'] for i in imgs],
2127
            'alt': ' '.join(i['alt'] for i in imgs),
2128
        }
2129
2130
    @classmethod
    def get_url_from_archive_element(cls, tr):
        """Return the href of the link in the second cell of an archive row.

        tr is a table row expected to hold exactly three <td> cells; the
        unpacking raises ValueError for any other number of cells.
        """
        _, link_cell, _ = tr.find_all('td')
        return link_cell.find('a')['href']
2134
2135
    @classmethod
    def get_archive_elements(cls):
        """Return the <tr> rows of the 'archive-2' page table body, in reverse order."""
        archive_page = get_soup_at_url(urljoin_wrapper(cls.url, 'archive-2'))
        rows = archive_page.find('tbody').find_all('tr')
        return reversed(rows)
2139
2140
2141
class HappleTea(GenericNavigableComic):
2142
    """Class to retrieve Happle Tea Comics."""
2143
    name = 'happletea'
2144
    long_name = 'Happle Tea'
@@ 2346-2370 (lines=25) @@
2343
class GerbilWithAJetpack(GenericNavigableComic):
    """Class to retrieve GerbilWithAJetpack comics."""
    name = 'gerbil'
    long_name = 'Gerbil With A Jetpack'
    url = 'http://gerbilwithajetpack.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        soup is the parsed page; link is not used by this implementation.
        Returns a dict with the keys 'img', 'title', 'alt', 'author', 'day',
        'month' and 'year'. 'alt' is the alt text of the first image, or an
        empty string when the page has no image.
        """
        title = soup.find('h2', class_='post-title').string
        author = soup.find("span", class_="post-author").find("a").string
        date_str = soup.find("span", class_="post-date").string
        day = string_to_date(date_str, "%B %d, %Y")
        imgs = soup.find("div", id="comic").find_all("img")
        # A page without any image must not raise IndexError; fall back to
        # an empty alt text as the sibling comic classes do.
        alt = imgs[0]['alt'] if imgs else ""
        # Every image is expected to share the same alt and title text.
        assert all(i['alt'] == i['title'] == alt for i in imgs)
        return {
            'img': [i['src'] for i in imgs],
            'title': title,
            'alt': alt,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
2370
2371
2372
class EveryDayBlues(GenericNavigableComic):
2373
    """Class to retrieve EveryDayBlues Comics."""