Code Duplication    Length = 23-28 lines in 12 locations

comics.py 12 locations

@@ 1781-1806 (lines=26) @@
1778
    @classmethod
1779
    def get_comic_info(cls, soup, link):
1780
        """Get information about a particular comics."""
1781
        title = soup.find('meta', property='og:title')['content']
1782
        author = soup.find('meta', attrs={'name': 'shareaholic:article_author_name'})['content']
1783
        date_str = soup.find('meta', attrs={'name': 'shareaholic:article_published_time'})['content']
1784
        date_str = date_str[:10]
1785
        day = string_to_date(date_str, "%Y-%m-%d")
1786
        imgs = soup.find_all('meta', property='og:image')
1787
        skip_imgs = {
1788
            'http://respawncomic.com/wp-content/uploads/2016/03/site/HAROLD2.png',
1789
            'http://respawncomic.com/wp-content/uploads/2016/03/site/DEVA.png'
1790
        }
1791
        return {
1792
            'title': title,
1793
            'author': author,
1794
            'day': day.day,
1795
            'month': day.month,
1796
            'year': day.year,
1797
            'img': [i['content'] for i in imgs if i['content'] not in skip_imgs],
1798
        }
1799
1800
1801
class SafelyEndangered(GenericNavigableComic):
1802
    """Class to retrieve Safely Endangered comics."""
1803
    # Also on http://tumblr.safelyendangered.com
1804
    name = 'endangered'
1805
    long_name = 'Safely Endangered'
1806
    url = 'http://www.safelyendangered.com'
1807
    get_navi_link = get_link_rel_next
1808
    get_first_comic_link = simulate_first_link
1809
    first_url = 'http://www.safelyendangered.com/comic/ignored/'
@@ 1810-1836 (lines=27) @@
1807
    get_navi_link = get_link_rel_next
1808
    get_first_comic_link = simulate_first_link
1809
    first_url = 'http://www.safelyendangered.com/comic/ignored/'
1810
1811
    @classmethod
1812
    def get_comic_info(cls, soup, link):
1813
        """Get information about a particular comics."""
1814
        title = soup.find('h2', class_='post-title').string
1815
        date_str = soup.find('span', class_='post-date').string
1816
        day = string_to_date(date_str, '%B %d, %Y')
1817
        imgs = soup.find('div', id='comic').find_all('img')
1818
        alt = imgs[0]['alt']
1819
        assert all(i['alt'] == i['title'] for i in imgs)
1820
        return {
1821
            'day': day.day,
1822
            'month': day.month,
1823
            'year': day.year,
1824
            'img': [i['src'] for i in imgs],
1825
            'title': title,
1826
            'alt': alt,
1827
        }
1828
1829
1830
class PicturesInBoxes(GenericNavigableComic):
1831
    """Class to retrieve Pictures In Boxes comics."""
1832
    # Also on http://picturesinboxescomic.tumblr.com
1833
    name = 'picturesinboxes'
1834
    long_name = 'Pictures in Boxes'
1835
    url = 'http://www.picturesinboxes.com'
1836
    get_navi_link = get_a_navi_navinext
1837
    get_first_comic_link = simulate_first_link
1838
    first_url = 'http://www.picturesinboxes.com/2013/10/26/tetris/'
1839
@@ 2518-2545 (lines=28) @@
2515
class LastPlaceComics(GenericNavigableComic):
2516
    """Class to retrieve Last Place Comics."""
2517
    name = 'lastplace'
2518
    long_name = 'Last Place Comics'
2519
    url = "http://lastplacecomics.com"
2520
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
2521
    get_navi_link = get_link_rel_next
2522
2523
    @classmethod
2524
    def get_comic_info(cls, soup, link):
2525
        """Get information about a particular comics."""
2526
        title = soup.find('h2', class_='post-title').string
2527
        author = soup.find("span", class_="post-author").find("a").string
2528
        date_str = soup.find("span", class_="post-date").string
2529
        day = string_to_date(date_str, "%B %d, %Y")
2530
        imgs = soup.find("div", id="comic").find_all("img")
2531
        assert all(i['alt'] == i['title'] for i in imgs)
2532
        assert len(imgs) <= 1
2533
        alt = imgs[0]['alt'] if imgs else ""
2534
        return {
2535
            'img': [i['src'] for i in imgs],
2536
            'title': title,
2537
            'alt': alt,
2538
            'author': author,
2539
            'day': day.day,
2540
            'month': day.month,
2541
            'year': day.year
2542
        }
2543
2544
2545
class TalesOfAbsurdity(GenericNavigableComic):
2546
    """Class to retrieve Tales Of Absurdity comics."""
2547
    # Also on http://tapastic.com/series/Tales-Of-Absurdity
2548
    # Also on http://talesofabsurdity.tumblr.com
@@ 2738-2764 (lines=27) @@
2735
class UnearthedComics(GenericNavigableComic):
2736
    """Class to retrieve Unearthed comics."""
2737
    # Also on http://tapastic.com/series/UnearthedComics
2738
    # Also on http://unearthedcomics.tumblr.com
2739
    name = 'unearthed'
2740
    long_name = 'Unearthed Comics'
2741
    url = 'http://unearthedcomics.com'
2742
    _categories = ('UNEARTHED', )
2743
    get_navi_link = get_link_rel_next
2744
    get_first_comic_link = simulate_first_link
2745
    first_url = 'http://unearthedcomics.com/comics/world-with-turn-signals/'
2746
2747
    @classmethod
2748
    def get_comic_info(cls, soup, link):
2749
        """Get information about a particular comics."""
2750
        short_url = soup.find('link', rel='shortlink')['href']
2751
        title_elt = soup.find('h1') or soup.find('h2')
2752
        title = title_elt.string if title_elt else ""
2753
        desc = soup.find('meta', property='og:description')
2754
        date_str = soup.find('time', class_='published updated hidden')['datetime']
2755
        day = string_to_date(date_str, "%Y-%m-%d")
2756
        post = soup.find('div', class_="entry content entry-content type-portfolio")
2757
        imgs = post.find_all('img')
2758
        return {
2759
            'title': title,
2760
            'description': desc,
2761
            'url2': short_url,
2762
            'img': [i['src'] for i in imgs],
2763
            'month': day.month,
2764
            'year': day.year,
2765
            'day': day.day,
2766
        }
2767
@@ 2488-2514 (lines=27) @@
2485
    # Also on http://www.gocomics.com/mister-and-me
2486
    # Also on https://tapastic.com/series/Mister-and-Me
2487
    name = 'mister'
2488
    long_name = 'Mister & Me'
2489
    url = 'http://www.mister-and-me.com'
2490
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
2491
    get_navi_link = get_link_rel_next
2492
2493
    @classmethod
2494
    def get_comic_info(cls, soup, link):
2495
        """Get information about a particular comics."""
2496
        title = soup.find('h2', class_='post-title').string
2497
        author = soup.find("span", class_="post-author").find("a").string
2498
        date_str = soup.find("span", class_="post-date").string
2499
        day = string_to_date(date_str, "%B %d, %Y")
2500
        imgs = soup.find("div", id="comic").find_all("img")
2501
        assert all(i['alt'] == i['title'] for i in imgs)
2502
        assert len(imgs) <= 1
2503
        alt = imgs[0]['alt'] if imgs else ""
2504
        return {
2505
            'img': [i['src'] for i in imgs],
2506
            'title': title,
2507
            'alt': alt,
2508
            'author': author,
2509
            'day': day.day,
2510
            'month': day.month,
2511
            'year': day.year
2512
        }
2513
2514
2515
class LastPlaceComics(GenericNavigableComic):
2516
    """Class to retrieve Last Place Comics."""
2517
    name = 'lastplace'
@@ 2321-2346 (lines=26) @@
2318
        """Get link to next or previous comic."""
2319
        for link in last_soup.find_all('a', rel='next' if next_ else 'prev'):
2320
            if link['href'] != '/comic':
2321
                return link
2322
        return None
2323
2324
    @classmethod
2325
    def get_comic_info(cls, soup, link):
2326
        """Get information about a particular comics."""
2327
        title = soup.find('meta', attrs={'name': 'description'})["content"]
2328
        description = soup.find('div', itemprop='articleBody').text
2329
        author = soup.find('span', itemprop='author copyrightHolder').string
2330
        imgs = soup.find_all('img', itemprop='image')
2331
        assert all(i['title'] == i['alt'] for i in imgs)
2332
        alt = imgs[0]['alt'] if imgs else ""
2333
        date_str = soup.find('time', itemprop='datePublished')["datetime"]
2334
        day = string_to_date(date_str, "%Y-%m-%d %H:%M:%S")
2335
        return {
2336
            'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
2337
            'month': day.month,
2338
            'year': day.year,
2339
            'day': day.day,
2340
            'author': author,
2341
            'title': title,
2342
            'alt': alt,
2343
            'description': description,
2344
        }
2345
2346
2347
class GerbilWithAJetpack(GenericNavigableComic):
2348
    """Class to retrieve GerbilWithAJetpack comics."""
2349
    name = 'gerbil'
@@ 2019-2043 (lines=25) @@
2016
    def get_first_comic_link(cls):
2017
        """Get link to first comics."""
2018
        return get_soup_at_url(cls.url).find('a', title="First")
2019
2020
    @classmethod
2021
    def get_navi_link(cls, last_soup, next_):
2022
        """Get link to next or previous comic."""
2023
        return last_soup.find('a', title='Next' if next_ else 'Previous')
2024
2025
    @classmethod
2026
    def get_comic_info(cls, soup, link):
2027
        """Get information about a particular comics."""
2028
        title = soup.find('h1').string
2029
        date_str = soup.find('span', class_='date').string.strip()
2030
        day = string_to_date(date_str, "%B %d, %Y")
2031
        imgs = soup.find('div', class_='comic').find_all('img', alt='', title='')
2032
        return {
2033
            'title': title,
2034
            'img': [i['src'] for i in imgs],
2035
            'month': day.month,
2036
            'year': day.year,
2037
            'day': day.day,
2038
        }
2039
2040
2041
class ChuckleADuck(GenericNavigableComic):
2042
    """Class to retrieve Chuckle-A-Duck comics."""
2043
    name = 'chuckleaduck'
2044
    long_name = 'Chuckle-A-duck'
2045
    url = 'http://chuckleaduck.com'
2046
    get_first_comic_link = get_div_navfirst_a
@@ 2378-2404 (lines=27) @@
2375
2376
class EveryDayBlues(GenericNavigableComic):
2377
    """Class to retrieve EveryDayBlues Comics."""
2378
    name = "blues"
2379
    long_name = "Every Day Blues"
2380
    url = "http://everydayblues.net"
2381
    get_first_comic_link = get_a_navi_navifirst
2382
    get_navi_link = get_link_rel_next
2383
2384
    @classmethod
2385
    def get_comic_info(cls, soup, link):
2386
        """Get information about a particular comics."""
2387
        title = soup.find("h2", class_="post-title").string
2388
        author = soup.find("span", class_="post-author").find("a").string
2389
        date_str = soup.find("span", class_="post-date").string
2390
        day = string_to_date(date_str, "%d. %B %Y", "de_DE.utf8")
2391
        imgs = soup.find("div", id="comic").find_all("img")
2392
        assert all(i['alt'] == i['title'] == title for i in imgs)
2393
        assert len(imgs) <= 1
2394
        return {
2395
            'img': [i['src'] for i in imgs],
2396
            'title': title,
2397
            'author': author,
2398
            'day': day.day,
2399
            'month': day.month,
2400
            'year': day.year
2401
        }
2402
2403
2404
class BiterComics(GenericNavigableComic):
2405
    """Class to retrieve Biter Comics."""
2406
    name = "biter"
2407
    long_name = "Biter Comics"
@@ 1932-1958 (lines=27) @@
1929
            'year': day.year,
1930
            'day': day.day,
1931
            'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
1932
            'title': title,
1933
        }
1934
1935
1936
class DiscoBleach(GenericEmptyComic):  # Does not work anymore
1937
    """Class to retrieve Disco Bleach Comics."""
1938
    name = 'discobleach'
1939
    long_name = 'Disco Bleach'
1940
    url = 'http://discobleach.com'
1941
1942
1943
class TubeyToons(GenericEmptyComic):  # Does not work anymore
1944
    """Class to retrieve TubeyToons comics."""
1945
    # Also on http://tapastic.com/series/Tubey-Toons
1946
    # Also on http://tubeytoons.tumblr.com
1947
    name = 'tubeytoons'
1948
    long_name = 'Tubey Toons'
1949
    url = 'http://tubeytoons.com'
1950
    _categories = ('TUNEYTOONS', )
1951
1952
1953
class CompletelySeriousComics(GenericNavigableComic):
1954
    """Class to retrieve Completely Serious comics."""
1955
    name = 'completelyserious'
1956
    long_name = 'Completely Serious Comics'
1957
    url = 'http://completelyseriouscomics.com'
1958
    get_first_comic_link = get_a_navi_navifirst
1959
    get_navi_link = get_a_navi_navinext
1960
1961
    @classmethod
@@ 2119-2144 (lines=26) @@
2116
        tags = ' '.join(t['content'] for t in soup.find_all('meta', property='article:tag'))
2117
        imgs = soup.find('div', class_='entry-content').find_all('img')
2118
        return {
2119
            'day': day.day,
2120
            'month': day.month,
2121
            'year': day.year,
2122
            'title': title,
2123
            'title2': title2,
2124
            'description': description,
2125
            'tags': tags,
2126
            'img': [i['src'] for i in imgs],
2127
            'alt': ' '.join(i['alt'] for i in imgs),
2128
        }
2129
2130
    @classmethod
2131
    def get_url_from_archive_element(cls, tr):
2132
        _, td2, td3 = tr.find_all('td')
2133
        return td2.find('a')['href']
2134
2135
    @classmethod
2136
    def get_archive_elements(cls):
2137
        archive_url = urljoin_wrapper(cls.url, 'archive-2')
2138
        return reversed(get_soup_at_url(archive_url).find('tbody').find_all('tr'))
2139
2140
2141
class HappleTea(GenericNavigableComic):
2142
    """Class to retrieve Happle Tea Comics."""
2143
    name = 'happletea'
2144
    long_name = 'Happle Tea'
2145
    url = 'http://www.happletea.com'
2146
    get_first_comic_link = get_a_navi_navifirst
2147
    get_navi_link = get_link_rel_next
@@ 2659-2683 (lines=25) @@
2656
    @classmethod
2657
    def get_comic_info(cls, soup, link):
2658
        """Get information about a particular comics."""
2659
        desc = soup.find('meta', property='og:description')['content']
2660
        title = soup.find('meta', property='og:title')['content']
2661
        imgs = soup.find('div', class_='entry-content').find_all('img')
2662
        title2 = ' '.join(i.get('title', '') for i in imgs)
2663
        return {
2664
            'title': title,
2665
            'title2': title2,
2666
            'description': desc,
2667
            'img': [urljoin_wrapper(cls.url, convert_iri_to_plain_ascii_uri(i['src'])) for i in imgs],
2668
        }
2669
2670
2671
class CommitStripFr(GenericCommitStrip):
2672
    """Class to retrieve Commit Strips in French."""
2673
    name = 'commit_fr'
2674
    long_name = 'Commit Strip (Fr)'
2675
    url = 'http://www.commitstrip.com/fr'
2676
    _categories = ('FRANCAIS', )
2677
    first_url = 'http://www.commitstrip.com/fr/2012/02/22/interview/'
2678
2679
2680
class CommitStripEn(GenericCommitStrip):
2681
    """Class to retrieve Commit Strips in English."""
2682
    name = 'commit_en'
2683
    long_name = 'Commit Strip (En)'
2684
    url = 'http://www.commitstrip.com/en'
2685
    first_url = 'http://www.commitstrip.com/en/2012/02/22/interview/'
2686
@@ 338-360 (lines=23) @@
335
    def get_next_comic(cls, last_comic):
336
        """Implementation of get_next_comic returning no comics."""
337
        cls.log("comic is considered as empty - returning no comic")
338
        return []
339
340
341
class ExtraFabulousComics(GenericNavigableComic):
342
    """Class to retrieve Extra Fabulous Comics."""
343
    name = 'efc'
344
    long_name = 'Extra Fabulous Comics'
345
    url = 'http://extrafabulouscomics.com'
346
    get_first_comic_link = get_a_navi_navifirst
347
    get_navi_link = get_link_rel_next
348
349
    @classmethod
350
    def get_comic_info(cls, soup, link):
351
        """Get information about a particular comics."""
352
        img_src_re = re.compile('^%s/wp-content/uploads/' % cls.url)
353
        imgs = soup.find_all('img', src=img_src_re)
354
        title = soup.find('meta', property='og:title')['content']
355
        date_str = soup.find('meta', property='article:published_time')['content'][:10]
356
        day = string_to_date(date_str, "%Y-%m-%d")
357
        return {
358
            'title': title,
359
            'img': [i['src'] for i in imgs],
360
            'month': day.month,
361
            'year': day.year,
362
            'day': day.day,
363
            'prefix': title + '-'