Code Duplication    Length = 23-28 lines in 12 locations

comics.py 12 locations

@@ 1781-1806 (lines=26) @@
1778
    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title, author, date, images) from <meta> tags.

        The title comes from og:title, author and publication time from the
        shareaholic meta tags, and the images from every og:image tag except
        the two URLs listed in ``excluded``.
        """
        def named_meta(name):
            # Content of the <meta name="..."> tag with the given name.
            return soup.find('meta', attrs={'name': name})['content']

        title = soup.find('meta', property='og:title')['content']
        author = named_meta('shareaholic:article_author_name')
        # Only the first 10 characters (YYYY-MM-DD) of the timestamp are parsed.
        stamp = named_meta('shareaholic:article_published_time')
        published = string_to_date(stamp[:10], "%Y-%m-%d")
        excluded = {
            'http://respawncomic.com/wp-content/uploads/2016/03/site/HAROLD2.png',
            'http://respawncomic.com/wp-content/uploads/2016/03/site/DEVA.png'
        }
        kept = []
        for meta in soup.find_all('meta', property='og:image'):
            if meta['content'] not in excluded:
                kept.append(meta['content'])
        return {
            'title': title,
            'author': author,
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': kept,
        }
1799
1800
1801
class SafelyEndangered(GenericNavigableComic):
1802
    """Class to retrieve Safely Endangered comics."""
1803
    # Also on http://tumblr.safelyendangered.com
1804
    name = 'endangered'
1805
    long_name = 'Safely Endangered'
1806
    url = 'http://www.safelyendangered.com'
1807
    get_navi_link = get_link_rel_next
1808
    get_first_comic_link = simulate_first_link
1809
    first_url = 'http://www.safelyendangered.com/comic/ignored/'
@@ 1810-1836 (lines=27) @@
1807
    get_navi_link = get_link_rel_next
1808
    get_first_comic_link = simulate_first_link
1809
    first_url = 'http://www.safelyendangered.com/comic/ignored/'
1810
1811
    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The title is read from the 'post-title' <h2>, the date from the
        'post-date' <span> (parsed with '%B %d, %Y') and the images from the
        <div id="comic">.  The 'alt' entry is the alt text of the first image,
        or "" when the page contains no comic image.
        """
        title = soup.find('h2', class_='post-title').string
        date_str = soup.find('span', class_='post-date').string
        day = string_to_date(date_str, '%B %d, %Y')
        imgs = soup.find('div', id='comic').find_all('img')
        # Layout sanity check: every image is expected to carry the same text
        # in its alt and title attributes.
        assert all(i['alt'] == i['title'] for i in imgs)
        # Guard against pages without any image instead of failing on imgs[0].
        alt = imgs[0]['alt'] if imgs else ""
        return {
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [i['src'] for i in imgs],
            'title': title,
            'alt': alt,
        }
1828
1829
1830
class PicturesInBoxes(GenericNavigableComic):
1831
    """Class to retrieve Pictures In Boxes comics."""
1832
    # Also on http://picturesinboxescomic.tumblr.com
1833
    name = 'picturesinboxes'
1834
    long_name = 'Pictures in Boxes'
1835
    url = 'http://www.picturesinboxes.com'
1836
    get_navi_link = get_a_navi_navinext
1837
    get_first_comic_link = simulate_first_link
1838
    first_url = 'http://www.picturesinboxes.com/2013/10/26/tetris/'
1839
@@ 2518-2545 (lines=28) @@
2515
    """Class to retrieve Last Place Comics."""
2516
    name = 'lastplace'
2517
    long_name = 'Last Place Comics'
2518
    url = "http://lastplacecomics.com"
2519
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
2520
    get_navi_link = get_link_rel_next
2521
2522
    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect title, author, date and image data from a comic page.

        At most one image is expected inside <div id="comic">; its alt text
        (or "" when there is no image) is returned under 'alt'.
        """
        heading = soup.find('h2', class_='post-title')
        title = heading.string
        byline = soup.find("span", class_="post-author")
        author = byline.find("a").string
        posted_on = soup.find("span", class_="post-date").string
        posted = string_to_date(posted_on, "%B %d, %Y")
        pictures = soup.find("div", id="comic").find_all("img")
        # Sanity checks on the page layout.
        assert not any(pic['alt'] != pic['title'] for pic in pictures)
        assert len(pictures) <= 1
        if pictures:
            alt = pictures[0]['alt']
        else:
            alt = ""
        return {
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'alt': alt,
            'author': author,
            'day': posted.day,
            'month': posted.month,
            'year': posted.year
        }
2542
2543
2544
class TalesOfAbsurdity(GenericNavigableComic):
2545
    """Class to retrieve Tales Of Absurdity comics."""
2546
    # Also on http://tapastic.com/series/Tales-Of-Absurdity
2547
    # Also on http://talesofabsurdity.tumblr.com
2548
    name = 'absurdity'
@@ 2738-2764 (lines=27) @@
2735
    """Class to retrieve Unearthed comics."""
2736
    # Also on http://tapastic.com/series/UnearthedComics
2737
    # Also on http://unearthedcomics.tumblr.com
2738
    name = 'unearthed'
2739
    long_name = 'Unearthed Comics'
2740
    url = 'http://unearthedcomics.com'
2741
    _categories = ('UNEARTHED', )
2742
    get_navi_link = get_link_rel_next
2743
    get_first_comic_link = simulate_first_link
2744
    first_url = 'http://unearthedcomics.com/comics/world-with-turn-signals/'
2745
2746
    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict holding the title (text of the first <h1>, else of the
        first <h2>, else ""), the og:description text, the shortlink URL
        ('url2'), the sources of the images found in the portfolio entry and
        the publication date read from the hidden <time> element.
        """
        short_url = soup.find('link', rel='shortlink')['href']
        title_elt = soup.find('h1') or soup.find('h2')
        title = title_elt.string if title_elt else ""
        # Keep the text of the og:description tag, not the tag object itself;
        # the value stays None when the page has no such tag.
        desc_elt = soup.find('meta', property='og:description')
        desc = desc_elt['content'] if desc_elt is not None else None
        date_str = soup.find('time', class_='published updated hidden')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        post = soup.find('div', class_="entry content entry-content type-portfolio")
        imgs = post.find_all('img')
        return {
            'title': title,
            'description': desc,
            'url2': short_url,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
2766
2767
@@ 2488-2514 (lines=27) @@
2485
    # Also on https://tapastic.com/series/Mister-and-Me
2486
    name = 'mister'
2487
    long_name = 'Mister & Me'
2488
    url = 'http://www.mister-and-me.com'
2489
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
2490
    get_navi_link = get_link_rel_next
2491
2492
    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, author, date and the (single) image of a comic page."""
        title_tag = soup.find('h2', class_='post-title')
        author_link = soup.find("span", class_="post-author").find("a")
        date_tag = soup.find("span", class_="post-date")
        when = string_to_date(date_tag.string, "%B %d, %Y")
        comic_div = soup.find("div", id="comic")
        found = comic_div.find_all("img")
        # Layout sanity checks: alt text mirrors title text, one image at most.
        assert all(img['alt'] == img['title'] for img in found)
        assert len(found) <= 1
        sources = []
        for img in found:
            sources.append(img['src'])
        return {
            'img': sources,
            'title': title_tag.string,
            'alt': found[0]['alt'] if found else "",
            'author': author_link.string,
            'day': when.day,
            'month': when.month,
            'year': when.year
        }
2512
2513
2514
class LastPlaceComics(GenericNavigableComic):
2515
    """Class to retrieve Last Place Comics."""
2516
    name = 'lastplace'
2517
    long_name = 'Last Place Comics'
@@ 2321-2346 (lines=26) @@
2318
        for link in last_soup.find_all('a', rel='next' if next_ else 'prev'):
2319
            if link['href'] != '/comic':
2320
                return link
2321
        return None
2322
2323
    @classmethod
    def get_comic_info(cls, soup, link):
        """Read comic metadata from the page's itemprop-annotated markup.

        Image sources are joined with cls.url; 'alt' is the alt text of the
        first image, or "" when the page has none.
        """
        title = soup.find('meta', attrs={'name': 'description'})["content"]
        body = soup.find('div', itemprop='articleBody')
        description = body.text
        author = soup.find('span', itemprop='author copyrightHolder').string
        pictures = soup.find_all('img', itemprop='image')
        # Sanity check: title and alt attributes are expected to be identical.
        assert not any(pic['title'] != pic['alt'] for pic in pictures)
        if pictures:
            alt = pictures[0]['alt']
        else:
            alt = ""
        stamp = soup.find('time', itemprop='datePublished')["datetime"]
        published = string_to_date(stamp, "%Y-%m-%d %H:%M:%S")
        sources = []
        for pic in pictures:
            sources.append(urljoin_wrapper(cls.url, pic['src']))
        return {
            'img': sources,
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'author': author,
            'title': title,
            'alt': alt,
            'description': description,
        }
2344
2345
2346
class GerbilWithAJetpack(GenericNavigableComic):
2347
    """Class to retrieve GerbilWithAJetpack comics."""
2348
    name = 'gerbil'
2349
    long_name = 'Gerbil With A Jetpack'
@@ 2019-2043 (lines=25) @@
2016
    def get_first_comic_link(cls):
        """Return the <a title="First"> element found on the page at cls.url."""
        front_page = get_soup_at_url(cls.url)
        return front_page.find('a', title="First")
2019
2020
    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the <a> titled 'Next' when next_ is truthy, else 'Previous'."""
        if next_:
            wanted_title = 'Next'
        else:
            wanted_title = 'Previous'
        return last_soup.find('a', title=wanted_title)
2024
2025
    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, image sources and publication date of a comic page."""
        title = soup.find('h1').string
        raw_date = soup.find('span', class_='date').string
        # The date text is stripped of surrounding whitespace before parsing.
        published = string_to_date(raw_date.strip(), "%B %d, %Y")
        container = soup.find('div', class_='comic')
        # Only images matched by the alt='' / title='' filters are kept.
        pictures = container.find_all('img', alt='', title='')
        sources = []
        for pic in pictures:
            sources.append(pic['src'])
        return {
            'title': title,
            'img': sources,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2039
2040
2041
class ChuckleADuck(GenericNavigableComic):
2042
    """Class to retrieve Chuckle-A-Duck comics."""
2043
    name = 'chuckleaduck'
2044
    long_name = 'Chuckle-A-duck'
2045
    url = 'http://chuckleaduck.com'
2046
    get_first_comic_link = get_div_navfirst_a
@@ 2378-2404 (lines=27) @@
2375
class EveryDayBlues(GenericNavigableComic):
    """Scraper for the Every Day Blues comic (http://everydayblues.net)."""
    url = "http://everydayblues.net"
    long_name = "Every Day Blues"
    name = "blues"
    get_navi_link = get_link_rel_next
    get_first_comic_link = get_a_navi_navifirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, author, date and image source of a comic page.

        The date text is parsed with the "%d. %B %Y" format and "de_DE.utf8"
        passed as third argument to string_to_date (presumably the locale
        used for the month name - confirm against string_to_date).
        """
        title = soup.find("h2", class_="post-title").string
        author_link = soup.find("span", class_="post-author").find("a")
        author = author_link.string
        posted_on = soup.find("span", class_="post-date").string
        published = string_to_date(posted_on, "%d. %B %Y", "de_DE.utf8")
        pictures = soup.find("div", id="comic").find_all("img")
        # Sanity checks: alt, title attribute and page title all agree, and
        # there is at most one image.
        assert all(pic['alt'] == pic['title'] and pic['title'] == title
                   for pic in pictures)
        assert len(pictures) <= 1
        sources = []
        for pic in pictures:
            sources.append(pic['src'])
        return {
            'img': sources,
            'title': title,
            'author': author,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2401
2402
2403
class BiterComics(GenericNavigableComic):
2404
    """Class to retrieve Biter Comics."""
2405
    name = "biter"
2406
    long_name = "Biter Comics"
2407
    url = "http://www.bitercomics.com"
@@ 1932-1958 (lines=27) @@
1929
            'year': day.year,
1930
            'day': day.day,
1931
            'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
1932
            'title': title,
1933
        }
1934
1935
1936
class DiscoBleach(GenericEmptyComic):  # Does not work anymore
    """Entry for the Disco Bleach comic, based on GenericEmptyComic."""
    url = 'http://discobleach.com'
    long_name = 'Disco Bleach'
    name = 'discobleach'
1941
1942
1943
class TubeyToons(GenericEmptyComic):  # Does not work anymore
    """Entry for the Tubey Toons comic, based on GenericEmptyComic."""
    # Mirrors: http://tapastic.com/series/Tubey-Toons
    #          http://tubeytoons.tumblr.com
    url = 'http://tubeytoons.com'
    long_name = 'Tubey Toons'
    name = 'tubeytoons'
    # NOTE(review): 'TUNEYTOONS' looks like a typo for 'TUBEYTOONS' - confirm
    # before renaming, the value may be relied upon elsewhere.
    _categories = ('TUNEYTOONS', )
1951
1952
1953
class CompletelySeriousComics(GenericNavigableComic):
1954
    """Class to retrieve Completely Serious comics."""
1955
    name = 'completelyserious'
1956
    long_name = 'Completely Serious Comics'
1957
    url = 'http://completelyseriouscomics.com'
1958
    get_first_comic_link = get_a_navi_navifirst
1959
    get_navi_link = get_a_navi_navinext
1960
1961
    @classmethod
@@ 2119-2144 (lines=26) @@
2116
        tags = ' '.join(t['content'] for t in soup.find_all('meta', property='article:tag'))
2117
        imgs = soup.find('div', class_='entry-content').find_all('img')
2118
        return {
2119
            'day': day.day,
2120
            'month': day.month,
2121
            'year': day.year,
2122
            'title': title,
2123
            'title2': title2,
2124
            'description': description,
2125
            'tags': tags,
2126
            'img': [i['src'] for i in imgs],
2127
            'alt': ' '.join(i['alt'] for i in imgs),
2128
        }
2129
2130
    @classmethod
    def get_url_from_archive_element(cls, tr):
        """Return the href of the link in the second of the row's three cells."""
        # Unpacking enforces that the row holds exactly three <td> cells.
        _, link_cell, _ = tr.find_all('td')
        anchor = link_cell.find('a')
        return anchor['href']
2134
2135
    @classmethod
    def get_archive_elements(cls):
        """Return the <tr> rows of the 'archive-2' page table, in reverse order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive-2'))
        rows = archive_soup.find('tbody').find_all('tr')
        return reversed(rows)
2139
2140
2141
class HappleTea(GenericNavigableComic):
2142
    """Class to retrieve Happle Tea Comics."""
2143
    name = 'happletea'
2144
    long_name = 'Happle Tea'
2145
    url = 'http://www.happletea.com'
2146
    get_first_comic_link = get_a_navi_navifirst
2147
    get_navi_link = get_link_rel_next
@@ 2659-2683 (lines=25) @@
2656
    def get_comic_info(cls, soup, link):
        """Return title, image titles, description and image URLs of a strip.

        'title2' joins the title attributes of the images (empty string for an
        image without one); image sources are converted with
        convert_iri_to_plain_ascii_uri and joined with cls.url.
        """
        desc = soup.find('meta', property='og:description')['content']
        title = soup.find('meta', property='og:title')['content']
        content = soup.find('div', class_='entry-content')
        pictures = content.find_all('img')
        img_titles = [pic.get('title', '') for pic in pictures]
        urls = []
        for pic in pictures:
            ascii_src = convert_iri_to_plain_ascii_uri(pic['src'])
            urls.append(urljoin_wrapper(cls.url, ascii_src))
        return {
            'title': title,
            'title2': ' '.join(img_titles),
            'description': desc,
            'img': urls,
        }
2668
2669
2670
class CommitStripFr(GenericCommitStrip):
    """French edition of Commit Strip."""
    url = 'http://www.commitstrip.com/fr'
    first_url = 'http://www.commitstrip.com/fr/2012/02/22/interview/'
    long_name = 'Commit Strip (Fr)'
    name = 'commit_fr'
    _categories = ('FRANCAIS', )
2677
2678
2679
class CommitStripEn(GenericCommitStrip):
    """English edition of Commit Strip."""
    url = 'http://www.commitstrip.com/en'
    first_url = 'http://www.commitstrip.com/en/2012/02/22/interview/'
    long_name = 'Commit Strip (En)'
    name = 'commit_en'
2685
2686
@@ 338-360 (lines=23) @@
335
    def get_next_comic(cls, last_comic):
        """Log that the comic is treated as empty and return an empty list."""
        message = "comic is considered as empty - returning no comic"
        cls.log(message)
        return list()
339
340
341
class ExtraFabulousComics(GenericNavigableComic):
342
    """Class to retrieve Extra Fabulous Comics."""
343
    name = 'efc'
344
    long_name = 'Extra Fabulous Comics'
345
    url = 'http://extrafabulouscomics.com'
346
    get_first_comic_link = get_a_navi_navifirst
347
    get_navi_link = get_link_rel_next
348
349
    @classmethod
350
    def get_comic_info(cls, soup, link):
351
        """Get information about a particular comics."""
352
        img_src_re = re.compile('^%s/wp-content/uploads/' % cls.url)
353
        imgs = soup.find_all('img', src=img_src_re)
354
        title = soup.find('meta', property='og:title')['content']
355
        date_str = soup.find('meta', property='article:published_time')['content'][:10]
356
        day = string_to_date(date_str, "%Y-%m-%d")
357
        return {
358
            'title': title,
359
            'img': [i['src'] for i in imgs],
360
            'month': day.month,
361
            'year': day.year,
362
            'day': day.day,
363
            'prefix': title + '-'