Code Duplication    Length = 23-28 lines in 12 locations

comics.py 12 locations

@@ 1781-1806 (lines=26) @@
1778
        """Get information about a particular comics."""
1779
        title = soup.find('meta', property='og:title')['content']
1780
        author = soup.find('meta', attrs={'name': 'shareaholic:article_author_name'})['content']
1781
        date_str = soup.find('meta', attrs={'name': 'shareaholic:article_published_time'})['content']
1782
        date_str = date_str[:10]
1783
        day = string_to_date(date_str, "%Y-%m-%d")
1784
        imgs = soup.find_all('meta', property='og:image')
1785
        skip_imgs = {
1786
            'http://respawncomic.com/wp-content/uploads/2016/03/site/HAROLD2.png',
1787
            'http://respawncomic.com/wp-content/uploads/2016/03/site/DEVA.png'
1788
        }
1789
        return {
1790
            'title': title,
1791
            'author': author,
1792
            'day': day.day,
1793
            'month': day.month,
1794
            'year': day.year,
1795
            'img': [i['content'] for i in imgs if i['content'] not in skip_imgs],
1796
        }
1797
1798
1799
class SafelyEndangered(GenericNavigableComic):
1800
    """Class to retrieve Safely Endangered comics."""
1801
    # Also on http://tumblr.safelyendangered.com
1802
    name = 'endangered'
1803
    long_name = 'Safely Endangered'
1804
    url = 'http://www.safelyendangered.com'
1805
    get_navi_link = get_link_rel_next
1806
    get_first_comic_link = simulate_first_link
1807
    first_url = 'http://www.safelyendangered.com/comic/ignored/'
1808
1809
    @classmethod
@@ 1810-1836 (lines=27) @@
1807
    first_url = 'http://www.safelyendangered.com/comic/ignored/'
1808
1809
    @classmethod
1810
    def get_comic_info(cls, soup, link):
1811
        """Get information about a particular comic.

        Args:
            soup: parsed page of the comic (presumably a BeautifulSoup
                document - confirm against the caller).
            link: not used by this implementation.

        Returns:
            A dict with the keys 'day', 'month', 'year', 'img', 'title'
            and 'alt'.
        """
1812
        title = soup.find('h2', class_='post-title').string
1813
        date_str = soup.find('span', class_='post-date').string
1814
        # The date text is handed to string_to_date with the '%B %d, %Y' format.
        day = string_to_date(date_str, '%B %d, %Y')
1815
        # Only images located inside the element with id "comic" are kept.
        imgs = soup.find('div', id='comic').find_all('img')
1816
        # NOTE(review): imgs[0] is indexed unconditionally - presumably this
        # raises when the page holds no comic image; confirm this is intended.
        alt = imgs[0]['alt']
1817
        # Sanity check: every image must carry identical alt and title texts.
        # (assert statements are skipped when Python runs with -O.)
        assert all(i['alt'] == i['title'] for i in imgs)
1818
        return {
1819
            'day': day.day,
1820
            'month': day.month,
1821
            'year': day.year,
1822
            'img': [i['src'] for i in imgs],
1823
            'title': title,
1824
            'alt': alt,
1825
        }
1826
1827
1828
class PicturesInBoxes(GenericNavigableComic):
1829
    """Class to retrieve Pictures In Boxes comics."""
1830
    # Also on http://picturesinboxescomic.tumblr.com
1831
    name = 'picturesinboxes'
1832
    long_name = 'Pictures in Boxes'
1833
    url = 'http://www.picturesinboxes.com'
1834
    get_navi_link = get_a_navi_navinext
1835
    get_first_comic_link = simulate_first_link
1836
    first_url = 'http://www.picturesinboxes.com/2013/10/26/tetris/'
1837
1838
    @classmethod
1839
    def get_comic_info(cls, soup, link):
@@ 2518-2545 (lines=28) @@
2515
    url = "http://lastplacecomics.com"
2516
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
2517
    get_navi_link = get_link_rel_next
2518
2519
    @classmethod
2520
    def get_comic_info(cls, soup, link):
2521
        """Get information about a particular comic.

        Args:
            soup: parsed page of the comic (presumably a BeautifulSoup
                document - confirm against the caller).
            link: not used by this implementation.

        Returns:
            A dict with the keys 'img', 'title', 'alt', 'author', 'day',
            'month' and 'year'.
        """
2522
        title = soup.find('h2', class_='post-title').string
2523
        # The author name is the text of the link nested in span.post-author.
        author = soup.find("span", class_="post-author").find("a").string
2524
        date_str = soup.find("span", class_="post-date").string
2525
        # The date text is handed to string_to_date with the "%B %d, %Y" format.
        day = string_to_date(date_str, "%B %d, %Y")
2526
        imgs = soup.find("div", id="comic").find_all("img")
2527
        # Sanity checks: alt and title must agree on every image, and at most
        # one image is expected (asserts are skipped when Python runs with -O).
        assert all(i['alt'] == i['title'] for i in imgs)
2528
        assert len(imgs) <= 1
2529
        # Fall back to an empty alt text when no image was found.
        alt = imgs[0]['alt'] if imgs else ""
2530
        return {
2531
            'img': [i['src'] for i in imgs],
2532
            'title': title,
2533
            'alt': alt,
2534
            'author': author,
2535
            'day': day.day,
2536
            'month': day.month,
2537
            'year': day.year
2538
        }
2539
2540
2541
class TalesOfAbsurdity(GenericNavigableComic):
2542
    """Class to retrieve Tales Of Absurdity comics."""
2543
    # Also on http://tapastic.com/series/Tales-Of-Absurdity
2544
    # Also on http://talesofabsurdity.tumblr.com
2545
    name = 'absurdity'
2546
    long_name = 'Tales of Absurdity'
2547
    url = 'http://talesofabsurdity.com'
2548
    _categories = ('ABSURDITY', )
@@ 2738-2764 (lines=27) @@
2735
    name = 'unearthed'
2736
    long_name = 'Unearthed Comics'
2737
    url = 'http://unearthedcomics.com'
2738
    _categories = ('UNEARTHED', )
2739
    get_navi_link = get_link_rel_next
2740
    get_first_comic_link = simulate_first_link
2741
    first_url = 'http://unearthedcomics.com/comics/world-with-turn-signals/'
2742
2743
    @classmethod
2744
    def get_comic_info(cls, soup, link):
2745
        """Get information about a particular comic.

        Args:
            soup: parsed page of the comic (presumably a BeautifulSoup
                document - confirm against the caller).
            link: not used by this implementation.

        Returns:
            A dict with the keys 'title', 'description', 'url2', 'img',
            'month', 'year' and 'day'.
        """
2746
        short_url = soup.find('link', rel='shortlink')['href']
2747
        # Prefer the h1 heading, fall back to h2, and use "" when neither exists.
        title_elt = soup.find('h1') or soup.find('h2')
2748
        title = title_elt.string if title_elt else ""
2749
        # NOTE(review): unlike the other lookups in this method, no attribute is
        # extracted here, so 'description' receives whatever soup.find returns
        # rather than a text value - confirm this is intended.
        desc = soup.find('meta', property='og:description')
2750
        date_str = soup.find('time', class_='published updated hidden')['datetime']
2751
        # The datetime attribute is handed to string_to_date with "%Y-%m-%d".
        day = string_to_date(date_str, "%Y-%m-%d")
2752
        # Images are collected from the portfolio entry container only.
        post = soup.find('div', class_="entry content entry-content type-portfolio")
2753
        imgs = post.find_all('img')
2754
        return {
2755
            'title': title,
2756
            'description': desc,
2757
            'url2': short_url,
2758
            'img': [i['src'] for i in imgs],
2759
            'month': day.month,
2760
            'year': day.year,
2761
            'day': day.day,
2762
        }
2763
2764
2765
class Optipess(GenericNavigableComic):
2766
    """Class to retrieve Optipess comics."""
2767
    name = 'optipess'
@@ 2488-2514 (lines=27) @@
2485
    url = 'http://www.mister-and-me.com'
2486
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
2487
    get_navi_link = get_link_rel_next
2488
2489
    @classmethod
2490
    def get_comic_info(cls, soup, link):
2491
        """Get information about a particular comic.

        Args:
            soup: parsed page of the comic (presumably a BeautifulSoup
                document - confirm against the caller).
            link: not used by this implementation.

        Returns:
            A dict with the keys 'img', 'title', 'alt', 'author', 'day',
            'month' and 'year'.
        """
2492
        title = soup.find('h2', class_='post-title').string
2493
        # The author name is the text of the link nested in span.post-author.
        author = soup.find("span", class_="post-author").find("a").string
2494
        date_str = soup.find("span", class_="post-date").string
2495
        # The date text is handed to string_to_date with the "%B %d, %Y" format.
        day = string_to_date(date_str, "%B %d, %Y")
2496
        imgs = soup.find("div", id="comic").find_all("img")
2497
        # Sanity checks: alt and title must agree on every image, and at most
        # one image is expected (asserts are skipped when Python runs with -O).
        assert all(i['alt'] == i['title'] for i in imgs)
2498
        assert len(imgs) <= 1
2499
        # Fall back to an empty alt text when no image was found.
        alt = imgs[0]['alt'] if imgs else ""
2500
        return {
2501
            'img': [i['src'] for i in imgs],
2502
            'title': title,
2503
            'alt': alt,
2504
            'author': author,
2505
            'day': day.day,
2506
            'month': day.month,
2507
            'year': day.year
2508
        }
2509
2510
2511
class LastPlaceComics(GenericNavigableComic):
2512
    """Class to retrieve Last Place Comics."""
2513
    name = 'lastplace'
2514
    long_name = 'Last Place Comics'
2515
    url = "http://lastplacecomics.com"
2516
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
2517
    get_navi_link = get_link_rel_next
@@ 2321-2346 (lines=26) @@
2318
        return None
2319
2320
    @classmethod
2321
    def get_comic_info(cls, soup, link):
2322
        """Get information about a particular comic.

        All values are read from elements identified by their ``itemprop``
        attribute, except the title which comes from the description meta tag.

        Args:
            soup: parsed page of the comic (presumably a BeautifulSoup
                document - confirm against the caller).
            link: not used by this implementation.

        Returns:
            A dict with the keys 'img', 'month', 'year', 'day', 'author',
            'title', 'alt' and 'description'.
        """
2323
        # The comic title is taken from the page's <meta name="description">.
        title = soup.find('meta', attrs={'name': 'description'})["content"]
2324
        description = soup.find('div', itemprop='articleBody').text
2325
        author = soup.find('span', itemprop='author copyrightHolder').string
2326
        imgs = soup.find_all('img', itemprop='image')
2327
        # Sanity check: title and alt must agree on every image
        # (asserts are skipped when Python runs with -O).
        assert all(i['title'] == i['alt'] for i in imgs)
2328
        # Fall back to an empty alt text when no image was found.
        alt = imgs[0]['alt'] if imgs else ""
2329
        date_str = soup.find('time', itemprop='datePublished')["datetime"]
2330
        # The datetime attribute is handed to string_to_date with a
        # date-and-time format.
        day = string_to_date(date_str, "%Y-%m-%d %H:%M:%S")
2331
        return {
2332
            # Each image src is combined with cls.url through urljoin_wrapper.
            'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
2333
            'month': day.month,
2334
            'year': day.year,
2335
            'day': day.day,
2336
            'author': author,
2337
            'title': title,
2338
            'alt': alt,
2339
            'description': description,
2340
        }
2341
2342
2343
class GerbilWithAJetpack(GenericNavigableComic):
2344
    """Class to retrieve GerbilWithAJetpack comics."""
2345
    name = 'gerbil'
2346
    long_name = 'Gerbil With A Jetpack'
2347
    url = 'http://gerbilwithajetpack.com'
2348
    get_first_comic_link = get_a_navi_navifirst
2349
    get_navi_link = get_a_rel_next
@@ 2019-2043 (lines=25) @@
2016
        return get_soup_at_url(cls.url).find('a', title="First")
2017
2018
    @classmethod
2019
    def get_navi_link(cls, last_soup, next_):
2020
        """Get link to next or previous comic.

        Args:
            last_soup: parsed page to search for the navigation link.
            next_: when truthy the link titled 'Next' is looked up,
                otherwise the one titled 'Previous'.

        Returns:
            The result of searching ``last_soup`` for an ``a`` element with
            the selected title.
        """
2021
        return last_soup.find('a', title='Next' if next_ else 'Previous')
2022
2023
    @classmethod
2024
    def get_comic_info(cls, soup, link):
2025
        """Get information about a particular comic.

        Args:
            soup: parsed page of the comic (presumably a BeautifulSoup
                document - confirm against the caller).
            link: not used by this implementation.

        Returns:
            A dict with the keys 'title', 'img', 'month', 'year' and 'day'.
        """
2026
        title = soup.find('h1').string
2027
        # Surrounding whitespace is stripped before the date text is parsed.
        date_str = soup.find('span', class_='date').string.strip()
2028
        day = string_to_date(date_str, "%B %d, %Y")
2029
        # Only img elements matching alt='' and title='' inside div.comic are
        # requested - presumably to leave out decorated, non-comic images
        # (TODO confirm).
        imgs = soup.find('div', class_='comic').find_all('img', alt='', title='')
2030
        return {
2031
            'title': title,
2032
            'img': [i['src'] for i in imgs],
2033
            'month': day.month,
2034
            'year': day.year,
2035
            'day': day.day,
2036
        }
2037
2038
2039
class ChuckleADuck(GenericNavigableComic):
2040
    """Class to retrieve Chuckle-A-Duck comics."""
2041
    name = 'chuckleaduck'
2042
    long_name = 'Chuckle-A-duck'
2043
    url = 'http://chuckleaduck.com'
2044
    get_first_comic_link = get_div_navfirst_a
2045
    get_navi_link = get_link_rel_next
2046
@@ 2378-2404 (lines=27) @@
2375
    long_name = "Every Day Blues"
2376
    url = "http://everydayblues.net"
2377
    get_first_comic_link = get_a_navi_navifirst
2378
    get_navi_link = get_link_rel_next
2379
2380
    @classmethod
2381
    def get_comic_info(cls, soup, link):
2382
        """Get information about a particular comic.

        Args:
            soup: parsed page of the comic (presumably a BeautifulSoup
                document - confirm against the caller).
            link: not used by this implementation.

        Returns:
            A dict with the keys 'img', 'title', 'author', 'day', 'month'
            and 'year'.
        """
2383
        title = soup.find("h2", class_="post-title").string
2384
        # The author name is the text of the link nested in span.post-author.
        author = soup.find("span", class_="post-author").find("a").string
2385
        date_str = soup.find("span", class_="post-date").string
2386
        # The third argument looks like a locale name - presumably the date
        # text uses German month names (TODO confirm against string_to_date).
        day = string_to_date(date_str, "%d. %B %Y", "de_DE.utf8")
2387
        imgs = soup.find("div", id="comic").find_all("img")
2388
        # Sanity checks: alt, title and the post title must all agree on every
        # image, and at most one image is expected (asserts are skipped when
        # Python runs with -O).
        assert all(i['alt'] == i['title'] == title for i in imgs)
2389
        assert len(imgs) <= 1
2390
        return {
2391
            'img': [i['src'] for i in imgs],
2392
            'title': title,
2393
            'author': author,
2394
            'day': day.day,
2395
            'month': day.month,
2396
            'year': day.year
2397
        }
2398
2399
2400
class BiterComics(GenericNavigableComic):
2401
    """Class to retrieve Biter Comics."""
2402
    name = "biter"
2403
    long_name = "Biter Comics"
2404
    url = "http://www.bitercomics.com"
2405
    get_first_comic_link = get_a_navi_navifirst
2406
    get_navi_link = get_link_rel_next
2407
@@ 1932-1958 (lines=27) @@
1929
            'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
1930
            'title': title,
1931
        }
1932
1933
1934
class DiscoBleach(GenericEmptyComic):  # Does not work anymore
1935
    """Class to retrieve Disco Bleach Comics.

    Derives from GenericEmptyComic and only declares identification
    attributes; per the inline note on the class line, retrieval does not
    work anymore.
    """
1936
    # Short identifier of the comic (presumably used as a key - confirm).
    name = 'discobleach'
1937
    # Human-readable name of the comic.
    long_name = 'Disco Bleach'
1938
    # Address of the comic's website.
    url = 'http://discobleach.com'
1939
1940
1941
class TubeyToons(GenericEmptyComic):  # Does not work anymore
1942
    """Class to retrieve TubeyToons comics.

    Derives from GenericEmptyComic and only declares identification
    attributes; per the inline note on the class line, retrieval does not
    work anymore.
    """
1943
    # Also on http://tapastic.com/series/Tubey-Toons
1944
    # Also on http://tubeytoons.tumblr.com
1945
    # Short identifier of the comic (presumably used as a key - confirm).
    name = 'tubeytoons'
1946
    # Human-readable name of the comic.
    long_name = 'Tubey Toons'
1947
    # Address of the comic's website.
    url = 'http://tubeytoons.com'
1948
    # NOTE(review): 'TUNEYTOONS' looks like a typo for 'TUBEYTOONS'; the value
    # is a runtime string, so verify its consumers before changing it.
    _categories = ('TUNEYTOONS', )
1949
1950
1951
class CompletelySeriousComics(GenericNavigableComic):
1952
    """Class to retrieve Completely Serious comics."""
1953
    name = 'completelyserious'
1954
    long_name = 'Completely Serious Comics'
1955
    url = 'http://completelyseriouscomics.com'
1956
    get_first_comic_link = get_a_navi_navifirst
1957
    get_navi_link = get_a_navi_navinext
1958
1959
    @classmethod
1960
    def get_comic_info(cls, soup, link):
1961
        """Get information about a particular comics."""
@@ 2119-2144 (lines=26) @@
2116
        return {
2117
            'day': day.day,
2118
            'month': day.month,
2119
            'year': day.year,
2120
            'title': title,
2121
            'title2': title2,
2122
            'description': description,
2123
            'tags': tags,
2124
            'img': [i['src'] for i in imgs],
2125
            'alt': ' '.join(i['alt'] for i in imgs),
2126
        }
2127
2128
    @classmethod
2129
    def get_url_from_archive_element(cls, tr):
        """Get the comic url stored in an archive table row.

        Args:
            tr: table row element; it must contain exactly three ``td``
                cells, otherwise the unpacking below fails.

        Returns:
            The ``href`` of the link found in the second cell.
        """
2130
        # The first and third cells are unpacked but not used.
        _, td2, td3 = tr.find_all('td')
2131
        return td2.find('a')['href']
2132
2133
    @classmethod
2134
    def get_archive_elements(cls):
        """Get the rows of the archive table, in reversed page order.

        The archive page address is built by combining ``cls.url`` with
        'archive-2'; the page is fetched through get_soup_at_url.

        Returns:
            A reversed iterator over the ``tr`` elements found in the
            page's ``tbody``.
        """
2135
        archive_url = urljoin_wrapper(cls.url, 'archive-2')
2136
        # Rows are reversed - presumably the page lists the newest comic first
        # (TODO confirm).
        return reversed(get_soup_at_url(archive_url).find('tbody').find_all('tr'))
2137
2138
2139
class HappleTea(GenericNavigableComic):
2140
    """Class to retrieve Happle Tea Comics."""
2141
    name = 'happletea'
2142
    long_name = 'Happle Tea'
2143
    url = 'http://www.happletea.com'
2144
    get_first_comic_link = get_a_navi_navifirst
2145
    get_navi_link = get_link_rel_next
2146
2147
    @classmethod
@@ 2659-2683 (lines=25) @@
2656
        title = soup.find('meta', property='og:title')['content']
2657
        imgs = soup.find('div', class_='entry-content').find_all('img')
2658
        title2 = ' '.join(i.get('title', '') for i in imgs)
2659
        return {
2660
            'title': title,
2661
            'title2': title2,
2662
            'description': desc,
2663
            'img': [urljoin_wrapper(cls.url, convert_iri_to_plain_ascii_uri(i['src'])) for i in imgs],
2664
        }
2665
2666
2667
class CommitStripFr(GenericCommitStrip):
2668
    """Class to retrieve Commit Strips in French.

    Only declares site-specific attributes on top of GenericCommitStrip.
    """
2669
    # Short identifier of the comic (presumably used as a key - confirm).
    name = 'commit_fr'
2670
    # Human-readable name of the comic.
    long_name = 'Commit Strip (Fr)'
2671
    # Address of the French edition of the site.
    url = 'http://www.commitstrip.com/fr'
2672
    # Category labels attached to this comic (here: French-language content).
    _categories = ('FRANCAIS', )
2673
    # Address of a comic page - presumably the first strip, used as the
    # starting point for navigation (TODO confirm against the base class).
    first_url = 'http://www.commitstrip.com/fr/2012/02/22/interview/'
2674
2675
2676
class CommitStripEn(GenericCommitStrip):
2677
    """Class to retrieve Commit Strips in English.

    Only declares site-specific attributes on top of GenericCommitStrip.
    """
2678
    # Short identifier of the comic (presumably used as a key - confirm).
    name = 'commit_en'
2679
    # Human-readable name of the comic.
    long_name = 'Commit Strip (En)'
2680
    # Address of the English edition of the site.
    url = 'http://www.commitstrip.com/en'
2681
    # Address of a comic page - presumably the first strip, used as the
    # starting point for navigation (TODO confirm against the base class).
    first_url = 'http://www.commitstrip.com/en/2012/02/22/interview/'
2682
2683
2684
class GenericBoumerie(GenericNavigableComic):
2685
    """Generic class to retrieve Boumeries comics in different languages."""
2686
    get_first_comic_link = get_a_navi_navifirst
@@ 338-360 (lines=23) @@
335
    def get_next_comic(cls, last_comic):
336
        """Implementation of get_next_comic returning no comics."""
337
        cls.log("comic is considered as empty - returning no comic")
338
        return []
339
340
341
class ExtraFabulousComics(GenericNavigableComic):
342
    """Class to retrieve Extra Fabulous Comics."""
343
    name = 'efc'
344
    long_name = 'Extra Fabulous Comics'
345
    url = 'http://extrafabulouscomics.com'
346
    get_first_comic_link = get_a_navi_navifirst
347
    get_navi_link = get_link_rel_next
348
349
    @classmethod
350
    def get_comic_info(cls, soup, link):
351
        """Get information about a particular comics."""
352
        img_src_re = re.compile('^%s/wp-content/uploads/' % cls.url)
353
        imgs = soup.find_all('img', src=img_src_re)
354
        title = soup.find('meta', property='og:title')['content']
355
        date_str = soup.find('meta', property='article:published_time')['content'][:10]
356
        day = string_to_date(date_str, "%Y-%m-%d")
357
        return {
358
            'title': title,
359
            'img': [i['src'] for i in imgs],
360
            'month': day.month,
361
            'year': day.year,
362
            'day': day.day,
363
            'prefix': title + '-'