Code Duplication    Length = 21-27 lines in 19 locations

comics.py 19 locations

@@ 720-743 (lines=24) @@
717
        }
718
719
720
class OneOneOneOneComic(GenericComicNotWorking, GenericNavigableComic):
    """Retrieve the 1111 Comics strips."""
    # Also on http://comics1111.tumblr.com
    # Also on https://tapastic.com/series/1111-Comics
    name = '1111'
    long_name = '1111 Comics'
    url = 'http://www.1111comics.me'
    _categories = ('ONEONEONEONE', )
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title, date parts, image URLs) for one comic page."""
        # Title is the text of the link inside the 'comic-title' heading.
        title_heading = soup.find('h1', class_='comic-title')
        comic_title = title_heading.find('a').string
        # Publication date is the text of the first link in the meta header.
        meta_header = soup.find('header', class_='comic-meta entry-meta')
        published = string_to_date(meta_header.find('a').string, "%B %d, %Y")
        # Image URLs come from the page's og:image <meta> tags.
        og_tags = soup.find_all('meta', property='og:image')
        return {
            'title': comic_title,
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [tag['content'] for tag in og_tags],
        }
745
746
@@ 948-970 (lines=23) @@
945
        }
946
947
948
class TheGentlemanArmchair(GenericNavigableComic):
    """Retrieve The Gentleman Armchair strips."""
    name = 'gentlemanarmchair'
    long_name = 'The Gentleman Armchair'
    url = 'http://thegentlemansarmchair.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect image URLs, title, author and date parts from a comic page."""
        post_title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        post_author = author_span.find("a").string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, "%B %d, %Y")
        # Every <img> inside the div with id 'comic' is part of the strip.
        comic_div = soup.find('div', id='comic')
        pictures = comic_div.find_all('img')
        return {
            'img': [pic['src'] for pic in pictures],
            'title': post_title,
            'author': post_author,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
972
973
@@ 747-768 (lines=22) @@
744
        }
745
746
747
class AngryAtNothing(GenericDeletedComic, GenericNavigableComic):
    """Retrieve the Angry at Nothing strips."""
    # Also on http://tapastic.com/series/Comics-yeah-definitely-comics-
    # Also on http://angryatnothing.tumblr.com
    name = 'angry'
    long_name = 'Angry At Nothing'
    url = 'http://www.angryatnothing.net'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, date parts and og:image URLs for a single comic page."""
        heading_link = soup.find('h1', class_='comic-title').find('a')
        comic_title = heading_link.string
        date_link = soup.find('header', class_='comic-meta entry-meta').find('a')
        when = string_to_date(date_link.string, "%B %d, %Y")
        image_metas = soup.find_all('meta', property='og:image')
        info = {
            'title': comic_title,
            'month': when.month,
            'year': when.year,
            'day': when.day,
        }
        # Added last so the key order matches a single dict literal.
        info['img'] = [meta['content'] for meta in image_metas]
        return info
771
@@ 694-716 (lines=23) @@
691
        }
692
693
694
class PenelopeBagieu(GenericNavigableComic):
    """Retrieve the comics posted on Penelope Bagieu's blog."""
    name = 'bagieu'
    long_name = 'Ma vie est tout a fait fascinante (Bagieu)'
    url = 'http://www.penelope-jolicoeur.com'
    _categories = ('FRANCAIS', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.penelope-jolicoeur.com/2007/02/ma-vie-mon-oeuv.html'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, image URLs and date parts for one blog entry."""
        raw_date = soup.find('h2', class_='date-header').string
        # NOTE(review): third argument is presumably a locale name used to
        # parse French day/month names - confirm against string_to_date.
        posted_on = string_to_date(raw_date, "%A %d %B %Y", "fr_FR.utf8")
        entry_body = soup.find('div', class_='entry-body')
        pictures = entry_body.find_all('img')
        entry_title = soup.find('h3', class_='entry-header').string
        return {
            'title': entry_title,
            'img': [pic['src'] for pic in pictures],
            'month': posted_on.month,
            'year': posted_on.year,
            'day': posted_on.day,
        }
718
719
@@ 1908-1934 (lines=27) @@
1905
        }
1906
1907
1908
class PicturesInBoxes(GenericNavigableComic):
    """Retrieve the Pictures In Boxes strips."""
    # Also on https://picturesinboxescomic.tumblr.com
    name = 'picturesinboxes'
    long_name = 'Pictures in Boxes'
    url = 'http://www.picturesinboxes.com'
    get_navi_link = get_a_navi_navinext
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.picturesinboxes.com/2013/10/26/tetris/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return date parts, image URLs, title and author for one comic page."""
        post_title = soup.find('h2', class_='post-title').string
        post_author = soup.find("span", class_="post-author").find("a").string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, '%B %d, %Y')
        pane = soup.find('div', class_='comicpane')
        pictures = pane.find_all('img')
        # At least one image is expected, and each image's title/alt
        # attributes must equal the post title.
        assert pictures
        assert all(pic['title'] == pic['alt'] == post_title for pic in pictures)
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [pic['src'] for pic in pictures],
            'title': post_title,
            'author': post_author,
        }
1936
1937
@@ 2895-2919 (lines=25) @@
2892
    first_url = 'http://www.commitstrip.com/en/2012/02/22/interview/'
2893
2894
2895
class GenericBoumerie(GenericNavigableComic):
    """Shared base for the Boumeries comics in their different languages."""
    # Also on http://boumeries.tumblr.com
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next
    # Placeholders read by get_comic_info via cls.date_format / cls.lang.
    date_format = NotImplemented
    lang = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return short URL, image URLs, title, author and date parts for a page."""
        post_title = soup.find('h2', class_='post-title').string
        shortlink_tag = soup.find('link', rel='shortlink')
        shortlink = shortlink_tag['href']
        post_author = soup.find("span", class_="post-author").find("a").string
        raw_date = soup.find('span', class_='post-date').string
        # Date format and language are taken from the class attributes.
        published = string_to_date(raw_date, cls.date_format, cls.lang)
        pictures = soup.find('div', id='comic').find_all('img')
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        return {
            'short_url': shortlink,
            'img': [pic['src'] for pic in pictures],
            'title': post_title,
            'author': post_author,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2922
@@ 2641-2666 (lines=26) @@
2638
        }
2639
2640
2641
class TheAwkwardYeti(GenericNavigableComic):
    """Retrieve The Awkward Yeti strips."""
    # Also on http://www.gocomics.com/the-awkward-yeti
    # Also on http://larstheyeti.tumblr.com
    # Also on https://tapastic.com/series/TheAwkwardYeti
    name = 'yeti'
    long_name = 'The Awkward Yeti'
    url = 'http://theawkwardyeti.com'
    _categories = ('YETI', )
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return image URLs, title and date parts for one comic page."""
        post_title = soup.find('h2', class_='post-title').string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find("div", id="comic").find_all("img")
        # Only the first image (index 0) is required to have alt == title.
        assert all(pic['alt'] == pic['title'] for pic in pictures[:1])
        return {
            'img': [pic['src'] for pic in pictures],
            'title': post_title,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2668
2669
@@ 2554-2579 (lines=26) @@
2551
        }
2552
2553
2554
class GerbilWithAJetpack(GenericNavigableComic):
    """Retrieve the Gerbil With A Jetpack strips."""
    name = 'gerbil'
    long_name = 'Gerbil With A Jetpack'
    url = 'http://gerbilwithajetpack.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return image URLs, title, alt text, author and date parts for a page."""
        post_title = soup.find('h2', class_='post-title').string
        post_author = soup.find("span", class_="post-author").find("a").string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find("div", id="comic").find_all("img")
        # The first image's alt text is reported as the comic's alt text;
        # all images must share it in both their alt and title attributes.
        alt_text = pictures[0]['alt']
        assert all(pic['alt'] == pic['title'] == alt_text for pic in pictures)
        return {
            'img': [pic['src'] for pic in pictures],
            'title': post_title,
            'alt': alt_text,
            'author': post_author,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2581
2582
@@ 2583-2607 (lines=25) @@
2580
        }
2581
2582
2583
class EveryDayBlues(GenericDeletedComic, GenericNavigableComic):
    """Retrieve the Every Day Blues strips."""
    name = "blues"
    long_name = "Every Day Blues"
    url = "http://everydayblues.net"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return image URLs, title, author and date parts for one comic page."""
        post_title = soup.find("h2", class_="post-title").string
        post_author = soup.find("span", class_="post-author").find("a").string
        raw_date = soup.find("span", class_="post-date").string
        # NOTE(review): third argument is presumably a locale name used to
        # parse German month names - confirm against string_to_date.
        published = string_to_date(raw_date, "%d. %B %Y", "de_DE.utf8")
        pictures = soup.find("div", id="comic").find_all("img")
        # Each image's alt/title must equal the post title; at most one image.
        assert all(pic['alt'] == pic['title'] == post_title for pic in pictures)
        assert len(pictures) <= 1, pictures
        return {
            'img': [pic['src'] for pic in pictures],
            'title': post_title,
            'author': post_author,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2609
2610
@@ 1796-1820 (lines=25) @@
1793
        }
1794
1795
1796
class MouseBearComedy(GenericComicNotWorking):  # Website has changed
    """Retrieve the Mouse Bear Comedy strips."""
    # Also on http://mousebearcomedy.tumblr.com
    name = 'mousebear'
    long_name = 'Mouse Bear Comedy'
    url = 'http://www.mousebearcomedy.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return date parts, image URLs, title and author for one comic page."""
        post_title = soup.find('h2', class_='post-title').string
        author_link = soup.find("span", class_="post-author").find("a")
        post_author = author_link.string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, '%B %d, %Y')
        comic_div = soup.find("div", id="comic")
        pictures = comic_div.find_all("img")
        # Every image's alt and title attributes must equal the post title.
        assert all(pic['alt'] == pic['title'] == post_title for pic in pictures)
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [pic['src'] for pic in pictures],
            'title': post_title,
            'author': post_author,
        }
1822
1823
@@ 1206-1229 (lines=24) @@
1203
    url = 'http://english.bouletcorp.com'
1204
1205
1206
class AmazingSuperPowers(GenericNavigableComic):
    """Retrieve the Amazing Super Powers strips."""
    name = 'asp'
    long_name = 'Amazing Super Powers'
    url = 'http://www.amazingsuperpowers.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, author, image URLs and date parts for one comic page."""
        post_author = soup.find("span", class_="post-author").find("a").string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find('div', id='comic').find_all('img')
        # The comic title is assembled from the images' title attributes.
        joined_title = ' '.join(pic['title'] for pic in pictures)
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        return {
            'title': joined_title,
            'author': post_author,
            'img': [pic['src'] for pic in pictures],
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
1231
1232
@@ 2611-2637 (lines=27) @@
2608
        }
2609
2610
2611
class BiterComics(GenericNavigableComic):
    """Retrieve the Biter Comics strips."""
    name = "biter"
    long_name = "Biter Comics"
    url = "http://www.bitercomics.com"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return image URLs, title, alt text, author and date parts for a page."""
        entry_title = soup.find("h1", class_="entry-title").string
        author_span = soup.find("span", class_="author vcard")
        entry_author = author_span.find("a").string
        raw_date = soup.find("span", class_="entry-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find("div", id="comic").find_all("img")
        # Exactly one image is expected, with matching alt and title attributes.
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        assert len(pictures) == 1, pictures
        alt_text = pictures[0]['alt']
        return {
            'img': [pic['src'] for pic in pictures],
            'title': entry_title,
            'alt': alt_text,
            'author': entry_author,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2639
2640
@@ 2264-2289 (lines=26) @@
2261
        return reversed(get_soup_at_url(archive_url).find('tbody').find_all('tr'))
2262
2263
2264
class HappleTea(GenericNavigableComic):
    """Retrieve the Happle Tea strips."""
    name = 'happletea'
    long_name = 'Happle Tea'
    url = 'http://www.happletea.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, image URLs, alt text, date parts and author for a page."""
        pictures = soup.find('div', id='comic').find_all('img')
        # Title, author and date are all looked up inside the post-content div.
        post_block = soup.find('div', class_='post-content')
        post_title = post_block.find('h2', class_='post-title').string
        post_author = post_block.find('a', rel='author').string
        raw_date = post_block.find('span', class_='post-date').string
        published = string_to_date(raw_date, "%B %d, %Y")
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        return {
            'title': post_title,
            'img': [pic['src'] for pic in pictures],
            # Alt texts of all images are concatenated without a separator.
            'alt': ''.join(pic['alt'] for pic in pictures),
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'author': post_author,
        }
2291
2292
@@ 2445-2469 (lines=25) @@
2442
        }
2443
2444
2445
class LonnieMillsap(GenericNavigableComic):
    """Retrieve Lonnie Millsap's comics."""
    name = 'millsap'
    long_name = 'Lonnie Millsap'
    url = 'http://www.lonniemillsap.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.lonniemillsap.com/?p=42'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, author, image URLs and date parts for one post."""
        post_title = soup.find('h2', class_='post-title').string
        # Author, date and images are searched within the post-content div.
        post_block = soup.find('div', class_='post-content')
        author_span = post_block.find("span", class_="post-author")
        post_author = author_span.find("a").string
        raw_date = post_block.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        entry_div = post_block.find("div", class_="entry")
        pictures = entry_div.find_all("img")
        return {
            'title': post_title,
            'author': post_author,
            'img': [pic['src'] for pic in pictures],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2471
2472
@@ 3306-3329 (lines=24) @@
3303
        }
3304
3305
3306
class Ubertool(GenericNavigableComic):
    """Retrieve the Ubertool strips."""
    # Also on https://ubertool.tumblr.com
    # Also on https://tapastic.com/series/ubertool
    name = 'ubertool'
    long_name = 'Ubertool'
    url = 'http://ubertoolcomic.com'
    _categories = ('UBERTOOL', )
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_a_comicnavbase_comicnavnext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return image URLs, title and date parts for one comic page."""
        post_title = soup.find('h2', class_='post-title').string
        date_span = soup.find('span', class_='post-date')
        published = string_to_date(date_span.string, "%B %d, %Y")
        comic_div = soup.find('div', id='comic')
        pictures = comic_div.find_all('img')
        return {
            'img': [pic['src'] for pic in pictures],
            'title': post_title,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
3331
3332
@@ 3607-3627 (lines=21) @@
3604
        }
3605
3606
3607
class Octopuns(GenericBlogspotComic):
    """Retrieve the Octopuns strips."""
    # Also on http://octopuns.tumblr.com
    name = 'octopuns'
    long_name = 'Octopuns'
    url = 'http://www.octopuns.net'  # or http://octopuns.blogspot.fr/
    first_url = 'http://octopuns.blogspot.com/2010/12/17122010-always-read-label.html'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return image URLs, title and date parts for one blog post."""
        post_title = soup.find('h3', class_='post-title entry-title').string
        raw_date = soup.find('h2', class_='date-header').string
        published = string_to_date(raw_date, "%A, %B %d, %Y")
        # Image URLs are taken from the page's <link rel="image_src"> tags.
        image_links = soup.find_all('link', rel='image_src')
        return {
            'img': [tag['href'] for tag in image_links],
            'title': post_title,
            'day': published.day,
            'month': published.month,
            'year': published.year,
        }
3629
3630
@@ 3263-3283 (lines=21) @@
3260
        }
3261
3262
3263
class ManVersusManatee(GenericNavigableComic):
    """Retrieve the Man Versus Manatee strips."""
    url = 'http://manvsmanatee.com'
    name = 'manvsmanatee'
    long_name = 'Man Versus Manatee'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_a_comicnavbase_comicnavnext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return image URLs, title and date parts for one comic page."""
        post_title = soup.find('h2', class_='post-title').string
        comic_div = soup.find('div', id='comic')
        pictures = comic_div.find_all('img')
        date_span = soup.find('span', class_='post-date')
        published = string_to_date(date_span.string, "%B %d, %Y")
        return {
            'img': [pic['src'] for pic in pictures],
            'title': post_title,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
3285
3286
@@ 2813-2833 (lines=21) @@
2810
        }
2811
2812
2813
class PlanC(GenericNavigableComic):
    """Retrieve the Plan C strips."""
    name = 'planc'
    long_name = 'Plan C'
    url = 'http://www.plancomic.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, image URLs and date parts for one comic page."""
        title_heading = soup.find('h2', class_='post-title')
        post_title = title_heading.string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find('div', id='comic').find_all('img')
        return {
            'title': post_title,
            'img': [pic['src'] for pic in pictures],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2835
2836
@@ 1749-1769 (lines=21) @@
1746
        }
1747
1748
1749
class WarehouseComic(GenericNavigableComic):
    """Retrieve the Warehouse Comic strips."""
    name = 'warehouse'
    long_name = 'Warehouse Comic'
    url = 'http://warehousecomic.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return image URLs, title and date parts for one comic page."""
        post_title = soup.find('h2', class_='post-title').string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, "%B %d, %Y")
        # All <img> tags within the div with id 'comic' make up the strip.
        comic_div = soup.find('div', id='comic')
        pictures = comic_div.find_all('img')
        return {
            'img': [pic['src'] for pic in pictures],
            'title': post_title,
            'day': published.day,
            'month': published.month,
            'year': published.year,
        }
1771
1772