Code Duplication    Length = 23-27 lines in 8 locations

comics.py 8 locations

@@ 1259-1285 (lines=27) @@
1256
        }
1257
1258
1259
class ToonHole(GenericNavigableComic):
    """Retrieve the Toon Hole comics."""
    # Also on http://tapastic.com/series/TOONHOLE
    name = 'toonhole'
    long_name = 'Toon Hole'
    url = 'http://www.toonhole.com'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_a_comicnavbase_comicnavnext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract the metadata of a single comic page.

        `soup` is the parsed page (queried with find/find_all); `link` is
        not used here. Returns a dict with the keys 'title', 'month',
        'year', 'day' and 'img' (list of image URLs).
        """
        # The publication date is the first child of the entry-meta div.
        meta = soup.find('div', class_='entry-meta')
        published = string_to_date(meta.contents[0].strip(), "%B %d, %Y")
        pictures = soup.find('div', id='comic').find_all('img')
        # Pages without any image get an empty title.
        title = ""
        if pictures:
            first = pictures[0]
            title = first['alt']
            # Sanity check: alt and title attributes are expected to agree.
            assert first['title'] == title
        sources = [convert_iri_to_plain_ascii_uri(pic['src']) for pic in pictures]
        return dict(
            title=title,
            month=published.month,
            year=published.year,
            day=published.day,
            img=sources,
        )
1287
1288
@@ 1000-1026 (lines=27) @@
997
        }
998
999
1000
class ImogenQuest(GenericNavigableComic):
    """Retrieve the Imogen Quest comics."""
    # Also on http://imoquest.tumblr.com
    name = 'imogen'
    long_name = 'Imogen Quest'
    url = 'http://imogenquest.net'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract the metadata of a single comic page.

        `soup` is the parsed page (queried with find/find_all); `link` is
        not used here. Returns a dict with the keys 'day', 'month', 'year',
        'img' (list of image URLs), 'title', 'title2' (title attribute of
        the first image) and 'author'.
        """
        post_title = soup.find('h2', class_='post-title').string
        writer = soup.find("span", class_="post-author").find("a").string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, '%B %d, %Y')
        pictures = soup.find('div', class_='comicpane').find_all('img')
        # Sanity check: every image carries the same text in alt and title.
        for picture in pictures:
            assert picture['alt'] == picture['title']
        hover_text = pictures[0]['title']
        return dict(
            day=published.day,
            month=published.month,
            year=published.year,
            img=[picture['src'] for picture in pictures],
            title=post_title,
            title2=hover_text,
            author=writer,
        )
1028
1029
@@ 2606-2631 (lines=26) @@
2603
        }
2604
2605
2606
class GerbilWithAJetpack(GenericNavigableComic):
    """Retrieve the Gerbil With A Jetpack comics."""
    name = 'gerbil'
    long_name = 'Gerbil With A Jetpack'
    url = 'http://gerbilwithajetpack.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract the metadata of a single comic page.

        `soup` is the parsed page (queried with find/find_all); `link` is
        not used here. Returns a dict with the keys 'img' (list of image
        URLs), 'title', 'alt', 'author', 'day', 'month' and 'year'.
        """
        post_title = soup.find('h2', class_='post-title').string
        writer = soup.find("span", class_="post-author").find("a").string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find("div", id="comic").find_all("img")
        first_alt = pictures[0]['alt']
        # Sanity check: all images share one text, in both alt and title.
        for picture in pictures:
            assert picture['alt'] == picture['title'] == first_alt
        info = {}
        info['img'] = [picture['src'] for picture in pictures]
        info['title'] = post_title
        info['alt'] = first_alt
        info['author'] = writer
        info['day'] = published.day
        info['month'] = published.month
        info['year'] = published.year
        return info
2633
2634
@@ 1961-1986 (lines=26) @@
1958
        }
1959
1960
1961
class Penmen(GenericComicNotWorking, GenericNavigableComic):
    """Retrieve the Penmen comics."""
    name = 'penmen'
    long_name = 'Penmen'
    url = 'http://penmen.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://penmen.com/index.php/2016/09/12/penmen-announces-grin-big-brand-clothing/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract the metadata of a single comic page.

        `soup` is the parsed page (queried with find/find_all); `link` is
        not used here. Returns a dict with the keys 'title', 'short_url',
        'img' (list of image URLs), 'tags' (space-separated), 'month',
        'year' and 'day'.
        """
        page_title = soup.find('title').string
        pictures = soup.find('div', class_='entry-content').find_all('img')
        shortlink = soup.find('link', rel='shortlink')['href']
        tag_anchors = soup.find_all('a', rel='tag')
        tag_text = ' '.join([anchor.string for anchor in tag_anchors])
        # Only the leading YYYY-MM-DD part of the datetime attribute is used.
        stamp = soup.find('time')['datetime']
        published = string_to_date(stamp[:10], "%Y-%m-%d")
        return dict(
            title=page_title,
            short_url=shortlink,
            img=[picture['src'] for picture in pictures],
            tags=tag_text,
            month=published.month,
            year=published.year,
            day=published.day,
        )
1988
1989
@@ 1902-1927 (lines=26) @@
1899
        }
1900
1901
1902
class SafelyEndangered(GenericNavigableComic):
    """Retrieve the Safely Endangered comics."""
    # Also on http://tumblr.safelyendangered.com
    name = 'endangered'
    long_name = 'Safely Endangered'
    url = 'http://www.safelyendangered.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.safelyendangered.com/comic/ignored/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract the metadata of a single comic page.

        `soup` is the parsed page (queried with find/find_all); `link` is
        not used here. Returns a dict with the keys 'day', 'month', 'year',
        'img' (list of image URLs), 'title' and 'alt' (alt text of the
        first image).
        """
        post_title = soup.find('h2', class_='post-title').string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, '%B %d, %Y')
        pictures = soup.find('div', id='comic').find_all('img')
        first_alt = pictures[0]['alt']
        # Sanity check: every image carries the same text in alt and title.
        for picture in pictures:
            assert picture['alt'] == picture['title']
        sources = [picture['src'] for picture in pictures]
        return dict(
            day=published.day,
            month=published.month,
            year=published.year,
            img=sources,
            title=post_title,
            alt=first_alt,
        )
1929
1930
@@ 405-430 (lines=26) @@
402
    _categories = ('DELETED', )
403
404
405
class ExtraFabulousComics(GenericNavigableComic):
    """Class to retrieve Extra Fabulous Comics."""
    # Also on https://extrafabulouscomics.tumblr.com
    name = 'efc'
    long_name = 'Extra Fabulous Comics'
    url = 'http://extrafabulouscomics.com'
    _categories = ('EFC', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://extrafabulouscomics.com/comic/buttfly/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        `soup` is the parsed page (queried with find/find_all); `link` is
        not used here. The images are the <img> tags whose src starts with
        the site's wp-content/uploads/ directory; title and date are read
        from the og:title and article:published_time <meta> tags.

        Returns a dict with the keys 'title', 'img' (list of image URLs),
        'month', 'year', 'day' and 'prefix' (the title followed by '-').
        """
        # Escape the site URL so its dots match literal dots only, not
        # any character.
        img_src_re = re.compile('^%s/wp-content/uploads/' % re.escape(cls.url))
        imgs = soup.find_all('img', src=img_src_re)
        title = soup.find('meta', property='og:title')['content']
        # Only the leading YYYY-MM-DD part of the timestamp is needed.
        date_str = soup.find('meta', property='article:published_time')['content'][:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        return {
            'title': title,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'prefix': title + '-',
        }
432
433
@@ 1232-1255 (lines=24) @@
1229
    url = 'http://english.bouletcorp.com'
1230
1231
1232
class AmazingSuperPowers(GenericNavigableComic):
    """Retrieve the Amazing Super Powers comics."""
    name = 'asp'
    long_name = 'Amazing Super Powers'
    url = 'http://www.amazingsuperpowers.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract the metadata of a single comic page.

        `soup` is the parsed page (queried with find/find_all); `link` is
        not used here. Returns a dict with the keys 'title' (the title
        attributes of all images joined with spaces), 'author', 'img'
        (list of image URLs), 'day', 'month' and 'year'.
        """
        writer = soup.find("span", class_="post-author").find("a").string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find('div', id='comic').find_all('img')
        joined_title = ' '.join([picture['title'] for picture in pictures])
        # Sanity check: every image carries the same text in alt and title.
        for picture in pictures:
            assert picture['alt'] == picture['title']
        return dict(
            title=joined_title,
            author=writer,
            img=[picture['src'] for picture in pictures],
            day=published.day,
            month=published.month,
            year=published.year,
        )
1257
1258
@@ 434-456 (lines=23) @@
431
        }
432
433
434
class GenericLeMondeBlog(GenericNavigableComic):
    """Generic base to retrieve comics hosted on Le Monde blogs."""
    _categories = ('LEMONDE', 'FRANCAIS')
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = NotImplemented
    date_format = "%d %B %Y"

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract the metadata of a single blog post.

        `soup` is the parsed page (queried with find/find_all); `link` is
        not used here. The date is parsed with `cls.date_format` and the
        "fr_FR.utf8" locale name passed to string_to_date. Returns a dict
        with the keys 'title', 'url2' (the shortlink), 'img' (list of
        image URLs), 'month', 'year' and 'day'.
        """
        shortlink = soup.find('link', rel='shortlink')['href']
        post_title = soup.find('meta', property='og:title')['content']
        raw_date = soup.find("span", class_="entry-date").string
        published = string_to_date(raw_date, cls.date_format, "fr_FR.utf8")
        # Images are taken from the og:image <meta> tags, not from <img> tags.
        image_metas = soup.find_all('meta', property='og:image')
        sources = [convert_iri_to_plain_ascii_uri(meta['content']) for meta in image_metas]
        return dict(
            title=post_title,
            url2=shortlink,
            img=sources,
            month=published.month,
            year=published.year,
            day=published.day,
        )
458
459