Code Duplication    Length = 23-27 lines in 8 locations

comics.py 8 locations

@@ 1260-1286 (lines=27) @@
1257
        }
1258
1259
1260
class ToonHole(GenericNavigableComic):
    """Class to retrieve Toon Hole comics.

    First/next navigation is delegated to the ``get_a_comicnavbase_*``
    helpers defined elsewhere in this module; only the per-page scraping
    is implemented here.
    """
    # Also on http://tapastic.com/series/TOONHOLE
    name = 'toonhole'
    long_name = 'Toon Hole'
    url = 'http://www.toonhole.com'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_a_comicnavbase_comicnavnext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Args:
            soup: parsed comic page; only ``find`` is called on it
                (presumably a BeautifulSoup document - confirm with caller).
            link: not used by this implementation.

        Returns:
            A dict with the keys 'title', 'month', 'year', 'day' and 'img'.
            'title' is the ``alt`` text of the first image found in the
            ``div#comic`` element, or "" when that element has no image.
        """
        # The publication date is the first child of the entry-meta div.
        meta = soup.find('div', class_='entry-meta')
        published = string_to_date(meta.contents[0].strip(), "%B %d, %Y")
        pictures = soup.find('div', id='comic').find_all('img')
        title = ""
        if pictures:
            first = pictures[0]
            title = first['alt']
            # Sanity check: the site is expected to duplicate alt into title.
            assert first['title'] == title
        sources = [convert_iri_to_plain_ascii_uri(pic['src']) for pic in pictures]
        return {
            'title': title,
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': sources,
        }
1288
1289
@@ 1001-1027 (lines=27) @@
998
        }
999
1000
1001
class ImogenQuest(GenericNavigableComic):
    """Class to retrieve Imogen Quest comics."""
    # Also on http://imoquest.tumblr.com
    name = 'imogen'
    long_name = 'Imogen Quest'
    url = 'http://imogenquest.net'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Args:
            soup: parsed comic page; only ``find`` is called on it
                (presumably a BeautifulSoup document - confirm with caller).
            link: not used by this implementation.

        Returns:
            A dict with the keys 'day', 'month', 'year', 'img', 'title',
            'title2' and 'author'. 'title2' is the ``title`` attribute of
            the first image in the ``div.comicpane`` element, or "" when
            that element contains no image.
        """
        title = soup.find('h2', class_='post-title').string
        author = soup.find("span", class_="post-author").find("a").string
        date_str = soup.find('span', class_='post-date').string
        day = string_to_date(date_str, '%B %d, %Y')
        imgs = soup.find('div', class_='comicpane').find_all('img')
        # Sanity check: every image is expected to carry the same text in
        # its alt and title attributes.
        assert all(i['alt'] == i['title'] for i in imgs)
        # A page without any image yields an empty secondary title instead
        # of an IndexError on imgs[0].
        title2 = imgs[0]['title'] if imgs else ""
        return {
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [i['src'] for i in imgs],
            'title': title,
            'title2': title2,
            'author': author,
        }
1029
1030
@@ 2607-2632 (lines=26) @@
2604
        }
2605
2606
2607
class GerbilWithAJetpack(GenericNavigableComic):
    """Class to retrieve GerbilWithAJetpack comics."""
    name = 'gerbil'
    long_name = 'Gerbil With A Jetpack'
    url = 'http://gerbilwithajetpack.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Args:
            soup: parsed comic page; only ``find`` is called on it
                (presumably a BeautifulSoup document - confirm with caller).
            link: not used by this implementation.

        Returns:
            A dict with the keys 'img', 'title', 'alt', 'author', 'day',
            'month' and 'year'. 'alt' is the ``alt`` text of the first
            image in the ``div#comic`` element, or "" when that element
            contains no image.
        """
        title = soup.find('h2', class_='post-title').string
        author = soup.find("span", class_="post-author").find("a").string
        date_str = soup.find("span", class_="post-date").string
        day = string_to_date(date_str, "%B %d, %Y")
        imgs = soup.find("div", id="comic").find_all("img")
        # A page without any image yields an empty alt text instead of an
        # IndexError on imgs[0].
        alt = imgs[0]['alt'] if imgs else ""
        # Sanity check: all images are expected to share one alt/title text.
        assert all(i['alt'] == i['title'] == alt for i in imgs)
        return {
            'img': [i['src'] for i in imgs],
            'title': title,
            'alt': alt,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
        }
2634
2635
@@ 1962-1987 (lines=26) @@
1959
        }
1960
1961
1962
class Penmen(GenericComicNotWorking, GenericNavigableComic):
    """Class to retrieve Penmen comics.

    There is no scraped "first comic" link: ``first_url`` is supplied
    together with ``simulate_first_link`` (defined elsewhere in this module).
    """
    name = 'penmen'
    long_name = 'Penmen'
    url = 'http://penmen.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://penmen.com/index.php/2016/09/12/penmen-announces-grin-big-brand-clothing/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Args:
            soup: parsed comic page; ``find`` and ``find_all`` are called
                on it (presumably a BeautifulSoup document - confirm with
                caller).
            link: not used by this implementation.

        Returns:
            A dict with the keys 'title', 'short_url', 'img', 'tags',
            'month', 'year' and 'day'. 'tags' is the space-separated text
            of every ``<a rel="tag">`` element on the page.
        """
        page_title = soup.find('title').string
        content = soup.find('div', class_='entry-content')
        pictures = content.find_all('img')
        shortlink = soup.find('link', rel='shortlink')
        short_url = shortlink['href']
        tag_names = [anchor.string for anchor in soup.find_all('a', rel='tag')]
        tags = ' '.join(tag_names)
        # Only the leading YYYY-MM-DD part of the datetime attribute is used.
        stamp = soup.find('time')['datetime']
        published = string_to_date(stamp[:10], "%Y-%m-%d")
        return {
            'title': page_title,
            'short_url': short_url,
            'img': [pic['src'] for pic in pictures],
            'tags': tags,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
1989
1990
@@ 1903-1928 (lines=26) @@
1900
        }
1901
1902
1903
class SafelyEndangered(GenericNavigableComic):
    """Class to retrieve Safely Endangered comics."""
    # Also on http://tumblr.safelyendangered.com
    name = 'endangered'
    long_name = 'Safely Endangered'
    url = 'http://www.safelyendangered.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.safelyendangered.com/comic/ignored/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Args:
            soup: parsed comic page; only ``find`` is called on it
                (presumably a BeautifulSoup document - confirm with caller).
            link: not used by this implementation.

        Returns:
            A dict with the keys 'day', 'month', 'year', 'img', 'title'
            and 'alt'. 'alt' is the ``alt`` text of the first image in the
            ``div#comic`` element, or "" when that element contains no
            image.
        """
        title = soup.find('h2', class_='post-title').string
        date_str = soup.find('span', class_='post-date').string
        day = string_to_date(date_str, '%B %d, %Y')
        imgs = soup.find('div', id='comic').find_all('img')
        # A page without any image yields an empty alt text instead of an
        # IndexError on imgs[0].
        alt = imgs[0]['alt'] if imgs else ""
        # Sanity check: every image is expected to carry the same text in
        # its alt and title attributes.
        assert all(i['alt'] == i['title'] for i in imgs)
        return {
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [i['src'] for i in imgs],
            'title': title,
            'alt': alt,
        }
1930
1931
@@ 406-431 (lines=26) @@
403
    _categories = ('DELETED', )
404
405
406
class ExtraFabulousComics(GenericNavigableComic):
    """Class to retrieve Extra Fabulous Comics."""
    # Also on https://extrafabulouscomics.tumblr.com
    name = 'efc'
    long_name = 'Extra Fabulous Comics'
    url = 'http://extrafabulouscomics.com'
    _categories = ('EFC', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://extrafabulouscomics.com/comic/buttfly/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Args:
            soup: parsed comic page; ``find`` and ``find_all`` are called
                on it (presumably a BeautifulSoup document - confirm with
                caller).
            link: not used by this implementation.

        Returns:
            A dict with the keys 'title', 'img', 'month', 'year', 'day'
            and 'prefix'. 'img' lists the ``src`` of every image whose
            address starts with ``<cls.url>/wp-content/uploads/``;
            'prefix' is the title followed by '-'.
        """
        # The site URL is escaped so that its dots (and any other regex
        # metacharacter) are matched literally rather than as wildcards.
        img_src_re = re.compile('^%s/wp-content/uploads/' % re.escape(cls.url))
        imgs = soup.find_all('img', src=img_src_re)
        title = soup.find('meta', property='og:title')['content']
        # Only the leading YYYY-MM-DD part of the timestamp is used.
        date_str = soup.find('meta', property='article:published_time')['content'][:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        return {
            'title': title,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'prefix': title + '-',
        }
433
434
@@ 1233-1256 (lines=24) @@
1230
    url = 'http://english.bouletcorp.com'
1231
1232
1233
class AmazingSuperPowers(GenericNavigableComic):
    """Class to retrieve Amazing Super Powers comics.

    First/next navigation is delegated to the ``get_a_navi_*`` helpers
    defined elsewhere in this module.
    """
    name = 'asp'
    long_name = 'Amazing Super Powers'
    url = 'http://www.amazingsuperpowers.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Args:
            soup: parsed comic page; only ``find`` is called on it
                (presumably a BeautifulSoup document - confirm with caller).
            link: not used by this implementation.

        Returns:
            A dict with the keys 'title', 'author', 'img', 'day', 'month'
            and 'year'. 'title' is the space-joined ``title`` attribute of
            every image found in the ``div#comic`` element.
        """
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find('div', id='comic').find_all('img')
        title = ' '.join([pic['title'] for pic in pictures])
        # Sanity check: alt and title are expected to match on every image.
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        sources = [pic['src'] for pic in pictures]
        return {
            'title': title,
            'author': author,
            'img': sources,
            'day': published.day,
            'month': published.month,
            'year': published.year,
        }
1258
1259
@@ 435-457 (lines=23) @@
432
        }
433
434
435
class GenericLeMondeBlog(GenericNavigableComic):
    """Generic class to retrieve comics from Le Monde blogs.

    ``first_url`` is left as ``NotImplemented`` here; concrete blogs are
    presumably expected to override it (confirm against the subclasses).
    """
    _categories = ('LEMONDE', 'FRANCAIS')
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = NotImplemented
    # Format handed to string_to_date for the text of the entry-date span.
    date_format = "%d %B %Y"

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Args:
            soup: parsed blog page; ``find`` and ``find_all`` are called
                on it (presumably a BeautifulSoup document - confirm with
                caller).
            link: not used by this implementation.

        Returns:
            A dict with the keys 'title', 'url2', 'img', 'month', 'year'
            and 'day'. 'img' is built from the ``content`` of every
            ``og:image`` meta tag and 'url2' from the shortlink ``href``.
        """
        shortlink = soup.find('link', rel='shortlink')
        url2 = shortlink['href']
        og_title = soup.find('meta', property='og:title')
        title = og_title['content']
        raw_date = soup.find("span", class_="entry-date").string
        # "fr_FR.utf8" is passed on to string_to_date (presumably the locale
        # used to parse the month name - confirm in string_to_date).
        published = string_to_date(raw_date, cls.date_format, "fr_FR.utf8")
        og_images = soup.find_all('meta', property='og:image')
        pictures = [convert_iri_to_plain_ascii_uri(meta['content']) for meta in og_images]
        return {
            'title': title,
            'url2': url2,
            'img': pictures,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
459
460