Code Duplication    Length = 22-26 lines in 5 locations

comics.py 5 locations

@@ 958-983 (lines=26) @@
955
956
class MyExtraLife(GenericNavigableComic):
    """Class to retrieve My Extra Life comics."""
    name = 'extralife'
    long_name = 'My Extra Life'
    url = 'http://www.myextralife.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the "first comic" navigation anchor of the page at `cls.url`."""
        start_page = get_soup_at_url(cls.url)
        return start_page.find('a', class_='comic_nav_link first_comic_link')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, image sources and date from a comic page.

        `soup` is the parsed comic page; `link` is not used here.
        Returns a dict with the keys 'title', 'img', 'day', 'month', 'year'.
        """
        heading = soup.find("h1", class_="comic_title")
        title = heading.string
        date_span = soup.find("span", class_="comic_date")
        published = string_to_date(date_span.string, "%B %d, %Y")
        pictures = soup.find_all("img", class_="comic")
        # Sanity check: every comic image is expected to repeat the page
        # title in both its 'alt' and 'title' attributes.
        assert all(pic['alt'] == pic['title'] == title for pic in pictures)
        # Images with an empty 'src' are left out.
        sources = [pic['src'] for pic in pictures if pic["src"]]
        return {
            'title': title,
            'img': sources,
            'day': published.day,
            'month': published.month,
            'year': published.year,
        }
983
984
985
class SaturdayMorningBreakfastCereal(GenericNavigableComic):
986
    """Class to retrieve Saturday Morning Breakfast Cereal comics."""
@@ 386-407 (lines=22) @@
383
        }
384
385
386
class GenericLeMondeBlog(GenericNavigableComic):
    """Generic class to retrieve comics from Le Monde blogs."""
    _categories = ('LEMONDE', 'FRANCAIS')
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    # Placeholder value; presumably overridden by concrete subclasses
    # (not visible here) - TODO confirm.
    first_url = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract short link, title, date and images from a blog post page.

        `soup` is the parsed post page; `link` is not used here.
        Returns a dict with the keys 'title', 'url2', 'img', 'month',
        'year' and 'day'.
        """
        shortlink_tag = soup.find('link', rel='shortlink')
        short_url = shortlink_tag['href']
        title_tag = soup.find('meta', property='og:title')
        post_title = title_tag['content']
        date_span = soup.find("span", class_="entry-date")
        # The date text is parsed with the French locale passed below.
        published = string_to_date(date_span.string, "%d %B %Y", "fr_FR.utf8")
        image_metas = soup.find_all('meta', property='og:image')
        image_urls = [
            convert_iri_to_plain_ascii_uri(meta['content'])
            for meta in image_metas
        ]
        return {
            'title': post_title,
            'url2': short_url,
            'img': image_urls,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
409
410
@@ 2374-2398 (lines=25) @@
2371
2372
class LinsEditions(GenericNavigableComic):
    """Class to retrieve L.I.N.S. Editions comics."""
    # Also on https://linscomics.tumblr.com
    # Now on https://warandpeas.com
    name = 'lins'
    long_name = 'L.I.N.S. Editions'
    url = 'https://linsedition.com'
    _categories = ('LINS', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'https://linsedition.com/2011/09/07/l-i-n-s/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, images and date from the page's meta tags.

        `soup` is the parsed comic page; `link` is not used here.
        Returns a dict with the keys 'title', 'img', 'month', 'year', 'day'.
        """
        title_meta = soup.find('meta', property='og:title')
        post_title = title_meta['content']
        image_metas = soup.find_all('meta', property='og:image')
        published_meta = soup.find('meta', property='article:published_time')
        # Only the first 10 characters are kept: they are parsed as
        # a "%Y-%m-%d" date below.
        iso_day = published_meta['content'][:10]
        published = string_to_date(iso_day, "%Y-%m-%d")
        image_urls = [meta['content'] for meta in image_metas]
        return {
            'title': post_title,
            'img': image_urls,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2398
2399
2400
class ThorsThundershack(GenericNavigableComic):
2401
    """Class to retrieve Thor's Thundershack comics."""
@@ 3315-3336 (lines=22) @@
3312
3313
class OffTheLeashDog(GenericNavigableComic):
    """Class to retrieve Off The Leash Dog comics."""
    # Also on http://rupertfawcettsdoggyblog.tumblr.com
    # Also on http://www.rupertfawcettcartoons.com
    name = 'offtheleash'
    long_name = 'Off The Leash Dog'
    url = 'http://offtheleashdogcartoons.com'
    _categories = ('FAWCETT', )
    get_navi_link = get_a_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://offtheleashdogcartoons.com/uncategorized/can-i-help-you/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        `soup` is the parsed comic page; `link` is not used here.
        Returns a dict with the keys 'title' (text of the "entry-title"
        heading) and 'img' (the 'src' of every image found inside the
        "entry-content" div). No date information is extracted.
        """
        title = soup.find("h1", class_="entry-title").string
        imgs = soup.find('div', class_='entry-content').find_all('img')
        return {
            'title': title,
            'img': [i['src'] for i in imgs],
        }
3335
3336
3337
class MarketoonistComics(GenericNavigableComic):
3338
    """Class to retrieve Marketoonist Comics."""
3339
    name = 'marketoonist'
@@ 1046-1070 (lines=25) @@
1043
1044
class Mercworks(GenericNavigableComic):
    """Class to retrieve Mercworks comics."""
    # Also on http://mercworks.tumblr.com
    name = 'mercworks'
    long_name = 'Mercworks'
    url = 'http://mercworks.net'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, description, date and images from the meta tags.

        `soup` is the parsed comic page; `link` is not used here.
        Returns a dict with the keys 'img', 'title', 'desc', 'day',
        'month' and 'year'.
        """
        title_meta = soup.find('meta', property='og:title')
        post_title = title_meta['content']
        description_meta = soup.find('meta', property='og:description')
        # The description is optional: fall back to an empty string when
        # no usable og:description tag was found.
        if description_meta:
            description = description_meta['content']
        else:
            description = ""
        published_meta = soup.find('meta', property='article:published_time')
        # Only the first 10 characters are kept: they are parsed as
        # a "%Y-%m-%d" date below.
        iso_day = published_meta['content'][:10]
        published = string_to_date(iso_day, "%Y-%m-%d")
        image_metas = soup.find_all('meta', property='og:image')
        image_urls = [meta['content'] for meta in image_metas]
        return {
            'img': image_urls,
            'title': post_title,
            'desc': description,
            'day': published.day,
            'month': published.month,
            'year': published.year,
        }
1070
1071
1072
class BerkeleyMews(GenericListableComic):
1073
    """Class to retrieve Berkeley Mews comics."""