Code Duplication    Length = 22-26 lines in 7 locations

comics.py 7 locations

@@ 2402-2426 (lines=25) @@
2399
            'day': day.day,
2400
        }
2401
2402
2403
class LinsEditions(GenericNavigableComic):
    """Comic class for L.I.N.S. Editions."""
    # Mirrored at https://linscomics.tumblr.com
    # The comic has since moved to https://warandpeas.com
    name = 'lins'
    long_name = 'L.I.N.S. Editions'
    url = 'https://linsedition.com'
    _categories = ('LINS', )
    first_url = 'https://linsedition.com/2011/09/07/l-i-n-s/'
    get_first_comic_link = simulate_first_link
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary for the comic page held in `soup`.

        The title, images and publication date are all read from the
        page's <meta> tags; `link` is not used.
        """
        title_meta = soup.find('meta', property='og:title')
        headline = title_meta['content']
        image_metas = soup.find_all('meta', property='og:image')
        published_meta = soup.find('meta', property='article:published_time')
        # Only the first 10 characters (the YYYY-MM-DD part) are parsed.
        iso_day = published_meta['content'][:10]
        published = string_to_date(iso_day, "%Y-%m-%d")
        info = {'title': headline}
        info['img'] = [meta['content'] for meta in image_metas]
        info['month'] = published.month
        info['year'] = published.year
        info['day'] = published.day
        return info
2429
@@ 1046-1070 (lines=25) @@
1043
            'img': [i['content'] for i in imgs],
1044
        }
1045
1046
1047
class Mercworks(GenericNavigableComic):
    """Comic class for Mercworks."""
    # Mirrored at http://mercworks.tumblr.com
    name = 'mercworks'
    long_name = 'Mercworks'
    url = 'http://mercworks.net'
    get_navi_link = get_link_rel_next
    get_first_comic_link = get_a_comicnavbase_comicnavfirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary for the comic page held in `soup`.

        Title, description, images and publication date come from the
        page's <meta> tags; `link` is not used.
        """
        title_meta = soup.find('meta', property='og:title')
        headline = title_meta['content']
        desc_meta = soup.find('meta', property='og:description')
        # The description tag is optional: fall back to an empty string.
        if desc_meta:
            description = desc_meta['content']
        else:
            description = ""
        published_meta = soup.find('meta', property='article:published_time')
        # Only the first 10 characters (the YYYY-MM-DD part) are parsed.
        iso_day = published_meta['content'][:10]
        published = string_to_date(iso_day, "%Y-%m-%d")
        image_metas = soup.find_all('meta', property='og:image')
        info = {'img': [meta['content'] for meta in image_metas]}
        info['title'] = headline
        info['desc'] = description
        info['day'] = published.day
        info['month'] = published.month
        info['year'] = published.year
        return info
1073
@@ 360-384 (lines=25) @@
357
        return []
358
359
360
class ExtraFabulousComics(GenericNavigableComic):
    """Class to retrieve Extra Fabulous Comics."""
    # Also on https://extrafabulouscomics.tumblr.com
    name = 'efc'
    long_name = 'Extra Fabulous Comics'
    url = 'http://extrafabulouscomics.com'
    _categories = ('EFC', )
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Images are the <img> tags whose `src` starts with the site's
        wp-content/uploads/ path; the title and publication date are read
        from the page's <meta> tags. `link` is not used.

        Returns a dict with keys 'title', 'img', 'month', 'year', 'day'
        and 'prefix' (the title followed by '-').
        """
        # Escape the base URL so that regex metacharacters in it (such as
        # the '.' of the host name) only match themselves.
        img_src_re = re.compile('^%s/wp-content/uploads/' % re.escape(cls.url))
        imgs = soup.find_all('img', src=img_src_re)
        title = soup.find('meta', property='og:title')['content']
        # Only the first 10 characters (the YYYY-MM-DD part) are parsed.
        date_str = soup.find('meta', property='article:published_time')['content'][:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        return {
            'title': title,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'prefix': title + '-'
        }
386
387
@@ 3368-3389 (lines=22) @@
3365
    def get_comic_info(cls, soup, link):
3366
        """Get information about a particular comics."""
3367
        title = link['title']
3368
        imgs = soup.find_all('img', id='comicimg')
3369
        return {
3370
            'title': title,
3371
            'img': [i['src'] for i in imgs],
3372
        }
3373
3374
3375
class OffTheLeashDog(GenericNavigableComic):
3376
    """Class to retrieve Off The Leash Dog comics."""
3377
    # Also on http://rupertfawcettsdoggyblog.tumblr.com
3378
    # Also on http://www.rupertfawcettcartoons.com
3379
    name = 'offtheleash'
3380
    long_name = 'Off The Leash Dog'
3381
    url = 'http://offtheleashdogcartoons.com'
3382
    _categories = ('FAWCETT', )
3383
    get_navi_link = get_a_rel_next
3384
    get_first_comic_link = simulate_first_link
3385
    first_url = 'http://offtheleashdogcartoons.com/uncategorized/can-i-help-you/'
3386
3387
    @classmethod
3388
    def get_comic_info(cls, soup, link):
3389
        """Get information about a particular comics."""
3390
        print(link)
3391
        title = soup.find("h1", class_="entry-title").string
3392
        imgs = soup.find('div', class_='entry-content').find_all('img')
@@ 388-409 (lines=22) @@
385
        }
386
387
388
class GenericLeMondeBlog(GenericNavigableComic):
    """Shared base for comics hosted on Le Monde blogs."""
    _categories = ('LEMONDE', 'FRANCAIS')
    first_url = NotImplemented
    get_first_comic_link = simulate_first_link
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary for the blog post held in `soup`.

        The short link, title and images come from <link>/<meta> tags,
        the date from the "entry-date" span; `link` is not used.
        """
        shortlink_tag = soup.find('link', rel='shortlink')
        short_url = shortlink_tag['href']
        title_meta = soup.find('meta', property='og:title')
        headline = title_meta['content']
        date_span = soup.find("span", class_="entry-date")
        # The date text is parsed as "day month-name year" with a French locale.
        published = string_to_date(date_span.string, "%d %B %Y", "fr_FR.utf8")
        image_metas = soup.find_all('meta', property='og:image')
        image_urls = []
        for meta in image_metas:
            image_urls.append(convert_iri_to_plain_ascii_uri(meta['content']))
        info = {'title': headline, 'url2': short_url, 'img': image_urls}
        info['month'] = published.month
        info['year'] = published.year
        info['day'] = published.day
        return info
411
412
@@ 958-983 (lines=26) @@
955
        }
956
957
958
class MyExtraLife(GenericNavigableComic):
    """Comic class for My Extra Life."""
    name = 'extralife'
    long_name = 'My Extra Life'
    url = 'http://www.myextralife.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the "first comic" anchor found on the site's home page."""
        home_page = get_soup_at_url(cls.url)
        first_anchor = home_page.find('a', class_='comic_nav_link first_comic_link')
        return first_anchor

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary for the comic page held in `soup`.

        Title and date are read from the page's heading and date span;
        images are the <img> tags of class "comic". `link` is not used.
        """
        heading = soup.find("h1", class_="comic_title")
        headline = heading.string
        date_span = soup.find("span", class_="comic_date")
        published = string_to_date(date_span.string, "%B %d, %Y")
        comic_imgs = soup.find_all("img", class_="comic")
        # Sanity check: every image's alt and title text must equal the page title.
        assert all(img['alt'] == img['title'] == headline for img in comic_imgs)
        sources = []
        for img in comic_imgs:
            # Images with an empty src are left out.
            if img["src"]:
                sources.append(img['src'])
        info = {'title': headline, 'img': sources}
        info['day'] = published.day
        info['month'] = published.month
        info['year'] = published.year
        return info
985
986
@@ 2304-2328 (lines=25) @@
2301
        }
2302
2303
2304
class JuliasDrawings(GenericListableComic):
    """Comic class for Julia's Drawings."""
    name = 'julia'
    long_name = "Julia's Drawings"
    url = 'https://drawings.jvns.ca'
    get_url_from_archive_element = get_href

    @classmethod
    def get_archive_elements(cls):
        """Return the first <a> of each post article on the home page.

        The articles are walked in the reverse of their page order.
        """
        home_page = get_soup_at_url(cls.url)
        posts = home_page.find_all('article', class_='li post')
        anchors = []
        for post in reversed(posts):
            anchors.append(post.find('a'))
        return anchors

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Build the info dictionary for the post page held in `soup`.

        The date comes from a <meta> tag, the title from the post heading
        and the images from the post content section, with each image
        `src` joined onto the site URL. `archive_elt` is not used.
        """
        published_meta = soup.find('meta', property='og:article:published_time')
        # Only the first 10 characters (the YYYY-MM-DD part) are parsed.
        iso_day = published_meta['content'][:10]
        published = string_to_date(iso_day, "%Y-%m-%d")
        heading = soup.find('h3', class_='p-post-title')
        headline = heading.string
        content = soup.find('section', class_='post-content')
        post_imgs = content.find_all('img')
        info = {'title': headline}
        info['img'] = [urljoin_wrapper(cls.url, img['src']) for img in post_imgs]
        info['month'] = published.month
        info['year'] = published.year
        info['day'] = published.day
        return info
2330
2331