Code Duplication    Length = 21-26 lines in 5 locations

comics.py 5 locations

@@ 899-924 (lines=26) @@
896
        }
897
898
899
class MyExtraLife(GenericNavigableComic):
    """Retrieve comics published on My Extra Life."""
    name = 'extralife'
    long_name = 'My Extra Life'
    url = 'http://www.myextralife.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the "first comic" navigation anchor from the home page."""
        home_page = get_soup_at_url(cls.url)
        return home_page.find('a', class_='comic_nav_link first_comic_link')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title, images, date) for one comic page.

        `soup` is the parsed comic page; `link` is not used here.
        """
        heading = soup.find("h1", class_="comic_title").string
        # The page shows the date in a form matching "%B %d, %Y".
        published = string_to_date(
            soup.find("span", class_="comic_date").string,
            "%B %d, %Y")
        pictures = soup.find_all("img", class_="comic")
        # Sanity check: every comic image carries the page title as both
        # its alt text and its title attribute.
        assert all(pic['alt'] == pic['title'] == heading for pic in pictures)
        # Images with an empty src are dropped.
        sources = [pic['src'] for pic in pictures if pic["src"]]
        return {
            'title': heading,
            'img': sources,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
926
927
@@ 2249-2271 (lines=23) @@
2246
        }
2247
2248
2249
class LinsEditions(GenericNavigableComic):
    """Retrieve comics published by L.I.N.S. Editions."""
    # Also available on http://linscomics.tumblr.com
    name = 'lins'
    long_name = 'L.I.N.S. Editions'
    url = 'https://linsedition.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'https://linsedition.com/2011/09/07/l-i-n-s/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title, images, date) for one comic page.

        Everything is read from the page's <meta> tags; `link` is not
        used here.
        """
        heading = soup.find('meta', property='og:title')['content']
        image_metas = soup.find_all('meta', property='og:image')
        published_meta = soup.find('meta', property='article:published_time')
        # Only the first 10 characters are kept, to match "%Y-%m-%d".
        published = string_to_date(published_meta['content'][:10], "%Y-%m-%d")
        return {
            'title': heading,
            'img': [meta['content'] for meta in image_metas],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2273
2274
@@ 338-360 (lines=23) @@
335
        return []
336
337
338
class ExtraFabulousComics(GenericNavigableComic):
    """Class to retrieve Extra Fabulous Comics."""
    name = 'efc'
    long_name = 'Extra Fabulous Comics'
    url = 'http://extrafabulouscomics.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        `soup` is the parsed comic page; `link` is not used here.
        Returns a dict with the title, the image URLs, the publication
        date split into day/month/year, and a 'prefix' built from the title.
        """
        # Escape the site URL before embedding it in the pattern: an
        # unescaped '.' would match any character, not just a literal dot.
        img_src_re = re.compile(
            '^%s/wp-content/uploads/' % re.escape(cls.url))
        imgs = soup.find_all('img', src=img_src_re)
        title = soup.find('meta', property='og:title')['content']
        # Only the first 10 characters are kept, to match "%Y-%m-%d".
        date_str = soup.find(
            'meta', property='article:published_time')['content'][:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        return {
            'title': title,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'prefix': title + '-'
        }
362
363
@@ 364-384 (lines=21) @@
361
        }
362
363
364
class GenericLeMondeBlog(GenericNavigableComic):
    """Shared base for comics hosted on Le Monde blogs."""
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    # Placeholder value; presumably overridden by each concrete blog
    # class -- confirm against the subclasses.
    first_url = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title, short link, images, date) for a post.

        `soup` is the parsed post page; `link` is not used here.
        """
        short_link = soup.find('link', rel='shortlink')['href']
        heading = soup.find('meta', property='og:title')['content']
        # The date text is passed with the "%d %B %Y" format and the
        # "fr_FR.utf8" locale argument.
        published = string_to_date(
            soup.find("span", class_="entry-date").string,
            "%d %B %Y",
            "fr_FR.utf8")
        image_metas = soup.find_all('meta', property='og:image')
        image_urls = [
            convert_iri_to_plain_ascii_uri(meta['content'])
            for meta in image_metas
        ]
        return {
            'title': heading,
            'url2': short_link,
            'img': image_urls,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
386
387
@@ 2406-2430 (lines=25) @@
2403
        }
2404
2405
2406
class TheAwkwardYeti(GenericNavigableComic):
    """Retrieve comics published on The Awkward Yeti."""
    # Also available on:
    #   http://www.gocomics.com/the-awkward-yeti
    #   http://larstheyeti.tumblr.com
    #   https://tapastic.com/series/TheAwkwardYeti
    name = 'yeti'
    long_name = 'The Awkward Yeti'
    url = 'http://theawkwardyeti.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (images, title, date) for one comic page.

        `soup` is the parsed comic page; `link` is not used here.
        """
        heading = soup.find('h2', class_='post-title').string
        # The page shows the date in a form matching "%B %d, %Y".
        published = string_to_date(
            soup.find("span", class_="post-date").string,
            "%B %d, %Y")
        comic_div = soup.find("div", id="comic")
        pictures = comic_div.find_all("img")
        # Sanity check on the first image only: its alt text must equal
        # its title attribute. Later images are not checked.
        assert all(
            position > 0 or pic['alt'] == pic['title']
            for position, pic in enumerate(pictures))
        return {
            'img': [pic['src'] for pic in pictures],
            'title': heading,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2432
2433