Code Duplication    Length = 21-26 lines in 5 locations

comics.py 5 locations

@@ 338-360 (lines=23) @@
335
        return []
336
337
338
class ExtraFabulousComics(GenericNavigableComic):
    """Class to retrieve Extra Fabulous Comics."""
    name = 'efc'
    long_name = 'Extra Fabulous Comics'
    url = 'http://extrafabulouscomics.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The images are the <img> tags whose ``src`` starts with the site's
        ``/wp-content/uploads/`` path, the title is the content of the
        ``og:title`` meta tag and the date is parsed from the first 10
        characters of the ``article:published_time`` meta content.

        Returns a dict with the keys 'title', 'img', 'month', 'year', 'day'
        and 'prefix' (the title followed by '-').
        """
        # Escape the URL before embedding it in the pattern so that its dots
        # match literally instead of acting as "any character" wildcards.
        img_src_re = re.compile('^%s/wp-content/uploads/' % re.escape(cls.url))
        imgs = soup.find_all('img', src=img_src_re)
        title = soup.find('meta', property='og:title')['content']
        # Only the leading "YYYY-MM-DD" part of the timestamp is parsed.
        date_str = soup.find('meta', property='article:published_time')['content'][:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        return {
            'title': title,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'prefix': title + '-'
        }
362
363
@@ 364-384 (lines=21) @@
361
        }
362
363
364
class GenericLeMondeBlog(GenericNavigableComic):
    """Generic base for comics published on Le Monde blogs."""
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    # Placeholder value; presumably overridden by concrete subclasses - confirm.
    first_url = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dict for the comic found in `soup`.

        Reads the shortlink, the og:title, the entry date (parsed as a
        French "day month year" string) and every og:image of the page.
        """
        short_url = soup.find('link', rel='shortlink')['href']
        comic_title = soup.find('meta', property='og:title')['content']
        raw_date = soup.find("span", class_="entry-date").string
        date = string_to_date(raw_date, "%d %B %Y", "fr_FR.utf8")
        image_metas = soup.find_all('meta', property='og:image')
        # Image addresses go through the IRI -> plain ASCII URI conversion.
        image_urls = [
            convert_iri_to_plain_ascii_uri(meta['content'])
            for meta in image_metas
        ]
        info = {
            'title': comic_title,
            'url2': short_url,
            'img': image_urls,
            'month': date.month,
            'year': date.year,
            'day': date.day,
        }
        return info
386
387
@@ 900-925 (lines=26) @@
897
        }
898
899
900
class MyExtraLife(GenericNavigableComic):
    """Retrieve the My Extra Life comics."""
    name = 'extralife'
    long_name = 'My Extra Life'
    url = 'http://www.myextralife.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'first comic' navigation anchor found on the home page."""
        home_page = get_soup_at_url(cls.url)
        return home_page.find('a', class_='comic_nav_link first_comic_link')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dict for the comic found in `soup`.

        Title and date come from the "comic_title" heading and the
        "comic_date" span; images are the <img> tags of class "comic".
        """
        comic_title = soup.find("h1", class_="comic_title").string
        raw_date = soup.find("span", class_="comic_date").string
        date = string_to_date(raw_date, "%B %d, %Y")
        comic_imgs = soup.find_all("img", class_="comic")
        # Sanity check: each image's alt and title text equal the comic title.
        assert all(
            img['alt'] == img['title'] == comic_title for img in comic_imgs
        )
        # Images with an empty src are left out.
        sources = [img['src'] for img in comic_imgs if img["src"]]
        info = {
            'title': comic_title,
            'img': sources,
            'day': date.day,
            'month': date.month,
            'year': date.year
        }
        return info
927
928
@@ 2250-2272 (lines=23) @@
2247
        }
2248
2249
2250
class LinsEditions(GenericNavigableComic):
    """Retrieve the L.I.N.S. Editions comics."""
    # Also on http://linscomics.tumblr.com
    name = 'lins'
    long_name = 'L.I.N.S. Editions'
    url = 'https://linsedition.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'https://linsedition.com/2011/09/07/l-i-n-s/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dict for the comic found in `soup`.

        Everything is read from the page's meta tags: og:title, og:image
        and article:published_time.
        """
        comic_title = soup.find('meta', property='og:title')['content']
        image_metas = soup.find_all('meta', property='og:image')
        published = soup.find('meta', property='article:published_time')['content']
        # Only the leading "YYYY-MM-DD" part of the timestamp is parsed.
        date = string_to_date(published[:10], "%Y-%m-%d")
        info = {
            'title': comic_title,
            'img': [meta['content'] for meta in image_metas],
            'month': date.month,
            'year': date.year,
            'day': date.day,
        }
        return info
2274
2275
@@ 2407-2431 (lines=25) @@
2404
        }
2405
2406
2407
class TheAwkwardYeti(GenericNavigableComic):
    """Retrieve The Awkward Yeti comics."""
    # Also on http://www.gocomics.com/the-awkward-yeti
    # Also on http://larstheyeti.tumblr.com
    # Also on https://tapastic.com/series/TheAwkwardYeti
    name = 'yeti'
    long_name = 'The Awkward Yeti'
    url = 'http://theawkwardyeti.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dict for the comic found in `soup`.

        Title and date come from the "post-title" heading and the
        "post-date" span; images are all <img> tags inside the element
        with id "comic".
        """
        comic_title = soup.find('h2', class_='post-title').string
        raw_date = soup.find("span", class_="post-date").string
        date = string_to_date(raw_date, "%B %d, %Y")
        comic_div = soup.find("div", id="comic")
        comic_imgs = comic_div.find_all("img")
        # Sanity check on the first image only: its alt and title must match.
        assert all(img['alt'] == img['title'] for img in comic_imgs[:1])
        info = {
            'img': [img['src'] for img in comic_imgs],
            'title': comic_title,
            'day': date.day,
            'month': date.month,
            'year': date.year
        }
        return info
2433
2434