Code Duplication    Length = 22-26 lines in 7 locations

comics.py 7 locations

@@ 2429-2453 (lines=25) @@
2426
2427
2428
class LinsEditions(GenericNavigableComic):
    """Retrieve the L.I.N.S. Editions comics."""
    # Also on https://linscomics.tumblr.com
    # Now on https://warandpeas.com
    name = 'lins'
    long_name = 'L.I.N.S. Editions'
    url = 'https://linsedition.com'
    _categories = ('LINS', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'https://linsedition.com/2011/09/07/l-i-n-s/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title, images, date) for one comic page.

        Everything is read from the page's <meta> tags; `link` is not used.
        """
        title_meta = soup.find('meta', property='og:title')
        title = title_meta['content']
        image_metas = soup.find_all('meta', property='og:image')
        published_meta = soup.find('meta', property='article:published_time')
        # Only the first 10 characters ("YYYY-MM-DD") of the timestamp are parsed.
        published_on = string_to_date(published_meta['content'][:10], "%Y-%m-%d")
        return {
            'title': title,
            'img': [meta['content'] for meta in image_metas],
            'month': published_on.month,
            'year': published_on.year,
            'day': published_on.day,
        }
2454
2455
2456
class ThorsThundershack(GenericNavigableComic):
@@ 1073-1097 (lines=25) @@
1070
            'img': [i['content'] for i in imgs],
1071
        }
1072
1073
1074
class Mercworks(GenericNavigableComic):
    """Retrieve the Mercworks comics."""
    # Also on http://mercworks.tumblr.com
    name = 'mercworks'
    long_name = 'Mercworks'
    url = 'http://mercworks.net'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for one comic page from its <meta> tags.

        The description is optional: when the page has no og:description
        tag, an empty string is used. `link` is not used.
        """
        title = soup.find('meta', property='og:title')['content']
        description_meta = soup.find('meta', property='og:description')
        if description_meta:
            desc = description_meta['content']
        else:
            desc = ""
        published_meta = soup.find('meta', property='article:published_time')
        # Only the first 10 characters ("YYYY-MM-DD") of the timestamp are parsed.
        published_on = string_to_date(published_meta['content'][:10], "%Y-%m-%d")
        image_metas = soup.find_all('meta', property='og:image')
        return {
            'img': [meta['content'] for meta in image_metas],
            'title': title,
            'desc': desc,
            'day': published_on.day,
            'month': published_on.month,
            'year': published_on.year
        }
1100
@@ 3425-3446 (lines=22) @@
3422
3423
    @classmethod
    def get_first_comic_link(cls):
        """Return the <a class="first"> element found on the page at `cls.url`."""
        home_page = get_soup_at_url(cls.url)
        return home_page.find('a', class_='first')
3427
3428
    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the navigation anchor of `last_soup`.

        Looks for <a class="next"> when `next_` is truthy, otherwise for
        <a class="prev">.
        """
        if next_:
            css_class = 'next'
        else:
            css_class = 'prev'
        return last_soup.find('a', class_=css_class)
3432
3433
    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title, images, date) for one comic page.

        Title and images come from <meta> tags, the date from the
        `datetime` attribute of the first <time> element. `link` is not used.
        """
        title = soup.find('meta', property='og:title')['content']
        time_tag = soup.find('time')
        published_on = string_to_date(time_tag["datetime"], "%Y-%m-%d")
        image_metas = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'img': [meta['content'] for meta in image_metas],
            'day': published_on.day,
            'month': published_on.month,
            'year': published_on.year,
        }
3447
3448
3449
class TuMourrasMoinsBete(GenericNavigableComic):
@@ 415-436 (lines=22) @@
412
            'prefix': title + '-'
413
        }
414
415
416
class GenericLeMondeBlog(GenericNavigableComic):
    """Generic base to retrieve comics hosted on Le Monde blogs."""
    _categories = ('LEMONDE', 'FRANCAIS')
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    # Placeholder value; presumably each concrete subclass sets the real
    # URL of its first post -- verify against the subclasses.
    first_url = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for one blog post.

        Reads the short link, the title, the French-formatted entry date
        and the image URLs from the page. `link` is not used.
        """
        shortlink_tag = soup.find('link', rel='shortlink')
        url2 = shortlink_tag['href']
        title = soup.find('meta', property='og:title')['content']
        entry_date = soup.find("span", class_="entry-date")
        # Dates are parsed with the French locale ("%d %B %Y").
        published_on = string_to_date(entry_date.string, "%d %B %Y", "fr_FR.utf8")
        image_metas = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'url2': url2,
            'img': [convert_iri_to_plain_ascii_uri(meta['content']) for meta in image_metas],
            'month': published_on.month,
            'year': published_on.year,
            'day': published_on.day,
        }
439
@@ 1859-1884 (lines=26) @@
1856
1857
1858
class SafelyEndangered(GenericNavigableComic):
    """Class to retrieve Safely Endangered comics."""
    # Also on http://tumblr.safelyendangered.com
    name = 'endangered'
    long_name = 'Safely Endangered'
    url = 'http://www.safelyendangered.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.safelyendangered.com/comic/ignored/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Reads the post title, the post date ("Month day, Year") and the
        images found inside the <div id="comic"> element. The 'alt' entry
        is the alt text of the first image, or an empty string when the
        comic div contains no image. `link` is not used.
        """
        title = soup.find('h2', class_='post-title').string
        date_str = soup.find('span', class_='post-date').string
        day = string_to_date(date_str, '%B %d, %Y')
        imgs = soup.find('div', id='comic').find_all('img')
        # find_all returns an empty list when nothing matches; indexing it
        # unconditionally would raise IndexError on a post without images.
        alt = imgs[0]['alt'] if imgs else ""
        # Sanity check on the scraped markup: alt and title are expected to agree.
        assert all(i['alt'] == i['title'] for i in imgs)
        return {
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [i['src'] for i in imgs],
            'title': title,
            'alt': alt,
        }
1885
1886
1887
class PicturesInBoxes(GenericNavigableComic):
@@ 985-1010 (lines=26) @@
982
            'author': author,
983
        }
984
985
986
class MyExtraLife(GenericNavigableComic):
    """Retrieve the My Extra Life comics."""
    name = 'extralife'
    long_name = 'My Extra Life'
    url = 'http://www.myextralife.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the "first comic" navigation anchor from the home page."""
        home_page = get_soup_at_url(cls.url)
        return home_page.find('a', class_='comic_nav_link first_comic_link')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title, images, date) for one comic page.

        Images with an empty `src` are left out of the result.
        `link` is not used.
        """
        title = soup.find("h1", class_="comic_title").string
        date_text = soup.find("span", class_="comic_date").string
        published_on = string_to_date(date_text, "%B %d, %Y")
        comic_imgs = soup.find_all("img", class_="comic")
        # Sanity check: every image's alt and title must equal the page title.
        assert all(img['alt'] == img['title'] == title for img in comic_imgs)
        return {
            'title': title,
            'img': [img['src'] for img in comic_imgs if img["src"]],
            'day': published_on.day,
            'month': published_on.month,
            'year': published_on.year
        }
1013
@@ 2330-2354 (lines=25) @@
2327
2328
2329
class JuliasDrawings(GenericListableComic):
    """Retrieve Julia's Drawings."""
    name = 'julia'
    long_name = "Julia's Drawings"
    url = 'https://drawings.jvns.ca'
    get_url_from_archive_element = get_href

    @classmethod
    def get_archive_elements(cls):
        """Return the first link of each listed post, in reversed page order."""
        home_page = get_soup_at_url(cls.url)
        posts = home_page.find_all('article', class_='li post')
        return [post.find('a') for post in reversed(posts)]

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Build the info dict (title, images, date) for one drawing page.

        Image sources are resolved against `cls.url`. `archive_elt` is not used.
        """
        published_meta = soup.find('meta', property='og:article:published_time')
        # Only the first 10 characters ("YYYY-MM-DD") of the timestamp are parsed.
        published_on = string_to_date(published_meta['content'][:10], "%Y-%m-%d")
        title = soup.find('h3', class_='p-post-title').string
        content_section = soup.find('section', class_='post-content')
        pictures = content_section.find_all('img')
        return {
            'title': title,
            'img': [urljoin_wrapper(cls.url, pic['src']) for pic in pictures],
            'month': published_on.month,
            'year': published_on.year,
            'day': published_on.day,
        }
2355
2356
2357
class AnythingComic(GenericListableComic):