Code Duplication    Length = 22-26 lines in 6 locations

comics.py 6 locations

@@ 958-983 (lines=26) @@
955
956
class MyExtraLife(GenericNavigableComic):
    """Scraper for the My Extra Life comics (http://www.myextralife.com)."""
    name = 'extralife'
    long_name = 'My Extra Life'
    url = 'http://www.myextralife.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the first-comic navigation anchor from the page at ``cls.url``."""
        home = get_soup_at_url(cls.url)
        return home.find('a', class_='comic_nav_link first_comic_link')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, image URLs and publication date from a comic page.

        ``soup`` is the parsed comic page; ``link`` is not used here.
        Returns a dict with the keys 'title', 'img', 'day', 'month', 'year'.
        """
        title = soup.find("h1", class_="comic_title").string
        raw_date = soup.find("span", class_="comic_date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        comic_imgs = soup.find_all("img", class_="comic")
        # Sanity check: each comic image must repeat the page title in both
        # its 'alt' and 'title' attributes.
        for img in comic_imgs:
            assert img['alt'] == img['title'] == title
        # Images with an empty 'src' are dropped.
        sources = [src for src in (img['src'] for img in comic_imgs) if src]
        return dict(
            title=title,
            img=sources,
            day=published.day,
            month=published.month,
            year=published.year,
        )
983
984
985
class SaturdayMorningBreakfastCereal(GenericNavigableComic):
986
    """Class to retrieve Saturday Morning Breakfast Cereal comics."""
@@ 386-407 (lines=22) @@
383
        }
384
385
386
class GenericLeMondeBlog(GenericNavigableComic):
    """Shared base for scrapers of comics published on Le Monde blogs."""
    _categories = ('LEMONDE', 'FRANCAIS')
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    # Placeholder value; presumably each concrete subclass supplies its own.
    first_url = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract short link, title, images and date from a blog post page.

        ``soup`` is the parsed post page; ``link`` is not used here.
        Returns a dict with the keys 'title', 'url2', 'img', 'month',
        'year', 'day'.
        """
        short_url = soup.find('link', rel='shortlink')['href']
        post_title = soup.find('meta', property='og:title')['content']
        raw_date = soup.find("span", class_="entry-date").string
        # Dates are parsed with the French locale ("%d %B %Y").
        published = string_to_date(raw_date, "%d %B %Y", "fr_FR.utf8")
        og_images = soup.find_all('meta', property='og:image')

        info = {'title': post_title, 'url2': short_url}
        info['img'] = [
            convert_iri_to_plain_ascii_uri(meta['content']) for meta in og_images
        ]
        info.update(
            month=published.month,
            year=published.year,
            day=published.day,
        )
        return info
409
410
@@ 2374-2398 (lines=25) @@
2371
2372
class LonnieMillsap(GenericNavigableComic):
    """Scraper for Lonnie Millsap's comics (http://www.lonniemillsap.com)."""
    name = 'millsap'
    long_name = 'Lonnie Millsap'
    url = 'http://www.lonniemillsap.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.lonniemillsap.com/?p=42'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, author, images and date from a post page.

        ``soup`` is the parsed post page; ``link`` is not used here.
        Returns a dict with the keys 'title', 'author', 'img', 'month',
        'year', 'day'.
        """
        post_title = soup.find('h2', class_='post-title').string
        # Author, date and images are all looked up inside the post content.
        content = soup.find('div', class_='post-content')
        author_link = content.find("span", class_="post-author").find("a")
        author = author_link.string
        raw_date = content.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        entry = content.find("div", class_="entry")
        pictures = entry.find_all("img")

        info = {'title': post_title, 'author': author}
        info['img'] = [picture['src'] for picture in pictures]
        info.update(
            month=published.month,
            year=published.year,
            day=published.day,
        )
        return info
2398
2399
2400
class LinsEditions(GenericNavigableComic):
2401
    """Class to retrieve L.I.N.S. Editions comics."""
@@ 3315-3336 (lines=22) @@
3312
    def get_nav(cls, soup):
        """Return the page's five navigation elements, unusable ones as None.

        All elements with class 'cnav' are collected. The first five must
        equal the remaining ones (asserted). Elements without an 'href'
        attribute are replaced by None in the returned list.
        """
        elements = soup.find_all(class_='cnav')
        first_bar = elements[:5]
        second_bar = elements[5:]
        assert first_bar == second_bar
        # Positions in the list: begin, prev, archive, next, end.
        return [
            elt if elt.get('href') is not None else None
            for elt in first_bar
        ]
3319
3320
    @classmethod
    def get_first_comic_link(cls):
        """Return the first navigation element of the page at ``cls.url``."""
        home = get_soup_at_url(cls.url)
        navigation = cls.get_nav(home)
        # Index 0 is the "begin" element.
        return navigation[0]
3324
3325
    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the next (``next_`` truthy) or previous navigation element."""
        navigation = cls.get_nav(last_soup)
        # Index 3 is the "next" element, index 1 the "prev" element.
        if next_:
            return navigation[3]
        return navigation[1]
3329
3330
    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the title and image URLs for one comic.

        The title is read from the 'title' attribute of ``link``; the images
        are every ``img`` element of ``soup`` whose id is 'comicimg'.
        """
        comic_title = link['title']
        pictures = soup.find_all('img', id='comicimg')
        sources = [picture['src'] for picture in pictures]
        return dict(title=comic_title, img=sources)
3339
@@ 1046-1070 (lines=25) @@
1043
1044
class Mercworks(GenericNavigableComic):
    """Scraper for the Mercworks comics (http://mercworks.net)."""
    # Also on http://mercworks.tumblr.com
    name = 'mercworks'
    long_name = 'Mercworks'
    url = 'http://mercworks.net'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract images, title, description and date from the page metadata.

        ``soup`` is the parsed comic page; ``link`` is not used here.
        Returns a dict with the keys 'img', 'title', 'desc', 'day', 'month',
        'year'. 'desc' is an empty string when no og:description is found.
        """
        og_title = soup.find('meta', property='og:title')['content']
        og_desc = soup.find('meta', property='og:description')
        if og_desc:
            description = og_desc['content']
        else:
            description = ""
        published_meta = soup.find('meta', property='article:published_time')
        # Keep only the leading 10 characters, i.e. the YYYY-MM-DD part.
        raw_date = published_meta['content'][:10]
        published = string_to_date(raw_date, "%Y-%m-%d")
        og_images = soup.find_all('meta', property='og:image')
        return dict(
            img=[meta['content'] for meta in og_images],
            title=og_title,
            desc=description,
            day=published.day,
            month=published.month,
            year=published.year,
        )
1070
1071
1072
class BerkeleyMews(GenericListableComic):
1073
    """Class to retrieve Berkeley Mews comics."""
@@ 2301-2325 (lines=25) @@
2298
        }
2299
2300
2301
class JuliasDrawings(GenericListableComic):
    """Scraper for Julia's Drawings (https://drawings.jvns.ca)."""
    name = 'julia'
    long_name = "Julia's Drawings"
    url = 'https://drawings.jvns.ca'
    get_url_from_archive_element = get_href

    @classmethod
    def get_archive_elements(cls):
        """Return the first anchor of each post article, in reverse page order.

        The articles are the ``article`` elements with class 'li post' found
        on the page at ``cls.url``.
        """
        home = get_soup_at_url(cls.url)
        posts = home.find_all('article', class_='li post')
        # NOTE(review): reversing presumably yields oldest-first order - confirm.
        return [post.find('a') for post in reversed(posts)]

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Extract title, image URLs and date from a drawing page.

        ``soup`` is the parsed page; ``archive_elt`` is not used here.
        Returns a dict with the keys 'title', 'img', 'month', 'year', 'day'.
        """
        published_meta = soup.find('meta', property='og:article:published_time')
        # Keep only the leading 10 characters, i.e. the YYYY-MM-DD part.
        raw_date = published_meta['content'][:10]
        published = string_to_date(raw_date, "%Y-%m-%d")
        post_title = soup.find('h3', class_='p-post-title').string
        content = soup.find('section', class_='post-content')
        pictures = content.find_all('img')

        info = {'title': post_title}
        # Image sources are joined onto the site URL.
        info['img'] = [
            urljoin_wrapper(cls.url, picture['src']) for picture in pictures
        ]
        info.update(
            month=published.month,
            year=published.year,
            day=published.day,
        )
        return info
2327
2328