Code Duplication    Length = 22-26 lines in 6 locations

comics.py 6 locations

@@ 2282-2306 (lines=25) @@
2279
        }
2280
2281
2282
class LinsEditions(GenericNavigableComic):
    """Retrieve L.I.N.S. Editions comics."""
    # Also on http://linscomics.tumblr.com
    # Now on https://warandpeas.com
    name = 'lins'
    long_name = 'L.I.N.S. Editions'
    url = 'https://linsedition.com'
    _categories = ('LINS', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'https://linsedition.com/2011/09/07/l-i-n-s/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary for the comic page parsed in `soup`.

        The title, the images and the publication date are all read from
        the page's <meta> tags.
        """
        og_title = soup.find('meta', property='og:title')
        title = og_title['content']
        image_tags = soup.find_all('meta', property='og:image')
        published = soup.find('meta', property='article:published_time')
        # Only the first 10 characters (the YYYY-MM-DD part) are parsed.
        date = string_to_date(published['content'][:10], "%Y-%m-%d")
        return dict(
            title=title,
            img=[tag['content'] for tag in image_tags],
            month=date.month,
            year=date.year,
            day=date.day,
        )
2308
2309
@@ 349-371 (lines=23) @@
346
        return []
347
348
349
class ExtraFabulousComics(GenericNavigableComic):
    """Class to retrieve Extra Fabulous Comics."""
    name = 'efc'
    long_name = 'Extra Fabulous Comics'
    url = 'http://extrafabulouscomics.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        Images are the <img> tags whose `src` starts with the site's
        `/wp-content/uploads/` path; the title and the publication date
        come from the page's <meta> tags.

        Returns a dict with the keys 'title', 'img', 'month', 'year',
        'day' and 'prefix' (the title followed by a dash).
        """
        # Escape the URL so that its dots match literally instead of
        # matching any character.
        img_src_re = re.compile(
            '^%s/wp-content/uploads/' % re.escape(cls.url))
        imgs = soup.find_all('img', src=img_src_re)
        title = soup.find('meta', property='og:title')['content']
        # Only the first 10 characters (the YYYY-MM-DD part) are parsed.
        date_str = soup.find(
            'meta', property='article:published_time')['content'][:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        return {
            'title': title,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'prefix': title + '-',
        }
373
374
@@ 375-396 (lines=22) @@
372
        }
373
374
375
class GenericLeMondeBlog(GenericNavigableComic):
    """Common base for comics hosted on Le Monde blogs."""
    _categories = ('LEMONDE', 'FRANCAIS')
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary for the blog post parsed in `soup`.

        The short link is exposed as 'url2'; the date is read from the
        "entry-date" span using a French date format.
        """
        shortlink = soup.find('link', rel='shortlink')['href']
        title = soup.find('meta', property='og:title')['content']
        date_text = soup.find("span", class_="entry-date").string
        date = string_to_date(date_text, "%d %B %Y", "fr_FR.utf8")
        og_images = soup.find_all('meta', property='og:image')
        image_urls = [
            convert_iri_to_plain_ascii_uri(tag['content'])
            for tag in og_images
        ]
        return dict(
            title=title,
            url2=shortlink,
            img=image_urls,
            month=date.month,
            year=date.year,
            day=date.day,
        )
398
399
@@ 1809-1834 (lines=26) @@
1806
        }
1807
1808
1809
class SafelyEndangered(GenericNavigableComic):
    """Retrieve Safely Endangered comics."""
    # Also on http://tumblr.safelyendangered.com
    name = 'endangered'
    long_name = 'Safely Endangered'
    url = 'http://www.safelyendangered.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.safelyendangered.com/comic/ignored/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary for the comic page parsed in `soup`.

        The images are the <img> tags inside the "comic" div; the alt
        text of the first one is reported as 'alt'.
        """
        post_title = soup.find('h2', class_='post-title').string
        post_date = soup.find('span', class_='post-date').string
        date = string_to_date(post_date, '%B %d, %Y')
        comic_div = soup.find('div', id='comic')
        comic_imgs = comic_div.find_all('img')
        first_alt = comic_imgs[0]['alt']
        # Sanity check: every image carries the same text as alt and title.
        assert all(img['alt'] == img['title'] for img in comic_imgs)
        return dict(
            day=date.day,
            month=date.month,
            year=date.year,
            img=[img['src'] for img in comic_imgs],
            title=post_title,
            alt=first_alt,
        )
1836
1837
@@ 919-944 (lines=26) @@
916
        }
917
918
919
class MyExtraLife(GenericNavigableComic):
    """Retrieve My Extra Life comics."""
    name = 'extralife'
    long_name = 'My Extra Life'
    url = 'http://www.myextralife.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the "first comic" navigation link found on the home page."""
        home_page = get_soup_at_url(cls.url)
        return home_page.find('a', class_='comic_nav_link first_comic_link')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary for the comic page parsed in `soup`.

        Images with an empty `src` are left out of the 'img' list.
        """
        comic_title = soup.find("h1", class_="comic_title").string
        comic_date = soup.find("span", class_="comic_date").string
        date = string_to_date(comic_date, "%B %d, %Y")
        comic_imgs = soup.find_all("img", class_="comic")
        # Sanity check: alt and title of every image equal the page title.
        assert all(
            img['alt'] == img['title'] == comic_title for img in comic_imgs)
        return dict(
            title=comic_title,
            img=[img['src'] for img in comic_imgs if img["src"]],
            day=date.day,
            month=date.month,
            year=date.year,
        )
946
947
@@ 3191-3212 (lines=22) @@
3188
        }
3189
3190
3191
class MarketoonistComics(GenericNavigableComic):
    """Retrieve Marketoonist comics."""
    name = 'marketoonist'
    long_name = 'Marketoonist'
    url = 'https://marketoonist.com/cartoons'
    get_first_comic_link = simulate_first_link
    get_navi_link = get_link_rel_next
    first_url = 'https://marketoonist.com/2002/10/the-8-types-of-brand-managers-2.html'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary for the comic page parsed in `soup`.

        The images, the publication date and the title are all read from
        the page's <meta> tags.
        """
        image_tags = soup.find_all('meta', property='og:image')
        published = soup.find('meta', property='article:published_time')
        # Only the first 10 characters (the YYYY-MM-DD part) are parsed.
        date = string_to_date(published['content'][:10], "%Y-%m-%d")
        og_title = soup.find('meta', property='og:title')
        title = og_title['content']
        return dict(
            img=[tag['content'] for tag in image_tags],
            day=date.day,
            month=date.month,
            year=date.year,
            title=title,
        )
3214
3215