Code Duplication    Length = 22-26 lines in 6 locations

comics.py 6 locations

@@ 2282-2306 (lines=25) @@
2279
            'title': title,
2280
            'author': author,
2281
            'img': [i['src'] for i in imgs],
2282
            'month': day.month,
2283
            'year': day.year,
2284
            'day': day.day,
2285
        }
2286
2287
2288
class LinsEditions(GenericNavigableComic):
2289
    """Class to retrieve L.I.N.S. Editions comics."""
2290
    # Also on http://linscomics.tumblr.com
2291
    # Now on https://warandpeas.com
2292
    name = 'lins'
2293
    long_name = 'L.I.N.S. Editions'
2294
    url = 'https://linsedition.com'
2295
    _categories = ('LINS', )
2296
    get_navi_link = get_link_rel_next
2297
    get_first_comic_link = simulate_first_link
2298
    first_url = 'https://linsedition.com/2011/09/07/l-i-n-s/'
2299
2300
    @classmethod
2301
    def get_comic_info(cls, soup, link):
2302
        """Get information about a particular comics."""
2303
        title = soup.find('meta', property='og:title')['content']
2304
        imgs = soup.find_all('meta', property='og:image')
2305
        date_str = soup.find('meta', property='article:published_time')['content'][:10]
2306
        day = string_to_date(date_str, "%Y-%m-%d")
2307
        return {
2308
            'title': title,
2309
            'img': [i['content'] for i in imgs],
@@ 349-371 (lines=23) @@
346
    _categories = ('EMPTY', )
347
348
    @classmethod
349
    def get_next_comic(cls, last_comic):
350
        """Implementation of get_next_comic returning no comics."""
351
        cls.log("comic is considered as empty - returning no comic")
352
        return []
353
354
355
class ExtraFabulousComics(GenericNavigableComic):
356
    """Class to retrieve Extra Fabulous Comics."""
357
    name = 'efc'
358
    long_name = 'Extra Fabulous Comics'
359
    url = 'http://extrafabulouscomics.com'
360
    get_first_comic_link = get_a_navi_navifirst
361
    get_navi_link = get_link_rel_next
362
363
    @classmethod
364
    def get_comic_info(cls, soup, link):
365
        """Get information about a particular comics."""
366
        img_src_re = re.compile('^%s/wp-content/uploads/' % cls.url)
367
        imgs = soup.find_all('img', src=img_src_re)
368
        title = soup.find('meta', property='og:title')['content']
369
        date_str = soup.find('meta', property='article:published_time')['content'][:10]
370
        day = string_to_date(date_str, "%Y-%m-%d")
371
        return {
372
            'title': title,
373
            'img': [i['src'] for i in imgs],
374
            'month': day.month,
@@ 375-396 (lines=22) @@
372
            'title': title,
373
            'img': [i['src'] for i in imgs],
374
            'month': day.month,
375
            'year': day.year,
376
            'day': day.day,
377
            'prefix': title + '-'
378
        }
379
380
381
class GenericLeMondeBlog(GenericNavigableComic):
382
    """Generic class to retrieve comics from Le Monde blogs."""
383
    _categories = ('LEMONDE', 'FRANCAIS')
384
    get_navi_link = get_link_rel_next
385
    get_first_comic_link = simulate_first_link
386
    first_url = NotImplemented
387
388
    @classmethod
389
    def get_comic_info(cls, soup, link):
390
        """Get information about a particular comics."""
391
        url2 = soup.find('link', rel='shortlink')['href']
392
        title = soup.find('meta', property='og:title')['content']
393
        date_str = soup.find("span", class_="entry-date").string
394
        day = string_to_date(date_str, "%d %B %Y", "fr_FR.utf8")
395
        imgs = soup.find_all('meta', property='og:image')
396
        return {
397
            'title': title,
398
            'url2': url2,
399
            'img': [convert_iri_to_plain_ascii_uri(i['content']) for i in imgs],
@@ 1809-1834 (lines=26) @@
1806
            'title': title,
1807
            'author': author,
1808
            'day': day.day,
1809
            'month': day.month,
1810
            'year': day.year,
1811
            'img': [i['content'] for i in imgs if i['content'] not in skip_imgs],
1812
        }
1813
1814
1815
class SafelyEndangered(GenericNavigableComic):
1816
    """Class to retrieve Safely Endangered comics."""
1817
    # Also on http://tumblr.safelyendangered.com
1818
    name = 'endangered'
1819
    long_name = 'Safely Endangered'
1820
    url = 'http://www.safelyendangered.com'
1821
    get_navi_link = get_link_rel_next
1822
    get_first_comic_link = simulate_first_link
1823
    first_url = 'http://www.safelyendangered.com/comic/ignored/'
1824
1825
    @classmethod
1826
    def get_comic_info(cls, soup, link):
1827
        """Get information about a particular comics."""
1828
        title = soup.find('h2', class_='post-title').string
1829
        date_str = soup.find('span', class_='post-date').string
1830
        day = string_to_date(date_str, '%B %d, %Y')
1831
        imgs = soup.find('div', id='comic').find_all('img')
1832
        alt = imgs[0]['alt']
1833
        assert all(i['alt'] == i['title'] for i in imgs)
1834
        return {
1835
            'day': day.day,
1836
            'month': day.month,
1837
            'year': day.year,
@@ 919-944 (lines=26) @@
916
            'img': [i['src'] for i in imgs],
917
            'title': title,
918
            'author': author,
919
            'month': day.month,
920
            'year': day.year,
921
            'day': day.day,
922
        }
923
924
925
class MyExtraLife(GenericNavigableComic):
926
    """Class to retrieve My Extra Life comics."""
927
    name = 'extralife'
928
    long_name = 'My Extra Life'
929
    url = 'http://www.myextralife.com'
930
    get_navi_link = get_link_rel_next
931
932
    @classmethod
933
    def get_first_comic_link(cls):
934
        """Get link to first comics."""
935
        return get_soup_at_url(cls.url).find('a', class_='comic_nav_link first_comic_link')
936
937
    @classmethod
938
    def get_comic_info(cls, soup, link):
939
        """Get information about a particular comics."""
940
        title = soup.find("h1", class_="comic_title").string
941
        date_str = soup.find("span", class_="comic_date").string
942
        day = string_to_date(date_str, "%B %d, %Y")
943
        imgs = soup.find_all("img", class_="comic")
944
        assert all(i['alt'] == i['title'] == title for i in imgs)
945
        return {
946
            'title': title,
947
            'img': [i['src'] for i in imgs if i["src"]],
@@ 3191-3212 (lines=22) @@
3188
        """Get information about a particular comics."""
3189
        title = link['title']
3190
        imgs = soup.find_all('img', id='comicimg')
3191
        return {
3192
            'title': title,
3193
            'img': [i['src'] for i in imgs],
3194
        }
3195
3196
3197
class MarketoonistComics(GenericNavigableComic):
3198
    """Class to retrieve Marketoonist Comics."""
3199
    name = 'marketoonist'
3200
    long_name = 'Marketoonist'
3201
    url = 'https://marketoonist.com/cartoons'
3202
    get_first_comic_link = simulate_first_link
3203
    get_navi_link = get_link_rel_next
3204
    first_url = 'https://marketoonist.com/2002/10/the-8-types-of-brand-managers-2.html'
3205
3206
    @classmethod
3207
    def get_comic_info(cls, soup, link):
3208
        """Get information about a particular comics."""
3209
        imgs = soup.find_all('meta', property='og:image')
3210
        date_str = soup.find('meta', property='article:published_time')['content'][:10]
3211
        day = string_to_date(date_str, "%Y-%m-%d")
3212
        title = soup.find('meta', property='og:title')['content']
3213
        return {
3214
            'img': [i['content'] for i in imgs],
3215
            'day': day.day,