Code Duplication    Length = 22-26 lines in 7 locations

comics.py 7 locations

@@ 2285-2309 (lines=25) @@
2282
    # Now on https://warandpeas.com
2283
    name = 'lins'
2284
    long_name = 'L.I.N.S. Editions'
2285
    url = 'https://linsedition.com'
2286
    _categories = ('LINS', )
2287
    get_navi_link = get_link_rel_next
2288
    get_first_comic_link = simulate_first_link
2289
    first_url = 'https://linsedition.com/2011/09/07/l-i-n-s/'
2290
2291
    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary for a single comic page.

        Everything is read from the page's <meta> tags: the title from
        og:title, the image URLs from every og:image tag and the date
        from article:published_time. `link` is not used here.
        """
        title_tag = soup.find('meta', property='og:title')
        comic_title = title_tag['content']
        image_tags = soup.find_all('meta', property='og:image')
        published_tag = soup.find('meta', property='article:published_time')
        # Only the first ten characters (YYYY-MM-DD) of the timestamp are parsed.
        published = string_to_date(published_tag['content'][:10], "%Y-%m-%d")
        image_urls = [tag['content'] for tag in image_tags]
        return {
            'title': comic_title,
            'img': image_urls,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2305
2306
2307
class ThorsThundershack(GenericNavigableComic):
2308
    """Class to retrieve Thor's Thundershack comics."""
2309
    # Also on http://tapastic.com/series/Thors-Thundershac
2310
    name = 'thor'
2311
    long_name = 'Thor\'s Thundershack'
2312
    url = 'http://www.thorsthundershack.com'
@@ 1020-1044 (lines=25) @@
1017
        }
1018
1019
1020
class Mercworks(GenericNavigableComic):
    """Retrieve the Mercworks comics."""
    # Also on http://mercworks.tumblr.com
    name = 'mercworks'
    long_name = 'Mercworks'
    url = 'http://mercworks.net'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary for a single comic page.

        Title, description, date and images come from the page's <meta>
        tags. A missing og:description yields an empty description.
        `link` is not used here.
        """
        comic_title = soup.find('meta', property='og:title')['content']
        description_tag = soup.find('meta', property='og:description')
        if description_tag:
            description = description_tag['content']
        else:
            description = ""
        published_tag = soup.find('meta', property='article:published_time')
        # Only the first ten characters (YYYY-MM-DD) of the timestamp are parsed.
        published = string_to_date(published_tag['content'][:10], "%Y-%m-%d")
        image_tags = soup.find_all('meta', property='og:image')
        image_urls = [tag['content'] for tag in image_tags]
        return {
            'img': image_urls,
            'title': comic_title,
            'desc': description,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
1046
1047
@@ 355-377 (lines=23) @@
352
        return []
353
354
355
class ExtraFabulousComics(GenericNavigableComic):
    """Class to retrieve Extra Fabulous Comics."""
    name = 'efc'
    long_name = 'Extra Fabulous Comics'
    url = 'http://extrafabulouscomics.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        Images are the <img> tags whose src starts with
        "<cls.url>/wp-content/uploads/". The title comes from the
        og:title <meta> tag and the date from article:published_time.
        `link` is not used here.

        Returns a dict with the keys 'title', 'img', 'month', 'year',
        'day' and 'prefix' (the title followed by '-').
        """
        # Escape the URL so that its dots (and any other regex
        # metacharacters) are matched literally rather than as wildcards.
        img_src_re = re.compile('^%s/wp-content/uploads/' % re.escape(cls.url))
        imgs = soup.find_all('img', src=img_src_re)
        title = soup.find('meta', property='og:title')['content']
        # Only the first ten characters (YYYY-MM-DD) of the timestamp are parsed.
        date_str = soup.find('meta', property='article:published_time')['content'][:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        return {
            'title': title,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'prefix': title + '-'
        }
379
380
@@ 3194-3215 (lines=22) @@
3191
    long_name = 'Marketoonist'
3192
    url = 'https://marketoonist.com/cartoons'
3193
    get_first_comic_link = simulate_first_link
3194
    get_navi_link = get_link_rel_next
3195
    first_url = 'https://marketoonist.com/2002/10/the-8-types-of-brand-managers-2.html'
3196
3197
    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary for a single comic page.

        Images, date and title are read from the page's <meta> tags
        (og:image, article:published_time, og:title). `link` is not
        used here.
        """
        image_tags = soup.find_all('meta', property='og:image')
        published_tag = soup.find('meta', property='article:published_time')
        # Only the first ten characters (YYYY-MM-DD) of the timestamp are parsed.
        published = string_to_date(published_tag['content'][:10], "%Y-%m-%d")
        comic_title = soup.find('meta', property='og:title')['content']
        image_urls = [tag['content'] for tag in image_tags]
        return {
            'img': image_urls,
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'title': comic_title,
        }
3211
3212
3213
class ConsoliaComics(GenericNavigableComic):
3214
    """Class to retrieve Consolia comics."""
3215
    name = 'consolia'
3216
    long_name = 'consolia'
3217
    url = 'https://consolia-comic.com'
3218
    get_url_from_link = join_cls_url_to_href
@@ 1806-1831 (lines=26) @@
1803
        }
1804
1805
1806
class SafelyEndangered(GenericNavigableComic):
    """Retrieve the Safely Endangered comics."""
    # Also on http://tumblr.safelyendangered.com
    name = 'endangered'
    long_name = 'Safely Endangered'
    url = 'http://www.safelyendangered.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.safelyendangered.com/comic/ignored/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary for a single comic page.

        The title and date are read from the post header elements, the
        images from the <div id="comic"> container. The alt text of the
        first image is reported as 'alt'. `link` is not used here.
        """
        comic_title = soup.find('h2', class_='post-title').string
        posted_on = soup.find('span', class_='post-date').string
        published = string_to_date(posted_on, '%B %d, %Y')
        comic_div = soup.find('div', id='comic')
        image_tags = comic_div.find_all('img')
        alt_text = image_tags[0]['alt']
        # Sanity check: every image carries the same text as alt and title.
        assert not any(tag['alt'] != tag['title'] for tag in image_tags)
        image_urls = [tag['src'] for tag in image_tags]
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': image_urls,
            'title': comic_title,
            'alt': alt_text,
        }
1833
1834
@@ 381-402 (lines=22) @@
378
        }
379
380
381
class GenericLeMondeBlog(GenericNavigableComic):
    """Generic base for comics hosted on Le Monde blogs."""
    _categories = ('LEMONDE', 'FRANCAIS')
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    # Placeholder value; presumably each concrete blog supplies its own.
    first_url = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary for a single blog post.

        The short link, title and images come from <link>/<meta> tags;
        the date is read from the "entry-date" span and parsed with the
        "fr_FR.utf8" argument passed to string_to_date. `link` is not
        used here.
        """
        short_url = soup.find('link', rel='shortlink')['href']
        post_title = soup.find('meta', property='og:title')['content']
        entry_date = soup.find("span", class_="entry-date").string
        published = string_to_date(entry_date, "%d %B %Y", "fr_FR.utf8")
        image_tags = soup.find_all('meta', property='og:image')
        image_urls = [
            convert_iri_to_plain_ascii_uri(tag['content'])
            for tag in image_tags
        ]
        return {
            'title': post_title,
            'url2': short_url,
            'img': image_urls,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
404
405
@@ 925-950 (lines=26) @@
922
        }
923
924
925
class MyExtraLife(GenericNavigableComic):
    """Retrieve the My Extra Life comics."""
    name = 'extralife'
    long_name = 'My Extra Life'
    url = 'http://www.myextralife.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the "first comic" navigation link found at cls.url."""
        home_page = get_soup_at_url(cls.url)
        return home_page.find('a', class_='comic_nav_link first_comic_link')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary for a single comic page.

        Title and date are read from the "comic_title" heading and the
        "comic_date" span; images are the <img class="comic"> tags.
        Images with an empty src are left out. `link` is not used here.
        """
        comic_title = soup.find("h1", class_="comic_title").string
        posted_on = soup.find("span", class_="comic_date").string
        published = string_to_date(posted_on, "%B %d, %Y")
        image_tags = soup.find_all("img", class_="comic")
        # Sanity check: alt and title of every image equal the page title.
        assert all(
            tag['alt'] == tag['title'] == comic_title for tag in image_tags
        )
        image_urls = [tag['src'] for tag in image_tags if tag["src"]]
        return {
            'title': comic_title,
            'img': image_urls,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
952
953