Code Duplication    Length = 22-26 lines in 8 locations

comics.py 8 locations

@@ 2353-2377 (lines=25) @@
2350
            'day': day.day,
2351
        }
2352
2353
2354
class LinsEditions(GenericNavigableComic):
    """Retrieve the L.I.N.S. Editions comics."""
    # Also on http://linscomics.tumblr.com
    # Now on https://warandpeas.com
    name = 'lins'
    long_name = 'L.I.N.S. Editions'
    url = 'https://linsedition.com'
    _categories = ('LINS', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'https://linsedition.com/2011/09/07/l-i-n-s/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary for the comic page parsed in `soup`.

        The title, the images and the publication date are all read from
        the page's <meta> tags (og:title, og:image and
        article:published_time). `link` is not used.
        """
        title_meta = soup.find('meta', property='og:title')
        comic_title = title_meta['content']
        image_metas = soup.find_all('meta', property='og:image')
        published = soup.find('meta', property='article:published_time')['content']
        # Only the leading 10 characters (the YYYY-MM-DD part) are parsed.
        pub_date = string_to_date(published[:10], "%Y-%m-%d")
        image_urls = [meta['content'] for meta in image_metas]
        return {
            'title': comic_title,
            'img': image_urls,
            'month': pub_date.month,
            'year': pub_date.year,
            'day': pub_date.day,
        }
2380
@@ 1049-1073 (lines=25) @@
1046
            'prefix': '%d-' % num,
1047
        }
1048
1049
1050
class Mercworks(GenericNavigableComic):
    """Retrieve the Mercworks comics."""
    # Also on http://mercworks.tumblr.com
    name = 'mercworks'
    long_name = 'Mercworks'
    url = 'http://mercworks.net'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary for the comic page parsed in `soup`.

        Title, description, images and publication date come from the
        page's <meta> tags; the description falls back to an empty string
        when the og:description tag is not found. `link` is not used.
        """
        comic_title = soup.find('meta', property='og:title')['content']
        desc_meta = soup.find('meta', property='og:description')
        if desc_meta:
            description = desc_meta['content']
        else:
            description = ""
        published = soup.find('meta', property='article:published_time')['content']
        # Only the leading 10 characters (the YYYY-MM-DD part) are parsed.
        pub_date = string_to_date(published[:10], "%Y-%m-%d")
        image_metas = soup.find_all('meta', property='og:image')
        image_urls = [meta['content'] for meta in image_metas]
        return {
            'img': image_urls,
            'title': comic_title,
            'desc': description,
            'day': pub_date.day,
            'month': pub_date.month,
            'year': pub_date.year
        }
1076
@@ 357-379 (lines=23) @@
354
        return []
355
356
357
class ExtraFabulousComics(GenericNavigableComic):
    """Class to retrieve Extra Fabulous Comics."""
    name = 'efc'
    long_name = 'Extra Fabulous Comics'
    url = 'http://extrafabulouscomics.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Images are the <img> tags whose src starts with the site's
        wp-content/uploads directory; the title and the publication date are
        read from the og:title and article:published_time <meta> tags.
        `link` is not used.
        """
        # The URL is escaped so that it is matched literally: without
        # re.escape, each '.' in the host name would match any character.
        img_src_re = re.compile(r'^%s/wp-content/uploads/' % re.escape(cls.url))
        imgs = soup.find_all('img', src=img_src_re)
        title = soup.find('meta', property='og:title')['content']
        # Only the leading 10 characters (the YYYY-MM-DD part) are parsed.
        date_str = soup.find('meta', property='article:published_time')['content'][:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        return {
            'title': title,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'prefix': title + '-'
        }
381
382
@@ 3294-3315 (lines=22) @@
3291
            'img': [i['src'] for i in imgs],
3292
        }
3293
3294
3295
class MarketoonistComics(GenericNavigableComic):
    """Retrieve the Marketoonist comics."""
    name = 'marketoonist'
    long_name = 'Marketoonist'
    url = 'https://marketoonist.com/cartoons'
    get_first_comic_link = simulate_first_link
    get_navi_link = get_link_rel_next
    first_url = 'https://marketoonist.com/2002/10/the-8-types-of-brand-managers-2.html'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary for the comic page parsed in `soup`.

        Images, publication date and title are all taken from the page's
        <meta> tags (og:image, article:published_time and og:title).
        `link` is not used.
        """
        image_metas = soup.find_all('meta', property='og:image')
        published = soup.find('meta', property='article:published_time')['content']
        # Only the leading 10 characters (the YYYY-MM-DD part) are parsed.
        pub_date = string_to_date(published[:10], "%Y-%m-%d")
        comic_title = soup.find('meta', property='og:title')['content']
        image_urls = [meta['content'] for meta in image_metas]
        return {
            'img': image_urls,
            'day': pub_date.day,
            'month': pub_date.month,
            'year': pub_date.year,
            'title': comic_title,
        }
3318
@@ 1894-1919 (lines=26) @@
1891
            'author': author,
1892
        }
1893
1894
1895
class Penmen(GenericNavigableComic):
    """Retrieve the Penmen comics."""
    name = 'penmen'
    long_name = 'Penmen'
    url = 'http://penmen.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://penmen.com/index.php/2016/09/12/penmen-announces-grin-big-brand-clothing/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary for the comic page parsed in `soup`.

        The title is the <title> string, the images are the <img> tags
        inside the 'entry-content' div, the short URL is the shortlink
        <link>, the tags are the strings of the rel="tag" anchors joined
        with spaces and the date comes from the first <time> element.
        `link` is not used.
        """
        page_title = soup.find('title').string
        content_div = soup.find('div', class_='entry-content')
        images = content_div.find_all('img')
        shortlink = soup.find('link', rel='shortlink')['href']
        tag_anchors = soup.find_all('a', rel='tag')
        joined_tags = ' '.join([anchor.string for anchor in tag_anchors])
        # Only the leading 10 characters (the YYYY-MM-DD part) are parsed.
        stamp = soup.find('time')['datetime']
        pub_date = string_to_date(stamp[:10], "%Y-%m-%d")
        image_urls = [image['src'] for image in images]
        return {
            'title': page_title,
            'short_url': shortlink,
            'img': image_urls,
            'tags': joined_tags,
            'month': pub_date.month,
            'year': pub_date.year,
            'day': pub_date.day,
        }
1922
@@ 1835-1860 (lines=26) @@
1832
            'img': [i['content'] for i in imgs if i['content'] not in skip_imgs],
1833
        }
1834
1835
1836
class SafelyEndangered(GenericNavigableComic):
    """Retrieve the Safely Endangered comics."""
    # Also on http://tumblr.safelyendangered.com
    name = 'endangered'
    long_name = 'Safely Endangered'
    url = 'http://www.safelyendangered.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.safelyendangered.com/comic/ignored/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary for the comic page parsed in `soup`.

        The title and the date are read from the 'post-title' heading and
        the 'post-date' span; the images are the <img> tags inside the
        'comic' div. The alt text reported is the one of the first image.
        `link` is not used.
        """
        post_title = soup.find('h2', class_='post-title').string
        post_date = soup.find('span', class_='post-date').string
        pub_date = string_to_date(post_date, '%B %d, %Y')
        comic_div = soup.find('div', id='comic')
        images = comic_div.find_all('img')
        first_alt = images[0]['alt']
        # Sanity check: every image carries the same text as alt and title.
        assert all(image['alt'] == image['title'] for image in images)
        return {
            'day': pub_date.day,
            'month': pub_date.month,
            'year': pub_date.year,
            'img': [image['src'] for image in images],
            'title': post_title,
            'alt': first_alt,
        }
1863
@@ 383-404 (lines=22) @@
380
        }
381
382
383
class GenericLeMondeBlog(GenericNavigableComic):
    """Generic retriever for the comics hosted on Le Monde blogs."""
    _categories = ('LEMONDE', 'FRANCAIS')
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    # Placeholder value; presumably overridden by each concrete blog class
    # (to be confirmed against the subclasses).
    first_url = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary for the comic page parsed in `soup`.

        The short URL comes from the shortlink <link>, the title and the
        images from the og:title / og:image <meta> tags and the date from
        the 'entry-date' span, parsed with the "fr_FR.utf8" locale argument.
        Image URLs go through convert_iri_to_plain_ascii_uri.
        `link` is not used.
        """
        shortlink = soup.find('link', rel='shortlink')['href']
        post_title = soup.find('meta', property='og:title')['content']
        entry_date = soup.find('span', class_='entry-date').string
        pub_date = string_to_date(entry_date, '%d %B %Y', 'fr_FR.utf8')
        image_metas = soup.find_all('meta', property='og:image')
        image_urls = [
            convert_iri_to_plain_ascii_uri(meta['content'])
            for meta in image_metas
        ]
        return {
            'title': post_title,
            'url2': shortlink,
            'img': image_urls,
            'month': pub_date.month,
            'year': pub_date.year,
            'day': pub_date.day,
        }
406
407
@@ 954-979 (lines=26) @@
951
            'author': author,
952
        }
953
954
955
class MyExtraLife(GenericNavigableComic):
    """Retrieve the My Extra Life comics."""
    name = 'extralife'
    long_name = 'My Extra Life'
    url = 'http://www.myextralife.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the link to the first comic, looked up on the home page."""
        home_page = get_soup_at_url(cls.url)
        return home_page.find('a', class_='comic_nav_link first_comic_link')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary for the comic page parsed in `soup`.

        The title and the date are read from the 'comic_title' heading and
        the 'comic_date' span; the images are the <img> tags with the
        'comic' class, keeping only those with a non-empty src.
        `link` is not used.
        """
        comic_title = soup.find('h1', class_='comic_title').string
        comic_date = soup.find('span', class_='comic_date').string
        pub_date = string_to_date(comic_date, '%B %d, %Y')
        images = soup.find_all('img', class_='comic')
        # Sanity check: alt and title of every image both equal the title.
        assert all(
            image['alt'] == image['title'] == comic_title for image in images
        )
        image_urls = [image['src'] for image in images if image['src']]
        return {
            'title': comic_title,
            'img': image_urls,
            'day': pub_date.day,
            'month': pub_date.month,
            'year': pub_date.year
        }
982