Code Duplication    Length = 22-26 lines in 7 locations

comics.py 7 locations

@@ 355-377 (lines=23) @@
352
        return []
353
354
355
class ExtraFabulousComics(GenericNavigableComic):
    """Class to retrieve Extra Fabulous Comics."""
    name = 'efc'
    long_name = 'Extra Fabulous Comics'
    url = 'http://extrafabulouscomics.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        Args:
            soup: parsed comic page, queried through ``find``/``find_all``
                (presumably a BeautifulSoup document - confirm with caller).
            link: not used by this implementation.

        Returns:
            A dict with the keys 'title', 'img' (list of image URLs),
            'month', 'year', 'day' and 'prefix' (the title followed by '-').
        """
        # Escape the site URL before embedding it in the pattern so that the
        # dots of the host name are matched literally and not as wildcards.
        img_src_re = re.compile('^%s/wp-content/uploads/' % re.escape(cls.url))
        imgs = soup.find_all('img', src=img_src_re)
        title = soup.find('meta', property='og:title')['content']
        # Only the first 10 characters (the YYYY-MM-DD part) of the
        # publication timestamp are parsed.
        date_str = soup.find('meta', property='article:published_time')['content'][:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        return {
            'title': title,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'prefix': title + '-'
        }
379
380
@@ 381-402 (lines=22) @@
378
        }
379
380
381
class GenericLeMondeBlog(GenericNavigableComic):
    """Generic class to retrieve comics from Le Monde blogs.

    The class defines no ``name`` or ``url`` and leaves ``first_url`` as
    ``NotImplemented``.
    """
    _categories = ('LEMONDE', 'FRANCAIS')
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    # Placeholder value - presumably overridden by each concrete blog class.
    first_url = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        Reads the short link, the title, the entry date (parsed as
        "%d %B %Y" with the "fr_FR.utf8" locale argument) and the og:image
        entries from the page held in ``soup``. ``link`` is not used.

        Returns a dict with the keys 'title', 'url2', 'img', 'month',
        'year' and 'day'.
        """
        shortlink_tag = soup.find('link', rel='shortlink')
        short_url = shortlink_tag['href']
        title_tag = soup.find('meta', property='og:title')
        comic_title = title_tag['content']
        raw_date = soup.find("span", class_="entry-date").string
        published = string_to_date(raw_date, "%d %B %Y", "fr_FR.utf8")
        image_tags = soup.find_all('meta', property='og:image')
        # Each image address goes through the IRI -> plain ASCII URI helper.
        image_urls = [
            convert_iri_to_plain_ascii_uri(tag['content'])
            for tag in image_tags
        ]
        return {
            'title': comic_title,
            'url2': short_url,
            'img': image_urls,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
404
405
@@ 2351-2375 (lines=25) @@
2348
        }
2349
2350
2351
class LinsEditions(GenericNavigableComic):
    """Class to retrieve L.I.N.S. Editions comics."""
    # Also on http://linscomics.tumblr.com
    # Now on https://warandpeas.com
    name = 'lins'
    long_name = 'L.I.N.S. Editions'
    url = 'https://linsedition.com'
    _categories = ('LINS', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'https://linsedition.com/2011/09/07/l-i-n-s/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        Reads the og:title, og:image and article:published_time meta tags
        from the page held in ``soup``. ``link`` is not used.

        Returns a dict with the keys 'title', 'img', 'month', 'year' and
        'day'.
        """
        comic_title = soup.find('meta', property='og:title')['content']
        image_tags = soup.find_all('meta', property='og:image')
        timestamp = soup.find('meta', property='article:published_time')['content']
        # Keep only the leading YYYY-MM-DD part of the timestamp.
        published = string_to_date(timestamp[:10], "%Y-%m-%d")
        image_urls = [tag['content'] for tag in image_tags]
        return {
            'title': comic_title,
            'img': image_urls,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2377
2378
@@ 1047-1071 (lines=25) @@
1044
        }
1045
1046
1047
class Mercworks(GenericNavigableComic):
    """Class to retrieve Mercworks comics."""
    # Also on http://mercworks.tumblr.com
    name = 'mercworks'
    long_name = 'Mercworks'
    url = 'http://mercworks.net'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        Reads the og:title, og:description, article:published_time and
        og:image meta tags from the page held in ``soup``. The description
        falls back to an empty string when its tag lookup is falsy.
        ``link`` is not used.

        Returns a dict with the keys 'img', 'title', 'desc', 'day', 'month'
        and 'year'.
        """
        comic_title = soup.find('meta', property='og:title')['content']
        description_tag = soup.find('meta', property='og:description')
        if description_tag:
            description = description_tag['content']
        else:
            description = ""
        timestamp = soup.find('meta', property='article:published_time')['content']
        # Keep only the leading YYYY-MM-DD part of the timestamp.
        published = string_to_date(timestamp[:10], "%Y-%m-%d")
        image_tags = soup.find_all('meta', property='og:image')
        image_urls = [tag['content'] for tag in image_tags]
        return {
            'img': image_urls,
            'title': comic_title,
            'desc': description,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
1073
1074
@@ 3292-3313 (lines=22) @@
3289
        }
3290
3291
3292
class MarketoonistComics(GenericNavigableComic):
    """Class to retrieve Marketoonist Comics."""
    name = 'marketoonist'
    long_name = 'Marketoonist'
    url = 'https://marketoonist.com/cartoons'
    get_first_comic_link = simulate_first_link
    get_navi_link = get_link_rel_next
    first_url = 'https://marketoonist.com/2002/10/the-8-types-of-brand-managers-2.html'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        Reads the og:image, article:published_time and og:title meta tags
        from the page held in ``soup``. ``link`` is not used.

        Returns a dict with the keys 'img', 'day', 'month', 'year' and
        'title'.
        """
        image_tags = soup.find_all('meta', property='og:image')
        timestamp = soup.find('meta', property='article:published_time')['content']
        # Keep only the leading YYYY-MM-DD part of the timestamp.
        published = string_to_date(timestamp[:10], "%Y-%m-%d")
        comic_title = soup.find('meta', property='og:title')['content']
        image_urls = [tag['content'] for tag in image_tags]
        return {
            'img': image_urls,
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'title': comic_title,
        }
3315
3316
@@ 1892-1917 (lines=26) @@
1889
        }
1890
1891
1892
class Penmen(GenericNavigableComic):
    """Class to retrieve Penmen comics."""
    name = 'penmen'
    long_name = 'Penmen'
    url = 'http://penmen.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://penmen.com/index.php/2016/09/12/penmen-announces-grin-big-brand-clothing/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        Reads the page title, the images inside the 'entry-content' div,
        the short link, the tag links (joined with single spaces) and the
        date from the first ``time`` element of the page held in ``soup``.
        ``link`` is not used.

        Returns a dict with the keys 'title', 'short_url', 'img', 'tags',
        'month', 'year' and 'day'.
        """
        page_title = soup.find('title').string
        content_div = soup.find('div', class_='entry-content')
        image_tags = content_div.find_all('img')
        shortlink = soup.find('link', rel='shortlink')['href']
        tag_links = soup.find_all('a', rel='tag')
        joined_tags = ' '.join([anchor.string for anchor in tag_links])
        # Keep only the leading YYYY-MM-DD part of the datetime attribute.
        raw_date = soup.find('time')['datetime'][:10]
        published = string_to_date(raw_date, "%Y-%m-%d")
        image_urls = [tag['src'] for tag in image_tags]
        return {
            'title': page_title,
            'short_url': shortlink,
            'img': image_urls,
            'tags': joined_tags,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
1919
1920
@@ 1833-1858 (lines=26) @@
1830
        }
1831
1832
1833
class SafelyEndangered(GenericNavigableComic):
    """Class to retrieve Safely Endangered comics."""
    # Also on http://tumblr.safelyendangered.com
    name = 'endangered'
    long_name = 'Safely Endangered'
    url = 'http://www.safelyendangered.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.safelyendangered.com/comic/ignored/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        Reads the post title, the post date (parsed as '%B %d, %Y') and the
        images inside the div with id 'comic' from the page held in
        ``soup``. The 'alt' value is taken from the first image, so at
        least one image is required. ``link`` is not used.

        Returns a dict with the keys 'day', 'month', 'year', 'img', 'title'
        and 'alt'.
        """
        post_title = soup.find('h2', class_='post-title').string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, '%B %d, %Y')
        comic_div = soup.find('div', id='comic')
        image_tags = comic_div.find_all('img')
        first_image = image_tags[0]
        alt_text = first_image['alt']
        # Sanity check: every image carries the same text in 'alt' and 'title'.
        assert not any(tag['alt'] != tag['title'] for tag in image_tags)
        image_urls = [tag['src'] for tag in image_tags]
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': image_urls,
            'title': post_title,
            'alt': alt_text,
        }
1860
1861