Code Duplication    Length = 22-26 lines in 7 locations

comics.py 7 locations

@@ 355-377 (lines=23) @@
352
        return []
353
354
355
class ExtraFabulousComics(GenericNavigableComic):
    """Class to retrieve Extra Fabulous Comics."""
    name = 'efc'
    long_name = 'Extra Fabulous Comics'
    url = 'http://extrafabulouscomics.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        Args:
            soup: parsed comic page, queried through ``find``/``find_all``
                (presumably a BeautifulSoup document - confirm with caller).
            link: navigation link that led to the page; not used here.

        Returns:
            dict with the keys 'title', 'img' (list of image sources),
            'month', 'year', 'day' and 'prefix' (title followed by '-').
        """
        # Escape the base URL: otherwise the dots of the host name act as
        # regex wildcards and the pattern accepts look-alike hosts.
        img_src_re = re.compile(
            '^%s/wp-content/uploads/' % re.escape(cls.url))
        imgs = soup.find_all('img', src=img_src_re)
        title = soup.find('meta', property='og:title')['content']
        # Only the first 10 characters (the "YYYY-MM-DD" part) are parsed.
        date_str = soup.find(
            'meta', property='article:published_time')['content'][:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        return {
            'title': title,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'prefix': title + '-'
        }
379
380
@@ 381-402 (lines=22) @@
378
        }
379
380
381
class GenericLeMondeBlog(GenericNavigableComic):
    """Generic class to retrieve comics from Le Monde blogs."""
    _categories = ('LEMONDE', 'FRANCAIS')
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    # Concrete subclasses are expected to override this placeholder.
    first_url = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, short link, images and date from a blog post."""
        shortlink_tag = soup.find('link', rel='shortlink')
        short_href = shortlink_tag['href']
        title_tag = soup.find('meta', property='og:title')
        post_title = title_tag['content']
        # The date is written out in French, hence the explicit locale.
        raw_date = soup.find("span", class_="entry-date").string
        published = string_to_date(raw_date, "%d %B %Y", "fr_FR.utf8")
        image_metas = soup.find_all('meta', property='og:image')
        image_urls = [
            convert_iri_to_plain_ascii_uri(meta['content'])
            for meta in image_metas
        ]
        return {
            'title': post_title,
            'url2': short_href,
            'img': image_urls,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
404
405
@@ 2320-2344 (lines=25) @@
2317
        }
2318
2319
2320
class LinsEditions(GenericNavigableComic):
    """Class to retrieve L.I.N.S. Editions comics."""
    # Also on http://linscomics.tumblr.com
    # Now on https://warandpeas.com
    name = 'lins'
    long_name = 'L.I.N.S. Editions'
    url = 'https://linsedition.com'
    _categories = ('LINS', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'https://linsedition.com/2011/09/07/l-i-n-s/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Read title, images and publication date from the page metadata."""
        title_tag = soup.find('meta', property='og:title')
        comic_title = title_tag['content']
        image_metas = soup.find_all('meta', property='og:image')
        published_tag = soup.find('meta', property='article:published_time')
        # Keep only the leading "YYYY-MM-DD" part of the timestamp.
        iso_day = published_tag['content'][:10]
        published = string_to_date(iso_day, "%Y-%m-%d")
        image_urls = [meta['content'] for meta in image_metas]
        return {
            'title': comic_title,
            'img': image_urls,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2346
2347
@@ 1016-1040 (lines=25) @@
1013
        }
1014
1015
1016
class Mercworks(GenericNavigableComic):
    """Class to retrieve Mercworks comics."""
    # Also on http://mercworks.tumblr.com
    name = 'mercworks'
    long_name = 'Mercworks'
    url = 'http://mercworks.net'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Read images, title, description and date from the page metadata."""
        comic_title = soup.find('meta', property='og:title')['content']
        description_tag = soup.find('meta', property='og:description')
        # The description is optional: fall back to an empty string.
        if description_tag:
            description = description_tag['content']
        else:
            description = ""
        published_tag = soup.find('meta', property='article:published_time')
        # Keep only the leading "YYYY-MM-DD" part of the timestamp.
        iso_day = published_tag['content'][:10]
        published = string_to_date(iso_day, "%Y-%m-%d")
        image_metas = soup.find_all('meta', property='og:image')
        image_urls = [meta['content'] for meta in image_metas]
        return {
            'img': image_urls,
            'title': comic_title,
            'desc': description,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
1042
1043
@@ 3261-3282 (lines=22) @@
3258
        }
3259
3260
3261
class MarketoonistComics(GenericNavigableComic):
    """Class to retrieve Marketoonist Comics."""
    name = 'marketoonist'
    long_name = 'Marketoonist'
    url = 'https://marketoonist.com/cartoons'
    get_first_comic_link = simulate_first_link
    get_navi_link = get_link_rel_next
    first_url = 'https://marketoonist.com/2002/10/the-8-types-of-brand-managers-2.html'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Read images, publication date and title from the page metadata."""
        image_metas = soup.find_all('meta', property='og:image')
        published_tag = soup.find('meta', property='article:published_time')
        # Keep only the leading "YYYY-MM-DD" part of the timestamp.
        iso_day = published_tag['content'][:10]
        published = string_to_date(iso_day, "%Y-%m-%d")
        title_tag = soup.find('meta', property='og:title')
        comic_title = title_tag['content']
        image_urls = [meta['content'] for meta in image_metas]
        return {
            'img': image_urls,
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'title': comic_title,
        }
3284
3285
@@ 1861-1886 (lines=26) @@
1858
        }
1859
1860
1861
class Penmen(GenericNavigableComic):
    """Class to retrieve Penmen comics."""
    name = 'penmen'
    long_name = 'Penmen'
    url = 'http://penmen.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://penmen.com/index.php/2016/09/12/penmen-announces-grin-big-brand-clothing/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect title, short link, images, tags and date of a post."""
        page_title = soup.find('title').string
        content_div = soup.find('div', class_='entry-content')
        image_tags = content_div.find_all('img')
        shortlink = soup.find('link', rel='shortlink')['href']
        tag_links = soup.find_all('a', rel='tag')
        # Tags are flattened into one space-separated string.
        joined_tags = ' '.join(anchor.string for anchor in tag_links)
        # Keep only the leading "YYYY-MM-DD" part of the datetime attribute.
        iso_day = soup.find('time')['datetime'][:10]
        published = string_to_date(iso_day, "%Y-%m-%d")
        image_sources = [image['src'] for image in image_tags]
        return {
            'title': page_title,
            'short_url': shortlink,
            'img': image_sources,
            'tags': joined_tags,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
1888
1889
@@ 1802-1827 (lines=26) @@
1799
        }
1800
1801
1802
class SafelyEndangered(GenericNavigableComic):
    """Class to retrieve Safely Endangered comics."""
    # Also on http://tumblr.safelyendangered.com
    name = 'endangered'
    long_name = 'Safely Endangered'
    url = 'http://www.safelyendangered.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.safelyendangered.com/comic/ignored/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect date, images, title and alt text of a comic page."""
        title_tag = soup.find('h2', class_='post-title')
        comic_title = title_tag.string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, '%B %d, %Y')
        comic_div = soup.find('div', id='comic')
        image_tags = comic_div.find_all('img')
        # The first image supplies the alt text reported for the comic.
        first_image = image_tags[0]
        alt_text = first_image['alt']
        # Sanity check: every image carries the same text as alt and title.
        assert all(image['alt'] == image['title'] for image in image_tags)
        image_sources = [image['src'] for image in image_tags]
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': image_sources,
            'title': comic_title,
            'alt': alt_text,
        }
1829
1830