Code Duplication    Length = 22-26 lines in 7 locations

comics.py 7 locations

@@ 355-377 (lines=23) @@
352
        return []
353
354
355
class ExtraFabulousComics(GenericNavigableComic):
    """Class to retrieve Extra Fabulous Comics."""
    name = 'efc'
    long_name = 'Extra Fabulous Comics'
    url = 'http://extrafabulouscomics.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Args:
            soup: page object queried through find / find_all
                (presumably a BeautifulSoup document - confirm with caller).
            link: navigation link for the page; not used by this method.

        Returns:
            A dict with the keys 'title', 'img' (list of image sources),
            'month', 'year', 'day' and 'prefix' (the title followed by '-').
        """
        # Escape the site URL before embedding it in the pattern: otherwise
        # every '.' of the host name would match any character instead of a
        # literal dot.
        img_src_re = re.compile(
            '^%s/wp-content/uploads/' % re.escape(cls.url))
        imgs = soup.find_all('img', src=img_src_re)
        title = soup.find('meta', property='og:title')['content']
        # Only the first 10 characters (the "YYYY-MM-DD" part) are parsed.
        date_str = soup.find(
            'meta', property='article:published_time')['content'][:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        return {
            'title': title,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'prefix': title + '-'
        }
379
380
@@ 381-402 (lines=22) @@
378
        }
379
380
381
class GenericLeMondeBlog(GenericNavigableComic):
    """Common base for the comics published on Le Monde blogs.

    first_url is left as NotImplemented in this class.
    """
    _categories = ('LEMONDE', 'FRANCAIS')
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect the data describing the comic found on a page.

        Returns a dict with the keys 'title', 'url2', 'img', 'month',
        'year' and 'day'. The link argument is not used.
        """
        shortlink_tag = soup.find('link', rel='shortlink')
        short_url = shortlink_tag['href']
        title_tag = soup.find('meta', property='og:title')
        comic_title = title_tag['content']
        date_tag = soup.find("span", class_="entry-date")
        # The date is parsed as "day month-name year" with a French locale.
        published = string_to_date(date_tag.string, "%d %B %Y", "fr_FR.utf8")
        image_metas = soup.find_all('meta', property='og:image')
        image_urls = [
            convert_iri_to_plain_ascii_uri(meta['content'])
            for meta in image_metas
        ]
        return {
            'title': comic_title,
            'url2': short_url,
            'img': image_urls,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
404
405
@@ 2325-2349 (lines=25) @@
2322
        }
2323
2324
2325
class LinsEditions(GenericNavigableComic):
    """Retriever for the L.I.N.S. Editions comics."""
    # Also on http://linscomics.tumblr.com
    # Now on https://warandpeas.com
    name = 'lins'
    long_name = 'L.I.N.S. Editions'
    url = 'https://linsedition.com'
    _categories = ('LINS', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'https://linsedition.com/2011/09/07/l-i-n-s/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect the data describing the comic found on a page.

        Returns a dict with the keys 'title', 'img', 'month', 'year' and
        'day'. The link argument is not used.
        """
        title_tag = soup.find('meta', property='og:title')
        comic_title = title_tag['content']
        image_metas = soup.find_all('meta', property='og:image')
        time_tag = soup.find('meta', property='article:published_time')
        # Keep only the leading "YYYY-MM-DD" part of the timestamp.
        iso_date = time_tag['content'][:10]
        published = string_to_date(iso_date, "%Y-%m-%d")
        image_urls = [meta['content'] for meta in image_metas]
        return {
            'title': comic_title,
            'img': image_urls,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2351
2352
@@ 1021-1045 (lines=25) @@
1018
        }
1019
1020
1021
class Mercworks(GenericNavigableComic):
    """Retriever for the Mercworks comics."""
    # Also on http://mercworks.tumblr.com
    name = 'mercworks'
    long_name = 'Mercworks'
    url = 'http://mercworks.net'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect the data describing the comic found on a page.

        Returns a dict with the keys 'img', 'title', 'desc', 'day',
        'month' and 'year'. The link argument is not used.
        """
        title_tag = soup.find('meta', property='og:title')
        comic_title = title_tag['content']
        # The description is optional: fall back on an empty string.
        desc_tag = soup.find('meta', property='og:description')
        if desc_tag:
            description = desc_tag['content']
        else:
            description = ""
        time_tag = soup.find('meta', property='article:published_time')
        # Keep only the leading "YYYY-MM-DD" part of the timestamp.
        iso_date = time_tag['content'][:10]
        published = string_to_date(iso_date, "%Y-%m-%d")
        image_metas = soup.find_all('meta', property='og:image')
        image_urls = [meta['content'] for meta in image_metas]
        return {
            'img': image_urls,
            'title': comic_title,
            'desc': description,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
1047
1048
@@ 3266-3287 (lines=22) @@
3263
        }
3264
3265
3266
class MarketoonistComics(GenericNavigableComic):
    """Retriever for the Marketoonist cartoons."""
    name = 'marketoonist'
    long_name = 'Marketoonist'
    url = 'https://marketoonist.com/cartoons'
    get_first_comic_link = simulate_first_link
    get_navi_link = get_link_rel_next
    first_url = 'https://marketoonist.com/2002/10/the-8-types-of-brand-managers-2.html'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect the data describing the comic found on a page.

        Returns a dict with the keys 'img', 'day', 'month', 'year' and
        'title'. The link argument is not used.
        """
        image_metas = soup.find_all('meta', property='og:image')
        time_tag = soup.find('meta', property='article:published_time')
        # Keep only the leading "YYYY-MM-DD" part of the timestamp.
        iso_date = time_tag['content'][:10]
        published = string_to_date(iso_date, "%Y-%m-%d")
        title_tag = soup.find('meta', property='og:title')
        comic_title = title_tag['content']
        image_urls = [meta['content'] for meta in image_metas]
        return {
            'img': image_urls,
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'title': comic_title,
        }
3289
3290
@@ 1866-1891 (lines=26) @@
1863
        }
1864
1865
1866
class Penmen(GenericNavigableComic):
    """Retriever for the Penmen comics."""
    name = 'penmen'
    long_name = 'Penmen'
    url = 'http://penmen.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://penmen.com/index.php/2016/09/12/penmen-announces-grin-big-brand-clothing/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect the data describing the comic found on a page.

        Returns a dict with the keys 'title', 'short_url', 'img', 'tags',
        'month', 'year' and 'day'. The link argument is not used.
        """
        page_title = soup.find('title').string
        content_div = soup.find('div', class_='entry-content')
        pictures = content_div.find_all('img')
        shortlink_tag = soup.find('link', rel='shortlink')
        shortlink = shortlink_tag['href']
        # All the tag names end up in a single space-separated string.
        tag_links = soup.find_all('a', rel='tag')
        joined_tags = ' '.join([anchor.string for anchor in tag_links])
        # Keep only the leading "YYYY-MM-DD" part of the datetime attribute.
        iso_date = soup.find('time')['datetime'][:10]
        published = string_to_date(iso_date, "%Y-%m-%d")
        image_urls = [picture['src'] for picture in pictures]
        return {
            'title': page_title,
            'short_url': shortlink,
            'img': image_urls,
            'tags': joined_tags,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
1893
1894
@@ 1807-1832 (lines=26) @@
1804
        }
1805
1806
1807
class SafelyEndangered(GenericNavigableComic):
    """Retriever for the Safely Endangered comics."""
    # Also on http://tumblr.safelyendangered.com
    name = 'endangered'
    long_name = 'Safely Endangered'
    url = 'http://www.safelyendangered.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.safelyendangered.com/comic/ignored/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect the data describing the comic found on a page.

        Returns a dict with the keys 'day', 'month', 'year', 'img',
        'title' and 'alt'. The link argument is not used.
        """
        title_tag = soup.find('h2', class_='post-title')
        comic_title = title_tag.string
        date_tag = soup.find('span', class_='post-date')
        published = string_to_date(date_tag.string, '%B %d, %Y')
        comic_div = soup.find('div', id='comic')
        pictures = comic_div.find_all('img')
        # The alternative text reported is the one of the first image.
        alt_text = pictures[0]['alt']
        # Every image must carry the same text as 'alt' and as 'title'.
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        image_urls = [pic['src'] for pic in pictures]
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': image_urls,
            'title': comic_title,
            'alt': alt_text,
        }
1834
1835