Code Duplication    Length = 22-26 lines in 8 locations

comics.py 8 locations

@@ 355-377 (lines=23) @@
352
        return []
353
354
355
class ExtraFabulousComics(GenericNavigableComic):
    """Class to retrieve Extra Fabulous Comics."""
    name = 'efc'
    long_name = 'Extra Fabulous Comics'
    url = 'http://extrafabulouscomics.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Collects every <img> whose src starts with "<cls.url>/wp-content/uploads/",
        the og:title meta content and the first 10 characters of the
        article:published_time meta content (parsed as "%Y-%m-%d").

        Returns a dict with keys 'title', 'img', 'month', 'year', 'day'
        and 'prefix' (the title followed by '-').
        """
        # Escape the base URL so that its dots are matched literally
        # instead of acting as "any character" wildcards in the pattern.
        img_src_re = re.compile('^%s/wp-content/uploads/' % re.escape(cls.url))
        imgs = soup.find_all('img', src=img_src_re)
        title = soup.find('meta', property='og:title')['content']
        # Only the leading YYYY-MM-DD part of the timestamp is used.
        date_str = soup.find('meta', property='article:published_time')['content'][:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        return {
            'title': title,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'prefix': title + '-'
        }
379
380
@@ 381-402 (lines=22) @@
378
        }
379
380
381
class GenericLeMondeBlog(GenericNavigableComic):
    """Generic class to retrieve comics from Le Monde blogs."""
    _categories = ('LEMONDE', 'FRANCAIS')
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Reads the shortlink href, the og:title content, the text of the
        "entry-date" span and every og:image meta from the page, and
        returns them as a dict with keys 'title', 'url2', 'img',
        'month', 'year' and 'day'.
        """
        shortlink_tag = soup.find('link', rel='shortlink')
        short_url = shortlink_tag['href']
        title_tag = soup.find('meta', property='og:title')
        comic_title = title_tag['content']
        date_span = soup.find("span", class_="entry-date")
        # Third argument is presumably the locale used to parse the month name.
        published = string_to_date(date_span.string, "%d %B %Y", "fr_FR.utf8")
        image_metas = soup.find_all('meta', property='og:image')
        image_urls = [
            convert_iri_to_plain_ascii_uri(meta['content'])
            for meta in image_metas
        ]
        return {
            'title': comic_title,
            'url2': short_url,
            'img': image_urls,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
404
405
@@ 2320-2344 (lines=25) @@
2317
            'day': day.day,
2318
        }
2319
2320
2321
class LinsEditions(GenericNavigableComic):
    """Class to retrieve L.I.N.S. Editions comics."""
    # Also on http://linscomics.tumblr.com
    # Now on https://warandpeas.com
    name = 'lins'
    long_name = 'L.I.N.S. Editions'
    url = 'https://linsedition.com'
    _categories = ('LINS', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'https://linsedition.com/2011/09/07/l-i-n-s/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Uses the og:title, og:image and article:published_time metas of
        the page and returns a dict with keys 'title', 'img', 'month',
        'year' and 'day'.
        """
        title_tag = soup.find('meta', property='og:title')
        comic_title = title_tag['content']
        image_metas = soup.find_all('meta', property='og:image')
        published_tag = soup.find('meta', property='article:published_time')
        # Keep only the leading YYYY-MM-DD part of the timestamp.
        iso_date = published_tag['content'][:10]
        published = string_to_date(iso_date, "%Y-%m-%d")
        image_urls = [meta['content'] for meta in image_metas]
        return {
            'title': comic_title,
            'img': image_urls,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2347
@@ 1016-1040 (lines=25) @@
1013
            'prefix': '%d-' % num,
1014
        }
1015
1016
1017
class Mercworks(GenericNavigableComic):
    """Class to retrieve Mercworks comics."""
    # Also on http://mercworks.tumblr.com
    name = 'mercworks'
    long_name = 'Mercworks'
    url = 'http://mercworks.net'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Uses the og:title, og:description (optional), og:image and
        article:published_time metas of the page and returns a dict with
        keys 'img', 'title', 'desc', 'day', 'month' and 'year'.
        """
        title_tag = soup.find('meta', property='og:title')
        comic_title = title_tag['content']
        description_tag = soup.find('meta', property='og:description')
        # The description meta may be missing; fall back to an empty string.
        if description_tag:
            description = description_tag['content']
        else:
            description = ""
        published_tag = soup.find('meta', property='article:published_time')
        iso_date = published_tag['content'][:10]
        published = string_to_date(iso_date, "%Y-%m-%d")
        image_metas = soup.find_all('meta', property='og:image')
        image_urls = [meta['content'] for meta in image_metas]
        return {
            'img': image_urls,
            'title': comic_title,
            'desc': description,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
1043
@@ 3261-3282 (lines=22) @@
3258
            'img': [i['src'] for i in imgs],
3259
        }
3260
3261
3262
class MarketoonistComics(GenericNavigableComic):
    """Class to retrieve Marketoonist Comics."""
    name = 'marketoonist'
    long_name = 'Marketoonist'
    url = 'https://marketoonist.com/cartoons'
    get_first_comic_link = simulate_first_link
    get_navi_link = get_link_rel_next
    first_url = 'https://marketoonist.com/2002/10/the-8-types-of-brand-managers-2.html'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Uses the og:image, article:published_time and og:title metas of
        the page and returns a dict with keys 'img', 'day', 'month',
        'year' and 'title'.
        """
        image_metas = soup.find_all('meta', property='og:image')
        published_tag = soup.find('meta', property='article:published_time')
        # Keep only the leading YYYY-MM-DD part of the timestamp.
        iso_date = published_tag['content'][:10]
        published = string_to_date(iso_date, "%Y-%m-%d")
        title_tag = soup.find('meta', property='og:title')
        comic_title = title_tag['content']
        image_urls = [meta['content'] for meta in image_metas]
        return {
            'img': image_urls,
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'title': comic_title,
        }
3285
@@ 1861-1886 (lines=26) @@
1858
            'author': author,
1859
        }
1860
1861
1862
class Penmen(GenericNavigableComic):
    """Class to retrieve Penmen comics."""
    name = 'penmen'
    long_name = 'Penmen'
    url = 'http://penmen.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://penmen.com/index.php/2016/09/12/penmen-announces-grin-big-brand-clothing/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Takes the page <title>, the images inside the "entry-content"
        div, the shortlink href, the text of every rel="tag" anchor
        (joined with spaces) and the date part of the <time> datetime
        attribute. Returns a dict with keys 'title', 'short_url', 'img',
        'tags', 'month', 'year' and 'day'.
        """
        page_title = soup.find('title').string
        content_div = soup.find('div', class_='entry-content')
        image_tags = content_div.find_all('img')
        shortlink_tag = soup.find('link', rel='shortlink')
        short_url = shortlink_tag['href']
        tag_names = [anchor.string for anchor in soup.find_all('a', rel='tag')]
        tags = ' '.join(tag_names)
        # Keep only the leading YYYY-MM-DD part of the datetime attribute.
        iso_date = soup.find('time')['datetime'][:10]
        published = string_to_date(iso_date, "%Y-%m-%d")
        image_urls = [tag['src'] for tag in image_tags]
        return {
            'title': page_title,
            'short_url': short_url,
            'img': image_urls,
            'tags': tags,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
1889
@@ 1802-1827 (lines=26) @@
1799
            'img': [i['content'] for i in imgs if i['content'] not in skip_imgs],
1800
        }
1801
1802
1803
class SafelyEndangered(GenericNavigableComic):
    """Class to retrieve Safely Endangered comics."""
    # Also on http://tumblr.safelyendangered.com
    name = 'endangered'
    long_name = 'Safely Endangered'
    url = 'http://www.safelyendangered.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.safelyendangered.com/comic/ignored/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Takes the "post-title" h2 text, the "post-date" span text
        (parsed as '%B %d, %Y') and the images inside the div with id
        "comic". Returns a dict with keys 'day', 'month', 'year', 'img',
        'title' and 'alt' (the alt text of the first image).
        """
        heading = soup.find('h2', class_='post-title')
        comic_title = heading.string
        date_span = soup.find('span', class_='post-date')
        published = string_to_date(date_span.string, '%B %d, %Y')
        comic_div = soup.find('div', id='comic')
        image_tags = comic_div.find_all('img')
        first_image = image_tags[0]
        alt_text = first_image['alt']
        # Every image is expected to carry the same text in alt and title.
        assert all(tag['alt'] == tag['title'] for tag in image_tags)
        image_urls = [tag['src'] for tag in image_tags]
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': image_urls,
            'title': comic_title,
            'alt': alt_text,
        }
1830
@@ 922-947 (lines=26) @@
919
        }
920
921
922
class MyExtraLife(GenericNavigableComic):
    """Class to retrieve My Extra Life comics."""
    name = 'extralife'
    long_name = 'My Extra Life'
    url = 'http://www.myextralife.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        Fetches the page at cls.url and returns its anchor with class
        'comic_nav_link first_comic_link'.
        """
        home_page = get_soup_at_url(cls.url)
        return home_page.find('a', class_='comic_nav_link first_comic_link')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Takes the "comic_title" h1 text, the "comic_date" span text
        (parsed as "%B %d, %Y") and every img with class "comic".
        Returns a dict with keys 'title', 'img', 'day', 'month' and
        'year'; images with an empty src are left out of 'img'.
        """
        heading = soup.find("h1", class_="comic_title")
        comic_title = heading.string
        date_span = soup.find("span", class_="comic_date")
        published = string_to_date(date_span.string, "%B %d, %Y")
        image_tags = soup.find_all("img", class_="comic")
        # Every image is expected to use the comic title as both alt and title.
        assert all(tag['alt'] == tag['title'] == comic_title for tag in image_tags)
        image_urls = [tag['src'] for tag in image_tags if tag["src"]]
        return {
            'title': comic_title,
            'img': image_urls,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
949
950