Code Duplication    Length = 12-15 lines in 7 locations

comics.py 7 locations

@@ 4852-4866 (lines=15) @@
4849
        gocomics = 'http://www.gocomics.com'
4850
        return urljoin_wrapper(gocomics, link['href'])
4851
4852
    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract date, images, author and tags from a comic page.

        Args:
            soup: parsed comic page, queried via ``find`` / ``find_all``.
            link: not used by this implementation.

        Returns:
            Dict with keys 'day', 'month', 'year', 'img', 'author', 'tags'.
        """
        def meta_content(prop):
            # ``content`` attribute of the first <meta> with this property.
            return soup.find('meta', property=prop)['content']

        published = string_to_date(meta_content('article:published_time'), "%Y-%m-%d")
        picture = soup.find('picture', class_='img-fluid item-comic-image')
        pictures = picture.find_all('img')
        writer = meta_content('article:author')
        labels = meta_content('article:tag')
        info = {
            'day': published.day,
            'month': published.month,
            'year': published.year,
        }
        # Image sources are resolved against the comic's base URL.
        info['img'] = [urljoin_wrapper(cls.url, pic['src']) for pic in pictures]
        info['author'] = writer
        info['tags'] = labels
        return info
4868
4869
@@ 441-455 (lines=15) @@
438
    get_first_comic_link = simulate_first_link
439
    first_url = NotImplemented
440
441
    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, short URL, date and images from a comic page.

        Args:
            soup: parsed comic page, queried via ``find`` / ``find_all``.
            link: not used by this implementation.

        Returns:
            Dict with keys 'title', 'url2', 'img', 'month', 'year', 'day'.
        """
        shortlink = soup.find('link', rel='shortlink')['href']
        heading = soup.find('meta', property='og:title')['content']
        raw_date = soup.find("span", class_="entry-date").string
        # NOTE(review): the third argument looks like a locale name used to
        # parse the month name - confirm against string_to_date.
        when = string_to_date(raw_date, "%d %B %Y", "fr_FR.utf8")
        image_metas = soup.find_all('meta', property='og:image')
        info = {'title': heading, 'url2': shortlink}
        info['img'] = [
            convert_iri_to_plain_ascii_uri(meta['content'])
            for meta in image_metas
        ]
        info['month'] = when.month
        info['year'] = when.year
        info['day'] = when.day
        return info
457
458
@@ 416-430 (lines=15) @@
413
    get_first_comic_link = simulate_first_link
414
    first_url = 'http://extrafabulouscomics.com/comic/buttfly/'
415
416
    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        Images are the ``img`` tags whose ``src`` starts with
        ``<cls.url>/wp-content/uploads/``.

        Args:
            soup: parsed comic page, queried via ``find`` / ``find_all``.
            link: not used by this implementation.

        Returns:
            Dict with keys 'title', 'img', 'month', 'year', 'day', 'prefix'.
        """
        # Escape the base URL so that its dots (and any other regex
        # metacharacter) match literally instead of acting as wildcards.
        img_src_re = re.compile('^%s/wp-content/uploads/' % re.escape(cls.url))
        imgs = soup.find_all('img', src=img_src_re)
        title = soup.find('meta', property='og:title')['content']
        # Only the first 10 characters (the "YYYY-MM-DD" part) are parsed.
        date_str = soup.find('meta', property='article:published_time')['content'][:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        return {
            'title': title,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'prefix': title + '-',
        }
432
433
@@ 1016-1029 (lines=14) @@
1013
        """Get link to first comics."""
1014
        return get_soup_at_url(cls.url).find('a', class_='comic_nav_link first_comic_link')
1015
1016
    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect title, date and image sources from a comic page.

        Args:
            soup: parsed comic page, queried via ``find`` / ``find_all``.
            link: not used by this implementation.

        Returns:
            Dict with keys 'title', 'img', 'day', 'month', 'year'.
        """
        heading = soup.find("h1", class_="comic_title").string
        raw_date = soup.find("span", class_="comic_date").string
        when = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find_all("img", class_="comic")
        # Sanity check: every image's alt and title text equal the comic title.
        assert all(pic['alt'] == pic['title'] == heading for pic in pictures)
        # Images with an empty src are left out.
        sources = [pic['src'] for pic in pictures if pic["src"]]
        return dict(
            title=heading,
            img=sources,
            day=when.day,
            month=when.month,
            year=when.year,
        )
1031
1032
@@ 3089-3101 (lines=13) @@
3086
        """Get link to first comics."""
3087
        return get_soup_at_url(cls.url).find('a', class_='webcomic-link webcomic1-link first-webcomic-link first-webcomic1-link')
3088
3089
    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect title, publication date and images from a comic page.

        Args:
            soup: parsed comic page, queried via ``find`` / ``find_all``.
            link: not used by this implementation.

        Returns:
            Dict with keys 'title', 'day', 'month', 'year', 'img'.
        """
        heading = soup.find('meta', property='og:title')['content']
        container = soup.find('div', class_='webcomic-image')
        pictures = container.find_all('img')
        timestamp = soup.find('meta', property='article:published_time')['content']
        # Only the first 10 characters (the "YYYY-MM-DD" part) are parsed.
        when = string_to_date(timestamp[:10], "%Y-%m-%d")
        info = dict(title=heading, day=when.day, month=when.month, year=when.year)
        info['img'] = [pic['src'] for pic in pictures]
        return info
3103
3104
@@ 2324-2336 (lines=13) @@
2321
        articles = get_soup_at_url(cls.url).find_all('article', class_='li post')
2322
        return [art.find('a') for art in reversed(articles)]
2323
2324
    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Collect title, publication date and images from a comic page.

        Args:
            soup: parsed comic page, queried via ``find`` / ``find_all``.
            archive_elt: not used by this implementation.

        Returns:
            Dict with keys 'title', 'img', 'month', 'year', 'day'.
        """
        timestamp = soup.find('meta', property='og:article:published_time')['content']
        # Only the first 10 characters (the "YYYY-MM-DD" part) are parsed.
        when = string_to_date(timestamp[:10], "%Y-%m-%d")
        heading = soup.find('h3', class_='p-post-title').string
        content = soup.find('section', class_='post-content')
        pictures = content.find_all('img')
        # Image sources are resolved against the comic's base URL.
        sources = [urljoin_wrapper(cls.url, pic['src']) for pic in pictures]
        return dict(
            title=heading,
            img=sources,
            month=when.month,
            year=when.year,
            day=when.day,
        )
2338
2339
@@ 2793-2804 (lines=12) @@
2790
    get_first_comic_link = simulate_first_link
2791
    first_url = NotImplemented
2792
2793
    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect title, image titles, description and images from a page.

        Args:
            soup: parsed comic page, queried via ``find`` / ``find_all``.
            link: not used by this implementation.

        Returns:
            Dict with keys 'title', 'title2', 'description', 'img'.
        """
        def og_content(name):
            # ``content`` attribute of the first <meta property="og:<name>">.
            return soup.find('meta', property='og:' + name)['content']

        summary = og_content('description')
        heading = og_content('title')
        pictures = soup.find('div', class_='entry-content').find_all('img')
        # Space-joined ``title`` attributes; images without one contribute ''.
        hover_text = ' '.join(pic.get('title', '') for pic in pictures)
        info = {
            'title': heading,
            'title2': hover_text,
            'description': summary,
        }
        info['img'] = [
            urljoin_wrapper(cls.url, convert_iri_to_plain_ascii_uri(pic['src']))
            for pic in pictures
        ]
        return info
2806
2807