Code Duplication - SylvainDe/ComicBookMaker - Measure and Improve Code Quality continuously with Scrutinizer

Code Duplication Length = 12-15 lines in 7 locations

comics.py 7 locations


        gocomics = 'http://www.gocomics.com'
        return urljoin_wrapper(gocomics, link['href'])

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect the date, image URLs, author and tags of one comic page.

        `soup` is queried through its `find` method; `link` is accepted
        but not used by this implementation.  The result is a dict with the
        keys 'day', 'month', 'year', 'img', 'author' and 'tags'.
        """
        def meta_content(prop):
            # 'content' attribute of the first <meta> tag carrying `prop`.
            return soup.find('meta', property=prop)['content']

        published = string_to_date(meta_content('article:published_time'), "%Y-%m-%d")
        picture = soup.find('picture', class_='img-fluid item-comic-image')
        img_tags = picture.find_all('img')
        author = meta_content('article:author')
        tags = meta_content('article:tag')
        info = {
            'day': published.day,
            'month': published.month,
            'year': published.year,
        }
        # Image sources are joined onto cls.url via urljoin_wrapper.
        info['img'] = [urljoin_wrapper(cls.url, tag['src']) for tag in img_tags]
        info['author'] = author
        info['tags'] = tags
        return info



    # Expose the simulate_first_link helper under the name get_first_comic_link.
    get_first_comic_link = simulate_first_link
    # Placeholder value; presumably meant to be overridden with a real URL
    # by concrete subclasses — TODO confirm.
    first_url = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect the title, short link, images and date of one comic page.

        `link` is accepted but not used by this implementation.  The result
        is a dict with the keys 'title', 'url2', 'img', 'month', 'year' and
        'day'.
        """
        shortlink_tag = soup.find('link', rel='shortlink')
        url2 = shortlink_tag['href']
        title = soup.find('meta', property='og:title')['content']
        raw_date = soup.find("span", class_="entry-date").string
        # The third argument is handed to string_to_date as-is; presumably a
        # locale name used to parse French month names — TODO confirm.
        when = string_to_date(raw_date, "%d %B %Y", "fr_FR.utf8")
        og_images = soup.find_all('meta', property='og:image')
        img_urls = [convert_iri_to_plain_ascii_uri(meta['content']) for meta in og_images]
        return dict(
            title=title,
            url2=url2,
            img=img_urls,
            month=when.month,
            year=when.year,
            day=when.day,
        )



    # Expose the simulate_first_link helper under the name get_first_comic_link.
    get_first_comic_link = simulate_first_link
    # Presumably the page simulate_first_link treats as the first comic
    # — TODO confirm against the helper.
    first_url = 'http://extrafabulouscomics.com/comic/buttfly/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Looks up every <img> whose `src` starts with
        ``<cls.url>/wp-content/uploads/``, the 'og:title' meta content and
        the first 10 characters of the 'article:published_time' meta
        content (parsed as "%Y-%m-%d").  `link` is accepted but not used.

        Returns a dict with the keys 'title', 'img', 'month', 'year', 'day'
        and 'prefix' (the title followed by '-').
        """
        # Escape the site URL before embedding it in the pattern: otherwise
        # its '.' characters (and any other regex metacharacters) would act
        # as wildcards instead of matching literally.
        img_src_re = re.compile('^%s/wp-content/uploads/' % re.escape(cls.url))
        imgs = soup.find_all('img', src=img_src_re)
        title = soup.find('meta', property='og:title')['content']
        # Keep only the leading 10 characters before parsing as a date.
        date_str = soup.find('meta', property='article:published_time')['content'][:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        return {
            'title': title,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'prefix': title + '-'
        }



        """Get link to first comics."""
        return get_soup_at_url(cls.url).find('a', class_='comic_nav_link first_comic_link')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect the title, images and date of one comic page.

        `link` is accepted but not used by this implementation.  The result
        is a dict with the keys 'title', 'img', 'day', 'month' and 'year'.
        """
        title = soup.find("h1", class_="comic_title").string
        raw_date = soup.find("span", class_="comic_date").string
        when = string_to_date(raw_date, "%B %d, %Y")
        comic_imgs = soup.find_all("img", class_="comic")
        # Sanity check: each image's alt and title text must equal the page title.
        for tag in comic_imgs:
            assert tag['alt'] == tag['title'] == title
        sources = []
        for tag in comic_imgs:
            # Images with an empty src are left out.
            if tag["src"]:
                sources.append(tag['src'])
        return dict(
            title=title,
            img=sources,
            day=when.day,
            month=when.month,
            year=when.year,
        )



        """Get link to first comics."""
        return get_soup_at_url(cls.url).find('a', class_='webcomic-link webcomic1-link first-webcomic-link first-webcomic1-link')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect the title, date and images of one comic page.

        `link` is accepted but not used by this implementation.  The result
        is a dict with the keys 'title', 'day', 'month', 'year' and 'img'.
        """
        title = soup.find('meta', property='og:title')['content']
        image_box = soup.find('div', class_='webcomic-image')
        img_tags = image_box.find_all('img')
        published = soup.find('meta', property='article:published_time')['content']
        # Only the leading 10 characters are parsed as the date.
        when = string_to_date(published[:10], "%Y-%m-%d")
        info = {'title': title}
        info['day'] = when.day
        info['month'] = when.month
        info['year'] = when.year
        info['img'] = [tag['src'] for tag in img_tags]
        return info



        articles = get_soup_at_url(cls.url).find_all('article', class_='li post')
        return [art.find('a') for art in reversed(articles)]

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Collect the title, images and date of one comic page.

        `archive_elt` is accepted but not used by this implementation.  The
        result is a dict with the keys 'title', 'img', 'month', 'year' and
        'day'.
        """
        published = soup.find('meta', property='og:article:published_time')['content']
        # Only the leading 10 characters are parsed as the date.
        when = string_to_date(published[:10], "%Y-%m-%d")
        title = soup.find('h3', class_='p-post-title').string
        content = soup.find('section', class_='post-content')
        img_tags = content.find_all('img')
        # Image sources are joined onto cls.url via urljoin_wrapper.
        img_urls = [urljoin_wrapper(cls.url, tag['src']) for tag in img_tags]
        return dict(
            title=title,
            img=img_urls,
            month=when.month,
            year=when.year,
            day=when.day,
        )



    # Expose the simulate_first_link helper under the name get_first_comic_link.
    get_first_comic_link = simulate_first_link
    # Placeholder value; presumably meant to be overridden with a real URL
    # by concrete subclasses — TODO confirm.
    first_url = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect the title, image titles, description and images of a page.

        `link` is accepted but not used by this implementation.  The result
        is a dict with the keys 'title', 'title2', 'description' and 'img'.
        """
        description = soup.find('meta', property='og:description')['content']
        title = soup.find('meta', property='og:title')['content']
        entry = soup.find('div', class_='entry-content')
        img_tags = entry.find_all('img')
        # Images without a title attribute contribute an empty string.
        img_titles = [tag.get('title', '') for tag in img_tags]
        title2 = ' '.join(img_titles)
        img_urls = []
        for tag in img_tags:
            # Convert the source first, then join it onto cls.url.
            plain_src = convert_iri_to_plain_ascii_uri(tag['src'])
            img_urls.append(urljoin_wrapper(cls.url, plain_src))
        return dict(
            title=title,
            title2=title2,
            description=description,
            img=img_urls,
        )



		@@ 4852-4866 (lines=15) @@
4849		gocomics = 'http://www.gocomics.com'
4850		return urljoin_wrapper(gocomics, link['href'])
4851
4852		@classmethod
4853		def get_comic_info(cls, soup, link):
4854		"""Get information about a particular comics."""
4855		date_str = soup.find('meta', property='article:published_time')['content']
4856		day = string_to_date(date_str, "%Y-%m-%d")
4857		imgs = soup.find('picture', class_='img-fluid item-comic-image').find_all('img')
4858		author = soup.find('meta', property='article:author')['content']
4859		tags = soup.find('meta', property='article:tag')['content']
4860		return {
4861		'day': day.day,
4862		'month': day.month,
4863		'year': day.year,
4864		'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
4865		'author': author,
4866		'tags': tags,
4867		}
4868
4869
		@@ 441-455 (lines=15) @@
438		get_first_comic_link = simulate_first_link
439		first_url = NotImplemented
440
441		@classmethod
442		def get_comic_info(cls, soup, link):
443		"""Get information about a particular comics."""
444		url2 = soup.find('link', rel='shortlink')['href']
445		title = soup.find('meta', property='og:title')['content']
446		date_str = soup.find("span", class_="entry-date").string
447		day = string_to_date(date_str, "%d %B %Y", "fr_FR.utf8")
448		imgs = soup.find_all('meta', property='og:image')
449		return {
450		'title': title,
451		'url2': url2,
452		'img': [convert_iri_to_plain_ascii_uri(i['content']) for i in imgs],
453		'month': day.month,
454		'year': day.year,
455		'day': day.day,
456		}
457
458
		@@ 416-430 (lines=15) @@
413		get_first_comic_link = simulate_first_link
414		first_url = 'http://extrafabulouscomics.com/comic/buttfly/'
415
416		@classmethod
417		def get_comic_info(cls, soup, link):
418		"""Get information about a particular comics."""
419		img_src_re = re.compile('^%s/wp-content/uploads/' % cls.url)
420		imgs = soup.find_all('img', src=img_src_re)
421		title = soup.find('meta', property='og:title')['content']
422		date_str = soup.find('meta', property='article:published_time')['content'][:10]
423		day = string_to_date(date_str, "%Y-%m-%d")
424		return {
425		'title': title,
426		'img': [i['src'] for i in imgs],
427		'month': day.month,
428		'year': day.year,
429		'day': day.day,
430		'prefix': title + '-'
431		}
432
433
		@@ 1016-1029 (lines=14) @@
1013		"""Get link to first comics."""
1014		return get_soup_at_url(cls.url).find('a', class_='comic_nav_link first_comic_link')
1015
1016		@classmethod
1017		def get_comic_info(cls, soup, link):
1018		"""Get information about a particular comics."""
1019		title = soup.find("h1", class_="comic_title").string
1020		date_str = soup.find("span", class_="comic_date").string
1021		day = string_to_date(date_str, "%B %d, %Y")
1022		imgs = soup.find_all("img", class_="comic")
1023		assert all(i['alt'] == i['title'] == title for i in imgs)
1024		return {
1025		'title': title,
1026		'img': [i['src'] for i in imgs if i["src"]],
1027		'day': day.day,
1028		'month': day.month,
1029		'year': day.year
1030		}
1031
1032
		@@ 3089-3101 (lines=13) @@
3086		"""Get link to first comics."""
3087		return get_soup_at_url(cls.url).find('a', class_='webcomic-link webcomic1-link first-webcomic-link first-webcomic1-link')
3088
3089		@classmethod
3090		def get_comic_info(cls, soup, link):
3091		"""Get information about a particular comics."""
3092		title = soup.find('meta', property='og:title')['content']
3093		imgs = soup.find('div', class_='webcomic-image').find_all('img')
3094		date_str = soup.find('meta', property='article:published_time')['content'][:10]
3095		day = string_to_date(date_str, "%Y-%m-%d")
3096		return {
3097		'title': title,
3098		'day': day.day,
3099		'month': day.month,
3100		'year': day.year,
3101		'img': [i['src'] for i in imgs],
3102		}
3103
3104
		@@ 2324-2336 (lines=13) @@
2321		articles = get_soup_at_url(cls.url).find_all('article', class_='li post')
2322		return [art.find('a') for art in reversed(articles)]
2323
2324		@classmethod
2325		def get_comic_info(cls, soup, archive_elt):
2326		"""Get information about a particular comics."""
2327		date_str = soup.find('meta', property='og:article:published_time')['content'][:10]
2328		day = string_to_date(date_str, "%Y-%m-%d")
2329		title = soup.find('h3', class_='p-post-title').string
2330		imgs = soup.find('section', class_='post-content').find_all('img')
2331		return {
2332		'title': title,
2333		'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
2334		'month': day.month,
2335		'year': day.year,
2336		'day': day.day,
2337		}
2338
2339
		@@ 2793-2804 (lines=12) @@
2790		get_first_comic_link = simulate_first_link
2791		first_url = NotImplemented
2792
2793		@classmethod
2794		def get_comic_info(cls, soup, link):
2795		"""Get information about a particular comics."""
2796		desc = soup.find('meta', property='og:description')['content']
2797		title = soup.find('meta', property='og:title')['content']
2798		imgs = soup.find('div', class_='entry-content').find_all('img')
2799		title2 = ' '.join(i.get('title', '') for i in imgs)
2800		return {
2801		'title': title,
2802		'title2': title2,
2803		'description': desc,
2804		'img': [urljoin_wrapper(cls.url, convert_iri_to_plain_ascii_uri(i['src'])) for i in imgs],
2805		}
2806
2807

SylvainDe / ComicBookMaker

Code Duplication Length = 12-15 lines in 7 locations

comics.py 7 locations