Code Duplication - SylvainDe/ComicBookMaker - Measure and Improve Code Quality continuously with Scrutinizer

Code Duplication Length = 13-20 lines in 5 locations

comics.py 5 locations


    # get_first_comic_link is bound to simulate_first_link, which is defined
    # outside this excerpt.
    # NOTE(review): presumably that helper reads first_url below to locate the
    # first comic -- confirm against its definition.
    get_first_comic_link = simulate_first_link
    first_url = 'http://respawncomic.com/comic/c0001/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title, author, date, images) for one comic page.

        All values are read from <meta> tags of `soup`:
        - title from 'og:title',
        - author and publication time from the 'shareaholic:*' entries,
        - images from every 'og:image' entry, minus the two URLs listed in
          `ignored_urls`.

        `link` is not used by this implementation.
        """
        title_meta = soup.find('meta', property='og:title')
        comic_title = title_meta['content']
        author_meta = soup.find('meta', attrs={'name': 'shareaholic:article_author_name'})
        comic_author = author_meta['content']
        published_meta = soup.find('meta', attrs={'name': 'shareaholic:article_published_time'})
        # Only the first 10 characters (the "%Y-%m-%d" part) are parsed.
        published = string_to_date(published_meta['content'][:10], "%Y-%m-%d")
        image_metas = soup.find_all('meta', property='og:image')
        # og:image URLs that must not be reported as comic images.
        ignored_urls = {
            'http://respawncomic.com/wp-content/uploads/2016/03/site/HAROLD2.png',
            'http://respawncomic.com/wp-content/uploads/2016/03/site/DEVA.png'
        }
        kept_urls = []
        for meta in image_metas:
            if meta['content'] not in ignored_urls:
                kept_urls.append(meta['content'])
        return {
            'title': comic_title,
            'author': comic_author,
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': kept_urls,
        }



        li = last_soup.find('li', class_='prev' if next_ else 'next')
        return li.find('a') if li else None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (short URL, title, images, date) for one comic page.

        The short URL comes from the <link rel="shortlink"> element, the title
        from the 'twitter:title' meta tag, the images from every 'og:image'
        meta tag and the date from the <span property="dc:date"> element.

        `link` is not used by this implementation.
        """
        shortlink = soup.find('link', rel='shortlink')['href']
        twitter_title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        image_metas = soup.find_all('meta', property='og:image')
        raw_date = soup.find('span', property='dc:date')['content']
        # Only the first 10 characters (the "%Y-%m-%d" part) are parsed.
        published = string_to_date(raw_date[:10], "%Y-%m-%d")
        info = {
            'short_url': shortlink,
            'title': twitter_title,
        }
        info['img'] = [meta['content'] for meta in image_metas]
        info['day'] = published.day
        info['month'] = published.month
        info['year'] = published.year
        return info



        gocomics = 'http://www.gocomics.com'
        return urljoin_wrapper(gocomics, link['href'])

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (date, images, author, tags) for one comic page.

        The date, author and tags come from the 'article:*' meta tags. The
        images are the <img> elements inside the
        <picture class="img-fluid item-comic-image"> element, with each 'src'
        resolved against `cls.url` through urljoin_wrapper.

        `link` is not used by this implementation.
        """
        published_meta = soup.find('meta', property='article:published_time')
        # The content is parsed as-is, without truncation.
        published = string_to_date(published_meta['content'], "%Y-%m-%d")
        picture = soup.find('picture', class_='img-fluid item-comic-image')
        img_tags = picture.find_all('img')
        author_meta = soup.find('meta', property='article:author')
        comic_author = author_meta['content']
        tag_meta = soup.find('meta', property='article:tag')
        comic_tags = tag_meta['content']
        image_urls = [urljoin_wrapper(cls.url, tag['src']) for tag in img_tags]
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': image_urls,
            'author': comic_author,
            'tags': comic_tags,
        }



        # prev is next / next is prev
        return last_soup.find('li', class_='previous' if next_ else 'next').find('a')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (images, title, author, date) for one comic page.

        The date comes from the 'datetime' attribute of <time class="published">,
        the author from the text of the <a> inside <span class="blog-author">,
        the title from the 'og:title' meta tag and the images from every
        <meta itemprop="image"> element.

        `link` is not used by this implementation.
        """
        time_tag = soup.find('time', class_='published')
        # The attribute is parsed as-is, without truncation.
        published = string_to_date(time_tag['datetime'], "%Y-%m-%d")
        author_span = soup.find('span', class_='blog-author')
        comic_author = author_span.find('a').string
        comic_title = soup.find('meta', property='og:title')['content']
        image_urls = []
        for meta in soup.find_all('meta', itemprop='image'):
            image_urls.append(meta['content'])
        return {
            'img': image_urls,
            'title': comic_title,
            'author': comic_author,
            'day': published.day,
            'month': published.month,
            'year': published.year,
        }



        """Get link to first comics."""
        return get_soup_at_url(cls.url).find('a', class_='webcomic-link webcomic1-link first-webcomic-link first-webcomic1-link')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title, date, images) for one comic page.

        The title comes from the 'og:title' meta tag, the images from the 'src'
        of every <img> inside <div class="webcomic-image">, and the date from
        the 'article:published_time' meta tag.

        `link` is not used by this implementation.
        """
        title_meta = soup.find('meta', property='og:title')
        comic_title = title_meta['content']
        image_holder = soup.find('div', class_='webcomic-image')
        img_tags = image_holder.find_all('img')
        published_meta = soup.find('meta', property='article:published_time')
        # Only the first 10 characters (the "%Y-%m-%d" part) are parsed.
        raw_date = published_meta['content'][:10]
        published = string_to_date(raw_date, "%Y-%m-%d")
        image_sources = []
        for tag in img_tags:
            image_sources.append(tag['src'])
        return {
            'title': comic_title,
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': image_sources,
        }



		@@ 1809-1828 (lines=20) @@
1806		get_first_comic_link = simulate_first_link
1807		first_url = 'http://respawncomic.com/comic/c0001/'
1808
1809		@classmethod
1810		def get_comic_info(cls, soup, link):
1811		"""Get information about a particular comics."""
1812		title = soup.find('meta', property='og:title')['content']
1813		author = soup.find('meta', attrs={'name': 'shareaholic:article_author_name'})['content']
1814		date_str = soup.find('meta', attrs={'name': 'shareaholic:article_published_time'})['content']
1815		date_str = date_str[:10]
1816		day = string_to_date(date_str, "%Y-%m-%d")
1817		imgs = soup.find_all('meta', property='og:image')
1818		skip_imgs = {
1819		'http://respawncomic.com/wp-content/uploads/2016/03/site/HAROLD2.png',
1820		'http://respawncomic.com/wp-content/uploads/2016/03/site/DEVA.png'
1821		}
1822		return {
1823		'title': title,
1824		'author': author,
1825		'day': day.day,
1826		'month': day.month,
1827		'year': day.year,
1828		'img': [i['content'] for i in imgs if i['content'] not in skip_imgs],
1829		}
1830
1831
		@@ 526-541 (lines=16) @@
523		li = last_soup.find('li', class_='prev' if next_ else 'next')
524		return li.find('a') if li else None
525
526		@classmethod
527		def get_comic_info(cls, soup, link):
528		"""Get information about a particular comics."""
529		short_url = soup.find('link', rel='shortlink')['href']
530		title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
531		imgs = soup.find_all('meta', property='og:image')
532		date_str = soup.find('span', property='dc:date')['content']
533		date_str = date_str[:10]
534		day = string_to_date(date_str, "%Y-%m-%d")
535		return {
536		'short_url': short_url,
537		'title': title,
538		'img': [i['content'] for i in imgs],
539		'day': day.day,
540		'month': day.month,
541		'year': day.year,
542		}
543
544
		@@ 4528-4542 (lines=15) @@
4525		gocomics = 'http://www.gocomics.com'
4526		return urljoin_wrapper(gocomics, link['href'])
4527
4528		@classmethod
4529		def get_comic_info(cls, soup, link):
4530		"""Get information about a particular comics."""
4531		date_str = soup.find('meta', property='article:published_time')['content']
4532		day = string_to_date(date_str, "%Y-%m-%d")
4533		imgs = soup.find('picture', class_='img-fluid item-comic-image').find_all('img')
4534		author = soup.find('meta', property='article:author')['content']
4535		tags = soup.find('meta', property='article:tag')['content']
4536		return {
4537		'day': day.day,
4538		'month': day.month,
4539		'year': day.year,
4540		'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
4541		'author': author,
4542		'tags': tags,
4543		}
4544
4545
		@@ 2984-2998 (lines=15) @@
2981		# prev is next / next is prev
2982		return last_soup.find('li', class_='previous' if next_ else 'next').find('a')
2983
2984		@classmethod
2985		def get_comic_info(cls, soup, link):
2986		"""Get information about a particular comics."""
2987		date_str = soup.find('time', class_='published')['datetime']
2988		day = string_to_date(date_str, "%Y-%m-%d")
2989		author = soup.find('span', class_='blog-author').find('a').string
2990		title = soup.find('meta', property='og:title')['content']
2991		imgs = soup.find_all('meta', itemprop='image')
2992		return {
2993		'img': [i['content'] for i in imgs],
2994		'title': title,
2995		'author': author,
2996		'day': day.day,
2997		'month': day.month,
2998		'year': day.year,
2999		}
3000
3001
		@@ 3052-3064 (lines=13) @@
3049		"""Get link to first comics."""
3050		return get_soup_at_url(cls.url).find('a', class_='webcomic-link webcomic1-link first-webcomic-link first-webcomic1-link')
3051
3052		@classmethod
3053		def get_comic_info(cls, soup, link):
3054		"""Get information about a particular comics."""
3055		title = soup.find('meta', property='og:title')['content']
3056		imgs = soup.find('div', class_='webcomic-image').find_all('img')
3057		date_str = soup.find('meta', property='article:published_time')['content'][:10]
3058		day = string_to_date(date_str, "%Y-%m-%d")
3059		return {
3060		'title': title,
3061		'day': day.day,
3062		'month': day.month,
3063		'year': day.year,
3064		'img': [i['src'] for i in imgs],
3065		}
3066
3067

SylvainDe / ComicBookMaker

Code Duplication Length = 13-20 lines in 5 locations

comics.py 5 locations