Code Duplication - SylvainDe/ComicBookMaker - Measure and Improve Code Quality continuously with Scrutinizer

Code Duplication Length = 35-37 lines in 2 locations

comics.py 2 locations


        date_str = soup.find('time', class_='published')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        author = soup.find('a', rel='author').string
        div_content = soup.find('div', class_="body entry-content")
        imgs = div_content.find_all('img')
        imgs = [i for i in imgs if i.get('src') is not None]
        alt = imgs[0]['alt']
        return {
            'title': title,
            'alt': alt,
            'description': desc,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [i['src'] for i in imgs],
        }


class GenericWordPressInkblot(GenericNavigableComic):
    """Generic retriever for comics published with WordPress and Inkblot.

    Navigation between comics is delegated to get_link_rel_next; this class
    only defines how to locate the first comic and how to read a comic page.
    """
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'first comic' anchor looked up on the page at cls.url."""
        # Same class string as a single literal, split only for line length.
        first_link_classes = (
            'webcomic-link webcomic1-link '
            'first-webcomic-link first-webcomic1-link'
        )
        home_soup = get_soup_at_url(cls.url)
        return home_soup.find('a', class_=first_link_classes)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for the comic page held in soup.

        The link argument is not used here. The returned dict has the keys
        'title', 'day', 'month', 'year' and 'img' (list of image 'src' values).
        """
        title_meta = soup.find('meta', property='og:title')
        comic_title = title_meta['content']

        image_container = soup.find('div', class_='webcomic-image')
        image_tags = image_container.find_all('img')

        published_meta = soup.find('meta', property='article:published_time')
        # Only the first 10 characters are kept: that is what gets parsed
        # with the "%Y-%m-%d" format just below.
        published_date_str = published_meta['content'][:10]
        published = string_to_date(published_date_str, "%Y-%m-%d")

        return {
            'title': comic_title,
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [tag['src'] for tag in image_tags],
        }


        desc = soup.find('meta', property='og:description')['content']
        date_str = soup.find('meta', property='article:publish_date')['content']
        day = string_to_date(date_str, "%B %d, %Y")
        author = soup.find('meta', property='article:author')['content']
        tags = soup.find('meta', property='article:tag')['content']
        return {
            'title': title,
            'description': desc,
            'img': [i['content'] for i in imgs],
            'author': author,
            'tags': tags,
            'day': day.day,
            'month': day.month,
            'year': day.year
        }


class VictimsOfCircumsolar(GenericNavigableComic):
    """Retriever for the 'Victims Of Circumsolar' comics.

    Navigation is delegated to get_a_navi_comicnavnext_navinext and the first
    comic link to simulate_first_link, alongside the first_url attribute.
    """
    name = 'circumsolar'
    long_name = 'Victims Of Circumsolar'
    url = 'http://www.victimsofcircumsolar.com'
    get_navi_link = get_a_navi_comicnavnext_navinext
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.victimsofcircumsolar.com/comic/modern-addiction'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for the comic page held in soup.

        The link argument is not used here. The returned dict has the keys
        'title', 'description' and 'img' (list of image 'src' values); no date
        is included because the date is on the archive page.
        """
        # When several matching meta tags exist, the last one is used.
        title_metas = soup.find_all('meta', property='og:title')
        comic_title = title_metas[-1]['content']

        description_metas = soup.find_all('meta', property='og:description')
        comic_description = description_metas[-1]['content']

        comic_container = soup.find('div', id='comic')
        image_tags = comic_container.find_all('img')

        # Sanity check: every image's 'title' and 'alt' must equal the title.
        assert all(
            tag['title'] == tag['alt'] == comic_title for tag in image_tags
        )

        return {
            'title': comic_title,
            'description': comic_description,
            'img': [tag['src'] for tag in image_tags],
        }



		@@ 2893-2929 (lines=37) @@
2890		date_str = soup.find('time', class_='published')['datetime']
2891		day = string_to_date(date_str, "%Y-%m-%d")
2892		author = soup.find('a', rel='author').string
2893		div_content = soup.find('div', class_="body entry-content")
2894		imgs = div_content.find_all('img')
2895		imgs = [i for i in imgs if i.get('src') is not None]
2896		alt = imgs[0]['alt']
2897		return {
2898		'title': title,
2899		'alt': alt,
2900		'description': desc,
2901		'author': author,
2902		'day': day.day,
2903		'month': day.month,
2904		'year': day.year,
2905		'img': [i['src'] for i in imgs],
2906		}
2907
2908
2909		class GenericWordPressInkblot(GenericNavigableComic):
2910		"""Generic class to retrieve comics using WordPress with Inkblot."""
2911		get_navi_link = get_link_rel_next
2912
2913		@classmethod
2914		def get_first_comic_link(cls):
2915		"""Get link to first comics."""
2916		return get_soup_at_url(cls.url).find('a', class_='webcomic-link webcomic1-link first-webcomic-link first-webcomic1-link')
2917
2918		@classmethod
2919		def get_comic_info(cls, soup, link):
2920		"""Get information about a particular comics."""
2921		title = soup.find('meta', property='og:title')['content']
2922		imgs = soup.find('div', class_='webcomic-image').find_all('img')
2923		date_str = soup.find('meta', property='article:published_time')['content'][:10]
2924		day = string_to_date(date_str, "%Y-%m-%d")
2925		return {
2926		'title': title,
2927		'day': day.day,
2928		'month': day.month,
2929		'year': day.year,
2930		'img': [i['src'] for i in imgs],
2931		}
2932
		@@ 781-815 (lines=35) @@
778		desc = soup.find('meta', property='og:description')['content']
779		date_str = soup.find('meta', property='article:publish_date')['content']
780		day = string_to_date(date_str, "%B %d, %Y")
781		author = soup.find('meta', property='article:author')['content']
782		tags = soup.find('meta', property='article:tag')['content']
783		return {
784		'title': title,
785		'description': desc,
786		'img': [i['content'] for i in imgs],
787		'author': author,
788		'tags': tags,
789		'day': day.day,
790		'month': day.month,
791		'year': day.year
792		}
793
794
795		class VictimsOfCircumsolar(GenericNavigableComic):
796		"""Class to retrieve VictimsOfCircumsolar comics."""
797		name = 'circumsolar'
798		long_name = 'Victims Of Circumsolar'
799		url = 'http://www.victimsofcircumsolar.com'
800		get_navi_link = get_a_navi_comicnavnext_navinext
801		get_first_comic_link = simulate_first_link
802		first_url = 'http://www.victimsofcircumsolar.com/comic/modern-addiction'
803
804		@classmethod
805		def get_comic_info(cls, soup, link):
806		"""Get information about a particular comics."""
807		# Date is on the archive page
808		title = soup.find_all('meta', property='og:title')[-1]['content']
809		desc = soup.find_all('meta', property='og:description')[-1]['content']
810		imgs = soup.find('div', id='comic').find_all('img')
811		assert all(i['title'] == i['alt'] == title for i in imgs)
812		return {
813		'title': title,
814		'description': desc,
815		'img': [i['src'] for i in imgs],
816		}
817
818

SylvainDe / ComicBookMaker

Code Duplication Length = 35-37 lines in 2 locations

comics.py 2 locations