Code Duplication    Length = 35-37 lines in 2 locations

comics.py 2 locations

@@ 2893-2929 (lines=37) @@
2890
        date_str = soup.find('time', class_='published')['datetime']
2891
        day = string_to_date(date_str, "%Y-%m-%d")
2892
        author = soup.find('a', rel='author').string
2893
        div_content = soup.find('div', class_="body entry-content")
2894
        imgs = div_content.find_all('img')
2895
        imgs = [i for i in imgs if i.get('src') is not None]
2896
        alt = imgs[0]['alt']
2897
        return {
2898
            'title': title,
2899
            'alt': alt,
2900
            'description': desc,
2901
            'author': author,
2902
            'day': day.day,
2903
            'month': day.month,
2904
            'year': day.year,
2905
            'img': [i['src'] for i in imgs],
2906
        }
2907
2908
2909
class GenericWordPressInkblot(GenericNavigableComic):
    """Generic scraper for comics published with WordPress and Inkblot.

    Navigation to the next comic is delegated to `get_link_rel_next`;
    the first comic is located from the page found at `cls.url`.
    """
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'first comic' anchor found on the page at `cls.url`."""
        home_page = get_soup_at_url(cls.url)
        first_link_classes = 'webcomic-link webcomic1-link first-webcomic-link first-webcomic1-link'
        return home_page.find('a', class_=first_link_classes)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary for the comic page parsed in `soup`.

        The result holds the title, the publication day/month/year and the
        list of image sources. `link` is not used here.
        """
        title_tag = soup.find('meta', property='og:title')
        title = title_tag['content']
        image_holder = soup.find('div', class_='webcomic-image')
        image_tags = image_holder.find_all('img')
        published = soup.find('meta', property='article:published_time')['content']
        # Only the first 10 characters are parsed, as a "%Y-%m-%d" date.
        pub_date = string_to_date(published[:10], "%Y-%m-%d")
        return {
            'title': title,
            'day': pub_date.day,
            'month': pub_date.month,
            'year': pub_date.year,
            'img': [tag['src'] for tag in image_tags],
        }
2932
@@ 781-815 (lines=35) @@
778
        desc = soup.find('meta', property='og:description')['content']
779
        date_str = soup.find('meta', property='article:publish_date')['content']
780
        day = string_to_date(date_str, "%B %d, %Y")
781
        author = soup.find('meta', property='article:author')['content']
782
        tags = soup.find('meta', property='article:tag')['content']
783
        return {
784
            'title': title,
785
            'description': desc,
786
            'img': [i['content'] for i in imgs],
787
            'author': author,
788
            'tags': tags,
789
            'day': day.day,
790
            'month': day.month,
791
            'year': day.year
792
        }
793
794
795
class VictimsOfCircumsolar(GenericNavigableComic):
    """Scraper for the Victims Of Circumsolar comics."""
    name = 'circumsolar'
    long_name = 'Victims Of Circumsolar'
    url = 'http://www.victimsofcircumsolar.com'
    get_navi_link = get_a_navi_comicnavnext_navinext
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.victimsofcircumsolar.com/comic/modern-addiction'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary for the comic page parsed in `soup`.

        The result holds the title, the description and the list of image
        sources. `link` is not used here.
        """
        # No date in the result: the date is on the archive page.
        # When several og:title / og:description metas exist, the last wins.
        title_metas = soup.find_all('meta', property='og:title')
        title = title_metas[-1]['content']
        desc_metas = soup.find_all('meta', property='og:description')
        desc = desc_metas[-1]['content']
        comic_div = soup.find('div', id='comic')
        imgs = comic_div.find_all('img')
        # Sanity check: each image's title and alt text must equal the title.
        assert all(img['title'] == img['alt'] == title for img in imgs)
        return {
            'title': title,
            'description': desc,
            'img': [img['src'] for img in imgs],
        }
817
818