Code Duplication    Length = 35-37 lines in 2 locations

comics.py 2 locations

@@ 2893-2929 (lines=37) @@
2890
        date_str = soup.find('time', class_='published')['datetime']
2891
        day = string_to_date(date_str, "%Y-%m-%d")
2892
        author = soup.find('a', rel='author').string
2893
        div_content = soup.find('div', class_="body entry-content")
2894
        imgs = div_content.find_all('img')
2895
        imgs = [i for i in imgs if i.get('src') is not None]
2896
        alt = imgs[0]['alt']
2897
        return {
2898
            'title': title,
2899
            'alt': alt,
2900
            'description': desc,
2901
            'author': author,
2902
            'day': day.day,
2903
            'month': day.month,
2904
            'year': day.year,
2905
            'img': [i['src'] for i in imgs],
2906
        }
2907
2908
2909
class GenericWordPressInkblot(GenericNavigableComic):
    """Shared base for comics hosted on WordPress with the Inkblot theme.

    Navigation between comics relies on ``get_link_rel_next``; subclasses
    are expected to supply ``url`` (read by ``get_first_comic_link``).
    """
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'first comic' anchor found on the page at ``cls.url``."""
        first_link_classes = 'webcomic-link webcomic1-link first-webcomic-link first-webcomic1-link'
        home_page = get_soup_at_url(cls.url)
        return home_page.find('a', class_=first_link_classes)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, publication date and image URLs from a comic page.

        ``soup`` is the parsed comic page; ``link`` is not used here.
        Returns a dict with keys 'title', 'day', 'month', 'year' and 'img'.
        """
        title_meta = soup.find('meta', property='og:title')
        title = title_meta['content']
        image_holder = soup.find('div', class_='webcomic-image')
        images = image_holder.find_all('img')
        published_meta = soup.find('meta', property='article:published_time')
        # Only the first ten characters are kept so that the value fits the
        # "%Y-%m-%d" format handed to string_to_date.
        published_day = published_meta['content'][:10]
        when = string_to_date(published_day, "%Y-%m-%d")
        return {
            'title': title,
            'day': when.day,
            'month': when.month,
            'year': when.year,
            'img': [image['src'] for image in images],
        }
2932
@@ 781-815 (lines=35) @@
778
        imgs = soup.find_all('meta', property='og:image')
779
        desc = soup.find('meta', property='og:description')['content']
780
        date_str = soup.find('meta', property='article:publish_date')['content']
781
        day = string_to_date(date_str, "%B %d, %Y")
782
        author = soup.find('meta', property='article:author')['content']
783
        tags = soup.find('meta', property='article:tag')['content']
784
        return {
785
            'title': title,
786
            'description': desc,
787
            'img': [i['content'] for i in imgs],
788
            'author': author,
789
            'tags': tags,
790
            'day': day.day,
791
            'month': day.month,
792
            'year': day.year
793
        }
794
795
796
class VictimsOfCircumsolar(GenericNavigableComic):
    """Retriever for the 'Victims Of Circumsolar' comics."""
    name = 'circumsolar'
    long_name = 'Victims Of Circumsolar'
    url = 'http://www.victimsofcircumsolar.com'
    get_navi_link = get_a_navi_comicnavnext_navinext
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.victimsofcircumsolar.com/comic/modern-addiction'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, description and image URLs from a comic page.

        ``soup`` is the parsed comic page; ``link`` is not used here.
        Returns a dict with keys 'title', 'description' and 'img'.
        """
        # No date fields are produced: the date is on the archive page.
        # The last og:title / og:description meta tag on the page is the one used.
        title_tags = soup.find_all('meta', property='og:title')
        title = title_tags[-1]['content']
        desc_tags = soup.find_all('meta', property='og:description')
        desc = desc_tags[-1]['content']
        comic_div = soup.find('div', id='comic')
        pictures = comic_div.find_all('img')
        # Sanity check: every image's title and alt text must equal the page title.
        for picture in pictures:
            assert picture['title'] == picture['alt'] == title
        return {
            'title': title,
            'description': desc,
            'img': [picture['src'] for picture in pictures],
        }
818