Code Duplication    Length = 30-31 lines in 2 locations

comics.py 2 locations

@@ 2996-3026 (lines=31) @@
2993
    lang = "fr_FR.utf8"
2994
2995
2996
class UnearthedComics(GenericNavigableComic):
    """Class to retrieve Unearthed comics."""
    # Also on http://tapastic.com/series/UnearthedComics
    # Also on https://unearthedcomics.tumblr.com
    name = 'unearthed'
    long_name = 'Unearthed Comics'
    url = 'http://unearthedcomics.com'
    _categories = ('UNEARTHED', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://unearthedcomics.com/comics/world-with-turn-signals/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        `soup` is the parsed comic page (queried through `find`/`find_all`);
        `link` is not used by this implementation.

        Returns a dict with the keys 'title', 'description', 'url2', 'img',
        'month', 'year' and 'day'.

        NOTE(review): the shortlink, the <time> element and the content <div>
        are subscripted/dereferenced without a check, so a page lacking any of
        them presumably raises here -- confirm that is the intended behaviour.
        """
        short_url = soup.find('link', rel='shortlink')['href']
        # Pages expose the title either as <h1> or as <h2>; fall back to an
        # empty title when neither is present.
        title_elt = soup.find('h1') or soup.find('h2')
        title = title_elt.string if title_elt else ""
        # Store the text of the og:description meta tag, not the tag object
        # itself.  A missing tag still yields None, as it did before.
        desc_elt = soup.find('meta', property='og:description')
        desc = desc_elt.get('content') if desc_elt is not None else None
        date_str = soup.find('time', class_='published updated hidden')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        post = soup.find('div', class_="entry content entry-content type-portfolio")
        imgs = post.find_all('img')
        return {
            'title': title,
            'description': desc,
            'url2': short_url,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
3028
3029
@@ 550-579 (lines=30) @@
547
    first_url = 'http://morgannavarro.blog.lemonde.fr/2015/09/09/le-doute/'
548
549
550
class Rall(GenericComicNotWorking, GenericNavigableComic):
    """Retriever for the Ted Rall comics."""
    # The strip is also published on http://www.gocomics.com/tedrall
    name = "rall"
    long_name = "Ted Rall"
    url = "http://rall.com/comic"
    _categories = ("RALL",)
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    # Not actually the earliest comic: no efficient way to find it was found.
    first_url = (
        "http://rall.com/2014/01/30/"
        "los-angeles-times-cartoon-well-miss-those-california-flowers"
    )

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract the metadata of a single comic from its parsed page.

        Returns a dict holding 'title', 'author', 'month', 'year', 'day',
        'description' and 'img' (the list of image sources).
        """
        og_title = soup.find("meta", property="og:title")
        title = og_title["content"]

        byline = soup.find("span", class_="author vcard")
        author = byline.find("a").string

        published = soup.find("span", class_="entry-date").string
        day = string_to_date(published, "%B %d, %Y")

        og_desc = soup.find("meta", property="og:description")
        desc = og_desc["content"]

        entry = soup.find("div", class_="entry-content")
        # The trailing 7 images are social media buttons, not comic panels.
        comic_imgs = entry.find_all("img")[:-7]

        return {
            "title": title,
            "author": author,
            "month": day.month,
            "year": day.year,
            "day": day.day,
            "description": desc,
            "img": [picture["src"] for picture in comic_imgs],
        }
581
582