Code Duplication    Length = 23-27 lines in 8 locations

comics.py 8 locations

@@ 2432-2458 (lines=27) @@
2429
    url = 'http://theawkwardyeti.com'
2430
    get_first_comic_link = get_a_navi_navifirst
2431
    get_navi_link = get_link_rel_next
2432
2433
    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract the title, post date and image URLs from a comic page.

        Returns a dict with the keys 'img', 'title', 'day', 'month', 'year'.
        """
        post_title = soup.find('h2', class_='post-title').string
        posted_on = string_to_date(
            soup.find("span", class_="post-date").string, "%B %d, %Y")
        comic_imgs = soup.find("div", id="comic").find_all("img")
        # Sanity check on the first image only: its alt and title must agree.
        assert not comic_imgs or comic_imgs[0]['alt'] == comic_imgs[0]['title']
        return dict(
            img=[img['src'] for img in comic_imgs],
            title=post_title,
            day=posted_on.day,
            month=posted_on.month,
            year=posted_on.year,
        )
2448
2449
2450
class PleasantThoughts(GenericNavigableComic):
2451
    """Class to retrieve Pleasant Thoughts comics."""
2452
    name = 'pleasant'
2453
    long_name = 'Pleasant Thoughts'
2454
    url = 'http://pleasant-thoughts.com'
2455
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
2456
    get_navi_link = get_link_rel_next
2457
2458
    @classmethod
2459
    def get_comic_info(cls, soup, link):
2460
        """Get information about a particular comics."""
2461
        post = soup.find('div', class_='post-content')
@@ 2462-2486 (lines=25) @@
2459
    def get_comic_info(cls, soup, link):
        """Collect the title and image URLs from the page's post content.

        Both are looked up inside the 'post-content' div; images are taken
        from its 'entry' div.
        """
        content = soup.find('div', class_='post-content')
        post_title = content.find('h2', class_='post-title').string
        entry_imgs = content.find("div", class_="entry").find_all("img")
        return dict(
            title=post_title,
            img=[img['src'] for img in entry_imgs],
        )
2468
2469
2470
class MisterAndMe(GenericNavigableComic):
2471
    """Class to retrieve Mister & Me Comics."""
2472
    # Also on http://www.gocomics.com/mister-and-me
2473
    # Also on https://tapastic.com/series/Mister-and-Me
2474
    name = 'mister'
2475
    long_name = 'Mister & Me'
2476
    url = 'http://www.mister-and-me.com'
2477
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
2478
    get_navi_link = get_link_rel_next
2479
2480
    @classmethod
2481
    def get_comic_info(cls, soup, link):
2482
        """Get information about a particular comics."""
2483
        title = soup.find('h2', class_='post-title').string
2484
        author = soup.find("span", class_="post-author").find("a").string
2485
        date_str = soup.find("span", class_="post-date").string
2486
        day = string_to_date(date_str, "%B %d, %Y")
2487
        imgs = soup.find("div", id="comic").find_all("img")
2488
        assert all(i['alt'] == i['title'] for i in imgs)
2489
        assert len(imgs) <= 1
@@ 2242-2266 (lines=25) @@
2239
    @classmethod
    def get_first_comic_link(cls):
        """Return a link-like dict whose 'href' is the hard-coded first comic URL."""
        first_link = {'href': 'http://www.lonniemillsap.com/?p=42'}
        return first_link
2243
2244
    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, author, image URLs and post date from a comic page.

        The title comes from the page-level 'post-title' heading; author,
        date and images are read from inside the 'post-content' div.
        """
        post_title = soup.find('h2', class_='post-title').string
        content = soup.find('div', class_='post-content')
        writer = content.find("span", class_="post-author").find("a").string
        posted_on = string_to_date(
            content.find("span", class_="post-date").string, "%B %d, %Y")
        entry_imgs = content.find("div", class_="entry").find_all("img")
        return dict(
            title=post_title,
            author=writer,
            img=[img['src'] for img in entry_imgs],
            month=posted_on.month,
            year=posted_on.year,
            day=posted_on.day,
        )
2261
2262
2263
class LinsEditions(GenericNavigableComic):
2264
    """Class to retrieve L.I.N.S. Editions comics."""
2265
    # Also on http://linscomics.tumblr.com
2266
    name = 'lins'
2267
    long_name = 'L.I.N.S. Editions'
2268
    url = 'https://linsedition.com'
2269
    get_navi_link = get_link_rel_next
@@ 2046-2070 (lines=25) @@
2043
        day = string_to_date(remove_st_nd_rd_th_from_date(date_str), "%B %d, %Y")
2044
        author = soup.find('span', class_='post-author').string
2045
        div = soup.find('div', id='comic')
2046
        imgs = div.find_all('img') if div else []
2047
        title = imgs[0]['title'] if imgs else ""
2048
        assert all(i['title'] == i['alt'] == title for i in imgs)
2049
        return {
2050
            'month': day.month,
2051
            'year': day.year,
2052
            'day': day.day,
2053
            'img': [i['src'] for i in imgs],
2054
            'title': title,
2055
            'author': author,
2056
        }
2057
2058
2059
class DepressedAlien(GenericNavigableComic):
2060
    """Class to retrieve Depressed Alien Comics."""
2061
    name = 'depressedalien'
2062
    long_name = 'Depressed Alien'
2063
    url = 'http://depressedalien.com'
2064
    get_url_from_link = join_cls_url_to_href
2065
2066
    @classmethod
    def get_first_comic_link(cls):
        """Return the parent element of the 'beginArrow' image on the page at cls.url."""
        home_soup = get_soup_at_url(cls.url)
        begin_arrow = home_soup.find('img', attrs={'name': 'beginArrow'})
        return begin_arrow.parent
2070
2071
    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the parent element of the navigation arrow image.

        The 'rightArrow' image is used when next_ is truthy, 'leftArrow' otherwise.
        """
        if next_:
            arrow_name = 'rightArrow'
        else:
            arrow_name = 'leftArrow'
        arrow_img = last_soup.find('img', attrs={'name': arrow_name})
        return arrow_img.parent
@@ 2572-2595 (lines=24) @@
2569
    get_navi_link = get_link_rel_next
2570
2571
    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract image URLs, title, alt text, author and post date from a comic page.

        The alt text is taken from the first image, or is empty when the
        'comic' div holds no image.
        """
        post_title = soup.find('h2', class_='post-title').string
        writer = soup.find("span", class_="post-author").find("a").string
        posted_on = string_to_date(
            soup.find("span", class_="post-date").string, "%B %d, %Y")
        comic_imgs = soup.find("div", id="comic").find_all("img")
        # Every image is expected to repeat its alt text as its title.
        assert all(img['alt'] == img['title'] for img in comic_imgs)
        if comic_imgs:
            alt_text = comic_imgs[0]['alt']
        else:
            alt_text = ""
        return dict(
            img=[img['src'] for img in comic_imgs],
            title=post_title,
            alt=alt_text,
            author=writer,
            day=posted_on.day,
            month=posted_on.month,
            year=posted_on.year,
        )
2590
2591
2592
class PlanC(GenericNavigableComic):
2593
    """Class to retrieve Plan C comics."""
2594
    name = 'planc'
2595
    long_name = 'Plan C'
2596
    url = 'http://www.plancomic.com'
2597
    get_first_comic_link = get_a_navi_navifirst
2598
    get_navi_link = get_a_navi_comicnavnext_navinext
@@ 1951-1974 (lines=24) @@
1948
    long_name = 'Completely Serious Comics'
1949
    url = 'http://completelyseriouscomics.com'
1950
    get_first_comic_link = get_a_navi_navifirst
1951
    get_navi_link = get_a_navi_navinext
1952
1953
    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract post date, image URLs, title, alt text and author from a comic page.

        Images are read from the 'comicpane' div; at least one is required,
        and all of them must share the same title and alt text.
        """
        post_title = soup.find('h2', class_='post-title').string
        # The author name is the second child node of the 'post-author' span.
        writer = soup.find('span', class_='post-author').contents[1].string
        posted_on = string_to_date(
            soup.find('span', class_='post-date').string, '%B %d, %Y')
        pane_imgs = soup.find('div', class_='comicpane').find_all('img')
        assert pane_imgs
        alt_text = pane_imgs[0]['title']
        assert all(img['title'] == img['alt'] == alt_text for img in pane_imgs)
        return dict(
            month=posted_on.month,
            year=posted_on.year,
            day=posted_on.day,
            img=[img['src'] for img in pane_imgs],
            title=post_title,
            alt=alt_text,
            author=writer,
        )
1973
1974
1975
class PoorlyDrawnLines(GenericListableComic):
1976
    """Class to retrieve Poorly Drawn Lines comics."""
1977
    # Also on http://pdlcomics.tumblr.com
@@ 1639-1662 (lines=24) @@
1636
        if div:
1637
            img = div.find('img')
1638
            img_src = [img['src']]
1639
            alt = img['alt']
1640
            assert alt == img['title']
1641
            title = soup.find('meta', property='og:title')['content']
1642
        else:
1643
            img_src = []
1644
            alt = ''
1645
            title = ''
1646
        return {
1647
            'month': day.month,
1648
            'year': day.year,
1649
            'day': day.day,
1650
            'img': img_src,
1651
            'title': title,
1652
            'alt': alt,
1653
            'tags': ' '.join(t.string for t in soup.find('div', class_='postmeta').find_all('a', rel='tag')),
1654
        }
1655
1656
1657
class WarehouseComic(GenericNavigableComic):
1658
    """Class to retrieve Warehouse Comic comics."""
1659
    name = 'warehouse'
1660
    long_name = 'Warehouse Comic'
1661
    url = 'http://warehousecomic.com'
1662
    get_first_comic_link = get_a_navi_navifirst
1663
    get_navi_link = get_link_rel_next
1664
1665
    @classmethod
@@ 1072-1094 (lines=23) @@
1069
1070
class GenericBouletCorp(GenericNavigableComic):
1071
    """Generic class to retrieve BouletCorp comics in different languages."""
1072
    # Also on http://bouletcorp.tumblr.com
1073
    get_navi_link = get_link_rel_next
1074
1075
    @classmethod
    def get_first_comic_link(cls):
        """Return the first anchor of the 'centered_nav' div on the page at cls.url."""
        home_soup = get_soup_at_url(cls.url)
        nav_anchors = home_soup.find('div', id='centered_nav').find_all('a')
        return nav_anchors[0]
1079
1080
    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The date is parsed from the comic URL, which must start with
        ``<cls.url>/<year>/<month>/<day>/``. Images come from the
        'storycontent' div inside the 'notes' div.

        Returns a dict with the keys 'img', 'title', 'texts', 'year',
        'month' and 'day'.
        """
        url = cls.get_url_from_link(link)
        # Escape the base URL so its dots (and any other regex
        # metacharacters) only match themselves.
        date_re = re.compile('^%s/([0-9]*)/([0-9]*)/([0-9]*)/' % re.escape(cls.url))
        year, month, day = [int(s) for s in date_re.match(url).groups()]
        imgs = soup.find('div', id='notes').find('div', class_='storycontent').find_all('img')
        # Join the non-empty image titles into a single text.
        texts = '  '.join(t for t in (i.get('title') for i in imgs) if t)
        title = soup.find('title').string
        return {
            # Images without a 'src' attribute are skipped.
            'img': [convert_iri_to_plain_ascii_uri(i['src']) for i in imgs if i.get('src') is not None],
            'title': title,
            'texts': texts,
            'year': year,
            'month': month,
            'day': day,
        }
1097