Code Duplication    Length = 22-27 lines in 4 locations

comics.py 4 locations

@@ 1387-1413 (lines=27) @@
1384
1385
1386
class ButterSafe(GenericListableComic):
    """Class to retrieve Butter Safe comics."""
    name = 'butter'
    long_name = 'ButterSafe'
    url = 'http://buttersafe.com'
    get_url_from_archive_element = get_href
    # Comic URLs carry three numeric path components which get_comic_info
    # reads as year/month/day.  The base URL is escaped so that its '.' only
    # matches a literal dot, and each component requires at least one digit
    # so that int() is never handed an empty string.
    comic_link_re = re.compile(
        '^%s/([0-9]+)/([0-9]+)/([0-9]+)/.*' % re.escape(url))

    @classmethod
    def get_archive_elements(cls):
        """Return the archive page links whose href matches comic_link_re.

        The links are returned in the reverse of their order on the page
        (presumably so that the oldest comic comes first - confirm against
        the archive layout).
        """
        archive_url = urljoin_wrapper(cls.url, 'archive/')
        return reversed(get_soup_at_url(archive_url).find_all('a', href=cls.comic_link_re))

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        `soup` is the parsed comic page and `link` the archive element it
        was reached from.  Returns a dict with the keys 'title', 'day',
        'month', 'year' and 'img' (a one-element list of image sources).
        """
        url = cls.get_url_from_archive_element(link)
        title = link.string
        year, month, day = [int(s) for s in cls.comic_link_re.match(url).groups()]
        img = soup.find('div', id='comic').find('img')
        # Sanity check: the image alt text is expected to equal the link text.
        assert img['alt'] == title
        return {
            'title': title,
            'day': day,
            'month': month,
            'year': year,
            'img': [img['src']],
        }
1414
1415
1416
class CalvinAndHobbes(GenericComic):
@@ 1453-1474 (lines=22) @@
1450
1451
1452
class AbstruseGoose(GenericListableComic):
    """Class to retrieve AbstruseGoose Comics."""
    name = 'abstruse'
    long_name = 'Abstruse Goose'
    url = 'http://abstrusegoose.com'
    get_url_from_archive_element = get_href
    # The base URL is escaped so that its '.' only matches a literal dot.
    # At least one digit is required: with '*' the bare 'http://.../' URL
    # would match with an empty group and int('') would fail later on.
    comic_url_re = re.compile('^%s/([0-9]+)$' % re.escape(url))
    comic_img_re = re.compile('^%s/strips/.*' % re.escape(url))

    @classmethod
    def get_archive_elements(cls):
        """Return the archive page links whose href matches comic_url_re."""
        archive_url = urljoin_wrapper(cls.url, 'archive')
        return get_soup_at_url(archive_url).find_all('a', href=cls.comic_url_re)

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Get information about a particular comics.

        `soup` is the parsed comic page and `archive_elt` the archive link
        it was reached from.  Returns a dict with the comic number taken
        from the URL ('num'), the link text ('title') and a one-element
        list holding the source of the first image matching comic_img_re
        ('img').
        """
        comic_url = cls.get_url_from_archive_element(archive_elt)
        num = int(cls.comic_url_re.match(comic_url).group(1))
        return {
            'num': num,
            'title': archive_elt.string,
            'img': [soup.find('img', src=cls.comic_img_re)['src']]
        }
1475
1476
1477
class PhDComics(GenericNavigableComic):
@@ 2063-2086 (lines=24) @@
2060
2061
2062
class PoorlyDrawnLines(GenericListableComic):
    """Retriever for the Poorly Drawn Lines comics."""
    # Also on http://pdlcomics.tumblr.com
    name = 'poorlydrawn'
    long_name = 'Poorly Drawn Lines'
    url = 'https://www.poorlydrawnlines.com'
    _categories = ('POORLYDRAWN', )
    get_url_from_archive_element = get_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict ('img' and 'title') for one comic page.

        At most one image is expected inside the 'post' div; the title is
        that image's 'title' attribute, or an empty string when the image
        or the attribute is missing.
        """
        post = soup.find('div', class_='post')
        pictures = post.find_all('img')
        assert len(pictures) <= 1
        sources = [picture['src'] for picture in pictures]
        if pictures:
            title = pictures[0].get('title', "")
        else:
            title = ""
        return {'img': sources, 'title': title}

    @classmethod
    def get_archive_elements(cls):
        """Return the archive links to comic pages, in reversed page order."""
        comic_href_re = re.compile('^%s/comic/.' % cls.url)
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive'))
        comic_links = archive_soup.find_all('a', href=comic_href_re)
        return reversed(comic_links)
2086
2087
2088
class LoadingComics(GenericNavigableComic):
2089
    """Class to retrieve Loading Artist comics."""
@@ 1048-1069 (lines=22) @@
1045
            'year': day.year
1046
        }
1047
1048
1049
class PerryBibleFellowship(GenericListableComic):  # Is now navigable too
    """Retriever for the Perry Bible Fellowship comics."""
    name = 'pbf'
    long_name = 'Perry Bible Fellowship'
    url = 'http://pbfcomics.com'
    get_url_from_archive_element = join_cls_url_to_href

    @classmethod
    def get_archive_elements(cls):
        """Return the links found in the home page's 'all_thumbnails' div.

        The links are handed back in the reverse of their page order.
        """
        home_page = get_soup_at_url(cls.url)
        thumbnails_div = home_page.find('div', id='all_thumbnails')
        thumbnail_links = thumbnails_div.find_all('a')
        return reversed(thumbnail_links)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict ('name' and 'img') from the page's og: metas.

        Exactly one 'og:image' meta tag is expected on the page.
        """
        title_meta = soup.find('meta', property='og:title')
        name = title_meta['content']
        image_metas = soup.find_all('meta', property='og:image')
        assert len(image_metas) == 1
        image_urls = [meta['content'] for meta in image_metas]
        return {'name': name, 'img': image_urls}
1072