Code Duplication    Length = 22-27 lines in 4 locations

comics.py 4 locations

@@ 1387-1413 (lines=27) @@
1384
1385
class ButterSafe(GenericListableComic):
    """Class to retrieve Butter Safe comics.

    Comic links are collected from the site's ``archive/`` page.  The three
    numeric path components of a comic URL are read as year, month and day.
    """
    name = 'butter'
    long_name = 'ButterSafe'
    url = 'http://buttersafe.com'
    get_url_from_archive_element = get_href
    # The site URL is escaped so that its dots only match literal dots, and
    # every date component requires at least one digit so that int() is never
    # handed an empty string in get_comic_info.
    comic_link_re = re.compile(
        '^%s/([0-9]+)/([0-9]+)/([0-9]+)/.*' % re.escape(url))

    @classmethod
    def get_archive_elements(cls):
        """Return the comic links of the archive page, in reversed order."""
        archive_url = urljoin_wrapper(cls.url, 'archive/')
        comic_links = get_soup_at_url(archive_url).find_all(
            'a', href=cls.comic_link_re)
        return reversed(comic_links)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        soup is the parsed comic page and link is the archive element that
        pointed to it.  Returns a dict with the keys 'title', 'day', 'month',
        'year' and 'img' (a one-element list holding the image source).
        """
        url = cls.get_url_from_archive_element(link)
        title = link.string
        # The link was selected with comic_link_re, so the match is expected
        # to succeed and to yield exactly three numeric groups.
        year, month, day = [
            int(s) for s in cls.comic_link_re.match(url).groups()]
        img = soup.find('div', id='comic').find('img')
        # Sanity check: the archive title is expected to be the image alt text.
        assert img['alt'] == title
        return {
            'title': title,
            'day': day,
            'month': month,
            'year': year,
            'img': [img['src']],
        }
1413
1414
1415
class CalvinAndHobbes(GenericComic):
1416
    """Class to retrieve Calvin and Hobbes comics."""
@@ 1453-1474 (lines=22) @@
1450
1451
class AbstruseGoose(GenericListableComic):
    """Class to retrieve AbstruseGoose Comics.

    Comic links are collected from the site's ``archive`` page; a comic URL
    is the site URL followed by the comic number.
    """
    name = 'abstruse'
    long_name = 'Abstruse Goose'
    url = 'http://abstrusegoose.com'
    get_url_from_archive_element = get_href
    # The site URL is escaped so that its dots only match literal dots.  The
    # comic number requires at least one digit: with '*' the bare '<url>/'
    # would match with an empty group and int('') would fail later on.
    comic_url_re = re.compile('^%s/([0-9]+)$' % re.escape(url))
    comic_img_re = re.compile('^%s/strips/.*' % re.escape(url))

    @classmethod
    def get_archive_elements(cls):
        """Return the comic links found on the archive page."""
        archive_url = urljoin_wrapper(cls.url, 'archive')
        return get_soup_at_url(archive_url).find_all('a', href=cls.comic_url_re)

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Get information about a particular comic.

        soup is the parsed comic page and archive_elt is the archive element
        that pointed to it.  Returns a dict with the keys 'num', 'title' and
        'img' (a one-element list holding the strip image source).
        """
        comic_url = cls.get_url_from_archive_element(archive_elt)
        # The element was selected with comic_url_re, so the match is
        # expected to succeed.
        num = int(cls.comic_url_re.match(comic_url).group(1))
        return {
            'num': num,
            'title': archive_elt.string,
            'img': [soup.find('img', src=cls.comic_img_re)['src']]
        }
1474
1475
1476
class PhDComics(GenericNavigableComic):
1477
    """Class to retrieve PHD Comics."""
@@ 2063-2086 (lines=24) @@
2060
2061
class PoorlyDrawnLines(GenericListableComic):
    """Class to retrieve Poorly Drawn Lines comics.

    Comic links are collected from the site's ``archive`` page.
    """
    # Also on http://pdlcomics.tumblr.com
    name = 'poorlydrawn'
    long_name = 'Poorly Drawn Lines'
    url = 'https://www.poorlydrawnlines.com'
    _categories = ('POORLYDRAWN', )
    get_url_from_archive_element = get_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        soup is the parsed comic page; link is not used here.  Returns a dict
        with the keys 'img' (list of image sources, possibly empty) and
        'title' (the image's title attribute, or "" when there is none).
        """
        imgs = soup.find('div', class_='post').find_all('img')
        # Sanity check: a post is expected to hold at most one image.
        assert len(imgs) <= 1
        return {
            'img': [i['src'] for i in imgs],
            'title': imgs[0].get('title', "") if imgs else "",
        }

    @classmethod
    def get_archive_elements(cls):
        """Return the comic links of the archive page, in reversed order."""
        archive_url = urljoin_wrapper(cls.url, 'archive')
        # Escape the site URL so that its dots only match literal dots; the
        # trailing '.' still requires at least one character after 'comic/'.
        url_re = re.compile('^%s/comic/.' % re.escape(cls.url))
        return reversed(get_soup_at_url(archive_url).find_all('a', href=url_re))
2085
2086
2087
class LoadingComics(GenericNavigableComic):
2088
    """Class to retrieve Loading Artist comics."""
2089
    name = 'loadingartist'
@@ 1048-1069 (lines=22) @@
1045
        }
1046
1047
1048
class PerryBibleFellowship(GenericListableComic):  # Is now navigable too
    """Class to retrieve Perry Bible Fellowship comics.

    Comic links are read from the 'all_thumbnails' block of the home page.
    """
    name = 'pbf'
    long_name = 'Perry Bible Fellowship'
    url = 'http://pbfcomics.com'
    get_url_from_archive_element = join_cls_url_to_href

    @classmethod
    def get_archive_elements(cls):
        """Return the thumbnail links of the home page, in reversed order."""
        home_page = get_soup_at_url(cls.url)
        thumbnail_links = home_page.find('div', id='all_thumbnails').find_all('a')
        return reversed(thumbnail_links)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The name and the image are taken from the page's 'og:title' and
        'og:image' meta tags; link is not used here.
        """
        title_meta = soup.find('meta', property='og:title')
        info = {'name': title_meta['content']}
        image_metas = soup.find_all('meta', property='og:image')
        # Sanity check: exactly one og:image tag is expected per page.
        assert len(image_metas) == 1
        info['img'] = [meta['content'] for meta in image_metas]
        return info
1071
1072