Code Duplication    Length = 22-27 lines in 2 locations

comics.py 2 locations

@@ 1327-1353 (lines=27) @@
1324
    long_name = 'ButterSafe'
1325
    url = 'http://buttersafe.com'
1326
    get_url_from_archive_element = get_href
1327
    comic_link_re = re.compile('^%s/([0-9]*)/([0-9]*)/([0-9]*)/.*' % url)
1328
1329
    @classmethod
    def get_archive_elements(cls):
        """Return the archive links matching comic_link_re, in reversed order.

        The archive page is fetched from the 'archive/' path joined onto
        cls.url; the result is a reversed iterator over the matching <a> tags.
        """
        archive_page = get_soup_at_url(urljoin_wrapper(cls.url, 'archive/'))
        comic_links = archive_page.find_all('a', href=cls.comic_link_re)
        return reversed(comic_links)
1333
1334
    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        The year, month and day are parsed from the three numeric groups of
        the comic URL taken from the archive link. The image is the first
        <img> found inside the element with id 'comic' on the comic page.
        """
        comic_url = cls.get_url_from_archive_element(link)
        title = link.string
        year, month, day = map(int, cls.comic_link_re.match(comic_url).groups())
        comic_div = soup.find('div', id='comic')
        img = comic_div.find('img')
        # Sanity check: the image's alt text should be the archive link's title.
        assert img['alt'] == title
        return dict(
            title=title,
            day=day,
            month=month,
            year=year,
            img=[img['src']],
        )
1349
1350
1351
class CalvinAndHobbes(GenericComic):
1352
    """Class to retrieve Calvin and Hobbes comics."""
1353
    # Also on http://www.gocomics.com/calvinandhobbes/
1354
    name = 'calvin'
1355
    long_name = 'Calvin and Hobbes'
1356
    # This is not through any official webpage but eh...
@@ 1393-1414 (lines=22) @@
1390
    long_name = 'Abstruse Goose'
1391
    url = 'http://abstrusegoose.com'
1392
    get_url_from_archive_element = get_href
1393
    comic_url_re = re.compile('^%s/([0-9]*)$' % url)
1394
    comic_img_re = re.compile('^%s/strips/.*' % url)
1395
1396
    @classmethod
    def get_archive_elements(cls):
        """Return the archive-page links whose href matches comic_url_re.

        The archive page is fetched from the 'archive' path joined onto cls.url.
        """
        archive_page = get_soup_at_url(urljoin_wrapper(cls.url, 'archive'))
        return archive_page.find_all('a', href=cls.comic_url_re)
1400
1401
    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Get information about a particular comic.

        The comic number is the numeric group captured by comic_url_re from
        the archive element's URL, the title is the archive element's string,
        and the image is the first <img> whose src matches comic_img_re.
        """
        url = cls.get_url_from_archive_element(archive_elt)
        number = int(cls.comic_url_re.match(url).group(1))
        title = archive_elt.string
        strip_img = soup.find('img', src=cls.comic_img_re)
        return dict(
            num=number,
            title=title,
            img=[strip_img['src']],
        )
1410
1411
1412
class PhDComics(GenericNavigableComic):
1413
    """Class to retrieve PHD Comics."""
1414
    name = 'phd'
1415
    long_name = 'PhD Comics'
1416
    url = 'http://phdcomics.com/comics/archive.php'
1417
    get_url_from_link = join_cls_url_to_href