Code Duplication    Length = 22-27 lines in 3 locations

comics.py 3 locations

@@ 1361-1387 (lines=27) @@
1358
            'title': img.get('title'),
1359
            'text': text,
1360
            'num': num,
1361
        }
1362
1363
1364
class ButterSafe(GenericListableComic):
1365
    """Class to retrieve Butter Safe comics."""
1366
    name = 'butter'
1367
    long_name = 'ButterSafe'
1368
    url = 'http://buttersafe.com'
1369
    get_url_from_archive_element = get_href
1370
    comic_link_re = re.compile('^%s/([0-9]*)/([0-9]*)/([0-9]*)/.*' % url)
1371
1372
    @classmethod
    def get_archive_elements(cls):
        """Return the archive page's comic links in reverse document order.

        Fetches the 'archive/' page under the site URL and keeps the <a>
        tags whose href matches comic_link_re.
        """
        archive_page = get_soup_at_url(urljoin_wrapper(cls.url, 'archive/'))
        comic_links = archive_page.find_all('a', href=cls.comic_link_re)
        return reversed(comic_links)
1376
1377
    @classmethod
1378
    def get_comic_info(cls, soup, link):
1379
        """Get information about a particular comics."""
1380
        url = cls.get_url_from_archive_element(link)
1381
        title = link.string
1382
        year, month, day = [int(s) for s in cls.comic_link_re.match(url).groups()]
1383
        img = soup.find('div', id='comic').find('img')
1384
        assert img['alt'] == title
1385
        return {
1386
            'title': title,
1387
            'day': day,
1388
            'month': month,
1389
            'year': year,
1390
            'img': [img['src']],
@@ 1427-1448 (lines=22) @@
1424
                            'day': int(day),
1425
                            'img': ['%s%s/%s/%s' % (cls.url, year, month, img_src)],
1426
                        }
1427
                        last_date = comic_date
1428
1429
1430
class AbstruseGoose(GenericListableComic):
1431
    """Class to retrieve AbstruseGoose Comics."""
1432
    name = 'abstruse'
1433
    long_name = 'Abstruse Goose'
1434
    url = 'http://abstrusegoose.com'
1435
    get_url_from_archive_element = get_href
1436
    comic_url_re = re.compile('^%s/([0-9]*)$' % url)
1437
    comic_img_re = re.compile('^%s/strips/.*' % url)
1438
1439
    @classmethod
    def get_archive_elements(cls):
        """Return the archive page's links whose href matches comic_url_re.

        The links are returned in the order find_all yields them (no
        reversal is applied here).
        """
        archive_page = get_soup_at_url(urljoin_wrapper(cls.url, 'archive'))
        return archive_page.find_all('a', href=cls.comic_url_re)
1443
1444
    @classmethod
1445
    def get_comic_info(cls, soup, archive_elt):
1446
        comic_url = cls.get_url_from_archive_element(archive_elt)
1447
        num = int(cls.comic_url_re.match(comic_url).groups()[0])
1448
        return {
1449
            'num': num,
1450
            'title': archive_elt.string,
1451
            'img': [soup.find('img', src=cls.comic_img_re)['src']]
@@ 2037-2060 (lines=24) @@
2034
            'title': title,
2035
            'alt': alt,
2036
            'author': author,
2037
        }
2038
2039
2040
class PoorlyDrawnLines(GenericListableComic):
    """Class to retrieve Poorly Drawn Lines comics."""
    # Also on http://pdlcomics.tumblr.com
    name = 'poorlydrawn'
    long_name = 'Poorly Drawn Lines'
    url = 'http://www.poorlydrawnlines.com'
    _categories = ('POORLYDRAWN', )
    get_url_from_archive_element = get_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        Collects the <img> tags found inside the page's 'post' div; at most
        one image is expected. Returns a dict with:
          - 'img': list of the images' src values (empty if no image),
          - 'title': the first image's title attribute, or "" when there is
            no image or no title.
        The `link` argument is not used here.
        """
        imgs = soup.find('div', class_='post').find_all('img')
        # Sanity check on the scraped page layout.
        assert len(imgs) <= 1
        return {
            'img': [i['src'] for i in imgs],
            'title': imgs[0].get('title', "") if imgs else "",
        }

    @classmethod
    def get_archive_elements(cls):
        """Return the archive page's comic links in reverse document order.

        Fetches the 'archive' page under the site URL and keeps the <a>
        tags whose href starts with '<url>/comic/' followed by at least one
        character.
        """
        archive_url = urljoin_wrapper(cls.url, 'archive')
        # Escape the base URL so that its dots only match literal dots
        # instead of acting as regex wildcards.
        url_re = re.compile('^%s/comic/.' % re.escape(cls.url))
        return reversed(get_soup_at_url(archive_url).find_all('a', href=url_re))