Code Duplication    Length = 22-27 lines in 2 locations

comics.py 2 locations

@@ 1327-1353 (lines=27) @@
1324
        comic_img_re = re.compile('^%s/comics/' % cls.url)
1325
        img = soup.find('img', src=comic_img_re)
1326
        return {
1327
            'month': day.month,
1328
            'year': day.year,
1329
            'day': day.day,
1330
            'img': [img.get('src')],
1331
            'title': img.get('title'),
1332
            'text': text,
1333
            'num': num,
1334
        }
1335
1336
1337
class ButterSafe(GenericListableComic):
1338
    """Class to retrieve Butter Safe comics."""
1339
    name = 'butter'
1340
    long_name = 'ButterSafe'
1341
    url = 'http://buttersafe.com'
1342
    get_url_from_archive_element = get_href
1343
    comic_link_re = re.compile('^%s/([0-9]*)/([0-9]*)/([0-9]*)/.*' % url)
1344
1345
    @classmethod
    def get_archive_elements(cls):
        """Get the archive links matching comic_link_re, in reversed order.

        The archive address is built with urljoin_wrapper from cls.url and
        'archive/'; the 'a' elements whose href matches cls.comic_link_re
        are collected from that page and handed back through reversed().
        """
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive/'))
        comic_links = archive_soup.find_all('a', href=cls.comic_link_re)
        # NOTE(review): presumably the page lists newest comics first, hence
        # the reversal - confirm against the live archive page.
        return reversed(comic_links)
1349
1350
    @classmethod
1351
    def get_comic_info(cls, soup, link):
1352
        """Get information about a particular comics."""
1353
        url = cls.get_url_from_archive_element(link)
1354
        title = link.string
1355
        year, month, day = [int(s) for s in cls.comic_link_re.match(url).groups()]
1356
        img = soup.find('div', id='comic').find('img')
@@ 1393-1414 (lines=22) @@
1390
                    day = int(img_re.match(img_src).groups()[0])
1391
                    comic_date = date(int(year), int(month), day)
1392
                    if comic_date > last_date:
1393
                        yield {
1394
                            'url': month_url,
1395
                            'year': int(year),
1396
                            'month': int(month),
1397
                            'day': int(day),
1398
                            'img': ['%s%s/%s/%s' % (cls.url, year, month, img_src)],
1399
                        }
1400
                        last_date = comic_date
1401
1402
1403
class AbstruseGoose(GenericListableComic):
1404
    """Class to retrieve AbstruseGoose Comics."""
1405
    name = 'abstruse'
1406
    long_name = 'Abstruse Goose'
1407
    url = 'http://abstrusegoose.com'
1408
    get_url_from_archive_element = get_href
1409
    comic_url_re = re.compile('^%s/([0-9]*)$' % url)
1410
    comic_img_re = re.compile('^%s/strips/.*' % url)
1411
1412
    @classmethod
    def get_archive_elements(cls):
        """Get the archive links matching comic_url_re, in page order.

        The archive address is built with urljoin_wrapper from cls.url and
        'archive'; the 'a' elements whose href matches cls.comic_url_re are
        collected from that page and returned as found.
        """
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive'))
        comic_links = archive_soup.find_all('a', href=cls.comic_url_re)
        return comic_links
1416
1417
    @classmethod