Code Duplication    Length = 22-27 lines in 2 locations

comics.py 2 locations

@@ 1386-1412 (lines=27) @@
1383
    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary for the comic behind an archive link.

        The comic number is the first group captured by ``cls.comic_link_re``
        on the link's URL; the date is parsed from the link's own string and
        the text is the string of the node that follows the link.
        """
        comic_url = cls.get_url_from_archive_element(link)
        link_match = cls.comic_link_re.match(comic_url)
        number = int(link_match.group(1))
        raw_date = link.string
        caption = link.next_sibling.string
        # Ordinal suffixes are removed first so the "%B %d, %Y" format applies.
        cleaned_date = remove_st_nd_rd_th_from_date(raw_date)
        comic_date = string_to_date(cleaned_date, "%B %d, %Y")
        # The comic image is the <img> whose src starts with '<url>/comics/'.
        image = soup.find('img', src=re.compile('^%s/comics/' % cls.url))
        info = {
            'month': comic_date.month,
            'year': comic_date.year,
            'day': comic_date.day,
            'img': [image.get('src')],
            'title': image.get('title'),
            'text': caption,
            'num': number,
        }
        return info
1402
1403
1404
class ButterSafe(GenericListableComic):
1405
    """Class to retrieve Butter Safe comics."""
1406
    name = 'butter'
1407
    long_name = 'ButterSafe'
1408
    url = 'http://buttersafe.com'
1409
    get_url_from_archive_element = get_href
1410
    comic_link_re = re.compile('^%s/([0-9]*)/([0-9]*)/([0-9]*)/.*' % url)
1411
1412
    @classmethod
    def get_archive_elements(cls):
        """Return the archive page's comic links in reversed order.

        The links are the ``<a>`` elements whose href matches
        ``cls.comic_link_re``.
        """
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive/'))
        comic_links = archive_soup.find_all('a', href=cls.comic_link_re)
        return reversed(comic_links)
@@ 1452-1473 (lines=22) @@
1449
        for link in get_soup_at_url(cls.url).find_all('a', href=link_re):
1450
            url = link['href']
1451
            year, month = link_re.match(url).groups()
1452
            if date(int(year), int(month), 1) + timedelta(days=31) >= last_date:
1453
                img_re = re.compile('^%s%s([0-9]*)' % (year, month))
1454
                month_url = urljoin_wrapper(cls.url, url)
1455
                for img in get_soup_at_url(month_url).find_all('img', src=img_re):
1456
                    img_src = img['src']
1457
                    day = int(img_re.match(img_src).groups()[0])
1458
                    comic_date = date(int(year), int(month), day)
1459
                    if comic_date > last_date:
1460
                        yield {
1461
                            'url': month_url,
1462
                            'year': int(year),
1463
                            'month': int(month),
1464
                            'day': int(day),
1465
                            'img': ['%s%s/%s/%s' % (cls.url, year, month, img_src)],
1466
                        }
1467
                        last_date = comic_date
1468
1469
1470
class AbstruseGoose(GenericListableComic):
1471
    """Class to retrieve AbstruseGoose Comics."""
1472
    name = 'abstruse'
1473
    long_name = 'Abstruse Goose'
1474
    url = 'http://abstrusegoose.com'
1475
    get_url_from_archive_element = get_href
1476
    comic_url_re = re.compile('^%s/([0-9]*)$' % url)