Code Duplication    Length = 22-27 lines in 2 locations

comics.py 2 locations

@@ 1386-1412 (lines=27) @@
1383
        # first link is random -> skip it
1384
        return reversed(get_soup_at_url(archive_url).find_all('a', href=cls.comic_link_re)[1:])
1385
1386
    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Args:
            soup: parsed page of the comic; it is searched for the first
                ``<img>`` whose ``src`` starts with ``<cls.url>/comics/``.
            link: archive link element of the comic; its string is parsed
                as the publication date ("%B %d, %Y" once the ordinal
                suffix is removed) and the string of its next sibling is
                used as the comic text.

        Returns:
            A dict with the keys 'month', 'year', 'day', 'img', 'title',
            'text' and 'num'.
        """
        url = cls.get_url_from_archive_element(link)
        # The first group captured by comic_link_re is the comic number.
        num = int(cls.comic_link_re.match(url).groups()[0])
        date_str = link.string
        text = link.next_sibling.string
        day = string_to_date(remove_st_nd_rd_th_from_date(date_str), "%B %d, %Y")
        # Escape the base URL so that its dots are matched literally and not
        # as "any character" regex wildcards.
        comic_img_re = re.compile('^%s/comics/' % re.escape(cls.url))
        img = soup.find('img', src=comic_img_re)
        return {
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [img.get('src')],
            'title': img.get('title'),
            'text': text,
            'num': num,
        }
1405
1406
1407
class ButterSafe(GenericListableComic):
1408
    """Class to retrieve Butter Safe comics."""
1409
    name = 'butter'
1410
    long_name = 'ButterSafe'
1411
    url = 'http://buttersafe.com'
1412
    get_url_from_archive_element = get_href
1413
    comic_link_re = re.compile('^%s/([0-9]*)/([0-9]*)/([0-9]*)/.*' % url)
1414
1415
    @classmethod
@@ 1452-1473 (lines=22) @@
1449
            last_comic) if last_comic else date(1985, 11, 1)
1450
        link_re = re.compile('^([0-9]*)/([0-9]*)/')
1451
        img_re = re.compile('')
1452
        for link in get_soup_at_url(cls.url).find_all('a', href=link_re):
1453
            url = link['href']
1454
            year, month = link_re.match(url).groups()
1455
            if date(int(year), int(month), 1) + timedelta(days=31) >= last_date:
1456
                img_re = re.compile('^%s%s([0-9]*)' % (year, month))
1457
                month_url = urljoin_wrapper(cls.url, url)
1458
                for img in get_soup_at_url(month_url).find_all('img', src=img_re):
1459
                    img_src = img['src']
1460
                    day = int(img_re.match(img_src).groups()[0])
1461
                    comic_date = date(int(year), int(month), day)
1462
                    if comic_date > last_date:
1463
                        yield {
1464
                            'url': month_url,
1465
                            'year': int(year),
1466
                            'month': int(month),
1467
                            'day': int(day),
1468
                            'img': ['%s%s/%s/%s' % (cls.url, year, month, img_src)],
1469
                        }
1470
                        last_date = comic_date
1471
1472
1473
class AbstruseGoose(GenericListableComic):
1474
    """Class to retrieve AbstruseGoose Comics."""
1475
    name = 'abstruse'
1476
    long_name = 'Abstruse Goose'