|
@@ 2010-2028 (lines=19) @@
|
| 2007 |
|
def get_url_from_archive_element(cls, td): |
| 2008 |
|
return td.find('a')['href'] |
| 2009 |
|
|
| 2010 |
|
@classmethod
def get_comic_info(cls, soup, td):
    """Get information about a particular comics.

    The archive cell `td` provides the link and the title through its
    first `a` tag, and the month and day through its previous sibling.
    The year is the run of digits that directly follows `cls.url` in the
    link returned by `get_url_from_archive_element`.  The image is the
    first `img` inside the `div` with id 'comic' found in `soup`.

    Returns a dict with the keys 'month', 'year', 'day', 'img' (a list
    of image URLs joined with `cls.url`) and 'title'.
    """
    url = cls.get_url_from_archive_element(td)
    title = td.find('a').string
    month_and_day = td.previous_sibling.string
    # Escape the base URL so that regex metacharacters it contains (such
    # as the dots of the host name) are matched literally.
    link_re = re.compile('^%s/([0-9]+)/' % re.escape(cls.url))
    year = link_re.match(url).group(1)
    date_str = month_and_day + ' ' + year
    day = string_to_date(date_str, '%b %d %Y')
    # The list is built from a single lookup, so it always holds exactly
    # one element; the length assertion below only documents that fact.
    imgs = [soup.find('div', id='comic').find('img')]
    assert len(imgs) == 1, imgs
    assert all(i['title'] == i['alt'] == title for i in imgs)
    return {
        'month': day.month,
        'year': day.year,
        'day': day.day,
        'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
        'title': title,
    }
| 2030 |
|
|
| 2031 |
|
|
|
@@ 2418-2437 (lines=20) @@
|
| 2415 |
|
link = td_comic.find('a') |
| 2416 |
|
return urljoin_wrapper(cls.url, link['href']) |
| 2417 |
|
|
| 2418 |
|
@classmethod
def get_comic_info(cls, soup, tr):
    """Build the information dict for one comic.

    `tr` is unpacked into exactly four `td` cells: the comic number, the
    cell holding the comic link, the date, and one unused cell.  `soup`
    is searched for `img` tags with id 'comic_image'; exactly one is
    expected, and its 'alt' and 'title' attributes must agree.

    Returns a dict with the keys 'num', 'title', 'alt', 'img', 'month',
    'year' and 'day'.
    """
    number_cell, comic_cell, date_cell, _ = tr.find_all('td')
    number = int(number_cell.string)
    name = comic_cell.find('a').string
    images = soup.find_all('img', id='comic_image')
    cleaned_date = remove_st_nd_rd_th_from_date(date_cell.string)
    when = string_to_date(cleaned_date, "%B %d, %Y, %I:%M %p")
    assert len(images) == 1, images
    assert not any(img.get('alt') != img.get('title') for img in images)
    info = {'num': number, 'title': name}
    info['alt'] = images[0].get('alt', '')
    info['img'] = [img['src'] for img in images]
    info['month'] = when.month
    info['year'] = when.year
    info['day'] = when.day
    return info
| 2439 |
|
|
| 2440 |
|
|