@@ 2010-2028 (lines=19) @@ | ||
2007 | archive_url = urljoin_wrapper(cls.url, 'archives/') |
|
2008 | return reversed(get_soup_at_url(archive_url).find_all('td', class_='archive-title')) |
|
2009 | ||
@classmethod
def get_url_from_archive_element(cls, td):
    """Get url corresponding to an archive element.

    `td` is an archive table cell; the value returned is the raw `href`
    attribute of the anchor located by `td.find('a')`, with no further
    processing.
    """
    anchor = td.find('a')
    return anchor['href']
|
2013 | ||
2014 | @classmethod |
|
2015 | def get_comic_info(cls, soup, td): |
|
2016 | """Get information about a particular comics.""" |
|
2017 | url = cls.get_url_from_archive_element(td) |
|
2018 | title = td.find('a').string |
|
2019 | month_and_day = td.previous_sibling.string |
|
2020 | link_re = re.compile('^%s/([0-9]+)/' % cls.url) |
|
2021 | year = link_re.match(url).groups()[0] |
|
2022 | date_str = month_and_day + ' ' + year |
|
2023 | day = string_to_date(date_str, '%b %d %Y') |
|
2024 | imgs = [soup.find('div', id='comic').find('img')] |
|
2025 | assert len(imgs) == 1, imgs |
|
2026 | assert all(i['title'] == i['alt'] == title for i in imgs) |
|
2027 | return { |
|
2028 | 'month': day.month, |
|
2029 | 'year': day.year, |
|
2030 | 'day': day.day, |
|
2031 | 'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs], |
|
@@ 2418-2437 (lines=20) @@ | ||
@classmethod
def get_url_from_archive_element(cls, tr):
    """Get url corresponding to an archive element.

    `tr` is an archive table row that must hold exactly four `td` cells
    (the unpacking below fails otherwise); the link is read from the
    second cell and resolved against `cls.url`.
    """
    cells = tr.find_all('td')
    # Only the second cell (the comic link) is needed here.
    _, comic_cell, _, _ = cells
    href = comic_cell.find('a')['href']
    return urljoin_wrapper(cls.url, href)
|
2421 | ||
2422 | @classmethod |
|
2423 | def get_comic_info(cls, soup, tr): |
|
2424 | """Get information about a particular comics.""" |
|
2425 | td_num, td_comic, td_date, _ = tr.find_all('td') |
|
2426 | num = int(td_num.string) |
|
2427 | link = td_comic.find('a') |
|
2428 | title = link.string |
|
2429 | imgs = soup.find_all('img', id='comic_image') |
|
2430 | date_str = td_date.string |
|
2431 | day = string_to_date(remove_st_nd_rd_th_from_date(date_str), "%B %d, %Y, %I:%M %p") |
|
2432 | assert len(imgs) == 1, imgs |
|
2433 | assert all(i.get('alt') == i.get('title') for i in imgs) |
|
2434 | return { |
|
2435 | 'num': num, |
|
2436 | 'title': title, |
|
2437 | 'alt': imgs[0].get('alt', ''), |
|
2438 | 'img': [i['src'] for i in imgs], |
|
2439 | 'month': day.month, |
|
2440 | 'year': day.year, |