|
@@ 4666-4682 (lines=17) @@
|
| 4663 |
|
"""Get information about a particular comics.""" |
| 4664 |
|
href = link['href'] |
| 4665 |
|
num = int(cls.link_re.match(href).groups()[0]) |
| 4666 |
|
title = link.string |
| 4667 |
|
imgs = soup.find_all('img', id='comic') |
| 4668 |
|
assert len(imgs) == 1 |
| 4669 |
|
year, month, day = [int(s) |
| 4670 |
|
for s in cls.img_re.match(imgs[0]['src']).groups()] |
| 4671 |
|
return { |
| 4672 |
|
'title': title, |
| 4673 |
|
'day': day, |
| 4674 |
|
'month': month, |
| 4675 |
|
'year': year, |
| 4676 |
|
'img': [i['src'] for i in imgs], |
| 4677 |
|
'num': num, |
| 4678 |
|
} |
| 4679 |
|
|
| 4680 |
|
@classmethod
def get_archive_elements(cls):
    """Return the archive page's matching links, last one first.

    Fetches the archive page through ``get_soup_at_url`` and queries it
    with ``find_all('a', href=cls.link_re)``.

    Returns:
        A ``reversed`` iterator over the ``find_all`` result, so the
        elements come out in the opposite order to the one in which
        ``find_all`` returned them.
    """
    archive_soup = get_soup_at_url('http://www.horovitzcomics.com/comics/archive/')
    matching_links = archive_soup.find_all('a', href=cls.link_re)
    return reversed(matching_links)
| 4684 |
|
|
| 4685 |
|
|
|
@@ 763-778 (lines=16) @@
|
| 760 |
|
# Bind helpers that are not defined in this excerpt to the hook names used
# by this class.
# NOTE(review): presumably these hooks are consumed by shared navigation
# logic outside this view — confirm against the base class.
get_first_comic_link = get_div_navfirst_a
get_navi_link = get_link_rel_next
# Invoked as cls.get_url_from_link(...) by get_comic_info to obtain the
# short URL from the page's <link rel="shortlink"> element.
get_url_from_link = join_cls_url_to_href
| 763 |
|
|
| 764 |
|
@classmethod
def get_comic_info(cls, soup, link):
    """Get information about a particular comics.

    Args:
        soup: Parsed comic page. It must expose ``find`` and the element
            found for ``div#comic`` must expose ``find_all``.
        link: Not used by this implementation; kept for the shared
            ``get_comic_info(cls, soup, link)`` signature.

    Returns:
        A dict with the keys ``'short_url'``, ``'title'`` (the image's
        ``alt`` attribute), ``'title2'`` (the image's ``title``
        attribute), ``'img'`` (a one-element list holding the image
        ``src`` joined onto ``cls.url``) and ``'num'`` (the integer taken
        from the ``?p=`` parameter of the short URL).

    Raises:
        AssertionError: If ``div#comic`` does not hold exactly one image.
        AttributeError: If the short URL does not start with
            ``<cls.url>/?p=`` (``match`` returns ``None``).
        ValueError: If the ``p=`` parameter carries no digits.
    """
    # Escape the base URL before embedding it in the pattern: otherwise
    # regex metacharacters in it (the dots of the host name in particular)
    # would match arbitrary characters instead of themselves.
    short_url_re = re.compile(r'^%s/\?p=([0-9]*)' % re.escape(cls.url))
    # NOTE(review): get_url_from_link presumably resolves the element's
    # href against cls.url — confirm against its definition.
    short_url = cls.get_url_from_link(soup.find('link', rel='shortlink'))
    num = int(short_url_re.match(short_url).groups()[0])
    imgs = soup.find('div', id='comic').find_all('img')
    # Sanity check on the page layout: exactly one comic image is expected.
    assert len(imgs) == 1
    comic_img = imgs[0]
    return {
        'short_url': short_url,
        'title': comic_img['alt'],
        'title2': comic_img['title'],
        'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
        'num': num,
    }
| 781 |
|
|