|
@@ 782-797 (lines=16) @@
|
| 779 |
|
# Class-level alias: navigation lookup is delegated to get_link_rel_next
# (defined outside this excerpt; presumably follows <link rel="next"> - confirm).
get_navi_link = get_link_rel_next |
| 780 |
|
# Class-level alias: get_comic_info calls cls.get_url_from_link on the page's
# shortlink <link> element; join_cls_url_to_href is defined outside this excerpt.
get_url_from_link = join_cls_url_to_href |
| 781 |
|
|
| 782 |
|
@classmethod
def get_comic_info(cls, soup, link):
    """Get information about a particular comic.

    The comic number is parsed from the page's shortlink, which is expected
    to look like ``<cls.url>/?p=<num>``. The titles come from the ``alt`` and
    ``title`` attributes of the single image found in ``<div id="comic">``.

    Args:
        soup: Parsed comic page. Only ``find`` / ``find_all`` are used on it
            (assumed to be a BeautifulSoup document - confirm with callers).
        link: Not used by this implementation; kept for the shared signature.

    Returns:
        A dict with the keys ``'short_url'``, ``'title'``, ``'title2'``,
        ``'img'`` (list of image URLs joined onto ``cls.url``) and ``'num'``.

    Raises:
        AssertionError: If the comic div does not contain exactly one image.
        AttributeError: If the shortlink does not match the expected pattern
            (``match`` returns ``None``) or the comic div is missing.
    """
    # cls.url contains regex metacharacters (at least the dots of the host
    # name), so it must be escaped to be matched literally.
    short_url_re = re.compile(r'^%s/\?p=([0-9]*)' % re.escape(cls.url))
    short_url = cls.get_url_from_link(soup.find('link', rel='shortlink'))
    num = int(short_url_re.match(short_url).groups()[0])
    imgs = soup.find('div', id='comic').find_all('img')
    # Same sanity-check convention as the other scrapers in this file.
    assert len(imgs) == 1, imgs
    img = imgs[0]
    return {
        'short_url': short_url,
        'title': img['alt'],
        'title2': img['title'],
        'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
        'num': num,
    }
| 799 |
|
|
| 800 |
|
|
|
@@ 5013-5029 (lines=17) @@
|
| 5010 |
|
class HorovitzComics(GenericDeletedComic, GenericListableComic): |
| 5011 |
|
"""Generic class to handle the logic common to the different comics from Horovitz.""" |
| 5012 |
|
# Also on https://horovitzcomics.tumblr.com |
| 5013 |
|
url = 'http://www.horovitzcomics.com' |
| 5014 |
|
_categories = ('HOROVITZ', ) |
| 5015 |
|
img_re = re.compile('.*comics/([0-9]*)/([0-9]*)/([0-9]*)/.*$') |
| 5016 |
|
link_re = NotImplemented |
| 5017 |
|
get_url_from_archive_element = join_cls_url_to_href |
| 5018 |
|
|
| 5019 |
|
@classmethod |
| 5020 |
|
def get_comic_info(cls, soup, link): |
| 5021 |
|
"""Get information about a particular comics.""" |
| 5022 |
|
href = link['href'] |
| 5023 |
|
num = int(cls.link_re.match(href).groups()[0]) |
| 5024 |
|
title = link.string |
| 5025 |
|
imgs = soup.find_all('img', id='comic') |
| 5026 |
|
assert len(imgs) == 1, imgs |
| 5027 |
|
year, month, day = [int(s) |
| 5028 |
|
for s in cls.img_re.match(imgs[0]['src']).groups()] |
| 5029 |
|
return { |
| 5030 |
|
'title': title, |
| 5031 |
|
'day': day, |
| 5032 |
|
'month': month, |