@@ 782-797 (lines=16) @@ | ||
779 | get_navi_link = get_link_rel_next |
|
780 | get_url_from_link = join_cls_url_to_href |
|
781 | ||
@classmethod
def get_comic_info(cls, soup, link):
    """Get information about a particular comics.

    The comic number is parsed from the URL obtained from the page's
    ``<link rel="shortlink">`` element (resolved via
    ``cls.get_url_from_link``), which is expected to start with
    ``<cls.url>/?p=<digits>``. The titles are read from the ``alt`` and
    ``title`` attributes of the image inside ``<div id="comic">``.

    ``link`` is accepted for interface compatibility but is not used here.

    Returns a dict with the keys 'short_url', 'title', 'title2', 'img'
    (list of image sources joined with ``cls.url``) and 'num'.

    Raises AssertionError if the comic div does not hold exactly one image.
    """
    # cls.url is literal text, not a pattern: escape it so that regex
    # metacharacters in the URL (e.g. '.') only match themselves.
    short_url_re = re.compile('^%s/\\?p=([0-9]*)' % re.escape(cls.url))
    short_url = cls.get_url_from_link(soup.find('link', rel='shortlink'))
    num = int(short_url_re.match(short_url).groups()[0])
    imgs = soup.find('div', id='comic').find_all('img')
    assert len(imgs) == 1, imgs
    img = imgs[0]
    title = img['alt']
    title2 = img['title']
    return {
        'short_url': short_url,
        'title': title,
        'title2': title2,
        'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
        'num': num,
    }
|
799 | ||
800 | ||
@@ 5013-5029 (lines=17) @@ | ||
@classmethod
def get_comic_info(cls, soup, link):
    """Get information about a particular comics.

    The comic number is the first group captured by ``cls.link_re`` on the
    archive link's ``href`` and the title is the link's text. The date is
    taken from the three groups captured by ``cls.img_re`` on the ``src``
    of the ``<img id="comic">`` element, read as year, month, day.

    Returns a dict with the keys 'title', 'day', 'month', 'year', 'img'
    and 'num'.

    Raises AssertionError if the page does not hold exactly one such image.
    """
    link_match = cls.link_re.match(link['href'])
    comic_num = int(link_match.group(1))
    comic_title = link.string
    comic_imgs = soup.find_all('img', id='comic')
    assert len(comic_imgs) == 1, comic_imgs
    date_match = cls.img_re.match(comic_imgs[0]['src'])
    # Convert every captured group before unpacking into the date parts.
    date_parts = [int(part) for part in date_match.groups()]
    year, month, day = date_parts
    return {
        'title': comic_title,
        'day': day,
        'month': month,
        'year': year,
        'img': [image['src'] for image in comic_imgs],
        'num': comic_num,
    }
|
5028 | ||
@classmethod
def get_archive_elements(cls):
    """Return the archive page's comic links, in reverse document order.

    Fetches the archive page and keeps the ``<a>`` elements whose ``href``
    matches ``cls.link_re``.
    """
    archive_soup = get_soup_at_url('http://www.horovitzcomics.com/comics/archive/')
    comic_links = archive_soup.find_all('a', href=cls.link_re)
    return reversed(comic_links)