|
@@ 3191-3214 (lines=24) @@
|
| 3188 |
|
first_url = 'http://geek-and-poke.com/geekandpoke/2006/8/27/a-new-place-for-a-not-so-old-blog.html' |
| 3189 |
|
|
| 3190 |
|
@classmethod |
| 3191 |
|
def get_navi_link(cls, last_soup, next_):
    """Find the anchor pointing to the following or preceding comic.

    Searches ``last_soup`` for the first ``<a>`` element whose class is
    ``prev-item`` when ``next_`` is truthy and ``next-item`` otherwise,
    and returns whatever ``find`` yields.
    """
    # NOTE(review): the class names look swapped relative to the requested
    # direction; presumably this mirrors how the site labels its links -
    # confirm against the live markup before changing it.
    if next_:
        css_class = 'prev-item'
    else:
        css_class = 'next-item'
    return last_soup.find('a', class_=css_class)
| 3194 |
|
|
| 3195 |
|
@classmethod |
| 3196 |
|
def get_comic_info(cls, soup, link): |
| 3197 |
|
"""Get information about a particular comics.""" |
| 3198 |
|
title = soup.find('meta', property='og:title')['content'] |
| 3199 |
|
desc = soup.find('meta', property='og:description')['content'] |
| 3200 |
|
date_str = soup.find('time', class_='published')['datetime'] |
| 3201 |
|
day = string_to_date(date_str, "%Y-%m-%d") |
| 3202 |
|
author = soup.find('a', rel='author').string |
| 3203 |
|
div_content = (soup.find('div', class_="body entry-content") or |
| 3204 |
|
soup.find('div', class_="special-content")) |
| 3205 |
|
imgs = div_content.find_all('img') |
| 3206 |
|
imgs = [i for i in imgs if i.get('src') is not None] |
| 3207 |
|
assert all('title' not in i or i['alt'] == i['title'] for i in imgs) |
| 3208 |
|
alt = imgs[0].get('alt', "") if imgs else [] |
| 3209 |
|
return { |
| 3210 |
|
'title': title, |
| 3211 |
|
'alt': alt, |
| 3212 |
|
'description': desc, |
| 3213 |
|
'author': author, |
| 3214 |
|
'day': day.day, |
| 3215 |
|
'month': day.month, |
| 3216 |
|
'year': day.year, |
| 3217 |
|
'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs], |
|
@@ 3124-3147 (lines=24) @@
|
| 3121 |
|
long_name = 'Make it stoopid' |
| 3122 |
|
url = 'http://makeitstoopid.com/comic.php' |
| 3123 |
|
|
| 3124 |
|
@classmethod |
| 3125 |
|
def get_nav(cls, soup):
    """Collect the navigation entries found in ``soup``.

    Every element with class ``cnav`` is fetched; the first five are
    compared against the remaining ones and the two groups are asserted
    to be equal. The first group is returned as a list in which any
    entry without an ``href`` is replaced by ``None``.
    """
    nav_items = soup.find_all(class_='cnav')
    first_bar = nav_items[:5]
    second_bar = nav_items[5:]
    assert first_bar == second_bar
    # Expected order of the entries: begin, prev, archive, next_, end.
    links = []
    for item in first_bar:
        if item.get('href') is None:
            links.append(None)
        else:
            links.append(item)
    return links
| 3132 |
|
|
| 3133 |
|
@classmethod |
| 3134 |
|
def get_first_comic_link(cls):
    """Return the first navigation entry of the page at ``cls.url``.

    The page is fetched with ``get_soup_at_url`` and handed to
    ``cls.get_nav``; the entry at index 0 of the result is returned.
    """
    landing_soup = get_soup_at_url(cls.url)
    nav_entries = cls.get_nav(landing_soup)
    return nav_entries[0]
| 3137 |
|
|
| 3138 |
|
@classmethod |
| 3139 |
|
def get_navi_link(cls, last_soup, next_):
    """Pick the next or previous entry out of the navigation list.

    ``cls.get_nav(last_soup)`` supplies the list; index 3 is returned
    when ``next_`` is truthy and index 1 otherwise.
    """
    nav_entries = cls.get_nav(last_soup)
    if next_:
        return nav_entries[3]
    return nav_entries[1]
| 3142 |
|
|
| 3143 |
|
@classmethod |
| 3144 |
|
def get_comic_info(cls, soup, link): |
| 3145 |
|
"""Get information about a particular comics.""" |
| 3146 |
|
title = link['title'] |
| 3147 |
|
imgs = soup.find_all('img', id='comicimg') |
| 3148 |
|
return { |
| 3149 |
|
'title': title, |
| 3150 |
|
'img': [i['src'] for i in imgs], |