@@ 3191-3214 (lines=24) @@ | ||
3188 | first_url = 'http://geek-and-poke.com/geekandpoke/2006/8/27/a-new-place-for-a-not-so-old-blog.html' |
|
3189 | ||
3190 | @classmethod |
|
def get_navi_link(cls, last_soup, next_):
    """Return the anchor leading to the neighbouring comic.

    Looks up the first ``<a>`` element in ``last_soup`` carrying the
    navigation CSS class that corresponds to the requested direction.
    """
    # NOTE(review): the class names look swapped on purpose ('prev-item'
    # is used when moving forward) - presumably the site orders its posts
    # newest-first; confirm against the live markup.
    if next_:
        css_class = 'prev-item'
    else:
        css_class = 'next-item'
    return last_soup.find('a', class_=css_class)
|
3194 | ||
3195 | @classmethod |
|
3196 | def get_comic_info(cls, soup, link): |
|
3197 | """Get information about a particular comics.""" |
|
3198 | title = soup.find('meta', property='og:title')['content'] |
|
3199 | desc = soup.find('meta', property='og:description')['content'] |
|
3200 | date_str = soup.find('time', class_='published')['datetime'] |
|
3201 | day = string_to_date(date_str, "%Y-%m-%d") |
|
3202 | author = soup.find('a', rel='author').string |
|
3203 | div_content = (soup.find('div', class_="body entry-content") or |
|
3204 | soup.find('div', class_="special-content")) |
|
3205 | imgs = div_content.find_all('img') |
|
3206 | imgs = [i for i in imgs if i.get('src') is not None] |
|
3207 | assert all('title' not in i or i['alt'] == i['title'] for i in imgs) |
|
3208 | alt = imgs[0].get('alt', "") if imgs else [] |
|
3209 | return { |
|
3210 | 'title': title, |
|
3211 | 'alt': alt, |
|
3212 | 'description': desc, |
|
3213 | 'author': author, |
|
3214 | 'day': day.day, |
|
3215 | 'month': day.month, |
|
3216 | 'year': day.year, |
|
3217 | 'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs], |
|
@@ 3124-3147 (lines=24) @@ | ||
3121 | long_name = 'Make it stoopid' |
|
3122 | url = 'http://makeitstoopid.com/comic.php' |
|
3123 | ||
3124 | @classmethod |
|
def get_nav(cls, soup):
    """Extract the navigation elements from a parsed page.

    Collects every element with the ``cnav`` class, takes the first five
    as one navigation bar and checks that the remaining ones are an
    identical copy.  Returns a list built from that first bar where each
    element lacking an ``href`` is replaced by ``None``.
    """
    elements = soup.find_all(class_='cnav')
    first_bar = elements[:5]
    second_bar = elements[5:]
    # Sanity check: both halves are expected to hold the same links.
    assert first_bar == second_bar
    # Order within a bar: begin, prev, archive, next_, end
    links = []
    for element in first_bar:
        links.append(element if element.get('href') is not None else None)
    return links
|
3132 | ||
3133 | @classmethod |
|
def get_first_comic_link(cls):
    """Return the link to the first comic.

    Fetches the page at ``cls.url`` and returns the first entry of the
    navigation list produced by ``get_nav``.
    """
    landing_soup = get_soup_at_url(cls.url)
    nav_links = cls.get_nav(landing_soup)
    return nav_links[0]
|
3137 | ||
3138 | @classmethod |
|
def get_navi_link(cls, last_soup, next_):
    """Return the link to the next or previous comic.

    Picks entry 3 of the navigation list returned by ``get_nav`` when
    ``next_`` is truthy and entry 1 otherwise.
    """
    if next_:
        position = 3
    else:
        position = 1
    nav_links = cls.get_nav(last_soup)
    return nav_links[position]
|
3142 | ||
3143 | @classmethod |
|
3144 | def get_comic_info(cls, soup, link): |
|
3145 | """Get information about a particular comics.""" |
|
3146 | title = link['title'] |
|
3147 | imgs = soup.find_all('img', id='comicimg') |
|
3148 | return { |
|
3149 | 'title': title, |
|
3150 | 'img': [i['src'] for i in imgs], |