@@ 3419-3442 (lines=24) @@
        date_str = soup.find('time', class_='published')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        author = soup.find('a', rel='author').string
        div_content = (soup.find('div', class_="body entry-content") or
                       soup.find('div', class_="special-content"))
        imgs = div_content.find_all('img')
        imgs = [i for i in imgs if i.get('src') is not None]
        assert all('title' not in i or i['alt'] == i['title'] for i in imgs)
        alt = imgs[0].get('alt', "") if imgs else []
        return {
            'title': title,
            'alt': alt,
            'description': desc,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
        }


class GloryOwlComix(GenericNavigableComic):
    """Class to retrieve Glory Owl comics."""
    name = 'gloryowl'
    long_name = 'Glory Owl'
    url = 'http://gloryowlcomix.blogspot.fr'
    _categories = ('NSFW', 'FRANCAIS')
    get_first_comic_link = simulate_first_link
    first_url = 'http://gloryowlcomix.blogspot.fr/2013/02/1_7.html'

@@ 3352-3375 (lines=24) @@
    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic."""
        return last_soup.find('a', class_='next' if next_ else 'prev')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics."""
        title = soup.find('meta', property='og:title')['content']
        date_str = soup.find('time')["datetime"]
        day = string_to_date(date_str, "%Y-%m-%d")
        imgs = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'img': [i['content'] for i in imgs],
            'day': day.day,
            'month': day.month,
            'year': day.year,
        }


class TuMourrasMoinsBete(GenericNavigableComic):
    """Class to retrieve Tu Mourras Moins Bete comics."""
    name = 'mourrasmoinsbete'
    long_name = 'Tu Mourras Moins Bete'
    url = 'http://tumourrasmoinsbete.blogspot.fr'
    _categories = ('FRANCAIS', )
    get_first_comic_link = simulate_first_link
    first_url = 'http://tumourrasmoinsbete.blogspot.fr/2008/06/essai.html'

@@ 3162-3185 (lines=24) @@
    url = 'http://ubertoolcomic.com'
    _categories = ('UBERTOOL', )
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_a_comicnavbase_comicnavnext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics."""
        title = soup.find('h2', class_='post-title').string
        date_str = soup.find('span', class_='post-date').string
        day = string_to_date(date_str, "%B %d, %Y")
        imgs = soup.find('div', id='comic').find_all('img')
        return {
            'img': [i['src'] for i in imgs],
            'title': title,
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }


class EarthExplodes(GenericNavigableComic):
    """Class to retrieve The Earth Explodes comics."""
    name = 'earthexplodes'
    long_name = 'The Earth Explodes'
    url = 'http://www.earthexplodes.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.earthexplodes.com/comics/000/'
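
All three duplicated regions build the same result dict (title, img, day, month, year, plus optional extras such as author or alt); they differ only in how those values are scraped from the page. A minimal sketch of one way to factor out that shared shape, assuming a hypothetical helper (make_comic_info is not part of the reported module):

def make_comic_info(title, imgs, day, **extra):
    """Sketch only: build the comic-info dict shared by the duplicated bodies."""
    info = {
        'title': title,
        'img': imgs,
        'day': day.day,
        'month': day.month,
        'year': day.year,
    }
    # Optional per-comic fields, e.g. 'author', 'alt', 'description'.
    info.update(extra)
    return info

Each get_comic_info above would then keep only its soup lookups and end with a single return make_comic_info(...) call.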