|
@@ 338-360 (lines=23) @@
|
| 335 |
|
return [] |
| 336 |
|
|
| 337 |
|
|
| 338 |
|
class ExtraFabulousComics(GenericNavigableComic):
    """Class to retrieve Extra Fabulous Comics."""
    name = 'efc'
    long_name = 'Extra Fabulous Comics'
    url = 'http://extrafabulouscomics.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        `soup` is the parsed page of the comic; `link` is not used here.
        Returns a dict with the keys 'title', 'img', 'month', 'year', 'day'
        and 'prefix'.
        """
        # Escape the site URL before embedding it in the pattern: otherwise
        # the dots of the host name are regex wildcards and the pattern
        # accepts sources that do not actually start with the site URL.
        img_src_re = re.compile(
            '^%s/wp-content/uploads/' % re.escape(cls.url))
        imgs = soup.find_all('img', src=img_src_re)
        title = soup.find('meta', property='og:title')['content']
        # Only the first 10 characters (the "YYYY-MM-DD" part) of the
        # published time are kept for date parsing.
        date_str = soup.find(
            'meta', property='article:published_time')['content'][:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        return {
            'title': title,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'prefix': title + '-'
        }
| 362 |
|
|
| 363 |
|
|
|
@@ 364-384 (lines=21) @@
|
| 361 |
|
} |
| 362 |
|
|
| 363 |
|
|
| 364 |
|
class GenericLeMondeBlog(GenericNavigableComic):
    """Generic class to retrieve comics from Le Monde blogs."""
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    # Placeholder value; presumably set by concrete subclasses - TODO confirm.
    first_url = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Reads the short link, title, date and images from the page and
        returns them in a dict; `link` is not used here.
        """
        shortlink_tag = soup.find('link', rel='shortlink')
        short_url = shortlink_tag['href']
        title_tag = soup.find('meta', property='og:title')
        comic_title = title_tag['content']
        date_tag = soup.find("span", class_="entry-date")
        # The date text is parsed with a French locale ("%d %B %Y").
        published = string_to_date(date_tag.string, "%d %B %Y", "fr_FR.utf8")
        image_urls = []
        for meta in soup.find_all('meta', property='og:image'):
            image_urls.append(convert_iri_to_plain_ascii_uri(meta['content']))
        return {
            'title': comic_title,
            'url2': short_url,
            'img': image_urls,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
| 386 |
|
|
| 387 |
|
|
|
@@ 900-925 (lines=26) @@
|
| 897 |
|
} |
| 898 |
|
|
| 899 |
|
|
| 900 |
|
class MyExtraLife(GenericNavigableComic):
    """Class to retrieve My Extra Life comics."""
    name = 'extralife'
    long_name = 'My Extra Life'
    url = 'http://www.myextralife.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        Fetches the page at the class url and looks up the anchor whose
        class is 'comic_nav_link first_comic_link'.
        """
        home_page = get_soup_at_url(cls.url)
        return home_page.find('a', class_='comic_nav_link first_comic_link')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with the keys 'title', 'img', 'day', 'month' and
        'year'; `link` is not used here.
        """
        title_tag = soup.find("h1", class_="comic_title")
        comic_title = title_tag.string
        date_tag = soup.find("span", class_="comic_date")
        published = string_to_date(date_tag.string, "%B %d, %Y")
        comic_imgs = soup.find_all("img", class_="comic")
        # Sanity check: every comic image is expected to carry the comic
        # title in both its alt and title attributes.
        assert all(
            img['alt'] == img['title'] == comic_title for img in comic_imgs)
        # Images with an empty src are left out.
        sources = []
        for img in comic_imgs:
            src = img['src']
            if src:
                sources.append(src)
        return {
            'title': comic_title,
            'img': sources,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
| 927 |
|
|
| 928 |
|
|
|
@@ 2250-2272 (lines=23) @@
|
| 2247 |
|
} |
| 2248 |
|
|
| 2249 |
|
|
| 2250 |
|
class LinsEditions(GenericNavigableComic):
    """Class to retrieve L.I.N.S. Editions comics."""
    # Also on http://linscomics.tumblr.com
    name = 'lins'
    long_name = 'L.I.N.S. Editions'
    url = 'https://linsedition.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'https://linsedition.com/2011/09/07/l-i-n-s/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Everything is read from the page's <meta> tags; `link` is not
        used here.
        """
        title_meta = soup.find('meta', property='og:title')
        comic_title = title_meta['content']
        image_metas = soup.find_all('meta', property='og:image')
        published_meta = soup.find('meta', property='article:published_time')
        # Keep only the leading "YYYY-MM-DD" part of the published time.
        iso_date = published_meta['content'][:10]
        published = string_to_date(iso_date, "%Y-%m-%d")
        image_urls = []
        for meta in image_metas:
            image_urls.append(meta['content'])
        return {
            'title': comic_title,
            'img': image_urls,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
| 2274 |
|
|
| 2275 |
|
|
|
@@ 2407-2431 (lines=25) @@
|
| 2404 |
|
} |
| 2405 |
|
|
| 2406 |
|
|
| 2407 |
|
class TheAwkwardYeti(GenericNavigableComic):
    """Class to retrieve The Awkward Yeti comics."""
    # Also on http://www.gocomics.com/the-awkward-yeti
    # Also on http://larstheyeti.tumblr.com
    # Also on https://tapastic.com/series/TheAwkwardYeti
    name = 'yeti'
    long_name = 'The Awkward Yeti'
    url = 'http://theawkwardyeti.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with the keys 'img', 'title', 'day', 'month' and
        'year'; `link` is not used here.
        """
        title_tag = soup.find('h2', class_='post-title')
        comic_title = title_tag.string
        date_tag = soup.find("span", class_="post-date")
        published = string_to_date(date_tag.string, "%B %d, %Y")
        comic_div = soup.find("div", id="comic")
        comic_imgs = comic_div.find_all("img")
        # Sanity check: only the first image (if any) must have matching
        # alt and title attributes; later images are not checked.
        assert all(img['alt'] == img['title'] for img in comic_imgs[:1])
        sources = []
        for img in comic_imgs:
            sources.append(img['src'])
        return {
            'img': sources,
            'title': comic_title,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
| 2433 |
|
|
| 2434 |
|
|