|
@@ 405-430 (lines=26) @@
|
| 402 |
|
_categories = ('DELETED', ) |
| 403 |
|
|
| 404 |
|
|
| 405 |
|
class ExtraFabulousComics(GenericNavigableComic):
    """Class to retrieve Extra Fabulous Comics."""
    # Also on https://extrafabulouscomics.tumblr.com
    name = 'efc'
    long_name = 'Extra Fabulous Comics'
    url = 'http://extrafabulouscomics.com'
    _categories = ('EFC', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://extrafabulouscomics.com/comic/buttfly/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Arguments:
            soup: parsed page of the comic; must support find/find_all
                (presumably a BeautifulSoup object - to be confirmed).
            link: not used by this implementation.

        Returns a dict with keys 'title', 'img' (list of image URLs),
        'month', 'year', 'day' and 'prefix' (the title followed by '-').
        """
        # The site URL is a literal prefix, not a pattern: escape it so the
        # '.' of the domain only matches a real dot.
        img_src_re = re.compile(
            '^%s/wp-content/uploads/' % re.escape(cls.url))
        imgs = soup.find_all('img', src=img_src_re)
        title = soup.find('meta', property='og:title')['content']
        # Only the first 10 characters (YYYY-MM-DD) of the timestamp are used.
        date_str = soup.find(
            'meta', property='article:published_time')['content'][:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        return {
            'title': title,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'prefix': title + '-'
        }
| 432 |
|
|
| 433 |
|
|
|
@@ 1092-1116 (lines=25) @@
|
| 1089 |
|
} |
| 1090 |
|
|
| 1091 |
|
|
| 1092 |
|
class Mercworks(GenericNavigableComic):
    """Scraper definition for the Mercworks webcomic."""
    # Also on http://mercworks.tumblr.com
    name = 'mercworks'
    long_name = 'Mercworks'
    url = 'http://mercworks.net'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (images, title, description, date) of a comic page."""
        title_tag = soup.find('meta', property='og:title')
        title = title_tag['content']
        # The description meta tag is optional: fall back to an empty string.
        desc_tag = soup.find('meta', property='og:description')
        if desc_tag:
            desc = desc_tag['content']
        else:
            desc = ""
        published = soup.find('meta', property='article:published_time')
        # Keep only the leading YYYY-MM-DD part of the timestamp.
        day = string_to_date(published['content'][:10], "%Y-%m-%d")
        image_tags = soup.find_all('meta', property='og:image')
        image_urls = [tag['content'] for tag in image_tags]
        return {
            'img': image_urls,
            'title': title,
            'desc': desc,
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
| 1118 |
|
|
| 1119 |
|
|
|
@@ 3504-3525 (lines=22) @@
|
| 3501 |
|
author = soup.find('a', rel='author').string |
| 3502 |
|
return { |
| 3503 |
|
'title': title, |
| 3504 |
|
'img': [i['src'] for i in [img]], |
| 3505 |
|
'day': day.day, |
| 3506 |
|
'month': day.month, |
| 3507 |
|
'year': day.year, |
| 3508 |
|
'author': author, |
| 3509 |
|
} |
| 3510 |
|
|
| 3511 |
|
|
| 3512 |
|
class MarketoonistComics(GenericNavigableComic): |
| 3513 |
|
"""Class to retrieve Marketoonist Comics.""" |
| 3514 |
|
name = 'marketoonist' |
| 3515 |
|
long_name = 'Marketoonist' |
| 3516 |
|
url = 'https://marketoonist.com/cartoons' |
| 3517 |
|
get_first_comic_link = simulate_first_link |
| 3518 |
|
get_navi_link = get_link_rel_next |
| 3519 |
|
first_url = 'https://marketoonist.com/2002/10/the-8-types-of-brand-managers-2.html' |
| 3520 |
|
|
| 3521 |
|
@classmethod |
| 3522 |
|
def get_comic_info(cls, soup, link): |
| 3523 |
|
"""Get information about a particular comics.""" |
| 3524 |
|
imgs = soup.find_all('meta', property='og:image') |
| 3525 |
|
date_str = soup.find('meta', property='article:published_time')['content'][:10] |
| 3526 |
|
day = string_to_date(date_str, "%Y-%m-%d") |
| 3527 |
|
title = soup.find('meta', property='og:title')['content'] |
| 3528 |
|
return { |
|
@@ 2345-2366 (lines=22) @@
|
| 2342 |
|
} |
| 2343 |
|
|
| 2344 |
|
|
| 2345 |
|
class PeterLauris(GenericNavigableComic):
    """Scraper definition for the Peter Lauris comics."""
    name = 'peterlauris'
    long_name = 'Peter Lauris'
    url = 'http://peterlauris.com/comics'
    get_navi_link = get_a_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://peterlauris.com/comics/just-in-case/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title, images, date) of a comic page."""
        # The title is read from the twitter:title meta tag.
        title_tag = soup.find('meta', attrs={'name': 'twitter:title'})
        title = title_tag['content']
        published = soup.find('meta', property='article:published_time')
        # Keep only the leading YYYY-MM-DD part of the timestamp.
        day = string_to_date(published['content'][:10], "%Y-%m-%d")
        image_tags = soup.find_all('meta', property='og:image')
        image_urls = [tag['content'] for tag in image_tags]
        return {
            'title': title,
            'img': image_urls,
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
| 2368 |
|
|
| 2369 |
|
|
|
@@ 434-455 (lines=22) @@
|
| 431 |
|
} |
| 432 |
|
|
| 433 |
|
|
| 434 |
|
class GenericLeMondeBlog(GenericNavigableComic):
    """Shared scraper logic for comics hosted on Le Monde blogs."""
    _categories = ('LEMONDE', 'FRANCAIS')
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    # Placeholder value: no concrete first URL is defined on this class.
    first_url = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title, short link, images, date) of a post."""
        shortlink_tag = soup.find('link', rel='shortlink')
        url2 = shortlink_tag['href']
        title_tag = soup.find('meta', property='og:title')
        title = title_tag['content']
        date_tag = soup.find("span", class_="entry-date")
        # Third argument is presumably the locale used to parse the French
        # month name - to be confirmed against string_to_date.
        day = string_to_date(date_tag.string, "%d %B %Y", "fr_FR.utf8")
        image_tags = soup.find_all('meta', property='og:image')
        image_urls = [
            convert_iri_to_plain_ascii_uri(tag['content'])
            for tag in image_tags
        ]
        return {
            'title': title,
            'url2': url2,
            'img': image_urls,
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
| 457 |
|
|
| 458 |
|
|
|
@@ 1004-1029 (lines=26) @@
|
| 1001 |
|
} |
| 1002 |
|
|
| 1003 |
|
|
| 1004 |
|
class MyExtraLife(GenericNavigableComic):
    """Scraper definition for the My Extra Life webcomic."""
    name = 'extralife'
    long_name = 'My Extra Life'
    url = 'http://www.myextralife.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'first comic' navigation link found on the home page."""
        home_page = get_soup_at_url(cls.url)
        return home_page.find('a', class_='comic_nav_link first_comic_link')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title, images, date) of a comic page."""
        title = soup.find("h1", class_="comic_title").string
        date_tag = soup.find("span", class_="comic_date")
        day = string_to_date(date_tag.string, "%B %d, %Y")
        comic_imgs = soup.find_all("img", class_="comic")
        # Sanity check: alt and title of every comic image equal the title.
        assert all(
            img['alt'] == img['title'] == title for img in comic_imgs)
        # Images with an empty src are left out.
        image_urls = [img['src'] for img in comic_imgs if img["src"]]
        return {
            'title': title,
            'img': image_urls,
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
| 1031 |
|
|
| 1032 |
|
|
|
@@ 2370-2394 (lines=25) @@
|
| 2367 |
|
} |
| 2368 |
|
|
| 2369 |
|
|
| 2370 |
|
class JuliasDrawings(GenericListableComic):
    """Scraper definition for Julia's Drawings."""
    name = 'julia'
    long_name = "Julia's Drawings"
    url = 'https://drawings.jvns.ca'
    get_url_from_archive_element = get_href

    @classmethod
    def get_archive_elements(cls):
        """Return the first link of each listed post, in reversed page order."""
        posts = get_soup_at_url(cls.url).find_all('article', class_='li post')
        links = []
        for post in reversed(posts):
            links.append(post.find('a'))
        return links

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Build the info dict (title, images, date) of a drawing page."""
        published = soup.find('meta', property='og:article:published_time')
        # Keep only the leading YYYY-MM-DD part of the timestamp.
        day = string_to_date(published['content'][:10], "%Y-%m-%d")
        title = soup.find('h3', class_='p-post-title').string
        content = soup.find('section', class_='post-content')
        # Image sources are joined with the site URL.
        image_urls = [
            urljoin_wrapper(cls.url, img['src'])
            for img in content.find_all('img')
        ]
        return {
            'title': title,
            'img': image_urls,
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
| 2396 |
|
|
| 2397 |
|
|
|
@@ 2479-2501 (lines=23) @@
|
| 2476 |
|
_categories = ('WARANDPEAS', 'LINS') |
| 2477 |
|
|
| 2478 |
|
|
| 2479 |
|
class WarAndPeas(GenericNavigableComic):
    """Scraper definition for the War And Peas webcomic."""
    name = 'warandpeas'
    long_name = 'War And Peas'
    url = 'https://warandpeas.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'https://warandpeas.com/2011/11/07/565/'
    _categories = ('WARANDPEAS', 'LINS')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title, images, date) of a comic page."""
        title_tag = soup.find('meta', property='og:title')
        title = title_tag['content']
        image_tags = soup.find_all('meta', property='og:image')
        published = soup.find('meta', property='article:published_time')
        # Keep only the leading YYYY-MM-DD part of the timestamp.
        day = string_to_date(published['content'][:10], "%Y-%m-%d")
        image_urls = [tag['content'] for tag in image_tags]
        return {
            'title': title,
            'img': image_urls,
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
| 2503 |
|
|
| 2504 |
|
|