@@ 405-430 (lines=26) @@ | ||
402 | _categories = ('DELETED', ) |
|
403 | ||
404 | ||
class ExtraFabulousComics(GenericNavigableComic):
    """Class to retrieve Extra Fabulous Comics."""
    # Also on https://extrafabulouscomics.tumblr.com
    name = 'efc'
    long_name = 'Extra Fabulous Comics'
    url = 'http://extrafabulouscomics.com'
    _categories = ('EFC', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://extrafabulouscomics.com/comic/buttfly/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Args:
            soup: parsed comic page; it is queried with ``find`` and
                ``find_all`` (presumably a BeautifulSoup document).
            link: not used by this implementation.

        Returns:
            A dict with the keys 'title', 'img' (list of image URLs),
            'month', 'year', 'day' and 'prefix' (the title followed by '-').

        Raises:
            TypeError: if the 'og:title' or 'article:published_time' meta
                tag is missing (``find`` then yields None, which cannot be
                subscripted).
        """
        # The URL is escaped so that its dots are matched literally and only
        # images hosted under this site's upload directory are selected.
        img_src_re = re.compile(
            '^%s/wp-content/uploads/' % re.escape(cls.url))
        imgs = soup.find_all('img', src=img_src_re)
        title = soup.find('meta', property='og:title')['content']
        # Only the first 10 characters (the "YYYY-MM-DD" part) are parsed.
        date_str = soup.find(
            'meta', property='article:published_time')['content'][:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        return {
            'title': title,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'prefix': title + '-'
        }
|
432 | ||
433 | ||
@@ 1092-1116 (lines=25) @@ | ||
1089 | } |
|
1090 | ||
1091 | ||
class Mercworks(GenericNavigableComic):
    """Retriever for the Mercworks comics."""
    # Also on http://mercworks.tumblr.com
    name = 'mercworks'
    long_name = 'Mercworks'
    url = 'http://mercworks.net'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract the details of one comic from its parsed page.

        The title, optional description, publication date and image URLs
        are all read from the page's <meta> tags.
        """
        title = soup.find('meta', property='og:title')['content']
        # The description tag is optional: fall back to an empty string.
        description_tag = soup.find('meta', property='og:description')
        if description_tag:
            desc = description_tag['content']
        else:
            desc = ""
        published = soup.find('meta', property='article:published_time')
        # Keep only the leading "YYYY-MM-DD" part of the timestamp.
        published_date = string_to_date(published['content'][:10], "%Y-%m-%d")
        image_tags = soup.find_all('meta', property='og:image')
        return {
            'img': [tag['content'] for tag in image_tags],
            'title': title,
            'desc': desc,
            'day': published_date.day,
            'month': published_date.month,
            'year': published_date.year
        }
|
1118 | ||
1119 | ||
@@ 3504-3525 (lines=22) @@ | ||
3501 | author = soup.find('a', rel='author').string |
|
3502 | return { |
|
3503 | 'title': title, |
|
3504 | 'img': [i['src'] for i in [img]], |
|
3505 | 'day': day.day, |
|
3506 | 'month': day.month, |
|
3507 | 'year': day.year, |
|
3508 | 'author': author, |
|
3509 | } |
|
3510 | ||
3511 | ||
3512 | class MarketoonistComics(GenericNavigableComic): |
|
3513 | """Class to retrieve Marketoonist Comics.""" |
|
3514 | name = 'marketoonist' |
|
3515 | long_name = 'Marketoonist' |
|
3516 | url = 'https://marketoonist.com/cartoons' |
|
3517 | get_first_comic_link = simulate_first_link |
|
3518 | get_navi_link = get_link_rel_next |
|
3519 | first_url = 'https://marketoonist.com/2002/10/the-8-types-of-brand-managers-2.html' |
|
3520 | ||
3521 | @classmethod |
|
3522 | def get_comic_info(cls, soup, link): |
|
3523 | """Get information about a particular comics.""" |
|
3524 | imgs = soup.find_all('meta', property='og:image') |
|
3525 | date_str = soup.find('meta', property='article:published_time')['content'][:10] |
|
3526 | day = string_to_date(date_str, "%Y-%m-%d") |
|
3527 | title = soup.find('meta', property='og:title')['content'] |
|
3528 | return { |
|
@@ 2345-2366 (lines=22) @@ | ||
2342 | } |
|
2343 | ||
2344 | ||
class PeterLauris(GenericNavigableComic):
    """Retriever for the Peter Lauris comics."""
    name = 'peterlauris'
    long_name = 'Peter Lauris'
    url = 'http://peterlauris.com/comics'
    get_navi_link = get_a_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://peterlauris.com/comics/just-in-case/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract the details of one comic from its parsed page.

        The title comes from the 'twitter:title' meta tag, the date from
        'article:published_time' and the images from the 'og:image' tags.
        """
        title_tag = soup.find('meta', attrs={'name': 'twitter:title'})
        title = title_tag['content']
        published = soup.find('meta', property='article:published_time')
        # Keep only the leading "YYYY-MM-DD" part of the timestamp.
        published_date = string_to_date(published['content'][:10], "%Y-%m-%d")
        image_tags = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'img': [tag['content'] for tag in image_tags],
            'month': published_date.month,
            'year': published_date.year,
            'day': published_date.day,
        }
|
2368 | ||
2369 | ||
@@ 434-455 (lines=22) @@ | ||
431 | } |
|
432 | ||
433 | ||
class GenericLeMondeBlog(GenericNavigableComic):
    """Generic retriever for comics hosted on Le Monde blogs."""
    _categories = ('LEMONDE', 'FRANCAIS')
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract the details of one comic from its parsed page.

        Returns a dict with the title, the short link ('url2'), the image
        URLs converted by convert_iri_to_plain_ascii_uri, and the date.
        """
        shortlink_tag = soup.find('link', rel='shortlink')
        url2 = shortlink_tag['href']
        title = soup.find('meta', property='og:title')['content']
        date_text = soup.find("span", class_="entry-date").string
        # The third argument is presumably the locale used to parse the
        # month name - confirm against string_to_date.
        published_date = string_to_date(date_text, "%d %B %Y", "fr_FR.utf8")
        image_tags = soup.find_all('meta', property='og:image')
        image_urls = [
            convert_iri_to_plain_ascii_uri(tag['content'])
            for tag in image_tags
        ]
        return {
            'title': title,
            'url2': url2,
            'img': image_urls,
            'month': published_date.month,
            'year': published_date.year,
            'day': published_date.day,
        }
|
457 | ||
458 | ||
@@ 1004-1029 (lines=26) @@ | ||
1001 | } |
|
1002 | ||
1003 | ||
class MyExtraLife(GenericNavigableComic):
    """Retriever for the My Extra Life comics."""
    name = 'extralife'
    long_name = 'My Extra Life'
    url = 'http://www.myextralife.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the link to the first comic, looked up on the home page."""
        home_page = get_soup_at_url(cls.url)
        return home_page.find('a', class_='comic_nav_link first_comic_link')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract the details of one comic from its parsed page.

        The title, date and images are read from the page's 'comic_title',
        'comic_date' and 'comic' elements respectively.
        """
        title = soup.find("h1", class_="comic_title").string
        date_text = soup.find("span", class_="comic_date").string
        published_date = string_to_date(date_text, "%B %d, %Y")
        comic_imgs = soup.find_all("img", class_="comic")
        # Sanity check: every image carries the title as both alt and title.
        assert all(img['alt'] == img['title'] == title for img in comic_imgs)
        return {
            'title': title,
            # Images with an empty src are left out.
            'img': [img['src'] for img in comic_imgs if img["src"]],
            'day': published_date.day,
            'month': published_date.month,
            'year': published_date.year
        }
|
1031 | ||
1032 | ||
@@ 2370-2394 (lines=25) @@ | ||
2367 | } |
|
2368 | ||
2369 | ||
class JuliasDrawings(GenericListableComic):
    """Retriever for Julia's Drawings."""
    name = 'julia'
    long_name = "Julia's Drawings"
    url = 'https://drawings.jvns.ca'
    get_url_from_archive_element = get_href

    @classmethod
    def get_archive_elements(cls):
        """Return the first <a> of each listed post, in reversed page order."""
        index_page = get_soup_at_url(cls.url)
        posts = index_page.find_all('article', class_='li post')
        return [post.find('a') for post in reversed(posts)]

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Extract the details of one drawing from its parsed page.

        Image sources are resolved against the site URL with
        urljoin_wrapper.
        """
        published = soup.find('meta', property='og:article:published_time')
        # Keep only the leading "YYYY-MM-DD" part of the timestamp.
        published_date = string_to_date(published['content'][:10], "%Y-%m-%d")
        title = soup.find('h3', class_='p-post-title').string
        content_section = soup.find('section', class_='post-content')
        image_tags = content_section.find_all('img')
        return {
            'title': title,
            'img': [urljoin_wrapper(cls.url, tag['src']) for tag in image_tags],
            'month': published_date.month,
            'year': published_date.year,
            'day': published_date.day,
        }
|
2396 | ||
2397 | ||
@@ 2479-2501 (lines=23) @@ | ||
2476 | _categories = ('WARANDPEAS', 'LINS') |
|
2477 | ||
2478 | ||
class WarAndPeas(GenericNavigableComic):
    """Retriever for the War And Peas comics."""
    name = 'warandpeas'
    long_name = 'War And Peas'
    url = 'https://warandpeas.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'https://warandpeas.com/2011/11/07/565/'
    _categories = ('WARANDPEAS', 'LINS')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract the details of one comic from its parsed page.

        The title, images and publication date are all read from the
        page's <meta> tags.
        """
        title = soup.find('meta', property='og:title')['content']
        image_tags = soup.find_all('meta', property='og:image')
        published = soup.find('meta', property='article:published_time')
        # Keep only the leading "YYYY-MM-DD" part of the timestamp.
        published_date = string_to_date(published['content'][:10], "%Y-%m-%d")
        return {
            'title': title,
            'img': [tag['content'] for tag in image_tags],
            'month': published_date.month,
            'year': published_date.year,
            'day': published_date.day,
        }
|
2503 | ||
2504 |