@@ 389-414 (lines=26) @@
386 |     _categories = ('DELETED', )
387 |
388 |
389 | class ExtraFabulousComics(GenericNavigableComic):
390 |     """Class to retrieve Extra Fabulous Comics."""
391 |     # Also on https://extrafabulouscomics.tumblr.com
392 |     name = 'efc'
393 |     long_name = 'Extra Fabulous Comics'
394 |     url = 'http://extrafabulouscomics.com'
395 |     _categories = ('EFC', )
396 |     get_navi_link = get_link_rel_next
397 |     get_first_comic_link = simulate_first_link
398 |     first_url = 'http://extrafabulouscomics.com/comic/buttfly/'
399 |
400 |     @classmethod
401 |     def get_comic_info(cls, soup, link):
402 |         """Get information about a particular comics."""
403 |         img_src_re = re.compile('^%s/wp-content/uploads/' % cls.url)
404 |         imgs = soup.find_all('img', src=img_src_re)
405 |         title = soup.find('meta', property='og:title')['content']
406 |         date_str = soup.find('meta', property='article:published_time')['content'][:10]
407 |         day = string_to_date(date_str, "%Y-%m-%d")
408 |         return {
409 |             'title': title,
410 |             'img': [i['src'] for i in imgs],
411 |             'month': day.month,
412 |             'year': day.year,
413 |             'day': day.day,
414 |             'prefix': title + '-'
415 |         }
416 |
417 |
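
The get_comic_info methods in this report all lean on a string_to_date helper that is defined elsewhere in the module and is not shown here. As a rough sketch only (the name and the call sites are from the source; the body and exact signature below are assumptions), such a helper essentially wraps datetime.strptime and optionally switches the locale so that locale-dependent directives like %B can be parsed, as in the "fr_FR.utf8" call further down:

import datetime
import locale


def string_to_date(string, date_format, locale_name=None):
    """Hypothetical sketch of the string_to_date helper used above."""
    if locale_name is None:
        return datetime.datetime.strptime(string, date_format).date()
    # Month names for %B depend on LC_TIME, so switch it temporarily.
    prev = locale.setlocale(locale.LC_TIME)
    locale.setlocale(locale.LC_TIME, locale_name)
    try:
        return datetime.datetime.strptime(string, date_format).date()
    finally:
        locale.setlocale(locale.LC_TIME, prev)

With a helper along those lines, string_to_date("2016-03-14", "%Y-%m-%d") yields a date whose .year, .month and .day attributes are what the returned dictionaries expose.
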
@@ 2395-2419 (lines=25) @@
2392 |         }
2393 |
2394 |
2395 | class LinsEditions(GenericNavigableComic):
2396 |     """Class to retrieve L.I.N.S. Editions comics."""
2397 |     # Also on https://linscomics.tumblr.com
2398 |     # Now on https://warandpeas.com
2399 |     name = 'lins'
2400 |     long_name = 'L.I.N.S. Editions'
2401 |     url = 'https://linsedition.com'
2402 |     _categories = ('LINS', )
2403 |     get_navi_link = get_link_rel_next
2404 |     get_first_comic_link = simulate_first_link
2405 |     first_url = 'https://linsedition.com/2011/09/07/l-i-n-s/'
2406 |
2407 |     @classmethod
2408 |     def get_comic_info(cls, soup, link):
2409 |         """Get information about a particular comics."""
2410 |         title = soup.find('meta', property='og:title')['content']
2411 |         imgs = soup.find_all('meta', property='og:image')
2412 |         date_str = soup.find('meta', property='article:published_time')['content'][:10]
2413 |         day = string_to_date(date_str, "%Y-%m-%d")
2414 |         return {
2415 |             'title': title,
2416 |             'img': [i['content'] for i in imgs],
2417 |             'month': day.month,
2418 |             'year': day.year,
2419 |             'day': day.day,
2420 |         }
2421 |
2422 |
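
ExtraFabulousComics, LinsEditions, Mercworks and MarketoonistComics share the same scraping idiom: read the Open Graph <meta> tags that WordPress-style themes put in the page head. The standalone snippet below (the HTML is made up; only the BeautifulSoup calls mirror the code above) shows what each lookup returns:

from bs4 import BeautifulSoup

html = """<html><head>
<meta property="og:title" content="Example strip"/>
<meta property="og:image" content="https://example.com/strip-panel-1.png"/>
<meta property="og:image" content="https://example.com/strip-panel-2.png"/>
<meta property="article:published_time" content="2016-03-14T12:00:00+00:00"/>
</head><body></body></html>"""

soup = BeautifulSoup(html, "html.parser")
title = soup.find('meta', property='og:title')['content']       # 'Example strip'
imgs = soup.find_all('meta', property='og:image')                # one tag per image
date_str = soup.find('meta', property='article:published_time')['content'][:10]
print(title, [i['content'] for i in imgs], date_str)             # ... '2016-03-14'

Slicing published_time to its first ten characters drops the time-of-day part and leaves exactly the %Y-%m-%d string that string_to_date expects.
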
@@ 1076-1100 (lines=25) @@
1073 |         }
1074 |
1075 |
1076 | class Mercworks(GenericNavigableComic):
1077 |     """Class to retrieve Mercworks comics."""
1078 |     # Also on http://mercworks.tumblr.com
1079 |     name = 'mercworks'
1080 |     long_name = 'Mercworks'
1081 |     url = 'http://mercworks.net'
1082 |     get_first_comic_link = get_a_comicnavbase_comicnavfirst
1083 |     get_navi_link = get_link_rel_next
1084 |
1085 |     @classmethod
1086 |     def get_comic_info(cls, soup, link):
1087 |         """Get information about a particular comics."""
1088 |         title = soup.find('meta', property='og:title')['content']
1089 |         metadesc = soup.find('meta', property='og:description')
1090 |         desc = metadesc['content'] if metadesc else ""
1091 |         date_str = soup.find('meta', property='article:published_time')['content'][:10]
1092 |         day = string_to_date(date_str, "%Y-%m-%d")
1093 |         imgs = soup.find_all('meta', property='og:image')
1094 |         return {
1095 |             'img': [i['content'] for i in imgs],
1096 |             'title': title,
1097 |             'desc': desc,
1098 |             'day': day.day,
1099 |             'month': day.month,
1100 |             'year': day.year
1101 |         }
1102 |
1103 |
@@ 3402-3423 (lines=22) @@
3399 |         }
3400 |
3401 |
3402 | class MarketoonistComics(GenericNavigableComic):
3403 |     """Class to retrieve Marketoonist Comics."""
3404 |     name = 'marketoonist'
3405 |     long_name = 'Marketoonist'
3406 |     url = 'https://marketoonist.com/cartoons'
3407 |     get_first_comic_link = simulate_first_link
3408 |     get_navi_link = get_link_rel_next
3409 |     first_url = 'https://marketoonist.com/2002/10/the-8-types-of-brand-managers-2.html'
3410 |
3411 |     @classmethod
3412 |     def get_comic_info(cls, soup, link):
3413 |         """Get information about a particular comics."""
3414 |         imgs = soup.find_all('meta', property='og:image')
3415 |         date_str = soup.find('meta', property='article:published_time')['content'][:10]
3416 |         day = string_to_date(date_str, "%Y-%m-%d")
3417 |         title = soup.find('meta', property='og:title')['content']
3418 |         return {
3419 |             'img': [i['content'] for i in imgs],
3420 |             'day': day.day,
3421 |             'month': day.month,
3422 |             'year': day.year,
3423 |             'title': title,
3424 |         }
3425 |
3426 |
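
Every GenericNavigableComic subclass above wires up two navigation hooks: get_first_comic_link (usually simulate_first_link together with a hard-coded first_url) and get_navi_link = get_link_rel_next. Their implementations are not part of this report; the class below is a hypothetical stand-in that only illustrates the idea, not the project's actual code:

from bs4 import BeautifulSoup


class NavigationSketch(object):
    """Hypothetical stand-ins for the navigation helpers assumed above."""

    first_url = 'https://example.com/comic/first/'  # placeholder value

    @classmethod
    def simulate_first_link(cls):
        # Fake a link-like object whose 'href' is the hard-coded first page,
        # so crawling can start even when the site has no "first" button.
        return {'href': cls.first_url}

    @classmethod
    def get_link_rel_next(cls, last_soup):
        # WordPress-style pages expose chronological navigation in the head
        # as <link rel="next" href="...">; return that element.
        return last_soup.find('link', rel='next')


page = BeautifulSoup('<head><link rel="next" href="https://example.com/comic/2/"/></head>',
                     'html.parser')
print(NavigationSketch.simulate_first_link()['href'])
print(NavigationSketch.get_link_rel_next(page)['href'])
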
@@ 418-439 (lines=22) @@
415 |         }
416 |
417 |
418 | class GenericLeMondeBlog(GenericNavigableComic):
419 |     """Generic class to retrieve comics from Le Monde blogs."""
420 |     _categories = ('LEMONDE', 'FRANCAIS')
421 |     get_navi_link = get_link_rel_next
422 |     get_first_comic_link = simulate_first_link
423 |     first_url = NotImplemented
424 |
425 |     @classmethod
426 |     def get_comic_info(cls, soup, link):
427 |         """Get information about a particular comics."""
428 |         url2 = soup.find('link', rel='shortlink')['href']
429 |         title = soup.find('meta', property='og:title')['content']
430 |         date_str = soup.find("span", class_="entry-date").string
431 |         day = string_to_date(date_str, "%d %B %Y", "fr_FR.utf8")
432 |         imgs = soup.find_all('meta', property='og:image')
433 |         return {
434 |             'title': title,
435 |             'url2': url2,
436 |             'img': [convert_iri_to_plain_ascii_uri(i['content']) for i in imgs],
437 |             'month': day.month,
438 |             'year': day.year,
439 |             'day': day.day,
440 |         }
441 |
442 |
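
GenericLeMondeBlog is the only class here that passes image URLs through convert_iri_to_plain_ascii_uri, presumably because Le Monde blog URLs can contain accented characters. Only the helper's name comes from the source; the body below is an assumed sketch of the idea (make an IRI ASCII-only so it can be fetched as a plain URI):

import urllib.parse


def convert_iri_to_plain_ascii_uri(uri):
    """Hypothetical sketch: IDNA-encode the host, percent-encode path and query."""
    parts = urllib.parse.urlsplit(uri)
    netloc = parts.netloc.encode('idna').decode('ascii')
    path = urllib.parse.quote(parts.path, safe='/%')
    query = urllib.parse.quote(parts.query, safe='=&%')
    return urllib.parse.urlunsplit((parts.scheme, netloc, path, query, parts.fragment))


print(convert_iri_to_plain_ascii_uri('https://exemple.com/dessin-déjà-vu.png'))
# https://exemple.com/dessin-d%C3%A9j%C3%A0-vu.png

Note also the third argument to string_to_date here: "%d %B %Y" combined with the "fr_FR.utf8" locale is what parses French month names such as "septembre".
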
@@ 1826-1851 (lines=26) @@
1823 |         }
1824 |
1825 |
1826 | class SafelyEndangered(GenericNavigableComic):
1827 |     """Class to retrieve Safely Endangered comics."""
1828 |     # Also on http://tumblr.safelyendangered.com
1829 |     name = 'endangered'
1830 |     long_name = 'Safely Endangered'
1831 |     url = 'http://www.safelyendangered.com'
1832 |     get_navi_link = get_link_rel_next
1833 |     get_first_comic_link = simulate_first_link
1834 |     first_url = 'http://www.safelyendangered.com/comic/ignored/'
1835 |
1836 |     @classmethod
1837 |     def get_comic_info(cls, soup, link):
1838 |         """Get information about a particular comics."""
1839 |         title = soup.find('h2', class_='post-title').string
1840 |         date_str = soup.find('span', class_='post-date').string
1841 |         day = string_to_date(date_str, '%B %d, %Y')
1842 |         imgs = soup.find('div', id='comic').find_all('img')
1843 |         alt = imgs[0]['alt']
1844 |         assert all(i['alt'] == i['title'] for i in imgs)
1845 |         return {
1846 |             'day': day.day,
1847 |             'month': day.month,
1848 |             'year': day.year,
1849 |             'img': [i['src'] for i in imgs],
1850 |             'title': title,
1851 |             'alt': alt,
1852 |         }
1853 |
1854 |
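
SafelyEndangered (like MyExtraLife below) pulls the images out of the page body rather than out of Open Graph tags, and asserts that every image's alt text matches its title before keeping the first alt as the mouse-over text. A standalone illustration of that pattern (made-up HTML, same BeautifulSoup calls):

from bs4 import BeautifulSoup

html = """<div id="comic">
<img src="https://example.com/panel-1.png" alt="punchline" title="punchline"/>
<img src="https://example.com/panel-2.png" alt="punchline" title="punchline"/>
</div>"""

soup = BeautifulSoup(html, "html.parser")
imgs = soup.find('div', id='comic').find_all('img')
alt = imgs[0]['alt']
assert all(i['alt'] == i['title'] for i in imgs)  # fail loudly if the markup changes
print(alt, [i['src'] for i in imgs])

The assert acts as a cheap sanity check: if the site's markup changes, the scraper stops with an AssertionError instead of silently recording wrong metadata.
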
@@ 988-1013 (lines=26) @@
985 |         }
986 |
987 |
988 | class MyExtraLife(GenericNavigableComic):
989 |     """Class to retrieve My Extra Life comics."""
990 |     name = 'extralife'
991 |     long_name = 'My Extra Life'
992 |     url = 'http://www.myextralife.com'
993 |     get_navi_link = get_link_rel_next
994 |
995 |     @classmethod
996 |     def get_first_comic_link(cls):
997 |         """Get link to first comics."""
998 |         return get_soup_at_url(cls.url).find('a', class_='comic_nav_link first_comic_link')
999 |
1000 |     @classmethod
1001 |     def get_comic_info(cls, soup, link):
1002 |         """Get information about a particular comics."""
1003 |         title = soup.find("h1", class_="comic_title").string
1004 |         date_str = soup.find("span", class_="comic_date").string
1005 |         day = string_to_date(date_str, "%B %d, %Y")
1006 |         imgs = soup.find_all("img", class_="comic")
1007 |         assert all(i['alt'] == i['title'] == title for i in imgs)
1008 |         return {
1009 |             'title': title,
1010 |             'img': [i['src'] for i in imgs if i["src"]],
1011 |             'day': day.day,
1012 |             'month': day.month,
1013 |             'year': day.year
1014 |         }
1015 |
1016 |
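
Unlike most classes in this report, MyExtraLife does not use simulate_first_link: it overrides get_first_comic_link and finds the "first comic" button by its CSS classes. When class_ is given a multi-word string, BeautifulSoup matches the exact class attribute value, as the toy example below shows (the HTML is made up):

from bs4 import BeautifulSoup

html = """<nav>
<a class="comic_nav_link first_comic_link" href="https://example.com/comic/1/">First</a>
<a class="comic_nav_link prev_comic_link" href="https://example.com/comic/41/">Prev</a>
</nav>"""

soup = BeautifulSoup(html, "html.parser")
# Matches the <a> whose class attribute is exactly "comic_nav_link first_comic_link".
first = soup.find('a', class_='comic_nav_link first_comic_link')
print(first['href'])  # https://example.com/comic/1/
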
@@ 2296-2320 (lines=25) @@
2293 |         }
2294 |
2295 |
2296 | class JuliasDrawings(GenericListableComic):
2297 |     """Class to retrieve Julia's Drawings."""
2298 |     name = 'julia'
2299 |     long_name = "Julia's Drawings"
2300 |     url = 'https://drawings.jvns.ca'
2301 |     get_url_from_archive_element = get_href
2302 |
2303 |     @classmethod
2304 |     def get_archive_elements(cls):
2305 |         articles = get_soup_at_url(cls.url).find_all('article', class_='li post')
2306 |         return [art.find('a') for art in reversed(articles)]
2307 |
2308 |     @classmethod
2309 |     def get_comic_info(cls, soup, archive_elt):
2310 |         """Get information about a particular comics."""
2311 |         date_str = soup.find('meta', property='og:article:published_time')['content'][:10]
2312 |         day = string_to_date(date_str, "%Y-%m-%d")
2313 |         title = soup.find('h3', class_='p-post-title').string
2314 |         imgs = soup.find('section', class_='post-content').find_all('img')
2315 |         return {
2316 |             'title': title,
2317 |             'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
2318 |             'month': day.month,
2319 |             'year': day.year,
2320 |             'day': day.day,
2321 |         }
2322 |
2323 |
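
JuliasDrawings is the one GenericListableComic in this report: instead of following next/previous links it lists every matching <article> on the archive page, reverses the list (presumably so comics are processed oldest-first), and resolves possibly relative image paths against the site URL. urljoin_wrapper is assumed here to behave like urllib.parse.urljoin; the snippet below illustrates both steps on made-up HTML:

from urllib.parse import urljoin

from bs4 import BeautifulSoup

archive_html = """<main>
<article class="li post"><a href="/zines/newest/">Newest drawing</a></article>
<article class="li post"><a href="/zines/oldest/">Oldest drawing</a></article>
</main>"""

url = 'https://drawings.jvns.ca'
soup = BeautifulSoup(archive_html, "html.parser")
articles = soup.find_all('article', class_='li post')
links = [art.find('a') for art in reversed(articles)]   # oldest first
print([urljoin(url, a['href']) for a in links])
# ['https://drawings.jvns.ca/zines/oldest/', 'https://drawings.jvns.ca/zines/newest/']
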