@@ 1259-1285 (lines=27) @@ | ||
1256 | } |
|
1257 | ||
1258 | ||
class ToonHole(GenericNavigableComic):
    """Class to retrieve Toon Holes comics."""
    # Also on http://tapastic.com/series/TOONHOLE
    name = 'toonhole'
    long_name = 'Toon Hole'
    url = 'http://www.toonhole.com'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_a_comicnavbase_comicnavnext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics."""
        # Publication date is the first text node of the entry metadata div.
        posted = soup.find('div', class_='entry-meta').contents[0].strip()
        day = string_to_date(posted, "%B %d, %Y")
        images = soup.find('div', id='comic').find_all('img')
        title = ""
        if images:
            first = images[0]
            title = first['alt']
            # alt and title attributes are expected to carry the same text
            assert first['title'] == title
        return {
            'title': title,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [convert_iri_to_plain_ascii_uri(i['src']) for i in images],
        }
|
1287 | ||
1288 | ||
@@ 1000-1026 (lines=27) @@ | ||
997 | } |
|
998 | ||
999 | ||
class ImogenQuest(GenericNavigableComic):
    """Class to retrieve Imogen Quest comics."""
    # Also on http://imoquest.tumblr.com
    name = 'imogen'
    long_name = 'Imogen Quest'
    url = 'http://imogenquest.net'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Extracts title, author, date and image URLs from a comic page.
        """
        title = soup.find('h2', class_='post-title').string
        author = soup.find("span", class_="post-author").find("a").string
        date_str = soup.find('span', class_='post-date').string
        day = string_to_date(date_str, '%B %d, %Y')
        imgs = soup.find('div', class_='comicpane').find_all('img')
        # alt and title attributes are expected to carry the same text
        assert all(i['alt'] == i['title'] for i in imgs)
        # Guard against image-less pages (previously raised IndexError);
        # mirrors the empty-imgs handling used by ToonHole.
        title2 = imgs[0]['title'] if imgs else ""
        return {
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [i['src'] for i in imgs],
            'title': title,
            'title2': title2,
            'author': author,
        }
|
1028 | ||
1029 | ||
@@ 2606-2631 (lines=26) @@ | ||
2603 | } |
|
2604 | ||
2605 | ||
class GerbilWithAJetpack(GenericNavigableComic):
    """Class to retrieve GerbilWithAJetpack comics."""
    name = 'gerbil'
    long_name = 'Gerbil With A Jetpack'
    url = 'http://gerbilwithajetpack.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Extracts title, author, date, shared alt text and image URLs.
        """
        title = soup.find('h2', class_='post-title').string
        author = soup.find("span", class_="post-author").find("a").string
        date_str = soup.find("span", class_="post-date").string
        day = string_to_date(date_str, "%B %d, %Y")
        imgs = soup.find("div", id="comic").find_all("img")
        # Guard against image-less pages (previously raised IndexError);
        # mirrors the empty-imgs handling used by ToonHole.
        alt = imgs[0]['alt'] if imgs else ""
        # Every image is expected to share the same alt/title text.
        assert all(i['alt'] == i['title'] == alt for i in imgs)
        return {
            'img': [i['src'] for i in imgs],
            'title': title,
            'alt': alt,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
|
2633 | ||
2634 | ||
@@ 1961-1986 (lines=26) @@ | ||
1958 | } |
|
1959 | ||
1960 | ||
class Penmen(GenericComicNotWorking, GenericNavigableComic):
    """Class to retrieve Penmen comics."""
    name = 'penmen'
    long_name = 'Penmen'
    url = 'http://penmen.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://penmen.com/index.php/2016/09/12/penmen-announces-grin-big-brand-clothing/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics."""
        # Date comes from the <time> element, ISO prefix only (YYYY-MM-DD).
        day = string_to_date(soup.find('time')['datetime'][:10], "%Y-%m-%d")
        page_title = soup.find('title').string
        permalink = soup.find('link', rel='shortlink')['href']
        images = soup.find('div', class_='entry-content').find_all('img')
        # Collect all tag links into one space-separated string.
        tag_text = ' '.join(t.string for t in soup.find_all('a', rel='tag'))
        return {
            'title': page_title,
            'short_url': permalink,
            'img': [i['src'] for i in images],
            'tags': tag_text,
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
|
1988 | ||
1989 | ||
@@ 1902-1927 (lines=26) @@ | ||
1899 | } |
|
1900 | ||
1901 | ||
class SafelyEndangered(GenericNavigableComic):
    """Class to retrieve Safely Endangered comics."""
    # Also on http://tumblr.safelyendangered.com
    name = 'endangered'
    long_name = 'Safely Endangered'
    url = 'http://www.safelyendangered.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.safelyendangered.com/comic/ignored/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Extracts title, date, alt text and image URLs from a comic page.
        """
        title = soup.find('h2', class_='post-title').string
        date_str = soup.find('span', class_='post-date').string
        day = string_to_date(date_str, '%B %d, %Y')
        imgs = soup.find('div', id='comic').find_all('img')
        # Guard against image-less pages (previously raised IndexError);
        # mirrors the empty-imgs handling used by ToonHole.
        alt = imgs[0]['alt'] if imgs else ""
        # alt and title attributes are expected to carry the same text
        assert all(i['alt'] == i['title'] for i in imgs)
        return {
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [i['src'] for i in imgs],
            'title': title,
            'alt': alt,
        }
|
1929 | ||
1930 | ||
@@ 405-430 (lines=26) @@ | ||
402 | _categories = ('DELETED', ) |
|
403 | ||
404 | ||
class ExtraFabulousComics(GenericNavigableComic):
    """Class to retrieve Extra Fabulous Comics."""
    # Also on https://extrafabulouscomics.tumblr.com
    name = 'efc'
    long_name = 'Extra Fabulous Comics'
    url = 'http://extrafabulouscomics.com'
    _categories = ('EFC', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://extrafabulouscomics.com/comic/buttfly/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics."""
        # Only keep images hosted under this site's wp-content uploads.
        uploads_pattern = re.compile('^%s/wp-content/uploads/' % cls.url)
        images = soup.find_all('img', src=uploads_pattern)
        # Title and date come from the page's OpenGraph metadata.
        title = soup.find('meta', property='og:title')['content']
        published = soup.find('meta', property='article:published_time')['content'][:10]
        day = string_to_date(published, "%Y-%m-%d")
        return {
            'title': title,
            'img': [i['src'] for i in images],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'prefix': title + '-'
        }
|
432 | ||
433 | ||
@@ 1232-1255 (lines=24) @@ | ||
1229 | url = 'http://english.bouletcorp.com' |
|
1230 | ||
1231 | ||
class AmazingSuperPowers(GenericNavigableComic):
    """Class to retrieve Amazing Super Powers comics."""
    name = 'asp'
    long_name = 'Amazing Super Powers'
    url = 'http://www.amazingsuperpowers.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics."""
        author = soup.find("span", class_="post-author").find("a").string
        posted = soup.find('span', class_='post-date').string
        day = string_to_date(posted, "%B %d, %Y")
        images = soup.find('div', id='comic').find_all('img')
        # alt and title attributes are expected to carry the same text
        assert all(i['alt'] == i['title'] for i in images)
        # The comic title is the concatenation of every image's title text.
        title = ' '.join(i['title'] for i in images)
        return {
            'title': title,
            'author': author,
            'img': [img['src'] for img in images],
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
|
1257 | ||
1258 | ||
@@ 434-456 (lines=23) @@ | ||
431 | } |
|
432 | ||
433 | ||
class GenericLeMondeBlog(GenericNavigableComic):
    """Generic class to retrieve comics from Le Monde blogs."""
    _categories = ('LEMONDE', 'FRANCAIS')
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    # Concrete subclasses must provide the URL of their first comic.
    first_url = NotImplemented
    date_format = "%d %B %Y"

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics."""
        # Dates are in French, hence the explicit locale argument.
        posted = soup.find("span", class_="entry-date").string
        day = string_to_date(posted, cls.date_format, "fr_FR.utf8")
        short_url = soup.find('link', rel='shortlink')['href']
        title = soup.find('meta', property='og:title')['content']
        images = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'url2': short_url,
            'img': [convert_iri_to_plain_ascii_uri(i['content']) for i in images],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
|
458 | ||
459 |