@@ 1260-1286 (lines=27) @@ | ||
1257 | } |
|
1258 | ||
1259 | ||
class ToonHole(GenericNavigableComic):
    """Class to retrieve Toon Holes comics."""
    # Also on http://tapastic.com/series/TOONHOLE
    name = 'toonhole'
    long_name = 'Toon Hole'
    url = 'http://www.toonhole.com'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_a_comicnavbase_comicnavnext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics."""
        # Publication date sits as the first text chunk of the entry-meta div.
        posted = string_to_date(
            soup.find('div', class_='entry-meta').contents[0].strip(),
            "%B %d, %Y")
        images = soup.find('div', id='comic').find_all('img')
        if not images:
            title = ""
        else:
            first = images[0]
            title = first['alt']
            # alt and title attributes are expected to agree on this site.
            assert first['title'] == title
        return {
            'title': title,
            'month': posted.month,
            'year': posted.year,
            'day': posted.day,
            'img': [convert_iri_to_plain_ascii_uri(i['src']) for i in images],
        }
|
1288 | ||
1289 | ||
@@ 1001-1027 (lines=27) @@ | ||
998 | } |
|
999 | ||
1000 | ||
class ImogenQuest(GenericNavigableComic):
    """Class to retrieve Imogen Quest comics."""
    # Also on http://imoquest.tumblr.com
    name = 'imogen'
    long_name = 'Imogen Quest'
    url = 'http://imogenquest.net'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics."""
        posted = string_to_date(
            soup.find('span', class_='post-date').string, '%B %d, %Y')
        images = soup.find('div', class_='comicpane').find_all('img')
        # Every image on this site carries matching alt/title text.
        assert all(img['alt'] == img['title'] for img in images)
        return {
            'day': posted.day,
            'month': posted.month,
            'year': posted.year,
            'img': [img['src'] for img in images],
            'title': soup.find('h2', class_='post-title').string,
            # Secondary title taken from the first image's tooltip.
            'title2': images[0]['title'],
            'author': soup.find("span", class_="post-author").find("a").string,
        }
|
1029 | ||
1030 | ||
@@ 2607-2632 (lines=26) @@ | ||
2604 | } |
|
2605 | ||
2606 | ||
class GerbilWithAJetpack(GenericNavigableComic):
    """Class to retrieve GerbilWithAJetpack comics."""
    name = 'gerbil'
    long_name = 'Gerbil With A Jetpack'
    url = 'http://gerbilwithajetpack.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics."""
        images = soup.find("div", id="comic").find_all("img")
        alt_text = images[0]['alt']
        # All images share one alt/title string on this site.
        assert all(img['alt'] == img['title'] == alt_text for img in images)
        when = string_to_date(
            soup.find("span", class_="post-date").string, "%B %d, %Y")
        return {
            'img': [img['src'] for img in images],
            'title': soup.find('h2', class_='post-title').string,
            'alt': alt_text,
            'author': soup.find("span", class_="post-author").find("a").string,
            'day': when.day,
            'month': when.month,
            'year': when.year
        }
|
2634 | ||
2635 | ||
@@ 1962-1987 (lines=26) @@ | ||
1959 | } |
|
1960 | ||
1961 | ||
class Penmen(GenericComicNotWorking, GenericNavigableComic):
    """Class to retrieve Penmen comics."""
    name = 'penmen'
    long_name = 'Penmen'
    url = 'http://penmen.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://penmen.com/index.php/2016/09/12/penmen-announces-grin-big-brand-clothing/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics."""
        # Only the date part (first 10 chars) of the datetime attribute is used.
        when = string_to_date(soup.find('time')['datetime'][:10], "%Y-%m-%d")
        content = soup.find('div', class_='entry-content')
        return {
            'title': soup.find('title').string,
            'short_url': soup.find('link', rel='shortlink')['href'],
            'img': [img['src'] for img in content.find_all('img')],
            'tags': ' '.join(t.string for t in soup.find_all('a', rel='tag')),
            'month': when.month,
            'year': when.year,
            'day': when.day,
        }
|
1989 | ||
1990 | ||
@@ 1903-1928 (lines=26) @@ | ||
1900 | } |
|
1901 | ||
1902 | ||
class SafelyEndangered(GenericNavigableComic):
    """Class to retrieve Safely Endangered comics."""
    # Also on http://tumblr.safelyendangered.com
    name = 'endangered'
    long_name = 'Safely Endangered'
    url = 'http://www.safelyendangered.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.safelyendangered.com/comic/ignored/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics."""
        when = string_to_date(
            soup.find('span', class_='post-date').string, '%B %d, %Y')
        images = soup.find('div', id='comic').find_all('img')
        # alt and title attributes agree on every image of this site.
        assert all(i['alt'] == i['title'] for i in images)
        return {
            'day': when.day,
            'month': when.month,
            'year': when.year,
            'img': [i['src'] for i in images],
            'title': soup.find('h2', class_='post-title').string,
            'alt': images[0]['alt'],
        }
|
1930 | ||
1931 | ||
@@ 406-431 (lines=26) @@ | ||
403 | _categories = ('DELETED', ) |
|
404 | ||
405 | ||
class ExtraFabulousComics(GenericNavigableComic):
    """Class to retrieve Extra Fabulous Comics."""
    # Also on https://extrafabulouscomics.tumblr.com
    name = 'efc'
    long_name = 'Extra Fabulous Comics'
    url = 'http://extrafabulouscomics.com'
    _categories = ('EFC', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://extrafabulouscomics.com/comic/buttfly/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics."""
        # re.escape the URL before interpolating it into the pattern: the
        # dots in the hostname are regex metacharacters and previously
        # matched any character instead of a literal '.'.
        img_src_re = re.compile('^%s/wp-content/uploads/' % re.escape(cls.url))
        imgs = soup.find_all('img', src=img_src_re)
        title = soup.find('meta', property='og:title')['content']
        # og meta gives a full ISO timestamp; keep only the date part.
        date_str = soup.find('meta', property='article:published_time')['content'][:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        return {
            'title': title,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'prefix': title + '-'
        }
|
433 | ||
434 | ||
@@ 1233-1256 (lines=24) @@ | ||
1230 | url = 'http://english.bouletcorp.com' |
|
1231 | ||
1232 | ||
class AmazingSuperPowers(GenericNavigableComic):
    """Class to retrieve Amazing Super Powers comics."""
    name = 'asp'
    long_name = 'Amazing Super Powers'
    url = 'http://www.amazingsuperpowers.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics."""
        when = string_to_date(
            soup.find('span', class_='post-date').string, "%B %d, %Y")
        images = soup.find('div', id='comic').find_all('img')
        # alt and title attributes agree; the title is their concatenation.
        assert all(i['alt'] == i['title'] for i in images)
        return {
            'title': ' '.join(i['title'] for i in images),
            'author': soup.find("span", class_="post-author").find("a").string,
            'img': [i['src'] for i in images],
            'day': when.day,
            'month': when.month,
            'year': when.year
        }
|
1258 | ||
1259 | ||
@@ 435-457 (lines=23) @@ | ||
432 | } |
|
433 | ||
434 | ||
class GenericLeMondeBlog(GenericNavigableComic):
    """Generic class to retrieve comics from Le Monde blogs."""
    _categories = ('LEMONDE', 'FRANCAIS')
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    # Concrete subclasses must provide the first comic's URL.
    first_url = NotImplemented
    date_format = "%d %B %Y"

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics."""
        # Dates are written out in French, hence the explicit locale.
        when = string_to_date(
            soup.find("span", class_="entry-date").string,
            cls.date_format, "fr_FR.utf8")
        image_metas = soup.find_all('meta', property='og:image')
        return {
            'title': soup.find('meta', property='og:title')['content'],
            'url2': soup.find('link', rel='shortlink')['href'],
            'img': [convert_iri_to_plain_ascii_uri(m['content']) for m in image_metas],
            'month': when.month,
            'year': when.year,
            'day': when.day,
        }
|
459 | ||
460 |