@@ 2353-2377 (lines=25) @@ | ||
2350 | 'day': day.day, |
|
2351 | } |
|
2352 | ||
2353 | ||
class LinsEditions(GenericNavigableComic):
    """Retrieve the comics published by L.I.N.S. Editions."""

    # Also on http://linscomics.tumblr.com
    # Now on https://warandpeas.com
    name = 'lins'
    long_name = 'L.I.N.S. Editions'
    url = 'https://linsedition.com'
    _categories = ('LINS', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'https://linsedition.com/2011/09/07/l-i-n-s/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for one comic page.

        The title, the images and the publication date are all read from
        the page's ``<meta>`` tags (``og:title``, ``og:image`` and
        ``article:published_time``).
        """
        og_title = soup.find('meta', property='og:title')
        title = og_title['content']
        og_images = soup.find_all('meta', property='og:image')
        published = soup.find('meta', property='article:published_time')
        # Only the leading YYYY-MM-DD part of the timestamp is parsed.
        published_on = string_to_date(published['content'][:10], "%Y-%m-%d")
        return {
            'title': title,
            'img': [meta['content'] for meta in og_images],
            'month': published_on.month,
            'year': published_on.year,
            'day': published_on.day,
        }
|
2380 | ||
@@ 1049-1073 (lines=25) @@ | ||
1046 | 'prefix': '%d-' % num, |
|
1047 | } |
|
1048 | ||
1049 | ||
class Mercworks(GenericNavigableComic):
    """Retrieve the Mercworks comics."""

    # Also on http://mercworks.tumblr.com
    name = 'mercworks'
    long_name = 'Mercworks'
    url = 'http://mercworks.net'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for one comic page.

        Title, description, date and images come from the page's ``<meta>``
        tags; the description falls back to an empty string when the
        ``og:description`` tag is absent.
        """
        og_title = soup.find('meta', property='og:title')
        title = og_title['content']
        og_desc = soup.find('meta', property='og:description')
        if og_desc:
            desc = og_desc['content']
        else:
            desc = ""
        published = soup.find('meta', property='article:published_time')
        # Only the leading YYYY-MM-DD part of the timestamp is parsed.
        published_on = string_to_date(published['content'][:10], "%Y-%m-%d")
        og_images = soup.find_all('meta', property='og:image')
        return {
            'img': [meta['content'] for meta in og_images],
            'title': title,
            'desc': desc,
            'day': published_on.day,
            'month': published_on.month,
            'year': published_on.year,
        }
|
1076 | ||
@@ 357-379 (lines=23) @@ | ||
354 | return [] |
|
355 | ||
356 | ||
class ExtraFabulousComics(GenericNavigableComic):
    """Class to retrieve Extra Fabulous Comics."""

    name = 'efc'
    long_name = 'Extra Fabulous Comics'
    url = 'http://extrafabulouscomics.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Images are the ``<img>`` tags whose ``src`` starts with the site's
        ``/wp-content/uploads/`` directory; title and date are read from
        the ``og:title`` and ``article:published_time`` meta tags.
        """
        # Escape the URL so that its regex metacharacters (the '.' of the
        # host name) only match themselves and not arbitrary characters.
        img_src_re = re.compile('^%s/wp-content/uploads/' % re.escape(cls.url))
        imgs = soup.find_all('img', src=img_src_re)
        title = soup.find('meta', property='og:title')['content']
        # Only the leading YYYY-MM-DD part of the timestamp is parsed.
        date_str = soup.find('meta', property='article:published_time')['content'][:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        return {
            'title': title,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'prefix': title + '-',
        }
|
381 | ||
382 | ||
@@ 3294-3315 (lines=22) @@ | ||
3291 | 'img': [i['src'] for i in imgs], |
|
3292 | } |
|
3293 | ||
3294 | ||
class MarketoonistComics(GenericNavigableComic):
    """Retrieve the Marketoonist comics."""

    name = 'marketoonist'
    long_name = 'Marketoonist'
    url = 'https://marketoonist.com/cartoons'
    get_first_comic_link = simulate_first_link
    get_navi_link = get_link_rel_next
    first_url = 'https://marketoonist.com/2002/10/the-8-types-of-brand-managers-2.html'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for one comic page.

        Images, publication date and title are read from the page's
        ``og:image``, ``article:published_time`` and ``og:title`` meta tags.
        """
        og_images = soup.find_all('meta', property='og:image')
        published = soup.find('meta', property='article:published_time')
        # Only the leading YYYY-MM-DD part of the timestamp is parsed.
        published_on = string_to_date(published['content'][:10], "%Y-%m-%d")
        og_title = soup.find('meta', property='og:title')
        title = og_title['content']
        return {
            'img': [meta['content'] for meta in og_images],
            'day': published_on.day,
            'month': published_on.month,
            'year': published_on.year,
            'title': title,
        }
|
3318 | ||
@@ 1894-1919 (lines=26) @@ | ||
1891 | 'author': author, |
|
1892 | } |
|
1893 | ||
1894 | ||
class Penmen(GenericNavigableComic):
    """Retrieve the Penmen comics."""

    name = 'penmen'
    long_name = 'Penmen'
    url = 'http://penmen.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://penmen.com/index.php/2016/09/12/penmen-announces-grin-big-brand-clothing/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for one comic page.

        The title is the page ``<title>``, images are the ``<img>`` tags
        inside the ``entry-content`` div, tags are the space-joined texts of
        the ``rel="tag"`` anchors and the date is read from the first
        ``<time>`` element.
        """
        title = soup.find('title').string
        content_div = soup.find('div', class_='entry-content')
        images = content_div.find_all('img')
        shortlink = soup.find('link', rel='shortlink')
        short_url = shortlink['href']
        tag_anchors = soup.find_all('a', rel='tag')
        tags = ' '.join(anchor.string for anchor in tag_anchors)
        # Only the leading YYYY-MM-DD part of the datetime attribute is parsed.
        published_on = string_to_date(soup.find('time')['datetime'][:10], "%Y-%m-%d")
        return {
            'title': title,
            'short_url': short_url,
            'img': [image['src'] for image in images],
            'tags': tags,
            'month': published_on.month,
            'year': published_on.year,
            'day': published_on.day,
        }
|
1922 | ||
@@ 1835-1860 (lines=26) @@ | ||
1832 | 'img': [i['content'] for i in imgs if i['content'] not in skip_imgs], |
|
1833 | } |
|
1834 | ||
1835 | ||
class SafelyEndangered(GenericNavigableComic):
    """Retrieve the Safely Endangered comics."""

    # Also on http://tumblr.safelyendangered.com
    name = 'endangered'
    long_name = 'Safely Endangered'
    url = 'http://www.safelyendangered.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.safelyendangered.com/comic/ignored/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for one comic page.

        Title and date come from the ``post-title`` heading and the
        ``post-date`` span; images are the ``<img>`` tags inside the
        ``comic`` div and the alt text is taken from the first of them.
        """
        title = soup.find('h2', class_='post-title').string
        date_text = soup.find('span', class_='post-date').string
        published_on = string_to_date(date_text, '%B %d, %Y')
        comic_div = soup.find('div', id='comic')
        images = comic_div.find_all('img')
        alt = images[0]['alt']
        # Sanity check: each image is expected to repeat its alt text as title.
        assert all(image['alt'] == image['title'] for image in images)
        return {
            'day': published_on.day,
            'month': published_on.month,
            'year': published_on.year,
            'img': [image['src'] for image in images],
            'title': title,
            'alt': alt,
        }
|
1863 | ||
@@ 383-404 (lines=22) @@ | ||
380 | } |
|
381 | ||
382 | ||
class GenericLeMondeBlog(GenericNavigableComic):
    """Common base for the comics hosted on Le Monde blogs."""

    _categories = ('LEMONDE', 'FRANCAIS')
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    # Placeholder value in this base class.
    first_url = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for one blog post.

        The short link, title and images come from ``<link>``/``<meta>``
        tags; the date is the text of the ``entry-date`` span, parsed with
        the ``fr_FR.utf8`` locale.
        """
        shortlink = soup.find('link', rel='shortlink')
        url2 = shortlink['href']
        og_title = soup.find('meta', property='og:title')
        title = og_title['content']
        date_text = soup.find("span", class_="entry-date").string
        published_on = string_to_date(date_text, "%d %B %Y", "fr_FR.utf8")
        og_images = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'url2': url2,
            'img': [convert_iri_to_plain_ascii_uri(meta['content']) for meta in og_images],
            'month': published_on.month,
            'year': published_on.year,
            'day': published_on.day,
        }
|
406 | ||
407 | ||
@@ 954-979 (lines=26) @@ | ||
951 | 'author': author, |
|
952 | } |
|
953 | ||
954 | ||
class MyExtraLife(GenericNavigableComic):
    """Retrieve the My Extra Life comics."""

    name = 'extralife'
    long_name = 'My Extra Life'
    url = 'http://www.myextralife.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'first comic' navigation anchor found on the home page."""
        home_page = get_soup_at_url(cls.url)
        return home_page.find('a', class_='comic_nav_link first_comic_link')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for one comic page.

        Title and date come from the ``comic_title`` heading and the
        ``comic_date`` span; images are the ``<img>`` tags of class
        ``comic``, keeping only those with a non-empty ``src``.
        """
        title = soup.find("h1", class_="comic_title").string
        date_text = soup.find("span", class_="comic_date").string
        published_on = string_to_date(date_text, "%B %d, %Y")
        images = soup.find_all("img", class_="comic")
        # Sanity check: alt and title of every image must equal the comic title.
        assert all(image['alt'] == image['title'] == title for image in images)
        return {
            'title': title,
            'img': [image['src'] for image in images if image["src"]],
            'day': published_on.day,
            'month': published_on.month,
            'year': published_on.year,
        }
|
982 |