@@ 355-377 (lines=23) @@

        return []


class ExtraFabulousComics(GenericNavigableComic):
    """Class to retrieve Extra Fabulous Comics."""
    name = 'efc'
    long_name = 'Extra Fabulous Comics'
    url = 'http://extrafabulouscomics.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic."""
        img_src_re = re.compile('^%s/wp-content/uploads/' % cls.url)
        imgs = soup.find_all('img', src=img_src_re)
        title = soup.find('meta', property='og:title')['content']
        date_str = soup.find('meta', property='article:published_time')['content'][:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        return {
            'title': title,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'prefix': title + '-'
        }

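The ExtraFabulousComics block above keeps only the images hosted under the comic's own wp-content/uploads path by passing a compiled regular expression as the src filter; BeautifulSoup accepts a compiled pattern wherever an attribute value is expected. A minimal standalone sketch of that filtering step (the HTML fragment is invented for illustration):

import re
from bs4 import BeautifulSoup

html = ('<img src="http://extrafabulouscomics.com/wp-content/uploads/strip.png">'
        '<img src="http://cdn.example.com/ad-banner.png">')
soup = BeautifulSoup(html, 'html.parser')

# Only <img> tags whose src starts with the upload path survive the filter.
img_src_re = re.compile('^http://extrafabulouscomics.com/wp-content/uploads/')
print([i['src'] for i in soup.find_all('img', src=img_src_re)])
# -> ['http://extrafabulouscomics.com/wp-content/uploads/strip.png']
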
@@ 381-402 (lines=22) @@

        }


class GenericLeMondeBlog(GenericNavigableComic):
    """Generic class to retrieve comics from Le Monde blogs."""
    _categories = ('LEMONDE', 'FRANCAIS')
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic."""
        url2 = soup.find('link', rel='shortlink')['href']
        title = soup.find('meta', property='og:title')['content']
        date_str = soup.find("span", class_="entry-date").string
        day = string_to_date(date_str, "%d %B %Y", "fr_FR.utf8")
        imgs = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'url2': url2,
            'img': [convert_iri_to_plain_ascii_uri(i['content']) for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }

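GenericLeMondeBlog passes a third argument to string_to_date so that French dates such as "12 septembre 2016" can be parsed with the "%d %B %Y" format. The helper itself does not appear in this report; a plausible sketch of what it might do, assuming it simply switches the time locale around strptime (this reconstruction is a guess, not the project's actual implementation):

import datetime
import locale

def string_to_date(string, date_format, local='C'):
    """Parse `string` with `date_format` under the given locale (hypothetical sketch)."""
    prev = locale.setlocale(locale.LC_TIME)        # remember the current LC_TIME setting
    locale.setlocale(locale.LC_TIME, local)        # raises locale.Error if `local` is not installed
    try:
        return datetime.datetime.strptime(string, date_format).date()
    finally:
        locale.setlocale(locale.LC_TIME, prev)     # always restore the previous setting

# string_to_date("12 septembre 2016", "%d %B %Y", "fr_FR.utf8")  # -> datetime.date(2016, 9, 12)
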
@@ 2320-2344 (lines=25) @@

            'day': day.day,
        }


class LinsEditions(GenericNavigableComic):
    """Class to retrieve L.I.N.S. Editions comics."""
    # Also on http://linscomics.tumblr.com
    # Now on https://warandpeas.com
    name = 'lins'
    long_name = 'L.I.N.S. Editions'
    url = 'https://linsedition.com'
    _categories = ('LINS', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'https://linsedition.com/2011/09/07/l-i-n-s/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic."""
        title = soup.find('meta', property='og:title')['content']
        imgs = soup.find_all('meta', property='og:image')
        date_str = soup.find('meta', property='article:published_time')['content'][:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        return {
            'title': title,
            'img': [i['content'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }

@@ 1016-1040 (lines=25) @@

            'prefix': '%d-' % num,
        }


class Mercworks(GenericNavigableComic):
    """Class to retrieve Mercworks comics."""
    # Also on http://mercworks.tumblr.com
    name = 'mercworks'
    long_name = 'Mercworks'
    url = 'http://mercworks.net'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic."""
        title = soup.find('meta', property='og:title')['content']
        metadesc = soup.find('meta', property='og:description')
        desc = metadesc['content'] if metadesc else ""
        date_str = soup.find('meta', property='article:published_time')['content'][:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        imgs = soup.find_all('meta', property='og:image')
        return {
            'img': [i['content'] for i in imgs],
            'title': title,
            'desc': desc,
            'day': day.day,
            'month': day.month,
            'year': day.year
        }

@@ 3261-3282 (lines=22) @@

            'img': [i['src'] for i in imgs],
        }


class MarketoonistComics(GenericNavigableComic):
    """Class to retrieve Marketoonist Comics."""
    name = 'marketoonist'
    long_name = 'Marketoonist'
    url = 'https://marketoonist.com/cartoons'
    get_first_comic_link = simulate_first_link
    get_navi_link = get_link_rel_next
    first_url = 'https://marketoonist.com/2002/10/the-8-types-of-brand-managers-2.html'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic."""
        imgs = soup.find_all('meta', property='og:image')
        date_str = soup.find('meta', property='article:published_time')['content'][:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        title = soup.find('meta', property='og:title')['content']
        return {
            'img': [i['content'] for i in imgs],
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'title': title,
        }

@@ 1861-1886 (lines=26) @@

            'author': author,
        }


class Penmen(GenericNavigableComic):
    """Class to retrieve Penmen comics."""
    name = 'penmen'
    long_name = 'Penmen'
    url = 'http://penmen.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://penmen.com/index.php/2016/09/12/penmen-announces-grin-big-brand-clothing/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic."""
        title = soup.find('title').string
        imgs = soup.find('div', class_='entry-content').find_all('img')
        short_url = soup.find('link', rel='shortlink')['href']
        tags = ' '.join(t.string for t in soup.find_all('a', rel='tag'))
        date_str = soup.find('time')['datetime'][:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        return {
            'title': title,
            'short_url': short_url,
            'img': [i['src'] for i in imgs],
            'tags': tags,
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }

@@ 1802-1827 (lines=26) @@

            'img': [i['content'] for i in imgs if i['content'] not in skip_imgs],
        }


class SafelyEndangered(GenericNavigableComic):
    """Class to retrieve Safely Endangered comics."""
    # Also on http://tumblr.safelyendangered.com
    name = 'endangered'
    long_name = 'Safely Endangered'
    url = 'http://www.safelyendangered.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.safelyendangered.com/comic/ignored/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic."""
        title = soup.find('h2', class_='post-title').string
        date_str = soup.find('span', class_='post-date').string
        day = string_to_date(date_str, '%B %d, %Y')
        imgs = soup.find('div', id='comic').find_all('img')
        alt = imgs[0]['alt']
        assert all(i['alt'] == i['title'] for i in imgs)
        return {
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [i['src'] for i in imgs],
            'title': title,
            'alt': alt,
        }

@@ 922-947 (lines=26) @@

        }


class MyExtraLife(GenericNavigableComic):
    """Class to retrieve My Extra Life comics."""
    name = 'extralife'
    long_name = 'My Extra Life'
    url = 'http://www.myextralife.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comic."""
        return get_soup_at_url(cls.url).find('a', class_='comic_nav_link first_comic_link')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic."""
        title = soup.find("h1", class_="comic_title").string
        date_str = soup.find("span", class_="comic_date").string
        day = string_to_date(date_str, "%B %d, %Y")
        imgs = soup.find_all("img", class_="comic")
        assert all(i['alt'] == i['title'] == title for i in imgs)
        return {
            'title': title,
            'img': [i['src'] for i in imgs if i["src"]],
            'day': day.day,
            'month': day.month,
            'year': day.year
        }

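Every class flagged in this report follows the same pattern: a few class-level attributes plus a get_comic_info that reads the og: meta tags and article:published_time. One way to shrink the duplication would be a shared parent that implements the common OpenGraph-based extraction once and lets subclasses keep only their attributes; a minimal sketch, assuming GenericNavigableComic, get_link_rel_next, simulate_first_link and string_to_date behave as in the snippets above (the name GenericOpenGraphComic is made up here, not part of the codebase):

class GenericOpenGraphComic(GenericNavigableComic):
    """Hypothetical shared parent for comics exposing og: metadata (sketch only)."""
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic from its og: meta tags."""
        title = soup.find('meta', property='og:title')['content']
        imgs = soup.find_all('meta', property='og:image')
        date_str = soup.find('meta', property='article:published_time')['content'][:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        return {
            'title': title,
            'img': [i['content'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }


# LinsEditions and MarketoonistComics could then reduce to their attributes, e.g.:
class MarketoonistComics(GenericOpenGraphComic):
    """Class to retrieve Marketoonist Comics."""
    name = 'marketoonist'
    long_name = 'Marketoonist'
    url = 'https://marketoonist.com/cartoons'
    get_first_comic_link = simulate_first_link
    first_url = 'https://marketoonist.com/2002/10/the-8-types-of-brand-managers-2.html'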