|
@@ 2353-2377 (lines=25) @@
|
| 2350 |
|
} |
| 2351 |
|
|
| 2352 |
|
|
| 2353 |
|
class LinsEditions(GenericNavigableComic):
    """Scraper definition for the L.I.N.S. Editions webcomic."""

    # Also on http://linscomics.tumblr.com
    # Now on https://warandpeas.com
    name = 'lins'
    long_name = 'L.I.N.S. Editions'
    url = 'https://linsedition.com'
    _categories = ('LINS', )
    # Navigation behaviour comes from helpers defined outside this class.
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'https://linsedition.com/2011/09/07/l-i-n-s/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for one comic page.

        Title, image URLs and publication date are all read from the
        page's <meta> tags; ``link`` is not used here.
        """
        title_tag = soup.find('meta', property='og:title')
        comic_title = title_tag['content']
        image_tags = soup.find_all('meta', property='og:image')
        published_tag = soup.find('meta', property='article:published_time')
        # Only the leading YYYY-MM-DD part of the timestamp is parsed.
        published = string_to_date(published_tag['content'][:10], "%Y-%m-%d")
        image_urls = []
        for tag in image_tags:
            image_urls.append(tag['content'])
        return {
            'title': comic_title,
            'img': image_urls,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
| 2379 |
|
|
| 2380 |
|
|
|
@@ 1049-1073 (lines=25) @@
|
| 1046 |
|
} |
| 1047 |
|
|
| 1048 |
|
|
| 1049 |
|
class Mercworks(GenericNavigableComic):
    """Scraper definition for the Mercworks webcomic."""

    # Also on http://mercworks.tumblr.com
    name = 'mercworks'
    long_name = 'Mercworks'
    url = 'http://mercworks.net'
    # Navigation behaviour comes from helpers defined outside this class.
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for one comic page.

        Reads title, optional description, publication date and image
        URLs from the page's <meta> tags; ``link`` is not used here.
        """
        title_tag = soup.find('meta', property='og:title')
        comic_title = title_tag['content']
        # The description tag may be missing; fall back to an empty string.
        description_tag = soup.find('meta', property='og:description')
        if description_tag:
            description = description_tag['content']
        else:
            description = ""
        published_tag = soup.find('meta', property='article:published_time')
        # Only the leading YYYY-MM-DD part of the timestamp is parsed.
        published = string_to_date(published_tag['content'][:10], "%Y-%m-%d")
        image_urls = []
        for tag in soup.find_all('meta', property='og:image'):
            image_urls.append(tag['content'])
        return {
            'img': image_urls,
            'title': comic_title,
            'desc': description,
            'day': published.day,
            'month': published.month,
            'year': published.year,
        }
| 1075 |
|
|
| 1076 |
|
|
|
@@ 357-379 (lines=23) @@
|
| 354 |
|
return [] |
| 355 |
|
|
| 356 |
|
|
| 357 |
|
class ExtraFabulousComics(GenericNavigableComic):
    """Class to retrieve Extra Fabulous Comics."""

    name = 'efc'
    long_name = 'Extra Fabulous Comics'
    url = 'http://extrafabulouscomics.com'
    # Navigation behaviour comes from helpers defined outside this class.
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Images are the <img> tags whose ``src`` starts with the site's
        ``/wp-content/uploads/`` path; the title and publication date are
        read from <meta> tags. ``link`` is not used here.

        Returns a dict with the keys 'title', 'img', 'month', 'year',
        'day' and 'prefix' (the title followed by a dash).
        """
        # Escape the base URL before embedding it in the pattern so that
        # regex metacharacters in it (the dots of the host name) only
        # match themselves instead of acting as wildcards.
        img_src_re = re.compile(
            '^%s/wp-content/uploads/' % re.escape(cls.url))
        imgs = soup.find_all('img', src=img_src_re)
        title = soup.find('meta', property='og:title')['content']
        # Only the leading YYYY-MM-DD part of the timestamp is parsed.
        date_str = soup.find(
            'meta', property='article:published_time')['content'][:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        return {
            'title': title,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'prefix': title + '-',
        }
| 381 |
|
|
| 382 |
|
|
|
@@ 3294-3315 (lines=22) @@
|
| 3291 |
|
} |
| 3292 |
|
|
| 3293 |
|
|
| 3294 |
|
class MarketoonistComics(GenericNavigableComic):
    """Scraper definition for the Marketoonist cartoons."""

    name = 'marketoonist'
    long_name = 'Marketoonist'
    url = 'https://marketoonist.com/cartoons'
    # Navigation behaviour comes from helpers defined outside this class.
    get_first_comic_link = simulate_first_link
    get_navi_link = get_link_rel_next
    first_url = 'https://marketoonist.com/2002/10/the-8-types-of-brand-managers-2.html'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for one cartoon page.

        Image URLs, publication date and title are all read from the
        page's <meta> tags; ``link`` is not used here.
        """
        image_tags = soup.find_all('meta', property='og:image')
        published_tag = soup.find('meta', property='article:published_time')
        # Only the leading YYYY-MM-DD part of the timestamp is parsed.
        published = string_to_date(published_tag['content'][:10], "%Y-%m-%d")
        title_tag = soup.find('meta', property='og:title')
        comic_title = title_tag['content']
        image_urls = []
        for tag in image_tags:
            image_urls.append(tag['content'])
        return {
            'img': image_urls,
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'title': comic_title,
        }
| 3317 |
|
|
| 3318 |
|
|
|
@@ 1894-1919 (lines=26) @@
|
| 1891 |
|
} |
| 1892 |
|
|
| 1893 |
|
|
| 1894 |
|
class Penmen(GenericNavigableComic):
    """Scraper definition for the Penmen webcomic."""

    name = 'penmen'
    long_name = 'Penmen'
    url = 'http://penmen.com'
    # Navigation behaviour comes from helpers defined outside this class.
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://penmen.com/index.php/2016/09/12/penmen-announces-grin-big-brand-clothing/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for one comic page.

        Collects the page title, the images inside the 'entry-content'
        div, the shortlink, the space-joined tag names and the date taken
        from the first <time> element; ``link`` is not used here.
        """
        page_title = soup.find('title').string
        content_div = soup.find('div', class_='entry-content')
        image_tags = content_div.find_all('img')
        shortlink_tag = soup.find('link', rel='shortlink')
        shortlink = shortlink_tag['href']
        tag_names = [anchor.string for anchor in soup.find_all('a', rel='tag')]
        joined_tags = ' '.join(tag_names)
        time_tag = soup.find('time')
        # Only the leading YYYY-MM-DD part of the datetime attribute is parsed.
        published = string_to_date(time_tag['datetime'][:10], "%Y-%m-%d")
        image_urls = []
        for tag in image_tags:
            image_urls.append(tag['src'])
        return {
            'title': page_title,
            'short_url': shortlink,
            'img': image_urls,
            'tags': joined_tags,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
| 1921 |
|
|
| 1922 |
|
|
|
@@ 1835-1860 (lines=26) @@
|
| 1832 |
|
} |
| 1833 |
|
|
| 1834 |
|
|
| 1835 |
|
class SafelyEndangered(GenericNavigableComic):
    """Scraper definition for the Safely Endangered webcomic."""

    # Also on http://tumblr.safelyendangered.com
    name = 'endangered'
    long_name = 'Safely Endangered'
    url = 'http://www.safelyendangered.com'
    # Navigation behaviour comes from helpers defined outside this class.
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.safelyendangered.com/comic/ignored/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for one comic page.

        Title and date come from the post header, images from the
        'comic' div. The alt text of the first image is reported as
        'alt'; ``link`` is not used here.
        """
        heading = soup.find('h2', class_='post-title')
        comic_title = heading.string
        date_span = soup.find('span', class_='post-date')
        published = string_to_date(date_span.string, '%B %d, %Y')
        comic_div = soup.find('div', id='comic')
        image_tags = comic_div.find_all('img')
        first_alt = image_tags[0]['alt']
        # Sanity check on the scraped page: alt and title text must agree.
        for tag in image_tags:
            assert tag['alt'] == tag['title']
        image_urls = []
        for tag in image_tags:
            image_urls.append(tag['src'])
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': image_urls,
            'title': comic_title,
            'alt': first_alt,
        }
| 1862 |
|
|
| 1863 |
|
|
|
@@ 383-404 (lines=22) @@
|
| 380 |
|
} |
| 381 |
|
|
| 382 |
|
|
| 383 |
|
class GenericLeMondeBlog(GenericNavigableComic):
    """Shared base for comics published on Le Monde blogs."""

    _categories = ('LEMONDE', 'FRANCAIS')
    # Navigation behaviour comes from helpers defined outside this class.
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    # Placeholder value; presumably overridden by concrete subclasses.
    first_url = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for one blog post.

        Reads the shortlink, the title, the date (parsed with the
        "fr_FR.utf8" locale argument) and the image URLs from the page;
        ``link`` is not used here.
        """
        shortlink_tag = soup.find('link', rel='shortlink')
        shortlink = shortlink_tag['href']
        title_tag = soup.find('meta', property='og:title')
        post_title = title_tag['content']
        date_span = soup.find("span", class_="entry-date")
        published = string_to_date(date_span.string, "%d %B %Y", "fr_FR.utf8")
        image_tags = soup.find_all('meta', property='og:image')
        # Each image URL is passed through the IRI-to-ASCII-URI helper.
        image_urls = []
        for tag in image_tags:
            image_urls.append(convert_iri_to_plain_ascii_uri(tag['content']))
        return {
            'title': post_title,
            'url2': shortlink,
            'img': image_urls,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
| 406 |
|
|
| 407 |
|
|
|
@@ 954-979 (lines=26) @@
|
| 951 |
|
} |
| 952 |
|
|
| 953 |
|
|
| 954 |
|
class MyExtraLife(GenericNavigableComic):
    """Scraper definition for the My Extra Life webcomic."""

    name = 'extralife'
    long_name = 'My Extra Life'
    url = 'http://www.myextralife.com'
    # Navigation behaviour comes from a helper defined outside this class.
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'first comic' navigation anchor found on the home page."""
        home_page = get_soup_at_url(cls.url)
        return home_page.find('a', class_='comic_nav_link first_comic_link')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for one comic page.

        Title and date come from the page header, images from the <img>
        tags with class 'comic'. Images with an empty ``src`` are left
        out; ``link`` is not used here.
        """
        heading = soup.find("h1", class_="comic_title")
        comic_title = heading.string
        date_span = soup.find("span", class_="comic_date")
        published = string_to_date(date_span.string, "%B %d, %Y")
        image_tags = soup.find_all("img", class_="comic")
        # Sanity check on the scraped page: alt, title attribute and
        # heading text must all agree.
        for tag in image_tags:
            assert tag['alt'] == tag['title'] == comic_title
        image_urls = []
        for tag in image_tags:
            source = tag['src']
            if source:
                image_urls.append(source)
        return {
            'title': comic_title,
            'img': image_urls,
            'day': published.day,
            'month': published.month,
            'year': published.year,
        }
| 981 |
|
|
| 982 |
|
|