|
@@ 2282-2306 (lines=25) @@
|
| 2279 |
|
'title': title, |
| 2280 |
|
'author': author, |
| 2281 |
|
'img': [i['src'] for i in imgs], |
| 2282 |
|
'month': day.month, |
| 2283 |
|
'year': day.year, |
| 2284 |
|
'day': day.day, |
| 2285 |
|
} |
| 2286 |
|
|
| 2287 |
|
|
| 2288 |
|
class LinsEditions(GenericNavigableComic): |
| 2289 |
|
"""Class to retrieve L.I.N.S. Editions comics.""" |
| 2290 |
|
# Also on http://linscomics.tumblr.com |
| 2291 |
|
# Now on https://warandpeas.com |
| 2292 |
|
name = 'lins' |
| 2293 |
|
long_name = 'L.I.N.S. Editions' |
| 2294 |
|
url = 'https://linsedition.com' |
| 2295 |
|
_categories = ('LINS', ) |
| 2296 |
|
get_navi_link = get_link_rel_next |
| 2297 |
|
get_first_comic_link = simulate_first_link |
| 2298 |
|
first_url = 'https://linsedition.com/2011/09/07/l-i-n-s/' |
| 2299 |
|
|
| 2300 |
|
@classmethod |
| 2301 |
|
def get_comic_info(cls, soup, link): |
| 2302 |
|
"""Get information about a particular comics.""" |
| 2303 |
|
title = soup.find('meta', property='og:title')['content'] |
| 2304 |
|
imgs = soup.find_all('meta', property='og:image') |
| 2305 |
|
date_str = soup.find('meta', property='article:published_time')['content'][:10] |
| 2306 |
|
day = string_to_date(date_str, "%Y-%m-%d") |
| 2307 |
|
return { |
| 2308 |
|
'title': title, |
| 2309 |
|
'img': [i['content'] for i in imgs], |
|
@@ 349-371 (lines=23) @@
|
| 346 |
|
_categories = ('EMPTY', ) |
| 347 |
|
|
| 348 |
|
@classmethod
def get_next_comic(cls, last_comic):
    """Report that this comic is treated as empty and return no comics.

    `last_comic` is accepted only to keep the same signature; this
    implementation never reads it. A message is sent through `cls.log`
    before the empty list is returned.
    """
    message = "comic is considered as empty - returning no comic"
    cls.log(message)
    no_comics = []
    return no_comics
| 353 |
|
|
| 354 |
|
|
| 355 |
|
class ExtraFabulousComics(GenericNavigableComic):
    """Class to retrieve Extra Fabulous Comics."""
    name = 'efc'
    long_name = 'Extra Fabulous Comics'
    url = 'http://extrafabulouscomics.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        Reads the parsed page `soup` and returns a dict with:
          - 'title': content of the `og:title` meta tag,
          - 'img': `src` of every <img> whose source starts with
            `<cls.url>/wp-content/uploads/`,
          - 'year' / 'month' / 'day': taken from the first 10 characters
            of the `article:published_time` meta tag, parsed as
            "%Y-%m-%d" by `string_to_date`,
          - 'prefix': the title followed by '-'.

        `link` is not used by this implementation.
        """
        # The site URL is escaped before being embedded in the pattern:
        # otherwise the '.' in the host name is a regex wildcard and the
        # pattern would also accept look-alike hosts.
        img_src_re = re.compile(
            '^%s/wp-content/uploads/' % re.escape(cls.url))
        imgs = soup.find_all('img', src=img_src_re)
        title = soup.find('meta', property='og:title')['content']
        # Only the leading "YYYY-MM-DD" part of the timestamp is parsed.
        date_str = soup.find(
            'meta', property='article:published_time')['content'][:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        return {
            'title': title,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'prefix': title + '-'
        }
| 379 |
|
|
| 380 |
|
|
| 381 |
|
class GenericLeMondeBlog(GenericNavigableComic): |
| 382 |
|
"""Generic class to retrieve comics from Le Monde blogs.""" |
| 383 |
|
_categories = ('LEMONDE', 'FRANCAIS') |
| 384 |
|
get_navi_link = get_link_rel_next |
| 385 |
|
get_first_comic_link = simulate_first_link |
| 386 |
|
first_url = NotImplemented |
| 387 |
|
|
| 388 |
|
@classmethod |
| 389 |
|
def get_comic_info(cls, soup, link): |
| 390 |
|
"""Get information about a particular comics.""" |
| 391 |
|
url2 = soup.find('link', rel='shortlink')['href'] |
| 392 |
|
title = soup.find('meta', property='og:title')['content'] |
| 393 |
|
date_str = soup.find("span", class_="entry-date").string |
| 394 |
|
day = string_to_date(date_str, "%d %B %Y", "fr_FR.utf8") |
| 395 |
|
imgs = soup.find_all('meta', property='og:image') |
| 396 |
|
return { |
| 397 |
|
'title': title, |
| 398 |
|
'url2': url2, |
| 399 |
|
'img': [convert_iri_to_plain_ascii_uri(i['content']) for i in imgs], |
|
@@ 1809-1834 (lines=26) @@
|
| 1806 |
|
'title': title, |
| 1807 |
|
'author': author, |
| 1808 |
|
'day': day.day, |
| 1809 |
|
'month': day.month, |
| 1810 |
|
'year': day.year, |
| 1811 |
|
'img': [i['content'] for i in imgs if i['content'] not in skip_imgs], |
| 1812 |
|
} |
| 1813 |
|
|
| 1814 |
|
|
| 1815 |
|
class SafelyEndangered(GenericNavigableComic): |
| 1816 |
|
"""Class to retrieve Safely Endangered comics.""" |
| 1817 |
|
# Also on http://tumblr.safelyendangered.com |
| 1818 |
|
name = 'endangered' |
| 1819 |
|
long_name = 'Safely Endangered' |
| 1820 |
|
url = 'http://www.safelyendangered.com' |
| 1821 |
|
get_navi_link = get_link_rel_next |
| 1822 |
|
get_first_comic_link = simulate_first_link |
| 1823 |
|
first_url = 'http://www.safelyendangered.com/comic/ignored/' |
| 1824 |
|
|
| 1825 |
|
@classmethod |
| 1826 |
|
def get_comic_info(cls, soup, link): |
| 1827 |
|
"""Get information about a particular comics.""" |
| 1828 |
|
title = soup.find('h2', class_='post-title').string |
| 1829 |
|
date_str = soup.find('span', class_='post-date').string |
| 1830 |
|
day = string_to_date(date_str, '%B %d, %Y') |
| 1831 |
|
imgs = soup.find('div', id='comic').find_all('img') |
| 1832 |
|
alt = imgs[0]['alt'] |
| 1833 |
|
assert all(i['alt'] == i['title'] for i in imgs) |
| 1834 |
|
return { |
| 1835 |
|
'day': day.day, |
| 1836 |
|
'month': day.month, |
| 1837 |
|
'year': day.year, |
|
@@ 919-944 (lines=26) @@
|
| 916 |
|
'img': [i['src'] for i in imgs], |
| 917 |
|
'title': title, |
| 918 |
|
'author': author, |
| 919 |
|
'month': day.month, |
| 920 |
|
'year': day.year, |
| 921 |
|
'day': day.day, |
| 922 |
|
} |
| 923 |
|
|
| 924 |
|
|
| 925 |
|
class MyExtraLife(GenericNavigableComic): |
| 926 |
|
"""Class to retrieve My Extra Life comics.""" |
| 927 |
|
name = 'extralife' |
| 928 |
|
long_name = 'My Extra Life' |
| 929 |
|
url = 'http://www.myextralife.com' |
| 930 |
|
get_navi_link = get_link_rel_next |
| 931 |
|
|
| 932 |
|
@classmethod
def get_first_comic_link(cls):
    """Get the link to the first comic from the site's home page.

    Loads `cls.url` through `get_soup_at_url` and returns the result of
    searching that page for an <a> element whose class is
    'comic_nav_link first_comic_link'.
    NOTE(review): presumably this is None when no such anchor exists
    (BeautifulSoup `find` semantics) - confirm against `get_soup_at_url`.
    """
    home_page = get_soup_at_url(cls.url)
    first_link_class = 'comic_nav_link first_comic_link'
    return home_page.find('a', class_=first_link_class)
| 936 |
|
|
| 937 |
|
@classmethod |
| 938 |
|
def get_comic_info(cls, soup, link): |
| 939 |
|
"""Get information about a particular comics.""" |
| 940 |
|
title = soup.find("h1", class_="comic_title").string |
| 941 |
|
date_str = soup.find("span", class_="comic_date").string |
| 942 |
|
day = string_to_date(date_str, "%B %d, %Y") |
| 943 |
|
imgs = soup.find_all("img", class_="comic") |
| 944 |
|
assert all(i['alt'] == i['title'] == title for i in imgs) |
| 945 |
|
return { |
| 946 |
|
'title': title, |
| 947 |
|
'img': [i['src'] for i in imgs if i["src"]], |
|
@@ 3191-3212 (lines=22) @@
|
| 3188 |
|
"""Get information about a particular comics.""" |
| 3189 |
|
title = link['title'] |
| 3190 |
|
imgs = soup.find_all('img', id='comicimg') |
| 3191 |
|
return { |
| 3192 |
|
'title': title, |
| 3193 |
|
'img': [i['src'] for i in imgs], |
| 3194 |
|
} |
| 3195 |
|
|
| 3196 |
|
|
| 3197 |
|
class MarketoonistComics(GenericNavigableComic): |
| 3198 |
|
"""Class to retrieve Marketoonist Comics.""" |
| 3199 |
|
name = 'marketoonist' |
| 3200 |
|
long_name = 'Marketoonist' |
| 3201 |
|
url = 'https://marketoonist.com/cartoons' |
| 3202 |
|
get_first_comic_link = simulate_first_link |
| 3203 |
|
get_navi_link = get_link_rel_next |
| 3204 |
|
first_url = 'https://marketoonist.com/2002/10/the-8-types-of-brand-managers-2.html' |
| 3205 |
|
|
| 3206 |
|
@classmethod |
| 3207 |
|
def get_comic_info(cls, soup, link): |
| 3208 |
|
"""Get information about a particular comics.""" |
| 3209 |
|
imgs = soup.find_all('meta', property='og:image') |
| 3210 |
|
date_str = soup.find('meta', property='article:published_time')['content'][:10] |
| 3211 |
|
day = string_to_date(date_str, "%Y-%m-%d") |
| 3212 |
|
title = soup.find('meta', property='og:title')['content'] |
| 3213 |
|
return { |
| 3214 |
|
'img': [i['content'] for i in imgs], |
| 3215 |
|
'day': day.day, |