@@ 355-377 (lines=23) @@
| 352 |         return []
| 353 |
| 354 |
| 355 | class ExtraFabulousComics(GenericNavigableComic):
| 356 |     """Class to retrieve Extra Fabulous Comics."""
| 357 |     name = 'efc'
| 358 |     long_name = 'Extra Fabulous Comics'
| 359 |     url = 'http://extrafabulouscomics.com'
| 360 |     get_first_comic_link = get_a_navi_navifirst
| 361 |     get_navi_link = get_link_rel_next
| 362 |
| 363 |     @classmethod
| 364 |     def get_comic_info(cls, soup, link):
| 365 |         """Get information about a particular comics."""
| 366 |         img_src_re = re.compile('^%s/wp-content/uploads/' % cls.url)
| 367 |         imgs = soup.find_all('img', src=img_src_re)
| 368 |         title = soup.find('meta', property='og:title')['content']
| 369 |         date_str = soup.find('meta', property='article:published_time')['content'][:10]
| 370 |         day = string_to_date(date_str, "%Y-%m-%d")
| 371 |         return {
| 372 |             'title': title,
| 373 |             'img': [i['src'] for i in imgs],
| 374 |             'month': day.month,
| 375 |             'year': day.year,
| 376 |             'day': day.day,
| 377 |             'prefix': title + '-'
| 378 |         }
| 379 |
| 380 |

@@ 381-402 (lines=22) @@
| 378 |         }
| 379 |
| 380 |
| 381 | class GenericLeMondeBlog(GenericNavigableComic):
| 382 |     """Generic class to retrieve comics from Le Monde blogs."""
| 383 |     _categories = ('LEMONDE', 'FRANCAIS')
| 384 |     get_navi_link = get_link_rel_next
| 385 |     get_first_comic_link = simulate_first_link
| 386 |     first_url = NotImplemented
| 387 |
| 388 |     @classmethod
| 389 |     def get_comic_info(cls, soup, link):
| 390 |         """Get information about a particular comics."""
| 391 |         url2 = soup.find('link', rel='shortlink')['href']
| 392 |         title = soup.find('meta', property='og:title')['content']
| 393 |         date_str = soup.find("span", class_="entry-date").string
| 394 |         day = string_to_date(date_str, "%d %B %Y", "fr_FR.utf8")
| 395 |         imgs = soup.find_all('meta', property='og:image')
| 396 |         return {
| 397 |             'title': title,
| 398 |             'url2': url2,
| 399 |             'img': [convert_iri_to_plain_ascii_uri(i['content']) for i in imgs],
| 400 |             'month': day.month,
| 401 |             'year': day.year,
| 402 |             'day': day.day,
| 403 |         }
| 404 |
| 405 |

@@ 2320-2344 (lines=25) @@
| 2317 |         }
| 2318 |
| 2319 |
| 2320 | class LinsEditions(GenericNavigableComic):
| 2321 |     """Class to retrieve L.I.N.S. Editions comics."""
| 2322 |     # Also on http://linscomics.tumblr.com
| 2323 |     # Now on https://warandpeas.com
| 2324 |     name = 'lins'
| 2325 |     long_name = 'L.I.N.S. Editions'
| 2326 |     url = 'https://linsedition.com'
| 2327 |     _categories = ('LINS', )
| 2328 |     get_navi_link = get_link_rel_next
| 2329 |     get_first_comic_link = simulate_first_link
| 2330 |     first_url = 'https://linsedition.com/2011/09/07/l-i-n-s/'
| 2331 |
| 2332 |     @classmethod
| 2333 |     def get_comic_info(cls, soup, link):
| 2334 |         """Get information about a particular comics."""
| 2335 |         title = soup.find('meta', property='og:title')['content']
| 2336 |         imgs = soup.find_all('meta', property='og:image')
| 2337 |         date_str = soup.find('meta', property='article:published_time')['content'][:10]
| 2338 |         day = string_to_date(date_str, "%Y-%m-%d")
| 2339 |         return {
| 2340 |             'title': title,
| 2341 |             'img': [i['content'] for i in imgs],
| 2342 |             'month': day.month,
| 2343 |             'year': day.year,
| 2344 |             'day': day.day,
| 2345 |         }
| 2346 |
| 2347 |

@@ 1016-1040 (lines=25) @@
| 1013 |         }
| 1014 |
| 1015 |
| 1016 | class Mercworks(GenericNavigableComic):
| 1017 |     """Class to retrieve Mercworks comics."""
| 1018 |     # Also on http://mercworks.tumblr.com
| 1019 |     name = 'mercworks'
| 1020 |     long_name = 'Mercworks'
| 1021 |     url = 'http://mercworks.net'
| 1022 |     get_first_comic_link = get_a_comicnavbase_comicnavfirst
| 1023 |     get_navi_link = get_link_rel_next
| 1024 |
| 1025 |     @classmethod
| 1026 |     def get_comic_info(cls, soup, link):
| 1027 |         """Get information about a particular comics."""
| 1028 |         title = soup.find('meta', property='og:title')['content']
| 1029 |         metadesc = soup.find('meta', property='og:description')
| 1030 |         desc = metadesc['content'] if metadesc else ""
| 1031 |         date_str = soup.find('meta', property='article:published_time')['content'][:10]
| 1032 |         day = string_to_date(date_str, "%Y-%m-%d")
| 1033 |         imgs = soup.find_all('meta', property='og:image')
| 1034 |         return {
| 1035 |             'img': [i['content'] for i in imgs],
| 1036 |             'title': title,
| 1037 |             'desc': desc,
| 1038 |             'day': day.day,
| 1039 |             'month': day.month,
| 1040 |             'year': day.year
| 1041 |         }
| 1042 |
| 1043 |

@@ 3261-3282 (lines=22) @@
| 3258 |         }
| 3259 |
| 3260 |
| 3261 | class MarketoonistComics(GenericNavigableComic):
| 3262 |     """Class to retrieve Marketoonist Comics."""
| 3263 |     name = 'marketoonist'
| 3264 |     long_name = 'Marketoonist'
| 3265 |     url = 'https://marketoonist.com/cartoons'
| 3266 |     get_first_comic_link = simulate_first_link
| 3267 |     get_navi_link = get_link_rel_next
| 3268 |     first_url = 'https://marketoonist.com/2002/10/the-8-types-of-brand-managers-2.html'
| 3269 |
| 3270 |     @classmethod
| 3271 |     def get_comic_info(cls, soup, link):
| 3272 |         """Get information about a particular comics."""
| 3273 |         imgs = soup.find_all('meta', property='og:image')
| 3274 |         date_str = soup.find('meta', property='article:published_time')['content'][:10]
| 3275 |         day = string_to_date(date_str, "%Y-%m-%d")
| 3276 |         title = soup.find('meta', property='og:title')['content']
| 3277 |         return {
| 3278 |             'img': [i['content'] for i in imgs],
| 3279 |             'day': day.day,
| 3280 |             'month': day.month,
| 3281 |             'year': day.year,
| 3282 |             'title': title,
| 3283 |         }
| 3284 |
| 3285 |

@@ 1861-1886 (lines=26) @@
| 1858 |         }
| 1859 |
| 1860 |
| 1861 | class Penmen(GenericNavigableComic):
| 1862 |     """Class to retrieve Penmen comics."""
| 1863 |     name = 'penmen'
| 1864 |     long_name = 'Penmen'
| 1865 |     url = 'http://penmen.com'
| 1866 |     get_navi_link = get_link_rel_next
| 1867 |     get_first_comic_link = simulate_first_link
| 1868 |     first_url = 'http://penmen.com/index.php/2016/09/12/penmen-announces-grin-big-brand-clothing/'
| 1869 |
| 1870 |     @classmethod
| 1871 |     def get_comic_info(cls, soup, link):
| 1872 |         """Get information about a particular comics."""
| 1873 |         title = soup.find('title').string
| 1874 |         imgs = soup.find('div', class_='entry-content').find_all('img')
| 1875 |         short_url = soup.find('link', rel='shortlink')['href']
| 1876 |         tags = ' '.join(t.string for t in soup.find_all('a', rel='tag'))
| 1877 |         date_str = soup.find('time')['datetime'][:10]
| 1878 |         day = string_to_date(date_str, "%Y-%m-%d")
| 1879 |         return {
| 1880 |             'title': title,
| 1881 |             'short_url': short_url,
| 1882 |             'img': [i['src'] for i in imgs],
| 1883 |             'tags': tags,
| 1884 |             'month': day.month,
| 1885 |             'year': day.year,
| 1886 |             'day': day.day,
| 1887 |         }
| 1888 |
| 1889 |

@@ 1802-1827 (lines=26) @@
| 1799 |         }
| 1800 |
| 1801 |
| 1802 | class SafelyEndangered(GenericNavigableComic):
| 1803 |     """Class to retrieve Safely Endangered comics."""
| 1804 |     # Also on http://tumblr.safelyendangered.com
| 1805 |     name = 'endangered'
| 1806 |     long_name = 'Safely Endangered'
| 1807 |     url = 'http://www.safelyendangered.com'
| 1808 |     get_navi_link = get_link_rel_next
| 1809 |     get_first_comic_link = simulate_first_link
| 1810 |     first_url = 'http://www.safelyendangered.com/comic/ignored/'
| 1811 |
| 1812 |     @classmethod
| 1813 |     def get_comic_info(cls, soup, link):
| 1814 |         """Get information about a particular comics."""
| 1815 |         title = soup.find('h2', class_='post-title').string
| 1816 |         date_str = soup.find('span', class_='post-date').string
| 1817 |         day = string_to_date(date_str, '%B %d, %Y')
| 1818 |         imgs = soup.find('div', id='comic').find_all('img')
| 1819 |         alt = imgs[0]['alt']
| 1820 |         assert all(i['alt'] == i['title'] for i in imgs)
| 1821 |         return {
| 1822 |             'day': day.day,
| 1823 |             'month': day.month,
| 1824 |             'year': day.year,
| 1825 |             'img': [i['src'] for i in imgs],
| 1826 |             'title': title,
| 1827 |             'alt': alt,
| 1828 |         }
| 1829 |
| 1830 |