|
@@ 958-983 (lines=26) @@
|
| 955 |
|
|
| 956 |
|
class MyExtraLife(GenericNavigableComic):
    """Class to retrieve My Extra Life comics."""
    name = 'extralife'
    long_name = 'My Extra Life'
    url = 'http://www.myextralife.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        Fetches the page at ``cls.url`` and returns the result of looking
        up the ``a`` element whose class attribute is
        'comic_nav_link first_comic_link'.
        """
        home_soup = get_soup_at_url(cls.url)
        return home_soup.find('a', class_='comic_nav_link first_comic_link')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        ``soup`` is the parsed comic page; ``link`` is not used here.
        Returns a dict with the keys 'title', 'img', 'day', 'month' and
        'year'.
        """
        title = soup.find("h1", class_="comic_title").string
        raw_date = soup.find("span", class_="comic_date").string
        date = string_to_date(raw_date, "%B %d, %Y")
        comic_imgs = soup.find_all("img", class_="comic")
        # Sanity check: every comic image must carry the comic title both
        # as its 'alt' and as its 'title' attribute.
        for img in comic_imgs:
            assert img['alt'] == img['title'] == title
        # Images with an empty 'src' are left out.
        sources = [img['src'] for img in comic_imgs if img["src"]]
        return {
            'title': title,
            'img': sources,
            'day': date.day,
            'month': date.month,
            'year': date.year
        }
| 983 |
|
|
| 984 |
|
|
| 985 |
|
class SaturdayMorningBreakfastCereal(GenericNavigableComic): |
| 986 |
|
"""Class to retrieve Saturday Morning Breakfast Cereal comics.""" |
|
@@ 386-407 (lines=22) @@
|
| 383 |
|
} |
| 384 |
|
|
| 385 |
|
|
| 386 |
|
class GenericLeMondeBlog(GenericNavigableComic):
    """Generic class to retrieve comics from Le Monde blogs."""
    _categories = ('LEMONDE', 'FRANCAIS')
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    # Placeholder value; presumably overridden by concrete subclasses
    # (to be confirmed against the rest of the file).
    first_url = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        ``soup`` is the parsed blog post; ``link`` is not used here.
        Returns a dict with the keys 'title', 'url2', 'img', 'month',
        'year' and 'day'.
        """
        shortlink_tag = soup.find('link', rel='shortlink')
        url2 = shortlink_tag['href']
        title_tag = soup.find('meta', property='og:title')
        title = title_tag['content']
        date_tag = soup.find("span", class_="entry-date")
        # The third argument looks like a locale name used to parse the
        # month name -- NOTE(review): confirm against string_to_date.
        date = string_to_date(date_tag.string, "%d %B %Y", "fr_FR.utf8")
        image_metas = soup.find_all('meta', property='og:image')
        image_urls = [
            convert_iri_to_plain_ascii_uri(meta['content'])
            for meta in image_metas
        ]
        return {
            'title': title,
            'url2': url2,
            'img': image_urls,
            'month': date.month,
            'year': date.year,
            'day': date.day,
        }
| 409 |
|
|
| 410 |
|
|
|
@@ 2374-2398 (lines=25) @@
|
| 2371 |
|
|
| 2372 |
|
class LonnieMillsap(GenericNavigableComic):
    """Class to retrieve Lonnie Millsap's comics."""
    name = 'millsap'
    long_name = 'Lonnie Millsap'
    url = 'http://www.lonniemillsap.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.lonniemillsap.com/?p=42'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        ``soup`` is the parsed post page; ``link`` is not used here.
        Returns a dict with the keys 'title', 'author', 'img', 'month',
        'year' and 'day'.
        """
        title = soup.find('h2', class_='post-title').string
        # Author, date and images are all looked up inside the post body.
        content = soup.find('div', class_='post-content')
        author_span = content.find("span", class_="post-author")
        author = author_span.find("a").string
        raw_date = content.find("span", class_="post-date").string
        date = string_to_date(raw_date, "%B %d, %Y")
        entry = content.find("div", class_="entry")
        sources = [img['src'] for img in entry.find_all("img")]
        return {
            'title': title,
            'author': author,
            'img': sources,
            'month': date.month,
            'year': date.year,
            'day': date.day,
        }
| 2398 |
|
|
| 2399 |
|
|
| 2400 |
|
class LinsEditions(GenericNavigableComic): |
| 2401 |
|
"""Class to retrieve L.I.N.S. Editions comics.""" |
|
@@ 3315-3336 (lines=22) @@
|
| 3312 |
|
def get_nav(cls, soup):
    """Get the navigation elements from soup object.

    Collects every element with class 'cnav', checks that the first five
    are equal to the remaining ones, and returns the first five as a list
    in which any element without an 'href' is replaced by None.
    """
    elements = soup.find_all(class_='cnav')
    first_bar = elements[:5]
    second_bar = elements[5:]
    assert first_bar == second_bar
    # Expected order: begin, prev, archive, next_, end
    return [
        elt if elt.get('href') is not None else None
        for elt in first_bar
    ]
| 3319 |
|
|
| 3320 |
|
@classmethod
def get_first_comic_link(cls):
    """Get link to first comics.

    Fetches the page at ``cls.url`` and returns the first entry (index 0)
    of the list produced by ``cls.get_nav``.
    """
    home_soup = get_soup_at_url(cls.url)
    navigation = cls.get_nav(home_soup)
    return navigation[0]
| 3324 |
|
|
| 3325 |
|
@classmethod
def get_navi_link(cls, last_soup, next_):
    """Get link to next or previous comic.

    Returns the entry at index 3 of ``cls.get_nav(last_soup)`` when
    ``next_`` is truthy, and the entry at index 1 otherwise.
    """
    navigation = cls.get_nav(last_soup)
    if next_:
        return navigation[3]
    return navigation[1]
| 3329 |
|
|
| 3330 |
|
@classmethod
def get_comic_info(cls, soup, link):
    """Get information about a particular comics.

    The title is read from the 'title' entry of ``link``; the images are
    the ``img`` elements of ``soup`` whose id is 'comicimg'.
    Returns a dict with the keys 'title' and 'img'.
    """
    title = link['title']
    comic_imgs = soup.find_all('img', id='comicimg')
    sources = [img['src'] for img in comic_imgs]
    return {
        'title': title,
        'img': sources,
    }
| 3339 |
|
|
|
@@ 1046-1070 (lines=25) @@
|
| 1043 |
|
|
| 1044 |
|
class Mercworks(GenericNavigableComic):
    """Class to retrieve Mercworks comics."""
    # Also on http://mercworks.tumblr.com
    name = 'mercworks'
    long_name = 'Mercworks'
    url = 'http://mercworks.net'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        ``soup`` is the parsed comic page; ``link`` is not used here.
        Returns a dict with the keys 'img', 'title', 'desc', 'day',
        'month' and 'year'.
        """
        title = soup.find('meta', property='og:title')['content']
        # The description meta tag is optional: fall back to an empty string.
        desc_meta = soup.find('meta', property='og:description')
        if desc_meta:
            desc = desc_meta['content']
        else:
            desc = ""
        published = soup.find('meta', property='article:published_time')['content']
        # Only the first 10 characters are parsed, as a "%Y-%m-%d" date.
        date = string_to_date(published[:10], "%Y-%m-%d")
        image_metas = soup.find_all('meta', property='og:image')
        image_urls = [meta['content'] for meta in image_metas]
        return {
            'img': image_urls,
            'title': title,
            'desc': desc,
            'day': date.day,
            'month': date.month,
            'year': date.year
        }
| 1070 |
|
|
| 1071 |
|
|
| 1072 |
|
class BerkeleyMews(GenericListableComic): |
| 1073 |
|
"""Class to retrieve Berkeley Mews comics.""" |
|
@@ 2301-2325 (lines=25) @@
|
| 2298 |
|
} |
| 2299 |
|
|
| 2300 |
|
|
| 2301 |
|
class JuliasDrawings(GenericListableComic):
    """Class to retrieve Julia's Drawings."""
    name = 'julia'
    long_name = "Julia's Drawings"
    url = 'https://drawings.jvns.ca'
    get_url_from_archive_element = get_href

    @classmethod
    def get_archive_elements(cls):
        """Get the archive elements.

        Fetches the page at ``cls.url``, collects the ``article`` elements
        whose class is 'li post' and returns, in reversed page order, the
        result of looking up an ``a`` element in each of them.
        """
        home_soup = get_soup_at_url(cls.url)
        posts = home_soup.find_all('article', class_='li post')
        return [post.find('a') for post in posts[::-1]]

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Get information about a particular comics.

        ``soup`` is the parsed post page; ``archive_elt`` is not used here.
        Returns a dict with the keys 'title', 'img', 'month', 'year' and
        'day'.
        """
        published = soup.find('meta', property='og:article:published_time')['content']
        # Only the first 10 characters are parsed, as a "%Y-%m-%d" date.
        date = string_to_date(published[:10], "%Y-%m-%d")
        title = soup.find('h3', class_='p-post-title').string
        content = soup.find('section', class_='post-content')
        # Each image source is joined with the site url.
        image_urls = [
            urljoin_wrapper(cls.url, img['src'])
            for img in content.find_all('img')
        ]
        return {
            'title': title,
            'img': image_urls,
            'month': date.month,
            'year': date.year,
            'day': date.day,
        }
| 2327 |
|
|
| 2328 |
|
|