|
@@ 2432-2458 (lines=27) @@
|
| 2429 |
|
url = 'http://theawkwardyeti.com' |
| 2430 |
|
get_first_comic_link = get_a_navi_navifirst |
| 2431 |
|
get_navi_link = get_link_rel_next |
| 2432 |
|
|
| 2433 |
|
@classmethod
def get_comic_info(cls, soup, link):
    """Extract title, publication date and image URLs from a comic page.

    The title is the string of the ``h2.post-title`` element, the date is
    the string of ``span.post-date`` parsed with the ``"%B %d, %Y"`` format
    and the images are all the ``img`` tags found under ``div#comic``.
    """
    heading = soup.find('h2', class_='post-title')
    title = heading.string
    date_span = soup.find("span", class_="post-date")
    published = string_to_date(date_span.string, "%B %d, %Y")
    comic_div = soup.find("div", id="comic")
    images = comic_div.find_all("img")
    # Only the leading image is required to carry matching alt/title text.
    assert all(img['alt'] == img['title'] for img in images[:1])
    info = {
        'img': [img['src'] for img in images],
        'title': title,
    }
    info['day'] = published.day
    info['month'] = published.month
    info['year'] = published.year
    return info
| 2448 |
|
|
| 2449 |
|
|
| 2450 |
|
class PleasantThoughts(GenericNavigableComic): |
| 2451 |
|
"""Class to retrieve Pleasant Thoughts comics.""" |
| 2452 |
|
name = 'pleasant' |
| 2453 |
|
long_name = 'Pleasant Thoughts' |
| 2454 |
|
url = 'http://pleasant-thoughts.com' |
| 2455 |
|
get_first_comic_link = get_a_comicnavbase_comicnavfirst |
| 2456 |
|
get_navi_link = get_link_rel_next |
| 2457 |
|
|
| 2458 |
|
@classmethod |
| 2459 |
|
def get_comic_info(cls, soup, link): |
| 2460 |
|
"""Get information about a particular comics.""" |
| 2461 |
|
post = soup.find('div', class_='post-content') |
|
@@ 2462-2486 (lines=25) @@
|
| 2459 |
|
def get_comic_info(cls, soup, link):
    """Collect the title and image URLs of the comic on the given page.

    Both lookups are scoped to the ``div.post-content`` element: the title
    is the string of its ``h2.post-title`` and the images are every ``img``
    tag under its ``div.entry``.
    """
    content = soup.find('div', class_='post-content')
    heading = content.find('h2', class_='post-title')
    title = heading.string
    entry = content.find("div", class_="entry")
    images = entry.find_all("img")
    return dict(
        title=title,
        img=[image['src'] for image in images],
    )
| 2468 |
|
|
| 2469 |
|
|
| 2470 |
|
class MisterAndMe(GenericNavigableComic): |
| 2471 |
|
"""Class to retrieve Mister & Me Comics.""" |
| 2472 |
|
# Also on http://www.gocomics.com/mister-and-me |
| 2473 |
|
# Also on https://tapastic.com/series/Mister-and-Me |
| 2474 |
|
name = 'mister' |
| 2475 |
|
long_name = 'Mister & Me' |
| 2476 |
|
url = 'http://www.mister-and-me.com' |
| 2477 |
|
get_first_comic_link = get_a_comicnavbase_comicnavfirst |
| 2478 |
|
get_navi_link = get_link_rel_next |
| 2479 |
|
|
| 2480 |
|
@classmethod |
| 2481 |
|
def get_comic_info(cls, soup, link): |
| 2482 |
|
"""Get information about a particular comics.""" |
| 2483 |
|
title = soup.find('h2', class_='post-title').string |
| 2484 |
|
author = soup.find("span", class_="post-author").find("a").string |
| 2485 |
|
date_str = soup.find("span", class_="post-date").string |
| 2486 |
|
day = string_to_date(date_str, "%B %d, %Y") |
| 2487 |
|
imgs = soup.find("div", id="comic").find_all("img") |
| 2488 |
|
assert all(i['alt'] == i['title'] for i in imgs) |
| 2489 |
|
assert len(imgs) <= 1 |
|
@@ 2242-2266 (lines=25) @@
|
| 2239 |
|
@classmethod
def get_first_comic_link(cls):
    """Return a link-like dict whose 'href' is the hard-coded first comic URL."""
    first_url = 'http://www.lonniemillsap.com/?p=42'
    return dict(href=first_url)
| 2243 |
|
|
| 2244 |
|
@classmethod
def get_comic_info(cls, soup, link):
    """Extract title, author, date and image URLs from a comic page.

    The title is read from the first ``h2.post-title`` of the whole page;
    author, date and images are read from inside ``div.post-content``.
    The date string is parsed with the ``"%B %d, %Y"`` format.
    """
    title = soup.find('h2', class_='post-title').string
    content = soup.find('div', class_='post-content')
    author_span = content.find("span", class_="post-author")
    author = author_span.find("a").string
    date_span = content.find("span", class_="post-date")
    published = string_to_date(date_span.string, "%B %d, %Y")
    entry = content.find("div", class_="entry")
    images = entry.find_all("img")
    info = {'title': title, 'author': author}
    info['img'] = [image['src'] for image in images]
    info['month'] = published.month
    info['year'] = published.year
    info['day'] = published.day
    return info
| 2261 |
|
|
| 2262 |
|
|
| 2263 |
|
class LinsEditions(GenericNavigableComic): |
| 2264 |
|
"""Class to retrieve L.I.N.S. Editions comics.""" |
| 2265 |
|
# Also on http://linscomics.tumblr.com |
| 2266 |
|
name = 'lins' |
| 2267 |
|
long_name = 'L.I.N.S. Editions' |
| 2268 |
|
url = 'https://linsedition.com' |
| 2269 |
|
get_navi_link = get_link_rel_next |
|
@@ 2046-2070 (lines=25) @@
|
| 2043 |
|
day = string_to_date(remove_st_nd_rd_th_from_date(date_str), "%B %d, %Y") |
| 2044 |
|
author = soup.find('span', class_='post-author').string |
| 2045 |
|
div = soup.find('div', id='comic') |
| 2046 |
|
imgs = div.find_all('img') if div else [] |
| 2047 |
|
title = imgs[0]['title'] if imgs else "" |
| 2048 |
|
assert all(i['title'] == i['alt'] == title for i in imgs) |
| 2049 |
|
return { |
| 2050 |
|
'month': day.month, |
| 2051 |
|
'year': day.year, |
| 2052 |
|
'day': day.day, |
| 2053 |
|
'img': [i['src'] for i in imgs], |
| 2054 |
|
'title': title, |
| 2055 |
|
'author': author, |
| 2056 |
|
} |
| 2057 |
|
|
| 2058 |
|
|
| 2059 |
|
class DepressedAlien(GenericNavigableComic): |
| 2060 |
|
"""Class to retrieve Depressed Alien Comics.""" |
| 2061 |
|
name = 'depressedalien' |
| 2062 |
|
long_name = 'Depressed Alien' |
| 2063 |
|
url = 'http://depressedalien.com' |
| 2064 |
|
get_url_from_link = join_cls_url_to_href |
| 2065 |
|
|
| 2066 |
|
@classmethod
def get_first_comic_link(cls):
    """Return the parent element of the 'beginArrow' image found at ``cls.url``."""
    landing_page = get_soup_at_url(cls.url)
    begin_arrow = landing_page.find('img', attrs={'name': 'beginArrow'})
    return begin_arrow.parent
| 2070 |
|
|
| 2071 |
|
@classmethod
def get_navi_link(cls, last_soup, next_):
    """Return the parent element of the navigation arrow image.

    The image named 'rightArrow' is looked up when ``next_`` is truthy,
    the one named 'leftArrow' otherwise.
    """
    if next_:
        arrow_name = 'rightArrow'
    else:
        arrow_name = 'leftArrow'
    arrow = last_soup.find('img', attrs={'name': arrow_name})
    return arrow.parent
|
@@ 2572-2595 (lines=24) @@
|
| 2569 |
|
get_navi_link = get_link_rel_next |
| 2570 |
|
|
| 2571 |
|
@classmethod
def get_comic_info(cls, soup, link):
    """Extract title, author, date, alt text and image URLs from a page.

    Images are every ``img`` under ``div#comic``; each one must have equal
    'alt' and 'title' attributes. The reported alt text is the one of the
    first image, or an empty string when there is no image.
    """
    title = soup.find('h2', class_='post-title').string
    author_span = soup.find("span", class_="post-author")
    author = author_span.find("a").string
    date_span = soup.find("span", class_="post-date")
    published = string_to_date(date_span.string, "%B %d, %Y")
    images = soup.find("div", id="comic").find_all("img")
    assert not any(image['alt'] != image['title'] for image in images)
    if images:
        alt = images[0]['alt']
    else:
        alt = ""
    info = {
        'img': [image['src'] for image in images],
        'title': title,
        'alt': alt,
        'author': author,
    }
    info['day'] = published.day
    info['month'] = published.month
    info['year'] = published.year
    return info
| 2590 |
|
|
| 2591 |
|
|
| 2592 |
|
class PlanC(GenericNavigableComic): |
| 2593 |
|
"""Class to retrieve Plan C comics.""" |
| 2594 |
|
name = 'planc' |
| 2595 |
|
long_name = 'Plan C' |
| 2596 |
|
url = 'http://www.plancomic.com' |
| 2597 |
|
get_first_comic_link = get_a_navi_navifirst |
| 2598 |
|
get_navi_link = get_a_navi_comicnavnext_navinext |
|
@@ 1951-1974 (lines=24) @@
|
| 1948 |
|
long_name = 'Completely Serious Comics' |
| 1949 |
|
url = 'http://completelyseriouscomics.com' |
| 1950 |
|
get_first_comic_link = get_a_navi_navifirst |
| 1951 |
|
get_navi_link = get_a_navi_navinext |
| 1952 |
|
|
| 1953 |
|
@classmethod
def get_comic_info(cls, soup, link):
    """Extract date, title, author, alt text and image URLs from a page.

    Images are every ``img`` under ``div.comicpane``; at least one is
    required and all of them must share the same 'title' and 'alt' text,
    which is reported as the comic's alt text. The author is the string of
    the second child of ``span.post-author``.
    """
    title = soup.find('h2', class_='post-title').string
    author_span = soup.find('span', class_='post-author')
    author = author_span.contents[1].string
    date_span = soup.find('span', class_='post-date')
    published = string_to_date(date_span.string, '%B %d, %Y')
    images = soup.find('div', class_='comicpane').find_all('img')
    assert images
    alt = images[0]['title']
    assert not any(
        image['title'] != image['alt'] or image['alt'] != alt
        for image in images
    )
    info = {
        'month': published.month,
        'year': published.year,
        'day': published.day,
    }
    info['img'] = [image['src'] for image in images]
    info['title'] = title
    info['alt'] = alt
    info['author'] = author
    return info
| 1973 |
|
|
| 1974 |
|
|
| 1975 |
|
class PoorlyDrawnLines(GenericListableComic): |
| 1976 |
|
"""Class to retrieve Poorly Drawn Lines comics.""" |
| 1977 |
|
# Also on http://pdlcomics.tumblr.com |
|
@@ 1639-1662 (lines=24) @@
|
| 1636 |
|
if div: |
| 1637 |
|
img = div.find('img') |
| 1638 |
|
img_src = [img['src']] |
| 1639 |
|
alt = img['alt'] |
| 1640 |
|
assert alt == img['title'] |
| 1641 |
|
title = soup.find('meta', property='og:title')['content'] |
| 1642 |
|
else: |
| 1643 |
|
img_src = [] |
| 1644 |
|
alt = '' |
| 1645 |
|
title = '' |
| 1646 |
|
return { |
| 1647 |
|
'month': day.month, |
| 1648 |
|
'year': day.year, |
| 1649 |
|
'day': day.day, |
| 1650 |
|
'img': img_src, |
| 1651 |
|
'title': title, |
| 1652 |
|
'alt': alt, |
| 1653 |
|
'tags': ' '.join(t.string for t in soup.find('div', class_='postmeta').find_all('a', rel='tag')), |
| 1654 |
|
} |
| 1655 |
|
|
| 1656 |
|
|
| 1657 |
|
class WarehouseComic(GenericNavigableComic): |
| 1658 |
|
"""Class to retrieve Warehouse Comic comics.""" |
| 1659 |
|
name = 'warehouse' |
| 1660 |
|
long_name = 'Warehouse Comic' |
| 1661 |
|
url = 'http://warehousecomic.com' |
| 1662 |
|
get_first_comic_link = get_a_navi_navifirst |
| 1663 |
|
get_navi_link = get_link_rel_next |
| 1664 |
|
|
| 1665 |
|
@classmethod |
|
@@ 1072-1094 (lines=23) @@
|
| 1069 |
|
|
| 1070 |
|
class GenericBouletCorp(GenericNavigableComic):
    """Generic class to retrieve BouletCorp comics in different languages.

    The methods below read ``cls.url``, which is not defined in this class.
    """
    # Also on http://bouletcorp.tumblr.com
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        Returns the first ``a`` element found inside ``div#centered_nav``
        on the page at ``cls.url``.
        """
        return get_soup_at_url(cls.url).find('div', id='centered_nav').find_all('a')[0]

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The publication date is not read from the page: it is taken from
        the three numeric path components (year/month/day) that follow
        ``cls.url`` in the comic URL.

        Returns a dict with the keys 'img' (converted 'src' of every image
        that has one), 'title' (string of the page ``title`` element),
        'texts' (space-joined non-empty 'title' attributes of the images),
        'year', 'month' and 'day'.
        """
        url = cls.get_url_from_link(link)
        # Escape the base URL so that its dots (and any other regex
        # metacharacter) are matched literally instead of as wildcards.
        date_re = re.compile(r'^%s/([0-9]*)/([0-9]*)/([0-9]*)/' % re.escape(cls.url))
        year, month, day = [int(s) for s in date_re.match(url).groups()]
        imgs = soup.find('div', id='notes').find('div', class_='storycontent').find_all('img')
        texts = ' '.join(t for t in (i.get('title') for i in imgs) if t)
        title = soup.find('title').string
        return {
            'img': [convert_iri_to_plain_ascii_uri(i['src']) for i in imgs if i.get('src') is not None],
            'title': title,
            'texts': texts,
            'year': year,
            'month': month,
            'day': day,
        }
| 1097 |
|
|