@@ 2432-2458 (lines=27) @@ | ||
2429 | url = 'http://theawkwardyeti.com' |
|
2430 | get_first_comic_link = get_a_navi_navifirst |
|
2431 | get_navi_link = get_link_rel_next |
|
2432 | ||
@classmethod
def get_comic_info(cls, soup, link):
    """Collect the title, publication date and image URLs of a comic page.

    The title comes from the 'post-title' heading, the date from the
    'post-date' span (parsed with the "%B %d, %Y" format) and the images
    from the element whose id is 'comic'.
    """
    title = soup.find('h2', class_='post-title').string
    raw_date = soup.find("span", class_="post-date").string
    published = string_to_date(raw_date, "%B %d, %Y")
    comic_div = soup.find("div", id="comic")
    images = comic_div.find_all("img")
    # Only the first image is required to have identical 'alt' and 'title'.
    assert all(
        image['alt'] == image['title']
        for position, image in enumerate(images)
        if position == 0
    )
    image_urls = [image['src'] for image in images]
    return {
        'img': image_urls,
        'title': title,
        'day': published.day,
        'month': published.month,
        'year': published.year
    }
|
2448 | ||
2449 | ||
2450 | class PleasantThoughts(GenericNavigableComic): |
|
2451 | """Class to retrieve Pleasant Thoughts comics.""" |
|
2452 | name = 'pleasant' |
|
2453 | long_name = 'Pleasant Thoughts' |
|
2454 | url = 'http://pleasant-thoughts.com' |
|
2455 | get_first_comic_link = get_a_comicnavbase_comicnavfirst |
|
2456 | get_navi_link = get_link_rel_next |
|
2457 | ||
2458 | @classmethod |
|
2459 | def get_comic_info(cls, soup, link): |
|
2460 | """Get information about a particular comics.""" |
|
2461 | post = soup.find('div', class_='post-content') |
|
@@ 2462-2486 (lines=25) @@ | ||
def get_comic_info(cls, soup, link):
    """Return the title and image URLs found in the 'post-content' block.

    Both the title heading and the 'entry' div holding the images are looked
    up inside the 'post-content' div rather than in the whole page.
    """
    content = soup.find('div', class_='post-content')
    heading = content.find('h2', class_='post-title')
    title = heading.string
    entry = content.find("div", class_="entry")
    image_urls = [image['src'] for image in entry.find_all("img")]
    return {
        'title': title,
        'img': image_urls,
    }
|
2468 | ||
2469 | ||
2470 | class MisterAndMe(GenericNavigableComic): |
|
2471 | """Class to retrieve Mister & Me Comics.""" |
|
2472 | # Also on http://www.gocomics.com/mister-and-me |
|
2473 | # Also on https://tapastic.com/series/Mister-and-Me |
|
2474 | name = 'mister' |
|
2475 | long_name = 'Mister & Me' |
|
2476 | url = 'http://www.mister-and-me.com' |
|
2477 | get_first_comic_link = get_a_comicnavbase_comicnavfirst |
|
2478 | get_navi_link = get_link_rel_next |
|
2479 | ||
2480 | @classmethod |
|
2481 | def get_comic_info(cls, soup, link): |
|
2482 | """Get information about a particular comics.""" |
|
2483 | title = soup.find('h2', class_='post-title').string |
|
2484 | author = soup.find("span", class_="post-author").find("a").string |
|
2485 | date_str = soup.find("span", class_="post-date").string |
|
2486 | day = string_to_date(date_str, "%B %d, %Y") |
|
2487 | imgs = soup.find("div", id="comic").find_all("img") |
|
2488 | assert all(i['alt'] == i['title'] for i in imgs) |
|
2489 | assert len(imgs) <= 1 |
|
@@ 2242-2266 (lines=25) @@ | ||
@classmethod
def get_first_comic_link(cls):
    """Return a link-like dict whose 'href' is the hard-coded first comic URL."""
    first_comic_url = 'http://www.lonniemillsap.com/?p=42'
    return dict(href=first_comic_url)
|
2243 | ||
@classmethod
def get_comic_info(cls, soup, link):
    """Collect title, author, image URLs and publication date of a comic.

    The title is read from the page-level 'post-title' heading; the author,
    date and images are all looked up inside the 'post-content' div.
    """
    title = soup.find('h2', class_='post-title').string
    content = soup.find('div', class_='post-content')
    author_span = content.find("span", class_="post-author")
    author = author_span.find("a").string
    raw_date = content.find("span", class_="post-date").string
    published = string_to_date(raw_date, "%B %d, %Y")
    entry = content.find("div", class_="entry")
    image_urls = [image['src'] for image in entry.find_all("img")]
    return {
        'title': title,
        'author': author,
        'img': image_urls,
        'month': published.month,
        'year': published.year,
        'day': published.day,
    }
|
2261 | ||
2262 | ||
2263 | class LinsEditions(GenericNavigableComic): |
|
2264 | """Class to retrieve L.I.N.S. Editions comics.""" |
|
2265 | # Also on http://linscomics.tumblr.com |
|
2266 | name = 'lins' |
|
2267 | long_name = 'L.I.N.S. Editions' |
|
2268 | url = 'https://linsedition.com' |
|
2269 | get_navi_link = get_link_rel_next |
|
@@ 2046-2070 (lines=25) @@ | ||
2043 | day = string_to_date(remove_st_nd_rd_th_from_date(date_str), "%B %d, %Y") |
|
2044 | author = soup.find('span', class_='post-author').string |
|
2045 | div = soup.find('div', id='comic') |
|
2046 | imgs = div.find_all('img') if div else [] |
|
2047 | title = imgs[0]['title'] if imgs else "" |
|
2048 | assert all(i['title'] == i['alt'] == title for i in imgs) |
|
2049 | return { |
|
2050 | 'month': day.month, |
|
2051 | 'year': day.year, |
|
2052 | 'day': day.day, |
|
2053 | 'img': [i['src'] for i in imgs], |
|
2054 | 'title': title, |
|
2055 | 'author': author, |
|
2056 | } |
|
2057 | ||
2058 | ||
2059 | class DepressedAlien(GenericNavigableComic): |
|
2060 | """Class to retrieve Depressed Alien Comics.""" |
|
2061 | name = 'depressedalien' |
|
2062 | long_name = 'Depressed Alien' |
|
2063 | url = 'http://depressedalien.com' |
|
2064 | get_url_from_link = join_cls_url_to_href |
|
2065 | ||
@classmethod
def get_first_comic_link(cls):
    """Return the parent element of the 'beginArrow' image on the page at cls.url."""
    front_page = get_soup_at_url(cls.url)
    begin_arrow = front_page.find('img', attrs={'name': 'beginArrow'})
    return begin_arrow.parent
|
2070 | ||
@classmethod
def get_navi_link(cls, last_soup, next_):
    """Return the parent element of the navigation arrow image.

    The image named 'rightArrow' is looked up when next_ is truthy and the
    one named 'leftArrow' otherwise.
    """
    if next_:
        arrow_name = 'rightArrow'
    else:
        arrow_name = 'leftArrow'
    arrow = last_soup.find('img', attrs={'name': arrow_name})
    return arrow.parent
|
@@ 2572-2595 (lines=24) @@ | ||
2569 | get_navi_link = get_link_rel_next |
|
2570 | ||
@classmethod
def get_comic_info(cls, soup, link):
    """Collect images, title, alt text, author and date of a comic page.

    Every image in the element whose id is 'comic' must have identical 'alt'
    and 'title' attributes; the reported alt text is the one of the first
    image, or an empty string when there is no image.
    """
    title = soup.find('h2', class_='post-title').string
    author_span = soup.find("span", class_="post-author")
    author = author_span.find("a").string
    raw_date = soup.find("span", class_="post-date").string
    published = string_to_date(raw_date, "%B %d, %Y")
    images = soup.find("div", id="comic").find_all("img")
    assert all(image['alt'] == image['title'] for image in images)
    if images:
        alt = images[0]['alt']
    else:
        alt = ""
    image_urls = [image['src'] for image in images]
    return {
        'img': image_urls,
        'title': title,
        'alt': alt,
        'author': author,
        'day': published.day,
        'month': published.month,
        'year': published.year
    }
|
2590 | ||
2591 | ||
2592 | class PlanC(GenericNavigableComic): |
|
2593 | """Class to retrieve Plan C comics.""" |
|
2594 | name = 'planc' |
|
2595 | long_name = 'Plan C' |
|
2596 | url = 'http://www.plancomic.com' |
|
2597 | get_first_comic_link = get_a_navi_navifirst |
|
2598 | get_navi_link = get_a_navi_comicnavnext_navinext |
|
@@ 1951-1974 (lines=24) @@ | ||
1948 | long_name = 'Completely Serious Comics' |
|
1949 | url = 'http://completelyseriouscomics.com' |
|
1950 | get_first_comic_link = get_a_navi_navifirst |
|
1951 | get_navi_link = get_a_navi_navinext |
|
1952 | ||
@classmethod
def get_comic_info(cls, soup, link):
    """Collect date, images, title, alt text and author of a comic page.

    At least one image is expected in the 'comicpane' div, and all of them
    must share the same text for both their 'title' and 'alt' attributes;
    that shared text is returned as 'alt'.
    """
    title = soup.find('h2', class_='post-title').string
    author_span = soup.find('span', class_='post-author')
    # The author name is the second child node of the 'post-author' span.
    author = author_span.contents[1].string
    raw_date = soup.find('span', class_='post-date').string
    published = string_to_date(raw_date, '%B %d, %Y')
    images = soup.find('div', class_='comicpane').find_all('img')
    assert images
    alt = images[0]['title']
    assert all(image['title'] == image['alt'] == alt for image in images)
    image_urls = [image['src'] for image in images]
    return {
        'month': published.month,
        'year': published.year,
        'day': published.day,
        'img': image_urls,
        'title': title,
        'alt': alt,
        'author': author,
    }
|
1973 | ||
1974 | ||
1975 | class PoorlyDrawnLines(GenericListableComic): |
|
1976 | """Class to retrieve Poorly Drawn Lines comics.""" |
|
1977 | # Also on http://pdlcomics.tumblr.com |
|
@@ 1639-1662 (lines=24) @@ | ||
1636 | if div: |
|
1637 | img = div.find('img') |
|
1638 | img_src = [img['src']] |
|
1639 | alt = img['alt'] |
|
1640 | assert alt == img['title'] |
|
1641 | title = soup.find('meta', property='og:title')['content'] |
|
1642 | else: |
|
1643 | img_src = [] |
|
1644 | alt = '' |
|
1645 | title = '' |
|
1646 | return { |
|
1647 | 'month': day.month, |
|
1648 | 'year': day.year, |
|
1649 | 'day': day.day, |
|
1650 | 'img': img_src, |
|
1651 | 'title': title, |
|
1652 | 'alt': alt, |
|
1653 | 'tags': ' '.join(t.string for t in soup.find('div', class_='postmeta').find_all('a', rel='tag')), |
|
1654 | } |
|
1655 | ||
1656 | ||
1657 | class WarehouseComic(GenericNavigableComic): |
|
1658 | """Class to retrieve Warehouse Comic comics.""" |
|
1659 | name = 'warehouse' |
|
1660 | long_name = 'Warehouse Comic' |
|
1661 | url = 'http://warehousecomic.com' |
|
1662 | get_first_comic_link = get_a_navi_navifirst |
|
1663 | get_navi_link = get_link_rel_next |
|
1664 | ||
1665 | @classmethod |
|
@@ 1072-1094 (lines=23) @@ | ||
1069 | ||
class GenericBouletCorp(GenericNavigableComic):
    """Generic class to retrieve BouletCorp comics in different languages.

    This class does not set ``url`` itself although its methods read
    ``cls.url`` -- presumably each language-specific subclass provides it.
    """
    # Also on http://bouletcorp.tumblr.com
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        Returns the first ``<a>`` element found in the 'centered_nav' div of
        the page at ``cls.url``.
        """
        return get_soup_at_url(cls.url).find('div', id='centered_nav').find_all('a')[0]

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The publication date is not read from the page but from the comic
        URL, which must start with ``<cls.url>/<year>/<month>/<day>/``.
        Returns a dict with the image URLs, the page title, the concatenated
        image 'title' texts and the year/month/day as integers.
        """
        url = cls.get_url_from_link(link)
        # cls.url is literal text, not a pattern: escape it so that regex
        # metacharacters it contains (e.g. the dots of the host name) only
        # match themselves instead of acting as wildcards.
        date_re = re.compile('^%s/([0-9]*)/([0-9]*)/([0-9]*)/' % re.escape(cls.url))
        year, month, day = [int(s) for s in date_re.match(url).groups()]
        imgs = soup.find('div', id='notes').find('div', class_='storycontent').find_all('img')
        # Join the non-empty 'title' attributes of the images, in page order.
        texts = ' '.join(t for t in (i.get('title') for i in imgs) if t)
        title = soup.find('title').string
        return {
            # Images without a 'src' attribute are ignored.
            'img': [convert_iri_to_plain_ascii_uri(i['src']) for i in imgs if i.get('src') is not None],
            'title': title,
            'texts': texts,
            'year': year,
            'month': month,
            'day': day,
        }
|
1097 |