|
@@ 1781-1806 (lines=26) @@
|
| 1778 |
|
@classmethod
def get_comic_info(cls, soup, link):
    """Build the info dictionary for the comic page parsed into soup.

    Title, author and publication date are read from <meta> tags.  The
    images are the og:image meta tags, minus the URLs listed in
    skip_imgs.
    """
    def named_meta(name):
        # Content of the <meta name="..."> tag with the given name.
        return soup.find('meta', attrs={'name': name})['content']

    title = soup.find('meta', property='og:title')['content']
    author = named_meta('shareaholic:article_author_name')
    published = named_meta('shareaholic:article_published_time')
    # Only the first 10 characters are parsed, as a "%Y-%m-%d" date.
    day = string_to_date(published[:10], "%Y-%m-%d")
    skip_imgs = {
        'http://respawncomic.com/wp-content/uploads/2016/03/site/HAROLD2.png',
        'http://respawncomic.com/wp-content/uploads/2016/03/site/DEVA.png'
    }
    img_urls = []
    for meta in soup.find_all('meta', property='og:image'):
        if meta['content'] not in skip_imgs:
            img_urls.append(meta['content'])
    return {
        'title': title,
        'author': author,
        'day': day.day,
        'month': day.month,
        'year': day.year,
        'img': img_urls,
    }
| 1799 |
|
|
| 1800 |
|
|
| 1801 |
|
class SafelyEndangered(GenericNavigableComic): |
| 1802 |
|
"""Class to retrieve Safely Endangered comics.""" |
| 1803 |
|
# Also on http://tumblr.safelyendangered.com |
| 1804 |
|
name = 'endangered' |
| 1805 |
|
long_name = 'Safely Endangered' |
| 1806 |
|
url = 'http://www.safelyendangered.com' |
| 1807 |
|
get_navi_link = get_link_rel_next |
| 1808 |
|
get_first_comic_link = simulate_first_link |
| 1809 |
|
first_url = 'http://www.safelyendangered.com/comic/ignored/' |
|
@@ 1810-1836 (lines=27) @@
|
| 1807 |
|
get_navi_link = get_link_rel_next |
| 1808 |
|
get_first_comic_link = simulate_first_link |
| 1809 |
|
first_url = 'http://www.safelyendangered.com/comic/ignored/' |
| 1810 |
|
|
| 1811 |
|
@classmethod
def get_comic_info(cls, soup, link):
    """Get information about a particular comic.

    Reads the title from the h2 with class "post-title", the date from
    the span with class "post-date" (parsed as '%B %d, %Y') and the
    images from the div with id "comic".

    Returns a dict with keys 'day', 'month', 'year', 'img', 'title'
    and 'alt'.  'alt' is the alt text of the first image, or "" when the
    page contains no image.
    """
    title = soup.find('h2', class_='post-title').string
    date_str = soup.find('span', class_='post-date').string
    day = string_to_date(date_str, '%B %d, %Y')
    imgs = soup.find('div', id='comic').find_all('img')
    # Guard against pages without any image: indexing imgs[0] directly
    # would raise IndexError.
    alt = imgs[0]['alt'] if imgs else ""
    # Sanity check: every image is expected to carry the same text in its
    # alt and title attributes.
    assert all(i['alt'] == i['title'] for i in imgs)
    return {
        'day': day.day,
        'month': day.month,
        'year': day.year,
        'img': [i['src'] for i in imgs],
        'title': title,
        'alt': alt,
    }
| 1828 |
|
|
| 1829 |
|
|
| 1830 |
|
class PicturesInBoxes(GenericNavigableComic): |
| 1831 |
|
"""Class to retrieve Pictures In Boxes comics.""" |
| 1832 |
|
# Also on http://picturesinboxescomic.tumblr.com |
| 1833 |
|
name = 'picturesinboxes' |
| 1834 |
|
long_name = 'Pictures in Boxes' |
| 1835 |
|
url = 'http://www.picturesinboxes.com' |
| 1836 |
|
get_navi_link = get_a_navi_navinext |
| 1837 |
|
get_first_comic_link = simulate_first_link |
| 1838 |
|
first_url = 'http://www.picturesinboxes.com/2013/10/26/tetris/' |
| 1839 |
|
|
|
@@ 2518-2545 (lines=28) @@
|
| 2515 |
|
"""Class to retrieve Last Place Comics.""" |
| 2516 |
|
name = 'lastplace' |
| 2517 |
|
long_name = 'Last Place Comics' |
| 2518 |
|
url = "http://lastplacecomics.com" |
| 2519 |
|
get_first_comic_link = get_a_comicnavbase_comicnavfirst |
| 2520 |
|
get_navi_link = get_link_rel_next |
| 2521 |
|
|
| 2522 |
|
@classmethod
def get_comic_info(cls, soup, link):
    """Extract title, author, date, image and alt text from a comic page.

    The div with id "comic" is expected to hold at most one image, whose
    alt and title attributes are identical; both expectations are checked
    with assertions.
    """
    title = soup.find('h2', class_='post-title').string
    author_span = soup.find("span", class_="post-author")
    author = author_span.find("a").string
    published = soup.find("span", class_="post-date").string
    day = string_to_date(published, "%B %d, %Y")
    comic_imgs = soup.find("div", id="comic").find_all("img")
    assert all(img['alt'] == img['title'] for img in comic_imgs)
    assert len(comic_imgs) <= 1
    if comic_imgs:
        alt = comic_imgs[0]['alt']
    else:
        alt = ""
    return {
        'img': [img['src'] for img in comic_imgs],
        'title': title,
        'alt': alt,
        'author': author,
        'day': day.day,
        'month': day.month,
        'year': day.year
    }
| 2542 |
|
|
| 2543 |
|
|
| 2544 |
|
class TalesOfAbsurdity(GenericNavigableComic): |
| 2545 |
|
"""Class to retrieve Tales Of Absurdity comics.""" |
| 2546 |
|
# Also on http://tapastic.com/series/Tales-Of-Absurdity |
| 2547 |
|
# Also on http://talesofabsurdity.tumblr.com |
| 2548 |
|
name = 'absurdity' |
|
@@ 2738-2764 (lines=27) @@
|
| 2735 |
|
"""Class to retrieve Unearthed comics.""" |
| 2736 |
|
# Also on http://tapastic.com/series/UnearthedComics |
| 2737 |
|
# Also on http://unearthedcomics.tumblr.com |
| 2738 |
|
name = 'unearthed' |
| 2739 |
|
long_name = 'Unearthed Comics' |
| 2740 |
|
url = 'http://unearthedcomics.com' |
| 2741 |
|
_categories = ('UNEARTHED', ) |
| 2742 |
|
get_navi_link = get_link_rel_next |
| 2743 |
|
get_first_comic_link = simulate_first_link |
| 2744 |
|
first_url = 'http://unearthedcomics.com/comics/world-with-turn-signals/' |
| 2745 |
|
|
| 2746 |
|
@classmethod
def get_comic_info(cls, soup, link):
    """Get information about a particular comic.

    Returns a dict with keys 'title', 'description', 'url2', 'img',
    'month', 'year' and 'day'.

    'title' is the text of the first h1 (or h2) element, or "" when the
    page has neither.  'description' is the content of the og:description
    meta tag, or None when the page has no such tag.  'url2' is the href
    of the rel="shortlink" link element.
    """
    short_url = soup.find('link', rel='shortlink')['href']
    title_elt = soup.find('h1') or soup.find('h2')
    title = title_elt.string if title_elt else ""
    # Store the text of the description rather than the <meta> element
    # itself; a missing tag still yields None, as before.
    desc_elt = soup.find('meta', property='og:description')
    desc = desc_elt['content'] if desc_elt is not None else None
    date_str = soup.find('time', class_='published updated hidden')['datetime']
    day = string_to_date(date_str, "%Y-%m-%d")
    post = soup.find('div', class_="entry content entry-content type-portfolio")
    imgs = post.find_all('img')
    return {
        'title': title,
        'description': desc,
        'url2': short_url,
        'img': [i['src'] for i in imgs],
        'month': day.month,
        'year': day.year,
        'day': day.day,
    }
| 2766 |
|
|
| 2767 |
|
|
|
@@ 2488-2514 (lines=27) @@
|
| 2485 |
|
# Also on https://tapastic.com/series/Mister-and-Me |
| 2486 |
|
name = 'mister' |
| 2487 |
|
long_name = 'Mister & Me' |
| 2488 |
|
url = 'http://www.mister-and-me.com' |
| 2489 |
|
get_first_comic_link = get_a_comicnavbase_comicnavfirst |
| 2490 |
|
get_navi_link = get_link_rel_next |
| 2491 |
|
|
| 2492 |
|
@classmethod
def get_comic_info(cls, soup, link):
    """Collect the details of the comic found in the parsed page soup.

    Assertions check that the div with id "comic" contains at most one
    image and that its alt and title attributes match.  The alt text is
    "" when there is no image.
    """
    title = soup.find('h2', class_='post-title').string
    author_link = soup.find("span", class_="post-author").find("a")
    author = author_link.string
    date_text = soup.find("span", class_="post-date").string
    when = string_to_date(date_text, "%B %d, %Y")
    pictures = soup.find("div", id="comic").find_all("img")
    assert all(pic['alt'] == pic['title'] for pic in pictures)
    assert len(pictures) <= 1
    alt = ""
    if pictures:
        alt = pictures[0]['alt']
    sources = [pic['src'] for pic in pictures]
    return {
        'img': sources,
        'title': title,
        'alt': alt,
        'author': author,
        'day': when.day,
        'month': when.month,
        'year': when.year
    }
| 2512 |
|
|
| 2513 |
|
|
| 2514 |
|
class LastPlaceComics(GenericNavigableComic): |
| 2515 |
|
"""Class to retrieve Last Place Comics.""" |
| 2516 |
|
name = 'lastplace' |
| 2517 |
|
long_name = 'Last Place Comics' |
|
@@ 2321-2346 (lines=26) @@
|
| 2318 |
|
for link in last_soup.find_all('a', rel='next' if next_ else 'prev'): |
| 2319 |
|
if link['href'] != '/comic': |
| 2320 |
|
return link |
| 2321 |
|
return None |
| 2322 |
|
|
| 2323 |
|
@classmethod
def get_comic_info(cls, soup, link):
    """Read the comic details from the itemprop-annotated page elements.

    Image sources are joined with cls.url through urljoin_wrapper.  An
    assertion checks that every image has identical title and alt
    attributes; the alt text of the first image (or "") is returned
    under 'alt'.
    """
    title = soup.find('meta', attrs={'name': 'description'})["content"]
    body = soup.find('div', itemprop='articleBody')
    description = body.text
    author = soup.find('span', itemprop='author copyrightHolder').string
    pictures = soup.find_all('img', itemprop='image')
    assert all(pic['title'] == pic['alt'] for pic in pictures)
    if pictures:
        alt = pictures[0]['alt']
    else:
        alt = ""
    published = soup.find('time', itemprop='datePublished')["datetime"]
    day = string_to_date(published, "%Y-%m-%d %H:%M:%S")
    img_urls = [urljoin_wrapper(cls.url, pic['src']) for pic in pictures]
    return {
        'img': img_urls,
        'month': day.month,
        'year': day.year,
        'day': day.day,
        'author': author,
        'title': title,
        'alt': alt,
        'description': description,
    }
| 2344 |
|
|
| 2345 |
|
|
| 2346 |
|
class GerbilWithAJetpack(GenericNavigableComic): |
| 2347 |
|
"""Class to retrieve GerbilWithAJetpack comics.""" |
| 2348 |
|
name = 'gerbil' |
| 2349 |
|
long_name = 'Gerbil With A Jetpack' |
|
@@ 2019-2043 (lines=25) @@
|
| 2016 |
|
def get_first_comic_link(cls):
    """Return the <a> element titled "First" from the page at cls.url."""
    home_soup = get_soup_at_url(cls.url)
    return home_soup.find('a', title="First")
| 2019 |
|
|
| 2020 |
|
@classmethod
def get_navi_link(cls, last_soup, next_):
    """Return the <a> titled 'Next' (if next_ is true) or 'Previous'."""
    if next_:
        wanted_title = 'Next'
    else:
        wanted_title = 'Previous'
    return last_soup.find('a', title=wanted_title)
| 2024 |
|
|
| 2025 |
|
@classmethod
def get_comic_info(cls, soup, link):
    """Return title, image sources and date for the comic page in soup.

    Only the <img> elements matched by find_all('img', alt='', title='')
    inside the div with class "comic" are kept.
    """
    title = soup.find('h1').string
    date_span = soup.find('span', class_='date')
    # The date text is stripped of surrounding whitespace before parsing.
    day = string_to_date(date_span.string.strip(), "%B %d, %Y")
    comic_div = soup.find('div', class_='comic')
    sources = [
        img['src'] for img in comic_div.find_all('img', alt='', title='')
    ]
    return {
        'title': title,
        'img': sources,
        'month': day.month,
        'year': day.year,
        'day': day.day,
    }
| 2039 |
|
|
| 2040 |
|
|
| 2041 |
|
class ChuckleADuck(GenericNavigableComic): |
| 2042 |
|
"""Class to retrieve Chuckle-A-Duck comics.""" |
| 2043 |
|
name = 'chuckleaduck' |
| 2044 |
|
long_name = 'Chuckle-A-duck' |
| 2045 |
|
url = 'http://chuckleaduck.com' |
| 2046 |
|
get_first_comic_link = get_div_navfirst_a |
|
@@ 2378-2404 (lines=27) @@
|
| 2375 |
|
class EveryDayBlues(GenericNavigableComic):
    """Retrieve the Every Day Blues comics from everydayblues.net."""
    name = "blues"
    long_name = "Every Day Blues"
    url = "http://everydayblues.net"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, author, date and image from a comic page.

        Assertions check that there is at most one image in the div with
        id "comic" and that its alt and title attributes both equal the
        post title.
        """
        title = soup.find("h2", class_="post-title").string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        published = soup.find("span", class_="post-date").string
        # "de_DE.utf8" is passed as the third argument of string_to_date;
        # presumably the locale used to parse the month name -- TODO confirm.
        day = string_to_date(published, "%d. %B %Y", "de_DE.utf8")
        pictures = soup.find("div", id="comic").find_all("img")
        assert all(pic['alt'] == pic['title'] == title for pic in pictures)
        assert len(pictures) <= 1
        sources = [pic['src'] for pic in pictures]
        return {
            'img': sources,
            'title': title,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
| 2401 |
|
|
| 2402 |
|
|
| 2403 |
|
class BiterComics(GenericNavigableComic): |
| 2404 |
|
"""Class to retrieve Biter Comics.""" |
| 2405 |
|
name = "biter" |
| 2406 |
|
long_name = "Biter Comics" |
| 2407 |
|
url = "http://www.bitercomics.com" |
|
@@ 1932-1958 (lines=27) @@
|
| 1929 |
|
'year': day.year, |
| 1930 |
|
'day': day.day, |
| 1931 |
|
'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs], |
| 1932 |
|
'title': title, |
| 1933 |
|
} |
| 1934 |
|
|
| 1935 |
|
|
| 1936 |
|
class DiscoBleach(GenericEmptyComic):  # Does not work anymore
    """Placeholder for the Disco Bleach comics (retrieval is disabled)."""
    name = "discobleach"
    long_name = "Disco Bleach"
    url = "http://discobleach.com"
| 1941 |
|
|
| 1942 |
|
|
| 1943 |
|
class TubeyToons(GenericEmptyComic):  # Does not work anymore
    """Placeholder for the Tubey Toons comics (retrieval is disabled)."""
    # Also on http://tapastic.com/series/Tubey-Toons
    # Also on http://tubeytoons.tumblr.com
    name = "tubeytoons"
    long_name = "Tubey Toons"
    url = "http://tubeytoons.com"
    # NOTE(review): 'TUNEYTOONS' looks like a typo for 'TUBEYTOONS' --
    # confirm against other users of this category before renaming.
    _categories = ("TUNEYTOONS", )
| 1951 |
|
|
| 1952 |
|
|
| 1953 |
|
class CompletelySeriousComics(GenericNavigableComic): |
| 1954 |
|
"""Class to retrieve Completely Serious comics.""" |
| 1955 |
|
name = 'completelyserious' |
| 1956 |
|
long_name = 'Completely Serious Comics' |
| 1957 |
|
url = 'http://completelyseriouscomics.com' |
| 1958 |
|
get_first_comic_link = get_a_navi_navifirst |
| 1959 |
|
get_navi_link = get_a_navi_navinext |
| 1960 |
|
|
| 1961 |
|
@classmethod |
|
@@ 2119-2144 (lines=26) @@
|
| 2116 |
|
tags = ' '.join(t['content'] for t in soup.find_all('meta', property='article:tag')) |
| 2117 |
|
imgs = soup.find('div', class_='entry-content').find_all('img') |
| 2118 |
|
return { |
| 2119 |
|
'day': day.day, |
| 2120 |
|
'month': day.month, |
| 2121 |
|
'year': day.year, |
| 2122 |
|
'title': title, |
| 2123 |
|
'title2': title2, |
| 2124 |
|
'description': description, |
| 2125 |
|
'tags': tags, |
| 2126 |
|
'img': [i['src'] for i in imgs], |
| 2127 |
|
'alt': ' '.join(i['alt'] for i in imgs), |
| 2128 |
|
} |
| 2129 |
|
|
| 2130 |
|
@classmethod
def get_url_from_archive_element(cls, tr):
    """Return the href of the link found in the second cell of row tr.

    The row must contain exactly three <td> cells, otherwise the
    unpacking raises ValueError.
    """
    _first, link_cell, _last = tr.find_all('td')
    anchor = link_cell.find('a')
    return anchor['href']
| 2134 |
|
|
| 2135 |
|
@classmethod
def get_archive_elements(cls):
    """Return the <tr> rows of the 'archive-2' page in reverse order.

    The rows are taken from the first <tbody> of the page and wrapped in
    reversed(), so the result is an iterator.
    """
    archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive-2'))
    rows = archive_soup.find('tbody').find_all('tr')
    return reversed(rows)
| 2139 |
|
|
| 2140 |
|
|
| 2141 |
|
class HappleTea(GenericNavigableComic): |
| 2142 |
|
"""Class to retrieve Happle Tea Comics.""" |
| 2143 |
|
name = 'happletea' |
| 2144 |
|
long_name = 'Happle Tea' |
| 2145 |
|
url = 'http://www.happletea.com' |
| 2146 |
|
get_first_comic_link = get_a_navi_navifirst |
| 2147 |
|
get_navi_link = get_link_rel_next |
|
@@ 2659-2683 (lines=25) @@
|
| 2656 |
|
def get_comic_info(cls, soup, link):
    """Return title, image titles, description and image URLs of a comic.

    'title2' joins the title attributes of all images found in the div
    with class "entry-content" (an image without a title contributes an
    empty string).  Each image source goes through
    convert_iri_to_plain_ascii_uri and is then joined with cls.url.
    """
    desc = soup.find('meta', property='og:description')['content']
    title = soup.find('meta', property='og:title')['content']
    pictures = soup.find('div', class_='entry-content').find_all('img')
    title2 = ' '.join(pic.get('title', '') for pic in pictures)
    img_urls = []
    for pic in pictures:
        ascii_src = convert_iri_to_plain_ascii_uri(pic['src'])
        img_urls.append(urljoin_wrapper(cls.url, ascii_src))
    return {
        'title': title,
        'title2': title2,
        'description': desc,
        'img': img_urls,
    }
| 2668 |
|
|
| 2669 |
|
|
| 2670 |
|
class CommitStripFr(GenericCommitStrip):
    """Retrieve the French edition of Commit Strip."""
    name = "commit_fr"
    long_name = "Commit Strip (Fr)"
    url = "http://www.commitstrip.com/fr"
    _categories = ("FRANCAIS", )
    first_url = "http://www.commitstrip.com/fr/2012/02/22/interview/"
| 2677 |
|
|
| 2678 |
|
|
| 2679 |
|
class CommitStripEn(GenericCommitStrip):
    """Retrieve the English edition of Commit Strip."""
    name = "commit_en"
    long_name = "Commit Strip (En)"
    url = "http://www.commitstrip.com/en"
    first_url = "http://www.commitstrip.com/en/2012/02/22/interview/"
| 2685 |
|
|
| 2686 |
|
|
|
@@ 338-360 (lines=23) @@
|
| 335 |
|
def get_next_comic(cls, last_comic):
    """Log that the comic is treated as empty and return an empty list.

    last_comic is accepted but not used.
    """
    message = "comic is considered as empty - returning no comic"
    cls.log(message)
    return []
| 339 |
|
|
| 340 |
|
|
| 341 |
|
class ExtraFabulousComics(GenericNavigableComic): |
| 342 |
|
"""Class to retrieve Extra Fabulous Comics.""" |
| 343 |
|
name = 'efc' |
| 344 |
|
long_name = 'Extra Fabulous Comics' |
| 345 |
|
url = 'http://extrafabulouscomics.com' |
| 346 |
|
get_first_comic_link = get_a_navi_navifirst |
| 347 |
|
get_navi_link = get_link_rel_next |
| 348 |
|
|
| 349 |
|
@classmethod |
| 350 |
|
def get_comic_info(cls, soup, link): |
| 351 |
|
"""Get information about a particular comics.""" |
| 352 |
|
img_src_re = re.compile('^%s/wp-content/uploads/' % cls.url) |
| 353 |
|
imgs = soup.find_all('img', src=img_src_re) |
| 354 |
|
title = soup.find('meta', property='og:title')['content'] |
| 355 |
|
date_str = soup.find('meta', property='article:published_time')['content'][:10] |
| 356 |
|
day = string_to_date(date_str, "%Y-%m-%d") |
| 357 |
|
return { |
| 358 |
|
'title': title, |
| 359 |
|
'img': [i['src'] for i in imgs], |
| 360 |
|
'month': day.month, |
| 361 |
|
'year': day.year, |
| 362 |
|
'day': day.day, |
| 363 |
|
'prefix': title + '-' |