@@ 1781-1806 (lines=26) @@ | ||
1778 | @classmethod |
|
def get_comic_info(cls, soup, link):
    """Get information about a particular comics.

    Title, author and publication date are read from <meta> tags of the
    page. Image URLs come from the 'og:image' meta tags, except for a
    fixed set of URLs which are left out.
    """
    title = soup.find('meta', property='og:title')['content']
    author_meta = soup.find('meta', attrs={'name': 'shareaholic:article_author_name'})
    author = author_meta['content']
    published_meta = soup.find('meta', attrs={'name': 'shareaholic:article_published_time'})
    # Only the first 10 characters (the 'YYYY-MM-DD' part) are parsed.
    published = string_to_date(published_meta['content'][:10], "%Y-%m-%d")
    image_metas = soup.find_all('meta', property='og:image')
    ignored_urls = {
        'http://respawncomic.com/wp-content/uploads/2016/03/site/HAROLD2.png',
        'http://respawncomic.com/wp-content/uploads/2016/03/site/DEVA.png'
    }
    kept_urls = []
    for meta in image_metas:
        if meta['content'] not in ignored_urls:
            kept_urls.append(meta['content'])
    return {
        'title': title,
        'author': author,
        'day': published.day,
        'month': published.month,
        'year': published.year,
        'img': kept_urls,
    }
|
1799 | ||
1800 | ||
1801 | class SafelyEndangered(GenericNavigableComic): |
|
1802 | """Class to retrieve Safely Endangered comics.""" |
|
1803 | # Also on http://tumblr.safelyendangered.com |
|
1804 | name = 'endangered' |
|
1805 | long_name = 'Safely Endangered' |
|
1806 | url = 'http://www.safelyendangered.com' |
|
1807 | get_navi_link = get_link_rel_next |
|
1808 | get_first_comic_link = simulate_first_link |
|
1809 | first_url = 'http://www.safelyendangered.com/comic/ignored/' |
|
@@ 1810-1836 (lines=27) @@ | ||
1807 | get_navi_link = get_link_rel_next |
|
1808 | get_first_comic_link = simulate_first_link |
|
1809 | first_url = 'http://www.safelyendangered.com/comic/ignored/' |
|
1810 | ||
1811 | @classmethod |
|
def get_comic_info(cls, soup, link):
    """Get information about a particular comics.

    The title and the date are read from the post header, the images from
    the 'comic' div. The alt text is the one of the first image, or an
    empty string when the page holds no image at all.

    Raises AssertionError when an image has different 'alt' and 'title'
    attributes.
    """
    title = soup.find('h2', class_='post-title').string
    date_str = soup.find('span', class_='post-date').string
    day = string_to_date(date_str, '%B %d, %Y')
    imgs = soup.find('div', id='comic').find_all('img')
    assert all(i['alt'] == i['title'] for i in imgs)
    # find_all() may return no image: fall back to an empty alt text
    # instead of failing with an IndexError on imgs[0].
    alt = imgs[0]['alt'] if imgs else ""
    return {
        'day': day.day,
        'month': day.month,
        'year': day.year,
        'img': [i['src'] for i in imgs],
        'title': title,
        'alt': alt,
    }
|
1828 | ||
1829 | ||
1830 | class PicturesInBoxes(GenericNavigableComic): |
|
1831 | """Class to retrieve Pictures In Boxes comics.""" |
|
1832 | # Also on http://picturesinboxescomic.tumblr.com |
|
1833 | name = 'picturesinboxes' |
|
1834 | long_name = 'Pictures in Boxes' |
|
1835 | url = 'http://www.picturesinboxes.com' |
|
1836 | get_navi_link = get_a_navi_navinext |
|
1837 | get_first_comic_link = simulate_first_link |
|
1838 | first_url = 'http://www.picturesinboxes.com/2013/10/26/tetris/' |
|
1839 | ||
@@ 2518-2545 (lines=28) @@ | ||
2515 | """Class to retrieve Last Place Comics.""" |
|
2516 | name = 'lastplace' |
|
2517 | long_name = 'Last Place Comics' |
|
2518 | url = "http://lastplacecomics.com" |
|
2519 | get_first_comic_link = get_a_comicnavbase_comicnavfirst |
|
2520 | get_navi_link = get_link_rel_next |
|
2521 | ||
2522 | @classmethod |
|
def get_comic_info(cls, soup, link):
    """Get information about a particular comics.

    Reads title, author and date from the post header and at most one
    image from the 'comic' div; the alt text is empty when no image is
    found.
    """
    title = soup.find('h2', class_='post-title').string
    author_span = soup.find("span", class_="post-author")
    author = author_span.find("a").string
    published = string_to_date(
        soup.find("span", class_="post-date").string, "%B %d, %Y")
    comic_div = soup.find("div", id="comic")
    pictures = comic_div.find_all("img")
    # Sanity checks on the page layout: alt and title agree, one image at most.
    assert all(pic['alt'] == pic['title'] for pic in pictures)
    assert len(pictures) <= 1
    if pictures:
        alt = pictures[0]['alt']
    else:
        alt = ""
    return {
        'img': [pic['src'] for pic in pictures],
        'title': title,
        'alt': alt,
        'author': author,
        'day': published.day,
        'month': published.month,
        'year': published.year
    }
|
2542 | ||
2543 | ||
2544 | class TalesOfAbsurdity(GenericNavigableComic): |
|
2545 | """Class to retrieve Tales Of Absurdity comics.""" |
|
2546 | # Also on http://tapastic.com/series/Tales-Of-Absurdity |
|
2547 | # Also on http://talesofabsurdity.tumblr.com |
|
2548 | name = 'absurdity' |
|
@@ 2738-2764 (lines=27) @@ | ||
2735 | """Class to retrieve Unearthed comics.""" |
|
2736 | # Also on http://tapastic.com/series/UnearthedComics |
|
2737 | # Also on http://unearthedcomics.tumblr.com |
|
2738 | name = 'unearthed' |
|
2739 | long_name = 'Unearthed Comics' |
|
2740 | url = 'http://unearthedcomics.com' |
|
2741 | _categories = ('UNEARTHED', ) |
|
2742 | get_navi_link = get_link_rel_next |
|
2743 | get_first_comic_link = simulate_first_link |
|
2744 | first_url = 'http://unearthedcomics.com/comics/world-with-turn-signals/' |
|
2745 | ||
2746 | @classmethod |
|
def get_comic_info(cls, soup, link):
    """Get information about a particular comics.

    Returns a dict with the title (empty string when the page has neither
    an h1 nor an h2), the text of the 'og:description' meta tag (None when
    the page has no such tag), the short link, the URLs of the images
    found in the post and the publication date as day/month/year.
    """
    short_url = soup.find('link', rel='shortlink')['href']
    title_elt = soup.find('h1') or soup.find('h2')
    title = title_elt.string if title_elt else ""
    desc_elt = soup.find('meta', property='og:description')
    # Store the text carried by the meta tag, not the tag object itself.
    desc = desc_elt['content'] if desc_elt is not None else None
    date_str = soup.find('time', class_='published updated hidden')['datetime']
    day = string_to_date(date_str, "%Y-%m-%d")
    post = soup.find('div', class_="entry content entry-content type-portfolio")
    imgs = post.find_all('img')
    return {
        'title': title,
        'description': desc,
        'url2': short_url,
        'img': [i['src'] for i in imgs],
        'month': day.month,
        'year': day.year,
        'day': day.day,
    }
|
2766 | ||
2767 | ||
@@ 2488-2514 (lines=27) @@ | ||
2485 | # Also on https://tapastic.com/series/Mister-and-Me |
|
2486 | name = 'mister' |
|
2487 | long_name = 'Mister & Me' |
|
2488 | url = 'http://www.mister-and-me.com' |
|
2489 | get_first_comic_link = get_a_comicnavbase_comicnavfirst |
|
2490 | get_navi_link = get_link_rel_next |
|
2491 | ||
2492 | @classmethod |
|
def get_comic_info(cls, soup, link):
    """Get information about a particular comics.

    Title, author and date come from the post header. The 'comic' div is
    expected to hold at most one image whose 'alt' and 'title' match.
    """
    post_title = soup.find('h2', class_='post-title').string
    post_author = soup.find("span", class_="post-author").find("a").string
    raw_date = soup.find("span", class_="post-date").string
    when = string_to_date(raw_date, "%B %d, %Y")
    found = soup.find("div", id="comic").find_all("img")
    assert all(img['alt'] == img['title'] for img in found)
    assert len(found) <= 1
    # No image on the page means no alt text either.
    alt_text = found[0]['alt'] if found else ""
    sources = [img['src'] for img in found]
    return {
        'img': sources,
        'title': post_title,
        'alt': alt_text,
        'author': post_author,
        'day': when.day,
        'month': when.month,
        'year': when.year
    }
|
2512 | ||
2513 | ||
2514 | class LastPlaceComics(GenericNavigableComic): |
|
2515 | """Class to retrieve Last Place Comics.""" |
|
2516 | name = 'lastplace' |
|
2517 | long_name = 'Last Place Comics' |
|
@@ 2321-2346 (lines=26) @@ | ||
2318 | for link in last_soup.find_all('a', rel='next' if next_ else 'prev'): |
|
2319 | if link['href'] != '/comic': |
|
2320 | return link |
|
2321 | return None |
|
2322 | ||
2323 | @classmethod |
|
def get_comic_info(cls, soup, link):
    """Get information about a particular comics.

    Most fields are located through their 'itemprop' attribute; image
    sources are joined with the comic url before being returned.
    """
    title = soup.find('meta', attrs={'name': 'description'})["content"]
    body = soup.find('div', itemprop='articleBody')
    description = body.text
    author = soup.find('span', itemprop='author copyrightHolder').string
    pictures = soup.find_all('img', itemprop='image')
    assert all(pic['title'] == pic['alt'] for pic in pictures)
    if pictures:
        alt = pictures[0]['alt']
    else:
        alt = ""
    published_elt = soup.find('time', itemprop='datePublished')
    published = string_to_date(published_elt["datetime"], "%Y-%m-%d %H:%M:%S")
    img_urls = []
    for pic in pictures:
        img_urls.append(urljoin_wrapper(cls.url, pic['src']))
    return {
        'img': img_urls,
        'month': published.month,
        'year': published.year,
        'day': published.day,
        'author': author,
        'title': title,
        'alt': alt,
        'description': description,
    }
|
2344 | ||
2345 | ||
2346 | class GerbilWithAJetpack(GenericNavigableComic): |
|
2347 | """Class to retrieve GerbilWithAJetpack comics.""" |
|
2348 | name = 'gerbil' |
|
2349 | long_name = 'Gerbil With A Jetpack' |
|
@@ 2019-2043 (lines=25) @@ | ||
def get_first_comic_link(cls):
    """Get link to first comics.

    Looks for the anchor titled "First" on the page found at the comic url.
    """
    front_page = get_soup_at_url(cls.url)
    first_anchor = front_page.find('a', title="First")
    return first_anchor
|
2019 | ||
2020 | @classmethod |
|
def get_navi_link(cls, last_soup, next_):
    """Get link to next or previous comic.

    The anchor is looked up by its title: 'Next' when next_ is true,
    'Previous' otherwise.
    """
    if next_:
        wanted_title = 'Next'
    else:
        wanted_title = 'Previous'
    return last_soup.find('a', title=wanted_title)
|
2024 | ||
2025 | @classmethod |
|
def get_comic_info(cls, soup, link):
    """Get information about a particular comics.

    Only the images of the 'comic' div having both an empty 'alt' and an
    empty 'title' attribute are kept.
    """
    heading = soup.find('h1')
    title = heading.string
    raw_date = soup.find('span', class_='date').string
    published = string_to_date(raw_date.strip(), "%B %d, %Y")
    comic_div = soup.find('div', class_='comic')
    pictures = comic_div.find_all('img', alt='', title='')
    sources = []
    for pic in pictures:
        sources.append(pic['src'])
    return {
        'title': title,
        'img': sources,
        'month': published.month,
        'year': published.year,
        'day': published.day,
    }
|
2039 | ||
2040 | ||
2041 | class ChuckleADuck(GenericNavigableComic): |
|
2042 | """Class to retrieve Chuckle-A-Duck comics.""" |
|
2043 | name = 'chuckleaduck' |
|
2044 | long_name = 'Chuckle-A-duck' |
|
2045 | url = 'http://chuckleaduck.com' |
|
2046 | get_first_comic_link = get_div_navfirst_a |
|
@@ 2378-2404 (lines=27) @@ | ||
class EveryDayBlues(GenericNavigableComic):
    """Class to retrieve EveryDayBlues Comics."""
    name = "blues"
    long_name = "Every Day Blues"
    url = "http://everydayblues.net"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The post date is parsed with the "de_DE.utf8" locale. The page is
        expected to hold at most one image, whose 'alt' and 'title' both
        equal the post title.
        """
        title = soup.find("h2", class_="post-title").string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%d. %B %Y", "de_DE.utf8")
        pictures = soup.find("div", id="comic").find_all("img")
        assert all(pic['alt'] == pic['title'] == title for pic in pictures)
        assert len(pictures) <= 1
        sources = [pic['src'] for pic in pictures]
        return {
            'img': sources,
            'title': title,
            'author': author,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
|
2401 | ||
2402 | ||
2403 | class BiterComics(GenericNavigableComic): |
|
2404 | """Class to retrieve Biter Comics.""" |
|
2405 | name = "biter" |
|
2406 | long_name = "Biter Comics" |
|
2407 | url = "http://www.bitercomics.com" |
|
@@ 1932-1958 (lines=27) @@ | ||
1929 | 'year': day.year, |
|
1930 | 'day': day.day, |
|
1931 | 'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs], |
|
1932 | 'title': title, |
|
1933 | } |
|
1934 | ||
1935 | ||
class DiscoBleach(GenericEmptyComic):  # Does not work anymore
    """Class to retrieve Disco Bleach Comics.

    Kept as a GenericEmptyComic because retrieval does not work anymore;
    only the identifying attributes are declared here.
    """
    # Identifier and display name of the comic.
    name = 'discobleach'
    long_name = 'Disco Bleach'
    # Address of the comic's website.
    url = 'http://discobleach.com'
|
1941 | ||
1942 | ||
class TubeyToons(GenericEmptyComic):  # Does not work anymore
    """Class to retrieve TubeyToons comics.

    Kept as a GenericEmptyComic because retrieval does not work anymore;
    only the identifying attributes are declared here.
    """
    # Also on http://tapastic.com/series/Tubey-Toons
    # Also on http://tubeytoons.tumblr.com
    # Identifier and display name of the comic.
    name = 'tubeytoons'
    long_name = 'Tubey Toons'
    # Address of the comic's website.
    url = 'http://tubeytoons.com'
    # NOTE(review): 'TUNEYTOONS' looks like a typo for 'TUBEYTOONS' - confirm
    # how category names are consumed before renaming it.
    _categories = ('TUNEYTOONS', )
|
1951 | ||
1952 | ||
1953 | class CompletelySeriousComics(GenericNavigableComic): |
|
1954 | """Class to retrieve Completely Serious comics.""" |
|
1955 | name = 'completelyserious' |
|
1956 | long_name = 'Completely Serious Comics' |
|
1957 | url = 'http://completelyseriouscomics.com' |
|
1958 | get_first_comic_link = get_a_navi_navifirst |
|
1959 | get_navi_link = get_a_navi_navinext |
|
1960 | ||
1961 | @classmethod |
|
@@ 2119-2144 (lines=26) @@ | ||
2116 | tags = ' '.join(t['content'] for t in soup.find_all('meta', property='article:tag')) |
|
2117 | imgs = soup.find('div', class_='entry-content').find_all('img') |
|
2118 | return { |
|
2119 | 'day': day.day, |
|
2120 | 'month': day.month, |
|
2121 | 'year': day.year, |
|
2122 | 'title': title, |
|
2123 | 'title2': title2, |
|
2124 | 'description': description, |
|
2125 | 'tags': tags, |
|
2126 | 'img': [i['src'] for i in imgs], |
|
2127 | 'alt': ' '.join(i['alt'] for i in imgs), |
|
2128 | } |
|
2129 | ||
2130 | @classmethod |
|
def get_url_from_archive_element(cls, tr):
    """Get the comic url from a row of the archive table.

    The row must hold exactly three 'td' cells (anything else raises
    ValueError on unpacking); the url is the 'href' of the first link
    found in the second cell.
    """
    # Only the middle cell is used; the unpacking still checks the row shape.
    _, link_cell, _ = tr.find_all('td')
    return link_cell.find('a')['href']
|
2134 | ||
2135 | @classmethod |
|
def get_archive_elements(cls):
    """Get the rows of the archive table, in reverse page order.

    The archive page is the 'archive-2' page relative to the comic url.
    """
    archive_page = get_soup_at_url(urljoin_wrapper(cls.url, 'archive-2'))
    rows = archive_page.find('tbody').find_all('tr')
    return reversed(rows)
|
2139 | ||
2140 | ||
2141 | class HappleTea(GenericNavigableComic): |
|
2142 | """Class to retrieve Happle Tea Comics.""" |
|
2143 | name = 'happletea' |
|
2144 | long_name = 'Happle Tea' |
|
2145 | url = 'http://www.happletea.com' |
|
2146 | get_first_comic_link = get_a_navi_navifirst |
|
2147 | get_navi_link = get_link_rel_next |
|
@@ 2659-2683 (lines=25) @@ | ||
def get_comic_info(cls, soup, link):
    """Get information about a particular comics.

    Description and title come from the 'og:' meta tags. The images of
    the 'entry-content' div provide both the secondary title (their
    'title' attributes joined by spaces) and the image urls.
    """
    description = soup.find('meta', property='og:description')['content']
    main_title = soup.find('meta', property='og:title')['content']
    content_div = soup.find('div', class_='entry-content')
    pictures = content_div.find_all('img')
    # Images without a 'title' attribute contribute an empty string.
    picture_titles = [pic.get('title', '') for pic in pictures]
    img_urls = []
    for pic in pictures:
        ascii_src = convert_iri_to_plain_ascii_uri(pic['src'])
        img_urls.append(urljoin_wrapper(cls.url, ascii_src))
    return {
        'title': main_title,
        'title2': ' '.join(picture_titles),
        'description': description,
        'img': img_urls,
    }
|
2668 | ||
2669 | ||
class CommitStripFr(GenericCommitStrip):
    """Class to retrieve Commit Strips in French.

    Only declares the attributes specific to the French edition on top of
    GenericCommitStrip.
    """
    # Identifier and display name of this edition.
    name = 'commit_fr'
    long_name = 'Commit Strip (Fr)'
    # The French edition lives under the '/fr' path of the site.
    url = 'http://www.commitstrip.com/fr'
    _categories = ('FRANCAIS', )
    # NOTE(review): presumably the page navigation starts from - confirm
    # against GenericCommitStrip.
    first_url = 'http://www.commitstrip.com/fr/2012/02/22/interview/'
|
2677 | ||
2678 | ||
class CommitStripEn(GenericCommitStrip):
    """Class to retrieve Commit Strips in English.

    Only declares the attributes specific to the English edition on top of
    GenericCommitStrip.
    """
    # Identifier and display name of this edition.
    name = 'commit_en'
    long_name = 'Commit Strip (En)'
    # The English edition lives under the '/en' path of the site.
    url = 'http://www.commitstrip.com/en'
    # NOTE(review): presumably the page navigation starts from - confirm
    # against GenericCommitStrip.
    first_url = 'http://www.commitstrip.com/en/2012/02/22/interview/'
|
2685 | ||
2686 | ||
@@ 338-360 (lines=23) @@ | ||
def get_next_comic(cls, last_comic):
    """Implementation of get_next_comic returning no comics.

    Logs that the comic is considered as empty, then returns an empty
    list; last_comic is not used.
    """
    message = "comic is considered as empty - returning no comic"
    cls.log(message)
    no_comics = []
    return no_comics
|
339 | ||
340 | ||
341 | class ExtraFabulousComics(GenericNavigableComic): |
|
342 | """Class to retrieve Extra Fabulous Comics.""" |
|
343 | name = 'efc' |
|
344 | long_name = 'Extra Fabulous Comics' |
|
345 | url = 'http://extrafabulouscomics.com' |
|
346 | get_first_comic_link = get_a_navi_navifirst |
|
347 | get_navi_link = get_link_rel_next |
|
348 | ||
349 | @classmethod |
|
350 | def get_comic_info(cls, soup, link): |
|
351 | """Get information about a particular comics.""" |
|
352 | img_src_re = re.compile('^%s/wp-content/uploads/' % cls.url) |
|
353 | imgs = soup.find_all('img', src=img_src_re) |
|
354 | title = soup.find('meta', property='og:title')['content'] |
|
355 | date_str = soup.find('meta', property='article:published_time')['content'][:10] |
|
356 | day = string_to_date(date_str, "%Y-%m-%d") |
|
357 | return { |
|
358 | 'title': title, |
|
359 | 'img': [i['src'] for i in imgs], |
|
360 | 'month': day.month, |
|
361 | 'year': day.year, |
|
362 | 'day': day.day, |
|
363 | 'prefix': title + '-' |