@@ 1781-1806 (lines=26) @@ | ||
1778 | """Get information about a particular comics.""" |
|
1779 | title = soup.find('meta', property='og:title')['content'] |
|
1780 | author = soup.find('meta', attrs={'name': 'shareaholic:article_author_name'})['content'] |
|
1781 | date_str = soup.find('meta', attrs={'name': 'shareaholic:article_published_time'})['content'] |
|
1782 | date_str = date_str[:10] |
|
1783 | day = string_to_date(date_str, "%Y-%m-%d") |
|
1784 | imgs = soup.find_all('meta', property='og:image') |
|
1785 | skip_imgs = { |
|
1786 | 'http://respawncomic.com/wp-content/uploads/2016/03/site/HAROLD2.png', |
|
1787 | 'http://respawncomic.com/wp-content/uploads/2016/03/site/DEVA.png' |
|
1788 | } |
|
1789 | return { |
|
1790 | 'title': title, |
|
1791 | 'author': author, |
|
1792 | 'day': day.day, |
|
1793 | 'month': day.month, |
|
1794 | 'year': day.year, |
|
1795 | 'img': [i['content'] for i in imgs if i['content'] not in skip_imgs], |
|
1796 | } |
|
1797 | ||
1798 | ||
1799 | class SafelyEndangered(GenericNavigableComic): |
|
1800 | """Class to retrieve Safely Endangered comics.""" |
|
1801 | # Also on http://tumblr.safelyendangered.com |
|
1802 | name = 'endangered' |
|
1803 | long_name = 'Safely Endangered' |
|
1804 | url = 'http://www.safelyendangered.com' |
|
1805 | get_navi_link = get_link_rel_next |
|
1806 | get_first_comic_link = simulate_first_link |
|
1807 | first_url = 'http://www.safelyendangered.com/comic/ignored/' |
|
1808 | ||
1809 | @classmethod |
|
@@ 1810-1836 (lines=27) @@ | ||
1807 | first_url = 'http://www.safelyendangered.com/comic/ignored/' |
|
1808 | ||
1809 | @classmethod |
|
def get_comic_info(cls, soup, link):
    """Get information about a particular comics.

    The title is read from the 'post-title' h2, the date from the
    'post-date' span (parsed with the '%B %d, %Y' format) and the images
    from the div whose id is 'comic'.

    Returns a dict with the keys 'day', 'month', 'year', 'img' (list of
    image sources), 'title' and 'alt' (alt text of the first image, or an
    empty string when the page holds no image).
    """
    title = soup.find('h2', class_='post-title').string
    date_str = soup.find('span', class_='post-date').string
    day = string_to_date(date_str, '%B %d, %Y')
    imgs = soup.find('div', id='comic').find_all('img')
    # Sanity check on the scraped page: alt and title are expected to agree.
    assert all(i['alt'] == i['title'] for i in imgs)
    # A page without any image must not fail on imgs[0]; fall back to an
    # empty alt text, as the other get_comic_info implementations do.
    alt = imgs[0]['alt'] if imgs else ""
    return {
        'day': day.day,
        'month': day.month,
        'year': day.year,
        'img': [i['src'] for i in imgs],
        'title': title,
        'alt': alt,
    }
|
1826 | ||
1827 | ||
1828 | class PicturesInBoxes(GenericNavigableComic): |
|
1829 | """Class to retrieve Pictures In Boxes comics.""" |
|
1830 | # Also on http://picturesinboxescomic.tumblr.com |
|
1831 | name = 'picturesinboxes' |
|
1832 | long_name = 'Pictures in Boxes' |
|
1833 | url = 'http://www.picturesinboxes.com' |
|
1834 | get_navi_link = get_a_navi_navinext |
|
1835 | get_first_comic_link = simulate_first_link |
|
1836 | first_url = 'http://www.picturesinboxes.com/2013/10/26/tetris/' |
|
1837 | ||
1838 | @classmethod |
|
1839 | def get_comic_info(cls, soup, link): |
|
@@ 2518-2545 (lines=28) @@ | ||
2515 | url = "http://lastplacecomics.com" |
|
2516 | get_first_comic_link = get_a_comicnavbase_comicnavfirst |
|
2517 | get_navi_link = get_link_rel_next |
|
2518 | ||
2519 | @classmethod |
|
def get_comic_info(cls, soup, link):
    """Collect images, title, alt text, author and date for one comic page.

    The returned dict has the keys 'img', 'title', 'alt', 'author', 'day',
    'month' and 'year'.
    """
    title = soup.find('h2', class_='post-title').string
    author_span = soup.find("span", class_="post-author")
    author = author_span.find("a").string
    raw_date = soup.find("span", class_="post-date").string
    published = string_to_date(raw_date, "%B %d, %Y")
    comic_div = soup.find("div", id="comic")
    pictures = comic_div.find_all("img")
    # Sanity checks on the scraped page: alt and title agree and there is
    # at most one image.
    assert all(pic['alt'] == pic['title'] for pic in pictures)
    assert len(pictures) <= 1
    if pictures:
        alt = pictures[0]['alt']
    else:
        alt = ""
    info = {
        'img': [pic['src'] for pic in pictures],
        'title': title,
        'alt': alt,
        'author': author,
        'day': published.day,
        'month': published.month,
        'year': published.year
    }
    return info
|
2539 | ||
2540 | ||
2541 | class TalesOfAbsurdity(GenericNavigableComic): |
|
2542 | """Class to retrieve Tales Of Absurdity comics.""" |
|
2543 | # Also on http://tapastic.com/series/Tales-Of-Absurdity |
|
2544 | # Also on http://talesofabsurdity.tumblr.com |
|
2545 | name = 'absurdity' |
|
2546 | long_name = 'Tales of Absurdity' |
|
2547 | url = 'http://talesofabsurdity.com' |
|
2548 | _categories = ('ABSURDITY', ) |
|
@@ 2738-2764 (lines=27) @@ | ||
2735 | name = 'unearthed' |
|
2736 | long_name = 'Unearthed Comics' |
|
2737 | url = 'http://unearthedcomics.com' |
|
2738 | _categories = ('UNEARTHED', ) |
|
2739 | get_navi_link = get_link_rel_next |
|
2740 | get_first_comic_link = simulate_first_link |
|
2741 | first_url = 'http://unearthedcomics.com/comics/world-with-turn-signals/' |
|
2742 | ||
2743 | @classmethod |
|
def get_comic_info(cls, soup, link):
    """Get information about a particular comics.

    Reads the short link, the title (first h1, else first h2, else an
    empty string), the og:description meta content, the publication date
    from the 'published updated hidden' time element and the images found
    in the portfolio entry div.

    Returns a dict with the keys 'title', 'description', 'url2', 'img',
    'month', 'year' and 'day'.
    """
    short_url = soup.find('link', rel='shortlink')['href']
    title_elt = soup.find('h1') or soup.find('h2')
    title = title_elt.string if title_elt else ""
    # Store the text carried by the og:description meta tag rather than the
    # tag object itself, so that 'description' is a plain string; an absent
    # tag or attribute yields an empty string.
    desc_elt = soup.find('meta', property='og:description')
    desc = desc_elt.get('content', "") if desc_elt is not None else ""
    date_str = soup.find('time', class_='published updated hidden')['datetime']
    day = string_to_date(date_str, "%Y-%m-%d")
    post = soup.find('div', class_="entry content entry-content type-portfolio")
    imgs = post.find_all('img')
    return {
        'title': title,
        'description': desc,
        'url2': short_url,
        'img': [i['src'] for i in imgs],
        'month': day.month,
        'year': day.year,
        'day': day.day,
    }
|
2763 | ||
2764 | ||
2765 | class Optipess(GenericNavigableComic): |
|
2766 | """Class to retrieve Optipess comics.""" |
|
2767 | name = 'optipess' |
|
@@ 2488-2514 (lines=27) @@ | ||
2485 | url = 'http://www.mister-and-me.com' |
|
2486 | get_first_comic_link = get_a_comicnavbase_comicnavfirst |
|
2487 | get_navi_link = get_link_rel_next |
|
2488 | ||
2489 | @classmethod |
|
def get_comic_info(cls, soup, link):
    """Build the info dict describing the comic shown on the given page.

    Keys of the result: 'img', 'title', 'alt', 'author', 'day', 'month'
    and 'year'.
    """
    heading = soup.find('h2', class_='post-title')
    title = heading.string
    author_link = soup.find("span", class_="post-author").find("a")
    author = author_link.string
    date_span = soup.find("span", class_="post-date")
    when = string_to_date(date_span.string, "%B %d, %Y")
    found = soup.find("div", id="comic").find_all("img")
    # Scraping sanity checks: matching alt/title, no more than one image.
    assert all(elt['alt'] == elt['title'] for elt in found)
    assert len(found) <= 1
    alt = ""
    if found:
        alt = found[0]['alt']
    sources = [elt['src'] for elt in found]
    return {
        'img': sources,
        'title': title,
        'alt': alt,
        'author': author,
        'day': when.day,
        'month': when.month,
        'year': when.year
    }
|
2509 | ||
2510 | ||
2511 | class LastPlaceComics(GenericNavigableComic): |
|
2512 | """Class to retrieve Last Place Comics.""" |
|
2513 | name = 'lastplace' |
|
2514 | long_name = 'Last Place Comics' |
|
2515 | url = "http://lastplacecomics.com" |
|
2516 | get_first_comic_link = get_a_comicnavbase_comicnavfirst |
|
2517 | get_navi_link = get_link_rel_next |
|
@@ 2321-2346 (lines=26) @@ | ||
2318 | return None |
|
2319 | ||
2320 | @classmethod |
|
def get_comic_info(cls, soup, link):
    """Extract the comic data from the itemprop-annotated parts of a page.

    Keys of the result: 'img' (sources joined with cls.url), 'month',
    'year', 'day', 'author', 'title', 'alt' and 'description'.
    """
    description_meta = soup.find('meta', attrs={'name': 'description'})
    title = description_meta["content"]
    description = soup.find('div', itemprop='articleBody').text
    author = soup.find('span', itemprop='author copyrightHolder').string
    pictures = soup.find_all('img', itemprop='image')
    # Sanity check on the scraped page: title and alt are expected to agree.
    assert all(pic['title'] == pic['alt'] for pic in pictures)
    if pictures:
        alt = pictures[0]['alt']
    else:
        alt = ""
    time_elt = soup.find('time', itemprop='datePublished')
    published = string_to_date(time_elt["datetime"], "%Y-%m-%d %H:%M:%S")
    info = {
        'img': [urljoin_wrapper(cls.url, pic['src']) for pic in pictures],
        'month': published.month,
        'year': published.year,
        'day': published.day,
        'author': author,
        'title': title,
        'alt': alt,
        'description': description,
    }
    return info
|
2341 | ||
2342 | ||
2343 | class GerbilWithAJetpack(GenericNavigableComic): |
|
2344 | """Class to retrieve GerbilWithAJetpack comics.""" |
|
2345 | name = 'gerbil' |
|
2346 | long_name = 'Gerbil With A Jetpack' |
|
2347 | url = 'http://gerbilwithajetpack.com' |
|
2348 | get_first_comic_link = get_a_navi_navifirst |
|
2349 | get_navi_link = get_a_rel_next |
|
@@ 2019-2043 (lines=25) @@ | ||
2016 | return get_soup_at_url(cls.url).find('a', title="First") |
|
2017 | ||
2018 | @classmethod |
|
def get_navi_link(cls, last_soup, next_):
    """Find the 'a' element titled 'Next' (if next_ is true) or 'Previous'."""
    if next_:
        wanted_title = 'Next'
    else:
        wanted_title = 'Previous'
    return last_soup.find('a', title=wanted_title)
|
2022 | ||
2023 | @classmethod |
|
def get_comic_info(cls, soup, link):
    """Read title, images and publication date from a comic page.

    Keys of the result: 'title', 'img', 'month', 'year' and 'day'.
    """
    title = soup.find('h1').string
    date_span = soup.find('span', class_='date')
    raw_date = date_span.string.strip()
    published = string_to_date(raw_date, "%B %d, %Y")
    comic_div = soup.find('div', class_='comic')
    # Only images whose alt and title attributes are both empty are kept.
    pictures = comic_div.find_all('img', alt='', title='')
    info = {
        'title': title,
        'img': [pic['src'] for pic in pictures],
        'month': published.month,
        'year': published.year,
        'day': published.day,
    }
    return info
|
2037 | ||
2038 | ||
2039 | class ChuckleADuck(GenericNavigableComic): |
|
2040 | """Class to retrieve Chuckle-A-Duck comics.""" |
|
2041 | name = 'chuckleaduck' |
|
2042 | long_name = 'Chuckle-A-duck' |
|
2043 | url = 'http://chuckleaduck.com' |
|
2044 | get_first_comic_link = get_div_navfirst_a |
|
2045 | get_navi_link = get_link_rel_next |
|
2046 | ||
@@ 2378-2404 (lines=27) @@ | ||
2375 | long_name = "Every Day Blues" |
|
2376 | url = "http://everydayblues.net" |
|
2377 | get_first_comic_link = get_a_navi_navifirst |
|
2378 | get_navi_link = get_link_rel_next |
|
2379 | ||
2380 | @classmethod |
|
def get_comic_info(cls, soup, link):
    """Gather images, title, author and date for one comic page.

    The date text is parsed with the "%d. %B %Y" format and the
    "de_DE.utf8" locale argument.  Keys of the result: 'img', 'title',
    'author', 'day', 'month' and 'year'.
    """
    title = soup.find("h2", class_="post-title").string
    author_span = soup.find("span", class_="post-author")
    author = author_span.find("a").string
    raw_date = soup.find("span", class_="post-date").string
    published = string_to_date(raw_date, "%d. %B %Y", "de_DE.utf8")
    comic_div = soup.find("div", id="comic")
    pictures = comic_div.find_all("img")
    # Sanity checks on the scraped page: alt, title attribute and post
    # title all agree, and there is at most one image.
    assert all(pic['alt'] == pic['title'] == title for pic in pictures)
    assert len(pictures) <= 1
    info = {
        'img': [pic['src'] for pic in pictures],
        'title': title,
        'author': author,
        'day': published.day,
        'month': published.month,
        'year': published.year
    }
    return info
|
2398 | ||
2399 | ||
2400 | class BiterComics(GenericNavigableComic): |
|
2401 | """Class to retrieve Biter Comics.""" |
|
2402 | name = "biter" |
|
2403 | long_name = "Biter Comics" |
|
2404 | url = "http://www.bitercomics.com" |
|
2405 | get_first_comic_link = get_a_navi_navifirst |
|
2406 | get_navi_link = get_link_rel_next |
|
2407 | ||
@@ 1932-1958 (lines=27) @@ | ||
1929 | 'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs], |
|
1930 | 'title': title, |
|
1931 | } |
|
1932 | ||
1933 | ||
class DiscoBleach(GenericEmptyComic):  # Does not work anymore
    """Entry for the Disco Bleach comics, built on GenericEmptyComic."""
    name = "discobleach"
    long_name = "Disco Bleach"
    url = "http://discobleach.com"
|
1939 | ||
1940 | ||
class TubeyToons(GenericEmptyComic):  # Does not work anymore
    """Entry for the TubeyToons comics, built on GenericEmptyComic."""
    # Also on http://tapastic.com/series/Tubey-Toons
    # Also on http://tubeytoons.tumblr.com
    name = "tubeytoons"
    long_name = "Tubey Toons"
    url = "http://tubeytoons.com"
    # NOTE(review): the category reads 'TUNEYTOONS' rather than 'TUBEYTOONS';
    # kept unchanged because the value is a runtime string - confirm.
    _categories = ("TUNEYTOONS",)
|
1949 | ||
1950 | ||
1951 | class CompletelySeriousComics(GenericNavigableComic): |
|
1952 | """Class to retrieve Completely Serious comics.""" |
|
1953 | name = 'completelyserious' |
|
1954 | long_name = 'Completely Serious Comics' |
|
1955 | url = 'http://completelyseriouscomics.com' |
|
1956 | get_first_comic_link = get_a_navi_navifirst |
|
1957 | get_navi_link = get_a_navi_navinext |
|
1958 | ||
1959 | @classmethod |
|
1960 | def get_comic_info(cls, soup, link): |
|
1961 | """Get information about a particular comics.""" |
|
@@ 2119-2144 (lines=26) @@ | ||
2116 | return { |
|
2117 | 'day': day.day, |
|
2118 | 'month': day.month, |
|
2119 | 'year': day.year, |
|
2120 | 'title': title, |
|
2121 | 'title2': title2, |
|
2122 | 'description': description, |
|
2123 | 'tags': tags, |
|
2124 | 'img': [i['src'] for i in imgs], |
|
2125 | 'alt': ' '.join(i['alt'] for i in imgs), |
|
2126 | } |
|
2127 | ||
2128 | @classmethod |
|
def get_url_from_archive_element(cls, tr):
    """Return the href of the link found in the second of the row's three cells."""
    # Unpacking requires the row to hold exactly three 'td' cells.
    _first, link_cell, _third = tr.find_all('td')
    anchor = link_cell.find('a')
    return anchor['href']
|
2132 | ||
2133 | @classmethod |
|
def get_archive_elements(cls):
    """Return the 'tr' rows of the 'archive-2' page table body, in reverse order."""
    archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive-2'))
    rows = archive_soup.find('tbody').find_all('tr')
    return reversed(rows)
|
2137 | ||
2138 | ||
2139 | class HappleTea(GenericNavigableComic): |
|
2140 | """Class to retrieve Happle Tea Comics.""" |
|
2141 | name = 'happletea' |
|
2142 | long_name = 'Happle Tea' |
|
2143 | url = 'http://www.happletea.com' |
|
2144 | get_first_comic_link = get_a_navi_navifirst |
|
2145 | get_navi_link = get_link_rel_next |
|
2146 | ||
2147 | @classmethod |
|
@@ 2659-2683 (lines=25) @@ | ||
2656 | title = soup.find('meta', property='og:title')['content'] |
|
2657 | imgs = soup.find('div', class_='entry-content').find_all('img') |
|
2658 | title2 = ' '.join(i.get('title', '') for i in imgs) |
|
2659 | return { |
|
2660 | 'title': title, |
|
2661 | 'title2': title2, |
|
2662 | 'description': desc, |
|
2663 | 'img': [urljoin_wrapper(cls.url, convert_iri_to_plain_ascii_uri(i['src'])) for i in imgs], |
|
2664 | } |
|
2665 | ||
2666 | ||
class CommitStripFr(GenericCommitStrip):
    """French edition of Commit Strip, built on GenericCommitStrip."""
    name = "commit_fr"
    long_name = "Commit Strip (Fr)"
    url = "http://www.commitstrip.com/fr"
    _categories = ("FRANCAIS",)
    first_url = "http://www.commitstrip.com/fr/2012/02/22/interview/"
|
2674 | ||
2675 | ||
class CommitStripEn(GenericCommitStrip):
    """English edition of Commit Strip, built on GenericCommitStrip."""
    name = "commit_en"
    long_name = "Commit Strip (En)"
    url = "http://www.commitstrip.com/en"
    first_url = "http://www.commitstrip.com/en/2012/02/22/interview/"
|
2682 | ||
2683 | ||
2684 | class GenericBoumerie(GenericNavigableComic): |
|
2685 | """Generic class to retrieve Boumeries comics in different languages.""" |
|
2686 | get_first_comic_link = get_a_navi_navifirst |
|
@@ 338-360 (lines=23) @@ | ||
def get_next_comic(cls, last_comic):
    """Log that the comic is treated as empty and return an empty list."""
    cls.log("comic is considered as empty - returning no comic")
    no_comics = []
    return no_comics
|
339 | ||
340 | ||
341 | class ExtraFabulousComics(GenericNavigableComic): |
|
342 | """Class to retrieve Extra Fabulous Comics.""" |
|
343 | name = 'efc' |
|
344 | long_name = 'Extra Fabulous Comics' |
|
345 | url = 'http://extrafabulouscomics.com' |
|
346 | get_first_comic_link = get_a_navi_navifirst |
|
347 | get_navi_link = get_link_rel_next |
|
348 | ||
349 | @classmethod |
|
350 | def get_comic_info(cls, soup, link): |
|
351 | """Get information about a particular comics.""" |
|
352 | img_src_re = re.compile('^%s/wp-content/uploads/' % cls.url) |
|
353 | imgs = soup.find_all('img', src=img_src_re) |
|
354 | title = soup.find('meta', property='og:title')['content'] |
|
355 | date_str = soup.find('meta', property='article:published_time')['content'][:10] |
|
356 | day = string_to_date(date_str, "%Y-%m-%d") |
|
357 | return { |
|
358 | 'title': title, |
|
359 | 'img': [i['src'] for i in imgs], |
|
360 | 'month': day.month, |
|
361 | 'year': day.year, |
|
362 | 'day': day.day, |
|
363 | 'prefix': title + '-' |