Completed
Push — master ( 32fa9a...e12c63 )
by De
01:10
created

comics.py (24 issues)

1
#! /usr/bin/python3
2
# vim: set expandtab tabstop=4 shiftwidth=4 :
3
"""Module to retrieve webcomics"""
4
5
from comic_abstract import GenericComic, get_date_for_comic
6
import re
7
from datetime import date, timedelta
8
import datetime
9
from urlfunctions import get_soup_at_url, urljoin_wrapper,\
10
    convert_iri_to_plain_ascii_uri, load_json_at_url, urlopen_wrapper
11
import json
12
import locale
13
import urllib
14
15
# Name of a locale (British English, UTF-8).
# NOTE(review): presumably the default locale used when parsing date strings;
# the code consuming this constant is not visible here - confirm.
DEFAULT_LOCAL = 'en_GB.UTF-8'
16
17
18
class Xkcd(GenericComic):
    """Retriever for xkcd comics, based on the site's JSON descriptions."""
    name = 'xkcd'
    long_name = 'xkcd'
    url = 'http://xkcd.com'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Yield the comics following `last_comic` (GenericComic's abstract method).

        `last_comic` is the previously retrieved comic (a dict with a 'num'
        entry) or a falsy value to start from the beginning. Each yielded
        value is the JSON dict loaded for the comic, completed with 'prefix',
        'json_url' and 'url' entries, with 'img' wrapped in a list and
        'day'/'month'/'year' converted to int.
        """
        previous_num = 0 if not last_comic else last_comic['num']
        latest_info = load_json_at_url(urljoin_wrapper(cls.url, 'info.0.json'))
        newest_num = latest_info['num']

        for num in range(previous_num + 1, newest_num + 1):
            if num == 404:
                # Number 404 is never requested.
                # NOTE(review): presumably no comic exists under it - confirm.
                continue
            json_url = urljoin_wrapper(cls.url, '%d/info.0.json' % num)
            comic = load_json_at_url(json_url)
            comic['img'] = [comic['img']]
            comic['prefix'] = '%d-' % num
            comic['json_url'] = json_url
            comic['url'] = urljoin_wrapper(cls.url, str(num))
            for date_field in ('day', 'month', 'year'):
                comic[date_field] = int(comic[date_field])
            assert comic['num'] == num
            yield comic
44
45
46
# Helper functions corresponding to get_url_from_link/get_url_from_archive_element
47
48
49
@classmethod
def get_href(cls, link):
    """Return the 'href' entry of `link`.

    Meant to be assigned as get_url_from_link/get_url_from_archive_element.
    """
    href = link['href']
    return href
53
54
55
@classmethod
def join_cls_url_to_href(cls, link):
    """Return the 'href' entry of `link` joined to the class' `url`.

    Meant to be assigned as get_url_from_link/get_url_from_archive_element.
    """
    base_url = cls.url
    return urljoin_wrapper(base_url, link['href'])
59
60
61
class GenericNavigableComic(GenericComic):
    """Generic class for "navigable" comics: with first/next arrows.

    This class applies to comics where previous and next comics can be
    accessed from a given comic. Once given a starting point (either
    the first comic or the last comic retrieved), it handles the
    navigation, the retrieval of the soup object and the setting of
    the 'url' attribute on retrieved comics. This greatly limits the
    amount of boilerplate code in the different implementation classes.

    The `get_next_comic` method is implemented in terms of more
    specialized methods to be implemented/overridden:
        - get_first_comic_link
        - get_navi_link
        - get_comic_info
        - get_url_from_link
    """
    _categories = ('NAVIGABLE', )

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        Sometimes this can be retrieved from any comic page, sometimes from
        the archive page, sometimes it doesn't exist at all and one has
        to iterate backward to find it before hardcoding the result found.
        """
        raise NotImplementedError

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next (or previous - for dev purposes) comic.

        `next_` is truthy to get the next link, falsy to get the previous one.
        """
        raise NotImplementedError

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics."""
        raise NotImplementedError

    @classmethod
    def get_url_from_link(cls, link):
        """Get url corresponding to a link. Default implementation is similar to get_href."""
        return link['href']

    @classmethod
    def get_next_link(cls, last_soup):
        """Get link to next comic (and log it)."""
        link = cls.get_navi_link(last_soup, True)
        cls.log("Next link is %s" % link)
        return link

    @classmethod
    def get_prev_link(cls, last_soup):
        """Get link to previous comic (and log it)."""
        link = cls.get_navi_link(last_soup, False)
        cls.log("Prev link is %s" % link)
        return link

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generic implementation of get_next_comic for navigable comics.

        Starts from the link following `last_comic` (or from the first comic
        link when `last_comic` is falsy) and follows "next" links, yielding
        the dicts returned by `get_comic_info` with their 'url' entry set.
        Iteration stops when there is no next link or when a link leads to
        the URL that has just been handled.
        """
        url = last_comic['url'] if last_comic else None
        cls.log("starting 'get_next_comic' from %s" % url)
        next_comic = \
            cls.get_next_link(get_soup_at_url(url)) \
            if url else \
            cls.get_first_comic_link()
        cls.log("next/first comic will be %s (url is %s)" % (str(next_comic), url))
        # cls.check_navigation(url)
        while next_comic:
            prev_url, url = url, cls.get_url_from_link(next_comic)
            if prev_url == url:
                # A page linking to itself would otherwise loop forever.
                cls.log("got same url %s" % url)
                break
            cls.log("about to get %s (%s)" % (url, str(next_comic)))
            soup = get_soup_at_url(url)
            comic = cls.get_comic_info(soup, next_comic)
            if comic is not None:
                # The 'url' entry is set here, not by get_comic_info.
                assert 'url' not in comic
                comic['url'] = url
                yield comic
            next_comic = cls.get_next_link(soup)
            cls.log("next comic will be %s" % str(next_comic))

    @classmethod
    def check_first_link(cls):
        """Check that navigation to first comic seems to be working - for dev purposes.

        Returns True when a first link exists and its page can be retrieved
        without an HTTP error, False otherwise.
        """
        # `import urllib` alone does not guarantee that the `urllib.error`
        # submodule is loaded, so import it explicitly before using it.
        import urllib.error
        cls.log("about to check first link")
        ok = True
        firstlink = cls.get_first_comic_link()
        if firstlink is None:
            print("From %s : no first link" % cls.url)
            ok = False
        else:
            firsturl = cls.get_url_from_link(firstlink)
            try:
                get_soup_at_url(firsturl)
            except urllib.error.HTTPError:
                print("From %s : invalid first url" % cls.url)
                ok = False
        cls.log("checked first link -> returned %d" % ok)
        return ok

    @classmethod
    def check_prev_next_links(cls, url):
        """Check that navigation to prev/next from a given URL seems to be working - for dev purposes.

        Going backward then forward (and forward then backward) from `url`
        is expected to lead back to `url`. Returns True when the checks that
        could be performed succeeded, False otherwise.
        """
        cls.log("about to check prev/next from %s" % url)
        ok = True
        if url is None:
            prevlink, nextlink = None, None
        else:
            soup = get_soup_at_url(url)
            prevlink, nextlink = cls.get_prev_link(soup), cls.get_next_link(soup)
        if prevlink is None and nextlink is None:
            print("From %s : no previous nor next" % url)
            ok = False
        else:
            if prevlink:
                prevurl = cls.get_url_from_link(prevlink)
                prevsoup = get_soup_at_url(prevurl)
                prevnextlink = cls.get_next_link(prevsoup)
                prevnext = cls.get_url_from_link(prevnextlink) if prevnextlink is not None else "NO URL"
                if prevnext != url:
                    print("From %s, going backward then forward leads to %s" % (url, prevnext))
                    ok = False
            if nextlink:
                nexturl = cls.get_url_from_link(nextlink)
                # A next link pointing to the page itself is not followed.
                if nexturl != url:
                    nextsoup = get_soup_at_url(nexturl)
                    nextprevlink = cls.get_prev_link(nextsoup)
                    nextprev = cls.get_url_from_link(nextprevlink) if nextprevlink is not None else "NO URL"
                    if nextprev != url:
                        print("From %s, going forward then backward leads to %s" % (url, nextprev))
                        ok = False
        cls.log("checked prev/next from %s -> returned %d" % (url, ok))
        return ok

    @classmethod
    def check_navigation(cls, url):
        """Check that navigation functions seem to be working - for dev purposes.

        Runs both `check_first_link` and `check_prev_next_links(url)` and
        returns whether both succeeded.
        """
        cls.log("about to check navigation from %s" % url)
        first = cls.check_first_link()
        prevnext = cls.check_prev_next_links(url)
        ok = first and prevnext
        cls.log("checked navigation from %s -> returned %d" % (url, ok))
        return ok
207
208
209
class GenericListableComic(GenericComic):
    """Generic class for "listable" comics: with a list of comics (aka 'archive').

    The `get_next_comic` method is implemented in terms of more
    specialized methods to be implemented/overridden:
        - get_archive_elements
        - get_url_from_archive_element
        - get_comic_info
    """
    _categories = ('LISTABLE', )

    @classmethod
    def get_archive_elements(cls):
        """Return an iterable over the elements of the archive."""
        raise NotImplementedError

    @classmethod
    def get_url_from_archive_element(cls, archive_elt):
        """Return the url an archive element corresponds to."""
        raise NotImplementedError

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Return the information about one particular comic."""
        raise NotImplementedError

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generic implementation of get_next_comic for listable comics.

        Archive elements are skipped up to (and including) the one whose url
        is the url of `last_comic`; the following ones are retrieved and
        yielded with their 'url' entry set. When `last_comic` is falsy,
        every element is retrieved. A message is printed if the url of
        `last_comic` was never met.
        """
        waiting_for_url = last_comic['url'] if last_comic else None
        archive_elts = list(cls.get_archive_elements())
        for archive_elt in archive_elts:
            url = cls.get_url_from_archive_element(archive_elt)
            cls.log("considering %s" % url)
            if waiting_for_url is not None:
                # Still looking for the last comic retrieved: nothing to get.
                if waiting_for_url == url:
                    waiting_for_url = None
                continue
            cls.log("about to get %s (%s)" % (url, str(archive_elt)))
            page_soup = get_soup_at_url(url)
            comic = cls.get_comic_info(page_soup, archive_elt)
            if comic is None:
                continue
            assert 'url' not in comic
            comic['url'] = url
            yield comic
        if waiting_for_url is not None:
            print("Did not find %s in the %d comics: there might be a problem" %
                  (waiting_for_url, len(archive_elts)))
256
257
# Helper functions corresponding to get_first_comic_link/get_navi_link
258
259
260
@classmethod
def get_link_rel_next(cls, last_soup, next_):
    """Implementation of get_navi_link based on <link rel="next"/"prev">."""
    rel_value = 'next' if next_ else 'prev'
    return last_soup.find('link', rel=rel_value)
264
265
266
@classmethod
def get_a_rel_next(cls, last_soup, next_):
    """Implementation of get_navi_link based on <a rel="next"/"prev">."""
    rel_value = 'next' if next_ else 'prev'
    return last_soup.find('a', rel=rel_value)
270
271
272
@classmethod
def get_a_navi_navinext(cls, last_soup, next_):
    """Implementation of get_navi_link based on <a class="navi navi-next"/"navi navi-prev">."""
    css_class = 'navi navi-next' if next_ else 'navi navi-prev'
    return last_soup.find('a', class_=css_class)
276
277
278
@classmethod
def get_a_navi_comicnavnext_navinext(cls, last_soup, next_):
    """Implementation of get_navi_link based on <a> tags with 'navi comic-nav-* navi-*' classes."""
    if next_:
        css_class = 'navi comic-nav-next navi-next'
    else:
        css_class = 'navi comic-nav-previous navi-prev'
    return last_soup.find('a', class_=css_class)
282
283
284
@classmethod
def get_a_comicnavbase_comicnavnext(cls, last_soup, next_):
    """Implementation of get_navi_link based on <a> tags with 'comic-nav-base comic-nav-*' classes."""
    if next_:
        css_class = 'comic-nav-base comic-nav-next'
    else:
        css_class = 'comic-nav-base comic-nav-previous'
    return last_soup.find('a', class_=css_class)
288
289
290
@classmethod
def get_a_navi_navifirst(cls):
    """Implementation of get_first_comic_link based on <a class="navi navi-first">.

    The tag is looked for in the page found at the class' `url`.
    """
    home_soup = get_soup_at_url(cls.url)
    return home_soup.find('a', class_='navi navi-first')
294
295
296
@classmethod
def get_div_navfirst_a(cls):
    """Implementation of get_first_comic_link based on <div class="nav-first">.

    Returns the first <a> found in the <div class="nav-first"> of the page
    found at the class' `url`, or None when there is no such <div> (instead
    of failing with an AttributeError on the missing tag).
    """
    div = get_soup_at_url(cls.url).find('div', class_="nav-first")
    # Compare against None explicitly rather than relying on tag truthiness.
    return None if div is None else div.find('a')
300
301
302
@classmethod
def get_a_comicnavbase_comicnavfirst(cls):
    """Implementation of get_first_comic_link based on <a class="comic-nav-base comic-nav-first">.

    The tag is looked for in the page found at the class' `url`.
    """
    home_soup = get_soup_at_url(cls.url)
    return home_soup.find('a', class_='comic-nav-base comic-nav-first')
306
307
308
@classmethod
def simulate_first_link(cls):
    """Implementation of get_first_comic_link building a link-like object.

    The object is a dict whose 'href' entry is the `first_url` attribute
    provided by the class.

    Note: The first URL can easily be found using:
    `get_first_comic_link = navigate_to_first_comic`.
    """
    first_link = {'href': cls.first_url}
    return first_link
317
318
319
@classmethod
def navigate_to_first_comic(cls):
    """Implementation of get_first_comic_link navigating to the first comic.

    Sometimes, the first comic cannot be reached directly, so one has to
    start from a user-provided URL and go to the previous comic until
    there is no previous comic. Once the resulting URL is known, it is
    better to hardcode it, but for development purposes it is convenient
    to have an automatic way to find it.

    Every URL visited is printed. The URL found can then easily be used
    via `simulate_first_link`.
    """
    url = input("Get starting URL: ")
    while True:
        print(url)
        prev_link = cls.get_prev_link(get_soup_at_url(url))
        if not prev_link:
            break
        url = cls.get_url_from_link(prev_link)
    return {'href': url}
340
341
342
class GenericEmptyComic(GenericComic):
    """Generic class for comics where nothing is to be done.

    It can be useful to temporarily deactivate comics that do not work
    properly by replacing `class MyComic(GenericWhateverComic)` with
    `class MyComic(GenericEmptyComic, GenericWhateverComic)`.
    """
    _categories = ('EMPTY', )

    @classmethod
    def get_next_comic(cls, last_comic):
        """Implementation of get_next_comic returning no comics (an empty list)."""
        cls.log("comic is considered as empty - returning no comic")
        no_comics = []
        return no_comics
355
356
357
class ExtraFabulousComics(GenericNavigableComic):
    """Class to retrieve Extra Fabulous Comics."""
    name = 'efc'
    long_name = 'Extra Fabulous Comics'
    url = 'http://extrafabulouscomics.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        Returns a dict with the title, the image urls, the publication date
        (day/month/year) and a file prefix built from the title.
        """
        # The url is escaped so that its dots only match literal dots.
        img_src_re = re.compile('^%s/wp-content/uploads/' % re.escape(cls.url))
        imgs = soup.find_all('img', src=img_src_re)
        title = soup.find('meta', property='og:title')['content']
        # Only the leading "YYYY-MM-DD" part of the published time is parsed.
        date_str = soup.find('meta', property='article:published_time')['content'][:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        return {
            'title': title,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'prefix': title + '-'
        }
381
382
383
class GenericLeMondeBlog(GenericNavigableComic):
    """Generic class to retrieve comics published on Le Monde blogs."""
    _categories = ('LEMONDE', 'FRANCAIS')
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    # To be provided by subclasses (read by simulate_first_link).
    first_url = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, short link, images and date of the comic in `soup`."""
        shortlink = soup.find('link', rel='shortlink')
        url2 = shortlink['href']
        title = soup.find('meta', property='og:title')['content']
        # The date is parsed with the "fr_FR.utf8" locale.
        published = string_to_date(
            soup.find("span", class_="entry-date").string,
            "%d %B %Y",
            "fr_FR.utf8")
        image_metas = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'url2': url2,
            'img': [convert_iri_to_plain_ascii_uri(meta['content']) for meta in image_metas],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
406
407
408
class ZepWorld(GenericLeMondeBlog):
    """Retriever for the Zep World comics (Le Monde blog)."""
    name = "zep"
    long_name = "Zep World"
    url = "http://zepworld.blog.lemonde.fr"
    # Page the navigation starts from.
    first_url = "http://zepworld.blog.lemonde.fr/2014/10/31/bientot-le-blog-de-zep/"
414
415
416
class Vidberg(GenericLeMondeBlog):
    """Retriever for the Vidberg comics (Le Monde blog)."""
    name = "vidberg"
    long_name = "Vidberg - l'actu en patates"
    url = "http://vidberg.blog.lemonde.fr"
    # Page the navigation starts from: not the actual first comic, as no
    # efficient way to retrieve it was found.
    first_url = "http://vidberg.blog.lemonde.fr/2012/02/09/revue-de-campagne-la-campagne-du-modem-semballe/"
423
424
425
class Plantu(GenericLeMondeBlog):
    """Retriever for the Plantu comics (Le Monde blog)."""
    name = "plantu"
    long_name = "Plantu"
    url = "http://plantu.blog.lemonde.fr"
    # Page the navigation starts from.
    first_url = "http://plantu.blog.lemonde.fr/2014/10/28/stress-test-a-bruxelles/"
431
432
433
class XavierGorce(GenericLeMondeBlog):
    """Retriever for the Xavier Gorce comics (Le Monde blog)."""
    name = "gorce"
    long_name = "Xavier Gorce"
    url = "http://xaviergorce.blog.lemonde.fr"
    # Page the navigation starts from.
    first_url = "http://xaviergorce.blog.lemonde.fr/2015/01/09/distinction/"
439
440
441
class CartooningForPeace(GenericLeMondeBlog):
    """Retriever for the Cartooning For Peace comics (Le Monde blog)."""
    name = "forpeace"
    long_name = "Cartooning For Peace"
    url = "http://cartooningforpeace.blog.lemonde.fr"
    # Page the navigation starts from.
    first_url = "http://cartooningforpeace.blog.lemonde.fr/2014/12/15/bado/"
447
448
449
class Aurel(GenericLeMondeBlog):
    """Retriever for the Aurel comics (Le Monde blog)."""
    name = "aurel"
    long_name = "Aurel"
    url = "http://aurel.blog.lemonde.fr"
    # Page the navigation starts from.
    first_url = "http://aurel.blog.lemonde.fr/2014/09/29/le-senat-repasse-a-droite/"
455
456
457
class LesCulottees(GenericLeMondeBlog):
    """Retriever for the Les Culottees comics (Le Monde blog)."""
    name = "culottees"
    long_name = "Les Culottees"
    url = "http://lesculottees.blog.lemonde.fr"
    # Page the navigation starts from.
    first_url = "http://lesculottees.blog.lemonde.fr/2016/01/11/clementine-delait-femme-a-barbe/"
463
464
465
class UneAnneeAuLycee(GenericLeMondeBlog):
    """Retriever for the Une Annee Au Lycee comics (Le Monde blog)."""
    name = "lycee"
    long_name = "Une Annee au Lycee"
    url = "http://uneanneeaulycee.blog.lemonde.fr"
    # Page the navigation starts from.
    first_url = "http://uneanneeaulycee.blog.lemonde.fr/2016/06/13/la-semaine-du-bac-est-arrivee/"
471
472
473
class Rall(GenericNavigableComic):
    """Retriever for the Ted Rall comics."""
    # Also on http://www.gocomics.com/tedrall
    name = 'rall'
    long_name = "Ted Rall"
    url = "http://rall.com/comic"
    _categories = ('RALL', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    # Not the first but I didn't find an efficient way to retrieve it
    first_url = "http://rall.com/2014/01/30/los-angeles-times-cartoon-well-miss-those-california-flowers"

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, author, date, description and images of the comic in `soup`."""
        title = soup.find('meta', property='og:title')['content']
        author_span = soup.find("span", class_="author vcard")
        author = author_span.find("a").string
        published = string_to_date(
            soup.find("span", class_="entry-date").string, "%B %d, %Y")
        desc = soup.find('meta', property='og:description')['content']
        content_imgs = soup.find('div', class_='entry-content').find_all('img')
        # The last 7 images are dropped: remove social media buttons.
        comic_imgs = content_imgs[:-7]
        return {
            'title': title,
            'author': author,
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'description': desc,
            'img': [img['src'] for img in comic_imgs],
        }
504
505
506
class Dilem(GenericNavigableComic):
    """Retriever for the Ali Dilem comics."""
    name = 'dilem'
    long_name = 'Ali Dilem'
    url = 'http://information.tv5monde.com/dilem'
    _categories = ('FRANCAIS', )
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = "http://information.tv5monde.com/dilem/2004-06-26"

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the link to the next or previous comic (None if not found)."""
        # On this site, prev is next / next is prev.
        css_class = 'prev' if next_ else 'next'
        item = last_soup.find('li', class_=css_class)
        if not item:
            return None
        return item.find('a')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return short url, title, images and date of the comic in `soup`."""
        short_url = soup.find('link', rel='shortlink')['href']
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        image_metas = soup.find_all('meta', property='og:image')
        raw_date = soup.find('span', property='dc:date')['content']
        # Only the leading "YYYY-MM-DD" part of the date is parsed.
        published = string_to_date(raw_date[:10], "%Y-%m-%d")
        return {
            'short_url': short_url,
            'title': title,
            'img': [meta['content'] for meta in image_metas],
            'day': published.day,
            'month': published.month,
            'year': published.year,
        }
540
541
542
class SpaceAvalanche(GenericNavigableComic):
    """Retriever for the Space Avalanche comics."""
    name = 'avalanche'
    long_name = 'Space Avalanche'
    url = 'http://www.spaceavalanche.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return a hardcoded link-like dict pointing to the first comic."""
        first_link = {
            'href': "http://www.spaceavalanche.com/2009/02/02/irish-sea/",
            'title': "Irish Sea",
        }
        return first_link

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, date and images of a comic.

        The title comes from the 'title' entry of `link` and the date from
        the numeric components of the url of `link`.
        """
        url_date_re = re.compile('.*/([0-9]*)/([0-9]*)/([0-9]*)/.*$')
        title = link['title']
        url = cls.get_url_from_link(link)
        date_match = url_date_re.match(url)
        year, month, day = map(int, date_match.groups())
        entry_imgs = soup.find("div", class_="entry").find_all("img")
        return {
            'title': title,
            'day': day,
            'month': month,
            'year': year,
            'img': [img['src'] for img in entry_imgs],
        }
570
571
572
class ZenPencils(GenericNavigableComic):
    """Retriever for the ZenPencils comics."""
    # Also on http://zenpencils.tumblr.com
    # Also on http://www.gocomics.com/zen-pencils
    name = 'zenpencils'
    long_name = 'Zen Pencils'
    url = 'http://zenpencils.com'
    _categories = ('ZENPENCILS', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = "http://zenpencils.com/comic/1-ralph-waldo-emerson-make-them-cry/"

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, description, author, date and images of the comic in `soup`."""
        comic_div = soup.find('div', id='comic')
        imgs = comic_div.find_all('img')
        # imgs2 = soup.find_all('meta', property='og:image')
        post = soup.find('div', class_='post-content')
        author = post.find("span", class_="post-author").find("a").string
        title = soup.find('meta', property='og:title')['content']
        published = string_to_date(
            post.find('span', class_='post-date').string, "%B %d, %Y")
        # Sanity checks on the images found.
        assert imgs
        assert all(img['alt'] == img['title'] for img in imgs)
        assert all(img['alt'] in (title, "") for img in imgs)
        desc = soup.find('meta', property='og:description')['content']
        return {
            'title': title,
            'description': desc,
            'author': author,
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [urljoin_wrapper(cls.url, img['src']) for img in imgs],
        }
607
608
609
class ItsTheTie(GenericEmptyComic, GenericNavigableComic):
    """Retriever for the It's the tie comics."""
    # Also on http://itsthetie.tumblr.com
    # Also on https://tapastic.com/series/itsthetie
    name = 'tie'
    long_name = "It's the tie"
    url = "http://itsthetie.com"
    _categories = ('TIE', )
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, date, images and tags of the comic in `soup`."""
        title = soup.find('h1', class_='comic-title').find('a').string
        meta_header = soup.find('header', class_='comic-meta entry-meta')
        published = string_to_date(meta_header.find('a').string, "%B %d, %Y")
        # Bonus images may or may not be in meta og:image.
        imgs_src = [meta['content']
                    for meta in soup.find_all('meta', property='og:image')]
        bonus_src = [img['data-oversrc']
                     for img in soup.find_all('img', attrs={'data-oversrc': True})]
        candidates = list(imgs_src)
        candidates.extend(src for src in bonus_src if src not in imgs_src)
        all_imgs_src = [src for src in candidates
                        if not src.endswith("/2016/01/bonus-panel.png")]
        tag_meta = soup.find('meta', property='article:tag')
        tags = tag_meta['content'] if tag_meta else ""
        return {
            'title': title,
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': all_imgs_src,
            'tags': tags,
        }
643
644
645
class PenelopeBagieu(GenericNavigableComic):
    """Retriever for the comics from Penelope Bagieu's blog."""
    name = 'bagieu'
    long_name = 'Ma vie est tout a fait fascinante (Bagieu)'
    url = 'http://www.penelope-jolicoeur.com'
    _categories = ('FRANCAIS', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.penelope-jolicoeur.com/2007/02/ma-vie-mon-oeuv.html'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, images and date of the comic in `soup`."""
        date_header = soup.find('h2', class_='date-header')
        # The date is parsed with the "fr_FR.utf8" locale.
        published = string_to_date(date_header.string, "%A %d %B %Y", "fr_FR.utf8")
        body_imgs = soup.find('div', class_='entry-body').find_all('img')
        title = soup.find('h3', class_='entry-header').string
        return {
            'title': title,
            'img': [img['src'] for img in body_imgs],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
669
670
671
class OneOneOneOneComic(GenericNavigableComic):
    """Retriever for the 1111 Comics."""
    # Also on http://comics1111.tumblr.com
    # Also on https://tapastic.com/series/1111-Comics
    name = '1111'
    long_name = '1111 Comics'
    url = 'http://www.1111comics.me'
    _categories = ('ONEONEONEONE', )
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, date and images of the comic in `soup`."""
        title = soup.find('h1', class_='comic-title').find('a').string
        meta_header = soup.find('header', class_='comic-meta entry-meta')
        published = string_to_date(meta_header.find('a').string, "%B %d, %Y")
        image_metas = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [meta['content'] for meta in image_metas],
        }
696
697
698
class AngryAtNothing(GenericEmptyComic, GenericNavigableComic):
    """Retriever for the Angry at Nothing comics."""
    # Also on http://tapastic.com/series/Comics-yeah-definitely-comics-
    # Also on http://angryatnothing.tumblr.com
    name = 'angry'
    long_name = 'Angry At Nothing'
    url = 'http://www.angryatnothing.net'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, date and images of the comic in `soup`."""
        title = soup.find('h1', class_='comic-title').find('a').string
        meta_header = soup.find('header', class_='comic-meta entry-meta')
        published = string_to_date(meta_header.find('a').string, "%B %d, %Y")
        image_metas = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [meta['content'] for meta in image_metas],
        }
722
723
724
class NeDroid(GenericNavigableComic):
    """Class to retrieve NeDroid comics."""
    name = 'nedroid'
    long_name = 'NeDroid'
    url = 'http://nedroid.com'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        Returns a dict with the short url of the comic, the number found in
        that short url, the titles taken from the 'alt' and 'title'
        attributes of the single comic image, and the image url.
        """
        # The url is escaped so that its dots only match literal dots.
        short_url_re = re.compile(r'^%s/\?p=([0-9]*)' % re.escape(cls.url))
        short_url = cls.get_url_from_link(soup.find('link', rel='shortlink'))
        num = int(short_url_re.match(short_url).group(1))
        imgs = soup.find('div', id='comic').find_all('img')
        # Exactly one image is expected in the comic div.
        assert len(imgs) == 1
        title = imgs[0]['alt']
        title2 = imgs[0]['title']
        return {
            'short_url': short_url,
            'title': title,
            'title2': title2,
            'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
            'num': num,
        }
750
751
752
class Garfield(GenericNavigableComic):
    """Class to retrieve Garfield comics."""
    # Also on http://www.gocomics.com/garfield
    name = 'garfield'
    long_name = 'Garfield'
    url = 'https://garfield.com'
    _categories = ('GARFIELD', )
    get_first_comic_link = simulate_first_link
    first_url = 'https://garfield.com/comic/1978/06/19'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic."""
        return last_soup.find('a', class_='comic-arrow-right' if next_ else 'comic-arrow-left')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        The date (year/month/day) is extracted from the url of `link`; the
        images are the 'img-responsive' ones of the 'comic-display' div.
        """
        url = cls.get_url_from_link(link)
        # The url is escaped so that its dots only match literal dots.
        date_re = re.compile('^%s/comic/([0-9]*)/([0-9]*)/([0-9]*)' % re.escape(cls.url))
        year, month, day = [int(s) for s in date_re.match(url).groups()]
        imgs = soup.find('div', class_='comic-display').find_all('img', class_='img-responsive')
        return {
            'month': month,
            'year': year,
            'day': day,
            'img': [i['src'] for i in imgs],
        }
780
781 View Code Duplication
0 ignored issues
show
This code seems to be duplicated in your project.
Loading history...
782
class Dilbert(GenericNavigableComic):
    """Retriever for the Dilbert comics published on dilbert.com."""
    # Also on http://www.gocomics.com/dilbert-classics
    name = 'dilbert'
    long_name = 'Dilbert'
    url = 'http://dilbert.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://dilbert.com/strip/1989-04-16'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the link to the next (or previous) comic, None if absent."""
        nav_class = 'nav-comic nav-right' if next_ else 'nav-comic nav-left'
        container = last_soup.find('div', class_=nav_class)
        if not container:
            return None
        return container.find('a')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the info dict of a comic, built from the page <meta> tags."""
        def meta_content(prop):
            """Content of the first <meta> tag with the given property."""
            return soup.find('meta', property=prop)['content']

        title = meta_content('og:title')
        image_metas = soup.find_all('meta', property='og:image')
        description = meta_content('og:description')
        published = string_to_date(meta_content('article:publish_date'), "%B %d, %Y")
        author = meta_content('article:author')
        tags = meta_content('article:tag')
        return {
            'title': title,
            'description': description,
            'img': [meta['content'] for meta in image_metas],
            'author': author,
            'tags': tags,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
818
819
820
class VictimsOfCircumsolar(GenericEmptyComic, GenericNavigableComic):
    """Retriever for the Victims Of Circumsolar comics."""
    # Also on https://victimsofcomics.tumblr.com
    name = 'circumsolar'
    long_name = 'Victims Of Circumsolar'
    url = 'http://www.victimsofcircumsolar.com'
    get_navi_link = get_a_navi_comicnavnext_navinext
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.victimsofcircumsolar.com/comic/modern-addiction'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, description and image urls of a comic."""
        # Date is on the archive page
        # When several og: tags are present, the last one is used
        og_titles = soup.find_all('meta', property='og:title')
        title = og_titles[-1]['content']
        og_descriptions = soup.find_all('meta', property='og:description')
        description = og_descriptions[-1]['content']
        pictures = soup.find('div', id='comic').find_all('img')
        for pic in pictures:
            assert pic['title'] == pic['alt'] == title
        return {
            'title': title,
            'description': description,
            'img': [pic['src'] for pic in pictures],
        }
843
844
845
class ThreeWordPhrase(GenericNavigableComic):
    """Retriever for the Three Word Phrase comics."""
    # Also on http://www.threewordphrase.tumblr.com
    name = 'threeword'
    long_name = 'Three Word Phrase'
    url = 'http://threewordphrase.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the element wrapping the 'first' button of the home page."""
        home = get_soup_at_url(cls.url)
        return home.find('img', src='/firstlink.gif').parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the link to the next (or previous) comic.

        None is returned when the parent of the button has no 'href'.
        """
        button_src = '/nextlink.gif' if next_ else '/prevlink.gif'
        anchor = last_soup.find('img', src=button_src).parent
        if anchor.get('href') is None:
            return None
        return anchor

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, alternative texts and image urls of a comic."""
        title = soup.find('title')
        # Images whose source ends with one of these are not kept
        skipped_suffixes = (
            'link.gif', '32.png', 'twpbookad.jpg',
            'merchad.jpg', 'header.gif', 'tipjar.jpg')
        pictures = [pic for pic in soup.find_all('img')
                    if not pic['src'].endswith(skipped_suffixes)]
        alt_texts = [pic.get('alt') for pic in pictures]
        return {
            'title': title.string if title else None,
            'title2': '  '.join(alt for alt in alt_texts if alt),
            'img': [urljoin_wrapper(cls.url, pic['src']) for pic in pictures],
        }
877
878
879
class DeadlyPanel(GenericEmptyComic, GenericNavigableComic):
    """Retriever for the Deadly Panel comics."""
    # Also on https://tapastic.com/series/deadlypanel
    # Also on https://deadlypanel.tumblr.com
    name = 'deadly'
    long_name = 'Deadly Panel'
    url = 'http://www.deadlypanel.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the image urls found in the 'comic' div of a comic page."""
        pictures = soup.find('div', id='comic').find_all('img')
        for pic in pictures:
            assert pic['alt'] == pic['title']
        return {
            'img': [pic['src'] for pic in pictures],
        }
897
898
899
class TheGentlemanArmchair(GenericNavigableComic):
    """Retriever for The Gentleman Armchair comics."""
    name = 'gentlemanarmchair'
    long_name = 'The Gentleman Armchair'
    url = 'http://thegentlemansarmchair.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return images, title, author and date of a comic."""
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        post_date = soup.find('span', class_='post-date').string
        published = string_to_date(post_date, "%B %d, %Y")
        pictures = soup.find('div', id='comic').find_all('img')
        return {
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'author': author,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
923
924
925
class ImogenQuest(GenericNavigableComic):
    """Retriever for the Imogen Quest comics."""
    # Also on http://imoquest.tumblr.com
    name = 'imogen'
    long_name = 'Imogen Quest'
    url = 'http://imogenquest.net'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return date, images, titles and author of a comic."""
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        post_date = soup.find('span', class_='post-date').string
        published = string_to_date(post_date, '%B %d, %Y')
        pictures = soup.find('div', class_='comicpane').find_all('img')
        for pic in pictures:
            assert pic['alt'] == pic['title']
        # The hover text of the first image is used as secondary title
        hover_text = pictures[0]['title']
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'title2': hover_text,
            'author': author,
        }
953
954
955
class MyExtraLife(GenericNavigableComic):
    """Retriever for the My Extra Life comics."""
    name = 'extralife'
    long_name = 'My Extra Life'
    url = 'http://www.myextralife.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'first comic' navigation link of the home page."""
        home = get_soup_at_url(cls.url)
        return home.find('a', class_='comic_nav_link first_comic_link')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, images and date of a comic."""
        title = soup.find("h1", class_="comic_title").string
        comic_date = soup.find("span", class_="comic_date").string
        published = string_to_date(comic_date, "%B %d, %Y")
        pictures = soup.find_all("img", class_="comic")
        for pic in pictures:
            assert pic['alt'] == pic['title'] == title
        return {
            'title': title,
            # Images with an empty source are left out
            'img': [pic['src'] for pic in pictures if pic["src"]],
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
982
983
984
class SaturdayMorningBreakfastCereal(GenericNavigableComic):
    """Retriever for the Saturday Morning Breakfast Cereal comics."""
    # Also on http://www.gocomics.com/saturday-morning-breakfast-cereal
    # Also on http://smbc-comics.tumblr.com
    name = 'smbc'
    long_name = 'Saturday Morning Breakfast Cereal'
    url = 'http://www.smbc-comics.com'
    _categories = ('SMBC', )
    get_navi_link = get_a_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the rel='start' link of the home page."""
        home = get_soup_at_url(cls.url)
        return home.find('a', rel='start')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, images and date of a comic.

        The image of the 'aftercomic' div, when there is one with a
        non-empty source, is listed after the main image.
        """
        main_img = soup.find('img', id='cc-comic')
        img_urls = [main_img['src']]
        after_div = soup.find('div', id='aftercomic')
        if after_div:
            after_url = after_div.find('img')['src']
            if after_url:
                img_urls.append(after_url)
        publish_time = soup.find('div', class_='cc-publishtime').contents[0]
        published = string_to_date(publish_time, "%B %d, %Y")
        return {
            'title': main_img['title'],
            'img': [urljoin_wrapper(cls.url, img_url) for img_url in img_urls],
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
1016
1017
1018
class PerryBibleFellowship(GenericListableComic):  # Is now navigable too
    """Retriever for the Perry Bible Fellowship comics."""
    name = 'pbf'
    long_name = 'Perry Bible Fellowship'
    url = 'http://pbfcomics.com'
    get_url_from_archive_element = join_cls_url_to_href

    @classmethod
    def get_archive_elements(cls):
        """Return the thumbnail links of the home page, in reverse order."""
        home = get_soup_at_url(cls.url)
        links = home.find('div', id='all_thumbnails').find_all('a')
        return reversed(links)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return name and image url of a comic from its og: meta tags."""
        name = soup.find('meta', property='og:title')['content']
        image_metas = soup.find_all('meta', property='og:image')
        assert len(image_metas) == 1
        return {
            'name': name,
            'img': [meta['content'] for meta in image_metas],
        }
1041
1042
1043
class Mercworks(GenericNavigableComic):
    """Retriever for the Mercworks comics."""
    # Also on http://mercworks.tumblr.com
    name = 'mercworks'
    long_name = 'Mercworks'
    url = 'http://mercworks.net'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return images, title, description and date of a comic."""
        title = soup.find('meta', property='og:title')['content']
        desc_meta = soup.find('meta', property='og:description')
        if desc_meta:
            desc = desc_meta['content']
        else:
            desc = ""
        published_time = soup.find('meta', property='article:published_time')['content']
        # Only the first 10 characters are parsed, as a YYYY-MM-DD date
        published = string_to_date(published_time[:10], "%Y-%m-%d")
        image_metas = soup.find_all('meta', property='og:image')
        return {
            'img': [meta['content'] for meta in image_metas],
            'title': title,
            'desc': desc,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
1069
1070
1071
class BerkeleyMews(GenericListableComic):
    """Retriever for the Berkeley Mews comics."""
    # Also on http://mews.tumblr.com
    # Also on http://www.gocomics.com/berkeley-mews
    name = 'berkeley'
    long_name = 'Berkeley Mews'
    url = 'http://www.berkeleymews.com'
    _categories = ('BERKELEY', )
    get_url_from_archive_element = get_href
    # Matches the comic urls and captures the comic number
    comic_num_re = re.compile('%s/\\?p=([0-9]*)$' % url)

    @classmethod
    def get_archive_elements(cls):
        """Return the comic links of the archive page, in reverse order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, "?page_id=2"))
        comic_links = archive_soup.find_all('a', href=cls.comic_num_re)
        return reversed(comic_links)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return number, titles, image url and date of a comic.

        The date is extracted from the image url.
        """
        img_date_re = re.compile('.*/([0-9]*)-([0-9]*)-([0-9]*)-.*')
        comic_url = cls.get_url_from_archive_element(link)
        num = int(cls.comic_num_re.match(comic_url).group(1))
        picture = soup.find('div', id='comic').find('img')
        assert picture['alt'] == picture['title']
        hover_text = picture['title']
        picture_url = picture['src']
        year, month, day = (int(part) for part in img_date_re.match(picture_url).groups())
        return {
            'num': num,
            'title': link.string,
            'title2': hover_text,
            'img': [picture_url],
            'year': year,
            'month': month,
            'day': day,
        }
1107
1108
1109
class GenericBouletCorp(GenericNavigableComic):
    """Common retriever for the BouletCorp comics, whatever the language."""
    # Also on https://bouletcorp.tumblr.com
    _categories = ('BOULET', )
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the first link of the 'centered_nav' div of the home page."""
        navigation = get_soup_at_url(cls.url).find('div', id='centered_nav')
        return navigation.find_all('a')[0]

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return images, title, image texts and date of a comic.

        The date is read from the comic url, matched against
        <cls.url>/<year>/<month>/<day>/.
        """
        comic_url = cls.get_url_from_link(link)
        url_date_re = re.compile('^%s/([0-9]*)/([0-9]*)/([0-9]*)/' % cls.url)
        year, month, day = (int(part) for part in url_date_re.match(comic_url).groups())
        story = soup.find('div', id='notes').find('div', class_='storycontent')
        pictures = story.find_all('img')
        hover_texts = [pic.get('title') for pic in pictures]
        texts = '  '.join(text for text in hover_texts if text)
        title = soup.find('title').string
        # Images without a source are ignored
        img_urls = [convert_iri_to_plain_ascii_uri(pic['src'])
                    for pic in pictures if pic.get('src') is not None]
        return {
            'img': img_urls,
            'title': title,
            'texts': texts,
            'year': year,
            'month': month,
            'day': day,
        }
1137
1138
1139
class BouletCorp(GenericBouletCorp):
    """Retriever for the BouletCorp comics served by www.bouletcorp.com."""
    url = 'http://www.bouletcorp.com'
    name = 'boulet'
    long_name = 'Boulet Corp'
    _categories = ('FRANCAIS', )
1145
1146
1147
class BouletCorpEn(GenericBouletCorp):
    """Retriever for the BouletCorp comics served by english.bouletcorp.com."""
    url = 'http://english.bouletcorp.com'
    name = 'boulet_en'
    long_name = 'Boulet Corp English'
1152
1153
1154
class AmazingSuperPowers(GenericNavigableComic):
    """Retriever for the Amazing Super Powers comics."""
    name = 'asp'
    long_name = 'Amazing Super Powers'
    url = 'http://www.amazingsuperpowers.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, author, images and date of a comic.

        The title is made of the hover texts of the images.
        """
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        post_date = soup.find('span', class_='post-date').string
        published = string_to_date(post_date, "%B %d, %Y")
        pictures = soup.find('div', id='comic').find_all('img')
        title = ' '.join(pic['title'] for pic in pictures)
        for pic in pictures:
            assert pic['alt'] == pic['title']
        return {
            'title': title,
            'author': author,
            'img': [pic['src'] for pic in pictures],
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
1179
1180
1181
class ToonHole(GenericNavigableComic):
    """Retriever for the Toon Hole comics."""
    # Also on http://tapastic.com/series/TOONHOLE
    name = 'toonhole'
    long_name = 'Toon Hole'
    url = 'http://www.toonhole.com'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return short url, title, date and images of a comic.

        The title is the alternative text of the first image, or an empty
        string when the page has no image.
        """
        short_url = soup.find('link', rel='shortlink')['href']
        entry_meta = soup.find('div', class_='entry-meta')
        published = string_to_date(entry_meta.contents[0].strip(), "%B %d, %Y")
        pictures = soup.find('div', id='comic').find_all('img')
        title = ""
        if pictures:
            first_pic = pictures[0]
            title = first_pic['alt']
            assert first_pic['title'] == title
        return {
            'short_url': short_url,
            'title': title,
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [convert_iri_to_plain_ascii_uri(pic['src']) for pic in pictures],
        }
1211
1212
1213
class Channelate(GenericNavigableComic):
    """Retriever for the Channelate comics."""
    name = 'channelate'
    long_name = 'Channelate'
    url = 'http://www.channelate.com'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, author, date and images of a comic.

        When the page has an 'extrapanelbutton' div, the page it links to is
        fetched too and its extra panel images are added after the others.
        """
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        post_date = soup.find('span', class_='post-date').string
        published = string_to_date(post_date, '%Y/%m/%d')
        title = soup.find('meta', property='og:title')['content']
        comic_div = soup.find('div', id='comic')
        pictures = comic_div.find_all('img') if comic_div else []
        extra_button = soup.find('div', id='extrapanelbutton')
        if extra_button:
            extra_url = extra_button.find('a')['href']
            extra_page = get_soup_at_url(extra_url)
            pictures += extra_page.find_all('img', class_='extrapanelimage')
        else:
            extra_url = None
        return {
            'url_extra': extra_url,
            'title': title,
            'author': author,
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [urljoin_wrapper(cls.url, pic['src']) for pic in pictures],
        }
1247
1248
1249
class CyanideAndHappiness(GenericNavigableComic):
    """Class to retrieve Cyanide And Happiness comics."""
    name = 'cyanide'
    long_name = 'Cyanide and Happiness'
    url = 'http://explosm.net'
    _categories = ('NSFW', )
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        Returns the anchor of the home page titled 'Oldest comic'.
        """
        return get_soup_at_url(cls.url).find('a', title='Oldest comic')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns None when the page has no such anchor or when the anchor
        carries no 'href' attribute.
        """
        # No surrounding whitespace in the class name: a class name holding a
        # space cannot be equal to any single class of the anchor.
        link = last_soup.find('a', class_='next-comic' if next_ else 'previous-comic')
        if link is None or link.get('href') is None:
            return None
        return link

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with the comic number (taken from the og:url meta
        tag), the author, the date, a file prefix and the image urls.
        """
        url2 = soup.find('meta', property='og:url')['content']
        # The number is the last but one '/'-separated part of the url
        num = int(url2.split('/')[-2])
        date_str = soup.find('h3').find('a').string
        day = string_to_date(date_str, '%Y.%m.%d')
        author = soup.find('small', class_="author-credit-name").string
        assert author.startswith('by ')
        # Drop the leading 'by '
        author = author[3:]
        imgs = soup.find_all('img', id='main-comic')
        return {
            'num': num,
            'author': author,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'prefix': '%d-' % num,
            'img': [convert_iri_to_plain_ascii_uri(urljoin_wrapper(cls.url, i['src'])) for i in imgs]
        }
1288
1289
1290
class MrLovenstein(GenericComic):
    """Retriever for the Mr. Lovenstein comics."""
    # Also on https://tapastic.com/series/MrLovenstein
    name = 'mrlovenstein'
    long_name = 'Mr. Lovenstein'
    url = 'http://www.mrlovenstein.com'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generator to get the next comic. Implementation of GenericComic's abstract method.

        The comic numbers linked from the home page give the range to
        retrieve; it starts right after last_comic when there is one.
        """
        # TODO: more info from http://www.mrlovenstein.com/archive
        comic_num_re = re.compile('^/comic/([0-9]*)$')
        home_links = get_soup_at_url(cls.url).find_all('a', href=comic_num_re)
        nums = [int(comic_num_re.match(anchor['href']).group(1)) for anchor in home_links]
        first = min(nums)
        last = max(nums)
        if last_comic:
            first = last_comic['num'] + 1
        img_src_re = re.compile('^/images/comics/')
        for num in range(first, last + 1):
            url = urljoin_wrapper(cls.url, '/comic/%d' % num)
            page = get_soup_at_url(url)
            # Images are kept in the reverse of their order in the page
            pictures = page.find_all('img', src=img_src_re)[::-1]
            description = page.find('meta', attrs={'name': 'description'})['content']
            hover_texts = [pic.get('title') for pic in pictures]
            yield {
                'url': url,
                'num': num,
                'texts': '  '.join(text for text in hover_texts if text),
                'img': [urljoin_wrapper(url, pic['src']) for pic in pictures],
                'description': description,
            }
1320
1321
1322
class DinosaurComics(GenericListableComic):
    """Retriever for the Dinosaur Comics comics."""
    name = 'dinosaur'
    long_name = 'Dinosaur Comics'
    url = 'http://www.qwantz.com'
    get_url_from_archive_element = get_href
    # Matches the comic urls and captures the comic number
    comic_link_re = re.compile('^%s/index.php\\?comic=([0-9]*)$' % url)

    @classmethod
    def get_archive_elements(cls):
        """Return the comic links of the archive page, in reverse order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive.php'))
        comic_links = archive_soup.find_all('a', href=cls.comic_link_re)
        # first link is random -> skip it
        return reversed(comic_links[1:])

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return date, image, title, text and number of a comic.

        Date and text come from the archive link and the node following it.
        """
        comic_url = cls.get_url_from_archive_element(link)
        num = int(cls.comic_link_re.match(comic_url).group(1))
        link_text = link.string
        text = link.next_sibling.string
        published = string_to_date(remove_st_nd_rd_th_from_date(link_text), "%B %d, %Y")
        picture = soup.find('img', src=re.compile('^%s/comics/' % cls.url))
        return {
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [picture.get('src')],
            'title': picture.get('title'),
            'text': text,
            'num': num,
        }
1355
1356
1357
class ButterSafe(GenericListableComic):
    """Retriever for the ButterSafe comics."""
    name = 'butter'
    long_name = 'ButterSafe'
    url = 'http://buttersafe.com'
    get_url_from_archive_element = get_href
    # Matches the comic urls and captures year, month and day
    comic_link_re = re.compile('^%s/([0-9]*)/([0-9]*)/([0-9]*)/.*' % url)

    @classmethod
    def get_archive_elements(cls):
        """Return the comic links of the archive page, in reverse order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive/'))
        comic_links = archive_soup.find_all('a', href=cls.comic_link_re)
        return reversed(comic_links)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, date and image url of a comic.

        The date is read from the comic url, the title from the archive link.
        """
        comic_url = cls.get_url_from_archive_element(link)
        title = link.string
        year, month, day = (int(part) for part in cls.comic_link_re.match(comic_url).groups())
        picture = soup.find('div', id='comic').find('img')
        assert picture['alt'] == title
        return {
            'title': title,
            'day': day,
            'month': month,
            'year': year,
            'img': [picture['src']],
        }
1385
1386
1387
class CalvinAndHobbes(GenericComic):
    """Class to retrieve Calvin and Hobbes comics."""
    # Also on http://www.gocomics.com/calvinandhobbes/
    name = 'calvin'
    long_name = 'Calvin and Hobbes'
    # This is not through any official webpage but eh...
    url = 'http://marcel-oehler.marcellosendos.ch/comics/ch/'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generator to get the next comic. Implementation of GenericComic's abstract method.

        The index page links to one page per month. Each month that may hold
        comics more recent than the last retrieved one is fetched, and a dict
        is yielded for every image whose date is strictly after the date of
        the previously yielded (or last retrieved) comic.
        """
        last_date = get_date_for_comic(last_comic) if last_comic else date(1985, 11, 1)
        link_re = re.compile('^([0-9]*)/([0-9]*)/')
        for link in get_soup_at_url(cls.url).find_all('a', href=link_re):
            url = link['href']
            # The textual year/month are reused as is in patterns and urls,
            # the numeric ones are used for the dates.
            year_str, month_str = link_re.match(url).groups()
            year, month = int(year_str), int(month_str)
            # 31 days after the first day of a month is never before the end
            # of that month: months ending before last_date are skipped.
            if date(year, month, 1) + timedelta(days=31) < last_date:
                continue
            # Image names start with year and month, followed by the day
            img_re = re.compile('^%s%s([0-9]*)' % (year_str, month_str))
            month_url = urljoin_wrapper(cls.url, url)
            for img in get_soup_at_url(month_url).find_all('img', src=img_re):
                img_src = img['src']
                day = int(img_re.match(img_src).groups()[0])
                comic_date = date(year, month, day)
                if comic_date > last_date:
                    yield {
                        'url': month_url,
                        'year': year,
                        'month': month,
                        'day': day,
                        'img': ['%s%s/%s/%s' % (cls.url, year_str, month_str, img_src)],
                    }
                    last_date = comic_date
1421
1422
1423
class AbstruseGoose(GenericListableComic):
    """Retriever for the Abstruse Goose comics."""
    name = 'abstruse'
    long_name = 'Abstruse Goose'
    url = 'http://abstrusegoose.com'
    get_url_from_archive_element = get_href
    # Matches the comic urls and captures the comic number
    comic_url_re = re.compile('^%s/([0-9]*)$' % url)
    # Matches the urls of the comic images
    comic_img_re = re.compile('^%s/strips/.*' % url)

    @classmethod
    def get_archive_elements(cls):
        """Return the comic links of the archive page, in page order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive'))
        return archive_soup.find_all('a', href=cls.comic_url_re)

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Return number, title and image url of a comic.

        Number and title come from the archive link, the image from the page.
        """
        comic_url = cls.get_url_from_archive_element(archive_elt)
        num = int(cls.comic_url_re.match(comic_url).group(1))
        return {
            'num': num,
            'title': archive_elt.string,
            'img': [soup.find('img', src=cls.comic_img_re)['src']]
        }
1446
1447
1448
class PhDComics(GenericNavigableComic):
    """Retriever for the PhD Comics."""
    name = 'phd'
    long_name = 'PhD Comics'
    url = 'http://phdcomics.com/comics/archive.php'

    @classmethod
    def get_first_comic_link(cls):
        """Return the element wrapping the 'first' button, None if absent."""
        button = get_soup_at_url(cls.url).find(
            'img', src='http://phdcomics.com/comics/images/first_button.gif')
        if button is None:
            return None
        return button.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the element wrapping the next (or previous) button, None if absent."""
        direction = 'next' if next_ else 'prev'
        button_url = 'http://phdcomics.com/comics/images/%s_button.gif' % direction
        button = last_soup.find('img', src=button_url)
        if button is None:
            return None
        return button.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return image urls and title of a comic from its meta tags."""
        title_meta = soup.find('meta', attrs={'name': 'twitter:title'})
        title = title_meta['content']
        image_metas = soup.find_all('meta', property='og:image')
        return {
            'img': [meta['content'] for meta in image_metas],
            'title': title,
        }
1477
1478
1479
class Octopuns(GenericEmptyComic, GenericNavigableComic):
    """Retriever for the Octopuns comics."""
    # Also on http://octopuns.tumblr.com
    name = 'octopuns'
    long_name = 'Octopuns'
    url = 'http://www.octopuns.net'

    @classmethod
    def get_first_comic_link(cls):
        """Return the element wrapping the 'First' button of the home page."""
        home = get_soup_at_url(cls.url)
        return home.find('img', src=re.compile('.*/First.png')).parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the link to the next (or previous) comic.

        None is returned when the parent of the button has no 'href'.
        """
        button_pattern = '.*/Next.png' if next_ else '.*/Back.png'
        anchor = last_soup.find('img', src=re.compile(button_pattern)).parent
        if anchor.get('href') is None:
            return None
        return anchor

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return images, title and date of a comic."""
        title = soup.find('h3', class_='post-title entry-title').string
        date_header = soup.find('h2', class_='date-header').string
        published = string_to_date(date_header, "%A, %B %d, %Y")
        image_links = soup.find_all('link', rel='image_src')
        return {
            'img': [image_link['href'] for image_link in image_links],
            'title': title,
            'day': published.day,
            'month': published.month,
            'year': published.year,
        }
1511
1512
1513
class Quarktees(GenericNavigableComic):
    """Retriever for the Quarktees comics."""
    name = 'quarktees'
    long_name = 'Quarktees'
    url = 'http://www.quarktees.com/blogs/news'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.quarktees.com/blogs/news/12486621-coming-soon'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the link to the next (or previous) article."""
        if next_:
            anchor_id = 'article-next'
        else:
            anchor_id = 'article-prev'
        return last_soup.find('a', id=anchor_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title and image urls of an article."""
        title = soup.find('meta', property='og:title')['content']
        pictures = soup.find('div', class_='single-article').find_all('img')
        return {
            'title': title,
            'img': [urljoin_wrapper(cls.url, pic['src']) for pic in pictures],
        }
1537
1538
1539
class OverCompensating(GenericNavigableComic):
    """Retriever for the Over Compensating comics."""
    name = 'compensating'
    long_name = 'Over Compensating'
    url = 'http://www.overcompensating.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics (anchor whose href ends with ``comic=1``)."""
        home = get_soup_at_url(cls.url)
        return home.find('a', href=re.compile('comic=1$'))

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic (anchor looked up by title)."""
        wanted_title = 'next comic' if next_ else 'go back already'
        return last_soup.find('a', title=wanted_title)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The comic number is read from the trailing ``comic=<digits>`` of the
        comic url; the strip is the first image whose src starts with
        ``/oc/comics/``.
        """
        comic_url = cls.get_url_from_link(link)
        num_match = re.match('.*comic=([0-9]*)$', comic_url)
        num = int(num_match.group(1))
        strip = soup.find('img', src=re.compile('^/oc/comics/.*'))
        return {
            'num': num,
            'img': [urljoin_wrapper(comic_url, strip['src'])],
            'title': strip.get('title')
        }
1569
1570
1571
class Oglaf(GenericNavigableComic):
    """Retriever for the Oglaf comics."""
    name = 'oglaf'
    long_name = 'Oglaf [NSFW]'
    url = 'http://oglaf.com'
    _categories = ('NSFW', )
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics (parent of the div with id ``st``)."""
        start_div = get_soup_at_url(cls.url).find("div", id="st")
        return start_div.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic, or None when there is none."""
        div_id = "nx" if next_ else "pvs"
        div = last_soup.find("div", id=div_id)
        if not div:
            return None
        return div.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Exactly one title image (inside the ``tt`` div) and one strip image
        (id ``strip``) are expected; the description joins their titles.
        """
        title = soup.find('title').string
        header_imgs = soup.find('div', id='tt').find_all('img')
        assert len(header_imgs) == 1
        strips = soup.find_all('img', id='strip')
        assert len(strips) == 1
        all_imgs = header_imgs + strips
        img_titles = [img['title'] for img in all_imgs]
        desc = ' '.join(img_titles)
        return {
            'title': title,
            'img': [img['src'] for img in all_imgs],
            'description': desc,
        }
1605
1606
1607
class ScandinaviaAndTheWorld(GenericNavigableComic):
    """Retriever for the Scandinavia And The World comics."""
    name = 'satw'
    long_name = 'Scandinavia And The World'
    url = 'http://satwcomic.com'
    get_first_comic_link = simulate_first_link
    first_url = 'http://satwcomic.com/sweden-denmark-and-norway'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic (anchor looked up by accesskey)."""
        key = 'n' if next_ else 'p'
        return last_soup.find('a', accesskey=key)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Title and description are read from meta tags; images are the
        ``<img>`` elements flagged with ``itemprop="image"``.
        """
        title_meta = soup.find('meta', attrs={'name': 'twitter:label1'})
        title = title_meta['content']
        desc_meta = soup.find('meta', property='og:description')
        desc = desc_meta['content']
        img_urls = [tag['src'] for tag in soup.find_all('img', itemprop="image")]
        return {
            'title': title,
            'description': desc,
            'img': img_urls,
        }
1631
1632
1633
class SomethingOfThatIlk(GenericEmptyComic):  # Does not exist anymore
    """Placeholder for the Something Of That Ilk comics (site is gone)."""
    name = 'somethingofthatilk'
    long_name = 'Something Of That Ilk'
    url = 'http://www.somethingofthatilk.com'
1638
1639
1640
class InfiniteMonkeyBusiness(GenericNavigableComic):
    """Retriever for the Infinite Monkey Business comics."""
    name = 'monkey'
    long_name = 'Infinite Monkey Business'
    url = 'http://infinitemonkeybusiness.net'
    get_navi_link = get_a_navi_comicnavnext_navinext
    get_first_comic_link = simulate_first_link
    first_url = 'http://infinitemonkeybusiness.net/comic/pillory/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Title comes from the ``og:title`` meta tag, images from the
        ``comic`` div.
        """
        title = soup.find('meta', property='og:title')['content']
        comic_div = soup.find('div', id='comic')
        img_urls = []
        for tag in comic_div.find_all('img'):
            img_urls.append(tag['src'])
        return {
            'title': title,
            'img': img_urls,
        }
1658
1659
1660
class Wondermark(GenericListableComic):
    """Retriever for the Wondermark comics."""
    name = 'wondermark'
    long_name = 'Wondermark'
    url = 'http://wondermark.com'
    get_url_from_archive_element = get_href

    @classmethod
    def get_archive_elements(cls):
        """Get the archive's ``rel="bookmark"`` anchors, in reversed page order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive/'))
        return reversed(archive_soup.find_all('a', rel='bookmark'))

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Pages without a ``comic`` div yield no image and empty title/alt;
        date and tags are extracted in both cases.
        """
        raw_date = soup.find('div', class_='postdate').find('em').string
        day = string_to_date(remove_st_nd_rd_th_from_date(raw_date), "%B %d, %Y")
        img_src, alt, title = [], '', ''
        comic_div = soup.find('div', id='comic')
        if comic_div:
            img = comic_div.find('img')
            img_src = [img['src']]
            alt = img['alt']
            assert alt == img['title']
            title = soup.find('meta', property='og:title')['content']
        tag_links = soup.find('div', class_='postmeta').find_all('a', rel='tag')
        tags = ' '.join(t.string for t in tag_links)
        return {
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': img_src,
            'title': title,
            'alt': alt,
            'tags': tags,
        }
1697
1698
1699
class WarehouseComic(GenericNavigableComic):
    """Retriever for the Warehouse Comic comics."""
    name = 'warehouse'
    long_name = 'Warehouse Comic'
    url = 'http://warehousecomic.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Reads the post title, the post date ("%B %d, %Y" format) and the
        images of the ``comic`` div.
        """
        title = soup.find('h2', class_='post-title').string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, "%B %d, %Y")
        comic_div = soup.find('div', id='comic')
        img_urls = [tag['src'] for tag in comic_div.find_all('img')]
        return {
            'img': img_urls,
            'title': title,
            'day': published.day,
            'month': published.month,
            'year': published.year,
        }
1721
1722
1723
class JustSayEh(GenericNavigableComic):
    """Retriever for the Just Say Eh comics."""
    # Also on http://tapastic.com/series/Just-Say-Eh
    name = 'justsayeh'
    long_name = 'Just Say Eh'
    url = 'http://www.justsayeh.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Every image of the ``comic`` div must carry the same text in ``alt``
        and ``title``; the alt text of the first image is reported.
        """
        title = soup.find('h2', class_='post-title').string
        comic_div = soup.find("div", id="comic")
        imgs = comic_div.find_all("img")
        for img in imgs:
            assert img['alt'] == img['title']
        first_alt = imgs[0]['alt']
        return {
            'img': [img['src'] for img in imgs],
            'title': title,
            'alt': first_alt,
        }
1744
1745
1746
class MouseBearComedy(GenericNavigableComic):
    """Retriever for the Mouse Bear Comedy comics."""
    # Also on http://mousebearcomedy.tumblr.com
    name = 'mousebear'
    long_name = 'Mouse Bear Comedy'
    url = 'http://www.mousebearcomedy.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Each image of the ``comic`` div is expected to have ``alt`` and
        ``title`` both equal to the post title.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, '%B %d, %Y')
        imgs = soup.find("div", id="comic").find_all("img")
        for img in imgs:
            assert img['alt'] == img['title'] == title
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [img['src'] for img in imgs],
            'title': title,
            'author': author,
        }
1772
1773
1774
class BigFootJustice(GenericNavigableComic):
    """Retriever for the Big Foot Justice comics."""
    # Also on http://tapastic.com/series/bigfoot-justice
    name = 'bigfoot'
    long_name = 'Big Foot Justice'
    url = 'http://bigfootjustice.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The title is the space-joined ``title`` attributes of the images
        found in the ``comic`` div (``title`` and ``alt`` must agree).
        """
        comic_div = soup.find('div', id='comic')
        imgs = comic_div.find_all('img')
        for img in imgs:
            assert img['title'] == img['alt']
        img_titles = [img['title'] for img in imgs]
        return {
            'img': [img['src'] for img in imgs],
            'title': ' '.join(img_titles),
        }
1793
1794
1795
class RespawnComic(GenericNavigableComic):
    """Class to retrieve Respawn Comic."""
    # Also on https://respawncomic.tumblr.com
    name = 'respawn'
    long_name = 'Respawn Comic'
    # The url used to carry an accidental trailing space.
    url = 'http://respawncomic.com'
    _categories = ('RESPAWN', )
    get_navi_link = get_a_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://respawncomic.com/comic/c0001/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Title, author and publication date are read from meta tags (only the
        leading ``YYYY-MM-DD`` part of the published time is parsed). The
        images are the ``og:image`` meta contents, minus two site-wide
        pictures listed in `skip_imgs`.
        """
        title = soup.find('meta', property='og:title')['content']
        author = soup.find('meta', attrs={'name': 'shareaholic:article_author_name'})['content']
        date_str = soup.find('meta', attrs={'name': 'shareaholic:article_published_time'})['content']
        # Keep only the date part of the timestamp.
        date_str = date_str[:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        imgs = soup.find_all('meta', property='og:image')
        # Images advertised on the page that are not part of the strip.
        skip_imgs = {
            'http://respawncomic.com/wp-content/uploads/2016/03/site/HAROLD2.png',
            'http://respawncomic.com/wp-content/uploads/2016/03/site/DEVA.png'
        }
        return {
            'title': title,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [i['content'] for i in imgs if i['content'] not in skip_imgs],
        }
1827
1828
1829
class SafelyEndangered(GenericNavigableComic):
    """Retriever for the Safely Endangered comics."""
    # Also on http://tumblr.safelyendangered.com
    name = 'endangered'
    long_name = 'Safely Endangered'
    url = 'http://www.safelyendangered.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.safelyendangered.com/comic/ignored/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The alt text reported is the one of the first image of the ``comic``
        div; every image must have matching ``alt`` and ``title``.
        """
        title = soup.find('h2', class_='post-title').string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, '%B %d, %Y')
        comic_div = soup.find('div', id='comic')
        imgs = comic_div.find_all('img')
        first_alt = imgs[0]['alt']
        for img in imgs:
            assert img['alt'] == img['title']
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [img['src'] for img in imgs],
            'title': title,
            'alt': first_alt,
        }
1856
1857
1858
class PicturesInBoxes(GenericNavigableComic):
    """Retriever for the Pictures In Boxes comics."""
    # Also on https://picturesinboxescomic.tumblr.com
    name = 'picturesinboxes'
    long_name = 'Pictures in Boxes'
    url = 'http://www.picturesinboxes.com'
    get_navi_link = get_a_navi_navinext
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.picturesinboxes.com/2013/10/26/tetris/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        At least one image is expected in the ``comicpane`` div, each with
        ``title`` and ``alt`` equal to the post title.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, '%B %d, %Y')
        imgs = soup.find('div', class_='comicpane').find_all('img')
        assert imgs
        for img in imgs:
            assert img['title'] == img['alt'] == title
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [img['src'] for img in imgs],
            'title': title,
            'author': author,
        }
1886
1887
1888
class Penmen(GenericNavigableComic):
    """Retriever for the Penmen comics."""
    name = 'penmen'
    long_name = 'Penmen'
    url = 'http://penmen.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://penmen.com/index.php/2016/09/12/penmen-announces-grin-big-brand-clothing/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The date is the first 10 characters (``YYYY-MM-DD``) of the
        ``datetime`` attribute of the first ``<time>`` element.
        """
        title = soup.find('title').string
        content_div = soup.find('div', class_='entry-content')
        imgs = content_div.find_all('img')
        short_url = soup.find('link', rel='shortlink')['href']
        tag_names = [t.string for t in soup.find_all('a', rel='tag')]
        tags = ' '.join(tag_names)
        iso_date = soup.find('time')['datetime'][:10]
        published = string_to_date(iso_date, "%Y-%m-%d")
        return {
            'title': title,
            'short_url': short_url,
            'img': [img['src'] for img in imgs],
            'tags': tags,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
1915
1916
1917
class TheDoghouseDiaries(GenericNavigableComic):
    """Retriever for The Dog House Diaries comics."""
    name = 'doghouse'
    long_name = 'The Dog House Diaries'
    url = 'http://thedoghousediaries.com'

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics (anchor with id ``firstlink``)."""
        home = get_soup_at_url(cls.url)
        return home.find('a', id='firstlink')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic (anchor looked up by id)."""
        anchor_id = 'nextlink' if next_ else 'previouslink'
        return last_soup.find('a', id=anchor_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The strip is the first image whose src starts with ``dhdcomics/``;
        the comic number is the last path component of the comic url.
        """
        strip = soup.find('img', src=re.compile('^dhdcomics/.*'))
        comic_url = cls.get_url_from_link(link)
        title = soup.find('h2', id='titleheader').string
        subtitle = soup.find('div', id='subtext').string
        alt = strip.get('title')
        # The src attribute may be padded with whitespace, hence the strip().
        img_url = urljoin_wrapper(comic_url, strip['src'].strip())
        num = int(comic_url.split('/')[-1])
        return {
            'title': title,
            'title2': subtitle,
            'alt': alt,
            'img': [img_url],
            'num': num,
        }
1946
1947
1948
class InvisibleBread(GenericListableComic):
    """Class to retrieve Invisible Bread comics."""
    # Also on http://www.gocomics.com/invisible-bread
    name = 'invisiblebread'
    long_name = 'Invisible Bread'
    url = 'http://invisiblebread.com'

    @classmethod
    def get_archive_elements(cls):
        """Get the ``archive-title`` cells of the archive page, reversed."""
        archive_url = urljoin_wrapper(cls.url, 'archives/')
        return reversed(get_soup_at_url(archive_url).find_all('td', class_='archive-title'))

    @classmethod
    def get_url_from_archive_element(cls, td):
        """Get url corresponding to an archive element (href of its anchor)."""
        return td.find('a')['href']

    @classmethod
    def get_comic_info(cls, soup, td):
        """Get information about a particular comics.

        The archive cell `td` provides the title and, through its previous
        sibling, the month and day; the year is the first numeric path
        component of the comic url. The strip is the first image of the
        ``comic`` div and must have ``title`` and ``alt`` equal to the title.
        """
        url = cls.get_url_from_archive_element(td)
        title = td.find('a').string
        month_and_day = td.previous_sibling.string
        # Escape the site url so its dots are matched literally.
        link_re = re.compile('^%s/([0-9]+)/' % re.escape(cls.url))
        year = link_re.match(url).group(1)
        date_str = month_and_day + ' ' + year
        day = string_to_date(date_str, '%b %d %Y')
        img = soup.find('div', id='comic').find('img')
        # The previous length check on a one-element list literal could never
        # fail; check the lookup result itself instead.
        assert img is not None
        assert img['title'] == img['alt'] == title
        return {
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [urljoin_wrapper(cls.url, img['src'])],
            'title': title,
        }
1984
1985
1986
class DiscoBleach(GenericEmptyComic):  # Does not work anymore
    """Placeholder for the Disco Bleach comics (retrieval is disabled)."""
    name = 'discobleach'
    long_name = 'Disco Bleach'
    url = 'http://discobleach.com'
1991
1992
1993
class TubeyToons(GenericEmptyComic):  # Does not work anymore
    """Placeholder for the TubeyToons comics (retrieval is disabled)."""
    # Also on http://tapastic.com/series/Tubey-Toons
    # Also on https://tubeytoons.tumblr.com
    name = 'tubeytoons'
    long_name = 'Tubey Toons'
    url = 'http://tubeytoons.com'
    # NOTE(review): 'TUNEYTOONS' looks like a typo for 'TUBEYTOONS' - confirm
    # that no caller relies on the current spelling before renaming it.
    _categories = ('TUNEYTOONS', )
2001
2002
2003
class CompletelySeriousComics(GenericNavigableComic):
    """Retriever for the Completely Serious comics."""
    name = 'completelyserious'
    long_name = 'Completely Serious Comics'
    url = 'http://completelyseriouscomics.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The author is the second child of the ``post-author`` span. At least
        one image is expected in the ``comicpane`` div and all images must
        share the same ``title``/``alt`` text, which is reported as alt.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find('span', class_='post-author')
        author = author_span.contents[1].string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, '%B %d, %Y')
        imgs = soup.find('div', class_='comicpane').find_all('img')
        assert imgs
        alt = imgs[0]['title']
        for img in imgs:
            assert img['title'] == img['alt'] == alt
        return {
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [img['src'] for img in imgs],
            'title': title,
            'alt': alt,
            'author': author,
        }
2031
2032
2033
class PoorlyDrawnLines(GenericListableComic):
    """Class to retrieve Poorly Drawn Lines comics."""
    # Also on http://pdlcomics.tumblr.com
    name = 'poorlydrawn'
    long_name = 'Poorly Drawn Lines'
    url = 'https://www.poorlydrawnlines.com'
    _categories = ('POORLYDRAWN', )
    get_url_from_archive_element = get_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        At most one image is expected in the ``post`` div; when there is
        none, the image list and the title are empty.
        """
        imgs = soup.find('div', class_='post').find_all('img')
        assert len(imgs) <= 1
        return {
            'img': [i['src'] for i in imgs],
            'title': imgs[0].get('title', "") if imgs else "",
        }

    @classmethod
    def get_archive_elements(cls):
        """Get the archive anchors pointing to a comic page, reversed."""
        archive_url = urljoin_wrapper(cls.url, 'archive')
        # Escape the site url so its dots are matched literally.
        url_re = re.compile('^%s/comic/.' % re.escape(cls.url))
        return reversed(get_soup_at_url(archive_url).find_all('a', href=url_re))
2057
2058
2059
class LoadingComics(GenericNavigableComic):
    """Retriever for the Loading Artist comics."""
    name = 'loadingartist'
    long_name = 'Loading Artist'
    url = 'http://www.loadingartist.com/latest'

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics (anchor titled ``First``)."""
        latest = get_soup_at_url(cls.url)
        return latest.find('a', title="First")

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic (anchor looked up by title)."""
        wanted_title = 'Next' if next_ else 'Previous'
        return last_soup.find('a', title=wanted_title)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Only the images of the ``comic`` div with empty ``alt`` and
        ``title`` attributes are kept.
        """
        title = soup.find('h1').string
        raw_date = soup.find('span', class_='date').string.strip()
        published = string_to_date(raw_date, "%B %d, %Y")
        comic_div = soup.find('div', class_='comic')
        imgs = comic_div.find_all('img', alt='', title='')
        return {
            'title': title,
            'img': [img['src'] for img in imgs],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2089
2090
2091
class ChuckleADuck(GenericNavigableComic):
    """Retriever for the Chuckle-A-Duck comics."""
    name = 'chuckleaduck'
    long_name = 'Chuckle-A-duck'
    url = 'http://chuckleaduck.com'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Pages without a ``comic`` div (or without image in it) yield an
        empty image list and an empty title; otherwise the title is the
        ``title`` of the first image, shared by every image.
        """
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(remove_st_nd_rd_th_from_date(raw_date), "%B %d, %Y")
        author = soup.find('span', class_='post-author').string
        comic_div = soup.find('div', id='comic')
        if comic_div:
            imgs = comic_div.find_all('img')
        else:
            imgs = []
        if imgs:
            title = imgs[0]['title']
        else:
            title = ""
        for img in imgs:
            assert img['title'] == img['alt'] == title
        return {
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [img['src'] for img in imgs],
            'title': title,
            'author': author,
        }
2117
2118
2119
class DepressedAlien(GenericNavigableComic):
    """Class to retrieve Depressed Alien Comics."""
    name = 'depressedalien'
    long_name = 'Depressed Alien'
    url = 'http://depressedalien.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics (parent of the ``beginArrow`` image)."""
        return get_soup_at_url(cls.url).find('img', attrs={'name': 'beginArrow'}).parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns the parent of the ``rightArrow`` (next) or ``leftArrow``
        (previous) image, or None when that image is not on the page.
        """
        arrow = last_soup.find('img', attrs={'name': 'rightArrow' if next_ else 'leftArrow'})
        # find() yields None when nothing matches: report "no link" instead
        # of failing on the attribute access.
        return arrow.parent if arrow is not None else None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Title comes from the ``twitter:title`` meta tag, images from the
        ``og:image`` meta tags.
        """
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        imgs = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'img': [i['content'] for i in imgs],
        }
2145
2146
2147
class ThingsInSquares(GenericListableComic):
    """Retriever for the Things In Squares comics."""
    # This can be retrieved in other languages
    # Also on https://tapastic.com/series/Things-in-Squares
    name = 'squares'
    long_name = 'Things in squares'
    url = 'http://www.thingsinsquares.com'

    @classmethod
    def get_comic_info(cls, soup, tr):
        """Get information about a particular comics.

        The archive row `tr` must hold exactly three cells: the second one
        contains the comic anchor, the third one the date ("%m.%d.%y").
        The remaining fields are read from the comic page `soup`.
        """
        _, link_cell, date_cell = tr.find_all('td')
        anchor = link_cell.find('a')
        published = string_to_date(date_cell.string, "%m.%d.%y")
        title = anchor.string
        title2 = soup.find('meta', property='og:title')['content']
        desc_meta = soup.find('meta', property='og:description')
        if desc_meta:
            description = desc_meta['content']
        else:
            description = ''
        tag_metas = soup.find_all('meta', property='article:tag')
        tags = ' '.join(t['content'] for t in tag_metas)
        imgs = soup.find('div', class_='entry-content').find_all('img')
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'title': title,
            'title2': title2,
            'description': description,
            'tags': tags,
            'img': [img['src'] for img in imgs],
            'alt': ' '.join(img['alt'] for img in imgs),
        }

    @classmethod
    def get_url_from_archive_element(cls, tr):
        """Get url corresponding to an archive row (href of its second cell)."""
        _, link_cell, _date_cell = tr.find_all('td')
        anchor = link_cell.find('a')
        return anchor['href']

    @classmethod
    def get_archive_elements(cls):
        """Get the rows of the archive table body, in reversed page order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive-2'))
        rows = archive_soup.find('tbody').find_all('tr')
        return reversed(rows)
2189
2190
2191
class HappleTea(GenericNavigableComic):
    """Retriever for the Happle Tea comics."""
    name = 'happletea'
    long_name = 'Happle Tea'
    url = 'http://www.happletea.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Title, author and date are looked up inside the ``post-content``
        div; the alt text concatenates (without separator) the ``alt`` of
        every image of the ``comic`` div.
        """
        imgs = soup.find('div', id='comic').find_all('img')
        post = soup.find('div', class_='post-content')
        title = post.find('h2', class_='post-title').string
        author = post.find('a', rel='author').string
        raw_date = post.find('span', class_='post-date').string
        published = string_to_date(raw_date, "%B %d, %Y")
        for img in imgs:
            assert img['alt'] == img['title']
        img_urls = [img['src'] for img in imgs]
        alt = ''.join(img['alt'] for img in imgs)
        return {
            'title': title,
            'img': img_urls,
            'alt': alt,
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'author': author,
        }
2218
2219
2220
class RockPaperScissors(GenericNavigableComic):
    """Retriever for the Rock Paper Scissors comics."""
    name = 'rps'
    long_name = 'Rock Paper Scissors'
    url = 'http://rps-comics.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Collects the page title, the ``og:image`` urls, the short link and
        the content of the ``transcript-content`` div.
        """
        title = soup.find('title').string
        image_metas = soup.find_all('meta', property='og:image')
        shortlink = soup.find('link', rel='shortlink')
        short_url = shortlink['href']
        transcript = soup.find('div', id='transcript-content').string
        return {
            'title': title,
            'transcript': transcript,
            'short_url': short_url,
            'img': [meta['content'] for meta in image_metas],
        }
2241
2242
2243
class FatAwesomeComics(GenericNavigableComic):
    """Retriever for the Fat Awesome comics."""
    # Also on http://fatawesomecomedy.tumblr.com
    name = 'fatawesome'
    long_name = 'Fat Awesome'
    url = 'http://fatawesome.com/comics'
    get_navi_link = get_a_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://fatawesome.com/shortbus/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Exactly one image with ``data-recalc-dims="1"`` is expected; its
        url is reported without the part following the last ``?``. The tags
        default to an empty string when the ``article:tag`` meta is absent.
        """
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        description = soup.find('meta', attrs={'name': 'description'})['content']
        tag_meta = soup.find('meta', property='article:tag')
        if tag_meta:
            tags = tag_meta['content']
        else:
            tags = ""
        published_meta = soup.find('meta', property='article:published_time')
        iso_date = published_meta['content'][:10]
        published = string_to_date(iso_date, "%Y-%m-%d")
        imgs = soup.find_all('img', attrs={'data-recalc-dims': "1"})
        assert len(imgs) == 1
        alt = "".join(img['alt'] for img in imgs)
        img_urls = [img['src'].rsplit('?', 1)[0] for img in imgs]
        return {
            'title': title,
            'description': description,
            'tags': tags,
            'alt': alt,
            'img': img_urls,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2274
2275
2276
class AnythingComic(GenericListableComic):
    """Retriever for the Anything Comic comics."""
    # Also on http://tapastic.com/series/anything
    name = 'anythingcomic'
    long_name = 'Anything Comic'
    url = 'http://www.anythingcomic.com'

    @classmethod
    def get_archive_elements(cls):
        """Get the rows of the ``chapter_table`` table that describe comics."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive/'))
        rows = archive_soup.find('table', id='chapter_table').find_all('tr')
        # The first 2 <tr>'s do not correspond to comics
        return rows[2:]

    @classmethod
    def get_url_from_archive_element(cls, tr):
        """Get url corresponding to an archive element.

        The row must hold exactly four cells; the url is the href of the
        anchor of the second one, joined to the class url.
        """
        _num_cell, comic_cell, _date_cell, _ = tr.find_all('td')
        anchor = comic_cell.find('a')
        return urljoin_wrapper(cls.url, anchor['href'])

    @classmethod
    def get_comic_info(cls, soup, tr):
        """Get information about a particular comics.

        Number, title and date come from the archive row `tr`; the single
        ``comic_image`` image comes from the comic page `soup`.
        """
        num_cell, comic_cell, date_cell, _ = tr.find_all('td')
        num = int(num_cell.string)
        anchor = comic_cell.find('a')
        title = anchor.string
        imgs = soup.find_all('img', id='comic_image')
        raw_date = date_cell.string
        published = string_to_date(
            remove_st_nd_rd_th_from_date(raw_date), "%B %d, %Y, %I:%M %p")
        assert len(imgs) == 1
        for img in imgs:
            assert img.get('alt') == img.get('title')
        return {
            'num': num,
            'title': title,
            'alt': imgs[0].get('alt', ''),
            'img': [img['src'] for img in imgs],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2317
2318
2319
class LonnieMillsap(GenericNavigableComic):
    """Retriever for Lonnie Millsap's comics."""
    name = 'millsap'
    long_name = 'Lonnie Millsap'
    url = 'http://www.lonniemillsap.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.lonniemillsap.com/?p=42'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, author, date and images from a comic page.

        `link` is not used by this implementation.
        """
        title = soup.find('h2', class_='post-title').string
        # Author, date and images are all looked up inside the post content.
        content = soup.find('div', class_='post-content')
        author_span = content.find("span", class_="post-author")
        author = author_span.find("a").string
        published = content.find("span", class_="post-date").string
        day = string_to_date(published, "%B %d, %Y")
        images = content.find("div", class_="entry").find_all("img")
        return {
            'title': title,
            'author': author,
            'img': [img['src'] for img in images],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
2345
2346
2347
class LinsEditions(GenericNavigableComic):
    """Retriever for L.I.N.S. Editions comics."""
    # Also on https://linscomics.tumblr.com
    # Now on https://warandpeas.com
    name = 'lins'
    long_name = 'L.I.N.S. Editions'
    url = 'https://linsedition.com'
    _categories = ('LINS', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'https://linsedition.com/2011/09/07/l-i-n-s/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Read title, images and publication date from the page <meta> tags.

        `link` is not used by this implementation.
        """
        title = soup.find('meta', property='og:title')['content']
        image_metas = soup.find_all('meta', property='og:image')
        published = soup.find('meta', property='article:published_time')['content']
        # Only the first 10 characters (the date part) are parsed.
        day = string_to_date(published[:10], "%Y-%m-%d")
        return {
            'title': title,
            'img': [meta['content'] for meta in image_metas],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
2373
2374
2375
class ThorsThundershack(GenericNavigableComic):
    """Retriever for Thor's Thundershack comics."""
    # Also on http://tapastic.com/series/Thors-Thundershac
    name = 'thor'
    long_name = 'Thor\'s Thundershack'
    url = 'http://www.thorsthundershack.com'
    _categories = ('THOR', )
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics."""
        home = get_soup_at_url(cls.url)
        return home.find('a', class_='first navlink')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns the first matching <a> whose href is not '/comic',
        or None when there is no such link.
        """
        rel = 'next' if next_ else 'prev'
        candidates = (
            anchor for anchor in last_soup.find_all('a', rel=rel)
            if anchor['href'] != '/comic')
        return next(candidates, None)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect the info dict of a comic from the page's microdata.

        `link` is not used by this implementation.
        """
        title = soup.find('meta', attrs={'name': 'description'})["content"]
        description = soup.find('div', itemprop='articleBody').text
        author = soup.find('span', itemprop='author copyrightHolder').string
        images = soup.find_all('img', itemprop='image')
        # Sanity check on the page layout: title and alt texts are the same.
        assert all(img['title'] == img['alt'] for img in images)
        if images:
            alt = images[0]['alt']
        else:
            alt = ""
        published = soup.find('time', itemprop='datePublished')["datetime"]
        day = string_to_date(published, "%Y-%m-%d %H:%M:%S")
        return {
            'img': [urljoin_wrapper(cls.url, img['src']) for img in images],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'author': author,
            'title': title,
            'alt': alt,
            'description': description,
        }
2418
2419
2420
class GerbilWithAJetpack(GenericNavigableComic):
    """Retriever for Gerbil With A Jetpack comics."""
    name = 'gerbil'
    long_name = 'Gerbil With A Jetpack'
    url = 'http://gerbilwithajetpack.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, author, date, images and alt text from a page.

        `link` is not used by this implementation.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        published = soup.find("span", class_="post-date").string
        day = string_to_date(published, "%B %d, %Y")
        images = soup.find("div", id="comic").find_all("img")
        # At least one image is required: the first one provides the alt text.
        alt = images[0]['alt']
        assert all(img['alt'] == img['title'] == alt for img in images)
        return {
            'img': [img['src'] for img in images],
            'title': title,
            'alt': alt,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
2447
2448
2449
class EveryDayBlues(GenericEmptyComic, GenericNavigableComic):
    """Retriever for Every Day Blues comics."""
    name = "blues"
    long_name = "Every Day Blues"
    url = "http://everydayblues.net"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, author, date and (at most one) image from a page.

        `link` is not used by this implementation.
        """
        title = soup.find("h2", class_="post-title").string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        published = soup.find("span", class_="post-date").string
        # The date is parsed with the "de_DE.utf8" locale.
        day = string_to_date(published, "%d. %B %Y", "de_DE.utf8")
        images = soup.find("div", id="comic").find_all("img")
        # Sanity checks on the page layout.
        assert all(img['alt'] == img['title'] == title for img in images)
        assert len(images) <= 1
        return {
            'img': [img['src'] for img in images],
            'title': title,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
2475
2476
2477
class BiterComics(GenericNavigableComic):
    """Retriever for Biter Comics."""
    name = "biter"
    long_name = "Biter Comics"
    url = "http://www.bitercomics.com"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, author, date, image and alt text from a page.

        `link` is not used by this implementation.
        """
        title = soup.find("h1", class_="entry-title").string
        author_span = soup.find("span", class_="author vcard")
        author = author_span.find("a").string
        published = soup.find("span", class_="entry-date").string
        day = string_to_date(published, "%B %d, %Y")
        images = soup.find("div", id="comic").find_all("img")
        # Sanity checks on the page layout: exactly one image, alt == title.
        assert all(img['alt'] == img['title'] for img in images)
        assert len(images) == 1
        alt = images[0]['alt']
        return {
            'img': [img['src'] for img in images],
            'title': title,
            'alt': alt,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
2505
2506
2507
class TheAwkwardYeti(GenericNavigableComic):
    """Retriever for The Awkward Yeti comics."""
    # Also on http://www.gocomics.com/the-awkward-yeti
    # Also on http://larstheyeti.tumblr.com
    # Also on https://tapastic.com/series/TheAwkwardYeti
    name = 'yeti'
    long_name = 'The Awkward Yeti'
    url = 'http://theawkwardyeti.com'
    _categories = ('YETI', )
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, date and images from a comic page.

        `link` is not used by this implementation.
        """
        title = soup.find('h2', class_='post-title').string
        published = soup.find("span", class_="post-date").string
        day = string_to_date(published, "%B %d, %Y")
        images = soup.find("div", id="comic").find_all("img")
        # Only the first image (if any) is checked for alt == title.
        assert not images or images[0]['alt'] == images[0]['title']
        return {
            'img': [img['src'] for img in images],
            'title': title,
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
2534
2535
2536
class PleasantThoughts(GenericNavigableComic):
    """Retriever for Pleasant Thoughts comics."""
    name = 'pleasant'
    long_name = 'Pleasant Thoughts'
    url = 'http://pleasant-thoughts.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title and images from the post content of a page.

        `link` is not used by this implementation.
        """
        content = soup.find('div', class_='post-content')
        title = content.find('h2', class_='post-title').string
        entry = content.find("div", class_="entry")
        images = entry.find_all("img")
        return {
            'title': title,
            'img': [img['src'] for img in images],
        }
2554
2555
2556
class MisterAndMe(GenericNavigableComic):
    """Retriever for Mister & Me comics."""
    # Also on http://www.gocomics.com/mister-and-me
    # Also on https://tapastic.com/series/Mister-and-Me
    name = 'mister'
    long_name = 'Mister & Me'
    url = 'http://www.mister-and-me.com'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, author, date, image and alt text from a page.

        `link` is not used by this implementation.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        published = soup.find("span", class_="post-date").string
        day = string_to_date(published, "%B %d, %Y")
        images = soup.find("div", id="comic").find_all("img")
        # Sanity checks on the page layout: at most one image, alt == title.
        assert all(img['alt'] == img['title'] for img in images)
        assert len(images) <= 1
        if images:
            alt = images[0]['alt']
        else:
            alt = ""
        return {
            'img': [img['src'] for img in images],
            'title': title,
            'alt': alt,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
2586
2587
2588
class LastPlaceComics(GenericNavigableComic):
    """Retriever for Last Place Comics."""
    name = 'lastplace'
    long_name = 'Last Place Comics'
    url = "http://lastplacecomics.com"
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, author, date, image and alt text from a page.

        `link` is not used by this implementation.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        published = soup.find("span", class_="post-date").string
        day = string_to_date(published, "%B %d, %Y")
        images = soup.find("div", id="comic").find_all("img")
        # Sanity checks on the page layout: at most one image, alt == title.
        assert all(img['alt'] == img['title'] for img in images)
        assert len(images) <= 1
        if images:
            alt = images[0]['alt']
        else:
            alt = ""
        return {
            'img': [img['src'] for img in images],
            'title': title,
            'alt': alt,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
2616
2617
2618
class TalesOfAbsurdity(GenericNavigableComic):
    """Retriever for Tales Of Absurdity comics."""
    # Also on http://tapastic.com/series/Tales-Of-Absurdity
    # Also on http://talesofabsurdity.tumblr.com
    name = 'absurdity'
    long_name = 'Tales of Absurdity'
    url = 'http://talesofabsurdity.com'
    _categories = ('ABSURDITY', )
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, author, date, images and alt text from a page.

        `link` is not used by this implementation.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        published = soup.find("span", class_="post-date").string
        day = string_to_date(published, "%B %d, %Y")
        images = soup.find("div", id="comic").find_all("img")
        # Sanity check on the page layout: alt and title texts are the same.
        assert all(img['alt'] == img['title'] for img in images)
        if images:
            alt = images[0]['alt']
        else:
            alt = ""
        return {
            'img': [img['src'] for img in images],
            'title': title,
            'alt': alt,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
2648
2649
2650
class EndlessOrigami(GenericEmptyComic, GenericNavigableComic):
    """Retriever for Endless Origami comics."""
    name = "origami"
    long_name = "Endless Origami"
    url = "http://endlessorigami.com"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, author, date, images and alt text from a page.

        `link` is not used by this implementation.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        published = soup.find("span", class_="post-date").string
        day = string_to_date(published, "%B %d, %Y")
        images = soup.find("div", id="comic").find_all("img")
        # Sanity check on the page layout: alt and title texts are the same.
        assert all(img['alt'] == img['title'] for img in images)
        if images:
            alt = images[0]['alt']
        else:
            alt = ""
        return {
            'img': [img['src'] for img in images],
            'title': title,
            'alt': alt,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
2677
2678
2679
class PlanC(GenericNavigableComic):
    """Retriever for Plan C comics."""
    name = 'planc'
    long_name = 'Plan C'
    url = 'http://www.plancomic.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, date and images from a comic page.

        `link` is not used by this implementation.
        """
        title = soup.find('h2', class_='post-title').string
        published = soup.find("span", class_="post-date").string
        day = string_to_date(published, "%B %d, %Y")
        comic_div = soup.find('div', id='comic')
        images = comic_div.find_all('img')
        return {
            'title': title,
            'img': [img['src'] for img in images],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
2701
2702
2703
class BuniComic(GenericNavigableComic):
    """Retriever for Buni comics."""
    name = 'buni'
    long_name = 'BuniComics'
    url = 'http://www.bunicomic.com'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract the single image of a page; its title is the comic title.

        `link` is not used by this implementation.
        """
        comic_div = soup.find('div', id='comic')
        images = comic_div.find_all('img')
        # Sanity checks on the page layout: exactly one image, alt == title.
        assert all(img['alt'] == img['title'] for img in images)
        assert len(images) == 1
        return {
            'img': [img['src'] for img in images],
            'title': images[0]['title'],
        }
2721
2722
2723
class GenericCommitStrip(GenericNavigableComic):
    """Shared base for the Commit Strip retrievers of each language.

    Subclasses provide `name`, `long_name`, `url` and `first_url`.
    """
    get_navi_link = get_a_rel_next
    get_first_comic_link = simulate_first_link
    first_url = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract titles, description and images from a strip page.

        `link` is not used by this implementation.
        """
        desc = soup.find('meta', property='og:description')['content']
        title = soup.find('meta', property='og:title')['content']
        images = soup.find('div', class_='entry-content').find_all('img')
        # Secondary title: the images' title attributes, space-separated
        # (an image without a title contributes an empty string).
        title2 = ' '.join(img.get('title', '') for img in images)
        # Image sources are converted to plain ASCII URIs and then joined
        # with the site URL.
        image_urls = [
            urljoin_wrapper(cls.url, convert_iri_to_plain_ascii_uri(img['src']))
            for img in images]
        return {
            'title': title,
            'title2': title2,
            'description': desc,
            'img': image_urls,
        }
2742
2743
2744
class CommitStripFr(GenericCommitStrip):
    """Retriever for the French edition of Commit Strip."""
    # Identification
    name = 'commit_fr'
    long_name = 'Commit Strip (Fr)'
    _categories = ('FRANCAIS', )
    # Site root and page of the first strip
    url = 'http://www.commitstrip.com/fr'
    first_url = 'http://www.commitstrip.com/fr/2012/02/22/interview/'
2751
2752
2753
class CommitStripEn(GenericCommitStrip):
    """Retriever for the English edition of Commit Strip."""
    # Identification
    name = 'commit_en'
    long_name = 'Commit Strip (En)'
    # Site root and page of the first strip
    url = 'http://www.commitstrip.com/en'
    first_url = 'http://www.commitstrip.com/en/2012/02/22/interview/'
2759
2760
2761
class GenericBoumerie(GenericNavigableComic):
    """Shared base for the Boumeries retrievers of each language.

    Subclasses provide `date_format` and `lang`, which are both passed to
    string_to_date to parse the post date.
    """
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next
    date_format = NotImplemented
    lang = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract short url, title, author, date and images from a page.

        `link` is not used by this implementation.
        """
        title = soup.find('h2', class_='post-title').string
        short_url = soup.find('link', rel='shortlink')['href']
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        published = soup.find('span', class_='post-date').string
        day = string_to_date(published, cls.date_format, cls.lang)
        images = soup.find('div', id='comic').find_all('img')
        # Sanity check on the page layout: alt and title texts are the same.
        assert all(img['alt'] == img['title'] for img in images)
        return {
            'short_url': short_url,
            'img': [img['src'] for img in images],
            'title': title,
            'author': author,
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
2787
2788
2789
class BoumerieEn(GenericBoumerie):
    """Retriever for the English edition of Boumeries."""
    # Identification
    name = 'boumeries_en'
    long_name = 'Boumeries (En)'
    url = 'http://comics.boumerie.com'
    # Date parsing settings used by GenericBoumerie.get_comic_info
    lang = 'en_GB.UTF-8'
    date_format = "%B %d, %Y"
2796
2797
2798
class BoumerieFr(GenericBoumerie):
    """Retriever for the French edition of Boumeries."""
    # Identification
    name = 'boumeries_fr'
    long_name = 'Boumeries (Fr)'
    url = 'http://bd.boumerie.com'
    _categories = ('FRANCAIS', )
    # Date parsing settings used by GenericBoumerie.get_comic_info
    lang = "fr_FR.utf8"
    date_format = "%A, %d %B %Y"
2806
2807
2808
class UnearthedComics(GenericNavigableComic):
    """Class to retrieve Unearthed comics."""
    # Also on http://tapastic.com/series/UnearthedComics
    # Also on https://unearthedcomics.tumblr.com
    name = 'unearthed'
    long_name = 'Unearthed Comics'
    url = 'http://unearthedcomics.com'
    _categories = ('UNEARTHED', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://unearthedcomics.com/comics/world-with-turn-signals/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        `soup` is the parsed comic page; `link` is not used here.
        Returns a dict with the title, description, short url ('url2'),
        image urls and publication date of the comic.
        """
        short_url = soup.find('link', rel='shortlink')['href']
        # The title is in the first <h1> if there is one, else the first <h2>.
        title_elt = soup.find('h1') or soup.find('h2')
        title = title_elt.string if title_elt else ""
        # Keep the text of the description and not the <meta> element itself,
        # like the other comics do. A page without description yields None.
        desc_elt = soup.find('meta', property='og:description')
        desc = desc_elt['content'] if desc_elt is not None else None
        date_str = soup.find('time', class_='published updated hidden')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        post = soup.find('div', class_="entry content entry-content type-portfolio")
        imgs = post.find_all('img')
        return {
            'title': title,
            'description': desc,
            'url2': short_url,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
2840
2841
2842
class Optipess(GenericNavigableComic):
    """Retriever for Optipess comics."""
    name = 'optipess'
    long_name = 'Optipess'
    url = 'http://www.optipess.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, author, images, alt text and date from a page.

        A page without a comic <div> yields no image and an empty alt text.
        `link` is not used by this implementation.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        comic_div = soup.find('div', id='comic')
        if comic_div:
            images = comic_div.find_all('img')
        else:
            images = []
        if images:
            alt = images[0]['title']
        else:
            alt = ""
        # Sanity check: every image has the same alt and title texts.
        assert all(img['alt'] == img['title'] == alt for img in images)
        published = soup.find('span', class_='post-date').string
        day = string_to_date(published, "%B %d, %Y")
        return {
            'title': title,
            'alt': alt,
            'author': author,
            'img': [img['src'] for img in images],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
2870
2871
2872
class PainTrainComic(GenericNavigableComic):
    """Class to retrieve Pain Train Comics."""
    name = 'paintrain'
    long_name = 'Pain Train Comics'
    url = 'http://paintraincomic.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        `soup` is the parsed comic page; `link` is not used here.
        Returns a dict with the short url, the comic number taken from the
        short url, the image urls, the publication date, the alt text and
        the title of the comic.
        """
        title = soup.find('h2', class_='post-title').string
        short_url = soup.find('link', rel='shortlink')['href']
        # The site url is escaped so that it is matched literally (a '.'
        # would otherwise match any character) and at least one digit is
        # required for the comic number.
        short_url_re = re.compile(r'^%s/\?p=([0-9]+)' % re.escape(cls.url))
        num = int(short_url_re.match(short_url).group(1))
        imgs = soup.find('div', id='comic').find_all('img')
        # At least one image is required: the first one provides the alt text.
        alt = imgs[0]['title']
        assert all(i['alt'] == i['title'] == alt for i in imgs)
        date_str = soup.find('span', class_='post-date').string
        day = string_to_date(date_str, "%d/%m/%Y")
        return {
            'short_url': short_url,
            'num': num,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'alt': alt,
            'title': title,
        }
2902
2903
2904
class MoonBeard(GenericNavigableComic):
    """Class to retrieve MoonBeard comics."""
    # Also on http://blog.squiresjam.es
    # Also on http://www.webtoons.com/en/comedy/moon-beard/list?title_no=471
    name = 'moonbeard'
    long_name = 'Moon Beard'
    url = 'http://moonbeard.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        `soup` is the parsed comic page; `link` is not used here.
        Returns a dict with the short url, the comic number taken from the
        short url, the image urls, the publication date, the title, the
        space-separated tags, the alt text and the author of the comic.
        """
        title = soup.find('h2', class_='post-title').string
        short_url = soup.find('link', rel='shortlink')['href']
        # The site url is escaped so that it is matched literally (a '.'
        # would otherwise match any character) and at least one digit is
        # required for the comic number.
        short_url_re = re.compile(r'^%s/\?p=([0-9]+)' % re.escape(cls.url))
        num = int(short_url_re.match(short_url).group(1))
        imgs = soup.find('div', id='comic').find_all('img')
        # At least one image is required: the first one provides the alt text.
        alt = imgs[0]['title']
        assert all(i['alt'] == i['title'] == alt for i in imgs)
        date_str = soup.find('span', class_='post-date').string
        day = string_to_date(date_str, "%B %d, %Y")
        tags = ' '.join(t['content'] for t in soup.find_all('meta', property='article:tag'))
        author = soup.find('span', class_='post-author').string
        return {
            'short_url': short_url,
            'num': num,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'title': title,
            'tags': tags,
            'alt': alt,
            'author': author,
        }
2940
2941
2942
class AHammADay(GenericEmptyComic, GenericNavigableComic):
    """Class to retrieve class A Hamm A Day comics."""
    name = 'hamm'
    long_name = 'A Hamm A Day'
    url = 'http://www.ahammaday.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.ahammaday.com/today/3/6/french'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns the <a> found in the relevant <li>, or None when the page
        has no such <li>.
        """
        # prev is next / next is prev
        li = last_soup.find('li', class_='previous' if next_ else 'next')
        # Without this check, a page lacking the <li> raises AttributeError.
        return li.find('a') if li is not None else None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        `soup` is the parsed comic page; `link` is not used here.
        Returns a dict with the image urls, title, author and publication
        date of the comic.
        """
        date_str = soup.find('time', class_='published')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        author = soup.find('span', class_='blog-author').find('a').string
        title = soup.find('meta', property='og:title')['content']
        imgs = soup.find_all('meta', itemprop='image')
        return {
            'img': [i['content'] for i in imgs],
            'title': title,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
        }
2973
2974
2975
class LittleLifeLines(GenericNavigableComic):
    """Retriever for Little Life Lines comics."""
    # Also on https://little-life-lines.tumblr.com
    name = 'life'
    long_name = 'Little Life Lines'
    url = 'http://www.littlelifelines.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.littlelifelines.com/comics/well-done'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic (None when there is none)."""
        # prev is next / next is prev
        wanted_class = 'prev' if next_ else 'next'
        item = last_soup.find('li', class_=wanted_class)
        if not item:
            return None
        return item.find('a')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, description, author, date and images from a page.

        `link` is not used by this implementation.
        """
        title = soup.find('meta', property='og:title')['content']
        desc = soup.find('meta', property='og:description')['content']
        published = soup.find('time', class_='published')['datetime']
        day = string_to_date(published, "%Y-%m-%d")
        author = soup.find('a', rel='author').string
        body = soup.find('div', class_="body entry-content")
        # Images without a 'src' attribute are ignored.
        images = [img for img in body.find_all('img')
                  if img.get('src') is not None]
        # At least one image is required: the first one provides the alt text.
        alt = images[0]['alt']
        return {
            'title': title,
            'alt': alt,
            'description': desc,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [img['src'] for img in images],
        }
3014
3015
3016
class GenericWordPressInkblot(GenericNavigableComic):
    """Shared base for comics hosted on WordPress with Inkblot."""
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics."""
        first_link_class = 'webcomic-link webcomic1-link first-webcomic-link first-webcomic1-link'
        home = get_soup_at_url(cls.url)
        return home.find('a', class_=first_link_class)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, publication date and images from a comic page.

        `link` is not used by this implementation.
        """
        title = soup.find('meta', property='og:title')['content']
        images = soup.find('div', class_='webcomic-image').find_all('img')
        published = soup.find('meta', property='article:published_time')['content']
        # Only the first 10 characters (the date part) are parsed.
        day = string_to_date(published[:10], "%Y-%m-%d")
        return {
            'title': title,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [img['src'] for img in images],
        }
3039
3040
3041
class EverythingsStupid(GenericWordPressInkblot):
    """Retriever for Everything's Stupid comics."""
    # Also on http://tapastic.com/series/EverythingsStupid
    # Also on http://www.webtoons.com/en/challenge/everythings-stupid/list?title_no=14591
    # Also on http://everythingsstupidcomics.tumblr.com
    url = 'http://everythingsstupid.net'
    name = 'stupid'
    long_name = "Everything's Stupid"
3049
3050
3051
class TheIsmComics(GenericWordPressInkblot):
    """Retriever for The Ism comics."""
    # Also on https://tapastic.com/series/TheIsm (?)
    url = 'http://www.theism-comics.com'
    name = 'theism'
    long_name = "The Ism"
3057
3058
3059
class WoodenPlankStudios(GenericWordPressInkblot):
    """Wooden Plank Studios comics (WordPress/Inkblot site)."""
    name = "woodenplank"
    long_name = "Wooden Plank Studios"
    url = "http://woodenplankstudios.com"
3064
3065
3066
class ElectricBunnyComic(GenericNavigableComic):
    """Electric Bunny Comics, navigated through its First/Next/Back buttons."""
    # Other source for the same comic: http://electricbunnycomics.tumblr.com
    name = "bunny"
    long_name = "Electric Bunny Comic"
    url = "http://www.electricbunnycomics.com/View/Comic/153/Welcome+to+Hell"
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the element wrapping the image whose alt text is 'First'."""
        first_button = get_soup_at_url(cls.url).find('img', alt='First')
        return first_button.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the parent of the 'Next' (or 'Back') image, None if absent."""
        if next_:
            wanted_alt = 'Next'
        else:
            wanted_alt = 'Back'
        button = last_soup.find('img', alt=wanted_alt)
        if not button:
            return None
        return button.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the title and image urls taken from the Open Graph metadata."""
        title = soup.find('meta', property='og:title')['content']
        og_images = soup.find_all('meta', property='og:image')
        info = {'title': title}
        info['img'] = [meta['content'] for meta in og_images]
        return info
3094
3095
3096
class SheldonComics(GenericNavigableComic):
    """Sheldon comics, navigated through the nav-first/nav-next/nav-prev links."""
    # Other source for the same comic: http://www.gocomics.com/sheldon
    name = "sheldon"
    long_name = "Sheldon Comics"
    url = "http://www.sheldoncomics.com"

    @classmethod
    def get_first_comic_link(cls):
        """Return the anchor with id 'nav-first' from the home page."""
        home = get_soup_at_url(cls.url)
        return home.find("a", id="nav-first")

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the next/previous anchor, skipping links to the site root."""
        wanted_id = "nav-next" if next_ else "nav-prev"
        candidates = (
            anchor
            for anchor in last_soup.find_all("a", id=wanted_id)
            if anchor['href'] != 'http://www.sheldoncomics.com'
        )
        # First anchor that does not merely point back at the home page.
        return next(candidates, None)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the title and image url of the single comic image."""
        images = soup.find("div", id="comic-foot").find_all("img")
        # The page is expected to hold exactly one image whose alt and title agree.
        assert all(image['alt'] == image['title'] for image in images)
        assert len(images) == 1
        return {
            'title': images[0]['title'],
            'img': [image['src'] for image in images],
        }
3127
3128
3129
class Ubertool(GenericNavigableComic):
    """Ubertool comics."""
    # Other sources for the same comic:
    #   https://ubertool.tumblr.com
    #   https://tapastic.com/series/ubertool
    name = "ubertool"
    long_name = "Ubertool"
    url = "http://ubertoolcomic.com"
    _categories = ("UBERTOOL",)
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_a_comicnavbase_comicnavnext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return image urls, title and publication date of a comic page."""
        title = soup.find('h2', class_='post-title').string
        posted_on = soup.find('span', class_='post-date').string
        # Dates are parsed with the "<month name> <day>, <year>" format.
        day = string_to_date(posted_on, "%B %d, %Y")
        comic_div = soup.find('div', id='comic')
        info = {}
        info['img'] = [image['src'] for image in comic_div.find_all('img')]
        info['title'] = title
        info['month'] = day.month
        info['year'] = day.year
        info['day'] = day.day
        return info
3154
3155
3156
class EarthExplodes(GenericNavigableComic):
    """The Earth Explodes comics; navigation starts from a fixed first url."""
    name = "earthexplodes"
    long_name = "The Earth Explodes"
    url = "http://www.earthexplodes.com"
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = "http://www.earthexplodes.com/comics/000/"

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor whose id is 'next' (or 'prev')."""
        if next_:
            return last_soup.find('a', id='next')
        return last_soup.find('a', id='prev')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return absolute image urls, page title and hover text."""
        page_title = soup.find('title').string
        images = soup.find('div', id='image').find_all('img')
        # The 'title' attribute of the first image is stored as the alt text.
        hover_text = images[0].get('title', '')
        absolute_urls = []
        for image in images:
            absolute_urls.append(urljoin_wrapper(cls.url, image['src']))
        return {
            'img': absolute_urls,
            'title': page_title,
            'alt': hover_text,
        }
3181
3182
3183
class PomComics(GenericNavigableComic):
    """Pom Comics / Piece of Me, navigated through its btn_* links."""
    name = "pom"
    long_name = "Pom Comics / Piece of Me"
    url = "http://www.pomcomic.com"
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the anchor with class 'btn_first' from the home page."""
        home = get_soup_at_url(cls.url)
        return home.find('a', class_='btn_first')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor with class 'btn_next' (or 'btn_prev')."""
        button_class = 'btn_next' if next_ else 'btn_prev'
        return last_soup.find('a', class_=button_class)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, description, keywords and absolute image urls."""
        title = soup.find('h1', id="comic-name").string
        description = soup.find('meta', property='og:description')['content']
        keywords = soup.find('meta', attrs={'name': 'keywords'})['content']
        images = soup.find('div', class_='comic').find_all('img')
        info = {
            'title': title,
            'desc': description,
            'tags': keywords,
        }
        info['img'] = [urljoin_wrapper(cls.url, image['src']) for image in images]
        return info
3213
3214
3215
class CubeDrone(GenericEmptyComic, GenericNavigableComic):
    """Cube Drone comics, navigated through glyphicon buttons."""
    name = "cubedrone"
    long_name = "Cube Drone"
    url = "http://cube-drone.com/comics"
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the parent of the 'backward' glyphicon on the home page."""
        backward_icon = get_soup_at_url(cls.url).find(
            'span', class_='glyphicon glyphicon-backward')
        return backward_icon.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the parent of the chevron-right (or chevron-left) glyphicon."""
        direction = 'right' if next_ else 'left'
        chevron = last_soup.find(
            'span', class_='glyphicon glyphicon-chevron-' + direction)
        return chevron.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return twitter metadata, image urls and first-image title/alt."""
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        twitter_url = soup.find('meta', attrs={'name': 'twitter:url'})['content']
        # A date is apparently available in <h2 class="comic_title"><small>
        # (format "%B %d, %Y, %I:%M %p") but it is not extracted.
        images = soup.find_all('img', class_='comic img-responsive')
        first_image = images[0]
        hover_title = first_image['title']
        alt_text = first_image['alt']
        return {
            'url2': twitter_url,
            'title': title,
            'title2': hover_title,
            'alt': alt_text,
            'img': [image['src'] for image in images],
        }
3250
3251
3252
class MakeItStoopid(GenericNavigableComic):
    """Make It Stoopid comics, navigated through its 'cnav' elements."""
    name = "stoopid"
    long_name = "Make it stoopid"
    url = "http://makeitstoopid.com/comic.php"

    @classmethod
    def get_nav(cls, soup):
        """Return the five navigation elements, with None for those lacking href.

        The page is expected to contain the same five 'cnav' elements twice;
        the two groups are asserted to be equal and the first one is used.
        """
        controls = soup.find_all(class_='cnav')
        first_group, second_group = controls[:5], controls[5:]
        assert first_group == second_group
        # Order within the group: begin, prev, archive, next_, end
        nav = []
        for control in first_group:
            nav.append(control if control.get('href') is not None else None)
        return nav

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'begin' navigation element of the home page."""
        home = get_soup_at_url(cls.url)
        return cls.get_nav(home)[0]

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the 'next' (index 3) or 'prev' (index 1) navigation element."""
        position = 3 if next_ else 1
        return cls.get_nav(last_soup)[position]

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the title (from the followed link) and the image urls."""
        title = link['title']
        images = soup.find_all('img', id='comicimg')
        info = {'title': title}
        info['img'] = [image['src'] for image in images]
        return info
3286
3287
3288
class MarketoonistComics(GenericNavigableComic):
    """Marketoonist comics; navigation starts from a fixed first url."""
    name = "marketoonist"
    long_name = "Marketoonist"
    url = "https://marketoonist.com/cartoons"
    get_first_comic_link = simulate_first_link
    get_navi_link = get_link_rel_next
    first_url = "https://marketoonist.com/2002/10/the-8-types-of-brand-managers-2.html"

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return image urls, publication date and title from page metadata."""
        og_images = soup.find_all('meta', property='og:image')
        published = soup.find('meta', property='article:published_time')['content']
        # Only the leading YYYY-MM-DD part of the timestamp is parsed.
        day = string_to_date(published[:10], "%Y-%m-%d")
        title = soup.find('meta', property='og:title')['content']
        info = {'img': [meta['content'] for meta in og_images]}
        info['day'] = day.day
        info['month'] = day.month
        info['year'] = day.year
        info['title'] = title
        return info
3311
3312
3313
class ConsoliaComics(GenericNavigableComic):
    """Consolia comics, navigated through its first/next/prev links."""
    name = "consolia"
    long_name = "consolia"
    url = "https://consolia-comic.com"
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the anchor with class 'first' from the home page."""
        home = get_soup_at_url(cls.url)
        return home.find('a', class_='first')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor with class 'next' (or 'prev')."""
        if next_:
            return last_soup.find('a', class_='next')
        return last_soup.find('a', class_='prev')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, image urls and publication date of a comic page."""
        title = soup.find('meta', property='og:title')['content']
        # The <time> element carries the date in its 'datetime' attribute.
        published = soup.find('time')["datetime"]
        day = string_to_date(published, "%Y-%m-%d")
        og_images = soup.find_all('meta', property='og:image')
        info = {'title': title}
        info['img'] = [meta['content'] for meta in og_images]
        info['day'] = day.day
        info['month'] = day.month
        info['year'] = day.year
        return info
3344
3345
3346
class TuMourrasMoinsBete(GenericNavigableComic):
    """Tu Mourras Moins Bete comics (blogspot site, in French)."""
    name = "mourrasmoinsbete"
    long_name = "Tu Mourras Moins Bete"
    url = "http://tumourrasmoinsbete.blogspot.fr"
    _categories = ("FRANCAIS",)
    get_first_comic_link = simulate_first_link
    first_url = "http://tumourrasmoinsbete.blogspot.fr/2008/06/essai.html"

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the blog pager anchor: 'newer' for next, 'older' for previous."""
        if next_:
            pager_id = 'Blog1_blog-pager-newer-link'
        else:
            pager_id = 'Blog1_blog-pager-older-link'
        return last_soup.find('a', id=pager_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return image urls, author and page title of a blog post."""
        page_title = soup.find('title').string
        body = soup.find('div', itemprop='description articleBody')
        images = body.find_all('img')
        author = soup.find('span', itemprop='author').string
        info = {'img': [image['src'] for image in images]}
        info['author'] = author
        info['title'] = page_title
        return info
3371
3372
3373
class GeekAndPoke(GenericNavigableComic):
    """Class to retrieve Geek And Poke comics.

    Navigation starts from the fixed `first_url` and follows the item
    links found on each page.
    """
    name = 'geek'
    long_name = 'Geek And Poke'
    url = 'http://geek-and-poke.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://geek-and-poke.com/geekandpoke/2006/8/27/a-new-place-for-a-not-so-old-blog.html'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic."""
        # NOTE(review): 'prev-item' is used for the *next* comic and
        # 'next-item' for the previous one - presumably the site orders
        # its items newest first; confirm against the live markup.
        return last_soup.find('a', class_='prev-item' if next_ else 'next-item')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with title, alt text, description, author,
        publication date (day/month/year) and absolute image urls.
        """
        title = soup.find('meta', property='og:title')['content']
        desc = soup.find('meta', property='og:description')['content']
        date_str = soup.find('time', class_='published')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        author = soup.find('a', rel='author').string
        # Posts use one of two containers for their content.
        div_content = (soup.find('div', class_="body entry-content") or
                       soup.find('div', class_="special-content"))
        imgs = div_content.find_all('img')
        # Ignore <img> elements without a 'src' attribute.
        imgs = [i for i in imgs if i.get('src') is not None]
        # NOTE(review): with BeautifulSoup, `in` on a Tag looks at its
        # children rather than its attributes, so this check is probably
        # vacuous (has_attr('title') would test the attribute) - confirm
        # before tightening, as it could start failing on existing pages.
        assert all('title' not in i or i['alt'] == i['title'] for i in imgs)
        # Fall back to an empty string (not a list) so 'alt' is always text.
        alt = imgs[0].get('alt', "") if imgs else ""
        return {
            'title': title,
            'alt': alt,
            'description': desc,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
        }
3411
3412
3413
class GloryOwlComix(GenericNavigableComic):
    """Glory Owl comics (blogspot site, in French, NSFW)."""
    name = "gloryowl"
    long_name = "Glory Owl"
    url = "http://gloryowlcomix.blogspot.fr"
    _categories = ("NSFW", "FRANCAIS")
    get_first_comic_link = simulate_first_link
    first_url = "http://gloryowlcomix.blogspot.fr/2013/02/1_7.html"

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the blog pager anchor: 'newer' for next, 'older' for previous."""
        if next_:
            pager_id = 'Blog1_blog-pager-newer-link'
        else:
            pager_id = 'Blog1_blog-pager-older-link'
        return last_soup.find('a', id=pager_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return image urls, author and page title of a blog post."""
        page_title = soup.find('title').string
        # Images are advertised through <link rel="image_src" href=...>.
        image_links = soup.find_all('link', rel='image_src')
        author = soup.find('a', rel='author').string
        info = {'img': [image_link['href'] for image_link in image_links]}
        info['author'] = author
        info['title'] = page_title
        return info
3438
3439
3440
class GenericTumblrV1(GenericComic):
    """Base class for comics read from a Tumblr blog through the V1 API.

    Subclasses provide `name`, `long_name` and `url`; posts are listed via
    `<url>/api/read/` and only posts of type 'photo' become comics.
    """
    _categories = ("TUMBLR",)

    @classmethod
    def get_next_comic(cls, last_comic):
        """Yield comic dicts for the posts newer than `last_comic`."""
        for post in cls.get_posts(last_comic):
            info = cls.get_comic_info(post)
            if info is None:
                # Not a photo post: nothing to store.
                continue
            yield info

    @classmethod
    def get_url_from_post(cls, post):
        """Return the post url, printing a warning if it is outside `cls.url`."""
        post_url = post['url']
        if not post_url.startswith(cls.url):
            print("url '%s' does not start with '%s'" % (post_url, cls.url))
        return post_url

    @classmethod
    def get_api_url(cls):
        """Return the url of the V1 'read' API endpoint for this blog."""
        return urljoin_wrapper(cls.url, '/api/read/')

    @classmethod
    def get_api_url_for_id(cls, tumblr_id):
        """Return the API url describing the single post `tumblr_id`."""
        query = '?id=%d' % (tumblr_id)
        return cls.get_api_url() + query

    @classmethod
    def get_comic_info(cls, post):
        """Return the comic dict for a post, or None if it is not a photo post."""
        if post['type'] != 'photo':
            return None
        tumblr_id = int(post['id'])
        api_url = cls.get_api_url_for_id(tumblr_id)
        timestamp = int(post['unix-timestamp'])
        day = datetime.datetime.fromtimestamp(timestamp).date()
        caption = post.find('photo-caption')
        if caption:
            title = caption.string
        else:
            title = ""
        tags = ' '.join(tag.string for tag in post.find_all('tag'))
        # Photos may sit in dedicated 'photo' elements and/or directly in the
        # post; fall back to the post itself when there is no 'photo' element.
        photo_holders = post.find_all('photo') or [post]
        # Each photo comes in several resolutions: keep the first url listed.
        photo_urls = [holder.find('photo-url') for holder in photo_holders]
        return {
            'url': cls.get_url_from_post(post),
            'url2': post['url-with-slug'],
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'title': title,
            'tags': tags,
            'img': [photo_url.string for photo_url in photo_urls],
            'tumblr-id': tumblr_id,
            'api_url': api_url,
        }

    @classmethod
    def get_posts(cls, last_comic, nb_post_per_call=10):
        """Return the posts published after `last_comic`, oldest first.

        The API is queried `nb_post_per_call` posts at a time (max 50) and
        serves posts from newer to older; they are accumulated until the
        post of `last_comic` is met, then handed back in chronological order.
        An empty result is returned when the previous post cannot be found.
        """
        if last_comic:
            last_seen_id = last_comic['tumblr-id']
        else:
            last_seen_id = None
        collected = []
        if last_comic is not None:
            # Tumblr posts get deleted sometimes. If the previous post is
            # gone, paging through the whole blog looking for it would be
            # wasted time, so check upfront and give up early and loudly.
            probe_url = cls.get_api_url_for_id(last_seen_id)
            try:
                get_soup_at_url(probe_url)
            except urllib.error.HTTPError:
                try:
                    get_soup_at_url(cls.url)
                except urllib.error.HTTPError:
                    print("Did not find previous post nor main url %s" % cls.url)
                else:
                    print("Did not find previous post %s : it might have been deleted" % probe_url)
                return reversed(collected)
        api_url = cls.get_api_url()
        overview = get_soup_at_url(api_url).find('posts')
        start, total = int(overview['start']), int(overview['total'])
        assert start == 0
        for offset in range(0, total, nb_post_per_call):
            page_url = api_url + '?start=%d&num=%d' % (offset, nb_post_per_call)
            page = get_soup_at_url(page_url).find('posts')
            page_start, page_total = int(page['start']), int(page['total'])
            assert offset == page_start, "%d != %d" % (offset, page_start)
            # The total may change while paging; not handled for the time being.
            assert total == page_total, "%d != %d" % (total, page_total)
            for post in page.find_all('post'):
                post_id = int(post['id'])
                if last_seen_id and last_seen_id == post_id:
                    return reversed(collected)
                collected.append(post)
        if last_seen_id is not None:
            print("Did not find %s : there might be a problem" % last_seen_id)
            return []
        return reversed(collected)
3541
3542
3543
class SaturdayMorningBreakfastCerealTumblr(GenericTumblrV1):
    """Saturday Morning Breakfast Cereal, read from its Tumblr blog."""
    # Other sources for the same comic:
    #   http://www.gocomics.com/saturday-morning-breakfast-cereal
    #   http://www.smbc-comics.com
    name = "smbc-tumblr"
    long_name = "Saturday Morning Breakfast Cereal (from Tumblr)"
    url = "http://smbc-comics.tumblr.com"
    _categories = ("SMBC",)
3551
3552
3553
class IrwinCardozo(GenericTumblrV1):
    """Irwin Cardozo comics, read from their Tumblr blog."""
    name = "irwinc"
    long_name = "Irwin Cardozo"
    url = "http://irwincardozocomics.tumblr.com"
3558
3559
3560
class AccordingToDevin(GenericTumblrV1):
    """According To Devin comics, read from their Tumblr blog."""
    name = "devin"
    long_name = "According To Devin"
    url = "http://accordingtodevin.tumblr.com"
3565
3566
3567
class ItsTheTieTumblr(GenericTumblrV1):
    """It's the tie comics, read from their Tumblr blog."""
    # Other sources for the same comic:
    #   http://itsthetie.com
    #   https://tapastic.com/series/itsthetie
    name = "tie-tumblr"
    long_name = "It's the tie (from Tumblr)"
    url = "http://itsthetie.tumblr.com"
    _categories = ("TIE",)
3575
3576
3577
class OctopunsTumblr(GenericTumblrV1):
    """Octopuns comics, read from their Tumblr blog."""
    # Other source for the same comic: http://www.octopuns.net
    name = "octopuns-tumblr"
    long_name = "Octopuns (from Tumblr)"
    url = "http://octopuns.tumblr.com"
3583
3584
3585
class PicturesInBoxesTumblr(GenericTumblrV1):
    """Pictures In Boxes comics, read from their Tumblr blog."""
    # Other source for the same comic: http://www.picturesinboxes.com
    name = "picturesinboxes-tumblr"
    long_name = "Pictures in Boxes (from Tumblr)"
    url = "https://picturesinboxescomic.tumblr.com"
3591
3592
3593
class TubeyToonsTumblr(GenericTumblrV1):
    """Tubey Toons comics, read from their Tumblr blog."""
    # Other sources for the same comic:
    #   http://tapastic.com/series/Tubey-Toons
    #   http://tubeytoons.com
    name = "tubeytoons-tumblr"
    long_name = "Tubey Toons (from Tumblr)"
    url = "https://tubeytoons.tumblr.com"
    # NOTE(review): the category is spelled 'TUNEYTOONS' (with an N); kept
    # as-is since other classes may share this exact value - confirm.
    _categories = ("TUNEYTOONS",)
3601
3602
3603
class UnearthedComicsTumblr(GenericTumblrV1):
    """Unearthed Comics, read from their Tumblr blog."""
    # Other sources for the same comic:
    #   http://tapastic.com/series/UnearthedComics
    #   http://unearthedcomics.com
    name = "unearthed-tumblr"
    long_name = "Unearthed Comics (from Tumblr)"
    url = "https://unearthedcomics.tumblr.com"
    _categories = ("UNEARTHED",)
3611
3612
3613
class PieComic(GenericTumblrV1):
    """Pie Comic, read from its Tumblr blog."""
    name = "pie"
    long_name = "Pie Comic"
    url = "http://piecomic.tumblr.com"
3618
3619
3620
class MrEthanDiamond(GenericTumblrV1):
    """Mr Ethan Diamond comics, read from their Tumblr blog."""
    name = "diamond"
    long_name = "Mr Ethan Diamond"
    url = "http://mrethandiamond.tumblr.com"
3625
3626
3627
class Flocci(GenericTumblrV1):
    """floccinaucinihilipilification comics, read from their Tumblr blog."""
    name = "flocci"
    long_name = "floccinaucinihilipilification"
    url = "http://floccinaucinihilipilificationa.tumblr.com"
3632
3633
3634
class UpAndOut(GenericTumblrV1):
    """Up & Out comics, read from their Tumblr blog."""
    # Other source for the same comic: http://tapastic.com/series/UP-and-OUT
    name = "upandout"
    long_name = "Up And Out (from Tumblr)"
    url = "http://upandoutcomic.tumblr.com"
3640
3641
3642
class Pundemonium(GenericTumblrV1):
    """Pundemonium comics, read from the 'monstika' Tumblr blog."""
    name = "pundemonium"
    long_name = "Pundemonium"
    url = "http://monstika.tumblr.com"
3647
3648
3649
class PoorlyDrawnLinesTumblr(GenericTumblrV1):
    """Poorly Drawn Lines comics, read from their Tumblr blog."""
    # Other source for the same comic: http://poorlydrawnlines.com
    name = "poorlydrawn-tumblr"
    long_name = "Poorly Drawn Lines (from Tumblr)"
    url = "http://pdlcomics.tumblr.com"
    _categories = ("POORLYDRAWN",)
3656
3657
3658
class PearShapedComics(GenericTumblrV1):
    """Pear-Shaped Comics, read through the Tumblr V1 API of their site."""
    name = "pearshaped"
    long_name = "Pear-Shaped Comics"
    url = "http://pearshapedcomics.com"
3663
3664
3665
class PondScumComics(GenericTumblrV1):
    """Pond Scum comics, read from their Tumblr blog."""
    name = "pond"
    long_name = "Pond Scum"
    url = "http://pondscumcomic.tumblr.com"
3670
3671
3672
class MercworksTumblr(GenericTumblrV1):
    """Mercworks comics, read from their Tumblr blog."""
    # Other source for the same comic: http://mercworks.net
    name = "mercworks-tumblr"
    long_name = "Mercworks (from Tumblr)"
    url = "http://mercworks.tumblr.com"
3678
3679
3680
class OwlTurdTumblr(GenericTumblrV1):
    """Owl Turd comics, read from their Tumblr blog."""
    # Other source for the same comic: http://tapastic.com/series/Owl-Turd-Comix
    name = "owlturd-tumblr"
    long_name = "Owl Turd (from Tumblr)"
    url = "http://owlturdcomix.tumblr.com"
    _categories = ("OWLTURD",)
3687
3688
3689
class VectorBelly(GenericTumblrV1):
    """Vector Belly comics, read from their Tumblr blog."""
    # Other source for the same comic: http://vectorbelly.com
    name = "vector"
    long_name = "Vector Belly"
    url = "http://vectorbelly.tumblr.com"
3695
3696
3697
class GoneIntoRapture(GenericTumblrV1):
    """Gone Into Rapture comics, read through the Tumblr V1 API of their site."""
    # Other sources for the same comic:
    #   http://goneintorapture.tumblr.com
    #   http://tapastic.com/series/Goneintorapture
    name = "rapture"
    long_name = "Gone Into Rapture"
    url = "http://goneintorapture.com"
3704
3705
3706
class TheOatmealTumblr(GenericTumblrV1):
    """The Oatmeal comics, read from their Tumblr blog."""
    # Other source for the same comic: http://theoatmeal.com
    name = "oatmeal-tumblr"
    long_name = "The Oatmeal (from Tumblr)"
    url = "http://oatmeal.tumblr.com"
3712
3713
3714
class HeckIfIKnowComicsTumblr(GenericTumblrV1):
    """Heck If I Know comics, read through the Tumblr V1 API of their site."""
    # Other source for the same comic: http://tapastic.com/series/Regular
    name = "heck-tumblr"
    long_name = "Heck if I Know comics (from Tumblr)"
    url = "http://heckifiknowcomics.com"
3720
3721
3722
class MyJetPack(GenericTumblrV1):
    """My Jet Pack comics, read from their Tumblr blog."""
    name = "jetpack"
    long_name = "My Jet Pack"
    url = "http://myjetpack.tumblr.com"
3727
3728
3729
class CheerUpEmoKidTumblr(GenericTumblrV1):
    """Cheer Up Emo Kid comics, read from the 'enzocomics' Tumblr blog."""
    # Other sources for the same comic:
    #   http://www.cheerupemokid.com
    #   http://tapastic.com/series/CUEK
    name = "cuek-tumblr"
    long_name = "Cheer Up Emo Kid (from Tumblr)"
    url = "https://enzocomics.tumblr.com"
3736
3737
3738
class ForLackOfABetterComic(GenericTumblrV1):
    """For Lack Of A Better Comic, read from its Tumblr blog."""
    # Other source for the same comic: http://forlackofabettercomic.com
    name = "lack"
    long_name = "For Lack Of A Better Comic"
    url = "http://forlackofabettercomic.tumblr.com"
3744
3745
3746
class ZenPencilsTumblr(GenericTumblrV1):
    """Zen Pencils comics, read from their Tumblr blog."""
    # Other sources for the same comic:
    #   http://zenpencils.com
    #   http://www.gocomics.com/zen-pencils
    name = "zenpencils-tumblr"
    long_name = "Zen Pencils (from Tumblr)"
    url = "http://zenpencils.tumblr.com"
    _categories = ("ZENPENCILS",)
3754
3755
3756
class ThreeWordPhraseTumblr(GenericTumblrV1):
    """Three Word Phrase comics, read from their Tumblr blog."""
    # Other source for the same comic: http://threewordphrase.com
    name = "threeword-tumblr"
    long_name = "Three Word Phrase (from Tumblr)"
    url = "http://threewordphrase.tumblr.com"
3762
3763
3764
class TimeTrabbleTumblr(GenericTumblrV1):
    """Time Trabble comics, read from their Tumblr blog."""
    # Other source for the same comic: http://timetrabble.com
    name = "timetrabble-tumblr"
    long_name = "Time Trabble (from Tumblr)"
    url = "http://timetrabble.tumblr.com"
3770
3771
3772
class SafelyEndangeredTumblr(GenericTumblrV1):
    """Safely Endangered comics, read through the Tumblr V1 API."""
    # Other source for the same comic: http://www.safelyendangered.com
    name = "endangered-tumblr"
    long_name = "Safely Endangered (from Tumblr)"
    url = "http://tumblr.safelyendangered.com"
3778
3779
3780
class MouseBearComedyTumblr(GenericTumblrV1):
    """Mouse Bear Comedy comics, read from their Tumblr blog."""
    # Other source for the same comic: http://www.mousebearcomedy.com
    name = "mousebear-tumblr"
    long_name = "Mouse Bear Comedy (from Tumblr)"
    url = "http://mousebearcomedy.tumblr.com"
3786
3787
3788
class BouletCorpTumblr(GenericTumblrV1):
    """BouletCorp comics, read from their Tumblr blog."""
    # Other source for the same comic: http://www.bouletcorp.com
    name = "boulet-tumblr"
    long_name = "Boulet Corp (from Tumblr)"
    url = "https://bouletcorp.tumblr.com"
    _categories = ("BOULET",)
3795
3796
3797
class TheAwkwardYetiTumblr(GenericTumblrV1):
    """The Awkward Yeti comics, read from the 'larstheyeti' Tumblr blog."""
    # Other sources for the same comic:
    #   http://www.gocomics.com/the-awkward-yeti
    #   http://theawkwardyeti.com
    #   https://tapastic.com/series/TheAwkwardYeti
    name = "yeti-tumblr"
    long_name = "The Awkward Yeti (from Tumblr)"
    url = "http://larstheyeti.tumblr.com"
    _categories = ("YETI",)
3806
3807
3808
class NellucNhoj(GenericTumblrV1):
    """Nelluc Nhoj comics, read through the Tumblr V1 API of their site."""
    name = "nhoj"
    long_name = "Nelluc Nhoj"
    url = "http://nellucnhoj.com"
3813
3814
3815
class DownTheUpwardSpiralTumblr(GenericTumblrV1):
    """Down The Upward Spiral comics, read from their Tumblr blog."""
    # Other sources for the same comic:
    #   http://www.downtheupwardspiral.com
    #   https://tapastic.com/series/Down-the-Upward-Spiral
    name = "spiral-tumblr"
    long_name = "Down the Upward Spiral (from Tumblr)"
    url = "http://downtheupwardspiral.tumblr.com"
3822
3823
3824
class AsPerUsualTumblr(GenericTumblrV1):
    """Class to retrieve As Per Usual comics."""
    # Also on https://tapastic.com/series/AsPerUsual
    name = 'usual-tumblr'
    long_name = 'As Per Usual (from Tumblr)'
    url = 'http://as-per-usual.tumblr.com'
    # Category tuples are declared as `_categories` everywhere else in this
    # file; the attribute was previously misspelled `categories` here.
    _categories = ('DAMILEE', )
3831
3832
3833
class HotComicsForCoolPeopleTumblr(GenericTumblrV1):
    """Class to retrieve Hot Comics For Cool People."""
    # Also on https://tapastic.com/series/Hot-Comics-For-Cool-People
    # Also on http://hotcomics.biz (links to tumblr)
    # Also on http://hcfcp.com (links to tumblr)
    name = 'hotcomics-tumblr'
    long_name = 'Hot Comics For Cool People (from Tumblr)'
    url = 'http://hotcomicsforcoolpeople.tumblr.com'
    # Category tuples are declared as `_categories` everywhere else in this
    # file; the attribute was previously misspelled `categories` here.
    _categories = ('DAMILEE', )
3842
3843
3844
class OneOneOneOneComicTumblr(GenericTumblrV1):
    """Retriever for 1111 Comics hosted on Tumblr."""
    # Also on http://www.1111comics.me
    # Also on https://tapastic.com/series/1111-Comics
    name = '1111-tumblr'
    long_name = '1111 Comics (from Tumblr)'
    url = 'http://comics1111.tumblr.com'
    _categories = ('ONEONEONEONE', )
3852
3853
3854
class JhallComicsTumblr(GenericTumblrV1):
    """Retriever for Jhall Comics hosted on Tumblr."""
    # Also on http://jhallcomics.com
    name = 'jhall-tumblr'
    long_name = 'Jhall Comics (from Tumblr)'
    url = 'http://jhallcomics.tumblr.com'
3860
3861
3862
class BerkeleyMewsTumblr(GenericTumblrV1):
    """Retriever for Berkeley Mews comics hosted on Tumblr."""
    # Also on http://www.gocomics.com/berkeley-mews
    # Also on http://www.berkeleymews.com
    name = 'berkeley-tumblr'
    long_name = 'Berkeley Mews (from Tumblr)'
    url = 'http://mews.tumblr.com'
    _categories = ('BERKELEY', )
3870
3871
3872
class JoanCornellaTumblr(GenericTumblrV1):
    """Retriever for Joan Cornella comics hosted on Tumblr."""
    # Also on http://joancornella.net
    name = 'cornella-tumblr'
    long_name = 'Joan Cornella (from Tumblr)'
    url = 'http://cornellajoan.tumblr.com'
3878
3879
3880
class RespawnComicTumblr(GenericTumblrV1):
    """Retriever for Respawn Comic hosted on Tumblr."""
    # Also on http://respawncomic.com
    name = 'respawn-tumblr'
    long_name = 'Respawn Comic (from Tumblr)'
    url = 'https://respawncomic.tumblr.com'
3886
3887
3888
class ChrisHallbeckTumblr(GenericTumblrV1):
    """Class to retrieve Chris Hallbeck comics from Tumblr."""
    # Also on https://tapastic.com/ChrisHallbeck
    # Also on http://maximumble.com
    # Also on http://minimumble.com
    # Also on http://thebookofbiff.com
    name = 'hallbeck-tumblr'
    # Display name corrected: the author is spelled "Hallbeck" in the class
    # name, the url and the docstring; the previous value read "Hallback".
    long_name = 'Chris Hallbeck (from Tumblr)'
    url = 'https://chrishallbeck.tumblr.com'
    # NOTE(review): the category keeps its existing 'HALLBACK' spelling since
    # classes outside this block may share that value -- confirm before
    # renaming it.
    _categories = ('HALLBACK', )
3898
3899
3900
class ComicNuggets(GenericTumblrV1):
    """Retriever for Comic Nuggets (GenericTumblrV1-based)."""
    name = 'nuggets'
    long_name = 'Comic Nuggets'
    url = 'http://comicnuggets.com'
3905
3906
3907
class PigeonGazetteTumblr(GenericTumblrV1):
    """Retriever for The Pigeon Gazette comics hosted on Tumblr."""
    # Also on https://tapastic.com/series/The-Pigeon-Gazette
    name = 'pigeon-tumblr'
    long_name = 'The Pigeon Gazette (from Tumblr)'
    url = 'http://thepigeongazette.tumblr.com'
3913
3914
3915
class CancerOwl(GenericTumblrV1):
    """Retriever for Cancer Owl comics hosted on Tumblr."""
    # Also on http://cancerowl.com
    name = 'cancerowl-tumblr'
    long_name = 'Cancer Owl (from Tumblr)'
    url = 'http://cancerowl.tumblr.com'
3921
3922
3923
class FowlLanguageTumblr(GenericTumblrV1):
    """Retriever for Fowl Language comics hosted on Tumblr."""
    # Also on http://www.fowllanguagecomics.com
    # Also on http://tapastic.com/series/Fowl-Language-Comics
    # Also on http://www.gocomics.com/fowl-language
    name = 'fowllanguage-tumblr'
    long_name = 'Fowl Language Comics (from Tumblr)'
    url = 'http://fowllanguagecomics.tumblr.com'
    _categories = ('FOWLLANGUAGE', )
3932
3933
3934
class TheOdd1sOutTumblr(GenericTumblrV1):
    """Retriever for The Odd 1s Out comics hosted on Tumblr."""
    # Also on http://theodd1sout.com
    # Also on https://tapastic.com/series/Theodd1sout
    name = 'theodd-tumblr'
    long_name = 'The Odd 1s Out (from Tumblr)'
    url = 'http://theodd1sout.tumblr.com'
3941
3942
3943
class TheUnderfoldTumblr(GenericTumblrV1):
    """Retriever for The Underfold comics hosted on Tumblr."""
    # Also on http://theunderfold.com
    name = 'underfold-tumblr'
    long_name = 'The Underfold (from Tumblr)'
    url = 'http://theunderfold.tumblr.com'
3949
3950
3951
class LolNeinTumblr(GenericTumblrV1):
    """Retriever for Lol Nein comics hosted on Tumblr."""
    # Also on http://lolnein.com
    name = 'lolnein-tumblr'
    long_name = 'Lol Nein (from Tumblr)'
    url = 'http://lolneincom.tumblr.com'
3957
3958
3959
class FatAwesomeComicsTumblr(GenericTumblrV1):
    """Retriever for Fat Awesome Comics hosted on Tumblr."""
    # Also on http://fatawesome.com/comics
    name = 'fatawesome-tumblr'
    long_name = 'Fat Awesome (from Tumblr)'
    url = 'http://fatawesomecomedy.tumblr.com'
3965
3966
3967
class TheWorldIsFlatTumblr(GenericTumblrV1):
    """Retriever for The World Is Flat comics (Tumblr edition)."""
    # Also on https://tapastic.com/series/The-World-is-Flat
    name = 'flatworld-tumblr'
    long_name = 'The World Is Flat (from Tumblr)'
    url = 'http://theworldisflatcomics.com'
3973
3974
3975
class DorrisMc(GenericTumblrV1):
    """Retriever for Dorris Mc Comics (GenericTumblrV1-based)."""
    # Also on http://www.gocomics.com/dorris-mccomics
    name = 'dorrismc'
    long_name = 'Dorris Mc'
    url = 'http://dorrismccomics.com'
3981
3982
3983
class LeleozTumblr(GenericEmptyComic, GenericTumblrV1):
    """Retriever for Leleoz comics on Tumblr (also a GenericEmptyComic)."""
    # Also on https://tapastic.com/series/Leleoz
    name = 'leleoz-tumblr'
    long_name = 'Leleoz (from Tumblr)'
    url = 'http://leleozcomics.tumblr.com'
3989
3990
3991
class MoonBeardTumblr(GenericTumblrV1):
    """Retriever for Moon Beard comics (Tumblr edition)."""
    # Also on http://moonbeard.com
    # Also on http://www.webtoons.com/en/comedy/moon-beard/list?title_no=471
    name = 'moonbeard-tumblr'
    long_name = 'Moon Beard (from Tumblr)'
    url = 'http://blog.squiresjam.es'
3998
3999
4000
class AComik(GenericTumblrV1):
    """Retriever for A Comik (GenericTumblrV1-based)."""
    name = 'comik'
    long_name = 'A Comik'
    url = 'http://acomik.com'
4005
4006
4007
class ClassicRandy(GenericTumblrV1):
    """Retriever for Classic Randy comics hosted on Tumblr."""
    name = 'randy'
    long_name = 'Classic Randy'
    url = 'http://classicrandy.tumblr.com'
4012
4013
4014
class DagssonTumblr(GenericTumblrV1):
    """Retriever for Dagsson comics hosted on Tumblr."""
    # Also on http://www.dagsson.com
    name = 'dagsson-tumblr'
    long_name = 'Dagsson Hugleikur (from Tumblr)'
    url = 'https://hugleikurdagsson.tumblr.com'
4020
4021
4022
class LinsEditionsTumblr(GenericTumblrV1):
    """Retriever for L.I.N.S. Editions comics hosted on Tumblr."""
    # Also on https://linsedition.com
    # Now on http://warandpeas.tumblr.com
    name = 'lins-tumblr'
    long_name = 'L.I.N.S. Editions (from Tumblr)'
    url = 'https://linscomics.tumblr.com'
    _categories = ('LINS', )
4030
4031
4032
class WarAndPeasTumblr(GenericTumblrV1):
    """Retriever for War And Peas comics hosted on Tumblr."""
    # Was on https://linscomics.tumblr.com
    name = 'warandpeas-tumblr'
    long_name = 'War And Peas (from Tumblr)'
    url = 'http://warandpeas.tumblr.com'
    _categories = ('WARANDPEAS', )
4039
4040
4041
class OrigamiHotDish(GenericTumblrV1):
    """Retriever for Origami Hot Dish comics (GenericTumblrV1-based)."""
    name = 'origamihotdish'
    long_name = 'Origami Hot Dish'
    url = 'http://origamihotdish.com'
4046
4047
4048
class HitAndMissComicsTumblr(GenericTumblrV1):
    """Retriever for Hit and Miss Comics hosted on Tumblr."""
    name = 'hitandmiss'
    long_name = 'Hit and Miss Comics'
    url = 'https://hitandmisscomics.tumblr.com'
4053
4054
4055
class HMBlanc(GenericTumblrV1):
    """Retriever for HM Blanc comics hosted on Tumblr."""
    name = 'hmblanc'
    long_name = 'HM Blanc'
    url = 'http://hmblanc.tumblr.com'
4060
4061
4062
class TalesOfAbsurdityTumblr(GenericTumblrV1):
    """Retriever for Tales Of Absurdity comics hosted on Tumblr."""
    # Also on http://talesofabsurdity.com
    # Also on http://tapastic.com/series/Tales-Of-Absurdity
    name = 'absurdity-tumblr'
    long_name = 'Tales of Absurdity (from Tumblr)'
    url = 'http://talesofabsurdity.tumblr.com'
    _categories = ('ABSURDITY', )
4070
4071
4072
class RobbieAndBobby(GenericTumblrV1):
    """Retriever for Robbie And Bobby comics hosted on Tumblr."""
    # Also on http://robbieandbobby.com
    name = 'robbie-tumblr'
    long_name = 'Robbie And Bobby (from Tumblr)'
    url = 'http://robbieandbobby.tumblr.com'
4078
4079
4080
class ElectricBunnyComicTumblr(GenericTumblrV1):
    """Retriever for Electric Bunny Comics hosted on Tumblr."""
    # Also on http://www.electricbunnycomics.com/View/Comic/153/Welcome+to+Hell
    name = 'bunny-tumblr'
    long_name = 'Electric Bunny Comic (from Tumblr)'
    url = 'http://electricbunnycomics.tumblr.com'
4086
4087
4088
class Hoomph(GenericTumblrV1):
    """Retriever for Hoomph comics (GenericTumblrV1-based)."""
    name = 'hoomph'
    long_name = 'Hoomph'
    url = 'http://hoom.ph'
4093
4094
4095
class BFGFSTumblr(GenericTumblrV1):
    """Retriever for BFGFS comics hosted on Tumblr."""
    # Also on https://tapastic.com/series/BFGFS
    # Also on http://bfgfs.com
    name = 'bfgfs-tumblr'
    long_name = 'BFGFS (from Tumblr)'
    url = 'https://bfgfs.tumblr.com'
4102
4103
4104
class DoodleForFood(GenericTumblrV1):
    """Retriever for Doodle For Food comics (GenericTumblrV1-based)."""
    # Also on https://tapastic.com/series/Doodle-for-Food
    name = 'doodle'
    long_name = 'Doodle For Food'
    url = 'http://www.doodleforfood.com'
4110
4111
4112
class CassandraCalinTumblr(GenericTumblrV1):
    """Retriever for C. Cassandra comics hosted on Tumblr."""
    # Also on http://cassandracalin.com
    # Also on https://tapastic.com/series/C-Cassandra-comics
    name = 'cassandra-tumblr'
    long_name = 'Cassandra Calin (from Tumblr)'
    url = 'http://c-cassandra.tumblr.com'
4119
4120
4121
class DougWasTaken(GenericTumblrV1):
    """Retriever for Doug Was Taken comics hosted on Tumblr."""
    name = 'doug'
    long_name = 'Doug Was Taken'
    url = 'https://dougwastaken.tumblr.com'
4126
4127
4128
class MandatoryRollerCoaster(GenericTumblrV1):
    """Retriever for Mandatory Roller Coaster comics (GenericTumblrV1-based)."""
    name = 'rollercoaster'
    long_name = 'Mandatory Roller Coaster'
    url = 'http://mandatoryrollercoaster.com'
4133
4134
4135
class CEstPasEnRegardantSesPompes(GenericTumblrV1):
    """Retriever for C'Est Pas En Regardant Ses Pompes (...) comics on Tumblr."""
    name = 'cperspqccltt'
    long_name = 'C Est Pas En Regardant Ses Pompes (...)'
    url = 'http://cperspqccltt.tumblr.com'
4140
4141
4142
class TheGrohlTroll(GenericTumblrV1):
    """Retriever for The Grohl Troll comics (GenericTumblrV1-based)."""
    name = 'grohltroll'
    long_name = 'The Grohl Troll'
    url = 'http://thegrohltroll.com'
4147
4148
4149
class WebcomicName(GenericTumblrV1):
    """Retriever for Webcomic Name comics (GenericTumblrV1-based)."""
    name = 'webcomicname'
    long_name = 'Webcomic Name'
    url = 'http://webcomicname.com'
4154
4155
4156
class BooksOfAdam(GenericTumblrV1):
    """Retriever for Books of Adam comics hosted on Tumblr."""
    # Also on http://www.booksofadam.com
    name = 'booksofadam'
    long_name = 'Books of Adam'
    url = 'http://booksofadam.tumblr.com'
4162
4163
4164
class HarkAVagrant(GenericTumblrV1):
    """Retriever for Hark A Vagrant comics hosted on Tumblr."""
    # Also on http://www.harkavagrant.com
    name = 'hark-tumblr'
    long_name = 'Hark A Vagrant (from Tumblr)'
    url = 'http://beatonna.tumblr.com'
4170
4171
4172
class OurSuperAdventureTumblr(GenericTumblrV1):
    """Retriever for Our Super Adventure comics hosted on Tumblr."""
    # Also on https://tapastic.com/series/Our-Super-Adventure
    # Also on http://www.oursuperadventure.com
    # http://sarahgraley.com
    name = 'superadventure-tumblr'
    long_name = 'Our Super Adventure (from Tumblr)'
    url = 'http://sarahssketchbook.tumblr.com'
4180
4181
4182
class JakeLikesOnions(GenericTumblrV1):
    """Retriever for Jake Likes Onions comics (GenericTumblrV1-based)."""
    name = 'jake'
    long_name = 'Jake Likes Onions'
    url = 'http://jakelikesonions.com'
4187
4188
4189
class InYourFaceCake(GenericTumblrV1):
    """Retriever for In Your Face Cake comics hosted on Tumblr."""
    name = 'inyourfacecake-tumblr'
    long_name = 'In Your Face Cake (from Tumblr)'
    url = 'https://in-your-face-cake.tumblr.com'
4194
4195
4196
class Robospunk(GenericTumblrV1):
    """Retriever for Robospunk comics (GenericTumblrV1-based)."""
    name = 'robospunk'
    long_name = 'Robospunk'
    url = 'http://robospunk.com'
4201
4202
4203
class BananaTwinky(GenericTumblrV1):
    """Retriever for Banana Twinky comics hosted on Tumblr."""
    name = 'banana'
    long_name = 'Banana Twinky'
    url = 'https://bananatwinky.tumblr.com'
4208
4209
4210
class YesterdaysPopcornTumblr(GenericTumblrV1):
    """Retriever for Yesterday's Popcorn comics hosted on Tumblr."""
    # Also on http://www.yesterdayspopcorn.com
    # Also on https://tapastic.com/series/Yesterdays-Popcorn
    name = 'popcorn-tumblr'
    long_name = "Yesterday's Popcorn (from Tumblr)"
    url = 'http://yesterdayspopcorn.tumblr.com'
4217
4218
4219
class TwistedDoodles(GenericTumblrV1):
    """Retriever for Twisted Doodles comics (GenericTumblrV1-based)."""
    name = 'twisted'
    long_name = 'Twisted Doodles'
    url = 'http://www.twisteddoodles.com'
4224
4225
4226
class UbertoolTumblr(GenericTumblrV1):
    """Retriever for Ubertool comics hosted on Tumblr."""
    # Also on http://ubertoolcomic.com
    # Also on https://tapastic.com/series/ubertool
    name = 'ubertool-tumblr'
    long_name = 'Ubertool (from Tumblr)'
    url = 'https://ubertool.tumblr.com'
    _categories = ('UBERTOOL', )
4234
4235
4236
class LittleLifeLinesTumblr(GenericTumblrV1):
    """Retriever for Little Life Lines comics hosted on Tumblr."""
    # Also on http://www.littlelifelines.com
    name = 'life-tumblr'
    long_name = 'Little Life Lines (from Tumblr)'
    url = 'https://little-life-lines.tumblr.com'
4242
4243
4244
class TheyCanTalk(GenericTumblrV1):
    """Retriever for They Can Talk comics (GenericTumblrV1-based)."""
    name = 'theycantalk'
    long_name = 'They Can Talk'
    url = 'http://theycantalk.com'
4249
4250
4251
class Will5NeverCome(GenericTumblrV1):
    """Retriever for Will 5:00 Never Come comics (GenericTumblrV1-based)."""
    name = 'will5'
    long_name = 'Will 5:00 Never Come ?'
    url = 'http://will5nevercome.com'
4256
4257
4258
class Sephko(GenericTumblrV1):
    """Retriever for Sephko comics hosted on Tumblr."""
    # Also on http://www.sephko.com
    name = 'sephko'
    long_name = 'Sephko'
    url = 'https://sephko.tumblr.com'
4264
4265
4266
class BlazersAtDawn(GenericTumblrV1):
    """Retriever for Blazers At Dawn comics hosted on Tumblr."""
    name = 'blazers'
    long_name = 'Blazers At Dawn'
    url = 'http://blazersatdawn.tumblr.com'
4271
4272
4273
class ArtByMoga(GenericEmptyComic, GenericTumblrV1):  # Deactivated because it downloads too many things
    """Retriever for Art By Moga comics on Tumblr (currently deactivated)."""
    name = 'moga'
    long_name = 'Art By Moga'
    url = 'http://artbymoga.tumblr.com'
4278
4279
4280
class VerbalVomitTumblr(GenericTumblrV1):
    """Retriever for Verbal Vomit comics hosted on Tumblr."""
    # Also on http://www.verbal-vomit.com
    name = 'vomit-tumblr'
    long_name = 'Verbal Vomit (from Tumblr)'
    url = 'http://verbalvomits.tumblr.com'
4286
4287
4288
class LibraryComic(GenericTumblrV1):
    """Retriever for LibraryComic hosted on Tumblr."""
    # Also on http://librarycomic.com
    name = 'library-tumblr'
    long_name = 'LibraryComic (from Tumblr)'
    url = 'https://librarycomic.tumblr.com'
4294
4295
4296
class TizzyStitchBirdTumblr(GenericTumblrV1):
    """Retriever for Tizzy Stitch Bird comics hosted on Tumblr."""
    # Also on http://tizzystitchbird.com
    # Also on https://tapastic.com/series/TizzyStitchbird
    # Also on http://www.webtoons.com/en/challenge/tizzy-stitchbird/list?title_no=50082
    name = 'tizzy-tumblr'
    long_name = 'Tizzy Stitch Bird (from Tumblr)'
    url = 'http://tizzystitchbird.tumblr.com'
4304
4305
4306
class VictimsOfCircumsolarTumblr(GenericTumblrV1):
    """Retriever for Victims Of Circumsolar comics hosted on Tumblr."""
    # Also on http://www.victimsofcircumsolar.com
    name = 'circumsolar-tumblr'
    long_name = 'Victims Of Circumsolar (from Tumblr)'
    url = 'https://victimsofcomics.tumblr.com'
4312
Review note (code duplication, 0 ignored issues): This code seems to be duplicated in your project.
4313
4314
class RockPaperCynicTumblr(GenericTumblrV1):
    """Retriever for Rock Paper Cynic comics hosted on Tumblr."""
    # Also on http://www.rockpapercynic.com
    # Also on https://tapastic.com/series/rockpapercynic
    name = 'rpc-tumblr'
    long_name = 'Rock Paper Cynic (from Tumblr)'
    url = 'http://rockpapercynic.tumblr.com'
4321
4322
4323
class DeadlyPanelTumblr(GenericTumblrV1):
    """Retriever for Deadly Panel comics hosted on Tumblr."""
    # Also on http://www.deadlypanel.com
    # Also on https://tapastic.com/series/deadlypanel
    name = 'deadly-tumblr'
    long_name = 'Deadly Panel (from Tumblr)'
    url = 'https://deadlypanel.tumblr.com'
4330
4331
4332
class CatanaComics(GenericTumblrV1):
    """Retriever for Catana comics (GenericTumblrV1-based)."""
    name = 'catana'
    long_name = 'Catana'
    url = 'http://www.catanacomics.com'
4337
4338
4339
class AngryAtNothingTumblr(GenericTumblrV1):
    """Retriever for Angry at Nothing comics hosted on Tumblr."""
    # Also on http://www.angryatnothing.net
    # Also on http://tapastic.com/series/Comics-yeah-definitely-comics-
    name = 'angry-tumblr'
    long_name = 'Angry At Nothing (from Tumblr)'
    url = 'http://angryatnothing.tumblr.com'
4346
4347
4348
class ShanghaiTango(GenericTumblrV1):
    """Retriever for the Shanghai Tango comic hosted on Tumblr."""
    name = 'tango'
    long_name = 'Shanghai Tango'
    url = 'http://tango2010weibo.tumblr.com'
4353
4354
4355
class OffTheLeashDogTumblr(GenericTumblrV1):
    """Retriever for Off The Leash Dog comics hosted on Tumblr."""
    # Also on http://offtheleashdogcartoons.com
    # Also on http://www.rupertfawcettcartoons.com
    name = 'offtheleash-tumblr'
    long_name = 'Off The Leash Dog (from Tumblr)'
    url = 'http://rupertfawcettsdoggyblog.tumblr.com'
    _categories = ('FAWCETT', )
4363
4364
4365
class ImogenQuestTumblr(GenericTumblrV1):
    """Retriever for Imogen Quest comics hosted on Tumblr."""
    # Also on http://imogenquest.net
    name = 'imogen-tumblr'
    long_name = 'Imogen Quest (from Tumblr)'
    url = 'http://imoquest.tumblr.com'
4371
4372
4373
class Shitfest(GenericTumblrV1):
    """Retriever for Shitfest comics (GenericTumblrV1-based)."""
    name = 'shitfest'
    long_name = 'Shitfest'
    url = 'http://shitfestcomic.com'
4378
4379
4380
class IceCreamSandwichComics(GenericTumblrV1):
    """Retriever for Ice Cream Sandwich Comics (GenericTumblrV1-based)."""
    name = 'icecream'
    long_name = 'Ice Cream Sandwich Comics'
    url = 'http://icecreamsandwichcomics.com'
4385
4386
4387
class Dustinteractive(GenericTumblrV1):
    """Retriever for Dustinteractive comics (GenericTumblrV1-based)."""
    name = 'dustinteractive'
    long_name = 'Dustinteractive'
    url = 'http://dustinteractive.com'
4392
4393
4394
class StickyCinemaFloor(GenericTumblrV1):
    """Retriever for Sticky Cinema Floor comics hosted on Tumblr."""
    name = 'stickycinema'
    long_name = 'Sticky Cinema Floor'
    url = 'https://stickycinemafloor.tumblr.com'
4399
4400
4401
class HorovitzComics(GenericListableComic):
    """Shared logic for the comic series published on horovitzcomics.com.

    Subclasses are expected to replace ``link_re`` with a compiled pattern
    that matches the archive links of their series and captures the comic
    number as its first group.
    """
    url = 'http://www.horovitzcomics.com'
    _categories = ('HOROVITZ', )
    # Captures the three numeric path components (year, month, day) found in
    # the comic image URL.
    img_re = re.compile('.*comics/([0-9]*)/([0-9]*)/([0-9]*)/.*$')
    link_re = NotImplemented
    get_url_from_archive_element = join_cls_url_to_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary for the comic behind an archive link.

        The comic number comes from the link's href, the title from the
        link text and the date from the path of the single ``img`` element
        whose id is ``comic``.
        """
        number = int(cls.link_re.match(link['href']).group(1))
        comic_title = link.string
        comic_imgs = soup.find_all('img', id='comic')
        assert len(comic_imgs) == 1
        date_parts = cls.img_re.match(comic_imgs[0]['src']).groups()
        year, month, day = (int(part) for part in date_parts)
        return {
            'title': comic_title,
            'day': day,
            'month': month,
            'year': year,
            'img': [img['src'] for img in comic_imgs],
            'num': number,
        }

    @classmethod
    def get_archive_elements(cls):
        """Return the archive links matching ``link_re``, in reversed page order."""
        archive_soup = get_soup_at_url('http://www.horovitzcomics.com/comics/archive/')
        matching_links = archive_soup.find_all('a', href=cls.link_re)
        return reversed(matching_links)
4432
4433
4434
class HorovitzNew(HorovitzComics):
    """Retriever for the "new" Horovitz comics series."""
    name = 'horovitznew'
    long_name = 'Horovitz New'
    link_re = re.compile('^/comics/new/([0-9]+)$')
4439
4440
4441
class HorovitzClassic(HorovitzComics):
    """Retriever for the "classic" Horovitz comics series."""
    name = 'horovitzclassic'
    long_name = 'Horovitz Classic'
    link_re = re.compile('^/comics/classic/([0-9]+)$')
4446
4447
4448
class GenericGoComic(GenericNavigableComic):
    """Shared scraping logic for the comics hosted on gocomics.com."""
    _categories = ('GOCOMIC', )

    @classmethod
    def get_first_comic_link(cls):
        """Return the anchor of the series page that leads to the first comic."""
        first_button = 'fa btn btn-outline-default btn-circle fa-backward sm '
        series_soup = get_soup_at_url(cls.url)
        return series_soup.find('a', class_=first_button)

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor to the next comic if next_ is true, else the previous one."""
        if next_:
            button = 'fa btn btn-outline-default btn-circle fa-caret-right hidden-sm-up sm '
        else:
            button = 'fa btn btn-outline-default btn-circle fa-caret-left sm '
        return last_soup.find('a', class_=button)

    @classmethod
    def get_url_from_link(cls, link):
        """Join the link's href onto the gocomics.com root URL."""
        return urljoin_wrapper('http://www.gocomics.com', link['href'])

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect date, image URLs, author and tags from a comic page.

        The date, author and tags are read from the ``article:*`` meta tags;
        images are the ``img`` elements inside the comic ``picture`` element.
        """
        published_meta = soup.find('meta', property='article:published_time')
        published = string_to_date(published_meta['content'], "%Y-%m-%d")
        picture = soup.find('picture', class_='img-fluid item-comic-image')
        comic_imgs = picture.find_all('img')
        author_meta = soup.find('meta', property='article:author')
        comic_author = author_meta['content']
        tags_meta = soup.find('meta', property='article:tag')
        comic_tags = tags_meta['content']
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [urljoin_wrapper(cls.url, img['src']) for img in comic_imgs],
            'author': comic_author,
            'tags': comic_tags,
        }
4485
4486
4487
class PearlsBeforeSwine(GenericGoComic):
    """Retriever for Pearls Before Swine comics on GoComics."""
    name = 'pearls'
    long_name = 'Pearls Before Swine'
    url = 'http://www.gocomics.com/pearlsbeforeswine'
4492
4493
4494
class Peanuts(GenericGoComic):
    """Retriever for Peanuts comics on GoComics."""
    name = 'peanuts'
    long_name = 'Peanuts'
    url = 'http://www.gocomics.com/peanuts'
4499
4500
4501
class MattWuerker(GenericGoComic):
    """Retriever for Matt Wuerker comics on GoComics."""
    name = 'wuerker'
    long_name = 'Matt Wuerker'
    url = 'http://www.gocomics.com/mattwuerker'
4506
4507
4508
class TomToles(GenericGoComic):
    """Retriever for Tom Toles comics on GoComics."""
    name = 'toles'
    long_name = 'Tom Toles'
    url = 'http://www.gocomics.com/tomtoles'
4513
4514
4515
class BreakOfDay(GenericGoComic):
    """Retriever for Break Of Day comics on GoComics."""
    name = 'breakofday'
    long_name = 'Break Of Day'
    url = 'http://www.gocomics.com/break-of-day'
4520
4521
4522
class Brevity(GenericGoComic):
    """Retriever for Brevity comics on GoComics."""
    name = 'brevity'
    long_name = 'Brevity'
    url = 'http://www.gocomics.com/brevitypanel'
4527
4528
4529
class MichaelRamirez(GenericGoComic):
    """Retriever for Michael Ramirez comics on GoComics."""
    name = 'ramirez'
    long_name = 'Michael Ramirez'
    url = 'http://www.gocomics.com/michaelramirez'
4534
4535
4536
class MikeLuckovich(GenericGoComic):
    """Retriever for Mike Luckovich comics on GoComics."""
    name = 'luckovich'
    long_name = 'Mike Luckovich'
    url = 'http://www.gocomics.com/mikeluckovich'
4541
4542
4543
class JimBenton(GenericGoComic):
    """Retriever for Jim Benton comics on GoComics."""
    # Also on http://jimbenton.tumblr.com
    name = 'benton'
    long_name = 'Jim Benton'
    url = 'http://www.gocomics.com/jim-benton-cartoons'
4549
4550
4551
class TheArgyleSweater(GenericGoComic):
    """Retriever for The Argyle Sweater comics on GoComics."""
    name = 'argyle'
    long_name = 'Argyle Sweater'
    url = 'http://www.gocomics.com/theargylesweater'
4556
4557
4558
class SunnyStreet(GenericGoComic):
    """Retriever for Sunny Street comics on GoComics."""
    # Also on http://www.sunnystreetcomics.com
    name = 'sunny'
    long_name = 'Sunny Street'
    url = 'http://www.gocomics.com/sunny-street'
4564
4565
4566
class OffTheMark(GenericGoComic):
    """Retriever for Off The Mark comics on GoComics."""
    # Also on https://www.offthemark.com
    name = 'offthemark'
    long_name = 'Off The Mark'
    url = 'http://www.gocomics.com/offthemark'
4572
4573
4574
class WuMo(GenericGoComic):
    """Retriever for WuMo comics on GoComics."""
    # Also on http://wumo.com
    name = 'wumo'
    long_name = 'WuMo'
    url = 'http://www.gocomics.com/wumo'
4580
4581
4582
class LunarBaboon(GenericGoComic):
    """Retriever for Lunar Baboon comics on GoComics."""
    # Also on http://www.lunarbaboon.com
    # Also on https://tapastic.com/series/Lunarbaboon
    name = 'lunarbaboon'
    long_name = 'Lunar Baboon'
    url = 'http://www.gocomics.com/lunarbaboon'
4589
4590
4591
class SandersenGocomic(GenericGoComic):
    """Retriever for Sarah Andersen comics on GoComics."""
    # Also on http://sarahcandersen.com
    # Also on http://tapastic.com/series/Doodle-Time
    name = 'sandersen-goc'
    long_name = 'Sarah Andersen (from GoComics)'
    url = 'http://www.gocomics.com/sarahs-scribbles'
4598
4599
4600
class SaturdayMorningBreakfastCerealGoComic(GenericGoComic):
    """Retriever for Saturday Morning Breakfast Cereal comics on GoComics."""
    # Also on http://smbc-comics.tumblr.com
    # Also on http://www.smbc-comics.com
    name = 'smbc-goc'
    long_name = 'Saturday Morning Breakfast Cereal (from GoComics)'
    url = 'http://www.gocomics.com/saturday-morning-breakfast-cereal'
    _categories = ('SMBC', )
4608
4609
4610
class CalvinAndHobbesGoComic(GenericGoComic):
    """Retriever for Calvin and Hobbes comics on GoComics."""
    # From gocomics, not http://marcel-oehler.marcellosendos.ch/comics/ch/
    name = 'calvin-goc'
    long_name = 'Calvin and Hobbes (from GoComics)'
    url = 'http://www.gocomics.com/calvinandhobbes'
4616
4617
4618
class RallGoComic(GenericGoComic):
    """Retriever for Ted Rall comics on GoComics."""
    # Also on http://rall.com/comic
    name = 'rall-goc'
    long_name = 'Ted Rall (from GoComics)'
    url = 'http://www.gocomics.com/ted-rall'
    _categories = ('RALL', )
4625
4626
4627
class TheAwkwardYetiGoComic(GenericGoComic):
    """Retriever for The Awkward Yeti comics on GoComics."""
    # Also on http://larstheyeti.tumblr.com
    # Also on http://theawkwardyeti.com
    # Also on https://tapastic.com/series/TheAwkwardYeti
    name = 'yeti-goc'
    long_name = 'The Awkward Yeti (from GoComics)'
    url = 'http://www.gocomics.com/the-awkward-yeti'
    _categories = ('YETI', )
4636
4637
4638
class BerkeleyMewsGoComics(GenericGoComic):
    """Retriever for Berkeley Mews comics on GoComics."""
    # Also on http://mews.tumblr.com
    # Also on http://www.berkeleymews.com
    name = 'berkeley-goc'
    long_name = 'Berkeley Mews (from GoComics)'
    url = 'http://www.gocomics.com/berkeley-mews'
    _categories = ('BERKELEY', )
4646
4647
4648
class SheldonGoComics(GenericGoComic):
    """Retriever for Sheldon comics on GoComics."""
    # Also on http://www.sheldoncomics.com
    name = 'sheldon-goc'
    long_name = 'Sheldon Comics (from GoComics)'
    url = 'http://www.gocomics.com/sheldon'
4654
4655
4656
class FowlLanguageGoComics(GenericGoComic):
    """Retriever for Fowl Language comics on GoComics."""
    # Also on http://www.fowllanguagecomics.com
    # Also on http://tapastic.com/series/Fowl-Language-Comics
    # Also on http://fowllanguagecomics.tumblr.com
    name = 'fowllanguage-goc'
    long_name = 'Fowl Language Comics (from GoComics)'
    url = 'http://www.gocomics.com/fowl-language'
    _categories = ('FOWLLANGUAGE', )
4665
4666
4667
class NickAnderson(GenericGoComic):
    """Retriever for Nick Anderson comics on GoComics."""
    name = 'nickanderson'
    long_name = 'Nick Anderson'
    url = 'http://www.gocomics.com/nickanderson'
4672
4673
4674
class GarfieldGoComics(GenericGoComic):
    """Retriever for Garfield comics on GoComics."""
    # Also on http://garfield.com
    name = 'garfield-goc'
    long_name = 'Garfield (from GoComics)'
    url = 'http://www.gocomics.com/garfield'
    _categories = ('GARFIELD', )
4681
4682
4683
class DorrisMcGoComics(GenericGoComic):
    """Retriever for Dorris Mc Comics on GoComics."""
    # Also on http://dorrismccomics.com
    name = 'dorrismc-goc'
    long_name = 'Dorris Mc (from GoComics)'
    url = 'http://www.gocomics.com/dorris-mccomics'
4689
4690
4691
class FoxTrot(GenericGoComic):
    """Retriever for FoxTrot comics on GoComics."""
    name = 'foxtrot'
    long_name = 'FoxTrot'
    url = 'http://www.gocomics.com/foxtrot'
4696
4697
4698
class FoxTrotClassics(GenericGoComic):
    """Retriever for FoxTrot Classics comics on GoComics."""
    name = 'foxtrot-classics'
    long_name = 'FoxTrot Classics'
    url = 'http://www.gocomics.com/foxtrotclassics'
4703
4704
4705
class MisterAndMeGoComics(GenericEmptyComic, GenericGoComic):  # Removed ?
    """Retriever for Mister & Me comics on GoComics (also a GenericEmptyComic)."""
    # Also on http://www.mister-and-me.com
    # Also on https://tapastic.com/series/Mister-and-Me
    name = 'mister-goc'
    long_name = 'Mister & Me (from GoComics)'
    url = 'http://www.gocomics.com/mister-and-me'
4712
4713
4714
class NonSequitur(GenericGoComic):
    """Retriever for Non Sequitur (Wiley Miller) comics on GoComics."""
    name = 'nonsequitur'
    long_name = 'Non Sequitur'
    url = 'http://www.gocomics.com/nonsequitur'
4719
4720
4721
class GenericTapasticComic(GenericListableComic):
    """Generic class to handle the logic common to comics from tapastic.com.

    The archive is read from the JSON value that the series page (cls.url)
    carries on a line starting with 'episodeList : '.
    """
    _categories = ('TAPASTIC', )

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Get information about a particular comics.

        archive_elt is one element of the value returned by
        get_archive_elements: its 'publishDate' and 'title' entries are used.
        soup is searched for the <img class="art-image"> tags of the episode.
        Returns None (after printing a message) when no such image is found.
        """
        # 'publishDate' is handled as milliseconds: it is divided by 1000
        # before being given to fromtimestamp (which works in seconds).
        timestamp = int(archive_elt['publishDate']) / 1000.0
        day = datetime.datetime.fromtimestamp(timestamp).date()
        imgs = soup.find_all('img', class_='art-image')
        if not imgs:
            print("Comic %s is being uploaded, retry later" % cls.get_url_from_archive_element(archive_elt))
            return None
        return {
            'day': day.day,
            'year': day.year,
            'month': day.month,
            'img': [i['src'] for i in imgs],
            'title': archive_elt['title'],
        }

    @classmethod
    def get_url_from_archive_element(cls, archive_elt):
        """Get the episode url built from the 'id' of an archive element."""
        return 'http://tapastic.com/episode/' + str(archive_elt['id'])

    @classmethod
    def get_archive_elements(cls):
        """Get the archive elements: the decoded JSON episode list of cls.url.

        Raises IndexError when no line of the page holds the episode list.
        """
        pref, suff = 'episodeList : ', ','
        # Information is stored in the javascript part
        # I don't know the clean way to get it so this is the ugly way.
        for raw_line in urlopen_wrapper(cls.url).readlines():
            line = raw_line.decode('utf-8').strip()
            if line.startswith(pref) and line.endswith(suff):
                # Keep only the JSON value between the prefix and the
                # trailing comma.
                return json.loads(line[len(pref):-len(suff)])
        # Same exception type as the former "[...][0]" but with some context.
        raise IndexError("No line starting with %r found at %s" % (pref, cls.url))
4754
4755
4756
class VegetablesForDessert(GenericTapasticComic):
4757
    """Class to retrieve Vegetables For Dessert comics."""
4758
    # Also on http://vegetablesfordessert.tumblr.com
4759
    name = 'vegetables'
4760
    long_name = 'Vegetables For Dessert'
4761
    url = 'http://tapastic.com/series/vegetablesfordessert'
4762
4763
4764
class FowlLanguageTapa(GenericTapasticComic):
4765
    """Class to retrieve Fowl Language comics."""
4766
    # Also on http://www.fowllanguagecomics.com
4767
    # Also on http://fowllanguagecomics.tumblr.com
4768
    # Also on http://www.gocomics.com/fowl-language
4769
    name = 'fowllanguage-tapa'
4770
    long_name = 'Fowl Language Comics (from Tapastic)'
4771
    url = 'http://tapastic.com/series/Fowl-Language-Comics'
4772
    _categories = ('FOWLLANGUAGE', )
4773
4774
4775
class OscillatingProfundities(GenericTapasticComic):
4776
    """Class to retrieve Oscillating Profundities comics."""
4777
    name = 'oscillating'
4778
    long_name = 'Oscillating Profundities'
4779
    url = 'http://tapastic.com/series/oscillatingprofundities'
4780
4781
4782
class ZnoflatsComics(GenericTapasticComic):
4783
    """Class to retrieve Znoflats comics."""
4784
    name = 'znoflats'
4785
    long_name = 'Znoflats Comics'
4786
    url = 'http://tapastic.com/series/Znoflats-Comics'
4787
4788
4789
class SandersenTapastic(GenericTapasticComic):
4790
    """Class to retrieve Sarah Andersen comics."""
4791
    # Also on http://sarahcandersen.com
4792
    # Also on http://www.gocomics.com/sarahs-scribbles
4793
    name = 'sandersen-tapa'
4794
    long_name = 'Sarah Andersen (from Tapastic)'
4795
    url = 'http://tapastic.com/series/Doodle-Time'
4796
4797
4798
class TubeyToonsTapastic(GenericTapasticComic):
4799
    """Class to retrieve TubeyToons comics."""
4800
    # Also on http://tubeytoons.com
4801
    # Also on https://tubeytoons.tumblr.com
4802
    name = 'tubeytoons-tapa'
4803
    long_name = 'Tubey Toons (from Tapastic)'
4804
    url = 'http://tapastic.com/series/Tubey-Toons'
4805
    _categories = ('TUNEYTOONS', )
4806
4807
4808
class AnythingComicTapastic(GenericTapasticComic):
4809
    """Class to retrieve Anything Comics."""
4810
    # Also on http://www.anythingcomic.com
4811
    name = 'anythingcomic-tapa'
4812
    long_name = 'Anything Comic (from Tapastic)'
4813
    url = 'http://tapastic.com/series/anything'
4814
4815
4816
class UnearthedComicsTapastic(GenericTapasticComic):
4817
    """Class to retrieve Unearthed comics."""
4818
    # Also on http://unearthedcomics.com
4819
    # Also on https://unearthedcomics.tumblr.com
4820
    name = 'unearthed-tapa'
4821
    long_name = 'Unearthed Comics (from Tapastic)'
4822
    url = 'http://tapastic.com/series/UnearthedComics'
4823
    _categories = ('UNEARTHED', )
4824
4825
4826
class EverythingsStupidTapastic(GenericTapasticComic):
4827
    """Class to retrieve Everything's stupid Comics."""
4828
    # Also on http://www.webtoons.com/en/challenge/everythings-stupid/list?title_no=14591
4829
    # Also on http://everythingsstupid.net
4830
    name = 'stupid-tapa'
4831
    long_name = "Everything's Stupid (from Tapastic)"
4832
    url = 'http://tapastic.com/series/EverythingsStupid'
4833
4834
4835
class JustSayEhTapastic(GenericTapasticComic):
4836
    """Class to retrieve Just Say Eh comics."""
4837
    # Also on http://www.justsayeh.com
4838
    name = 'justsayeh-tapa'
4839
    long_name = 'Just Say Eh (from Tapastic)'
4840
    url = 'http://tapastic.com/series/Just-Say-Eh'
4841
4842
4843
class ThorsThundershackTapastic(GenericTapasticComic):
4844
    """Class to retrieve Thor's Thundershack comics."""
4845
    # Also on http://www.thorsthundershack.com
4846
    name = 'thor-tapa'
4847
    long_name = 'Thor\'s Thundershack (from Tapastic)'
4848
    url = 'http://tapastic.com/series/Thors-Thundershac'
4849
    _categories = ('THOR', )
4850
4851
4852
class OwlTurdTapastic(GenericTapasticComic):
4853
    """Class to retrieve Owl Turd comics."""
4854
    # Also on http://owlturd.com
4855
    name = 'owlturd-tapa'
4856
    long_name = 'Owl Turd (from Tapastic)'
4857
    url = 'http://tapastic.com/series/Owl-Turd-Comix'
4858
    _categories = ('OWLTURD', )
4859
4860
4861
class GoneIntoRaptureTapastic(GenericTapasticComic):
4862
    """Class to retrieve Gone Into Rapture comics."""
4863
    # Also on http://goneintorapture.tumblr.com
4864
    # Also on http://goneintorapture.com
4865
    name = 'rapture-tapa'
4866
    long_name = 'Gone Into Rapture (from Tapastic)'
4867
    url = 'http://tapastic.com/series/Goneintorapture'
4868
4869
4870
class HeckIfIKnowComicsTapa(GenericTapasticComic):
4871
    """Class to retrieve Heck If I Know Comics."""
4872
    # Also on http://heckifiknowcomics.com
4873
    name = 'heck-tapa'
4874
    long_name = 'Heck if I Know comics (from Tapastic)'
4875
    url = 'http://tapastic.com/series/Regular'
4876
4877
4878
class CheerUpEmoKidTapa(GenericTapasticComic):
4879
    """Class to retrieve CheerUpEmoKid comics."""
4880
    # Also on http://www.cheerupemokid.com
4881
    # Also on https://enzocomics.tumblr.com
4882
    name = 'cuek-tapa'
4883
    long_name = 'Cheer Up Emo Kid (from Tapastic)'
4884
    url = 'http://tapastic.com/series/CUEK'
4885
4886
4887
class BigFootJusticeTapa(GenericTapasticComic):
4888
    """Class to retrieve Big Foot Justice comics."""
4889
    # Also on http://bigfootjustice.com
4890
    name = 'bigfoot-tapa'
4891
    long_name = 'Big Foot Justice (from Tapastic)'
4892
    url = 'http://tapastic.com/series/bigfoot-justice'
4893
4894
4895
class UpAndOutTapa(GenericTapasticComic):
4896
    """Class to retrieve Up & Out comics."""
4897
    # Also on http://upandoutcomic.tumblr.com
4898
    name = 'upandout-tapa'
4899
    long_name = 'Up And Out (from Tapastic)'
4900
    url = 'http://tapastic.com/series/UP-and-OUT'
4901
4902
4903
class ToonHoleTapa(GenericTapasticComic):
4904
    """Class to retrieve Toon Holes comics."""
4905
    # Also on http://www.toonhole.com
4906
    name = 'toonhole-tapa'
4907
    long_name = 'Toon Hole (from Tapastic)'
4908
    url = 'http://tapastic.com/series/TOONHOLE'
4909
4910
4911
class AngryAtNothingTapa(GenericTapasticComic):
4912
    """Class to retrieve Angry at Nothing comics."""
4913
    # Also on http://www.angryatnothing.net
4914
    # Also on http://angryatnothing.tumblr.com
4915
    name = 'angry-tapa'
4916
    long_name = 'Angry At Nothing (from Tapastic)'
4917
    url = 'http://tapastic.com/series/Comics-yeah-definitely-comics-'
4918
4919
4920
class LeleozTapa(GenericTapasticComic):
4921
    """Class to retrieve Leleoz comics."""
4922
    # Also on http://leleozcomics.tumblr.com
4923
    name = 'leleoz-tapa'
4924
    long_name = 'Leleoz (from Tapastic)'
4925
    url = 'https://tapastic.com/series/Leleoz'
4926
4927
4928
class TheAwkwardYetiTapa(GenericTapasticComic):
4929
    """Class to retrieve The Awkward Yeti comics."""
4930
    # Also on http://www.gocomics.com/the-awkward-yeti
4931
    # Also on http://theawkwardyeti.com
4932
    # Also on http://larstheyeti.tumblr.com
4933
    name = 'yeti-tapa'
4934
    long_name = 'The Awkward Yeti (from Tapastic)'
4935
    url = 'https://tapastic.com/series/TheAwkwardYeti'
4936
    _categories = ('YETI', )
4937
4938
4939
class AsPerUsualTapa(GenericTapasticComic):
    """Class to retrieve As Per Usual comics."""
    # Also on http://as-per-usual.tumblr.com
    name = 'usual-tapa'
    long_name = 'As Per Usual (from Tapastic)'
    url = 'https://tapastic.com/series/AsPerUsual'
    # Spelled with the leading underscore like in every other comic class.
    _categories = ('DAMILEE', )
4946
4947
4948
class HotComicsForCoolPeopleTapa(GenericTapasticComic):
    """Class to retrieve Hot Comics For Cool People."""
    # Also on http://hotcomicsforcoolpeople.tumblr.com
    # Also on http://hotcomics.biz (links to tumblr)
    # Also on http://hcfcp.com (links to tumblr)
    name = 'hotcomics-tapa'
    long_name = 'Hot Comics For Cool People (from Tapastic)'
    url = 'https://tapastic.com/series/Hot-Comics-For-Cool-People'
    # Spelled with the leading underscore like in every other comic class.
    _categories = ('DAMILEE', )
4957
4958
4959
class OneOneOneOneComicTapa(GenericTapasticComic):
4960
    """Class to retrieve 1111 Comics."""
4961
    # Also on http://www.1111comics.me
4962
    # Also on http://comics1111.tumblr.com
4963
    name = '1111-tapa'
4964
    long_name = '1111 Comics (from Tapastic)'
4965
    url = 'https://tapastic.com/series/1111-Comics'
4966
    _categories = ('ONEONEONEONE', )
4967
4968
4969
class TumbleDryTapa(GenericTapasticComic):
    """Class to retrieve Tumble Dry comics."""
    # Also on http://tumbledrycomics.com
    name = 'tumbledry-tapa'
    # The comic is "Tumble Dry" (see url), not "Tumblr Dry".
    long_name = 'Tumble Dry (from Tapastic)'
    url = 'https://tapastic.com/series/TumbleDryComics'
4975
4976
4977
class DeadlyPanelTapa(GenericTapasticComic):
4978
    """Class to retrieve Deadly Panel comics."""
4979
    # Also on http://www.deadlypanel.com
4980
    # Also on https://deadlypanel.tumblr.com
4981
    name = 'deadly-tapa'
4982
    long_name = 'Deadly Panel (from Tapastic)'
4983
    url = 'https://tapastic.com/series/deadlypanel'
4984
4985
4986
class ChrisHallbeckMaxiTapa(GenericTapasticComic):
4987
    """Class to retrieve Chris Hallbeck comics."""
4988
    # Also on https://chrishallbeck.tumblr.com
4989
    # Also on http://maximumble.com
4990
    name = 'hallbeckmaxi-tapa'
4991
    long_name = 'Chris Hallback - Maximumble (from Tapastic)'
4992
    url = 'https://tapastic.com/series/Maximumble'
4993
    _categories = ('HALLBACK', )
4994
4995
4996
class ChrisHallbeckMiniTapa(GenericEmptyComic, GenericTapasticComic):
4997
    """Class to retrieve Chris Hallbeck comics."""
4998
    # Also on https://chrishallbeck.tumblr.com
4999
    # Also on http://minimumble.com
5000
    name = 'hallbeckmini-tapa'
5001
    long_name = 'Chris Hallback - Minimumble (from Tapastic)'
5002
    url = 'https://tapastic.com/series/Minimumble'
5003
    _categories = ('HALLBACK', )
5004
5005
5006
class ChrisHallbeckBiffTapa(GenericEmptyComic, GenericTapasticComic):
5007
    """Class to retrieve Chris Hallbeck comics."""
5008
    # Also on https://chrishallbeck.tumblr.com
5009
    # Also on http://thebookofbiff.com
5010
    name = 'hallbeckbiff-tapa'
5011
    long_name = 'Chris Hallback - The Book of Biff (from Tapastic)'
5012
    url = 'https://tapastic.com/series/Biff'
5013
    _categories = ('HALLBACK', )
5014
5015
5016
class RandoWisTapa(GenericTapasticComic):
5017
    """Class to retrieve RandoWis comics."""
5018
    # Also on https://randowis.com
5019
    name = 'randowis-tapa'
5020
    long_name = 'RandoWis (from Tapastic)'
5021
    url = 'https://tapastic.com/series/RandoWis'
5022
5023
5024
class PigeonGazetteTapa(GenericTapasticComic):
5025
    """Class to retrieve The Pigeon Gazette comics."""
5026
    # Also on http://thepigeongazette.tumblr.com
5027
    name = 'pigeon-tapa'
5028
    long_name = 'The Pigeon Gazette (from Tapastic)'
5029
    url = 'https://tapastic.com/series/The-Pigeon-Gazette'
5030
5031
5032
class TheOdd1sOutTapa(GenericTapasticComic):
5033
    """Class to retrieve The Odd 1s Out comics."""
5034
    # Also on http://theodd1sout.com
5035
    # Also on http://theodd1sout.tumblr.com
5036
    name = 'theodd-tapa'
5037
    long_name = 'The Odd 1s Out (from Tapastic)'
5038
    url = 'https://tapastic.com/series/Theodd1sout'
5039
5040
5041
class TheWorldIsFlatTapa(GenericTapasticComic):
5042
    """Class to retrieve The World Is Flat Comics."""
5043
    # Also on http://theworldisflatcomics.tumblr.com
5044
    name = 'flatworld-tapa'
5045
    long_name = 'The World Is Flat (from Tapastic)'
5046
    url = 'https://tapastic.com/series/The-World-is-Flat'
5047
5048
5049
class MisterAndMeTapa(GenericTapasticComic):
5050
    """Class to retrieve Mister & Me Comics."""
5051
    # Also on http://www.mister-and-me.com
5052
    # Also on http://www.gocomics.com/mister-and-me
5053
    name = 'mister-tapa'
5054
    long_name = 'Mister & Me (from Tapastic)'
5055
    url = 'https://tapastic.com/series/Mister-and-Me'
5056
5057
5058
class TalesOfAbsurdityTapa(GenericTapasticComic):
5059
    """Class to retrieve Tales Of Absurdity comics."""
5060
    # Also on http://talesofabsurdity.com
5061
    # Also on http://talesofabsurdity.tumblr.com
5062
    name = 'absurdity-tapa'
5063
    long_name = 'Tales of Absurdity (from Tapastic)'
5064
    url = 'http://tapastic.com/series/Tales-Of-Absurdity'
5065
    _categories = ('ABSURDITY', )
5066
5067
5068
class BFGFSTapa(GenericTapasticComic):
5069
    """Class to retrieve BFGFS comics."""
5070
    # Also on http://bfgfs.com
5071
    # Also on https://bfgfs.tumblr.com
5072
    name = 'bfgfs-tapa'
5073
    long_name = 'BFGFS (from Tapastic)'
5074
    url = 'https://tapastic.com/series/BFGFS'
5075
5076
5077
class DoodleForFoodTapa(GenericTapasticComic):
5078
    """Class to retrieve Doodle For Food comics."""
5079
    # Also on http://www.doodleforfood.com
5080
    name = 'doodle-tapa'
5081
    long_name = 'Doodle For Food (from Tapastic)'
5082
    url = 'https://tapastic.com/series/Doodle-for-Food'
5083
5084
5085
class MrLovensteinTapa(GenericTapasticComic):
5086
    """Class to retrieve Mr Lovenstein comics."""
5087
    # Also on http://www.mrlovenstein.com
5088
    name = 'mrlovenstein-tapa'
5089
    long_name = 'Mr. Lovenstein (from Tapastic)'
5090
    url = 'https://tapastic.com/series/MrLovenstein'
5091
5092
5093
class CassandraCalinTapa(GenericTapasticComic):
5094
    """Class to retrieve C. Cassandra comics."""
5095
    # Also on http://cassandracalin.com
5096
    # Also on http://c-cassandra.tumblr.com
5097
    name = 'cassandra-tapa'
5098
    long_name = 'Cassandra Calin (from Tapastic)'
5099
    url = 'https://tapastic.com/series/C-Cassandra-comics'
5100
5101
5102
class WafflesAndPancakes(GenericTapasticComic):
5103
    """Class to retrieve Waffles And Pancakes comics."""
5104
    # Also on http://wandpcomic.com
5105
    name = 'waffles'
5106
    long_name = 'Waffles And Pancakes'
5107
    url = 'https://tapastic.com/series/Waffles-and-Pancakes'
5108
5109
5110
class YesterdaysPopcornTapastic(GenericTapasticComic):
5111
    """Class to retrieve Yesterday's Popcorn comics."""
5112
    # Also on http://www.yesterdayspopcorn.com
5113
    # Also on http://yesterdayspopcorn.tumblr.com
5114
    name = 'popcorn-tapa'
5115
    long_name = 'Yesterday\'s Popcorn (from Tapastic)'
5116
    url = 'https://tapastic.com/series/Yesterdays-Popcorn'
5117
5118
5119
class OurSuperAdventureTapastic(GenericEmptyComic, GenericTapasticComic):
5120
    """Class to retrieve Our Super Adventure comics."""
5121
    # Also on http://www.oursuperadventure.com
5122
    # http://sarahssketchbook.tumblr.com
5123
    # http://sarahgraley.com
5124
    name = 'superadventure-tapastic'
5125
    long_name = 'Our Super Adventure (from Tapastic)'
5126
    url = 'https://tapastic.com/series/Our-Super-Adventure'
5127
5128
5129
class NamelessPCs(GenericTapasticComic):
5130
    """Class to retrieve Nameless PCs comics."""
5131
    # Also on http://namelesspcs.com
5132
    name = 'namelesspcs-tapa'
5133
    long_name = 'NamelessPCs (from Tapastic)'
5134
    url = 'https://tapastic.com/series/NamelessPC'
5135
5136
5137
class DownTheUpwardSpiralTapa(GenericTapasticComic):
5138
    """Class to retrieve Down The Upward Spiral comics."""
5139
    # Also on http://www.downtheupwardspiral.com
5140
    # Also on http://downtheupwardspiral.tumblr.com
5141
    name = 'spiral-tapa'
5142
    long_name = 'Down the Upward Spiral (from Tapastic)'
5143
    url = 'https://tapastic.com/series/Down-the-Upward-Spiral'
5144
5145
5146
class UbertoolTapa(GenericTapasticComic):
5147
    """Class to retrieve Ubertool comics."""
5148
    # Also on http://ubertoolcomic.com
5149
    # Also on https://ubertool.tumblr.com
5150
    name = 'ubertool-tapa'
5151
    long_name = 'Ubertool (from Tapastic)'
5152
    url = 'https://tapastic.com/series/ubertool'
5153
    _categories = ('UBERTOOL', )
5154
5155
5156
class BarteNerdsTapa(GenericTapasticComic):
5157
    """Class to retrieve BarteNerds comics."""
5158
    # Also on http://www.bartenerds.com
5159
    name = 'bartenerds-tapa'
5160
    long_name = 'BarteNerds (from Tapastic)'
5161
    url = 'https://tapastic.com/series/BarteNERDS'
5162
5163
5164
class SmallBlueYonderTapa(GenericTapasticComic):
5165
    """Class to retrieve Small Blue Yonder comics."""
5166
    # Also on http://www.smallblueyonder.com
5167
    name = 'smallblue-tapa'
5168
    long_name = 'Small Blue Yonder (from Tapastic)'
5169
    url = 'https://tapastic.com/series/Small-Blue-Yonder'
5170
5171
5172
class TizzyStitchBirdTapa(GenericTapasticComic):
5173
    """Class to retrieve Tizzy Stitch Bird comics."""
5174
    # Also on http://tizzystitchbird.com
5175
    # Also on http://tizzystitchbird.tumblr.com
5176
    # Also on http://www.webtoons.com/en/challenge/tizzy-stitchbird/list?title_no=50082
5177
    name = 'tizzy-tapa'
5178
    long_name = 'Tizzy Stitch Bird (from Tapastic)'
5179
    url = 'https://tapastic.com/series/TizzyStitchbird'
5180
5181
5182
class RockPaperCynicTapa(GenericTapasticComic):
5183
    """Class to retrieve RockPaperCynic comics."""
5184
    # Also on http://www.rockpapercynic.com
5185
    # Also on http://rockpapercynic.tumblr.com
5186
    name = 'rpc-tapa'
5187
    long_name = 'Rock Paper Cynic (from Tapastic)'
5188
    url = 'https://tapastic.com/series/rockpapercynic'
5189
5190
5191
class ItsTheTieTapa(GenericTapasticComic):
5192
    """Class to retrieve It's the tie comics."""
5193
    # Also on http://itsthetie.com
5194
    # Also on http://itsthetie.tumblr.com
5195
    name = 'tie-tapa'
5196
    long_name = "It's the tie (from Tapastic)"
5197
    url = "https://tapastic.com/series/itsthetie"
5198
    _categories = ('TIE', )
5199
5200
5201
def get_subclasses(klass):
    """Return the direct subclasses of klass followed by the indirect ones.

    The result is a list: a class reachable through several inheritance
    paths appears once per path.
    """
    children = klass.__subclasses__()
    found = list(children)
    for child in children:
        # Descendants of each child are appended after all direct children.
        found += get_subclasses(child)
    return found
5207
5208
5209
def remove_st_nd_rd_th_from_date(string):
    """Function to transform 1st/2nd/3rd/4th in a parsable date format.

    The ordinal suffix ('st', 'nd', 'rd' or 'th') is removed only when it
    directly follows a digit, so that words such as 'August', 'Monday' or
    'Saturday' are left untouched.
    Returns the new string.
    """
    # Look-behind on a digit: "21st August" -> "21 August" but the "st" of
    # "August" is not preceded by a digit and is kept.
    return re.sub(r'(?<=\d)(?:st|nd|rd|th)', '', string)
5217
5218
5219
def string_to_date(string, date_format, local=DEFAULT_LOCAL):
    """Function to convert string to date object.

    Wrapper around datetime.datetime.strptime: `string` is parsed with
    `date_format` while the LC_ALL locale is temporarily set to `local`.
    Returns a datetime.date.
    The previous locale is restored even when the parsing fails; exceptions
    from strptime (ValueError) and from setlocale (locale.Error) are not
    caught.
    """
    # format described in https://docs.python.org/2/library/datetime.html#strftime-and-strptime-behavior
    prev_locale = locale.setlocale(locale.LC_ALL)
    must_switch = local != prev_locale
    if must_switch:
        locale.setlocale(locale.LC_ALL, local)
    try:
        return datetime.datetime.strptime(string, date_format).date()
    finally:
        # The locale is a process-wide setting: never leave it changed.
        if must_switch:
            locale.setlocale(locale.LC_ALL, prev_locale)
5230
5231
5232
# All the direct/indirect subclasses of GenericComic (set: no duplicates).
COMICS = set(get_subclasses(GenericComic))
# Classes whose name is None are left out of the lookups below.
VALID_COMICS = [c for c in COMICS if c.name is not None]
# Mapping from the `name` attribute to the comic class.
COMIC_NAMES = {c.name: c for c in VALID_COMICS}
# A name used by 2 classes would silently hide one of them in COMIC_NAMES.
# The message is only built when the check fails.
assert len(VALID_COMICS) == len(COMIC_NAMES), \
    "Comic names are not unique: %s" % sorted(
        n for n in COMIC_NAMES
        if sum(1 for c in VALID_COMICS if c.name == n) > 1)
CLASS_NAMES = {c.__name__ for c in VALID_COMICS}
assert len(VALID_COMICS) == len(CLASS_NAMES), \
    "Comic class names are not unique: %s" % sorted(
        n for n in CLASS_NAMES
        if sum(1 for c in VALID_COMICS if c.__name__ == n) > 1)
5238