Completed
Push — master ( 532119...43d06c )
by De
01:07
created

comics.py (2 issues)

1
#! /usr/bin/python3
2
# vim: set expandtab tabstop=4 shiftwidth=4 :
3
"""Module to retrieve webcomics"""
4
5
from comic_abstract import GenericComic, get_date_for_comic
6
import re
7
from datetime import date, timedelta
8
import datetime
9
from urlfunctions import get_soup_at_url, urljoin_wrapper,\
10
    convert_iri_to_plain_ascii_uri, load_json_at_url, urlopen_wrapper
11
import json
12
import locale
13
import urllib
14
15
DEFAULT_LOCAL = 'en_GB.UTF-8'
16
17
18
class Xkcd(GenericComic):
    """Class to retrieve Xkcd comics.

    Comics are enumerated by number using the JSON documents found at
    'info.0.json' (latest comic) and '<num>/info.0.json' under `url`.
    """
    name = 'xkcd'
    long_name = 'xkcd'
    url = 'http://xkcd.com'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generator to get the next comic. Implementation of GenericComic's abstract method.

        `last_comic` is the last comic already retrieved (a dict with a
        'num' entry) or a falsy value to start from number 1.

        Each yielded value is the JSON document of a comic, enriched with
        'prefix', 'json_url' and 'url' entries, with 'img' wrapped in a
        list and 'day'/'month'/'year' converted to integers.
        """
        first_num = last_comic['num'] if last_comic else 0
        last_num = load_json_at_url(
            urljoin_wrapper(cls.url, 'info.0.json'))['num']

        for num in range(first_num + 1, last_num + 1):
            if num == 404:
                # Number 404 is skipped on purpose
                # (presumably there is no such comic - to be confirmed).
                continue
            json_url = urljoin_wrapper(cls.url, '%d/info.0.json' % num)
            comic = load_json_at_url(json_url)
            comic['img'] = [comic['img']]
            comic['prefix'] = '%d-' % num
            comic['json_url'] = json_url
            comic['url'] = urljoin_wrapper(cls.url, str(num))
            for field in ('day', 'month', 'year'):
                comic[field] = int(comic[field])
            # Sanity check: the document must describe the requested comic.
            assert comic['num'] == num
            yield comic
44
45
46
# Helper functions corresponding to get_url_from_link/get_url_from_archive_element
47
48
49
@classmethod
def get_href(cls, link):
    """Implementation of get_url_from_link/get_url_from_archive_element.

    Return the 'href' entry of `link` unchanged (`cls` is not used).
    """
    return link['href']
53
54
55
@classmethod
def join_cls_url_to_href(cls, link):
    """Implementation of get_url_from_link/get_url_from_archive_element.

    Return the 'href' entry of `link` joined onto `cls.url` with
    urljoin_wrapper.
    """
    href = link['href']
    return urljoin_wrapper(cls.url, href)
59
60
61
class GenericNavigableComic(GenericComic):
    """Generic class for "navigable" comics : with first/next arrows.

    This class applies to comic where previous and next comics can be
    accessed from a given comic. Once given a starting point (either
    the first comic or the last comic retrieved), it will handle the
    navigation, the retrieval of the soup object and the setting of
    the 'url' attribute on retrieved comics. This limits a lot the
    amount of boilerplate code in the different implementation classes.

    The method `get_next_comic` is implemented in terms of new
    more specialized methods to be implemented/overridden:
        - get_first_comic_link
        - get_navi_link
        - get_comic_info
        - get_url_from_link
    """
    _categories = ('NAVIGABLE', )

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        Sometimes this can be retrieved of any comic page, sometimes on
        the archive page, sometimes it doesn't exist at all and one has
        to iterate backward to find it before hardcoding the result found.
        """
        raise NotImplementedError

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next (or previous - for dev purposes) comic.

        `next_` is true to get the link to the next comic, false to get
        the link to the previous one.
        """
        raise NotImplementedError

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The result is either None (the comic is skipped) or a dict which
        must not contain a 'url' entry: `get_next_comic` adds it.
        """
        raise NotImplementedError

    @classmethod
    def get_url_from_link(cls, link):
        """Get url corresponding to a link. Default implementation is similar to get_href."""
        return link['href']

    @classmethod
    def get_next_link(cls, last_soup):
        """Get link to next comic."""
        return cls.get_navi_link(last_soup, True)

    @classmethod
    def get_prev_link(cls, last_soup):
        """Get link to previous comic."""
        return cls.get_navi_link(last_soup, False)

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generic implementation of get_next_comic for navigable comics.

        Start from the comic following `last_comic` (or from the first
        comic when `last_comic` is falsy) and follow the "next" links
        until there is none left or a link leads to the current URL.
        """
        url = last_comic['url'] if last_comic else None
        cls.log("starting 'get_next_comic' from %s" % url)
        if url:
            next_comic = cls.get_next_link(get_soup_at_url(url))
        else:
            next_comic = cls.get_first_comic_link()
        cls.log("next/first comic will be %s (url is %s)" % (str(next_comic), url))
        # cls.check_navigation(url)
        while next_comic:
            prev_url, url = url, cls.get_url_from_link(next_comic)
            if prev_url == url:
                # A page linking to itself would make us loop forever.
                cls.log("got same url %s" % url)
                break
            cls.log("about to get %s (%s)" % (url, str(next_comic)))
            soup = get_soup_at_url(url)
            comic = cls.get_comic_info(soup, next_comic)
            if comic is not None:
                assert 'url' not in comic
                comic['url'] = url
                yield comic
            next_comic = cls.get_next_link(soup)
            cls.log("next comic will be %s" % str(next_comic))

    @classmethod
    def check_first_link(cls):
        """Check that navigation to first comic seems to be working - for dev purposes.

        Return True when a first link exists and its URL can be fetched
        without HTTP error, False otherwise.
        """
        # Imported here so that `urllib.error` is guaranteed to be loaded:
        # a plain `import urllib` does not import the submodule.
        import urllib.error

        cls.log("about to check first link")
        ok = True
        firstlink = cls.get_first_comic_link()
        if firstlink is None:
            print("From %s : no first link" % cls.url)
            ok = False
        else:
            firsturl = cls.get_url_from_link(firstlink)
            try:
                get_soup_at_url(firsturl)
            except urllib.error.HTTPError:
                print("From %s : invalid first url" % cls.url)
                ok = False
        cls.log("checked first link -> returned %d" % ok)
        return ok

    @classmethod
    def check_prev_next_links(cls, url):
        """Check that navigation to prev/next from a given URL seems to be working - for dev purposes.

        Going to the previous comic then forward (and to the next comic
        then backward) is expected to lead back to `url`. Return True
        when it does, False otherwise (details are printed).
        """
        cls.log("about to check prev/next from %s" % url)
        ok = True
        if url is None:
            prevlink, nextlink = None, None
        else:
            soup = get_soup_at_url(url)
            prevlink, nextlink = cls.get_prev_link(soup), cls.get_next_link(soup)
        if prevlink is None and nextlink is None:
            print("From %s : no previous nor next" % url)
            ok = False
        else:
            if prevlink:
                prevurl = cls.get_url_from_link(prevlink)
                prevsoup = get_soup_at_url(prevurl)
                # The previous page may have no "next" link at all:
                # report it as a mismatch instead of crashing on None.
                forward = cls.get_next_link(prevsoup)
                prevnext = cls.get_url_from_link(forward) if forward else None
                if prevnext != url:
                    print("From %s, going backward then forward leads to %s" % (url, prevnext))
                    ok = False
            if nextlink:
                nexturl = cls.get_url_from_link(nextlink)
                if nexturl != url:
                    nextsoup = get_soup_at_url(nexturl)
                    # Same precaution for a next page without "previous" link.
                    backward = cls.get_prev_link(nextsoup)
                    nextprev = cls.get_url_from_link(backward) if backward else None
                    if nextprev != url:
                        print("From %s, going forward then backward leads to %s" % (url, nextprev))
                        ok = False
        cls.log("checked prev/next from %s -> returned %d" % (url, ok))
        return ok

    @classmethod
    def check_navigation(cls, url):
        """Check that navigation functions seem to be working - for dev purposes.

        Both checks are always performed; the result is true only when
        both succeed.
        """
        cls.log("about to check navigation from %s" % url)
        first = cls.check_first_link()
        prevnext = cls.check_prev_next_links(url)
        ok = first and prevnext
        cls.log("checked navigation from %s -> returned %d" % (url, ok))
        return ok
201
202
203
class GenericListableComic(GenericComic):
    """Generic class for "listable" comics : with a list of comics (aka 'archive')

    The method `get_next_comic` is implemented in terms of new
    more specialized methods to be implemented/overridden:
        - get_archive_elements
        - get_url_from_archive_element
        - get_comic_info
    """
    _categories = ('LISTABLE', )

    @classmethod
    def get_archive_elements(cls):
        """Get the archive elements (iterable)."""
        raise NotImplementedError

    @classmethod
    def get_url_from_archive_element(cls, archive_elt):
        """Get url corresponding to an archive element."""
        raise NotImplementedError

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Get information about a particular comics.

        The result is either None (the comic is skipped) or a dict which
        must not contain a 'url' entry: `get_next_comic` adds it.
        """
        raise NotImplementedError

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generic implementation of get_next_comic for listable comics.

        Archive elements are skipped up to and including the one whose
        URL is the URL of `last_comic`; the following ones are retrieved
        and yielded. When `last_comic` is falsy, every element is
        retrieved. A message is printed if the URL of `last_comic` is
        never met.
        """
        waiting_for_url = last_comic['url'] if last_comic else None
        for archive_elt in cls.get_archive_elements():
            url = cls.get_url_from_archive_element(archive_elt)
            cls.log("considering %s" % url)
            if waiting_for_url is not None:
                # Still looking for the last comic retrieved.
                if waiting_for_url == url:
                    waiting_for_url = None
                continue
            cls.log("about to get %s (%s)" % (url, str(archive_elt)))
            soup = get_soup_at_url(url)
            comic = cls.get_comic_info(soup, archive_elt)
            if comic is not None:
                assert 'url' not in comic
                comic['url'] = url
                yield comic
        if waiting_for_url is not None:
            print("Did not find %s : there might be a problem" % waiting_for_url)
248
249
# Helper functions corresponding to get_first_comic_link/get_navi_link
250
251
252
@classmethod
def get_link_rel_next(cls, last_soup, next_):
    """Implementation of get_navi_link.

    Look in `last_soup` for a 'link' tag whose 'rel' is 'next' (when
    `next_` is true) or 'prev' (otherwise) and return the result of find.
    """
    rel = 'next' if next_ else 'prev'
    return last_soup.find('link', rel=rel)
256
257
258
@classmethod
def get_a_rel_next(cls, last_soup, next_):
    """Implementation of get_navi_link.

    Look in `last_soup` for an 'a' tag whose 'rel' is 'next' (when
    `next_` is true) or 'prev' (otherwise) and return the result of find.
    """
    rel = 'next' if next_ else 'prev'
    return last_soup.find('a', rel=rel)
262
263
264
@classmethod
def get_a_navi_navinext(cls, last_soup, next_):
    """Implementation of get_navi_link.

    Look in `last_soup` for an 'a' tag with class 'navi navi-next' (when
    `next_` is true) or 'navi navi-prev' (otherwise) and return the
    result of find.
    """
    css_class = 'navi navi-next' if next_ else 'navi navi-prev'
    return last_soup.find('a', class_=css_class)
268
269
270
@classmethod
def get_a_navi_comicnavnext_navinext(cls, last_soup, next_):
    """Implementation of get_navi_link.

    Look in `last_soup` for an 'a' tag with class
    'navi comic-nav-next navi-next' (when `next_` is true) or
    'navi comic-nav-previous navi-prev' (otherwise) and return the
    result of find.
    """
    if next_:
        css_class = 'navi comic-nav-next navi-next'
    else:
        css_class = 'navi comic-nav-previous navi-prev'
    return last_soup.find('a', class_=css_class)
274
275
276
@classmethod
def get_a_comicnavbase_comicnavnext(cls, last_soup, next_):
    """Implementation of get_navi_link.

    Look in `last_soup` for an 'a' tag with class
    'comic-nav-base comic-nav-next' (when `next_` is true) or
    'comic-nav-base comic-nav-previous' (otherwise) and return the
    result of find.
    """
    if next_:
        css_class = 'comic-nav-base comic-nav-next'
    else:
        css_class = 'comic-nav-base comic-nav-previous'
    return last_soup.find('a', class_=css_class)
280
281
282
@classmethod
def get_a_navi_navifirst(cls):
    """Implementation of get_first_comic_link.

    Fetch the page at `cls.url` and return the result of looking for an
    'a' tag with class 'navi navi-first'.
    """
    soup = get_soup_at_url(cls.url)
    return soup.find('a', class_='navi navi-first')
286
287
288
@classmethod
def get_div_navfirst_a(cls):
    """Implementation of get_first_comic_link.

    Fetch the page at `cls.url` and return the result of looking for an
    'a' tag inside the 'div' with class "nav-first". None is returned
    when there is no such 'div' (instead of failing on the missing tag).
    """
    div = get_soup_at_url(cls.url).find('div', class_="nav-first")
    return div.find('a') if div else None
292
293
294
@classmethod
def get_a_comicnavbase_comicnavfirst(cls):
    """Implementation of get_first_comic_link.

    Fetch the page at `cls.url` and return the result of looking for an
    'a' tag with class 'comic-nav-base comic-nav-first'.
    """
    soup = get_soup_at_url(cls.url)
    return soup.find('a', class_='comic-nav-base comic-nav-first')
298
299
300
@classmethod
def simulate_first_link(cls):
    """Implementation of get_first_comic_link creating a link-like object from
    an URL provided by the class.

    The link-like object is a dict whose 'href' entry is `cls.first_url`.
    """
    return {'href': cls.first_url}
305
306
307
@classmethod
def navigate_to_first_comic(cls):
    """Implementation of get_first_comic_link navigating from a user provided
    URL to the first comic.

    Sometimes, the first comic cannot be reached directly so to start
    from the first comic one has to go to the previous comic until
    there is no previous comics. Once this URL is reached, it
    is better to hardcode it but for development purposes, it
    is convenient to have an automatic way to find it.

    The starting URL is read from standard input and every URL visited
    is printed. The result is a link-like dict whose 'href' entry is
    the last URL reached.
    """
    url = input("Get starting URL: ")
    print(url)
    comic = cls.get_prev_link(get_soup_at_url(url))
    while comic:
        prev_url = cls.get_url_from_link(comic)
        if prev_url == url:
            # A "previous" link leading to the current page would make
            # us loop forever: consider the first comic is reached.
            break
        url = prev_url
        print(url)
        comic = cls.get_prev_link(get_soup_at_url(url))
    return {'href': url}
326
327
328
class GenericEmptyComic(GenericComic):
    """Generic class for comics where nothing is to be done.

    It can be useful to deactivate temporarily comics that do not work
    properly by replacing `class MyComic(GenericWhateverComic)` with
    `class MyComic(GenericEmptyComic, GenericWhateverComic)`."""
    _categories = ('EMPTY', )

    @classmethod
    def get_next_comic(cls, last_comic):
        """Implementation of get_next_comic returning no comics.

        `last_comic` is ignored; an empty list is always returned.
        """
        cls.log("comic is considered as empty - returning no comic")
        return []
341
342
343 View Code Duplication
class ExtraFabulousComics(GenericNavigableComic):
    """Class to retrieve Extra Fabulous Comics."""
    name = 'efc'
    long_name = 'Extra Fabulous Comics'
    url = 'http://extrafabulouscomics.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Return a dict with the title, the sources of the images found
        under '<url>/wp-content/uploads/', the publication date
        (day/month/year) and a prefix built from the title.
        """
        # The site URL is escaped so that it is matched literally
        # (an unescaped '.' would match any character).
        img_src_re = re.compile('^%s/wp-content/uploads/' % re.escape(cls.url))
        imgs = soup.find_all('img', src=img_src_re)
        title = soup.find('meta', property='og:title')['content']
        # Only the 10 first characters are kept to match "%Y-%m-%d".
        date_str = soup.find('meta', property='article:published_time')['content'][:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        return {
            'title': title,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'prefix': title + '-'
        }
367
368
369 View Code Duplication
class GenericLeMondeBlog(GenericNavigableComic):
    """Generic class to retrieve comics from Le Monde blogs."""
    _categories = ('LEMONDE', 'FRANCAIS')
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    # Placeholder: read by simulate_first_link, to be set by subclasses.
    first_url = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Return a dict with the title, the short link ('url2'), the
        images taken from the 'og:image' meta tags and the publication
        date (day/month/year) parsed with the "fr_FR.utf8" locale.
        """
        url2 = soup.find('link', rel='shortlink')['href']
        title = soup.find('meta', property='og:title')['content']
        date_str = soup.find("span", class_="entry-date").string
        day = string_to_date(date_str, "%d %B %Y", "fr_FR.utf8")
        imgs = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'url2': url2,
            'img': [convert_iri_to_plain_ascii_uri(i['content']) for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
392
393
394
class ZepWorld(GenericLeMondeBlog):
    """Class to retrieve Zep World comics."""
    name = "zep"
    long_name = "Zep World"
    url = "http://zepworld.blog.lemonde.fr"
    # Starting point of the navigation.
    first_url = "http://zepworld.blog.lemonde.fr/2014/10/31/bientot-le-blog-de-zep/"
400
401
402
class Vidberg(GenericLeMondeBlog):
    """Class to retrieve Vidberg comics."""
    name = 'vidberg'
    long_name = "Vidberg - l'actu en patates"
    url = "http://vidberg.blog.lemonde.fr"
    # Not the first but I didn't find an efficient way to retrieve it
    first_url = "http://vidberg.blog.lemonde.fr/2012/02/09/revue-de-campagne-la-campagne-du-modem-semballe/"
409
410
411
class Plantu(GenericLeMondeBlog):
    """Class to retrieve Plantu comics."""
    name = 'plantu'
    long_name = "Plantu"
    url = "http://plantu.blog.lemonde.fr"
    # Starting point of the navigation.
    first_url = "http://plantu.blog.lemonde.fr/2014/10/28/stress-test-a-bruxelles/"
417
418
419
class XavierGorce(GenericLeMondeBlog):
    """Class to retrieve Xavier Gorce comics."""
    name = 'gorce'
    long_name = "Xavier Gorce"
    url = "http://xaviergorce.blog.lemonde.fr"
    # Starting point of the navigation.
    first_url = "http://xaviergorce.blog.lemonde.fr/2015/01/09/distinction/"
425
426
427
class CartooningForPeace(GenericLeMondeBlog):
    """Class to retrieve Cartooning For Peace comics."""
    name = 'forpeace'
    long_name = "Cartooning For Peace"
    url = "http://cartooningforpeace.blog.lemonde.fr"
    # Starting point of the navigation.
    first_url = "http://cartooningforpeace.blog.lemonde.fr/2014/12/15/bado/"
433
434
435
class Aurel(GenericLeMondeBlog):
    """Class to retrieve Aurel comics."""
    name = 'aurel'
    long_name = "Aurel"
    url = "http://aurel.blog.lemonde.fr"
    # Starting point of the navigation.
    first_url = "http://aurel.blog.lemonde.fr/2014/09/29/le-senat-repasse-a-droite/"
441
442
443
class LesCulottees(GenericLeMondeBlog):
    """Class to retrieve Les Culottees comics."""
    name = 'culottees'
    long_name = 'Les Culottees'
    url = "http://lesculottees.blog.lemonde.fr"
    # Starting point of the navigation.
    first_url = "http://lesculottees.blog.lemonde.fr/2016/01/11/clementine-delait-femme-a-barbe/"
449
450
451
class UneAnneeAuLycee(GenericLeMondeBlog):
    """Class to retrieve Une Annee Au Lycee comics."""
    name = 'lycee'
    long_name = 'Une Annee au Lycee'
    url = 'http://uneanneeaulycee.blog.lemonde.fr'
    # Starting point of the navigation.
    first_url = "http://uneanneeaulycee.blog.lemonde.fr/2016/06/13/la-semaine-du-bac-est-arrivee/"
457
458
459 View Code Duplication
class Rall(GenericNavigableComic):
    """Class to retrieve Ted Rall comics."""
    # Also on http://www.gocomics.com/tedrall
    name = 'rall'
    long_name = "Ted Rall"
    url = "http://rall.com/comic"
    _categories = ('RALL', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    # Not the first but I didn't find an efficient way to retrieve it
    first_url = "http://rall.com/2014/01/30/los-angeles-times-cartoon-well-miss-those-california-flowers"

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Return a dict with the title, the author, the publication date
        (day/month/year), the description and the sources of the images
        of the entry content.
        """
        title = soup.find('meta', property='og:title')['content']
        author = soup.find("span", class_="author vcard").find("a").string
        date_str = soup.find("span", class_="entry-date").string
        day = string_to_date(date_str, "%B %d, %Y")
        desc = soup.find('meta', property='og:description')['content']
        imgs = soup.find('div', class_='entry-content').find_all('img')
        imgs = imgs[:-7]  # remove social media buttons
        return {
            'title': title,
            'author': author,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'description': desc,
            'img': [i['src'] for i in imgs],
        }
490
491
492
class Dilem(GenericNavigableComic):
    """Class to retrieve Ali Dilem comics."""
    name = 'dilem'
    long_name = 'Ali Dilem'
    url = 'http://information.tv5monde.com/dilem'
    _categories = ('FRANCAIS', )
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = "http://information.tv5monde.com/dilem/2004-06-26"

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Return the 'a' tag found in the relevant 'li' tag or None when
        there is no such 'li'.
        """
        # prev is next / next is prev
        li = last_soup.find('li', class_='prev' if next_ else 'next')
        return li.find('a') if li else None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Return a dict with the short link, the title, the images taken
        from the 'og:image' meta tags and the date (day/month/year).
        """
        short_url = soup.find('link', rel='shortlink')['href']
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        imgs = soup.find_all('meta', property='og:image')
        # Only the 10 first characters are kept to match "%Y-%m-%d".
        date_str = soup.find('span', property='dc:date')['content'][:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        return {
            'short_url': short_url,
            'title': title,
            'img': [i['content'] for i in imgs],
            'day': day.day,
            'month': day.month,
            'year': day.year,
        }
526
527
528
class SpaceAvalanche(GenericNavigableComic):
    """Class to retrieve Space Avalanche comics."""
    name = 'avalanche'
    long_name = 'Space Avalanche'
    url = 'http://www.spaceavalanche.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        The link is a hardcoded link-like dict with 'href' and 'title'
        entries (both are read by get_comic_info).
        """
        return {'href': "http://www.spaceavalanche.com/2009/02/02/irish-sea/", 'title': "Irish Sea"}

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The title comes from the 'title' entry of `link` and the date
        from the three numeric path components of its URL. Return a dict
        with the title, the date (day/month/year) and the sources of the
        images of the entry.
        """
        url_date_re = re.compile(r'.*/([0-9]*)/([0-9]*)/([0-9]*)/.*$')
        title = link['title']
        url = cls.get_url_from_link(link)
        year, month, day = [int(s)
                            for s in url_date_re.match(url).groups()]
        imgs = soup.find("div", class_="entry").find_all("img")
        return {
            'title': title,
            'day': day,
            'month': month,
            'year': year,
            'img': [i['src'] for i in imgs],
        }
556
557
558
class ZenPencils(GenericNavigableComic):
    """Class to retrieve ZenPencils comics."""
    # Also on http://zenpencils.tumblr.com
    # Also on http://www.gocomics.com/zen-pencils
    name = 'zenpencils'
    long_name = 'Zen Pencils'
    url = 'http://zenpencils.com'
    _categories = ('ZENPENCILS', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = "http://zenpencils.com/comic/1-ralph-waldo-emerson-make-them-cry/"

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Return a dict with the title, the description, the author, the
        date (day/month/year) and the images of the 'comic' div (their
        sources being joined onto the site URL).
        """
        imgs = soup.find('div', id='comic').find_all('img')
        # imgs2 = soup.find_all('meta', property='og:image')
        post = soup.find('div', class_='post-content')
        author = post.find("span", class_="post-author").find("a").string
        title = soup.find('meta', property='og:title')['content']
        date_str = post.find('span', class_='post-date').string
        day = string_to_date(date_str, "%B %d, %Y")
        # Sanity checks on what is expected from the page.
        assert imgs
        assert all(i['alt'] == i['title'] for i in imgs)
        assert all(i['alt'] in (title, "") for i in imgs)
        desc = soup.find('meta', property='og:description')['content']
        return {
            'title': title,
            'description': desc,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
        }
593
594
595
class ItsTheTie(GenericNavigableComic):
    """Class to retrieve It's the tie comics."""
    # Also on http://itsthetie.tumblr.com
    # Also on https://tapastic.com/series/itsthetie
    name = 'tie'
    long_name = "It's the tie"
    url = "http://itsthetie.com"
    _categories = ('TIE', )
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Return a dict with the title, the date (day/month/year), the
        images (main images followed by the bonus images not already
        listed) and the tags (empty string when there is no tag).
        """
        title = soup.find('h1', class_='comic-title').find('a').string
        date_str = soup.find('header', class_='comic-meta entry-meta').find('a').string
        day = string_to_date(date_str, "%B %d, %Y")
        # Bonus images may or may not be in meta og:image.
        imgs = soup.find_all('meta', property='og:image')
        imgs_src = [i['content'] for i in imgs]
        bonus = soup.find_all('img', attrs={'data-oversrc': True})
        bonus_src = [b['data-oversrc'] for b in bonus]
        all_imgs_src = imgs_src + [s for s in bonus_src if s not in imgs_src]
        # This particular image is filtered out.
        all_imgs_src = [s for s in all_imgs_src if not s.endswith("/2016/01/bonus-panel.png")]
        tag_meta = soup.find('meta', property='article:tag')
        tags = tag_meta['content'] if tag_meta else ""
        return {
            'title': title,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': all_imgs_src,
            'tags': tags,
        }
629
630
631
class PenelopeBagieu(GenericNavigableComic):
    """Class to retrieve comics from Penelope Bagieu's blog."""
    name = 'bagieu'
    long_name = 'Ma vie est tout a fait fascinante (Bagieu)'
    url = 'http://www.penelope-jolicoeur.com'
    _categories = ('FRANCAIS', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.penelope-jolicoeur.com/2007/02/ma-vie-mon-oeuv.html'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Return a dict with the title, the sources of the images of the
        entry body and the date (day/month/year) parsed with the
        "fr_FR.utf8" locale.
        """
        date_str = soup.find('h2', class_='date-header').string
        day = string_to_date(date_str, "%A %d %B %Y", "fr_FR.utf8")
        imgs = soup.find('div', class_='entry-body').find_all('img')
        title = soup.find('h3', class_='entry-header').string
        return {
            'title': title,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
655
656
657
class OneOneOneOneComic(GenericNavigableComic):
    """Class to retrieve 1111 Comics."""
    # Also on http://comics1111.tumblr.com
    # Also on https://tapastic.com/series/1111-Comics
    name = '1111'
    long_name = '1111 Comics'
    url = 'http://www.1111comics.me'
    _categories = ('ONEONEONEONE', )
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Return a dict with the title, the date (day/month/year) and the
        images taken from the 'og:image' meta tags.
        """
        title = soup.find('h1', class_='comic-title').find('a').string
        date_str = soup.find('header', class_='comic-meta entry-meta').find('a').string
        day = string_to_date(date_str, "%B %d, %Y")
        imgs = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [i['content'] for i in imgs],
        }
682
683
684
class AngryAtNothing(GenericNavigableComic):
    """Class to retrieve Angry at Nothing comics."""
    # Also on http://tapastic.com/series/Comics-yeah-definitely-comics-
    name = 'angry'
    long_name = 'Angry At Nothing'
    url = 'http://www.angryatnothing.net'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Return a dict with the title, the date (day/month/year) and the
        images taken from the 'og:image' meta tags.
        """
        title = soup.find('h1', class_='comic-title').find('a').string
        date_str = soup.find('header', class_='comic-meta entry-meta').find('a').string
        day = string_to_date(date_str, "%B %d, %Y")
        imgs = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [i['content'] for i in imgs],
        }
707
708
709
class NeDroid(GenericNavigableComic):
    """Class to retrieve NeDroid comics."""
    name = 'nedroid'
    long_name = 'NeDroid'
    url = 'http://nedroid.com'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The number comes from the short link ('?p=<num>') and the date
        from the source of the single image of the 'comic' div. Return a
        dict with the short link, both titles of the image ('alt' and
        'title' attributes), the image, the date (day/month/year) and
        the number.
        """
        # Literal parts are escaped so that '.' only matches a dot.
        short_url_re = re.compile(r'^%s/\?p=([0-9]*)' % re.escape(cls.url))
        comic_url_re = re.compile(r'//nedroid\.com/comics/([0-9]*)-([0-9]*)-([0-9]*).*')
        short_url = cls.get_url_from_link(soup.find('link', rel='shortlink'))
        num = int(short_url_re.match(short_url).groups()[0])
        imgs = soup.find('div', id='comic').find_all('img')
        # Checked before the image is used: exactly one image is expected.
        assert len(imgs) == 1
        img = imgs[0]
        year, month, day = [int(s) for s in comic_url_re.match(img['src']).groups()]
        title = img['alt']
        title2 = img['title']
        return {
            'short_url': short_url,
            'title': title,
            'title2': title2,
            'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
            'day': day,
            'month': month,
            'year': year,
            'num': num,
        }
740
741
742
class Garfield(GenericNavigableComic):
    """Class to retrieve Garfield comics."""
    # Also on http://www.gocomics.com/garfield
    name = 'garfield'
    long_name = 'Garfield'
    url = 'https://garfield.com'
    _categories = ('GARFIELD', )
    get_first_comic_link = simulate_first_link
    first_url = 'https://garfield.com/comic/1978/06/19'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic."""
        return last_soup.find('a', class_='comic-arrow-right' if next_ else 'comic-arrow-left')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The date is read from the URL of `link`
        ('<url>/comic/<year>/<month>/<day>'). Return a dict with the
        date (day/month/year) and the sources of the images.
        """
        url = cls.get_url_from_link(link)
        # The site URL is escaped so that it is matched literally.
        date_re = re.compile('^%s/comic/([0-9]*)/([0-9]*)/([0-9]*)' % re.escape(cls.url))
        year, month, day = [int(s) for s in date_re.match(url).groups()]
        imgs = soup.find('div', class_='comic-display').find_all('img', class_='img-responsive')
        return {
            'month': month,
            'year': year,
            'day': day,
            'img': [i['src'] for i in imgs],
        }
770
771
772
class Dilbert(GenericNavigableComic):
    """Class to retrieve Dilbert comics."""
    # Also on http://www.gocomics.com/dilbert-classics
    name = 'dilbert'
    long_name = 'Dilbert'
    url = 'http://dilbert.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://dilbert.com/strip/1989-04-16'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns None when the navigation div is not found.
        """
        nav_class = 'nav-comic nav-right' if next_ else 'nav-comic nav-left'
        nav_div = last_soup.find('div', class_=nav_class)
        if not nav_div:
            return None
        return nav_div.find('a')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Every value is read from the meta tags of the page.
        """
        def meta_content(prop):
            """Return the content of the first meta tag with the given property."""
            return soup.find('meta', property=prop)['content']

        title = meta_content('og:title')
        image_metas = soup.find_all('meta', property='og:image')
        desc = meta_content('og:description')
        published = string_to_date(meta_content('article:publish_date'), "%B %d, %Y")
        author = meta_content('article:author')
        tags = meta_content('article:tag')
        return {
            'title': title,
            'description': desc,
            'img': [meta['content'] for meta in image_metas],
            'author': author,
            'tags': tags,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
808
809
810
class VictimsOfCircumsolar(GenericNavigableComic):
    """Class to retrieve VictimsOfCircumsolar comics."""
    name = 'circumsolar'
    long_name = 'Victims Of Circumsolar'
    url = 'http://www.victimsofcircumsolar.com'
    get_navi_link = get_a_navi_comicnavnext_navinext
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.victimsofcircumsolar.com/comic/modern-addiction'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Title and description are taken from the last matching og: meta tags.
        """
        # Date is on the archive page
        title_metas = soup.find_all('meta', property='og:title')
        title = title_metas[-1]['content']
        desc_metas = soup.find_all('meta', property='og:description')
        desc = desc_metas[-1]['content']
        comic_div = soup.find('div', id='comic')
        pictures = comic_div.find_all('img')
        # Every image is expected to carry the comic title as both title and alt
        assert all(pic['title'] == pic['alt'] == title for pic in pictures)
        return {
            'title': title,
            'description': desc,
            'img': [pic['src'] for pic in pictures],
        }
832
833
834
class ThreeWordPhrase(GenericNavigableComic):
    """Class to retrieve Three Word Phrase comics."""
    # Also on http://www.threewordphrase.tumblr.com
    name = 'threeword'
    long_name = 'Three Word Phrase'
    url = 'http://threewordphrase.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics."""
        return get_soup_at_url(cls.url).find('img', src='/firstlink.gif').parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns the element wrapping the navigation image, or None when the
        image is missing or its parent has no href.
        """
        img = last_soup.find('img', src='/nextlink.gif' if next_ else '/prevlink.gif')
        if img is None:
            # find() found no navigation image: nothing to follow
            return None
        link = img.parent
        return None if link.get('href') is None else link

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Keeps every image of the page except those whose src ends with one
        of the excluded suffixes below.
        """
        title = soup.find('title')
        imgs = [img for img in soup.find_all('img')
                if not img['src'].endswith(
                    ('link.gif', '32.png', 'twpbookad.jpg',
                     'merchad.jpg', 'header.gif', 'tipjar.jpg'))]
        return {
            'title': title.string if title else None,
            'title2': '  '.join(img.get('alt') for img in imgs if img.get('alt')),
            'img': [urljoin_wrapper(cls.url, img['src']) for img in imgs],
        }
866
867
868
class DeadlyPanel(GenericEmptyComic, GenericNavigableComic):
    """Class to retrieve Deadly Panel comics."""
    # Also on https://tapastic.com/series/deadlypanel
    name = 'deadly'
    long_name = 'Deadly Panel'
    url = 'http://www.deadlypanel.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Collects the src of every image found in the 'comic' div.
        """
        comic_div = soup.find('div', id='comic')
        pictures = comic_div.find_all('img')
        # alt and title are expected to be the same on each image
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        return {
            'img': [pic['src'] for pic in pictures],
        }
885
886
887
class TheGentlemanArmchair(GenericNavigableComic):
    """Class to retrieve The Gentleman Armchair comics."""
    name = 'gentlemanarmchair'
    long_name = 'The Gentleman Armchair'
    url = 'http://thegentlemansarmchair.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Title, author and date are read from the post header elements,
        the images from the 'comic' div.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        posted = string_to_date(soup.find('span', class_='post-date').string, "%B %d, %Y")
        pictures = soup.find('div', id='comic').find_all('img')
        return {
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'author': author,
            'month': posted.month,
            'year': posted.year,
            'day': posted.day,
        }
911
912
913 View Code Duplication
class MyExtraLife(GenericNavigableComic):
    """Class to retrieve My Extra Life comics."""
    name = 'extralife'
    long_name = 'My Extra Life'
    url = 'http://www.myextralife.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics."""
        home = get_soup_at_url(cls.url)
        return home.find('a', class_='comic_nav_link first_comic_link')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Images with an empty src are left out of the result.
        """
        title = soup.find("h1", class_="comic_title").string
        posted = string_to_date(soup.find("span", class_="comic_date").string, "%B %d, %Y")
        pictures = soup.find_all("img", class_="comic")
        # alt and title of each image are expected to match the comic title
        assert all(pic['alt'] == pic['title'] == title for pic in pictures)
        return {
            'title': title,
            'img': [pic['src'] for pic in pictures if pic["src"]],
            'day': posted.day,
            'month': posted.month,
            'year': posted.year
        }
940
941
942
class SaturdayMorningBreakfastCereal(GenericNavigableComic):
    """Class to retrieve Saturday Morning Breakfast Cereal comics."""
    # Also on http://www.gocomics.com/saturday-morning-breakfast-cereal
    # Also on http://smbc-comics.tumblr.com
    name = 'smbc'
    long_name = 'Saturday Morning Breakfast Cereal'
    url = 'http://www.smbc-comics.com'
    _categories = ('SMBC', )
    get_navi_link = get_a_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics."""
        return get_soup_at_url(cls.url).find('a', rel='start')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The main image is the 'cc-comic' img. An extra image is added when
        the optional 'aftercomic' div contains an img with a non-empty src.
        """
        image1 = soup.find('img', id='cc-comic')
        imgs = [image1['src']]
        aftercomic = soup.find('div', id='aftercomic')
        # The div may be missing, or present without any image inside
        after_img = aftercomic.find('img') if aftercomic else None
        after_src = after_img.get('src') if after_img is not None else None
        if after_src:
            imgs.append(after_src)
        date_str = soup.find('div', class_='cc-publishtime').contents[0]
        day = string_to_date(date_str, "%B %d, %Y")
        return {
            'title': image1['title'],
            'img': [urljoin_wrapper(cls.url, i) for i in imgs],
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
974
975
976
class PerryBibleFellowship(GenericListableComic):
    """Class to retrieve Perry Bible Fellowship comics."""
    name = 'pbf'
    long_name = 'Perry Bible Fellowship'
    url = 'http://pbfcomics.com'
    get_url_from_archive_element = join_cls_url_to_href

    @classmethod
    def get_archive_elements(cls):
        """Get the comic links of the main page, in reversed page order."""
        comic_link_re = re.compile('^/[0-9]*/$')
        anchors = get_soup_at_url(cls.url).find_all('a', href=comic_link_re)
        return reversed(anchors)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The comic number is read from the 'name' attribute of the archive link.
        """
        comic_url = cls.get_url_from_archive_element(link)
        comic_img_re = re.compile('^/archive_b/PBF.*')
        name = link.string
        num = int(link['name'])
        href = link['href']
        # The href is expected to be built from the comic number
        assert href == '/%d/' % num
        pictures = soup.find_all('img', src=comic_img_re)
        assert len(pictures) == 1
        assert pictures[0]['alt'] == name
        return {
            'num': num,
            'name': name,
            'img': [urljoin_wrapper(comic_url, pic['src']) for pic in pictures],
            'prefix': '%d-' % num,
        }
1006 View Code Duplication
1007
1008
class Mercworks(GenericNavigableComic):
    """Class to retrieve Mercworks comics."""
    # Also on http://mercworks.tumblr.com
    name = 'mercworks'
    long_name = 'Mercworks'
    url = 'http://mercworks.net'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Values come from the og: and shareaholic: meta tags; the description
        is an empty string when its meta tag is absent.
        """
        title = soup.find('meta', property='og:title')['content']
        desc_meta = soup.find('meta', property='og:description')
        if desc_meta:
            desc = desc_meta['content']
        else:
            desc = ""
        author = soup.find('meta', attrs={'name': 'shareaholic:article_author_name'})['content']
        published = soup.find('meta', attrs={'name': 'shareaholic:article_published_time'})['content']
        # Only the first 10 characters (the "%Y-%m-%d" part) are parsed
        day = string_to_date(published[:10], "%Y-%m-%d")
        image_metas = soup.find_all('meta', property='og:image')
        return {
            'img': [meta['content'] for meta in image_metas],
            'title': title,
            'author': author,
            'desc': desc,
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
1037
1038
1039
class BerkeleyMews(GenericListableComic):
    """Class to retrieve Berkeley Mews comics."""
    # Also on http://mews.tumblr.com
    # Also on http://www.gocomics.com/berkeley-mews
    name = 'berkeley'
    long_name = 'Berkeley Mews'
    url = 'http://www.berkeleymews.com'
    _categories = ('BERKELEY', )
    get_url_from_archive_element = get_href
    comic_num_re = re.compile('%s/\\?p=([0-9]*)$' % url)

    @classmethod
    def get_archive_elements(cls):
        """Get the comic links of the archive page, in reversed page order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, "?page_id=2"))
        return reversed(archive_soup.find_all('a', href=cls.comic_num_re))

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The number comes from the comic url and the date from the image url.
        """
        comic_date_re = re.compile('.*/([0-9]*)-([0-9]*)-([0-9]*)-.*')
        comic_url = cls.get_url_from_archive_element(link)
        num = int(cls.comic_num_re.match(comic_url).group(1))
        picture = soup.find('div', id='comic').find('img')
        # alt and title are expected to be the same
        assert picture['alt'] == picture['title']
        title2 = picture['title']
        picture_url = picture['src']
        year, month, day = map(int, comic_date_re.match(picture_url).groups())
        return {
            'num': num,
            'title': link.string,
            'title2': title2,
            'img': [picture_url],
            'year': year,
            'month': month,
            'day': day,
        }
1075
1076
1077
class GenericBouletCorp(GenericNavigableComic):
    """Generic class to retrieve BouletCorp comics in different languages."""
    # Also on http://bouletcorp.tumblr.com
    _categories = ('BOULET', )
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        It is the first anchor of the 'centered_nav' div of the main page.
        """
        nav_div = get_soup_at_url(cls.url).find('div', id='centered_nav')
        return nav_div.find_all('a')[0]

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The date is parsed from the comic url. Images without src are skipped
        and the non-empty image titles are joined into 'texts'.
        """
        comic_url = cls.get_url_from_link(link)
        url_date_re = re.compile('^%s/([0-9]*)/([0-9]*)/([0-9]*)/' % cls.url)
        year, month, day = map(int, url_date_re.match(comic_url).groups())
        story = soup.find('div', id='notes').find('div', class_='storycontent')
        pictures = story.find_all('img')
        texts = '  '.join(filter(None, (pic.get('title') for pic in pictures)))
        title = soup.find('title').string
        return {
            'img': [convert_iri_to_plain_ascii_uri(pic['src']) for pic in pictures if pic.get('src') is not None],
            'title': title,
            'texts': texts,
            'year': year,
            'month': month,
            'day': day,
        }
1105
1106
1107
class BouletCorp(GenericBouletCorp):
    """Class to retrieve BouletCorp comics."""
    # Retrieval logic is inherited from GenericBouletCorp
    name = 'boulet'
    long_name = 'Boulet Corp'
    url = 'http://www.bouletcorp.com'
    # Redefines the _categories attribute set on GenericBouletCorp
    _categories = ('FRANCAIS', )
1113
1114
1115
class BouletCorpEn(GenericBouletCorp):
    """Class to retrieve EnglishBouletCorp comics."""
    # Retrieval logic is inherited from GenericBouletCorp
    name = 'boulet_en'
    long_name = 'Boulet Corp English'
    url = 'http://english.bouletcorp.com'
1120
1121
1122
class AmazingSuperPowers(GenericNavigableComic):
    """Class to retrieve Amazing Super Powers comics."""
    name = 'asp'
    long_name = 'Amazing Super Powers'
    url = 'http://www.amazingsuperpowers.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The title is built by joining the titles of the comic images.
        """
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        posted = string_to_date(soup.find('span', class_='post-date').string, "%B %d, %Y")
        pictures = soup.find('div', id='comic').find_all('img')
        title = ' '.join(pic['title'] for pic in pictures)
        # alt and title are expected to be the same on each image
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        return {
            'title': title,
            'author': author,
            'img': [pic['src'] for pic in pictures],
            'day': posted.day,
            'month': posted.month,
            'year': posted.year
        }
1147
1148
1149
class ToonHole(GenericNavigableComic):
    """Class to retrieve Toon Holes comics."""
    # Also on http://tapastic.com/series/TOONHOLE
    name = 'toonhole'
    long_name = 'Toon Hole'
    url = 'http://www.toonhole.com'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The title is the alt of the first comic image, or an empty string
        when the 'comic' div holds no image.
        """
        short_url = soup.find('link', rel='shortlink')['href']
        meta_text = soup.find('div', class_='entry-meta').contents[0].strip()
        posted = string_to_date(meta_text, "%B %d, %Y")
        pictures = soup.find('div', id='comic').find_all('img')
        title = ""
        if pictures:
            first = pictures[0]
            title = first['alt']
            assert first['title'] == title
        return {
            'short_url': short_url,
            'title': title,
            'month': posted.month,
            'year': posted.year,
            'day': posted.day,
            'img': [convert_iri_to_plain_ascii_uri(pic['src']) for pic in pictures],
        }
1179
1180
1181
class Channelate(GenericNavigableComic):
    """Class to retrieve Channelate comics."""
    name = 'channelate'
    long_name = 'Channelate'
    url = 'http://www.channelate.com'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        When the page has an 'extrapanelbutton' div, the linked page is
        fetched and its 'extrapanelimage' images are added to the result.
        """
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        posted = string_to_date(soup.find('span', class_='post-date').string, '%Y/%m/%d')
        title = soup.find('meta', property='og:title')['content']
        comic_div = soup.find('div', id='comic')
        pictures = comic_div.find_all('img') if comic_div else []
        extra_url = None
        extra_button = soup.find('div', id='extrapanelbutton')
        if extra_button:
            extra_url = extra_button.find('a')['href']
            extra_page = get_soup_at_url(extra_url)
            pictures.extend(extra_page.find_all('img', class_='extrapanelimage'))
        return {
            'url_extra': extra_url,
            'title': title,
            'author': author,
            'month': posted.month,
            'year': posted.year,
            'day': posted.day,
            'img': [urljoin_wrapper(cls.url, pic['src']) for pic in pictures],
        }
1215
1216
1217
class CyanideAndHappiness(GenericNavigableComic):
    """Class to retrieve Cyanide And Happiness comics."""
    name = 'cyanide'
    long_name = 'Cyanide and Happiness'
    url = 'http://explosm.net'
    _categories = ('NSFW', )
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics."""
        return get_soup_at_url(cls.url).find('a', title='Oldest comic')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns None when the navigation anchor is missing or has no href.
        """
        # NOTE(review): the trailing space in 'previous-comic ' is kept as is
        # - confirm it matches the markup of the site.
        link = last_soup.find('a', class_='next-comic' if next_ else 'previous-comic ')
        if link is None:
            # find() found no navigation anchor: nothing to follow
            return None
        return None if link.get('href') is None else link

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The number is the second to last '/'-separated part of the og:url
        value; the author text is expected to start with 'by '.
        """
        url2 = soup.find('meta', property='og:url')['content']
        num = int(url2.split('/')[-2])
        date_str = soup.find('h3').find('a').string
        day = string_to_date(date_str, '%Y.%m.%d')
        author = soup.find('small', class_="author-credit-name").string
        assert author.startswith('by ')
        author = author[3:]  # drop the 'by ' prefix
        imgs = soup.find_all('img', id='main-comic')
        return {
            'num': num,
            'author': author,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'prefix': '%d-' % num,
            'img': [convert_iri_to_plain_ascii_uri(urljoin_wrapper(cls.url, i['src'])) for i in imgs]
        }
1256
1257
1258
class MrLovenstein(GenericComic):
    """Class to retrieve Mr Lovenstein comics."""
    # Also on https://tapastic.com/series/MrLovenstein
    name = 'mrlovenstein'
    long_name = 'Mr. Lovenstein'
    url = 'http://www.mrlovenstein.com'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generator to get the next comic. Implementation of GenericComic's abstract method.

        The range of comic numbers is derived from the links of the main
        page; it starts right after last_comic['num'] when last_comic is given.
        """
        # TODO: more info from http://www.mrlovenstein.com/archive
        comic_num_re = re.compile('^/comic/([0-9]*)$')
        home_links = get_soup_at_url(cls.url).find_all('a', href=comic_num_re)
        nums = [int(comic_num_re.match(anchor['href']).group(1)) for anchor in home_links]
        first, last = min(nums), max(nums)
        if last_comic:
            first = last_comic['num'] + 1
        comic_img_re = re.compile('^/images/comics/')
        for num in range(first, last + 1):
            url = urljoin_wrapper(cls.url, '/comic/%d' % num)
            soup = get_soup_at_url(url)
            # Images are handled in the reverse of their page order
            pictures = soup.find_all('img', src=comic_img_re)[::-1]
            description = soup.find('meta', attrs={'name': 'description'})['content']
            yield {
                'url': url,
                'num': num,
                'texts': '  '.join(filter(None, (pic.get('title') for pic in pictures))),
                'img': [urljoin_wrapper(url, pic['src']) for pic in pictures],
                'description': description,
            }
1288
1289
1290
class DinosaurComics(GenericListableComic):
    """Class to retrieve Dinosaur Comics comics."""
    name = 'dinosaur'
    long_name = 'Dinosaur Comics'
    url = 'http://www.qwantz.com'
    get_url_from_archive_element = get_href
    comic_link_re = re.compile('^%s/index.php\\?comic=([0-9]*)$' % url)

    @classmethod
    def get_archive_elements(cls):
        """Get the comic links of the archive page, in reversed page order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive.php'))
        anchors = archive_soup.find_all('a', href=cls.comic_link_re)
        # first link is random -> skip it
        return reversed(anchors[1:])

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The date is the text of the archive link and the text is the string
        of the element following it.
        """
        comic_url = cls.get_url_from_archive_element(link)
        num = int(cls.comic_link_re.match(comic_url).group(1))
        date_str = link.string
        text = link.next_sibling.string
        posted = string_to_date(remove_st_nd_rd_th_from_date(date_str), "%B %d, %Y")
        comic_img_re = re.compile('^%s/comics/' % cls.url)
        picture = soup.find('img', src=comic_img_re)
        return {
            'month': posted.month,
            'year': posted.year,
            'day': posted.day,
            'img': [picture.get('src')],
            'title': picture.get('title'),
            'text': text,
            'num': num,
        }
1323
1324
1325
class ButterSafe(GenericListableComic):
    """Class to retrieve Butter Safe comics."""
    name = 'butter'
    long_name = 'ButterSafe'
    url = 'http://buttersafe.com'
    get_url_from_archive_element = get_href
    comic_link_re = re.compile('^%s/([0-9]*)/([0-9]*)/([0-9]*)/.*' % url)

    @classmethod
    def get_archive_elements(cls):
        """Get the comic links of the archive page, in reversed page order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive/'))
        return reversed(archive_soup.find_all('a', href=cls.comic_link_re))

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The date is parsed from the comic url, the title is the link text.
        """
        comic_url = cls.get_url_from_archive_element(link)
        title = link.string
        year, month, day = map(int, cls.comic_link_re.match(comic_url).groups())
        picture = soup.find('div', id='comic').find('img')
        # The alt of the image is expected to be the comic title
        assert picture['alt'] == title
        return {
            'title': title,
            'day': day,
            'month': month,
            'year': year,
            'img': [picture['src']],
        }
1353
1354
1355
class CalvinAndHobbes(GenericComic):
    """Class to retrieve Calvin and Hobbes comics."""
    # Also on http://www.gocomics.com/calvinandhobbes/
    name = 'calvin'
    long_name = 'Calvin and Hobbes'
    # This is not through any official webpage but eh...
    url = 'http://marcel-oehler.marcellosendos.ch/comics/ch/'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generator to get the next comic. Implementation of GenericComic's abstract method.

        Walks the 'year/month/' links of the main page and yields the comics
        dated after the last retrieved one (or after 1985-11-01 when
        last_comic is empty).
        """
        last_date = get_date_for_comic(
            last_comic) if last_comic else date(1985, 11, 1)
        link_re = re.compile('^([0-9]*)/([0-9]*)/')
        for link in get_soup_at_url(cls.url).find_all('a', href=link_re):
            url = link['href']
            # Strings are kept as found: they are reused to build the urls
            year_str, month_str = link_re.match(url).groups()
            year, month = int(year_str), int(month_str)
            # Skip the month page when even its first day plus 31 days is before last_date
            if date(year, month, 1) + timedelta(days=31) < last_date:
                continue
            img_re = re.compile('^%s%s([0-9]*)' % (year_str, month_str))
            month_url = urljoin_wrapper(cls.url, url)
            for img in get_soup_at_url(month_url).find_all('img', src=img_re):
                img_src = img['src']
                day = int(img_re.match(img_src).groups()[0])
                comic_date = date(year, month, day)
                if comic_date > last_date:
                    yield {
                        'url': month_url,
                        'year': year,
                        'month': month,
                        'day': day,
                        'img': ['%s%s/%s/%s' % (cls.url, year_str, month_str, img_src)],
                    }
                    last_date = comic_date
1389
1390
1391
class AbstruseGoose(GenericListableComic):
    """Class to retrieve AbstruseGoose Comics."""
    name = 'abstruse'
    long_name = 'Abstruse Goose'
    url = 'http://abstrusegoose.com'
    get_url_from_archive_element = get_href
    comic_url_re = re.compile('^%s/([0-9]*)$' % url)
    comic_img_re = re.compile('^%s/strips/.*' % url)

    @classmethod
    def get_archive_elements(cls):
        """Get the comic links of the archive page, in page order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive'))
        return archive_soup.find_all('a', href=cls.comic_url_re)

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Get information about a particular comics.

        The number is parsed from the comic url, the title is the link text.
        """
        comic_url = cls.get_url_from_archive_element(archive_elt)
        num = int(cls.comic_url_re.match(comic_url).group(1))
        return {
            'num': num,
            'title': archive_elt.string,
            'img': [soup.find('img', src=cls.comic_img_re)['src']]
        }
1414
1415
1416
class PhDComics(GenericNavigableComic):
    """Class to retrieve PHD Comics."""
    name = 'phd'
    long_name = 'PhD Comics'
    url = 'http://phdcomics.com/comics/archive.php'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics."""
        first_button = get_soup_at_url(cls.url).find('img', src='images/first_button.gif')
        return first_button.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns None when the navigation button image is not found.
        """
        button_src = 'images/next_button.gif' if next_ else 'images/prev_button.gif'
        button = last_soup.find('img', src=button_src)
        if button is None:
            return None
        return button.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        When the date found on the page cannot be parsed, a message is
        printed and today's date is used instead.
        """
        date_font = soup.find('font', face='Arial,Helvetica,Geneva,Swiss,SunSans-Regular', color='white')
        date_str = date_font.string.strip()
        try:
            posted = string_to_date(date_str, '%m/%d/%Y')
        except ValueError:
            print("Invalid date %s" % date_str)
            posted = date.today()
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        comic_src = soup.find('img', id='comic')['src']
        return {
            'year': posted.year,
            'month': posted.month,
            'day': posted.day,
            'img': [comic_src],
            'title': title,
        }
1451
1452
1453
class Octopuns(GenericEmptyComic, GenericNavigableComic):
    """Class to retrieve Octopuns comics."""
    # Also on http://octopuns.tumblr.com
    name = 'octopuns'
    long_name = 'Octopuns'
    url = 'http://www.octopuns.net'

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics."""
        return get_soup_at_url(cls.url).find('img', src=re.compile('.*/First.png')).parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns the element wrapping the navigation image, or None when the
        image is missing or its parent has no href.
        """
        img = last_soup.find('img', src=re.compile('.*/Next.png' if next_ else '.*/Back.png'))
        if img is None:
            # find() found no navigation image: nothing to follow
            return None
        link = img.parent
        return None if link.get('href') is None else link

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Title and date are read from the post headers, the images from the
        'image_src' link tags.
        """
        title = soup.find('h3', class_='post-title entry-title').string
        date_str = soup.find('h2', class_='date-header').string
        day = string_to_date(date_str, "%A, %B %d, %Y")
        imgs = soup.find_all('link', rel='image_src')
        return {
            'img': [i['href'] for i in imgs],
            'title': title,
            'day': day.day,
            'month': day.month,
            'year': day.year,
        }
1485
1486
1487
class Quarktees(GenericNavigableComic):
    """Class to retrieve the Quarktees comics."""
    name = 'quarktees'
    long_name = 'Quarktees'
    url = 'http://www.quarktees.com/blogs/news'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.quarktees.com/blogs/news/12486621-coming-soon'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic."""
        anchor_id = 'article-next' if next_ else 'article-prev'
        return last_soup.find('a', id=anchor_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Collects every image found in the 'single-article' div.
        """
        title = soup.find('meta', property='og:title')['content']
        pictures = soup.find('div', class_='single-article').find_all('img')
        return {
            'title': title,
            'img': [urljoin_wrapper(cls.url, pic['src']) for pic in pictures],
        }
1511
1512
1513
class OverCompensating(GenericNavigableComic):
    """Retriever for the Over Compensating comics."""
    name = 'compensating'
    long_name = 'Over Compensating'
    url = 'http://www.overcompensating.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the home-page link whose href ends with 'comic=1'."""
        home = get_soup_at_url(cls.url)
        return home.find('a', href=re.compile('comic=1$'))

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the <a> whose title marks the next (or previous) comic."""
        wanted_title = 'next comic' if next_ else 'go back already'
        return last_soup.find('a', title=wanted_title)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (number, image URL, title) for one comic page.

        The comic number is the trailing ``comic=<digits>`` part of the comic
        url; the image is the first <img> whose src starts with /oc/comics/.
        """
        img_src_re = re.compile('^/oc/comics/.*')
        comic_num_re = re.compile('.*comic=([0-9]*)$')
        comic_url = cls.get_url_from_link(link)
        number_match = comic_num_re.match(comic_url)
        num = int(number_match.group(1))
        picture = soup.find('img', src=img_src_re)
        return {
            'num': num,
            'img': [urljoin_wrapper(comic_url, picture['src'])],
            'title': picture.get('title')
        }
1543
1544
1545
class Oglaf(GenericNavigableComic):
    """Retriever for the Oglaf comics."""
    name = 'oglaf'
    long_name = 'Oglaf [NSFW]'
    url = 'http://oglaf.com'
    _categories = ('NSFW', )
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the parent element of div#st found on the home page."""
        home = get_soup_at_url(cls.url)
        return home.find("div", id="st").parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the parent of div#nx (next) or div#pvs (previous), if any."""
        marker = last_soup.find("div", id="nx" if next_ else "pvs")
        if not marker:
            return None
        return marker.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title, images, description) for one page.

        Exactly one title image (inside div#tt) and one strip image (img#strip)
        are expected; the description joins their ``title`` attributes.
        """
        title = soup.find('title').string
        header_pics = soup.find('div', id='tt').find_all('img')
        assert len(header_pics) == 1
        strip_pics = soup.find_all('img', id='strip')
        assert len(strip_pics) == 1
        pictures = header_pics + strip_pics
        desc = ' '.join(pic['title'] for pic in pictures)
        return {
            'title': title,
            'img': [pic['src'] for pic in pictures],
            'description': desc,
        }
1579
1580
1581
class ScandinaviaAndTheWorld(GenericNavigableComic):
    """Retriever for the Scandinavia And The World comics."""
    name = 'satw'
    long_name = 'Scandinavia And The World'
    url = 'http://satwcomic.com'
    get_first_comic_link = simulate_first_link
    first_url = 'http://satwcomic.com/sweden-denmark-and-norway'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the <a> whose accesskey is 'n' (next) or 'p' (previous)."""
        key = 'n' if next_ else 'p'
        return last_soup.find('a', accesskey=key)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title, description, images) for one page.

        Title and description come from <meta> tags; images are the <img>
        elements carrying itemprop="image".
        """
        label_meta = soup.find('meta', attrs={'name': 'twitter:label1'})
        title = label_meta['content']
        desc_meta = soup.find('meta', property='og:description')
        desc = desc_meta['content']
        pictures = soup.find_all('img', itemprop="image")
        return {
            'title': title,
            'description': desc,
            'img': [pic['src'] for pic in pictures],
        }
1605
1606
1607
class SomethingOfThatIlk(GenericEmptyComic):  # Does not exist anymore
    """Placeholder for the Something Of That Ilk comics.

    Only identification attributes are declared here.
    """
    name = 'somethingofthatilk'
    long_name = 'Something Of That Ilk'
    url = 'http://www.somethingofthatilk.com'
1612
1613
1614
class InfiniteMonkeyBusiness(GenericNavigableComic):
    """Retriever for the Infinite Monkey Business comics."""
    name = 'monkey'
    long_name = 'Infinite Monkey Business'
    url = 'http://infinitemonkeybusiness.net'
    get_navi_link = get_a_navi_comicnavnext_navinext
    get_first_comic_link = simulate_first_link
    first_url = 'http://infinitemonkeybusiness.net/comic/pillory/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title and images of div#comic) for one page."""
        og_title = soup.find('meta', property='og:title')
        title = og_title['content']
        comic_div = soup.find('div', id='comic')
        pictures = comic_div.find_all('img')
        return {
            'title': title,
            'img': [pic['src'] for pic in pictures],
        }
1632
1633
1634
class Wondermark(GenericListableComic):
    """Retriever for the Wondermark comics."""
    name = 'wondermark'
    long_name = 'Wondermark'
    url = 'http://wondermark.com'
    get_url_from_archive_element = get_href

    @classmethod
    def get_archive_elements(cls):
        """Return the archive page's rel="bookmark" links, in reversed order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive/'))
        bookmarks = archive_soup.find_all('a', rel='bookmark')
        return reversed(bookmarks)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (date, image, title, alt, tags) for one page.

        When the page has no div#comic, the image list is empty and both the
        title and the alt text are empty strings.
        """
        postdate = soup.find('div', class_='postdate')
        raw_date = postdate.find('em').string
        day = string_to_date(remove_st_nd_rd_th_from_date(raw_date), "%B %d, %Y")
        # Values reported when no comic container is present on the page.
        img_src, alt, title = [], '', ''
        comic_div = soup.find('div', id='comic')
        if comic_div:
            picture = comic_div.find('img')
            img_src = [picture['src']]
            alt = picture['alt']
            assert alt == picture['title']
            title = soup.find('meta', property='og:title')['content']
        tag_links = soup.find('div', class_='postmeta').find_all('a', rel='tag')
        return {
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': img_src,
            'title': title,
            'alt': alt,
            'tags': ' '.join(tag.string for tag in tag_links),
        }
1671
1672
1673
class WarehouseComic(GenericNavigableComic):
    """Retriever for the Warehouse Comic comics."""
    name = 'warehouse'
    long_name = 'Warehouse Comic'
    url = 'http://warehousecomic.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (images, title, date) for one comic page.

        The date is read from span.post-date using the format "%B %d, %Y".
        """
        title = soup.find('h2', class_='post-title').string
        date_span = soup.find('span', class_='post-date')
        day = string_to_date(date_span.string, "%B %d, %Y")
        pictures = soup.find('div', id='comic').find_all('img')
        info = {
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'day': day.day,
            'month': day.month,
            'year': day.year,
        }
        return info
1695
1696
1697
class JustSayEh(GenericNavigableComic):
    """Retriever for the Just Say Eh comics."""
    # Also on http://tapastic.com/series/Just-Say-Eh
    name = 'justsayeh'
    long_name = 'Just Say Eh'
    url = 'http://www.justsayeh.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (images, title, alt) for one comic page.

        Every image in div#comic must have identical ``alt`` and ``title``
        attributes; the alt text reported is the first image's.
        """
        title = soup.find('h2', class_='post-title').string
        pictures = soup.find("div", id="comic").find_all("img")
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        first_alt = pictures[0]['alt']
        return {
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'alt': first_alt,
        }
1718
1719
1720
class MouseBearComedy(GenericNavigableComic):
    """Retriever for the Mouse Bear Comedy comics."""
    # Also on http://mousebearcomedy.tumblr.com
    name = 'mousebear'
    long_name = 'Mouse Bear Comedy'
    url = 'http://www.mousebearcomedy.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (date, images, title, author) for one page.

        Every image in div#comic must have ``alt`` and ``title`` attributes
        both equal to the post title.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        raw_date = soup.find("span", class_="post-date").string
        day = string_to_date(raw_date, '%B %d, %Y')
        pictures = soup.find("div", id="comic").find_all("img")
        assert all(pic['alt'] == pic['title'] == title for pic in pictures)
        info = {
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'author': author,
        }
        return info
1746 View Code Duplication
1747
1748
class BigFootJustice(GenericNavigableComic):
    """Retriever for the Big Foot Justice comics."""
    # Also on http://tapastic.com/series/bigfoot-justice
    name = 'bigfoot'
    long_name = 'Big Foot Justice'
    url = 'http://bigfootjustice.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (images and title) for one comic page.

        The title is the space-joined ``title`` attributes of the images in
        div#comic; each image must have identical ``title`` and ``alt``.
        """
        comic_div = soup.find('div', id='comic')
        pictures = comic_div.find_all('img')
        assert all(pic['title'] == pic['alt'] for pic in pictures)
        joined_title = ' '.join(pic['title'] for pic in pictures)
        return {
            'img': [pic['src'] for pic in pictures],
            'title': joined_title,
        }
1767 View Code Duplication
1768
1769
class RespawnComic(GenericNavigableComic):
    """Class to retrieve Respawn Comic."""
    # Also on http://respawncomic.tumblr.com
    name = 'respawn'
    long_name = 'Respawn Comic'
    # No trailing whitespace: a stray space makes the value an invalid URL.
    url = 'http://respawncomic.com'
    _categories = ('RESPAWN', )
    get_navi_link = get_a_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://respawncomic.com/comic/c0001/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Title, author and publication time are read from <meta> tags; only the
        first 10 characters of the publication time (the "%Y-%m-%d" part) are
        parsed.  Images are the og:image <meta> contents, minus the two fixed
        site images listed in ``skip_imgs``.
        """
        title = soup.find('meta', property='og:title')['content']
        author = soup.find('meta', attrs={'name': 'shareaholic:article_author_name'})['content']
        date_str = soup.find('meta', attrs={'name': 'shareaholic:article_published_time'})['content']
        # Keep only the leading YYYY-MM-DD portion of the timestamp.
        date_str = date_str[:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        imgs = soup.find_all('meta', property='og:image')
        # og:image entries that are excluded from the comic's image list.
        skip_imgs = {
            'http://respawncomic.com/wp-content/uploads/2016/03/site/HAROLD2.png',
            'http://respawncomic.com/wp-content/uploads/2016/03/site/DEVA.png'
        }
        return {
            'title': title,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [i['content'] for i in imgs if i['content'] not in skip_imgs],
        }
1801
1802
1803
class SafelyEndangered(GenericNavigableComic):
    """Retriever for the Safely Endangered comics."""
    # Also on http://tumblr.safelyendangered.com
    name = 'endangered'
    long_name = 'Safely Endangered'
    url = 'http://www.safelyendangered.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.safelyendangered.com/comic/ignored/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (date, images, title, alt) for one page.

        The alt text reported is the first image's; every image in div#comic
        must have identical ``alt`` and ``title`` attributes.
        """
        title = soup.find('h2', class_='post-title').string
        raw_date = soup.find('span', class_='post-date').string
        day = string_to_date(raw_date, '%B %d, %Y')
        pictures = soup.find('div', id='comic').find_all('img')
        first_alt = pictures[0]['alt']
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        info = {
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'alt': first_alt,
        }
        return info
1830
1831
1832
class PicturesInBoxes(GenericNavigableComic):
    """Retriever for the Pictures In Boxes comics."""
    # Also on http://picturesinboxescomic.tumblr.com
    name = 'picturesinboxes'
    long_name = 'Pictures in Boxes'
    url = 'http://www.picturesinboxes.com'
    get_navi_link = get_a_navi_navinext
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.picturesinboxes.com/2013/10/26/tetris/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (date, images, title, author) for one page.

        At least one image is required in div.comicpane, and each image's
        ``title`` and ``alt`` must both equal the post title.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        raw_date = soup.find('span', class_='post-date').string
        day = string_to_date(raw_date, '%B %d, %Y')
        pictures = soup.find('div', class_='comicpane').find_all('img')
        assert pictures
        assert all(pic['title'] == pic['alt'] == title for pic in pictures)
        info = {
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'author': author,
        }
        return info
1860
1861
1862
class Penmen(GenericEmptyComic):
    """Placeholder for the Penmen comics.

    Only identification attributes are declared here.
    """
    name = 'penmen'
    long_name = 'Penmen'
    url = 'http://penmen.com'
1867
1868
1869
class TheDoghouseDiaries(GenericNavigableComic):
    """Retriever for The Dog House Diaries comics."""
    name = 'doghouse'
    long_name = 'The Dog House Diaries'
    url = 'http://thedoghousediaries.com'

    @classmethod
    def get_first_comic_link(cls):
        """Return a#firstlink as found on the home page."""
        home = get_soup_at_url(cls.url)
        return home.find('a', id='firstlink')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return a#nextlink (next) or a#previouslink (previous)."""
        anchor_id = 'nextlink' if next_ else 'previouslink'
        return last_soup.find('a', id=anchor_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (titles, alt, image URL, number) for one page.

        The comic number is the last '/'-separated component of the comic
        url; the image src is stripped before being joined to that url.
        """
        comic_img_re = re.compile('^dhdcomics/.*')
        picture = soup.find('img', src=comic_img_re)
        comic_url = cls.get_url_from_link(link)
        title = soup.find('h2', id='titleheader').string
        subtitle = soup.find('div', id='subtext').string
        hover_text = picture.get('title')
        picture_url = urljoin_wrapper(comic_url, picture['src'].strip())
        number = int(comic_url.split('/')[-1])
        return {
            'title': title,
            'title2': subtitle,
            'alt': hover_text,
            'img': [picture_url],
            'num': number,
        }
1898
1899
1900
class InvisibleBread(GenericListableComic):
    """Class to retrieve Invisible Bread comics."""
    # Also on http://www.gocomics.com/invisible-bread
    name = 'invisiblebread'
    long_name = 'Invisible Bread'
    url = 'http://invisiblebread.com'

    @classmethod
    def get_archive_elements(cls):
        """Return the archive page's td.archive-title cells, in reversed order."""
        archive_url = urljoin_wrapper(cls.url, 'archives/')
        return reversed(get_soup_at_url(archive_url).find_all('td', class_='archive-title'))

    @classmethod
    def get_url_from_archive_element(cls, td):
        """Return the href of the first <a> inside the archive cell."""
        return td.find('a')['href']

    @classmethod
    def get_comic_info(cls, soup, td):
        """Get information about a particular comics.

        The title comes from the archive cell's link, the month and day from
        the cell's previous sibling, and the year from the first numeric path
        component of the comic url.  The image is the first <img> in
        div#comic; its ``title`` and ``alt`` must both equal the title.
        """
        url = cls.get_url_from_archive_element(td)
        title = td.find('a').string
        month_and_day = td.previous_sibling.string
        # Escape the base url so that '.' only matches a literal dot.
        link_re = re.compile('^%s/([0-9]+)/' % re.escape(cls.url))
        year = link_re.match(url).groups()[0]
        date_str = month_and_day + ' ' + year
        day = string_to_date(date_str, '%b %d %Y')
        img = soup.find('div', id='comic').find('img')
        assert img['title'] == img['alt'] == title
        return {
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [urljoin_wrapper(cls.url, img['src'])],
            'title': title,
        }
1936
1937
1938
class DiscoBleach(GenericEmptyComic):  # Does not work anymore
    """Placeholder for the Disco Bleach comics.

    Only identification attributes are declared here.
    """
    name = 'discobleach'
    long_name = 'Disco Bleach'
    url = 'http://discobleach.com'
1943
1944
1945
class TubeyToons(GenericEmptyComic):  # Does not work anymore
    """Placeholder for the TubeyToons comics.

    Only identification attributes are declared here.
    """
    # Also on http://tapastic.com/series/Tubey-Toons
    # Also on http://tubeytoons.tumblr.com
    name = 'tubeytoons'
    long_name = 'Tubey Toons'
    url = 'http://tubeytoons.com'
    # NOTE(review): the category is spelled 'TUNEYTOONS', not 'TUBEYTOONS' -
    # confirm whether anything relies on this value before renaming it.
    _categories = ('TUNEYTOONS', )
1953
1954
1955
class CompletelySeriousComics(GenericNavigableComic):
    """Retriever for the Completely Serious comics."""
    name = 'completelyserious'
    long_name = 'Completely Serious Comics'
    url = 'http://completelyseriouscomics.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (date, images, title, alt, author) for one page.

        The author is the second child of span.post-author.  At least one
        image is required in div.comicpane, and every image's ``title`` and
        ``alt`` must equal the first image's ``title``.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find('span', class_='post-author')
        author = author_span.contents[1].string
        raw_date = soup.find('span', class_='post-date').string
        day = string_to_date(raw_date, '%B %d, %Y')
        pictures = soup.find('div', class_='comicpane').find_all('img')
        assert pictures
        hover_text = pictures[0]['title']
        assert all(pic['title'] == pic['alt'] == hover_text for pic in pictures)
        info = {
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'alt': hover_text,
            'author': author,
        }
        return info
1983
1984
1985
class PoorlyDrawnLines(GenericListableComic):
    """Class to retrieve Poorly Drawn Lines comics."""
    # Also on http://pdlcomics.tumblr.com
    name = 'poorlydrawn'
    long_name = 'Poorly Drawn Lines'
    url = 'http://poorlydrawnlines.com'
    _categories = ('POORLYDRAWN', )
    get_url_from_archive_element = get_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        At most one image is expected inside div.post.  The title is that
        image's ``title`` attribute, or an empty string when the attribute or
        the image itself is missing.
        """
        imgs = soup.find('div', class_='post').find_all('img')
        assert len(imgs) <= 1
        return {
            'img': [i['src'] for i in imgs],
            'title': imgs[0].get('title', "") if imgs else "",
        }

    @classmethod
    def get_archive_elements(cls):
        """Return the archive page's links to <url>/comic/..., in reversed order."""
        archive_url = urljoin_wrapper(cls.url, 'archive')
        # Escape the base url so that '.' only matches a literal dot.
        url_re = re.compile('^%s/comic/.' % re.escape(cls.url))
        return reversed(get_soup_at_url(archive_url).find_all('a', href=url_re))
2009
2010
2011
class LoadingComics(GenericNavigableComic):
    """Retriever for the Loading Artist comics."""
    name = 'loadingartist'
    long_name = 'Loading Artist'
    url = 'http://www.loadingartist.com/latest'

    @classmethod
    def get_first_comic_link(cls):
        """Return the <a title="First"> element found at the class url."""
        latest = get_soup_at_url(cls.url)
        return latest.find('a', title="First")

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the <a> titled 'Next' (next) or 'Previous' (previous)."""
        wanted_title = 'Next' if next_ else 'Previous'
        return last_soup.find('a', title=wanted_title)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title, images, date) for one comic page.

        Only images of div.comic whose ``alt`` and ``title`` are both empty
        strings are selected.
        """
        title = soup.find('h1').string
        raw_date = soup.find('span', class_='date').string.strip()
        day = string_to_date(raw_date, "%B %d, %Y")
        comic_div = soup.find('div', class_='comic')
        pictures = comic_div.find_all('img', alt='', title='')
        info = {
            'title': title,
            'img': [pic['src'] for pic in pictures],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
        return info
2041
2042
2043
class ChuckleADuck(GenericNavigableComic):
    """Retriever for the Chuckle-A-Duck comics."""
    name = 'chuckleaduck'
    long_name = 'Chuckle-A-duck'
    url = 'http://chuckleaduck.com'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (date, images, title, author) for one page.

        A page without div#comic (or without images in it) yields an empty
        image list and an empty title.  Otherwise every image's ``title`` and
        ``alt`` must equal the first image's ``title``.
        """
        raw_date = soup.find('span', class_='post-date').string
        day = string_to_date(remove_st_nd_rd_th_from_date(raw_date), "%B %d, %Y")
        author = soup.find('span', class_='post-author').string
        comic_div = soup.find('div', id='comic')
        if comic_div:
            pictures = comic_div.find_all('img')
        else:
            pictures = []
        if pictures:
            title = pictures[0]['title']
        else:
            title = ""
        assert all(pic['title'] == pic['alt'] == title for pic in pictures)
        return {
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'author': author,
        }
2069
2070
2071
class DepressedAlien(GenericNavigableComic):
    """Retriever for the Depressed Alien comics."""
    name = 'depressedalien'
    long_name = 'Depressed Alien'
    url = 'http://depressedalien.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the parent of the home page's <img name="beginArrow">."""
        home = get_soup_at_url(cls.url)
        begin_arrow = home.find('img', attrs={'name': 'beginArrow'})
        return begin_arrow.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the parent of the rightArrow (next) or leftArrow image."""
        arrow_name = 'rightArrow' if next_ else 'leftArrow'
        arrow = last_soup.find('img', attrs={'name': arrow_name})
        return arrow.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title and og:image contents) for one page."""
        title_meta = soup.find('meta', attrs={'name': 'twitter:title'})
        title = title_meta['content']
        image_metas = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'img': [meta['content'] for meta in image_metas],
        }
2097
2098
2099
class ThingsInSquares(GenericListableComic):
    """Retriever for the Things In Squares comics."""
    # This can be retrieved in other languages
    # Also on https://tapastic.com/series/Things-in-Squares
    name = 'squares'
    long_name = 'Things in squares'
    url = 'http://www.thingsinsquares.com'

    @classmethod
    def get_comic_info(cls, soup, tr):
        """Build the info dict for one comic from its archive row and page.

        The archive row must hold exactly three cells: the second provides
        the link (title) and the third the date in "%m.%d.%y" format.  The
        description is an empty string when the page has no og:description.
        """
        _, link_cell, date_cell = tr.find_all('td')
        anchor = link_cell.find('a')
        raw_date = date_cell.string
        day = string_to_date(raw_date, "%m.%d.%y")
        title = anchor.string
        title2 = soup.find('meta', property='og:title')['content']
        desc_meta = soup.find('meta', property='og:description')
        if desc_meta:
            description = desc_meta['content']
        else:
            description = ''
        tag_metas = soup.find_all('meta', property='article:tag')
        tags = ' '.join(meta['content'] for meta in tag_metas)
        pictures = soup.find('div', class_='entry-content').find_all('img')
        return {
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'title': title,
            'title2': title2,
            'description': description,
            'tags': tags,
            'img': [pic['src'] for pic in pictures],
            'alt': ' '.join(pic['alt'] for pic in pictures),
        }

    @classmethod
    def get_url_from_archive_element(cls, tr):
        """Return the href of the link held in the row's second cell."""
        # Unpacking requires the row to have exactly three cells.
        _, link_cell, _ = tr.find_all('td')
        anchor = link_cell.find('a')
        return anchor['href']

    @classmethod
    def get_archive_elements(cls):
        """Return the rows of the archive page's first <tbody>, reversed."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive-2'))
        rows = archive_soup.find('tbody').find_all('tr')
        return reversed(rows)
2141
2142
2143
class HappleTea(GenericNavigableComic):
    """Retriever for the Happle Tea comics."""
    name = 'happletea'
    long_name = 'Happle Tea'
    url = 'http://www.happletea.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title, images, alt, date, author) for one page.

        Title, author and date are read inside div.post-content.  Every image
        in div#comic must have identical ``alt`` and ``title``; the reported
        alt text concatenates the images' ``alt`` without separator.
        """
        pictures = soup.find('div', id='comic').find_all('img')
        post = soup.find('div', class_='post-content')
        title = post.find('h2', class_='post-title').string
        author = post.find('a', rel='author').string
        raw_date = post.find('span', class_='post-date').string
        day = string_to_date(raw_date, "%B %d, %Y")
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        info = {
            'title': title,
            'img': [pic['src'] for pic in pictures],
            'alt': ''.join(pic['alt'] for pic in pictures),
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'author': author,
        }
        return info
2170
2171
2172
class FatAwesomeComics(GenericNavigableComic):
    """Retriever for the Fat Awesome comics."""
    # Also on http://fatawesomecomedy.tumblr.com
    name = 'fatawesome'
    long_name = 'Fat Awesome'
    url = 'http://fatawesome.com/comics'
    get_navi_link = get_a_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://fatawesome.com/shortbus/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for one comic page from its <meta> tags.

        Tags default to an empty string when no article:tag <meta> exists.
        Exactly one <img data-recalc-dims="1"> is expected; its src is cut at
        the last '?' before being reported.
        """
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        description = soup.find('meta', attrs={'name': 'description'})['content']
        tag_meta = soup.find('meta', property='article:tag')
        if tag_meta:
            tags = tag_meta['content']
        else:
            tags = ""
        published = soup.find('meta', property='article:published_time')['content']
        # Only the leading YYYY-MM-DD portion of the timestamp is parsed.
        day = string_to_date(published[:10], "%Y-%m-%d")
        pictures = soup.find_all('img', attrs={'data-recalc-dims': "1"})
        assert len(pictures) == 1
        return {
            'title': title,
            'description': description,
            'tags': tags,
            'alt': "".join(pic['alt'] for pic in pictures),
            'img': [pic['src'].rsplit('?', 1)[0] for pic in pictures],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
2203
2204
2205
class AnythingComic(GenericListableComic):
    """Retriever for the Anything Comic comics."""
    # Also on http://tapastic.com/series/anything
    name = 'anythingcomic'
    long_name = 'Anything Comic'
    url = 'http://www.anythingcomic.com'

    @classmethod
    def get_archive_elements(cls):
        """Return the rows of table#chapter_table, minus the first two."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive/'))
        rows = archive_soup.find('table', id='chapter_table').find_all('tr')
        # The first 2 <tr>'s do not correspond to comics
        return rows[2:]

    @classmethod
    def get_url_from_archive_element(cls, tr):
        """Return the comic url: the second cell's link joined to the class url."""
        # Unpacking requires the row to have exactly four cells.
        _, comic_cell, _, _ = tr.find_all('td')
        anchor = comic_cell.find('a')
        return urljoin_wrapper(cls.url, anchor['href'])

    @classmethod
    def get_comic_info(cls, soup, tr):
        """Build the info dict for one comic from its archive row and page.

        The four-cell row gives the number, the title (link text) and the
        date ("%B %d, %Y, %I:%M %p" once ordinal suffixes are removed).
        Exactly one img#comic_image is expected on the page.
        """
        num_cell, comic_cell, date_cell, _ = tr.find_all('td')
        num = int(num_cell.string)
        anchor = comic_cell.find('a')
        title = anchor.string
        pictures = soup.find_all('img', id='comic_image')
        raw_date = date_cell.string
        day = string_to_date(remove_st_nd_rd_th_from_date(raw_date), "%B %d, %Y, %I:%M %p")
        assert len(pictures) == 1
        assert all(pic.get('alt') == pic.get('title') for pic in pictures)
        info = {
            'num': num,
            'title': title,
            'alt': pictures[0].get('alt', ''),
            'img': [pic['src'] for pic in pictures],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
        return info
2246
2247
2248
class LonnieMillsap(GenericNavigableComic):
    """Retriever for Lonnie Millsap's comics."""
    name = 'millsap'
    long_name = 'Lonnie Millsap'
    url = 'http://www.lonniemillsap.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.lonniemillsap.com/?p=42'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title, author, images, date) for one page.

        Author, date and images are all looked up inside div.post-content;
        the images are those of its div.entry child.
        """
        title = soup.find('h2', class_='post-title').string
        post = soup.find('div', class_='post-content')
        author_span = post.find("span", class_="post-author")
        author = author_span.find("a").string
        raw_date = post.find("span", class_="post-date").string
        day = string_to_date(raw_date, "%B %d, %Y")
        pictures = post.find("div", class_="entry").find_all("img")
        info = {
            'title': title,
            'author': author,
            'img': [pic['src'] for pic in pictures],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
        return info
2274
2275
2276 View Code Duplication
class LinsEditions(GenericNavigableComic):
    """Retriever for L.I.N.S. Editions comics."""
    # Also on http://linscomics.tumblr.com
    # Now on https://warandpeas.com
    name = 'lins'
    long_name = 'L.I.N.S. Editions'
    url = 'https://linsedition.com'
    _categories = ('LINS', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'https://linsedition.com/2011/09/07/l-i-n-s/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for one comic page from its <meta> tags.

        `link` is accepted for signature compatibility but is not used here.
        """
        title = soup.find('meta', property='og:title')['content']
        image_metas = soup.find_all('meta', property='og:image')
        published = soup.find('meta', property='article:published_time')['content']
        # Only the first 10 characters (the YYYY-MM-DD part) are parsed.
        day = string_to_date(published[:10], "%Y-%m-%d")
        return {
            'title': title,
            'img': [meta['content'] for meta in image_metas],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
2302
2303
2304
class ThorsThundershack(GenericNavigableComic):
    """Retriever for Thor's Thundershack comics."""
    # Also on http://tapastic.com/series/Thors-Thundershac
    name = 'thor'
    long_name = 'Thor\'s Thundershack'
    url = 'http://www.thorsthundershack.com'
    _categories = ('THOR', )
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'first navlink' anchor found on the home page."""
        home = get_soup_at_url(cls.url)
        return home.find('a', class_='first navlink')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the first next/prev anchor not pointing at '/comic', or None."""
        if next_:
            rel = 'next'
        else:
            rel = 'prev'
        candidates = last_soup.find_all('a', rel=rel)
        # Anchors whose href is exactly '/comic' are skipped.
        return next((a for a in candidates if a['href'] != '/comic'), None)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for one comic page.

        `link` is accepted for signature compatibility but is not used here.
        """
        title = soup.find('meta', attrs={'name': 'description'})["content"]
        description = soup.find('div', itemprop='articleBody').text
        author = soup.find('span', itemprop='author copyrightHolder').string
        pictures = soup.find_all('img', itemprop='image')
        # Sanity check: title and alt text are expected to match on each image.
        assert all(pic['title'] == pic['alt'] for pic in pictures)
        if pictures:
            alt = pictures[0]['alt']
        else:
            alt = ""
        published = soup.find('time', itemprop='datePublished')["datetime"]
        day = string_to_date(published, "%Y-%m-%d %H:%M:%S")
        return {
            'img': [urljoin_wrapper(cls.url, pic['src']) for pic in pictures],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'author': author,
            'title': title,
            'alt': alt,
            'description': description,
        }
2347
2348
2349
class GerbilWithAJetpack(GenericNavigableComic):
    """Retriever for Gerbil With A Jetpack comics."""
    name = 'gerbil'
    long_name = 'Gerbil With A Jetpack'
    url = 'http://gerbilwithajetpack.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for one comic page.

        `link` is accepted for signature compatibility but is not used here.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        date_span = soup.find("span", class_="post-date")
        day = string_to_date(date_span.string, "%B %d, %Y")
        pictures = soup.find("div", id="comic").find_all("img")
        # The first image's alt text is the reference value (at least one
        # image is required, otherwise this raises IndexError).
        alt = pictures[0]['alt']
        assert all(pic['alt'] == pic['title'] == alt for pic in pictures)
        return {
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'alt': alt,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
2376
2377
2378
class EveryDayBlues(GenericNavigableComic):
    """Retriever for Every Day Blues comics."""
    name = "blues"
    long_name = "Every Day Blues"
    url = "http://everydayblues.net"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for one comic page.

        `link` is accepted for signature compatibility but is not used here.
        """
        title = soup.find("h2", class_="post-title").string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        date_span = soup.find("span", class_="post-date")
        # Third argument is presumably the locale used to parse the month
        # name - confirm against string_to_date.
        day = string_to_date(date_span.string, "%d. %B %Y", "de_DE.utf8")
        pictures = soup.find("div", id="comic").find_all("img")
        # Sanity checks: alt/title both equal the post title, at most one image.
        assert all(pic['alt'] == pic['title'] == title for pic in pictures)
        assert len(pictures) <= 1
        return {
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
2404
2405
2406
class BiterComics(GenericNavigableComic):
    """Retriever for Biter Comics."""
    name = "biter"
    long_name = "Biter Comics"
    url = "http://www.bitercomics.com"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for one comic page.

        `link` is accepted for signature compatibility but is not used here.
        """
        title = soup.find("h1", class_="entry-title").string
        author_span = soup.find("span", class_="author vcard")
        author = author_span.find("a").string
        date_span = soup.find("span", class_="entry-date")
        day = string_to_date(date_span.string, "%B %d, %Y")
        pictures = soup.find("div", id="comic").find_all("img")
        # Sanity checks: alt matches title, and exactly one image is present.
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        assert len(pictures) == 1
        alt = pictures[0]['alt']
        return {
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'alt': alt,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
2434
2435
2436
class TheAwkwardYeti(GenericNavigableComic):
    """Retriever for The Awkward Yeti comics."""
    # Also on http://www.gocomics.com/the-awkward-yeti
    # Also on http://larstheyeti.tumblr.com
    # Also on https://tapastic.com/series/TheAwkwardYeti
    name = 'yeti'
    long_name = 'The Awkward Yeti'
    url = 'http://theawkwardyeti.com'
    _categories = ('YETI', )
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for one comic page.

        `link` is accepted for signature compatibility but is not used here.
        """
        title = soup.find('h2', class_='post-title').string
        date_span = soup.find("span", class_="post-date")
        day = string_to_date(date_span.string, "%B %d, %Y")
        pictures = soup.find("div", id="comic").find_all("img")
        # Only the first image (if any) is required to have alt == title.
        assert all(pic['alt'] == pic['title'] for pic in pictures[:1])
        return {
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
2463
2464
2465
class PleasantThoughts(GenericNavigableComic):
    """Retriever for Pleasant Thoughts comics."""
    name = 'pleasant'
    long_name = 'Pleasant Thoughts'
    url = 'http://pleasant-thoughts.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title and images) for one comic page.

        `link` is accepted for signature compatibility but is not used here.
        """
        content = soup.find('div', class_='post-content')
        title = content.find('h2', class_='post-title').string
        entry = content.find("div", class_="entry")
        pictures = entry.find_all("img")
        return {
            'title': title,
            'img': [pic['src'] for pic in pictures],
        }
2483
2484
2485
class MisterAndMe(GenericNavigableComic):
    """Retriever for Mister & Me comics."""
    # Also on http://www.gocomics.com/mister-and-me
    # Also on https://tapastic.com/series/Mister-and-Me
    name = 'mister'
    long_name = 'Mister & Me'
    url = 'http://www.mister-and-me.com'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for one comic page.

        `link` is accepted for signature compatibility but is not used here.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        date_span = soup.find("span", class_="post-date")
        day = string_to_date(date_span.string, "%B %d, %Y")
        pictures = soup.find("div", id="comic").find_all("img")
        # Sanity checks: alt matches title, and there is at most one image.
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        assert len(pictures) <= 1
        if pictures:
            alt = pictures[0]['alt']
        else:
            alt = ""
        return {
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'alt': alt,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
2515
2516
2517
class LastPlaceComics(GenericNavigableComic):
    """Retriever for Last Place Comics."""
    name = 'lastplace'
    long_name = 'Last Place Comics'
    url = "http://lastplacecomics.com"
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for one comic page.

        `link` is accepted for signature compatibility but is not used here.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        date_span = soup.find("span", class_="post-date")
        day = string_to_date(date_span.string, "%B %d, %Y")
        pictures = soup.find("div", id="comic").find_all("img")
        # Sanity checks: alt matches title, and there is at most one image.
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        assert len(pictures) <= 1
        if pictures:
            alt = pictures[0]['alt']
        else:
            alt = ""
        return {
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'alt': alt,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
2545
2546
2547
class TalesOfAbsurdity(GenericNavigableComic):
    """Retriever for Tales Of Absurdity comics."""
    # Also on http://tapastic.com/series/Tales-Of-Absurdity
    # Also on http://talesofabsurdity.tumblr.com
    name = 'absurdity'
    long_name = 'Tales of Absurdity'
    url = 'http://talesofabsurdity.com'
    _categories = ('ABSURDITY', )
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for one comic page.

        `link` is accepted for signature compatibility but is not used here.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        date_span = soup.find("span", class_="post-date")
        day = string_to_date(date_span.string, "%B %d, %Y")
        pictures = soup.find("div", id="comic").find_all("img")
        # Sanity check: alt text is expected to match the title attribute.
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        if pictures:
            alt = pictures[0]['alt']
        else:
            alt = ""
        return {
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'alt': alt,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
2577
2578
2579
class EndlessOrigami(GenericEmptyComic, GenericNavigableComic):
    """Retriever for Endless Origami comics."""
    name = "origami"
    long_name = "Endless Origami"
    url = "http://endlessorigami.com"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for one comic page.

        `link` is accepted for signature compatibility but is not used here.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        date_span = soup.find("span", class_="post-date")
        day = string_to_date(date_span.string, "%B %d, %Y")
        pictures = soup.find("div", id="comic").find_all("img")
        # Sanity check: alt text is expected to match the title attribute.
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        if pictures:
            alt = pictures[0]['alt']
        else:
            alt = ""
        return {
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'alt': alt,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
2606
2607
2608
class PlanC(GenericNavigableComic):
    """Retriever for Plan C comics."""
    name = 'planc'
    long_name = 'Plan C'
    url = 'http://www.plancomic.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title, images, date) for one comic page.

        `link` is accepted for signature compatibility but is not used here.
        """
        title = soup.find('h2', class_='post-title').string
        date_span = soup.find("span", class_="post-date")
        day = string_to_date(date_span.string, "%B %d, %Y")
        comic_div = soup.find('div', id='comic')
        pictures = comic_div.find_all('img')
        return {
            'title': title,
            'img': [pic['src'] for pic in pictures],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
2630 View Code Duplication
2631
2632
class BuniComic(GenericNavigableComic):
    """Retriever for Buni comics."""
    name = 'buni'
    long_name = 'BuniComics'
    url = 'http://www.bunicomic.com'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for one comic page; the title is the image title.

        `link` is accepted for signature compatibility but is not used here.
        """
        comic_div = soup.find('div', id='comic')
        pictures = comic_div.find_all('img')
        # Sanity checks: alt matches title, and exactly one image is present.
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        assert len(pictures) == 1
        return {
            'img': [pic['src'] for pic in pictures],
            'title': pictures[0]['title'],
        }
2650
2651
2652
class GenericCommitStrip(GenericNavigableComic):
    """Shared base for the Commit Strip retrievers (one subclass per language)."""
    get_navi_link = get_a_rel_next
    get_first_comic_link = simulate_first_link
    # Placeholder: concrete subclasses provide the real first comic URL.
    first_url = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for one comic page.

        `link` is accepted for signature compatibility but is not used here.
        """
        desc = soup.find('meta', property='og:description')['content']
        title = soup.find('meta', property='og:title')['content']
        pictures = soup.find('div', class_='entry-content').find_all('img')
        # Secondary title: the images' title attributes joined by spaces
        # (a missing attribute contributes an empty string).
        title2 = ' '.join(pic.get('title', '') for pic in pictures)
        return {
            'title': title,
            'title2': title2,
            'description': desc,
            'img': [
                urljoin_wrapper(cls.url, convert_iri_to_plain_ascii_uri(pic['src']))
                for pic in pictures
            ],
        }
2671
2672
2673
class CommitStripFr(GenericCommitStrip):
    """Commit Strip retriever configured for the French edition."""
    name = 'commit_fr'
    long_name = 'Commit Strip (Fr)'
    _categories = ('FRANCAIS', )
    url = 'http://www.commitstrip.com/fr'
    first_url = 'http://www.commitstrip.com/fr/2012/02/22/interview/'
2680
2681
2682
class CommitStripEn(GenericCommitStrip):
    """Commit Strip retriever configured for the English edition."""
    name = 'commit_en'
    long_name = 'Commit Strip (En)'
    url = 'http://www.commitstrip.com/en'
    first_url = 'http://www.commitstrip.com/en/2012/02/22/interview/'
2688
2689
2690
class GenericBoumerie(GenericNavigableComic):
    """Shared base for the Boumeries retrievers (one subclass per language)."""
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next
    # Placeholders: concrete subclasses set these; both are passed to
    # string_to_date when parsing the post date.
    date_format = NotImplemented
    lang = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for one comic page.

        `link` is accepted for signature compatibility but is not used here.
        """
        title = soup.find('h2', class_='post-title').string
        short_url = soup.find('link', rel='shortlink')['href']
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        date_span = soup.find('span', class_='post-date')
        day = string_to_date(date_span.string, cls.date_format, cls.lang)
        pictures = soup.find('div', id='comic').find_all('img')
        # Sanity check: alt text is expected to match the title attribute.
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        return {
            'short_url': short_url,
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'author': author,
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
2716
2717
2718
class BoumerieEn(GenericBoumerie):
    """Boumeries retriever configured for the English edition."""
    name = 'boumeries_en'
    long_name = 'Boumeries (En)'
    url = 'http://comics.boumerie.com'
    lang = 'en_GB.UTF-8'
    date_format = "%B %d, %Y"
2725
2726
2727
class BoumerieFr(GenericBoumerie):
    """Boumeries retriever configured for the French edition."""
    name = 'boumeries_fr'
    long_name = 'Boumeries (Fr)'
    url = 'http://bd.boumerie.com'
    _categories = ('FRANCAIS', )
    lang = "fr_FR.utf8"
    date_format = "%A, %d %B %Y"
2735
2736
2737 View Code Duplication
class UnearthedComics(GenericNavigableComic):
    """Class to retrieve Unearthed comics."""
    # Also on http://tapastic.com/series/UnearthedComics
    # Also on http://unearthedcomics.tumblr.com
    name = 'unearthed'
    long_name = 'Unearthed Comics'
    url = 'http://unearthedcomics.com'
    _categories = ('UNEARTHED', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://unearthedcomics.com/comics/world-with-turn-signals/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with the title, description, short url ('url2'),
        image urls and publication date read from the page `soup`.
        `link` is accepted for signature compatibility but is not used here.
        """
        short_url = soup.find('link', rel='shortlink')['href']
        # The title may be in an <h1> or an <h2>; fall back to "" when
        # neither is present.
        title_elt = soup.find('h1') or soup.find('h2')
        title = title_elt.string if title_elt else ""
        # Store the text of the og:description meta tag, not the tag object
        # itself; a missing tag (or missing attribute) gives "".
        desc_elt = soup.find('meta', property='og:description')
        desc = desc_elt.get('content', '') if desc_elt is not None else ""
        date_str = soup.find('time', class_='published updated hidden')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        post = soup.find('div', class_="entry content entry-content type-portfolio")
        imgs = post.find_all('img')
        return {
            'title': title,
            'description': desc,
            'url2': short_url,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
2769
2770
2771
class Optipess(GenericNavigableComic):
    """Retriever for Optipess comics."""
    name = 'optipess'
    long_name = 'Optipess'
    url = 'http://www.optipess.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for one comic page.

        `link` is accepted for signature compatibility but is not used here.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        # A page without the comic div yields an empty image list.
        comic_div = soup.find('div', id='comic')
        pictures = [] if not comic_div else comic_div.find_all('img')
        if pictures:
            alt = pictures[0]['title']
        else:
            alt = ""
        # Sanity check: every image shares the same alt/title text.
        assert all(pic['alt'] == pic['title'] == alt for pic in pictures)
        date_span = soup.find('span', class_='post-date')
        day = string_to_date(date_span.string, "%B %d, %Y")
        return {
            'title': title,
            'alt': alt,
            'author': author,
            'img': [pic['src'] for pic in pictures],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
2799
2800
2801
class PainTrainComic(GenericNavigableComic):
    """Class to retrieve Pain Train Comics."""
    name = 'paintrain'
    long_name = 'Pain Train Comics'
    url = 'http://paintraincomic.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with the short url, the comic number extracted from
        it, the image urls, the publication date, the alt text and the title.
        `link` is accepted for signature compatibility but is not used here.
        """
        title = soup.find('h2', class_='post-title').string
        short_url = soup.find('link', rel='shortlink')['href']
        # The site url is escaped so that its dots only match literal dots.
        short_url_re = re.compile(r'^%s/\?p=([0-9]*)' % re.escape(cls.url))
        # The comic number is the "p" query value of the short url.
        num = int(short_url_re.match(short_url).group(1))
        imgs = soup.find('div', id='comic').find_all('img')
        # The first image's title is the reference alt text.
        alt = imgs[0]['title']
        assert all(i['alt'] == i['title'] == alt for i in imgs)
        date_str = soup.find('span', class_='post-date').string
        day = string_to_date(date_str, "%d/%m/%Y")
        return {
            'short_url': short_url,
            'num': num,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'alt': alt,
            'title': title,
        }
2831
2832
2833
class MoonBeard(GenericNavigableComic):
    """Class to retrieve MoonBeard comics."""
    # Also on http://blog.squiresjam.es/moonbeard
    # Also on http://www.webtoons.com/en/comedy/moon-beard/list?title_no=471
    name = 'moonbeard'
    long_name = 'Moon Beard'
    url = 'http://moonbeard.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with the short url, the comic number extracted from
        it, the image urls, the publication date, the title, the
        space-joined article tags, the alt text and the author.
        `link` is accepted for signature compatibility but is not used here.
        """
        title = soup.find('h2', class_='post-title').string
        short_url = soup.find('link', rel='shortlink')['href']
        # The site url is escaped so that its dots only match literal dots.
        short_url_re = re.compile(r'^%s/\?p=([0-9]*)' % re.escape(cls.url))
        # The comic number is the "p" query value of the short url.
        num = int(short_url_re.match(short_url).group(1))
        imgs = soup.find('div', id='comic').find_all('img')
        # The first image's title is the reference alt text.
        alt = imgs[0]['title']
        assert all(i['alt'] == i['title'] == alt for i in imgs)
        date_str = soup.find('span', class_='post-date').string
        day = string_to_date(date_str, "%B %d, %Y")
        tags = ' '.join(t['content'] for t in soup.find_all('meta', property='article:tag'))
        author = soup.find('span', class_='post-author').string
        return {
            'short_url': short_url,
            'num': num,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'title': title,
            'tags': tags,
            'alt': alt,
            'author': author,
        }
2869 View Code Duplication
2870
2871
class AHamADay(GenericNavigableComic):
    """Class to retrieve class A Ham A Day comics."""
    name = 'ham'
    long_name = 'A Ham A Day'
    url = 'http://www.ahammaday.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.ahammaday.com/today/3/6/french'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns the anchor inside the matching <li>, or None when the page
        has no such <li>.
        """
        # prev is next / next is prev
        li = last_soup.find('li', class_='previous' if next_ else 'next')
        return li.find('a') if li is not None else None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with the image urls, title, author and publication
        date read from the page `soup`.
        `link` is accepted for signature compatibility but is not used here.
        """
        date_str = soup.find('time', class_='published')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        author = soup.find('span', class_='blog-author').find('a').string
        title = soup.find('meta', property='og:title')['content']
        imgs = soup.find_all('meta', itemprop='image')
        return {
            'img': [i['content'] for i in imgs],
            'title': title,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
        }
2902
2903
2904
class LittleLifeLines(GenericNavigableComic):
    """Retriever for Little Life Lines comics."""
    # Also on https://little-life-lines.tumblr.com
    name = 'life'
    long_name = 'Little Life Lines'
    url = 'http://www.littlelifelines.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.littlelifelines.com/comics/well-done'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor of the next/previous comic, or None if absent."""
        # prev is next / next is prev
        if next_:
            css_class = 'prev'
        else:
            css_class = 'next'
        item = last_soup.find('li', class_=css_class)
        if not item:
            return None
        return item.find('a')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for one comic page.

        `link` is accepted for signature compatibility but is not used here.
        """
        title = soup.find('meta', property='og:title')['content']
        desc = soup.find('meta', property='og:description')['content']
        published = soup.find('time', class_='published')['datetime']
        day = string_to_date(published, "%Y-%m-%d")
        author = soup.find('a', rel='author').string
        body = soup.find('div', class_="body entry-content")
        # Images without a src attribute are ignored.
        pictures = [pic for pic in body.find_all('img') if pic.get('src') is not None]
        alt = pictures[0]['alt']
        return {
            'title': title,
            'alt': alt,
            'description': desc,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [pic['src'] for pic in pictures],
        }
2943
2944
2945
class GenericWordPressInkblot(GenericNavigableComic):
    """Shared base for comics published with WordPress and Inkblot."""
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'first webcomic' anchor found on the home page."""
        home = get_soup_at_url(cls.url)
        return home.find('a', class_='webcomic-link webcomic1-link first-webcomic-link first-webcomic1-link')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title, date, images) for one comic page.

        `link` is accepted for signature compatibility but is not used here.
        """
        title = soup.find('meta', property='og:title')['content']
        image_div = soup.find('div', class_='webcomic-image')
        pictures = image_div.find_all('img')
        published = soup.find('meta', property='article:published_time')['content']
        # Only the first 10 characters (the YYYY-MM-DD part) are parsed.
        day = string_to_date(published[:10], "%Y-%m-%d")
        return {
            'title': title,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [pic['src'] for pic in pictures],
        }
2968
2969
2970
class EverythingsStupid(GenericWordPressInkblot):
    """WordPress/Inkblot retriever configured for Everything's Stupid."""
    # Also on http://tapastic.com/series/EverythingsStupid
    # Also on http://www.webtoons.com/en/challenge/everythings-stupid/list?title_no=14591
    # Also on http://everythingsstupidcomics.tumblr.com
    url = 'http://everythingsstupid.net'
    name = 'stupid'
    long_name = "Everything's Stupid"
2978
2979
2980
class TheIsmComics(GenericWordPressInkblot):
    """WordPress/Inkblot retriever configured for The Ism."""
    # Also on https://tapastic.com/series/TheIsm (?)
    url = 'http://www.theism-comics.com'
    name = 'theism'
    long_name = "The Ism"
2986
2987
2988
class WoodenPlankStudios(GenericWordPressInkblot):
    """WordPress/Inkblot retriever configured for Wooden Plank Studios."""
    url = 'http://woodenplankstudios.com'
    name = 'woodenplank'
    long_name = 'Wooden Plank Studios'
2993
2994
2995
class ElectricBunnyComic(GenericNavigableComic):
    """Retriever for Electric Bunny comics."""
    # Also on http://electricbunnycomics.tumblr.com
    name = 'bunny'
    long_name = 'Electric Bunny Comic'
    url = 'http://www.electricbunnycomics.com/View/Comic/153/Welcome+to+Hell'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the parent element of the image whose alt text is 'First'."""
        page = get_soup_at_url(cls.url)
        first_img = page.find('img', alt='First')
        return first_img.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the parent of the 'Next'/'Back' image, or None if absent."""
        if next_:
            alt_text = 'Next'
        else:
            alt_text = 'Back'
        nav_img = last_soup.find('img', alt=alt_text)
        if not nav_img:
            return None
        return nav_img.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title and images) from the page's <meta> tags.

        `link` is accepted for signature compatibility but is not used here.
        """
        title = soup.find('meta', property='og:title')['content']
        image_metas = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'img': [meta['content'] for meta in image_metas],
        }
3023
3024
3025
class SheldonComics(GenericNavigableComic):
    """Retriever for Sheldon comics."""
    # Also on http://www.gocomics.com/sheldon
    name = 'sheldon'
    long_name = 'Sheldon Comics'
    url = 'http://www.sheldoncomics.com'

    @classmethod
    def get_first_comic_link(cls):
        """Return the anchor with id 'nav-first' found on the home page."""
        home = get_soup_at_url(cls.url)
        return home.find("a", id="nav-first")

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the first next/prev anchor not pointing at the home page, or None."""
        if next_:
            anchor_id = "nav-next"
        else:
            anchor_id = "nav-prev"
        candidates = last_soup.find_all("a", id=anchor_id)
        # Anchors whose href is exactly the site root are skipped.
        return next(
            (a for a in candidates if a['href'] != 'http://www.sheldoncomics.com'),
            None)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for one comic page; the title is the image title.

        `link` is accepted for signature compatibility but is not used here.
        """
        foot = soup.find("div", id="comic-foot")
        pictures = foot.find_all("img")
        # Sanity checks: alt matches title, and exactly one image is present.
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        assert len(pictures) == 1
        title = pictures[0]['title']
        return {
            'title': title,
            'img': [pic['src'] for pic in pictures],
        }
3056
3057
3058
class Ubertool(GenericNavigableComic):
    """Retrieve Ubertool comics from ubertoolcomic.com."""
    # Also on http://ubertool.tumblr.com
    # Also on https://tapastic.com/series/ubertool
    name = 'ubertool'
    long_name = 'Ubertool'
    url = 'http://ubertoolcomic.com'
    _categories = ('UBERTOOL', )
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_a_comicnavbase_comicnavnext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return images, title and publication date of the comic page."""
        post_title = soup.find('h2', class_='post-title').string
        raw_date = soup.find('span', class_='post-date').string
        # Dates are written like "January 31, 2016".
        published = string_to_date(raw_date, "%B %d, %Y")
        comic_imgs = soup.find('div', id='comic').find_all('img')
        return {
            'img': [tag['src'] for tag in comic_imgs],
            'title': post_title,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
3083
3084
3085 View Code Duplication
class EarthExplodes(GenericNavigableComic):
    """Retrieve The Earth Explodes comics from earthexplodes.com."""
    name = 'earthexplodes'
    long_name = 'The Earth Explodes'
    url = 'http://www.earthexplodes.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.earthexplodes.com/comics/000/'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor whose id is 'next' (or 'prev')."""
        anchor_id = 'next' if next_ else 'prev'
        return last_soup.find('a', id=anchor_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return absolute image urls, page title and hover text of the first image."""
        page_title = soup.find('title').string
        image_tags = soup.find('div', id='image').find_all('img')
        # The hover text lives in the 'title' attribute of the first image.
        hover_text = image_tags[0].get('title', '')
        absolute_srcs = [urljoin_wrapper(cls.url, tag['src']) for tag in image_tags]
        return {
            'img': absolute_srcs,
            'title': page_title,
            'alt': hover_text,
        }
3110
3111
3112
class CubeDrone(GenericNavigableComic):
    """Retrieve Cube Drone comics from cube-drone.com."""
    name = 'cubedrone'
    long_name = 'Cube Drone'
    url = 'http://cube-drone.com/comics'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the parent of the 'backward' glyphicon found on the main page."""
        backward_icon = get_soup_at_url(cls.url).find(
            'span', class_='glyphicon glyphicon-backward')
        return backward_icon.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the parent of the right (or left) chevron glyphicon."""
        direction = 'right' if next_ else 'left'
        chevron = last_soup.find('span', class_='glyphicon glyphicon-chevron-' + direction)
        return chevron.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return twitter metadata, image urls and texts of the first image."""
        twitter_title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        twitter_url = soup.find('meta', attrs={'name': 'twitter:url'})['content']
        # A date is also present in the 'small' tag of the 'comic_title' h2
        # (format "%B %d, %Y, %I:%M %p") but it is not extracted.
        comic_imgs = soup.find_all('img', class_='comic img-responsive')
        first_img = comic_imgs[0]
        img_title = first_img['title']
        img_alt = first_img['alt']
        return {
            'url2': twitter_url,
            'title': twitter_title,
            'title2': img_title,
            'alt': img_alt,
            'img': [tag['src'] for tag in comic_imgs],
        }
3147
3148
3149
class MakeItStoopid(GenericNavigableComic):
    """Retrieve Make It Stoopid comics from makeitstoopid.com."""
    name = 'stoopid'
    long_name = 'Make it stoopid'
    url = 'http://makeitstoopid.com/comic.php'

    @classmethod
    def get_nav(cls, soup):
        """Return the navigation elements of a page.

        The 'cnav' elements are expected twice on the page, in the order
        begin, prev, archive, next, end. Only the first group of 5 is
        returned; elements without href are replaced by None.
        """
        nav_elements = soup.find_all(class_='cnav')
        first_group = nav_elements[:5]
        second_group = nav_elements[5:]
        assert first_group == second_group
        return [elt if elt.get('href') is not None else None for elt in first_group]

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'begin' navigation element of the main page."""
        main_page_nav = cls.get_nav(get_soup_at_url(cls.url))
        return main_page_nav[0]

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the 'next' (index 3) or 'prev' (index 1) navigation element."""
        position = 3 if next_ else 1
        return cls.get_nav(last_soup)[position]

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the title carried by the link and the 'comicimg' image urls."""
        link_title = link['title']
        comic_imgs = soup.find_all('img', id='comicimg')
        return {
            'title': link_title,
            'img': [tag['src'] for tag in comic_imgs],
        }
3183
3184
3185
class ConsoliaComics(GenericNavigableComic):
    """Retrieve Consolia comics from consolia-comic.com."""
    name = 'consolia'
    long_name = 'consolia'
    url = 'https://consolia-comic.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the anchor inside the 'first' span of the main page."""
        first_span = get_soup_at_url(cls.url).find('span', class_='first')
        return first_span.find('a')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor inside the 'next' (or 'prev') span."""
        span_class = 'next' if next_ else 'prev'
        nav_span = last_soup.find('span', class_=span_class)
        return nav_span.find('a')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, hover text, image urls and publication date."""
        og_title = soup.find('meta', property='og:title')['content']
        iso_date = soup.find('time')["datetime"]
        published = string_to_date(iso_date, "%Y-%m-%d")
        comic_imgs = soup.find('div', id='comic').find_all('img')
        hover_text = comic_imgs[0]['title']
        # The article text (div with id 'blag') is not collected.
        return {
            'title': og_title,
            'alt': hover_text,
            'img': [tag['src'] for tag in comic_imgs],
            'day': published.day,
            'month': published.month,
            'year': published.year,
        }
3221
3222
3223 View Code Duplication
class TuMourrasMoinsBete(GenericNavigableComic):
    """Retrieve Tu Mourras Moins Bete comics from its blogspot site."""
    name = 'mourrasmoinsbete'
    long_name = 'Tu Mourras Moins Bete'
    url = 'http://tumourrasmoinsbete.blogspot.fr'
    _categories = ('FRANCAIS', )
    get_first_comic_link = simulate_first_link
    first_url = 'http://tumourrasmoinsbete.blogspot.fr/2008/06/essai.html'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the 'newer' (or 'older') blog pager anchor."""
        if next_:
            pager_id = 'Blog1_blog-pager-newer-link'
        else:
            pager_id = 'Blog1_blog-pager-older-link'
        return last_soup.find('a', id=pager_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return image urls, author and title of the blog post."""
        page_title = soup.find('title').string
        body_div = soup.find('div', itemprop='description articleBody')
        post_imgs = body_div.find_all('img')
        post_author = soup.find('span', itemprop='author').string
        return {
            'img': [tag['src'] for tag in post_imgs],
            'author': post_author,
            'title': page_title,
        }
3248
3249
3250
class GeekAndPoke(GenericNavigableComic):
    """Class to retrieve Geek And Poke comics."""
    name = 'geek'
    long_name = 'Geek And Poke'
    url = 'http://geek-and-poke.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://geek-and-poke.com/geekandpoke/2006/8/27/a-new-place-for-a-not-so-old-blog.html'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        NOTE(review): the 'prev-item' anchor is used to go forward and the
        'next-item' anchor to go backward; presumably the site's pager runs
        from newest to oldest - confirm against the live markup.
        """
        return last_soup.find('a', class_='prev-item' if next_ else 'next-item')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with the title and description (Open Graph meta
        tags), the author, the publication date, the absolute urls of the
        images having a 'src' attribute and the alt text of the first of
        these images ('' when there is none).
        """
        title = soup.find('meta', property='og:title')['content']
        desc = soup.find('meta', property='og:description')['content']
        date_str = soup.find('time', class_='published')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        author = soup.find('a', rel='author').string
        # Posts use one of two containers for their content.
        div_content = (soup.find('div', class_="body entry-content") or
                       soup.find('div', class_="special-content"))
        imgs = div_content.find_all('img')
        imgs = [i for i in imgs if i.get('src') is not None]
        # NOTE(review): "'title' not in i" looks like it tests the children
        # of the tag rather than its attributes, which would make this check
        # always pass - confirm before tightening it.
        assert all('title' not in i or i['alt'] == i['title'] for i in imgs)
        # Always a string, even when the post has no image.
        alt = imgs[0].get('alt', "") if imgs else ""
        return {
            'title': title,
            'alt': alt,
            'description': desc,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
        }
3288
3289
3290
class GloryOwlComix(GenericNavigableComic):
    """Retrieve Glory Owl comics from its blogspot site."""
    name = 'gloryowl'
    long_name = 'Glory Owl'
    url = 'http://gloryowlcomix.blogspot.fr'
    _categories = ('NSFW', 'FRANCAIS')
    get_first_comic_link = simulate_first_link
    first_url = 'http://gloryowlcomix.blogspot.fr/2013/02/1_7.html'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the 'newer' (or 'older') blog pager anchor."""
        if next_:
            pager_id = 'Blog1_blog-pager-newer-link'
        else:
            pager_id = 'Blog1_blog-pager-older-link'
        return last_soup.find('a', id=pager_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return image urls ('image_src' links), author and title of the post."""
        page_title = soup.find('title').string
        image_links = soup.find_all('link', rel='image_src')
        post_author = soup.find('a', rel='author').string
        return {
            'img': [tag['href'] for tag in image_links],
            'author': post_author,
            'title': page_title,
        }
3315
3316
3317
class GenericTumblrV1(GenericComic):
    """Generic class to retrieve comics from Tumblr using the V1 API."""
    _categories = ('TUMBLR', )

    @classmethod
    def get_next_comic(cls, last_comic):
        """Yield the comics built from the posts following last_comic.

        Posts for which get_comic_info returns None are skipped.
        """
        for post in cls.get_posts(last_comic):
            info = cls.get_comic_info(post)
            if info is None:
                continue
            yield info

    @classmethod
    def get_url_from_post(cls, post):
        """Return the 'url' attribute of a post."""
        return post['url']

    @classmethod
    def get_api_url(cls):
        """Return the '/api/read/' url for the blog at cls.url."""
        return urljoin_wrapper(cls.url, '/api/read/')

    @classmethod
    def get_comic_info(cls, post):
        """Return a dict describing the comic in a post, None for non-photo posts."""
        if post['type'] != 'photo':
            return None
        tumblr_id = int(post['id'])
        # Url of the API call returning this single post.
        post_api_url = cls.get_api_url() + '?id=%d' % (tumblr_id)
        timestamp = int(post['unix-timestamp'])
        published = datetime.datetime.fromtimestamp(timestamp).date()
        caption_tag = post.find('photo-caption')
        caption_text = caption_tag.string if caption_tag else ""
        joined_tags = ' '.join(tag.string for tag in post.find_all('tag'))
        # Photos may appear in 'photo' tags and/or straight in the post
        photo_holders = post.find_all('photo') or [post]
        # Images are in multiple resolutions - taking the first one
        photo_urls = [holder.find('photo-url') for holder in photo_holders]
        return {
            'url': cls.get_url_from_post(post),
            'url2': post['url-with-slug'],
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'title': caption_text,
            'tags': joined_tags,
            'img': [url_tag.string for url_tag in photo_urls],
            'tumblr-id': tumblr_id,
            'api_url': post_api_url,
        }

    @classmethod
    def get_posts(cls, last_comic, nb_post_per_call=10):
        """Get posts using API. nb_post_per_call is max 50.

        Posts are retrieved from newer to older as per the tumblr v1 api
        but are returned in chronological order.

        Only the posts newer than last_comic are returned. Nothing is
        returned when the post of last_comic cannot be fetched anymore or
        when its url is not met while going through the posts.
        """
        if last_comic:
            target_url = last_comic['url']
        else:
            target_url = None
        collected = []
        if last_comic is not None:
            # Sometimes, tumblr posts are deleted. When previous post is deleted, we
            # might end up spending a lot of time looking for something that
            # doesn't exist. Failing early and clearly might be a better option.
            previous_api_url = last_comic['api_url']
            try:
                get_soup_at_url(previous_api_url)
            except urllib.error.HTTPError:
                # Tell apart a missing post from an unreachable blog.
                try:
                    get_soup_at_url(cls.url)
                except urllib.error.HTTPError:
                    print("Did not find previous post nor main url %s" % cls.url)
                else:
                    print("Did not find previous post %s : it might have been deleted" % previous_api_url)
                return reversed(collected)
        base_api_url = cls.get_api_url()
        first_page = get_soup_at_url(base_api_url).find('posts')
        start, total = int(first_page['start']), int(first_page['total'])
        assert start == 0
        for offset in range(0, total, nb_post_per_call):
            page_url = base_api_url + '?start=%d&num=%d' % (offset, nb_post_per_call)
            page = get_soup_at_url(page_url).find('posts')
            page_start, page_total = int(page['start']), int(page['total'])
            assert offset == page_start, "%d != %d" % (offset, page_start)
            # This may happen and should be handled in the future
            assert total == page_total, "%d != %d" % (total, page_total)
            for post in page.find_all('post'):
                if target_url and target_url == cls.get_url_from_post(post):
                    # Reached the last known comic: everything newer is collected.
                    return reversed(collected)
                collected.append(post)
        if target_url is None:
            return reversed(collected)
        print("Did not find %s : there might be a problem" % target_url)
        return []
3410
3411
3412
class SaturdayMorningBreakfastCerealTumblr(GenericTumblrV1):
    """Retrieve Saturday Morning Breakfast Cereal comics using the Tumblr V1 API."""
    # Also on http://www.gocomics.com/saturday-morning-breakfast-cereal
    # Also on http://www.smbc-comics.com
    name = 'smbc-tumblr'
    long_name = 'Saturday Morning Breakfast Cereal (from Tumblr)'
    url = 'http://smbc-comics.tumblr.com'
    _categories = ('SMBC', )
3420
3421
3422
class IrwinCardozo(GenericTumblrV1):
    """Retrieve Irwin Cardozo comics using the Tumblr V1 API."""
    name = 'irwinc'
    long_name = 'Irwin Cardozo'
    url = 'http://irwincardozocomics.tumblr.com'
3427
3428
3429
class AccordingToDevin(GenericTumblrV1):
    """Retrieve According To Devin comics using the Tumblr V1 API."""
    name = 'devin'
    long_name = 'According To Devin'
    url = 'http://accordingtodevin.tumblr.com'
3434
3435
3436
class ItsTheTieTumblr(GenericTumblrV1):
    """Retrieve It's the tie comics using the Tumblr V1 API."""
    # Also on http://itsthetie.com
    # Also on https://tapastic.com/series/itsthetie
    name = 'tie-tumblr'
    long_name = "It's the tie (from Tumblr)"
    url = 'http://itsthetie.tumblr.com'
    _categories = ('TIE', )
3444
3445
3446
class OctopunsTumblr(GenericTumblrV1):
    """Retrieve Octopuns comics using the Tumblr V1 API."""
    # Also on http://www.octopuns.net
    name = 'octopuns-tumblr'
    long_name = 'Octopuns (from Tumblr)'
    url = 'http://octopuns.tumblr.com'
3452
3453
3454
class PicturesInBoxesTumblr(GenericTumblrV1):
    """Retrieve Pictures In Boxes comics using the Tumblr V1 API."""
    # Also on http://www.picturesinboxes.com
    name = 'picturesinboxes-tumblr'
    long_name = 'Pictures in Boxes (from Tumblr)'
    url = 'http://picturesinboxescomic.tumblr.com'
3460
3461
3462
class TubeyToonsTumblr(GenericTumblrV1):
    """Retrieve Tubey Toons comics using the Tumblr V1 API."""
    # Also on http://tapastic.com/series/Tubey-Toons
    # Also on http://tubeytoons.com
    name = 'tubeytoons-tumblr'
    long_name = 'Tubey Toons (from Tumblr)'
    url = 'http://tubeytoons.tumblr.com'
    # NOTE(review): 'TUNEYTOONS' looks like a typo for 'TUBEYTOONS'; the value
    # presumably has to match the other Tubey Toons classes - confirm before
    # renaming.
    _categories = ('TUNEYTOONS', )
3470
3471
3472
class UnearthedComicsTumblr(GenericTumblrV1):
    """Retrieve Unearthed Comics using the Tumblr V1 API."""
    # Also on http://tapastic.com/series/UnearthedComics
    # Also on http://unearthedcomics.com
    name = 'unearthed-tumblr'
    long_name = 'Unearthed Comics (from Tumblr)'
    url = 'http://unearthedcomics.tumblr.com'
    _categories = ('UNEARTHED', )
3480
3481
3482
class PieComic(GenericTumblrV1):
    """Retrieve Pie Comic comics using the Tumblr V1 API."""
    name = 'pie'
    long_name = 'Pie Comic'
    url = 'http://piecomic.tumblr.com'
3487
3488
3489
class MrEthanDiamond(GenericTumblrV1):
    """Retrieve Mr Ethan Diamond comics using the Tumblr V1 API."""
    name = 'diamond'
    long_name = 'Mr Ethan Diamond'
    url = 'http://mrethandiamond.tumblr.com'
3494
3495
3496
class Flocci(GenericTumblrV1):
    """Retrieve floccinaucinihilipilification comics using the Tumblr V1 API."""
    name = 'flocci'
    long_name = 'floccinaucinihilipilification'
    url = 'http://floccinaucinihilipilificationa.tumblr.com'
3501
3502
3503
class UpAndOut(GenericTumblrV1):
    """Retrieve Up & Out comics using the Tumblr V1 API."""
    # Also on http://tapastic.com/series/UP-and-OUT
    name = 'upandout'
    long_name = 'Up And Out (from Tumblr)'
    url = 'http://upandoutcomic.tumblr.com'
3509
3510
3511
class Pundemonium(GenericTumblrV1):
    """Retrieve Pundemonium comics using the Tumblr V1 API."""
    name = 'pundemonium'
    long_name = 'Pundemonium'
    url = 'http://monstika.tumblr.com'
3516
3517
3518
class PoorlyDrawnLinesTumblr(GenericTumblrV1):
    """Retrieve Poorly Drawn Lines comics using the Tumblr V1 API."""
    # Also on http://poorlydrawnlines.com
    name = 'poorlydrawn-tumblr'
    long_name = 'Poorly Drawn Lines (from Tumblr)'
    url = 'http://pdlcomics.tumblr.com'
    _categories = ('POORLYDRAWN', )
3525
3526
3527
class PearShapedComics(GenericTumblrV1):
    """Retrieve Pear-Shaped Comics using the Tumblr V1 API."""
    name = 'pearshaped'
    long_name = 'Pear-Shaped Comics'
    url = 'http://pearshapedcomics.com'
3532
3533
3534
class PondScumComics(GenericTumblrV1):
    """Retrieve Pond Scum comics using the Tumblr V1 API."""
    name = 'pond'
    long_name = 'Pond Scum'
    url = 'http://pondscumcomic.tumblr.com'
3539
3540
3541
class MercworksTumblr(GenericTumblrV1):
    """Retrieve Mercworks comics using the Tumblr V1 API."""
    # Also on http://mercworks.net
    name = 'mercworks-tumblr'
    long_name = 'Mercworks (from Tumblr)'
    url = 'http://mercworks.tumblr.com'
3547
3548
3549
class OwlTurdTumblr(GenericTumblrV1):
    """Retrieve Owl Turd comics using the Tumblr V1 API."""
    # Also on http://tapastic.com/series/Owl-Turd-Comix
    name = 'owlturd-tumblr'
    long_name = 'Owl Turd (from Tumblr)'
    url = 'http://owlturd.com'
    _categories = ('OWLTURD', )
3556
3557
3558
class VectorBelly(GenericTumblrV1):
    """Retrieve Vector Belly comics using the Tumblr V1 API."""
    # Also on http://vectorbelly.com
    name = 'vector'
    long_name = 'Vector Belly'
    url = 'http://vectorbelly.tumblr.com'
3564
3565
3566
class GoneIntoRapture(GenericTumblrV1):
    """Retrieve Gone Into Rapture comics using the Tumblr V1 API."""
    # Also on http://goneintorapture.tumblr.com
    # Also on http://tapastic.com/series/Goneintorapture
    name = 'rapture'
    long_name = 'Gone Into Rapture'
    url = 'http://www.goneintorapture.com'
3573
3574
3575
class TheOatmealTumblr(GenericTumblrV1):
    """Retrieve The Oatmeal comics using the Tumblr V1 API."""
    # Also on http://theoatmeal.com
    name = 'oatmeal-tumblr'
    long_name = 'The Oatmeal (from Tumblr)'
    url = 'http://oatmeal.tumblr.com'
3581
3582
3583
class HeckIfIKnowComicsTumblr(GenericTumblrV1):
    """Retrieve Heck If I Know comics using the Tumblr V1 API."""
    # Also on http://tapastic.com/series/Regular
    name = 'heck-tumblr'
    long_name = 'Heck if I Know comics (from Tumblr)'
    url = 'http://heckifiknowcomics.com'
3589
3590
3591
class MyJetPack(GenericTumblrV1):
    """Retrieve My Jet Pack comics using the Tumblr V1 API."""
    name = 'jetpack'
    long_name = 'My Jet Pack'
    url = 'http://myjetpack.tumblr.com'
3596
3597
3598
class CheerUpEmoKidTumblr(GenericTumblrV1):
    """Retrieve Cheer Up Emo Kid comics using the Tumblr V1 API."""
    # Also on http://www.cheerupemokid.com
    # Also on http://tapastic.com/series/CUEK
    name = 'cuek-tumblr'
    long_name = 'Cheer Up Emo Kid (from Tumblr)'
    url = 'http://enzocomics.tumblr.com'
3605
3606
3607
class ForLackOfABetterComic(GenericTumblrV1):
    """Retrieve For Lack Of A Better Comic using the Tumblr V1 API."""
    # Also on http://forlackofabettercomic.com
    name = 'lack'
    long_name = 'For Lack Of A Better Comic'
    url = 'http://forlackofabettercomic.tumblr.com'
3613
3614
3615
class ZenPencilsTumblr(GenericTumblrV1):
    """Retrieve Zen Pencils comics using the Tumblr V1 API."""
    # Also on http://zenpencils.com
    # Also on http://www.gocomics.com/zen-pencils
    name = 'zenpencils-tumblr'
    long_name = 'Zen Pencils (from Tumblr)'
    url = 'http://zenpencils.tumblr.com'
    _categories = ('ZENPENCILS', )
3623
3624
3625
class ThreeWordPhraseTumblr(GenericTumblrV1):
    """Retrieve Three Word Phrase comics using the Tumblr V1 API."""
    # Also on http://threewordphrase.com
    name = 'threeword-tumblr'
    long_name = 'Three Word Phrase (from Tumblr)'
    url = 'http://www.threewordphrase.tumblr.com'
3631
3632
3633
class TimeTrabbleTumblr(GenericTumblrV1):
    """Retrieve Time Trabble comics using the Tumblr V1 API."""
    # Also on http://timetrabble.com
    name = 'timetrabble-tumblr'
    long_name = 'Time Trabble (from Tumblr)'
    url = 'http://timetrabble.tumblr.com'
3639
3640
3641
class SafelyEndangeredTumblr(GenericTumblrV1):
    """Retrieve Safely Endangered comics using the Tumblr V1 API."""
    # Also on http://www.safelyendangered.com
    name = 'endangered-tumblr'
    long_name = 'Safely Endangered (from Tumblr)'
    url = 'http://tumblr.safelyendangered.com'
3647
3648
3649
class MouseBearComedyTumblr(GenericTumblrV1):
    """Retrieve Mouse Bear Comedy comics using the Tumblr V1 API."""
    # Also on http://www.mousebearcomedy.com
    name = 'mousebear-tumblr'
    long_name = 'Mouse Bear Comedy (from Tumblr)'
    url = 'http://mousebearcomedy.tumblr.com'
3655
3656
3657
class BouletCorpTumblr(GenericTumblrV1):
    """Retrieve Boulet Corp comics using the Tumblr V1 API."""
    # Also on http://www.bouletcorp.com
    name = 'boulet-tumblr'
    long_name = 'Boulet Corp (from Tumblr)'
    url = 'http://bouletcorp.tumblr.com'
    _categories = ('BOULET', )
3664
3665
3666
class TheAwkwardYetiTumblr(GenericTumblrV1):
    """Retrieve The Awkward Yeti comics using the Tumblr V1 API."""
    # Also on http://www.gocomics.com/the-awkward-yeti
    # Also on http://theawkwardyeti.com
    # Also on https://tapastic.com/series/TheAwkwardYeti
    name = 'yeti-tumblr'
    long_name = 'The Awkward Yeti (from Tumblr)'
    url = 'http://larstheyeti.tumblr.com'
    _categories = ('YETI', )
3675
3676
3677
class NellucNhoj(GenericTumblrV1):
    """Retrieve Nelluc Nhoj comics using the Tumblr V1 API."""
    name = 'nhoj'
    long_name = 'Nelluc Nhoj'
    url = 'http://nellucnhoj.com'
3682
3683
3684
class DownTheUpwardSpiralTumblr(GenericTumblrV1):
    """Retrieve Down The Upward Spiral comics using the Tumblr V1 API."""
    # Also on http://www.downtheupwardspiral.com
    name = 'spiral-tumblr'
    long_name = 'Down the Upward Spiral (from Tumblr)'
    url = 'http://downtheupwardspiral.tumblr.com'
3690
3691
3692
class AsPerUsualTumblr(GenericTumblrV1):
    """Class to retrieve As Per Usual comics."""
    # Also on https://tapastic.com/series/AsPerUsual
    name = 'usual-tumblr'
    long_name = 'As Per Usual (from Tumblr)'
    url = 'http://as-per-usual.tumblr.com'
    # Categories are declared in '_categories' (leading underscore), the
    # attribute name used by the other comic classes of this module.
    _categories = ('DAMILEE', )
3699
3700
3701
class HotComicsForCoolPeopleTumblr(GenericTumblrV1):
    """Class to retrieve Hot Comics For Cool People."""
    # Also on https://tapastic.com/series/Hot-Comics-For-Cool-People
    # Also on http://hotcomics.biz (links to tumblr)
    # Also on http://hcfcp.com (links to tumblr)
    name = 'hotcomics-tumblr'
    long_name = 'Hot Comics For Cool People (from Tumblr)'
    url = 'http://hotcomicsforcoolpeople.tumblr.com'
    # Categories are declared in '_categories' (leading underscore), the
    # attribute name used by the other comic classes of this module.
    _categories = ('DAMILEE', )
3710
3711
3712
class OneOneOneOneComicTumblr(GenericTumblrV1):
    """Retrieve 1111 Comics using the Tumblr V1 API."""
    # Also on http://www.1111comics.me
    # Also on https://tapastic.com/series/1111-Comics
    name = '1111-tumblr'
    long_name = '1111 Comics (from Tumblr)'
    url = 'http://comics1111.tumblr.com'
    _categories = ('ONEONEONEONE', )
3720
3721
3722
class JhallComicsTumblr(GenericTumblrV1):
    """Retrieve Jhall Comics using the Tumblr V1 API."""
    # Also on http://jhallcomics.com
    name = 'jhall-tumblr'
    long_name = 'Jhall Comics (from Tumblr)'
    url = 'http://jhallcomics.tumblr.com'
3728
3729
3730
class BerkeleyMewsTumblr(GenericTumblrV1):
    """Retrieve Berkeley Mews comics using the Tumblr V1 API."""
    # Also on http://www.gocomics.com/berkeley-mews
    # Also on http://www.berkeleymews.com
    name = 'berkeley-tumblr'
    long_name = 'Berkeley Mews (from Tumblr)'
    url = 'http://mews.tumblr.com'
    _categories = ('BERKELEY', )
3738
3739
3740
class JoanCornellaTumblr(GenericTumblrV1):
    """Retrieve Joan Cornella comics using the Tumblr V1 API."""
    # Also on http://joancornella.net
    name = 'cornella-tumblr'
    long_name = 'Joan Cornella (from Tumblr)'
    url = 'http://cornellajoan.tumblr.com'
3746
3747
3748
class RespawnComicTumblr(GenericTumblrV1):
    """Retrieve Respawn Comic using the Tumblr V1 API."""
    # Also on http://respawncomic.com
    name = 'respawn-tumblr'
    long_name = 'Respawn Comic (from Tumblr)'
    url = 'http://respawncomic.tumblr.com'
3754
3755
3756
class ChrisHallbeckTumblr(GenericTumblrV1):
    """Class to retrieve Chris Hallbeck comics."""
    # Also on https://tapastic.com/ChrisHallbeck
    # Also on http://maximumble.com
    # Also on http://minimumble.com
    # Also on http://thebookofbiff.com
    name = 'hallbeck-tumblr'
    # Display name spelled like the author's name used in the urls above.
    long_name = 'Chris Hallbeck (from Tumblr)'
    url = 'http://chrishallbeck.tumblr.com'
    # NOTE(review): 'HALLBACK' looks misspelled too, but the value presumably
    # has to match the category of the other Chris Hallbeck classes - confirm
    # before renaming.
    _categories = ('HALLBACK', )
3766
3767
3768
class ComicNuggets(GenericTumblrV1):
    """Retrieve Comic Nuggets using the Tumblr V1 API."""
    name = 'nuggets'
    long_name = 'Comic Nuggets'
    url = 'http://comicnuggets.com'
3773
3774
3775
class PigeonGazetteTumblr(GenericTumblrV1):
    """Retrieve The Pigeon Gazette comics using the Tumblr V1 API."""
    # Also on https://tapastic.com/series/The-Pigeon-Gazette
    name = 'pigeon-tumblr'
    long_name = 'The Pigeon Gazette (from Tumblr)'
    url = 'http://thepigeongazette.tumblr.com'
3781
3782
3783
class CancerOwl(GenericTumblrV1):
    """Retrieve Cancer Owl comics using the Tumblr V1 API."""
    # Also on http://cancerowl.com
    name = 'cancerowl-tumblr'
    long_name = 'Cancer Owl (from Tumblr)'
    url = 'http://cancerowl.tumblr.com'
3789
3790
3791
class FowlLanguageTumblr(GenericTumblrV1):
    """Retrieve Fowl Language comics using the Tumblr V1 API."""
    # Also on http://www.fowllanguagecomics.com
    # Also on http://tapastic.com/series/Fowl-Language-Comics
    # Also on http://www.gocomics.com/fowl-language
    name = 'fowllanguage-tumblr'
    long_name = 'Fowl Language Comics (from Tumblr)'
    url = 'http://fowllanguagecomics.tumblr.com'
    _categories = ('FOWLLANGUAGE', )
3800
3801
3802
class TheOdd1sOutTumblr(GenericTumblrV1):
    """Retrieve The Odd 1s Out comics using the Tumblr V1 API."""
    # Also on http://theodd1sout.com
    # Also on https://tapastic.com/series/Theodd1sout
    name = 'theodd-tumblr'
    long_name = 'The Odd 1s Out (from Tumblr)'
    url = 'http://theodd1sout.tumblr.com'
3809
3810
3811
class TheUnderfoldTumblr(GenericTumblrV1):
    """Retrieve The Underfold comics using the Tumblr V1 API."""
    # Also on http://theunderfold.com
    name = 'underfold-tumblr'
    long_name = 'The Underfold (from Tumblr)'
    url = 'http://theunderfold.tumblr.com'
3817
3818
3819
class LolNeinTumblr(GenericTumblrV1):
    """Retrieve Lol Nein comics using the Tumblr V1 API."""
    # Also on http://lolnein.com
    name = 'lolnein-tumblr'
    long_name = 'Lol Nein (from Tumblr)'
    url = 'http://lolneincom.tumblr.com'
3825
3826
3827
class FatAwesomeComicsTumblr(GenericTumblrV1):
    """Retrieve Fat Awesome comics using the Tumblr V1 API."""
    # Also on http://fatawesome.com/comics
    name = 'fatawesome-tumblr'
    long_name = 'Fat Awesome (from Tumblr)'
    url = 'http://fatawesomecomedy.tumblr.com'
3833
3834
3835
class TheWorldIsFlatTumblr(GenericTumblrV1):
    """Retrieve The World Is Flat comics using the Tumblr V1 API."""
    # Also on https://tapastic.com/series/The-World-is-Flat
    name = 'flatworld-tumblr'
    long_name = 'The World Is Flat (from Tumblr)'
    url = 'http://theworldisflatcomics.tumblr.com'
3841
3842
3843
class DorrisMc(GenericTumblrV1):
    """Retrieve Dorris Mc comics using the Tumblr V1 API."""
    # Also on http://www.gocomics.com/dorris-mccomics
    name = 'dorrismc'
    long_name = 'Dorris Mc'
    url = 'http://dorrismccomics.com'
3849
3850
3851
class LeleozTumblr(GenericEmptyComic, GenericTumblrV1):
    """Class for the Leleoz comics hosted on Tumblr."""
    # NOTE(review): GenericEmptyComic comes first in the bases, so it
    # presumably takes over the retrieval done by GenericTumblrV1 - confirm.
    # Also on https://tapastic.com/series/Leleoz
    name = 'leleoz-tumblr'
    long_name = 'Leleoz (from Tumblr)'
    url = 'http://leleozcomics.tumblr.com'
3857
3858
3859
class MoonBeardTumblr(GenericTumblrV1):
    """Class to retrieve MoonBeard comics.

    Declarative only: behaviour is inherited from GenericTumblrV1.
    """
    # Also on http://moonbeard.com
    # Also on http://www.webtoons.com/en/comedy/moon-beard/list?title_no=471
    name = 'moonbeard-tumblr'
    long_name = 'Moon Beard (from Tumblr)'
    url = 'http://blog.squiresjam.es/moonbeard'
3866
3867
3868
class AComik(GenericTumblrV1):
    """Class to retrieve A Comik.

    Declarative only: behaviour is inherited from GenericTumblrV1.
    """
    name = 'comik'
    long_name = 'A Comik'
    url = 'http://acomik.com'
3873
3874
3875
class ClassicRandy(GenericTumblrV1):
    """Class to retrieve Classic Randy comics.

    Declarative only: behaviour is inherited from GenericTumblrV1.
    """
    name = 'randy'
    long_name = 'Classic Randy'
    url = 'http://classicrandy.tumblr.com'
3880
3881
3882
class DagssonTumblr(GenericTumblrV1):
    """Class to retrieve Dagsson comics.

    Declarative only: behaviour is inherited from GenericTumblrV1.
    """
    # Also on http://www.dagsson.com
    name = 'dagsson-tumblr'
    long_name = 'Dagsson Hugleikur (from Tumblr)'
    url = 'http://hugleikurdagsson.tumblr.com'
3888
3889
3890
class LinsEditionsTumblr(GenericTumblrV1):
    """Class to retrieve L.I.N.S. Editions comics.

    Declarative only: behaviour is inherited from GenericTumblrV1.
    """
    # Also on https://linsedition.com
    # Now on http://warandpeas.tumblr.com
    name = 'lins-tumblr'
    long_name = 'L.I.N.S. Editions (from Tumblr)'
    url = 'http://linscomics.tumblr.com'
    _categories = ('LINS', )
3898
3899
3900
class WarAndPeasTumblr(GenericTumblrV1):
    """Class to retrieve War And Peas comics.

    Declarative only: behaviour is inherited from GenericTumblrV1.
    """
    # Was on http://linscomics.tumblr.com
    name = 'warandpeas-tumblr'
    long_name = 'War And Peas (from Tumblr)'
    url = 'http://warandpeas.tumblr.com'
    _categories = ('WARANDPEAS', )
3907
3908
3909
class OrigamiHotDish(GenericTumblrV1):
    """Class to retrieve Origami Hot Dish comics.

    Declarative only: behaviour is inherited from GenericTumblrV1.
    """
    name = 'origamihotdish'
    long_name = 'Origami Hot Dish'
    url = 'http://origamihotdish.com'
3914
3915
3916
class HitAndMissComicsTumblr(GenericTumblrV1):
    """Class to retrieve Hit and Miss Comics.

    Declarative only: behaviour is inherited from GenericTumblrV1.
    """
    name = 'hitandmiss'
    long_name = 'Hit and Miss Comics'
    url = 'http://hitandmisscomics.tumblr.com'
3921
3922
3923
class HMBlanc(GenericTumblrV1):
    """Class to retrieve HM Blanc comics.

    Declarative only: behaviour is inherited from GenericTumblrV1.
    """
    name = 'hmblanc'
    long_name = 'HM Blanc'
    url = 'http://hmblanc.tumblr.com'
3928
3929
3930
class TalesOfAbsurdityTumblr(GenericTumblrV1):
    """Class to retrieve Tales Of Absurdity comics.

    Declarative only: behaviour is inherited from GenericTumblrV1.
    """
    # Also on http://talesofabsurdity.com
    # Also on http://tapastic.com/series/Tales-Of-Absurdity
    name = 'absurdity-tumblr'
    long_name = 'Tales of Absurdity (from Tumblr)'
    url = 'http://talesofabsurdity.tumblr.com'
    _categories = ('ABSURDITY', )
3938
3939
3940
class RobbieAndBobby(GenericTumblrV1):
    """Class to retrieve Robbie And Bobby comics.

    Declarative only: behaviour is inherited from GenericTumblrV1.
    """
    # Also on http://robbieandbobby.com
    name = 'robbie-tumblr'
    long_name = 'Robbie And Bobby (from Tumblr)'
    url = 'http://robbieandbobby.tumblr.com'
3946
3947
3948
class ElectricBunnyComicTumblr(GenericTumblrV1):
    """Class to retrieve Electric Bunny Comics.

    Declarative only: behaviour is inherited from GenericTumblrV1.
    """
    # Also on http://www.electricbunnycomics.com/View/Comic/153/Welcome+to+Hell
    name = 'bunny-tumblr'
    long_name = 'Electric Bunny Comic (from Tumblr)'
    url = 'http://electricbunnycomics.tumblr.com'
3954
3955
3956
class Hoomph(GenericTumblrV1):
    """Class to retrieve Hoomph comics.

    Declarative only: behaviour is inherited from GenericTumblrV1.
    """
    name = 'hoomph'
    long_name = 'Hoomph'
    url = 'http://hoom.ph'
3961
3962
3963
class BFGFSTumblr(GenericTumblrV1):
    """Class to retrieve BFGFS comics.

    Declarative only: behaviour is inherited from GenericTumblrV1.
    """
    # Also on https://tapastic.com/series/BFGFS
    # Also on http://bfgfs.com
    name = 'bfgfs-tumblr'
    long_name = 'BFGFS (from Tumblr)'
    url = 'http://bfgfs.tumblr.com'
3970
3971
3972
class DoodleForFood(GenericTumblrV1):
    """Class to retrieve Doodle For Food comics.

    Declarative only: behaviour is inherited from GenericTumblrV1.
    """
    # Also on http://doodleforfood.com
    name = 'doodle'
    long_name = 'Doodle For Food'
    url = 'http://doodleforfood.com'
3978
3979
3980
class CassandraCalinTumblr(GenericTumblrV1):
    """Class to retrieve C. Cassandra comics.

    Declarative only: behaviour is inherited from GenericTumblrV1.
    """
    # Also on http://cassandracalin.com
    # Also on https://tapastic.com/series/C-Cassandra-comics
    name = 'cassandra-tumblr'
    long_name = 'Cassandra Calin (from Tumblr)'
    url = 'http://c-cassandra.tumblr.com'
3987
3988
3989
class DougWasTaken(GenericTumblrV1):
    """Class to retrieve Doug Was Taken comics.

    Declarative only: behaviour is inherited from GenericTumblrV1.
    """
    name = 'doug'
    long_name = 'Doug Was Taken'
    url = 'http://dougwastaken.tumblr.com'
3994
3995
3996
class MandatoryRollerCoaster(GenericTumblrV1):
    """Class to retrieve Mandatory Roller Coaster comics.

    Declarative only: behaviour is inherited from GenericTumblrV1.
    """
    name = 'rollercoaster'
    long_name = 'Mandatory Roller Coaster'
    url = 'http://mandatoryrollercoaster.com'
4001
4002
4003
class CEstPasEnRegardantSesPompes(GenericTumblrV1):
    """Class to retrieve C'Est Pas En Regardant Ses Pompes (...) comics.

    Declarative only: behaviour is inherited from GenericTumblrV1.
    """
    name = 'cperspqccltt'
    long_name = 'C Est Pas En Regardant Ses Pompes (...)'
    url = 'http://cperspqccltt.tumblr.com'
4008
4009
4010
class TheGrohlTroll(GenericTumblrV1):
    """Class to retrieve The Grohl Troll comics.

    Declarative only: behaviour is inherited from GenericTumblrV1.
    """
    name = 'grohltroll'
    long_name = 'The Grohl Troll'
    url = 'http://thegrohltroll.com'
4015
4016
4017
class WebcomicName(GenericTumblrV1):
    """Class to retrieve Webcomic Name comics.

    Declarative only: behaviour is inherited from GenericTumblrV1.
    """
    name = 'webcomicname'
    long_name = 'Webcomic Name'
    url = 'http://webcomicname.com'
4022
4023
4024
class BooksOfAdam(GenericTumblrV1):
    """Class to retrieve Books of Adam comics.

    Declarative only: behaviour is inherited from GenericTumblrV1.
    """
    # Also on http://www.booksofadam.com
    name = 'booksofadam'
    long_name = 'Books of Adam'
    url = 'http://booksofadam.tumblr.com'
4030
4031
4032
class HarkAVagrant(GenericTumblrV1):
    """Class to retrieve Hark A Vagrant comics.

    Declarative only: behaviour is inherited from GenericTumblrV1.
    """
    # Also on http://www.harkavagrant.com
    name = 'hark-tumblr'
    long_name = 'Hark A Vagrant (from Tumblr)'
    url = 'http://beatonna.tumblr.com'
4038
4039
4040
class OurSuperAdventureTumblr(GenericTumblrV1):
    """Class to retrieve Our Super Adventure comics.

    Declarative only: behaviour is inherited from GenericTumblrV1.
    """
    # Also on https://tapastic.com/series/Our-Super-Adventure
    # Also on http://www.oursuperadventure.com
    # http://sarahgraley.com
    name = 'superadventure-tumblr'
    long_name = 'Our Super Adventure (from Tumblr)'
    url = 'http://sarahssketchbook.tumblr.com'
4048
4049
4050
class JakeLikesOnions(GenericTumblrV1):
    """Class to retrieve Jake Likes Onions comics.

    Declarative only: behaviour is inherited from GenericTumblrV1.
    """
    name = 'jake'
    long_name = 'Jake Likes Onions'
    url = 'http://jakelikesonions.com'
4055
4056
4057
class InYourFaceCake(GenericTumblrV1):
    """Class to retrieve In Your Face Cake comics.

    Declarative only: behaviour is inherited from GenericTumblrV1.
    """
    name = 'inyourfacecake-tumblr'
    long_name = 'In Your Face Cake (from Tumblr)'
    url = 'http://in-your-face-cake.tumblr.com'
4062
4063
4064
class Robospunk(GenericTumblrV1):
    """Class to retrieve Robospunk comics.

    Declarative only: behaviour is inherited from GenericTumblrV1.
    """
    name = 'robospunk'
    long_name = 'Robospunk'
    url = 'http://robospunk.com'
4069
4070
4071
class BananaTwinky(GenericTumblrV1):
    """Class to retrieve Banana Twinky comics.

    Declarative only: behaviour is inherited from GenericTumblrV1.
    """
    name = 'banana'
    long_name = 'Banana Twinky'
    url = 'http://bananatwinky.tumblr.com'
4076
4077
4078
class YesterdaysPopcornTumblr(GenericTumblrV1):
    """Class to retrieve Yesterday's Popcorn comics.

    Declarative only: behaviour is inherited from GenericTumblrV1.
    """
    # Also on http://www.yesterdayspopcorn.com
    # Also on https://tapastic.com/series/Yesterdays-Popcorn
    name = 'popcorn-tumblr'
    long_name = 'Yesterday\'s Popcorn (from Tumblr)'
    url = 'http://yesterdayspopcorn.tumblr.com'
4085
4086
4087
class TwistedDoodles(GenericTumblrV1):
    """Class to retrieve Twisted Doodles comics.

    Declarative only: behaviour is inherited from GenericTumblrV1.
    """
    name = 'twisted'
    long_name = 'Twisted Doodles'
    url = 'http://www.twisteddoodles.com'
4092
4093
4094
class UbertoolTumblr(GenericTumblrV1):
    """Class to retrieve Ubertool comics.

    Declarative only: behaviour is inherited from GenericTumblrV1.
    """
    # Also on http://ubertoolcomic.com
    # Also on https://tapastic.com/series/ubertool
    name = 'ubertool-tumblr'
    long_name = 'Ubertool (from Tumblr)'
    url = 'http://ubertool.tumblr.com'
    _categories = ('UBERTOOL', )
4102
4103
4104
class LittleLifeLinesTumblr(GenericTumblrV1):
    """Class to retrieve Little Life Lines comics.

    Declarative only: behaviour is inherited from GenericTumblrV1.
    """
    # Also on http://www.littlelifelines.com
    name = 'life-tumblr'
    long_name = 'Little Life Lines (from Tumblr)'
    url = 'https://little-life-lines.tumblr.com'
4110
4111
4112
class TheyCanTalk(GenericTumblrV1):
    """Class to retrieve They Can Talk comics.

    Declarative only: behaviour is inherited from GenericTumblrV1.
    """
    name = 'theycantalk'
    long_name = 'They Can Talk'
    url = 'http://theycantalk.com'
4117
4118
4119
class Will5NeverCome(GenericTumblrV1):
    """Class to retrieve Will 5:00 Never Come comics.

    Declarative only: behaviour is inherited from GenericTumblrV1.
    """
    name = 'will5'
    long_name = 'Will 5:00 Never Come ?'
    url = 'http://will5nevercome.com'
4124
4125
4126
class Sephko(GenericTumblrV1):
    """Class to retrieve Sephko Comics.

    Declarative only: behaviour is inherited from GenericTumblrV1.
    """
    # Also on http://www.sephko.com
    name = 'sephko'
    long_name = 'Sephko'
    url = 'http://sephko.tumblr.com'
4132
4133
4134
class BlazersAtDawn(GenericTumblrV1):
    """Class to retrieve Blazers At Dawn Comics.

    Declarative only: behaviour is inherited from GenericTumblrV1.
    """
    name = 'blazers'
    long_name = 'Blazers At Dawn'
    url = 'http://blazersatdawn.tumblr.com'
4139
4140
4141
class ArtByMoga(GenericEmptyComic, GenericTumblrV1):  # Deactivated because it downloads too many things
    """Class to retrieve Art By Moga Comics.

    Declarative only: behaviour is inherited from the base classes.
    """
    name = 'moga'
    long_name = 'Art By Moga'
    url = 'http://artbymoga.tumblr.com'
4146
4147
4148
class VerbalVomitTumblr(GenericTumblrV1):
    """Class to retrieve Verbal Vomit comics.

    Declarative only: behaviour is inherited from GenericTumblrV1.
    """
    # Also on http://www.verbal-vomit.com
    name = 'vomit-tumblr'
    long_name = 'Verbal Vomit (from Tumblr)'
    url = 'http://verbalvomits.tumblr.com'
4154
4155
4156
class LibraryComic(GenericTumblrV1):
    """Class to retrieve LibraryComic.

    Declarative only: behaviour is inherited from GenericTumblrV1.
    """
    # Also on http://librarycomic.com
    name = 'library-tumblr'
    long_name = 'LibraryComic (from Tumblr)'
    url = 'http://librarycomic.tumblr.com'
4162
4163
4164
class HorovitzComics(GenericListableComic):
    """Generic class to handle the logic common to the different comics from Horovitz.

    Subclasses must replace ``link_re`` with a compiled pattern whose first
    group captures the comic number: it is used both to select the links on
    the archive page and to extract the number from each link.
    """
    url = 'http://www.horovitzcomics.com'
    _categories = ('HOROVITZ', )
    # The three groups are read as year, month and day (in that order).
    img_re = re.compile(r'.*comics/([0-9]*)/([0-9]*)/([0-9]*)/.*$')
    link_re = NotImplemented  # placeholder: has no .match(), must be overridden
    get_url_from_archive_element = join_cls_url_to_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        ``soup`` is the parsed comic page and ``link`` the archive link that
        led to it. Exactly one ``<img id="comic">`` is expected on the page
        (enforced by an assertion); the date is taken from its ``src`` path.
        Returns a dict with keys title, day, month, year, img and num.
        """
        num = int(cls.link_re.match(link['href']).group(1))
        imgs = soup.find_all('img', id='comic')
        assert len(imgs) == 1
        year, month, day = (
            int(s) for s in cls.img_re.match(imgs[0]['src']).groups())
        return {
            'title': link.string,
            'day': day,
            'month': month,
            'year': year,
            'img': [i['src'] for i in imgs],
            'num': num,
        }

    @classmethod
    def get_archive_elements(cls):
        """Get the archive links matching ``cls.link_re``, in reversed page order."""
        archive_url = 'http://www.horovitzcomics.com/comics/archive/'
        return reversed(get_soup_at_url(archive_url).find_all('a', href=cls.link_re))
4195
4196
4197
class HorovitzNew(HorovitzComics):
    """Class to retrieve Horovitz new comics.

    Declarative only: sets the link pattern used by HorovitzComics.
    """
    name = 'horovitznew'
    long_name = 'Horovitz New'
    link_re = re.compile(r'^/comics/new/([0-9]+)$')
4202
4203
4204
class HorovitzClassic(HorovitzComics):
    """Class to retrieve Horovitz classic comics.

    Declarative only: sets the link pattern used by HorovitzComics.
    """
    name = 'horovitzclassic'
    long_name = 'Horovitz Classic'
    link_re = re.compile(r'^/comics/classic/([0-9]+)$')
4209
4210
4211
class GenericGoComic(GenericNavigableComic):
    """Generic class to handle the logic common to comics from gocomics.com.

    Navigation links are located by the exact value of their ``class``
    attribute; the trailing space in those values is deliberate and must be
    kept as is.
    """
    _categories = ('GOCOMIC', )

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics."""
        first_class = 'fa btn btn-outline-default btn-circle fa-backward sm '
        return get_soup_at_url(cls.url).find('a', class_=first_class)

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic."""
        prev_class = 'fa btn btn-outline-default btn-circle fa-caret-left sm '
        next_class = 'fa btn btn-outline-default btn-circle fa-caret-right sm '
        return last_soup.find('a', class_=next_class if next_ else prev_class)

    @classmethod
    def get_url_from_link(cls, link):
        """Get url corresponding to a link: its href joined to the gocomics.com root."""
        gocomics = 'http://www.gocomics.com'
        return urljoin_wrapper(gocomics, link['href'])

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Date, author and tags are read from the page's ``article:*`` meta
        elements; images come from the 'img-fluid item-comic-image' picture.
        Returns a dict with keys day, month, year, img, author and tags.
        """
        date_str = soup.find('meta', property='article:published_time')['content']
        published = string_to_date(date_str, "%Y-%m-%d")
        imgs = soup.find('picture', class_='img-fluid item-comic-image').find_all('img')
        author = soup.find('meta', property='article:author')['content']
        # Only the first 'article:tag' meta element is read.
        tags = soup.find('meta', property='article:tag')['content']
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [i['src'] for i in imgs],
            'author': author,
            'tags': tags,
        }
4248
4249
4250
class PearlsBeforeSwine(GenericGoComic):
    """Class to retrieve Pearls Before Swine comics.

    Declarative only: behaviour is inherited from GenericGoComic.
    """
    name = 'pearls'
    long_name = 'Pearls Before Swine'
    url = 'http://www.gocomics.com/pearlsbeforeswine'
4255
4256
4257
class Peanuts(GenericGoComic):
    """Class to retrieve Peanuts comics.

    Declarative only: behaviour is inherited from GenericGoComic.
    """
    name = 'peanuts'
    long_name = 'Peanuts'
    url = 'http://www.gocomics.com/peanuts'
4262
4263
4264
class MattWuerker(GenericGoComic):
    """Class to retrieve Matt Wuerker comics.

    Declarative only: behaviour is inherited from GenericGoComic.
    """
    name = 'wuerker'
    long_name = 'Matt Wuerker'
    url = 'http://www.gocomics.com/mattwuerker'
4269
4270
4271
class TomToles(GenericGoComic):
    """Class to retrieve Tom Toles comics.

    Declarative only: behaviour is inherited from GenericGoComic.
    """
    name = 'toles'
    long_name = 'Tom Toles'
    url = 'http://www.gocomics.com/tomtoles'
4276
4277
4278
class BreakOfDay(GenericGoComic):
    """Class to retrieve Break Of Day comics.

    Declarative only: behaviour is inherited from GenericGoComic.
    """
    name = 'breakofday'
    long_name = 'Break Of Day'
    url = 'http://www.gocomics.com/break-of-day'
4283
4284
4285
class Brevity(GenericGoComic):
    """Class to retrieve Brevity comics.

    Declarative only: behaviour is inherited from GenericGoComic.
    """
    name = 'brevity'
    long_name = 'Brevity'
    url = 'http://www.gocomics.com/brevitypanel'
4290
4291
4292
class MichaelRamirez(GenericGoComic):
    """Class to retrieve Michael Ramirez comics.

    Declarative only: behaviour is inherited from GenericGoComic.
    """
    name = 'ramirez'
    long_name = 'Michael Ramirez'
    url = 'http://www.gocomics.com/michaelramirez'
4297
4298
4299
class MikeLuckovich(GenericGoComic):
    """Class to retrieve Mike Luckovich comics.

    Declarative only: behaviour is inherited from GenericGoComic.
    """
    name = 'luckovich'
    long_name = 'Mike Luckovich'
    url = 'http://www.gocomics.com/mikeluckovich'
4304
4305
4306
class JimBenton(GenericGoComic):
    """Class to retrieve Jim Benton comics.

    Declarative only: behaviour is inherited from GenericGoComic.
    """
    # Also on http://jimbenton.tumblr.com
    name = 'benton'
    long_name = 'Jim Benton'
    url = 'http://www.gocomics.com/jim-benton-cartoons'
4312
4313
4314
class TheArgyleSweater(GenericGoComic):
    """Class to retrieve the Argyle Sweater comics.

    Declarative only: behaviour is inherited from GenericGoComic.
    """
    name = 'argyle'
    long_name = 'Argyle Sweater'
    url = 'http://www.gocomics.com/theargylesweater'
4319
4320
4321
class SunnyStreet(GenericGoComic):
    """Class to retrieve Sunny Street comics.

    Declarative only: behaviour is inherited from GenericGoComic.
    """
    # Also on http://www.sunnystreetcomics.com
    name = 'sunny'
    long_name = 'Sunny Street'
    url = 'http://www.gocomics.com/sunny-street'
4327
4328
4329
class OffTheMark(GenericGoComic):
    """Class to retrieve Off The Mark comics.

    Declarative only: behaviour is inherited from GenericGoComic.
    """
    # Also on https://www.offthemark.com
    name = 'offthemark'
    long_name = 'Off The Mark'
    url = 'http://www.gocomics.com/offthemark'
4335
4336
4337
class WuMo(GenericGoComic):
    """Class to retrieve WuMo comics.

    Declarative only: behaviour is inherited from GenericGoComic.
    """
    # Also on http://wumo.com
    name = 'wumo'
    long_name = 'WuMo'
    url = 'http://www.gocomics.com/wumo'
4343
4344
4345
class LunarBaboon(GenericGoComic):
    """Class to retrieve Lunar Baboon comics.

    Declarative only: behaviour is inherited from GenericGoComic.
    """
    # Also on http://www.lunarbaboon.com
    # Also on https://tapastic.com/series/Lunarbaboon
    name = 'lunarbaboon'
    long_name = 'Lunar Baboon'
    url = 'http://www.gocomics.com/lunarbaboon'
4352
4353
4354
class SandersenGocomic(GenericGoComic):
    """Class to retrieve Sarah Andersen comics.

    Declarative only: behaviour is inherited from GenericGoComic.
    """
    # Also on http://sarahcandersen.com
    # Also on http://tapastic.com/series/Doodle-Time
    name = 'sandersen-goc'
    long_name = 'Sarah Andersen (from GoComics)'
    url = 'http://www.gocomics.com/sarahs-scribbles'
4361
4362
4363
class SaturdayMorningBreakfastCerealGoComic(GenericGoComic):
    """Class to retrieve Saturday Morning Breakfast Cereal comics.

    Declarative only: behaviour is inherited from GenericGoComic.
    """
    # Also on http://smbc-comics.tumblr.com
    # Also on http://www.smbc-comics.com
    name = 'smbc-goc'
    long_name = 'Saturday Morning Breakfast Cereal (from GoComics)'
    url = 'http://www.gocomics.com/saturday-morning-breakfast-cereal'
    _categories = ('SMBC', )
4371
4372
4373
class CalvinAndHobbesGoComic(GenericGoComic):
    """Class to retrieve Calvin and Hobbes comics.

    Declarative only: behaviour is inherited from GenericGoComic.
    """
    # From gocomics, not http://marcel-oehler.marcellosendos.ch/comics/ch/
    name = 'calvin-goc'
    long_name = 'Calvin and Hobbes (from GoComics)'
    url = 'http://www.gocomics.com/calvinandhobbes'
4379
4380
4381
class RallGoComic(GenericGoComic):
    """Class to retrieve Ted Rall comics.

    Declarative only: behaviour is inherited from GenericGoComic.
    """
    # Also on http://rall.com/comic
    name = 'rall-goc'
    long_name = "Ted Rall (from GoComics)"
    url = "http://www.gocomics.com/ted-rall"
    _categories = ('RALL', )
4388
4389
4390
class TheAwkwardYetiGoComic(GenericGoComic):
    """Class to retrieve The Awkward Yeti comics.

    Declarative only: behaviour is inherited from GenericGoComic.
    """
    # Also on http://larstheyeti.tumblr.com
    # Also on http://theawkwardyeti.com
    # Also on https://tapastic.com/series/TheAwkwardYeti
    name = 'yeti-goc'
    long_name = 'The Awkward Yeti (from GoComics)'
    url = 'http://www.gocomics.com/the-awkward-yeti'
    _categories = ('YETI', )
4399
4400
4401
class BerkeleyMewsGoComics(GenericGoComic):
    """Class to retrieve Berkeley Mews comics.

    Declarative only: behaviour is inherited from GenericGoComic.
    """
    # Also on http://mews.tumblr.com
    # Also on http://www.berkeleymews.com
    name = 'berkeley-goc'
    long_name = 'Berkeley Mews (from GoComics)'
    url = 'http://www.gocomics.com/berkeley-mews'
    _categories = ('BERKELEY', )
4409
4410
4411
class SheldonGoComics(GenericGoComic):
    """Class to retrieve Sheldon comics.

    Declarative only: behaviour is inherited from GenericGoComic.
    """
    # Also on http://www.sheldoncomics.com
    name = 'sheldon-goc'
    long_name = 'Sheldon Comics (from GoComics)'
    url = 'http://www.gocomics.com/sheldon'
4417
4418
4419
class FowlLanguageGoComics(GenericGoComic):
    """Class to retrieve Fowl Language comics.

    Declarative only: behaviour is inherited from GenericGoComic.
    """
    # Also on http://www.fowllanguagecomics.com
    # Also on http://tapastic.com/series/Fowl-Language-Comics
    # Also on http://fowllanguagecomics.tumblr.com
    name = 'fowllanguage-goc'
    long_name = 'Fowl Language Comics (from GoComics)'
    url = 'http://www.gocomics.com/fowl-language'
    _categories = ('FOWLLANGUAGE', )
4428
4429
4430
class NickAnderson(GenericGoComic):
    """Class to retrieve Nick Anderson comics.

    Declarative only: behaviour is inherited from GenericGoComic.
    """
    name = 'nickanderson'
    long_name = 'Nick Anderson'
    url = 'http://www.gocomics.com/nickanderson'
4435
4436
4437
class GarfieldGoComics(GenericGoComic):
    """Class to retrieve Garfield comics.

    Declarative only: behaviour is inherited from GenericGoComic.
    """
    # Also on http://garfield.com
    name = 'garfield-goc'
    long_name = 'Garfield (from GoComics)'
    url = 'http://www.gocomics.com/garfield'
    _categories = ('GARFIELD', )
4444
4445
4446
class DorrisMcGoComics(GenericGoComic):
    """Class to retrieve Dorris Mc Comics.

    Declarative only: behaviour is inherited from GenericGoComic.
    """
    # Also on http://dorrismccomics.com
    name = 'dorrismc-goc'
    long_name = 'Dorris Mc (from GoComics)'
    url = 'http://www.gocomics.com/dorris-mccomics'
4452
4453
4454
class FoxTrot(GenericGoComic):
    """Class to retrieve FoxTrot comics.

    Declarative only: behaviour is inherited from GenericGoComic.
    """
    name = 'foxtrot'
    long_name = 'FoxTrot'
    url = 'http://www.gocomics.com/foxtrot'
4459
4460
4461
class FoxTrotClassics(GenericGoComic):
    """Class to retrieve FoxTrot Classics comics.

    Declarative only: behaviour is inherited from GenericGoComic.
    """
    name = 'foxtrot-classics'
    long_name = 'FoxTrot Classics'
    url = 'http://www.gocomics.com/foxtrotclassics'
4466
4467
4468
class MisterAndMeGoComics(GenericGoComic):
    """Class to retrieve Mister & Me Comics.

    Declarative only: behaviour is inherited from GenericGoComic.
    """
    # Also on http://www.mister-and-me.com
    # Also on https://tapastic.com/series/Mister-and-Me
    name = 'mister-goc'
    long_name = 'Mister & Me (from GoComics)'
    url = 'http://www.gocomics.com/mister-and-me'
4475
4476
4477
class NonSequitur(GenericGoComic):
    """Class to retrieve Non Sequitur (Wiley Miller) comics.

    Declarative only: behaviour is inherited from GenericGoComic.
    """
    name = 'nonsequitur'
    long_name = 'Non Sequitur'
    url = 'http://www.gocomics.com/nonsequitur'
4482
4483
4484
class GenericTapasticComic(GenericListableComic):
    """Generic class to handle the logic common to comics from tapastic.com.

    Archive elements are the entries of the ``episodeList`` JSON value found
    in the series page; ``id``, ``title`` and ``publishDate`` are the keys
    read from each entry.
    """
    _categories = ('TAPASTIC', )

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Get information about a particular comics.

        Returns a dict with keys day, year, month, img and title, or None
        (after printing a notice) when the page has no 'art-image' image.
        """
        # 'publishDate' is divided by 1000 - presumably it is in milliseconds.
        timestamp = int(archive_elt['publishDate']) / 1000.0
        # NOTE(review): fromtimestamp() converts using the local timezone, so
        # the resulting date depends on the machine running the code.
        day = datetime.datetime.fromtimestamp(timestamp).date()
        imgs = soup.find_all('img', class_='art-image')
        if not imgs:
            print("Comic %s is being uploaded, retry later" % cls.get_url_from_archive_element(archive_elt))
            return None
        return {
            'day': day.day,
            'year': day.year,
            'month': day.month,
            'img': [i['src'] for i in imgs],
            'title': archive_elt['title'],
        }

    @classmethod
    def get_url_from_archive_element(cls, archive_elt):
        """Get the episode url built from the archive element's 'id'."""
        return 'http://tapastic.com/episode/' + str(archive_elt['id'])

    @classmethod
    def get_archive_elements(cls):
        """Get the archive elements parsed from the series page at ``cls.url``.

        The first line of the form ``episodeList : <json>,`` (after
        stripping) is parsed as JSON and returned.

        Raises IndexError when the page contains no such line.
        """
        pref, suff = 'episodeList : ', ','
        # Information is stored in the javascript part
        # I don't know the clean way to get it so this is the ugly way.
        for raw_line in urlopen_wrapper(cls.url).readlines():
            line = raw_line.decode('utf-8').strip()
            if line.startswith(pref) and line.endswith(suff):
                return json.loads(line[len(pref):-len(suff)])
        # Same exception type as the former "[...][0]" lookup, with context.
        raise IndexError("No line starting with %r found at %s" % (pref, cls.url))
4517
4518
4519
class VegetablesForDessert(GenericTapasticComic):
    """Class to retrieve Vegetables For Dessert comics.

    Declarative only: behaviour is inherited from GenericTapasticComic.
    """
    # Also on http://vegetablesfordessert.tumblr.com
    name = 'vegetables'
    long_name = 'Vegetables For Dessert'
    url = 'http://tapastic.com/series/vegetablesfordessert'
4525
4526
4527
class FowlLanguageTapa(GenericTapasticComic):
    """Class to retrieve Fowl Language comics.

    Declarative only: behaviour is inherited from GenericTapasticComic.
    """
    # Also on http://www.fowllanguagecomics.com
    # Also on http://fowllanguagecomics.tumblr.com
    # Also on http://www.gocomics.com/fowl-language
    name = 'fowllanguage-tapa'
    long_name = 'Fowl Language Comics (from Tapastic)'
    url = 'http://tapastic.com/series/Fowl-Language-Comics'
    _categories = ('FOWLLANGUAGE', )
4536
4537
4538
class OscillatingProfundities(GenericTapasticComic):
    """Class to retrieve Oscillating Profundities comics.

    Declarative only: behaviour is inherited from GenericTapasticComic.
    """
    name = 'oscillating'
    long_name = 'Oscillating Profundities'
    url = 'http://tapastic.com/series/oscillatingprofundities'
4543
4544
4545
class ZnoflatsComics(GenericTapasticComic):
    """Class to retrieve Znoflats comics.

    Declarative only: behaviour is inherited from GenericTapasticComic.
    """
    name = 'znoflats'
    long_name = 'Znoflats Comics'
    url = 'http://tapastic.com/series/Znoflats-Comics'
4550
4551
4552
class SandersenTapastic(GenericTapasticComic):
    """Class to retrieve Sarah Andersen comics.

    Declarative only: behaviour is inherited from GenericTapasticComic.
    """
    # Also on http://sarahcandersen.com
    # Also on http://www.gocomics.com/sarahs-scribbles
    name = 'sandersen-tapa'
    long_name = 'Sarah Andersen (from Tapastic)'
    url = 'http://tapastic.com/series/Doodle-Time'
4559
4560
4561
class TubeyToonsTapastic(GenericTapasticComic):
    """Class to retrieve TubeyToons comics.

    Declarative only: behaviour is inherited from GenericTapasticComic.
    """
    # Also on http://tubeytoons.com
    # Also on http://tubeytoons.tumblr.com
    name = 'tubeytoons-tapa'
    long_name = 'Tubey Toons (from Tapastic)'
    url = 'http://tapastic.com/series/Tubey-Toons'
    # NOTE(review): 'TUNEYTOONS' looks like a typo for 'TUBEYTOONS'. Left as
    # is because the value presumably has to match the category used by the
    # other Tubey Toons classes - confirm and rename them all together.
    _categories = ('TUNEYTOONS', )
4569
4570
4571
class AnythingComicTapastic(GenericTapasticComic):
    """Class to retrieve Anything Comics.

    Declarative only: behaviour is inherited from GenericTapasticComic.
    """
    # Also on http://www.anythingcomic.com
    name = 'anythingcomic-tapa'
    long_name = 'Anything Comic (from Tapastic)'
    url = 'http://tapastic.com/series/anything'
4577
4578
4579
class UnearthedComicsTapastic(GenericTapasticComic):
    """Class to retrieve Unearthed comics.

    Declarative only: behaviour is inherited from GenericTapasticComic.
    """
    # Also on http://unearthedcomics.com
    # Also on http://unearthedcomics.tumblr.com
    name = 'unearthed-tapa'
    long_name = 'Unearthed Comics (from Tapastic)'
    url = 'http://tapastic.com/series/UnearthedComics'
    _categories = ('UNEARTHED', )
4587
4588
4589
class EverythingsStupidTapastic(GenericTapasticComic):
    """Class to retrieve Everything's stupid Comics.

    Declarative only: behaviour is inherited from GenericTapasticComic.
    """
    # Also on http://www.webtoons.com/en/challenge/everythings-stupid/list?title_no=14591
    # Also on http://everythingsstupid.net
    name = 'stupid-tapa'
    long_name = "Everything's Stupid (from Tapastic)"
    url = 'http://tapastic.com/series/EverythingsStupid'
4596
4597
4598
class JustSayEhTapastic(GenericTapasticComic):
    """Class to retrieve Just Say Eh comics.

    Declarative only: behaviour is inherited from GenericTapasticComic.
    """
    # Also on http://www.justsayeh.com
    name = 'justsayeh-tapa'
    long_name = 'Just Say Eh (from Tapastic)'
    url = 'http://tapastic.com/series/Just-Say-Eh'
4604
4605
4606
class ThorsThundershackTapastic(GenericTapasticComic):
    """Class to retrieve Thor's Thundershack comics.

    Declarative only: behaviour is inherited from GenericTapasticComic.
    """
    # Also on http://www.thorsthundershack.com
    name = 'thor-tapa'
    long_name = 'Thor\'s Thundershack (from Tapastic)'
    # NOTE(review): the slug ends in 'Thundershac' (no 'k') - confirm this is
    # the actual series address and not a truncated copy.
    url = 'http://tapastic.com/series/Thors-Thundershac'
    _categories = ('THOR', )
4613
4614
4615
class OwlTurdTapastic(GenericTapasticComic):
    """Class to retrieve Owl Turd comics.

    Declarative only: behaviour is inherited from GenericTapasticComic.
    """
    # Also on http://owlturd.com
    name = 'owlturd-tapa'
    long_name = 'Owl Turd (from Tapastic)'
    url = 'http://tapastic.com/series/Owl-Turd-Comix'
    _categories = ('OWLTURD', )
4622
4623
4624
class GoneIntoRaptureTapastic(GenericTapasticComic):
    """Class to retrieve Gone Into Rapture comics.

    Declarative only: behaviour is inherited from GenericTapasticComic.
    """
    # Also on http://goneintorapture.tumblr.com
    # Also on http://www.goneintorapture.com
    name = 'rapture-tapa'
    long_name = 'Gone Into Rapture (from Tapastic)'
    url = 'http://tapastic.com/series/Goneintorapture'
4631
4632
4633
class HeckIfIKnowComicsTapa(GenericTapasticComic):
    """Class to retrieve Heck If I Know Comics.

    Declarative only: behaviour is inherited from GenericTapasticComic.
    """
    # Also on http://heckifiknowcomics.com
    name = 'heck-tapa'
    long_name = 'Heck if I Know comics (from Tapastic)'
    url = 'http://tapastic.com/series/Regular'
4639
4640
4641
class CheerUpEmoKidTapa(GenericTapasticComic):
    """Class to retrieve CheerUpEmoKid comics.

    Declarative only: behaviour is inherited from GenericTapasticComic.
    """
    # Also on http://www.cheerupemokid.com
    # Also on http://enzocomics.tumblr.com
    name = 'cuek-tapa'
    long_name = 'Cheer Up Emo Kid (from Tapastic)'
    url = 'http://tapastic.com/series/CUEK'
4648
4649
4650
class BigFootJusticeTapa(GenericTapasticComic):
    """Class to retrieve Big Foot Justice comics.

    Declarative only: behaviour is inherited from GenericTapasticComic.
    """
    # Also on http://bigfootjustice.com
    name = 'bigfoot-tapa'
    long_name = 'Big Foot Justice (from Tapastic)'
    url = 'http://tapastic.com/series/bigfoot-justice'
4656
4657
4658
class UpAndOutTapa(GenericTapasticComic):
    """Class to retrieve Up & Out comics.

    Declarative only: behaviour is inherited from GenericTapasticComic.
    """
    # Also on http://upandoutcomic.tumblr.com
    name = 'upandout-tapa'
    long_name = 'Up And Out (from Tapastic)'
    url = 'http://tapastic.com/series/UP-and-OUT'
4664
4665
4666
class ToonHoleTapa(GenericTapasticComic):
    """Class to retrieve Toon Holes comics.

    Declarative only: behaviour is inherited from GenericTapasticComic.
    """
    # Also on http://www.toonhole.com
    name = 'toonhole-tapa'
    long_name = 'Toon Hole (from Tapastic)'
    url = 'http://tapastic.com/series/TOONHOLE'
4672
4673
4674
class AngryAtNothingTapa(GenericTapasticComic):
    """Class to retrieve Angry at Nothing comics.

    Declarative only: behaviour is inherited from GenericTapasticComic.
    """
    # Also on http://www.angryatnothing.net
    name = 'angry-tapa'
    long_name = 'Angry At Nothing (from Tapastic)'
    url = 'http://tapastic.com/series/Comics-yeah-definitely-comics-'
4680
4681
4682
class LeleozTapa(GenericTapasticComic):
    """Class to retrieve Leleoz comics.

    Declarative only: behaviour is inherited from GenericTapasticComic.
    """
    # Also on http://leleozcomics.tumblr.com
    name = 'leleoz-tapa'
    long_name = 'Leleoz (from Tapastic)'
    url = 'https://tapastic.com/series/Leleoz'
4688
4689
4690
class TheAwkwardYetiTapa(GenericTapasticComic):
    """Class to retrieve The Awkward Yeti comics.

    Declarative only: behaviour is inherited from GenericTapasticComic.
    """
    # Also on http://www.gocomics.com/the-awkward-yeti
    # Also on http://theawkwardyeti.com
    # Also on http://larstheyeti.tumblr.com
    name = 'yeti-tapa'
    long_name = 'The Awkward Yeti (from Tapastic)'
    url = 'https://tapastic.com/series/TheAwkwardYeti'
    _categories = ('YETI', )
4699
4700
4701
class AsPerUsualTapa(GenericTapasticComic):
    """Class to retrieve As Per Usual comics.

    Declarative only: behaviour is inherited from GenericTapasticComic.
    """
    # Also on http://as-per-usual.tumblr.com
    name = 'usual-tapa'
    long_name = 'As Per Usual (from Tapastic)'
    url = 'https://tapastic.com/series/AsPerUsual'
    # Was spelled 'categories' (no leading underscore), unlike every other
    # comic class, which declares its categories in '_categories'.
    _categories = ('DAMILEE', )
4708
4709
4710
class HotComicsForCoolPeopleTapa(GenericTapasticComic):
    """Class to retrieve Hot Comics For Cool People.

    Declarative only: behaviour is inherited from GenericTapasticComic.
    """
    # Also on http://hotcomicsforcoolpeople.tumblr.com
    # Also on http://hotcomics.biz (links to tumblr)
    # Also on http://hcfcp.com (links to tumblr)
    name = 'hotcomics-tapa'
    long_name = 'Hot Comics For Cool People (from Tapastic)'
    url = 'https://tapastic.com/series/Hot-Comics-For-Cool-People'
    # Was spelled 'categories' (no leading underscore), unlike every other
    # comic class, which declares its categories in '_categories'.
    _categories = ('DAMILEE', )
4719
4720
4721
class OneOneOneOneComicTapa(GenericTapasticComic):
    """Retrieve the 1111 Comics from their Tapastic series."""
    # Also available on:
    #   http://www.1111comics.me
    #   http://comics1111.tumblr.com
    name = '1111-tapa'
    long_name = '1111 Comics (from Tapastic)'
    url = 'https://tapastic.com/series/1111-Comics'
    _categories = ('ONEONEONEONE',)
4729
4730
4731
class TumbleDryTapa(GenericTapasticComic):
    """Retrieve the Tumble Dry comics from their Tapastic series."""
    # Also available on:
    #   http://tumbledrycomics.com
    name = 'tumbledry-tapa'
    long_name = 'Tumble Dry (from Tapastic)'
    url = 'https://tapastic.com/series/TumbleDryComics'
4737
4738
4739
class DeadlyPanelTapa(GenericTapasticComic):
    """Retrieve the Deadly Panel comics from their Tapastic series."""
    # Also available on:
    #   http://www.deadlypanel.com
    name = 'deadly-tapa'
    long_name = 'Deadly Panel (from Tapastic)'
    url = 'https://tapastic.com/series/deadlypanel'
4745
4746
4747
class ChrisHallbeckMaxiTapa(GenericTapasticComic):
    """Retrieve Chris Hallbeck's Maximumble comics from their Tapastic series."""
    # Also available on:
    #   http://chrishallbeck.tumblr.com
    #   http://maximumble.com
    name = 'hallbeckmaxi-tapa'
    long_name = 'Chris Hallbeck - Maximumble (from Tapastic)'
    url = 'https://tapastic.com/series/Maximumble'
    # The category key keeps its historical spelling: it is an identifier
    # that other classes may share, not a display string.
    _categories = ('HALLBACK', )
4755
4756
4757
class ChrisHallbeckMiniTapa(GenericTapasticComic):
    """Retrieve Chris Hallbeck's Minimumble comics from their Tapastic series."""
    # Also available on:
    #   http://chrishallbeck.tumblr.com
    #   http://minimumble.com
    name = 'hallbeckmini-tapa'
    long_name = 'Chris Hallbeck - Minimumble (from Tapastic)'
    url = 'https://tapastic.com/series/Minimumble'
    # The category key keeps its historical spelling: it is an identifier
    # that other classes may share, not a display string.
    _categories = ('HALLBACK', )
4765
4766
4767
class ChrisHallbeckBiffTapa(GenericTapasticComic):
    """Retrieve Chris Hallbeck's The Book of Biff comics from their Tapastic series."""
    # Also available on:
    #   http://chrishallbeck.tumblr.com
    #   http://thebookofbiff.com
    name = 'hallbeckbiff-tapa'
    long_name = 'Chris Hallbeck - The Book of Biff (from Tapastic)'
    url = 'https://tapastic.com/series/Biff'
    # The category key keeps its historical spelling: it is an identifier
    # that other classes may share, not a display string.
    _categories = ('HALLBACK', )
4775
4776
4777
class RandoWisTapa(GenericTapasticComic):
    """Retrieve the RandoWis comics from their Tapastic series."""
    # Also available on:
    #   https://randowis.com
    name = 'randowis-tapa'
    long_name = 'RandoWis (from Tapastic)'
    url = 'https://tapastic.com/series/RandoWis'
4783
4784
4785
class PigeonGazetteTapa(GenericTapasticComic):
    """Retrieve The Pigeon Gazette comics from their Tapastic series."""
    # Also available on:
    #   http://thepigeongazette.tumblr.com
    name = 'pigeon-tapa'
    long_name = 'The Pigeon Gazette (from Tapastic)'
    url = 'https://tapastic.com/series/The-Pigeon-Gazette'
4791
4792
4793
class TheOdd1sOutTapa(GenericTapasticComic):
    """Retrieve The Odd 1s Out comics from their Tapastic series."""
    # Also available on:
    #   http://theodd1sout.com
    #   http://theodd1sout.tumblr.com
    name = 'theodd-tapa'
    long_name = 'The Odd 1s Out (from Tapastic)'
    url = 'https://tapastic.com/series/Theodd1sout'
4800
4801
4802
class TheWorldIsFlatTapa(GenericTapasticComic):
    """Retrieve The World Is Flat comics from their Tapastic series."""
    # Also available on:
    #   http://theworldisflatcomics.tumblr.com
    name = 'flatworld-tapa'
    long_name = 'The World Is Flat (from Tapastic)'
    url = 'https://tapastic.com/series/The-World-is-Flat'
4808
4809
4810
class MisterAndMeTapa(GenericTapasticComic):
    """Retrieve the Mister & Me comics from their Tapastic series."""
    # Also available on:
    #   http://www.mister-and-me.com
    #   http://www.gocomics.com/mister-and-me
    name = 'mister-tapa'
    long_name = 'Mister & Me (from Tapastic)'
    url = 'https://tapastic.com/series/Mister-and-Me'
4817
4818
4819
class TalesOfAbsurdityTapa(GenericTapasticComic):
    """Retrieve the Tales Of Absurdity comics from their Tapastic series."""
    # Also available on:
    #   http://talesofabsurdity.com
    #   http://talesofabsurdity.tumblr.com
    name = 'absurdity-tapa'
    long_name = 'Tales of Absurdity (from Tapastic)'
    url = 'http://tapastic.com/series/Tales-Of-Absurdity'
    _categories = ('ABSURDITY',)
4827
4828
4829
class BFGFSTapa(GenericTapasticComic):
    """Retrieve the BFGFS comics from their Tapastic series."""
    # Also available on:
    #   http://bfgfs.com
    #   http://bfgfs.tumblr.com
    name = 'bfgfs-tapa'
    long_name = 'BFGFS (from Tapastic)'
    url = 'https://tapastic.com/series/BFGFS'
4836
4837
4838
class DoodleForFoodTapa(GenericTapasticComic):
    """Retrieve the Doodle For Food comics from their Tapastic series."""
    # Also available on:
    #   http://doodleforfood.com
    name = 'doodle-tapa'
    long_name = 'Doodle For Food (from Tapastic)'
    url = 'https://tapastic.com/series/Doodle-for-Food'
4844
4845
4846
class MrLovensteinTapa(GenericTapasticComic):
    """Retrieve the Mr Lovenstein comics from their Tapastic series."""
    # NOTE(review): the former "Also on" comment here repeated this very
    # Tapastic URL (https://tapastic.com/series/MrLovenstein); presumably the
    # comic's own website was meant - confirm and list it here.
    name = 'mrlovenstein-tapa'
    long_name = 'Mr. Lovenstein (from Tapastic)'
    url = 'https://tapastic.com/series/MrLovenstein'
4852
4853
4854
class CassandraCalinTapa(GenericTapasticComic):
    """Retrieve the C. Cassandra comics from their Tapastic series."""
    # Also available on:
    #   http://cassandracalin.com
    #   http://c-cassandra.tumblr.com
    name = 'cassandra-tapa'
    long_name = 'Cassandra Calin (from Tapastic)'
    url = 'https://tapastic.com/series/C-Cassandra-comics'
4861
4862
4863
class WafflesAndPancakes(GenericTapasticComic):
    """Retrieve the Waffles And Pancakes comics from their Tapastic series."""
    # Also available on:
    #   http://wandpcomic.com
    name = 'waffles'
    long_name = 'Waffles And Pancakes'
    url = 'https://tapastic.com/series/Waffles-and-Pancakes'
4869
4870
4871
class YesterdaysPopcornTapastic(GenericTapasticComic):
    """Retrieve the Yesterday's Popcorn comics from their Tapastic series."""
    # Also available on:
    #   http://www.yesterdayspopcorn.com
    #   http://yesterdayspopcorn.tumblr.com
    name = 'popcorn-tapa'
    long_name = "Yesterday's Popcorn (from Tapastic)"
    url = 'https://tapastic.com/series/Yesterdays-Popcorn'
4878
4879
4880
class OurSuperAdventureTapastic(GenericTapasticComic):
    """Retrieve the Our Super Adventure comics from their Tapastic series."""
    # Also available on:
    #   http://www.oursuperadventure.com
    #   http://sarahssketchbook.tumblr.com
    #   http://sarahgraley.com
    name = 'superadventure-tapastic'
    long_name = 'Our Super Adventure (from Tapastic)'
    url = 'https://tapastic.com/series/Our-Super-Adventure'
4888
4889
4890
class NamelessPCs(GenericTapasticComic):
    """Retrieve the Nameless PCs comics from their Tapastic series."""
    # Also available on:
    #   http://namelesspcs.com
    name = 'namelesspcs-tapa'
    long_name = 'NamelessPCs (from Tapastic)'
    url = 'https://tapastic.com/series/NamelessPC'
4896
4897
4898
class UbertoolTapa(GenericTapasticComic):
    """Retrieve the Ubertool comics from their Tapastic series."""
    # Also available on:
    #   http://ubertoolcomic.com
    #   http://ubertool.tumblr.com
    name = 'ubertool-tapa'
    long_name = 'Ubertool (from Tapastic)'
    url = 'https://tapastic.com/series/ubertool'
    _categories = ('UBERTOOL',)
4906
4907
4908
class SmallBlueYonderTapa(GenericTapasticComic):
    """Retrieve the Small Blue Yonder comics from their Tapastic series."""
    # Also available on:
    #   http://www.smallblueyonder.com
    name = 'smallblue-tapa'
    long_name = 'Small Blue Yonder (from Tapastic)'
    url = 'https://tapastic.com/series/Small-Blue-Yonder'
4914
4915
4916
def get_subclasses(klass):
    """Return every class deriving from `klass`, directly or not.

    The direct subclasses come first (in the order reported by
    `klass.__subclasses__()`), followed by the descendants of each of them
    in turn. A class reachable through several inheritance paths appears
    once per path.
    """
    children = klass.__subclasses__()
    found = list(children)
    for child in children:
        # Recurse to collect grand-children and deeper descendants.
        found += get_subclasses(child)
    return found
4922
4923
4924
def remove_st_nd_rd_th_from_date(string):
    """Strip ordinal suffixes (1st/2nd/3rd/4th) to get a parsable date string.

    Only an 'st', 'nd', 'rd' or 'th' that directly follows a digit and ends
    a word is removed, so day and month names containing those letters
    ('Monday', 'Saturday', 'August', ...) are left untouched.

    Args:
        string: text containing a date such as 'August 1st, 2015'.

    Returns:
        The same text without the ordinal suffixes, e.g. 'August 1, 2015'.
    """
    # The look-behind keeps the digit itself out of the match, so only the
    # suffix is dropped.
    return re.sub(r'(?<=\d)(?:st|nd|rd|th)\b', '', string)
4932
4933
4934
def string_to_date(string, date_format, local=DEFAULT_LOCAL):
    """Convert a string to a date object.

    Wrapper around datetime.datetime.strptime that temporarily switches the
    LC_ALL locale to `local` while parsing, then puts the previous locale
    back - even when parsing fails.

    Args:
        string: text to parse.
        date_format: strptime format describing `string`.
        local: locale name to parse with (defaults to DEFAULT_LOCAL).

    Returns:
        The parsed datetime.date.

    Raises:
        ValueError: if `string` does not match `date_format`.
        locale.Error: if `local` cannot be set.
    """
    # format described in https://docs.python.org/3/library/datetime.html#strftime-and-strptime-behavior
    prev_locale = locale.setlocale(locale.LC_ALL)
    if local == prev_locale:
        # Already running under the requested locale: nothing to switch.
        return datetime.datetime.strptime(string, date_format).date()
    locale.setlocale(locale.LC_ALL, local)
    try:
        return datetime.datetime.strptime(string, date_format).date()
    finally:
        # Restore the caller's locale whether or not parsing succeeded, so a
        # malformed date cannot leave the process in the wrong locale.
        locale.setlocale(locale.LC_ALL, prev_locale)
4945
4946
4947
# Registry of the comic classes defined in this module.
# COMICS holds every direct or indirect subclass of GenericComic;
# VALID_COMICS keeps only those that define a name.
COMICS = set(get_subclasses(GenericComic))
VALID_COMICS = [comic for comic in COMICS if comic.name is not None]
# Lookup table from comic name to class; the assert checks that no two
# classes share the same name.
COMIC_NAMES = dict((comic.name, comic) for comic in VALID_COMICS)
assert len(VALID_COMICS) == len(COMIC_NAMES)
# Same uniqueness check for the Python class names.
CLASS_NAMES = set(comic.__name__ for comic in VALID_COMICS)
assert len(VALID_COMICS) == len(CLASS_NAMES)
4953