Completed
Push — master ( c3e188...e0aad2 )
by De
01:05
created

comics.py (5 issues)

Code
1
#! /usr/bin/python3
2
# vim: set expandtab tabstop=4 shiftwidth=4 :
3
"""Module to retrieve webcomics"""
4
5
from comic_abstract import GenericComic, get_date_for_comic
6
import re
7
from datetime import date, timedelta
8
import datetime
9
from urlfunctions import get_soup_at_url, urljoin_wrapper,\
10
    convert_iri_to_plain_ascii_uri, load_json_at_url, urlopen_wrapper
11
import json
12
import locale
13
import urllib
14
15
DEFAULT_LOCAL = 'en_GB.UTF-8'
16
17
18
class Xkcd(GenericComic):
    """Retriever for xkcd comics, driven by the site's JSON endpoints."""
    name = 'xkcd'
    long_name = 'xkcd'
    url = 'http://xkcd.com'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Yield every comic published after `last_comic`.

        Implementation of GenericComic's abstract method. Numbering starts
        after `last_comic['num']` (or at 1 when `last_comic` is falsy) and
        stops at the number reported by the site's `info.0.json`.
        """
        first_num = last_comic['num'] if last_comic else 0
        latest_info = load_json_at_url(urljoin_wrapper(cls.url, 'info.0.json'))
        last_num = latest_info['num']

        for num in range(first_num + 1, last_num + 1):
            # Number 404 is deliberately skipped (presumably no such comic
            # exists on the site).
            if num == 404:
                continue
            json_url = urljoin_wrapper(cls.url, '%d/info.0.json' % num)
            comic = load_json_at_url(json_url)
            # Normalise the raw JSON: image as a list, date parts as ints,
            # plus the bookkeeping fields added by this retriever.
            comic.update({
                'img': [comic['img']],
                'prefix': '%d-' % num,
                'json_url': json_url,
                'url': urljoin_wrapper(cls.url, str(num)),
                'day': int(comic['day']),
                'month': int(comic['month']),
                'year': int(comic['year']),
            })
            assert comic['num'] == num
            yield comic
44
45
46
# Helper functions corresponding to get_url_from_link/get_url_from_archive_element
47
48
49
@classmethod
def get_href(cls, link):
    """Return the 'href' entry of `link`.

    Usable as get_url_from_link / get_url_from_archive_element.
    """
    href = link['href']
    return href
53
54
55
@classmethod
def join_cls_url_to_href(cls, link):
    """Return the 'href' entry of `link` joined onto `cls.url`.

    Usable as get_url_from_link / get_url_from_archive_element.
    """
    href = link['href']
    return urljoin_wrapper(cls.url, href)
59
60
61
class GenericNavigableComic(GenericComic):
    """Generic class for "navigable" comics: with first/next arrows.

    This class applies to comics where the previous and next comics can be
    accessed from a given comic. Once given a starting point (either
    the first comic or the last comic retrieved), it handles the
    navigation, the retrieval of the soup object and the setting of
    the 'url' attribute on retrieved comics. This greatly limits the
    amount of boilerplate code in the different implementation classes.

    The `get_next_comic` method is implemented in terms of more
    specialized methods to be implemented/overridden:
        - get_first_comic_link
        - get_navi_link
        - get_comic_info
        - get_url_from_link
    """
    _categories = ('NAVIGABLE', )

    @classmethod
    def get_first_comic_link(cls):
        """Get link to the first comic.

        Sometimes this can be retrieved from any comic page, sometimes from
        the archive page, sometimes it doesn't exist at all and one has
        to iterate backward to find it before hardcoding the result found.
        """
        raise NotImplementedError

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next (or previous - for dev purposes) comic.

        `next_` is True for the next comic, False for the previous one.
        """
        raise NotImplementedError

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        Returning None makes `get_next_comic` skip the page.
        """
        raise NotImplementedError

    @classmethod
    def get_url_from_link(cls, link):
        """Get url corresponding to a link. Default implementation is similar to get_href."""
        return link['href']

    @classmethod
    def get_next_link(cls, last_soup):
        """Get link to next comic."""
        return cls.get_navi_link(last_soup, True)

    @classmethod
    def get_prev_link(cls, last_soup):
        """Get link to previous comic."""
        return cls.get_navi_link(last_soup, False)

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generic implementation of get_next_comic for navigable comics.

        Starts from the page after `last_comic['url']`, or from the first
        comic link when `last_comic` is falsy, and follows "next" links
        until there is none or a link points back to the current url.
        """
        url = last_comic['url'] if last_comic else None
        cls.log("starting 'get_next_comic' from %s" % url)
        next_comic = \
            cls.get_next_link(get_soup_at_url(url)) \
            if url else \
            cls.get_first_comic_link()
        cls.log("next/first comic will be %s (url is %s)" % (str(next_comic), url))
        # cls.check_navigation(url)
        while next_comic:
            prev_url, url = url, cls.get_url_from_link(next_comic)
            # A "next" link pointing to the current page would loop forever.
            if prev_url == url:
                cls.log("got same url %s" % url)
                break
            cls.log("about to get %s (%s)" % (url, str(next_comic)))
            soup = get_soup_at_url(url)
            comic = cls.get_comic_info(soup, next_comic)
            if comic is not None:
                assert 'url' not in comic
                comic['url'] = url
                yield comic
            next_comic = cls.get_next_link(soup)
            cls.log("next comic will be %s" % str(next_comic))

    @classmethod
    def check_first_link(cls):
        """Check that navigation to first comic seems to be working - for dev purposes.

        Returns True when a first link exists and its url can be fetched.
        """
        # `import urllib` alone does not guarantee that the `urllib.error`
        # submodule is loaded; import it explicitly before referencing it.
        import urllib.error

        cls.log("about to check first link")
        ok = True
        firstlink = cls.get_first_comic_link()
        if firstlink is None:
            print("From %s : no first link" % cls.url)
            ok = False
        else:
            firsturl = cls.get_url_from_link(firstlink)
            try:
                get_soup_at_url(firsturl)
            except urllib.error.HTTPError:
                print("From %s : invalid first url" % cls.url)
                ok = False
        cls.log("checked first link -> returned %d" % ok)
        return ok

    @classmethod
    def check_prev_next_links(cls, url):
        """Check that navigation to prev/next from a given URL seems to be working - for dev purposes.

        Going to the previous page and forward again (and vice versa) is
        expected to lead back to `url`. Returns True when all checks pass.
        """
        cls.log("about to check prev/next from %s" % url)
        ok = True
        if url is None:
            prevlink, nextlink = None, None
        else:
            soup = get_soup_at_url(url)
            prevlink, nextlink = cls.get_prev_link(soup), cls.get_next_link(soup)
        if prevlink is None and nextlink is None:
            print("From %s : no previous nor next" % url)
            ok = False
        else:
            if prevlink:
                prevurl = cls.get_url_from_link(prevlink)
                prevsoup = get_soup_at_url(prevurl)
                # The previous page may have no "next" link at all: report
                # it as a navigation failure instead of crashing on None.
                back_link = cls.get_next_link(prevsoup)
                prevnext = cls.get_url_from_link(back_link) if back_link else None
                if prevnext != url:
                    print("From %s, going backward then forward leads to %s" % (url, prevnext))
                    ok = False
            if nextlink:
                nexturl = cls.get_url_from_link(nextlink)
                if nexturl != url:
                    nextsoup = get_soup_at_url(nexturl)
                    # Same guard for a next page without a "previous" link.
                    forth_link = cls.get_prev_link(nextsoup)
                    nextprev = cls.get_url_from_link(forth_link) if forth_link else None
                    if nextprev != url:
                        print("From %s, going forward then backward leads to %s" % (url, nextprev))
                        ok = False
        cls.log("checked prev/next from %s -> returned %d" % (url, ok))
        return ok

    @classmethod
    def check_navigation(cls, url):
        """Check that navigation functions seem to be working - for dev purposes.

        Runs both `check_first_link` and `check_prev_next_links(url)` and
        returns whether both succeeded.
        """
        cls.log("about to check navigation from %s" % url)
        first = cls.check_first_link()
        prevnext = cls.check_prev_next_links(url)
        ok = first and prevnext
        cls.log("checked navigation from %s -> returned %d" % (url, ok))
        return ok
201
202
203
class GenericListableComic(GenericComic):
    """Generic class for "listable" comics: with a list of comics (aka 'archive').

    `get_next_comic` is implemented in terms of more specialized methods
    to be implemented/overridden:
        - get_archive_elements
        - get_url_from_archive_element
        - get_comic_info
    """
    _categories = ('LISTABLE', )

    @classmethod
    def get_archive_elements(cls):
        """Return the archive elements (iterable)."""
        raise NotImplementedError

    @classmethod
    def get_url_from_archive_element(cls, archive_elt):
        """Return the url corresponding to an archive element."""
        raise NotImplementedError

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Return information about a particular comic."""
        raise NotImplementedError

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generic implementation of get_next_comic for listable comics.

        Archive elements are skipped until the url of `last_comic` has been
        seen; every element after it is fetched and yielded. When
        `last_comic` is falsy, all elements are fetched.
        """
        pending_url = last_comic['url'] if last_comic else None
        for elt in cls.get_archive_elements():
            elt_url = cls.get_url_from_archive_element(elt)
            cls.log("considering %s" % elt_url)
            if pending_url is not None:
                # Still looking for the last retrieved comic.
                if pending_url == elt_url:
                    pending_url = None
                continue
            cls.log("about to get %s (%s)" % (elt_url, str(elt)))
            page = get_soup_at_url(elt_url)
            info = cls.get_comic_info(page, elt)
            if info is None:
                continue
            assert 'url' not in info
            info['url'] = elt_url
            yield info
        if pending_url is not None:
            print("Did not find %s : there might be a problem" % pending_url)
248
249
# Helper functions corresponding to get_first_comic_link/get_navi_link
250
251
252
@classmethod
def get_link_rel_next(cls, last_soup, next_):
    """Implementation of get_navi_link based on <link rel="next|prev">."""
    rel_value = 'next' if next_ else 'prev'
    return last_soup.find('link', rel=rel_value)
256
257
258
@classmethod
def get_a_rel_next(cls, last_soup, next_):
    """Implementation of get_navi_link based on <a rel="next|prev">."""
    rel_value = 'next' if next_ else 'prev'
    return last_soup.find('a', rel=rel_value)
262
263
264
@classmethod
def get_a_navi_navinext(cls, last_soup, next_):
    """Implementation of get_navi_link based on <a class="navi navi-next|navi-prev">."""
    if next_:
        css_class = 'navi navi-next'
    else:
        css_class = 'navi navi-prev'
    return last_soup.find('a', class_=css_class)
268
269
270
@classmethod
def get_a_navi_comicnavnext_navinext(cls, last_soup, next_):
    """Implementation of get_navi_link based on the 'navi comic-nav-*' anchor classes."""
    if next_:
        css_class = 'navi comic-nav-next navi-next'
    else:
        css_class = 'navi comic-nav-previous navi-prev'
    return last_soup.find('a', class_=css_class)
274
275
276
@classmethod
def get_a_comicnavbase_comicnavnext(cls, last_soup, next_):
    """Implementation of get_navi_link based on the 'comic-nav-base' anchor classes."""
    if next_:
        css_class = 'comic-nav-base comic-nav-next'
    else:
        css_class = 'comic-nav-base comic-nav-previous'
    return last_soup.find('a', class_=css_class)
280
281
282
@classmethod
def get_a_navi_navifirst(cls):
    """Implementation of get_first_comic_link: <a class="navi navi-first"> on the home page."""
    home_soup = get_soup_at_url(cls.url)
    return home_soup.find('a', class_='navi navi-first')
286
287
288
@classmethod
def get_div_navfirst_a(cls):
    """Implementation of get_first_comic_link.

    Returns the first <a> found inside <div class="nav-first"> on the page
    at `cls.url`, or None when the div (or the anchor) is missing.
    """
    nav_first = get_soup_at_url(cls.url).find('div', class_="nav-first")
    # A missing div must yield None (not an AttributeError) so that callers
    # such as check_first_link can report "no first link".
    return nav_first.find('a') if nav_first is not None else None
292
293
294
@classmethod
def get_a_comicnavbase_comicnavfirst(cls):
    """Implementation of get_first_comic_link: 'comic-nav-base comic-nav-first' anchor on the home page."""
    home_soup = get_soup_at_url(cls.url)
    return home_soup.find('a', class_='comic-nav-base comic-nav-first')
298
299
300
@classmethod
def simulate_first_link(cls):
    """Implementation of get_first_comic_link building a link-like dict
    from the `first_url` attribute provided by the class."""
    return dict(href=cls.first_url)
305
306
307
@classmethod
def navigate_to_first_comic(cls):
    """Implementation of get_first_comic_link walking back from a user
    provided URL to the first comic.

    Sometimes the first comic cannot be reached directly, so one has to
    follow "previous" links until there is none left. Once the resulting
    URL is known it is better to hardcode it, but during development it
    is convenient to find it automatically. Every visited URL is printed.
    """
    url = input("Get starting URL: ")
    while True:
        print(url)
        prev_link = cls.get_prev_link(get_soup_at_url(url))
        if not prev_link:
            break
        url = cls.get_url_from_link(prev_link)
    return {'href': url}
326
327
328
class GenericEmptyComic(GenericComic):
    """Generic class for comics where nothing is to be done.

    It can be used to temporarily deactivate comics that do not work
    properly by replacing `class MyComic(GenericWhateverComic)` with
    `class MyComic(GenericEmptyComic, GenericWhateverComic)`."""
    _categories = ('EMPTY', )

    @classmethod
    def get_next_comic(cls, last_comic):
        """Implementation of get_next_comic returning no comics."""
        cls.log("comic is considered as empty - returning no comic")
        no_comics = []
        return no_comics
341
342
343
class ExtraFabulousComics(GenericNavigableComic):
    """Class to retrieve Extra Fabulous Comics."""
    name = 'efc'
    long_name = 'Extra Fabulous Comics'
    url = 'http://extrafabulouscomics.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        Images are the <img> tags whose src starts with the site's
        wp-content/uploads path; title and date come from the page's
        og:title and article:published_time meta tags.
        """
        # Escape the url so that its dots only match literal dots.
        img_src_re = re.compile('^%s/wp-content/uploads/' % re.escape(cls.url))
        imgs = soup.find_all('img', src=img_src_re)
        title = soup.find('meta', property='og:title')['content']
        # Only the leading YYYY-MM-DD part of the timestamp is used.
        date_str = soup.find('meta', property='article:published_time')['content'][:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        return {
            'title': title,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'prefix': title + '-'
        }
367
368
369
class GenericLeMondeBlog(GenericNavigableComic):
    """Generic class to retrieve comics from Le Monde blogs.

    Subclasses are expected to provide `first_url`.
    """
    _categories = ('LEMONDE', 'FRANCAIS')
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return information about a particular comic."""
        shortlink = soup.find('link', rel='shortlink')
        url2 = shortlink['href']
        title = soup.find('meta', property='og:title')['content']
        date_span = soup.find("span", class_="entry-date")
        # Dates are parsed with the French locale.
        day = string_to_date(date_span.string, "%d %B %Y", "fr_FR.utf8")
        og_images = soup.find_all('meta', property='og:image')
        img_urls = [convert_iri_to_plain_ascii_uri(meta['content']) for meta in og_images]
        return {
            'title': title,
            'url2': url2,
            'img': img_urls,
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
392
393
394
class ZepWorld(GenericLeMondeBlog):
    """Retriever for the Zep World blog."""
    name = 'zep'
    long_name = 'Zep World'
    url = 'http://zepworld.blog.lemonde.fr'
    first_url = 'http://zepworld.blog.lemonde.fr/2014/10/31/bientot-le-blog-de-zep/'
400
401
402
class Vidberg(GenericLeMondeBlog):
    """Retriever for the Vidberg blog."""
    name = "vidberg"
    long_name = "Vidberg - l'actu en patates"
    url = 'http://vidberg.blog.lemonde.fr'
    # This is not the actual first comic: no efficient way to retrieve it was found.
    first_url = 'http://vidberg.blog.lemonde.fr/2012/02/09/revue-de-campagne-la-campagne-du-modem-semballe/'
409
410
411
class Plantu(GenericLeMondeBlog):
    """Retriever for the Plantu blog."""
    name = "plantu"
    long_name = 'Plantu'
    url = 'http://plantu.blog.lemonde.fr'
    first_url = 'http://plantu.blog.lemonde.fr/2014/10/28/stress-test-a-bruxelles/'
417
418
419
class XavierGorce(GenericLeMondeBlog):
    """Retriever for the Xavier Gorce blog."""
    name = "gorce"
    long_name = 'Xavier Gorce'
    url = 'http://xaviergorce.blog.lemonde.fr'
    first_url = 'http://xaviergorce.blog.lemonde.fr/2015/01/09/distinction/'
425
426
427
class CartooningForPeace(GenericLeMondeBlog):
    """Retriever for the Cartooning For Peace blog."""
    name = "forpeace"
    long_name = 'Cartooning For Peace'
    url = 'http://cartooningforpeace.blog.lemonde.fr'
    first_url = 'http://cartooningforpeace.blog.lemonde.fr/2014/12/15/bado/'
433
434
435
class Aurel(GenericLeMondeBlog):
    """Retriever for the Aurel blog."""
    name = "aurel"
    long_name = 'Aurel'
    url = 'http://aurel.blog.lemonde.fr'
    first_url = 'http://aurel.blog.lemonde.fr/2014/09/29/le-senat-repasse-a-droite/'
441
442
443
class LesCulottees(GenericLeMondeBlog):
    """Retriever for the Les Culottees blog."""
    name = "culottees"
    long_name = "Les Culottees"
    url = 'http://lesculottees.blog.lemonde.fr'
    first_url = 'http://lesculottees.blog.lemonde.fr/2016/01/11/clementine-delait-femme-a-barbe/'
449
450
451
class UneAnneeAuLycee(GenericLeMondeBlog):
    """Retriever for the Une Annee Au Lycee blog."""
    name = "lycee"
    long_name = "Une Annee au Lycee"
    url = "http://uneanneeaulycee.blog.lemonde.fr"
    first_url = 'http://uneanneeaulycee.blog.lemonde.fr/2016/06/13/la-semaine-du-bac-est-arrivee/'
457
458
459
class Rall(GenericNavigableComic):
    """Retriever for Ted Rall comics."""
    # Also on http://www.gocomics.com/tedrall
    name = 'rall'
    long_name = "Ted Rall"
    url = "http://rall.com/comic"
    _categories = ('RALL', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    # This is not the actual first comic: no efficient way to retrieve it was found.
    first_url = "http://rall.com/2014/01/30/los-angeles-times-cartoon-well-miss-those-california-flowers"

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return information about a particular comic."""
        title = soup.find('meta', property='og:title')['content']
        author_span = soup.find("span", class_="author vcard")
        author = author_span.find("a").string
        date_span = soup.find("span", class_="entry-date")
        day = string_to_date(date_span.string, "%B %d, %Y")
        desc = soup.find('meta', property='og:description')['content']
        entry = soup.find('div', class_='entry-content')
        # The last 7 images are dropped: they are social media buttons.
        comic_imgs = entry.find_all('img')[:-7]
        return {
            'title': title,
            'author': author,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'description': desc,
            'img': [tag['src'] for tag in comic_imgs],
        }
490
491
492
class Dilem(GenericNavigableComic):
    """Retriever for Ali Dilem comics."""
    name = 'dilem'
    long_name = 'Ali Dilem'
    url = 'http://information.tv5monde.com/dilem'
    _categories = ('FRANCAIS', )
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = "http://information.tv5monde.com/dilem/2004-06-26"

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the link to the next or previous comic (None if absent)."""
        # On this site the classes are swapped: prev is next / next is prev.
        li_class = 'prev' if next_ else 'next'
        item = last_soup.find('li', class_=li_class)
        if not item:
            return None
        return item.find('a')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return information about a particular comic."""
        short_url = soup.find('link', rel='shortlink')['href']
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        og_images = soup.find_all('meta', property='og:image')
        # Only the leading YYYY-MM-DD part of the dc:date value is used.
        raw_date = soup.find('span', property='dc:date')['content']
        day = string_to_date(raw_date[:10], "%Y-%m-%d")
        return {
            'short_url': short_url,
            'title': title,
            'img': [meta['content'] for meta in og_images],
            'day': day.day,
            'month': day.month,
            'year': day.year,
        }
526
527
528
class SpaceAvalanche(GenericNavigableComic):
    """Retriever for Space Avalanche comics."""
    name = 'avalanche'
    long_name = 'Space Avalanche'
    url = 'http://www.spaceavalanche.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return a hardcoded link-like dict for the first comic."""
        first_link = {
            'href': "http://www.spaceavalanche.com/2009/02/02/irish-sea/",
            'title': "Irish Sea",
        }
        return first_link

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return information about a particular comic.

        The date is extracted from the year/month/day components of the url.
        """
        url_date_re = re.compile('.*/([0-9]*)/([0-9]*)/([0-9]*)/.*$')
        title = link['title']
        comic_url = cls.get_url_from_link(link)
        date_parts = url_date_re.match(comic_url).groups()
        year, month, day = map(int, date_parts)
        entry = soup.find("div", class_="entry")
        return {
            'title': title,
            'day': day,
            'month': month,
            'year': year,
            'img': [tag['src'] for tag in entry.find_all("img")],
        }
556
557
558
class ZenPencils(GenericNavigableComic):
    """Retriever for ZenPencils comics."""
    # Also on http://zenpencils.tumblr.com
    # Also on http://www.gocomics.com/zen-pencils
    name = 'zenpencils'
    long_name = 'Zen Pencils'
    url = 'http://zenpencils.com'
    _categories = ('ZENPENCILS', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = "http://zenpencils.com/comic/1-ralph-waldo-emerson-make-them-cry/"

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return information about a particular comic."""
        comic_div = soup.find('div', id='comic')
        imgs = comic_div.find_all('img')
        post = soup.find('div', class_='post-content')
        author_span = post.find("span", class_="post-author")
        author = author_span.find("a").string
        title = soup.find('meta', property='og:title')['content']
        date_span = post.find('span', class_='post-date')
        day = string_to_date(date_span.string, "%B %d, %Y")
        # Sanity checks on the page structure: at least one image, and each
        # image's alt equals its title and is either the page title or empty.
        assert imgs
        assert all(tag['alt'] == tag['title'] for tag in imgs)
        assert all(tag['alt'] in (title, "") for tag in imgs)
        desc = soup.find('meta', property='og:description')['content']
        img_urls = [urljoin_wrapper(cls.url, tag['src']) for tag in imgs]
        return {
            'title': title,
            'description': desc,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': img_urls,
        }
593
594
595
class ItsTheTie(GenericNavigableComic):
    """Retriever for It's the tie comics."""
    # Also on http://itsthetie.tumblr.com
    # Also on https://tapastic.com/series/itsthetie
    name = 'tie'
    long_name = "It's the tie"
    url = "http://itsthetie.com"
    _categories = ('TIE', )
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return information about a particular comic."""
        title = soup.find('h1', class_='comic-title').find('a').string
        meta_header = soup.find('header', class_='comic-meta entry-meta')
        day = string_to_date(meta_header.find('a').string, "%B %d, %Y")
        # Bonus images may or may not be in meta og:image, so both sources
        # are merged without duplicates.
        meta_srcs = [meta['content'] for meta in soup.find_all('meta', property='og:image')]
        bonus_tags = soup.find_all('img', attrs={'data-oversrc': True})
        bonus_srcs = [tag['data-oversrc'] for tag in bonus_tags]
        kept_srcs = []
        for src in meta_srcs + [s for s in bonus_srcs if s not in meta_srcs]:
            # The generic "bonus panel" placeholder image is not kept.
            if not src.endswith("/2016/01/bonus-panel.png"):
                kept_srcs.append(src)
        tag_meta = soup.find('meta', property='article:tag')
        if tag_meta:
            tags = tag_meta['content']
        else:
            tags = ""
        return {
            'title': title,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': kept_srcs,
            'tags': tags,
        }
629
630
631
class PenelopeBagieu(GenericNavigableComic):
    """Retriever for comics from Penelope Bagieu's blog."""
    name = 'bagieu'
    long_name = 'Ma vie est tout a fait fascinante (Bagieu)'
    url = 'http://www.penelope-jolicoeur.com'
    _categories = ('FRANCAIS', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.penelope-jolicoeur.com/2007/02/ma-vie-mon-oeuv.html'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return information about a particular comic."""
        date_header = soup.find('h2', class_='date-header')
        # Dates are parsed with the French locale.
        day = string_to_date(date_header.string, "%A %d %B %Y", "fr_FR.utf8")
        body = soup.find('div', class_='entry-body')
        body_imgs = body.find_all('img')
        title = soup.find('h3', class_='entry-header').string
        return {
            'title': title,
            'img': [tag['src'] for tag in body_imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
655
656
657
class OneOneOneOneComic(GenericNavigableComic):
    """Retriever for 1111 Comics."""
    # Also on http://comics1111.tumblr.com
    # Also on https://tapastic.com/series/1111-Comics
    name = '1111'
    long_name = '1111 Comics'
    url = 'http://www.1111comics.me'
    _categories = ('ONEONEONEONE', )
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return information about a particular comic."""
        title_tag = soup.find('h1', class_='comic-title')
        title = title_tag.find('a').string
        meta_header = soup.find('header', class_='comic-meta entry-meta')
        day = string_to_date(meta_header.find('a').string, "%B %d, %Y")
        og_images = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [meta['content'] for meta in og_images],
        }
682
683
684
class AngryAtNothing(GenericNavigableComic):
    """Retriever for Angry at Nothing comics."""
    # Also on http://tapastic.com/series/Comics-yeah-definitely-comics-
    name = 'angry'
    long_name = 'Angry At Nothing'
    url = 'http://www.angryatnothing.net'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return information about a particular comic."""
        title_anchor = soup.find('h1', class_='comic-title').find('a')
        title = title_anchor.string
        date_anchor = soup.find('header', class_='comic-meta entry-meta').find('a')
        day = string_to_date(date_anchor.string, "%B %d, %Y")
        img_urls = [meta['content'] for meta in soup.find_all('meta', property='og:image')]
        return {
            'title': title,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': img_urls,
        }
707
708
709
class NeDroid(GenericNavigableComic):
    """Class to retrieve NeDroid comics."""
    name = 'nedroid'
    long_name = 'NeDroid'
    url = 'http://nedroid.com'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        The comic number is read from the page's shortlink (`?p=<num>`) and
        the date from the image file name (`<year>-<month>-<day>...`).
        Exactly one image is expected in the 'comic' div.
        """
        # Escape the url / host so that dots only match literal dots.
        short_url_re = re.compile(r'^%s/\?p=([0-9]*)' % re.escape(cls.url))
        comic_url_re = re.compile(r'//nedroid\.com/comics/([0-9]*)-([0-9]*)-([0-9]*).*')
        short_url = cls.get_url_from_link(soup.find('link', rel='shortlink'))
        num = int(short_url_re.match(short_url).groups()[0])
        imgs = soup.find('div', id='comic').find_all('img')
        # Check the image count before indexing, so that an unexpected page
        # fails on this assertion rather than on an IndexError.
        assert len(imgs) == 1
        img = imgs[0]
        year, month, day = [int(s) for s in comic_url_re.match(img['src']).groups()]
        title = img['alt']
        title2 = img['title']
        return {
            'short_url': short_url,
            'title': title,
            'title2': title2,
            'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
            'day': day,
            'month': month,
            'year': year,
            'num': num,
        }
740
741
742
class Garfield(GenericNavigableComic):
    """Class to retrieve Garfield comics."""
    # Also on http://www.gocomics.com/garfield
    name = 'garfield'
    long_name = 'Garfield'
    url = 'https://garfield.com'
    _categories = ('GARFIELD', )
    get_first_comic_link = simulate_first_link
    first_url = 'https://garfield.com/comic/1978/06/19'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic."""
        return last_soup.find('a', class_='comic-arrow-right' if next_ else 'comic-arrow-left')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        The date is extracted from the `/comic/<year>/<month>/<day>` part
        of the comic url.
        """
        url = cls.get_url_from_link(link)
        # Escape the url so that its dots only match literal dots.
        date_re = re.compile('^%s/comic/([0-9]*)/([0-9]*)/([0-9]*)' % re.escape(cls.url))
        year, month, day = [int(s) for s in date_re.match(url).groups()]
        imgs = soup.find('div', class_='comic-display').find_all('img', class_='img-responsive')
        return {
            'month': month,
            'year': year,
            'day': day,
            'img': [i['src'] for i in imgs],
        }
770
771
772
class Dilbert(GenericNavigableComic):
    """Retrieve Dilbert comics from dilbert.com."""
    # Also on http://www.gocomics.com/dilbert-classics
    name = 'dilbert'
    long_name = 'Dilbert'
    url = 'http://dilbert.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://dilbert.com/strip/1989-04-16'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor to the next (or previous) comic, None if absent."""
        nav_class = 'nav-comic nav-right' if next_ else 'nav-comic nav-left'
        nav_div = last_soup.find('div', class_=nav_class)
        if not nav_div:
            return None
        return nav_div.find('a')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for one comic from the page's meta tags."""

        def meta_content(prop):
            """Return the content of the first meta tag with this property."""
            return soup.find('meta', property=prop)['content']

        title = meta_content('og:title')
        images = soup.find_all('meta', property='og:image')
        description = meta_content('og:description')
        published = string_to_date(meta_content('article:publish_date'), "%B %d, %Y")
        author = meta_content('article:author')
        tags = meta_content('article:tag')
        return {
            'title': title,
            'description': description,
            'img': [image['content'] for image in images],
            'author': author,
            'tags': tags,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
808
809
810
class VictimsOfCircumsolar(GenericNavigableComic):
    """Retrieve Victims Of Circumsolar comics."""
    name = 'circumsolar'
    long_name = 'Victims Of Circumsolar'
    url = 'http://www.victimsofcircumsolar.com'
    get_navi_link = get_a_navi_comicnavnext_navinext
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.victimsofcircumsolar.com/comic/modern-addiction'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title, description, images) for one comic page."""
        # Date is on the archive page
        # The last og:title / og:description meta tag of the page is the one used.
        title_metas = soup.find_all('meta', property='og:title')
        title = title_metas[-1]['content']
        desc_metas = soup.find_all('meta', property='og:description')
        description = desc_metas[-1]['content']
        pictures = soup.find('div', id='comic').find_all('img')
        # Sanity check: every image repeats the page title in both title and alt.
        for picture in pictures:
            assert picture['title'] == picture['alt'] == title
        return {
            'title': title,
            'description': description,
            'img': [picture['src'] for picture in pictures],
        }
832
833
834
class ThreeWordPhrase(GenericNavigableComic):
    """Class to retrieve Three Word Phrase comics."""
    # Also on http://www.threewordphrase.tumblr.com
    name = 'threeword'
    long_name = 'Three Word Phrase'
    url = 'http://threewordphrase.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        The link is the parent element of the '/firstlink.gif' button image
        found on the home page.
        """
        return get_soup_at_url(cls.url).find('img', src='/firstlink.gif').parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns the parent element of the navigation button image, or None
        when the button is missing or its parent carries no 'href'.
        """
        button = last_soup.find('img', src='/nextlink.gif' if next_ else '/prevlink.gif')
        if button is None:
            # No navigation button on this page: nothing to follow.
            return None
        link = button.parent
        return None if link.get('href') is None else link

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Every image of the page is kept except those whose source ends with
        one of the excluded file names and those without any 'src' attribute.
        """
        excluded_suffixes = (
            'link.gif', '32.png', 'twpbookad.jpg',
            'merchad.jpg', 'header.gif', 'tipjar.jpg')
        title = soup.find('title')
        # Images lacking a 'src' cannot be downloaded: skip them instead of
        # failing with a KeyError.
        imgs = [img for img in soup.find_all('img')
                if img.get('src') and not img['src'].endswith(excluded_suffixes)]
        return {
            'title': title.string if title else None,
            'title2': '  '.join(img.get('alt') for img in imgs if img.get('alt')),
            'img': [urljoin_wrapper(cls.url, img['src']) for img in imgs],
        }
866
867
868
class DeadlyPanel(GenericEmptyComic, GenericNavigableComic):
    """Retrieve Deadly Panel comics."""
    # Also on https://tapastic.com/series/deadlypanel
    name = 'deadly'
    long_name = 'Deadly Panel'
    url = 'http://www.deadlypanel.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (image URLs only) for one comic page."""
        pictures = soup.find('div', id='comic').find_all('img')
        # Sanity check: each image carries identical alt and title texts.
        for picture in pictures:
            assert picture['alt'] == picture['title']
        return {
            'img': [picture['src'] for picture in pictures],
        }
885
886
887
class TheGentlemanArmchair(GenericNavigableComic):
    """Retrieve The Gentleman Armchair comics."""
    name = 'gentlemanarmchair'
    long_name = 'The Gentleman Armchair'
    url = 'http://thegentlemansarmchair.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title, author, date, images) for one comic page."""
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        date_span = soup.find('span', class_='post-date')
        posted = string_to_date(date_span.string, "%B %d, %Y")
        pictures = soup.find('div', id='comic').find_all('img')
        return {
            'img': [picture['src'] for picture in pictures],
            'title': title,
            'author': author,
            'month': posted.month,
            'year': posted.year,
            'day': posted.day,
        }
911
912
913
class MyExtraLife(GenericNavigableComic):
    """Retrieve My Extra Life comics."""
    name = 'extralife'
    long_name = 'My Extra Life'
    url = 'http://www.myextralife.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'first comic' navigation anchor of the home page."""
        home = get_soup_at_url(cls.url)
        return home.find('a', class_='comic_nav_link first_comic_link')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title, date, images) for one comic page."""
        title = soup.find("h1", class_="comic_title").string
        date_text = soup.find("span", class_="comic_date").string
        posted = string_to_date(date_text, "%B %d, %Y")
        pictures = soup.find_all("img", class_="comic")
        # Sanity check: alt and title of each image both repeat the comic title.
        for picture in pictures:
            assert picture['alt'] == picture['title'] == title
        # Images with an empty source are left out.
        sources = [picture['src'] for picture in pictures if picture["src"]]
        return {
            'title': title,
            'img': sources,
            'day': posted.day,
            'month': posted.month,
            'year': posted.year
        }
940
941
942
class SaturdayMorningBreakfastCereal(GenericNavigableComic):
    """Class to retrieve Saturday Morning Breakfast Cereal comics."""
    # Also on http://www.gocomics.com/saturday-morning-breakfast-cereal
    # Also on http://smbc-comics.tumblr.com
    name = 'smbc'
    long_name = 'Saturday Morning Breakfast Cereal'
    url = 'http://www.smbc-comics.com'
    _categories = ('SMBC', )
    get_navi_link = get_a_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics (the rel='start' anchor of the home page)."""
        return get_soup_at_url(cls.url).find('a', rel='start')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The main image is the 'cc-comic' one; an optional second image is
        taken from the 'aftercomic' div when that div holds an image with a
        non-empty source.
        """
        image1 = soup.find('img', id='cc-comic')
        image_url1 = image1['src']
        aftercomic = soup.find('div', id='aftercomic')
        # The 'aftercomic' div may exist without any image inside: treat that
        # like a missing div instead of failing on None['src'].
        bonus_img = aftercomic.find('img') if aftercomic else None
        image_url2 = bonus_img.get('src', '') if bonus_img else ''
        imgs = [image_url1] + ([image_url2] if image_url2 else [])
        date_str = soup.find('div', class_='cc-publishtime').contents[0]
        day = string_to_date(date_str, "%B %d, %Y")
        return {
            'title': image1['title'],
            'img': [urljoin_wrapper(cls.url, i) for i in imgs],
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
974
975
976
class PerryBibleFellowship(GenericListableComic):
    """Retrieve Perry Bible Fellowship comics."""
    name = 'pbf'
    long_name = 'Perry Bible Fellowship'
    url = 'http://pbfcomics.com'
    get_url_from_archive_element = join_cls_url_to_href

    @classmethod
    def get_archive_elements(cls):
        """Return the home page's comic links in reversed document order."""
        home = get_soup_at_url(cls.url)
        links = home.find_all('a', href=re.compile('^/[0-9]*/$'))
        return reversed(links)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (number, name, image) for one comic page."""
        comic_url = cls.get_url_from_archive_element(link)
        name = link.string
        num = int(link['name'])
        # Sanity check: the link target is made of the comic number.
        assert link['href'] == '/%d/' % num
        pictures = soup.find_all('img', src=re.compile('^/archive_b/PBF.*'))
        # Sanity checks: exactly one comic image, labelled with the comic name.
        assert len(pictures) == 1
        assert pictures[0]['alt'] == name
        return {
            'num': num,
            'name': name,
            'img': [urljoin_wrapper(comic_url, picture['src']) for picture in pictures],
            'prefix': '%d-' % num,
        }
1006 View Code Duplication
1007
1008
class Mercworks(GenericNavigableComic):
    """Retrieve Mercworks comics."""
    # Also on http://mercworks.tumblr.com
    name = 'mercworks'
    long_name = 'Mercworks'
    url = 'http://mercworks.net'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for one comic from the page's meta tags."""

        def named_meta(meta_name):
            """Return the content of the first meta tag with this name."""
            return soup.find('meta', attrs={'name': meta_name})['content']

        title = soup.find('meta', property='og:title')['content']
        # The description meta tag is optional.
        desc_meta = soup.find('meta', property='og:description')
        description = desc_meta['content'] if desc_meta else ""
        author = named_meta('shareaholic:article_author_name')
        published = named_meta('shareaholic:article_published_time')
        # Only the first 10 characters (the YYYY-MM-DD part) are parsed.
        posted = string_to_date(published[:10], "%Y-%m-%d")
        images = soup.find_all('meta', property='og:image')
        return {
            'img': [image['content'] for image in images],
            'title': title,
            'author': author,
            'desc': description,
            'day': posted.day,
            'month': posted.month,
            'year': posted.year
        }
1037
1038
1039
class BerkeleyMews(GenericListableComic):
    """Retrieve Berkeley Mews comics."""
    # Also on http://mews.tumblr.com
    # Also on http://www.gocomics.com/berkeley-mews
    name = 'berkeley'
    long_name = 'Berkeley Mews'
    url = 'http://www.berkeleymews.com'
    _categories = ('BERKELEY', )
    get_url_from_archive_element = get_href
    # Matches comic URLs and captures the comic number.
    comic_num_re = re.compile('%s/\\?p=([0-9]*)$' % url)

    @classmethod
    def get_archive_elements(cls):
        """Return the archive page's comic links in reversed document order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, "?page_id=2"))
        links = archive_soup.find_all('a', href=cls.comic_num_re)
        return reversed(links)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (number, titles, date, image) for one comic page."""
        date_pattern = re.compile('.*/([0-9]*)-([0-9]*)-([0-9]*)-.*')
        comic_url = cls.get_url_from_archive_element(link)
        num = int(cls.comic_num_re.match(comic_url).groups()[0])
        picture = soup.find('div', id='comic').find('img')
        # Sanity check: alt and title texts of the image are identical.
        assert picture['alt'] == picture['title']
        title2 = picture['title']
        picture_url = picture['src']
        # The date is read from the image URL (three dash-separated numbers).
        year, month, day = (int(part) for part in date_pattern.match(picture_url).groups())
        return {
            'num': num,
            'title': link.string,
            'title2': title2,
            'img': [picture_url],
            'year': year,
            'month': month,
            'day': day,
        }
1075
1076
1077
class GenericBouletCorp(GenericNavigableComic):
    """Shared scraping logic for the BouletCorp comics in different languages."""
    # Also on http://bouletcorp.tumblr.com
    _categories = ('BOULET', )
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the first anchor of the home page's 'centered_nav' div."""
        navigation = get_soup_at_url(cls.url).find('div', id='centered_nav')
        return navigation.find_all('a')[0]

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (images, title, texts, date) for one comic page."""
        comic_url = cls.get_url_from_link(link)
        # The date is read from the comic URL: <url>/<year>/<month>/<day>/
        pattern = re.compile('^%s/([0-9]*)/([0-9]*)/([0-9]*)/' % cls.url)
        year, month, day = (int(part) for part in pattern.match(comic_url).groups())
        story = soup.find('div', id='notes').find('div', class_='storycontent')
        pictures = story.find_all('img')
        # Join the non-empty image titles.
        captions = [picture.get('title') for picture in pictures]
        texts = '  '.join(caption for caption in captions if caption)
        title = soup.find('title').string
        # Images without a source are ignored.
        sources = [convert_iri_to_plain_ascii_uri(picture['src'])
                   for picture in pictures
                   if picture.get('src') is not None]
        return {
            'img': sources,
            'title': title,
            'texts': texts,
            'year': year,
            'month': month,
            'day': day,
        }
1105
1106
1107
class BouletCorp(GenericBouletCorp):
    """Retrieve BouletCorp comics from www.bouletcorp.com."""
    name = 'boulet'
    long_name = 'Boulet Corp'
    url = 'http://www.bouletcorp.com'
    # Replaces the category tuple defined on GenericBouletCorp.
    _categories = ('FRANCAIS',)
1113
1114
1115
class BouletCorpEn(GenericBouletCorp):
    """Retrieve BouletCorp comics from english.bouletcorp.com."""
    name = 'boulet_en'
    long_name = 'Boulet Corp English'
    url = 'http://english.bouletcorp.com'
1120
1121
1122
class AmazingSuperPowers(GenericNavigableComic):
    """Retrieve Amazing Super Powers comics."""
    name = 'asp'
    long_name = 'Amazing Super Powers'
    url = 'http://www.amazingsuperpowers.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title, author, date, images) for one comic page."""
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        date_span = soup.find('span', class_='post-date')
        posted = string_to_date(date_span.string, "%B %d, %Y")
        pictures = soup.find('div', id='comic').find_all('img')
        # The title is the space-separated list of the image titles.
        title = ' '.join(picture['title'] for picture in pictures)
        # Sanity check: alt and title texts of each image are identical.
        for picture in pictures:
            assert picture['alt'] == picture['title']
        return {
            'title': title,
            'author': author,
            'img': [picture['src'] for picture in pictures],
            'day': posted.day,
            'month': posted.month,
            'year': posted.year
        }
1147
1148
1149
class ToonHole(GenericNavigableComic):
    """Retrieve Toon Hole comics."""
    # Also on http://tapastic.com/series/TOONHOLE
    name = 'toonhole'
    long_name = 'Toon Hole'
    url = 'http://www.toonhole.com'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (short URL, title, date, images) for one comic page."""
        short_url = soup.find('link', rel='shortlink')['href']
        # The date is the first child of the 'entry-meta' div, stripped.
        meta_div = soup.find('div', class_='entry-meta')
        posted = string_to_date(meta_div.contents[0].strip(), "%B %d, %Y")
        pictures = soup.find('div', id='comic').find_all('img')
        # The title comes from the first image; empty when the page has no image.
        title = ""
        if pictures:
            first_picture = pictures[0]
            title = first_picture['alt']
            assert first_picture['title'] == title
        return {
            'short_url': short_url,
            'title': title,
            'month': posted.month,
            'year': posted.year,
            'day': posted.day,
            'img': [convert_iri_to_plain_ascii_uri(picture['src']) for picture in pictures],
        }
1179
1180
1181
class Channelate(GenericNavigableComic):
    """Retrieve Channelate comics."""
    name = 'channelate'
    long_name = 'Channelate'
    url = 'http://www.channelate.com'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for one comic page, extra panel images included."""
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        date_span = soup.find('span', class_='post-date')
        posted = string_to_date(date_span.string, '%Y/%m/%d')
        title = soup.find('meta', property='og:title')['content']
        comic_div = soup.find('div', id='comic')
        if comic_div:
            pictures = comic_div.find_all('img')
        else:
            pictures = []
        # When an extra panel button is present, the page it links to is
        # fetched and its 'extrapanelimage' images are appended.
        extra_url = None
        extra_button = soup.find('div', id='extrapanelbutton')
        if extra_button:
            extra_url = extra_button.find('a')['href']
            extra_page = get_soup_at_url(extra_url)
            pictures.extend(extra_page.find_all('img', class_='extrapanelimage'))
        return {
            'url_extra': extra_url,
            'title': title,
            'author': author,
            'month': posted.month,
            'year': posted.year,
            'day': posted.day,
            'img': [urljoin_wrapper(cls.url, picture['src']) for picture in pictures],
        }
1215
1216
1217
class CyanideAndHappiness(GenericNavigableComic):
    """Class to retrieve Cyanide And Happiness comics."""
    name = 'cyanide'
    long_name = 'Cyanide and Happiness'
    url = 'http://explosm.net'
    _categories = ('NSFW', )
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics (the 'Oldest comic' anchor of the home page)."""
        return get_soup_at_url(cls.url).find('a', title='Oldest comic')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns None when the navigation anchor is missing or has no 'href'.
        """
        # NOTE(review): 'previous-comic ' ends with a space; kept unchanged
        # here - confirm against the site's markup that it can match.
        link = last_soup.find('a', class_='next-comic' if next_ else 'previous-comic ')
        # find() returns None when no anchor matches: do not call .get on it.
        if link is None or link.get('href') is None:
            return None
        return link

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The comic number is the second-to-last '/'-separated part of the
        og:url meta content; it is also used as file prefix.
        """
        url2 = soup.find('meta', property='og:url')['content']
        num = int(url2.split('/')[-2])
        date_str = soup.find('h3').find('a').string
        day = string_to_date(date_str, '%Y.%m.%d')
        author = soup.find('small', class_="author-credit-name").string
        # The credit reads 'by <name>': check and drop the 'by ' prefix.
        assert author.startswith('by ')
        author = author[3:]
        imgs = soup.find_all('img', id='main-comic')
        return {
            'num': num,
            'author': author,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'prefix': '%d-' % num,
            'img': [convert_iri_to_plain_ascii_uri(urljoin_wrapper(cls.url, i['src'])) for i in imgs]
        }
1256
1257
1258
class MrLovenstein(GenericComic):
    """Class to retrieve Mr Lovenstein comics."""
    # Also on https://tapastic.com/series/MrLovenstein
    name = 'mrlovenstein'
    long_name = 'Mr. Lovenstein'
    url = 'http://www.mrlovenstein.com'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generator to get the next comic. Implementation of GenericComic's abstract method.

        The range of comic numbers is deduced from the '/comic/<num>' links
        of the home page; when last_comic is given, iteration starts right
        after its number. Nothing is yielded when the home page holds no
        comic link.
        """
        # TODO: more info from http://www.mrlovenstein.com/archive
        comic_num_re = re.compile('^/comic/([0-9]*)$')
        # Compiled once instead of once per comic.
        comic_img_re = re.compile('^/images/comics/')
        nums = [int(comic_num_re.match(link['href']).groups()[0])
                for link in get_soup_at_url(cls.url).find_all('a', href=comic_num_re)]
        if not nums:
            # min()/max() would raise ValueError on an empty list.
            return
        first, last = min(nums), max(nums)
        if last_comic:
            first = last_comic['num'] + 1
        for num in range(first, last + 1):
            url = urljoin_wrapper(cls.url, '/comic/%d' % num)
            soup = get_soup_at_url(url)
            # Images are stored in reverse document order.
            imgs = list(reversed(soup.find_all('img', src=comic_img_re)))
            description = soup.find('meta', attrs={'name': 'description'})['content']
            yield {
                'url': url,
                'num': num,
                'texts': '  '.join(t for t in (i.get('title') for i in imgs) if t),
                'img': [urljoin_wrapper(url, i['src']) for i in imgs],
                'description': description,
            }
1288
1289
1290
class DinosaurComics(GenericListableComic):
    """Retrieve Dinosaur Comics comics."""
    name = 'dinosaur'
    long_name = 'Dinosaur Comics'
    url = 'http://www.qwantz.com'
    get_url_from_archive_element = get_href
    # Matches comic URLs and captures the comic number.
    comic_link_re = re.compile('^%s/index.php\\?comic=([0-9]*)$' % url)

    @classmethod
    def get_archive_elements(cls):
        """Return the archive page's comic links in reversed document order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive.php'))
        links = archive_soup.find_all('a', href=cls.comic_link_re)
        # first link is random -> skip it
        return reversed(links[1:])

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (date, image, title, text, number) for one comic page."""
        comic_url = cls.get_url_from_archive_element(link)
        num = int(cls.comic_link_re.match(comic_url).groups()[0])
        # The link text is the date; the text comes from the link's next sibling.
        date_text = link.string
        text = link.next_sibling.string
        cleaned_date = remove_st_nd_rd_th_from_date(date_text)
        posted = string_to_date(cleaned_date, "%B %d, %Y")
        picture = soup.find('img', src=re.compile('^%s/comics/' % cls.url))
        return {
            'month': posted.month,
            'year': posted.year,
            'day': posted.day,
            'img': [picture.get('src')],
            'title': picture.get('title'),
            'text': text,
            'num': num,
        }
1323
1324
1325
class ButterSafe(GenericListableComic):
    """Retrieve Butter Safe comics."""
    name = 'butter'
    long_name = 'ButterSafe'
    url = 'http://buttersafe.com'
    get_url_from_archive_element = get_href
    # Matches comic URLs and captures three numbers read as year, month, day.
    comic_link_re = re.compile('^%s/([0-9]*)/([0-9]*)/([0-9]*)/.*' % url)

    @classmethod
    def get_archive_elements(cls):
        """Return the archive page's comic links in reversed document order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive/'))
        links = archive_soup.find_all('a', href=cls.comic_link_re)
        return reversed(links)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title, date, image) for one comic page."""
        comic_url = cls.get_url_from_archive_element(link)
        title = link.string
        date_parts = cls.comic_link_re.match(comic_url).groups()
        year, month, day = (int(part) for part in date_parts)
        picture = soup.find('div', id='comic').find('img')
        # Sanity check: the image alt text is the archive link text.
        assert picture['alt'] == title
        return {
            'title': title,
            'day': day,
            'month': month,
            'year': year,
            'img': [picture['src']],
        }
1353
1354
1355
class CalvinAndHobbes(GenericComic):
    """Class to retrieve Calvin and Hobbes comics."""
    # Also on http://www.gocomics.com/calvinandhobbes/
    name = 'calvin'
    long_name = 'Calvin and Hobbes'
    # This is not through any official webpage but eh...
    url = 'http://marcel-oehler.marcellosendos.ch/comics/ch/'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generator to get the next comic. Implementation of GenericComic's abstract method.

        The index page links to one page per month ('<year>/<month>/');
        each month page holds images named '<year><month><day>...'. Only
        comics dated strictly after the last retrieved one (or after
        1985-11-01 when there is none) are yielded.
        """
        last_date = get_date_for_comic(
            last_comic) if last_comic else date(1985, 11, 1)
        link_re = re.compile('^([0-9]*)/([0-9]*)/')
        for link in get_soup_at_url(cls.url).find_all('a', href=link_re):
            url = link['href']
            # Kept as strings too: they are reused verbatim in URLs/patterns.
            year, month = link_re.match(url).groups()
            year_num, month_num = int(year), int(month)
            # First of the month + 31 days is past the end of any month, so a
            # month failing this test cannot hold anything newer than last_date.
            if date(year_num, month_num, 1) + timedelta(days=31) < last_date:
                continue
            img_re = re.compile('^%s%s([0-9]*)' % (year, month))
            month_url = urljoin_wrapper(cls.url, url)
            for img in get_soup_at_url(month_url).find_all('img', src=img_re):
                img_src = img['src']
                day = int(img_re.match(img_src).groups()[0])
                comic_date = date(year_num, month_num, day)
                if comic_date > last_date:
                    yield {
                        'url': month_url,
                        'year': year_num,
                        'month': month_num,
                        'day': day,
                        'img': ['%s%s/%s/%s' % (cls.url, year, month, img_src)],
                    }
                    last_date = comic_date
1389
1390
1391
class AbstruseGoose(GenericListableComic):
    """Retrieve Abstruse Goose comics."""
    name = 'abstruse'
    long_name = 'Abstruse Goose'
    url = 'http://abstrusegoose.com'
    get_url_from_archive_element = get_href
    # Matches comic URLs and captures the comic number.
    comic_url_re = re.compile('^%s/([0-9]*)$' % url)
    # Matches the URLs of the comic images.
    comic_img_re = re.compile('^%s/strips/.*' % url)

    @classmethod
    def get_archive_elements(cls):
        """Return the archive page's comic links in document order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive'))
        return archive_soup.find_all('a', href=cls.comic_url_re)

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Build the info dict (number, title, image) for one comic page."""
        comic_url = cls.get_url_from_archive_element(archive_elt)
        number_text = cls.comic_url_re.match(comic_url).groups()[0]
        num = int(number_text)
        return {
            'num': num,
            'title': archive_elt.string,
            'img': [soup.find('img', src=cls.comic_img_re)['src']]
        }
1414
1415
1416
class PhDComics(GenericNavigableComic):
    """Retrieve PHD Comics."""
    name = 'phd'
    long_name = 'PhD Comics'
    url = 'http://phdcomics.com/comics/archive.php'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the parent element of the 'first' button image."""
        button = get_soup_at_url(cls.url).find('img', src='images/first_button.gif')
        return button.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the parent of the next (or previous) button image, None if absent."""
        if next_:
            button_src = 'images/next_button.gif'
        else:
            button_src = 'images/prev_button.gif'
        button = last_soup.find('img', src=button_src)
        if button is None:
            return None
        return button.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (date, image, title) for one comic page."""
        date_font = soup.find('font', face='Arial,Helvetica,Geneva,Swiss,SunSans-Regular', color='white')
        date_text = date_font.string.strip()
        try:
            posted = string_to_date(date_text, '%m/%d/%Y')
        except ValueError:
            # An unparsable date is reported and replaced by today's date.
            print("Invalid date %s" % date_text)
            posted = date.today()
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        return {
            'year': posted.year,
            'month': posted.month,
            'day': posted.day,
            'img': [soup.find('img', id='comic')['src']],
            'title': title,
        }
1451
1452
1453
class Octopuns(GenericEmptyComic, GenericNavigableComic):
    """Class to retrieve Octopuns comics."""
    # Also on http://octopuns.tumblr.com
    name = 'octopuns'
    long_name = 'Octopuns'
    url = 'http://www.octopuns.net'

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics (parent element of the 'First.png' image)."""
        return get_soup_at_url(cls.url).find('img', src=re.compile('.*/First.png')).parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns the parent element of the 'Next.png' / 'Back.png' image, or
        None when the image is missing or its parent carries no 'href'.
        """
        button = last_soup.find('img', src=re.compile('.*/Next.png' if next_ else '.*/Back.png'))
        if button is None:
            # No navigation image on this page: nothing to follow.
            return None
        link = button.parent
        return None if link.get('href') is None else link

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics (title, date, images)."""
        title = soup.find('h3', class_='post-title entry-title').string
        date_str = soup.find('h2', class_='date-header').string
        day = string_to_date(date_str, "%A, %B %d, %Y")
        # Image URLs come from the <link rel="image_src"> elements.
        imgs = soup.find_all('link', rel='image_src')
        return {
            'img': [i['href'] for i in imgs],
            'title': title,
            'day': day.day,
            'month': day.month,
            'year': day.year,
        }
1485
1486
1487
class Quarktees(GenericNavigableComic):
    """Retrieve the Quarktees comics."""
    name = 'quarktees'
    long_name = 'Quarktees'
    url = 'http://www.quarktees.com/blogs/news'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.quarktees.com/blogs/news/12486621-coming-soon'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor leading to the next (or previous) article."""
        if next_:
            anchor_id = 'article-next'
        else:
            anchor_id = 'article-prev'
        return last_soup.find('a', id=anchor_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title, images) for one article page."""
        title = soup.find('meta', property='og:title')['content']
        # Every image of the 'single-article' div is kept.
        pictures = soup.find('div', class_='single-article').find_all('img')
        sources = [urljoin_wrapper(cls.url, picture['src']) for picture in pictures]
        return {
            'title': title,
            'img': sources,
        }
1511
1512
1513
class OverCompensating(GenericNavigableComic):
    """Retriever for the Over Compensating comics."""
    name = 'compensating'
    long_name = 'Over Compensating'
    url = 'http://www.overcompensating.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the home page link whose href ends with ``comic=1``."""
        home = get_soup_at_url(cls.url)
        return home.find('a', href=re.compile('comic=1$'))

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the navigation <a>, identified by its title attribute."""
        if next_:
            wanted_title = 'next comic'
        else:
            wanted_title = 'go back already'
        return last_soup.find('a', title=wanted_title)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the comic number, image url and image title."""
        comic_url = cls.get_url_from_link(link)
        # The comic number is the trailing "comic=<digits>" part of the url.
        number_match = re.compile('.*comic=([0-9]*)$').match(comic_url)
        num = int(number_match.group(1))
        strip = soup.find('img', src=re.compile('^/oc/comics/.*'))
        return {
            'num': num,
            'img': [urljoin_wrapper(comic_url, strip['src'])],
            'title': strip.get('title')
        }
1543
1544
1545
class Oglaf(GenericNavigableComic):
    """Retriever for the Oglaf comics (flagged NSFW)."""
    name = 'oglaf'
    long_name = 'Oglaf [NSFW]'
    url = 'http://oglaf.com'
    _categories = ('NSFW', )
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the parent of the home page's ``div#st`` element."""
        start_div = get_soup_at_url(cls.url).find("div", id="st")
        return start_div.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the parent of the next/previous marker div, or None."""
        marker_id = "nx" if next_ else "pvs"
        marker = last_soup.find("div", id=marker_id)
        if not marker:
            return None
        return marker.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the page title, the title and strip images and their texts."""
        title = soup.find('title').string
        header_imgs = soup.find('div', id='tt').find_all('img')
        assert len(header_imgs) == 1
        strips = soup.find_all('img', id='strip')
        assert len(strips) == 1
        # Title image first, then the strip itself.
        pictures = header_imgs + strips
        desc = ' '.join(picture['title'] for picture in pictures)
        return {
            'title': title,
            'img': [picture['src'] for picture in pictures],
            'description': desc,
        }
1579
1580
1581
class ScandinaviaAndTheWorld(GenericNavigableComic):
    """Retriever for the Scandinavia And The World comics."""
    name = 'satw'
    long_name = 'Scandinavia And The World'
    url = 'http://satwcomic.com'
    get_first_comic_link = simulate_first_link
    first_url = 'http://satwcomic.com/sweden-denmark-and-norway'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the <a> carrying the 'n' (next) or 'p' (previous) accesskey."""
        key = 'n' if next_ else 'p'
        return last_soup.find('a', accesskey=key)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the title, description and image urls read from the page."""
        label_meta = soup.find('meta', attrs={'name': 'twitter:label1'})
        title = label_meta['content']
        description_meta = soup.find('meta', property='og:description')
        desc = description_meta['content']
        pictures = soup.find_all('img', itemprop="image")
        return {
            'title': title,
            'description': desc,
            'img': [picture['src'] for picture in pictures],
        }
1605
1606
1607
class SomethingOfThatIlk(GenericEmptyComic):
    """Placeholder for the Something Of That Ilk comics.

    The site does not exist anymore, hence the empty comic base class.
    """
    name = 'somethingofthatilk'
    long_name = 'Something Of That Ilk'
    url = 'http://www.somethingofthatilk.com'
1612
1613
1614
class InfiniteMonkeyBusiness(GenericNavigableComic):
    """Retriever for the Infinite Monkey Business comics."""
    name = 'monkey'
    long_name = 'Infinite Monkey Business'
    url = 'http://infinitemonkeybusiness.net'
    get_navi_link = get_a_navi_comicnavnext_navinext
    get_first_comic_link = simulate_first_link
    first_url = 'http://infinitemonkeybusiness.net/comic/pillory/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the og:title and the image urls found in ``div#comic``."""
        title_meta = soup.find('meta', property='og:title')
        title = title_meta['content']
        comic_div = soup.find('div', id='comic')
        pictures = comic_div.find_all('img')
        return {
            'title': title,
            'img': [picture['src'] for picture in pictures],
        }
1632
1633
1634
class Wondermark(GenericListableComic):
    """Retriever for the Wondermark comics, driven by the archive page."""
    name = 'wondermark'
    long_name = 'Wondermark'
    url = 'http://wondermark.com'
    get_url_from_archive_element = get_href

    @classmethod
    def get_archive_elements(cls):
        """Return the archive's bookmark links, in reversed page order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive/'))
        bookmarks = archive_soup.find_all('a', rel='bookmark')
        return reversed(bookmarks)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the date, image, title, alt text and tags of a post.

        Posts without a ``div#comic`` element produce an empty image list
        and empty title/alt strings.
        """
        postdate = soup.find('div', class_='postdate')
        raw_date = postdate.find('em').string
        day = string_to_date(remove_st_nd_rd_th_from_date(raw_date), "%B %d, %Y")
        # Defaults used when the post holds no comic container.
        img_src, alt, title = [], '', ''
        comic_div = soup.find('div', id='comic')
        if comic_div:
            picture = comic_div.find('img')
            img_src = [picture['src']]
            alt = picture['alt']
            assert alt == picture['title']
            title = soup.find('meta', property='og:title')['content']
        tag_links = soup.find('div', class_='postmeta').find_all('a', rel='tag')
        tags = ' '.join(tag_link.string for tag_link in tag_links)
        return {
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': img_src,
            'title': title,
            'alt': alt,
            'tags': tags,
        }
1671
1672
1673
class WarehouseComic(GenericNavigableComic):
    """Retriever for the Warehouse Comic comics."""
    name = 'warehouse'
    long_name = 'Warehouse Comic'
    url = 'http://warehousecomic.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the post title, post date and comic image urls."""
        heading = soup.find('h2', class_='post-title')
        title = heading.string
        date_span = soup.find('span', class_='post-date')
        # Post dates are formatted as "<month name> <day>, <year>".
        published = string_to_date(date_span.string, "%B %d, %Y")
        pictures = soup.find('div', id='comic').find_all('img')
        return {
            'img': [picture['src'] for picture in pictures],
            'title': title,
            'day': published.day,
            'month': published.month,
            'year': published.year,
        }
1695
1696
1697
class JustSayEh(GenericNavigableComic):
    """Retriever for the Just Say Eh comics."""
    # Also on http://tapastic.com/series/Just-Say-Eh
    name = 'justsayeh'
    long_name = 'Just Say Eh'
    url = 'http://www.justsayeh.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the post title, the comic image urls and the alt text.

        The alt text is taken from the first image; every image is asserted
        to have identical ``alt`` and ``title`` attributes.
        """
        heading = soup.find('h2', class_='post-title')
        title = heading.string
        pictures = soup.find("div", id="comic").find_all("img")
        assert all(picture['alt'] == picture['title'] for picture in pictures)
        first_picture = pictures[0]
        alt = first_picture['alt']
        return {
            'img': [picture['src'] for picture in pictures],
            'title': title,
            'alt': alt,
        }
1718
1719
1720
class MouseBearComedy(GenericNavigableComic):
    """Retriever for the Mouse Bear Comedy comics."""
    # Also on http://mousebearcomedy.tumblr.com
    name = 'mousebear'
    long_name = 'Mouse Bear Comedy'
    url = 'http://www.mousebearcomedy.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the post date, image urls, title and author.

        Every comic image is asserted to use the post title as both its
        ``alt`` and ``title`` attributes.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        date_span = soup.find("span", class_="post-date")
        published = string_to_date(date_span.string, '%B %d, %Y')
        pictures = soup.find("div", id="comic").find_all("img")
        assert all(picture['alt'] == picture['title'] == title for picture in pictures)
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [picture['src'] for picture in pictures],
            'title': title,
            'author': author,
        }
1746 View Code Duplication
0 ignored issues
show
This code seems to be duplicated in your project.
Loading history...
1747
1748
class BigFootJustice(GenericNavigableComic):
    """Retriever for the Big Foot Justice comics."""
    # Also on http://tapastic.com/series/bigfoot-justice
    name = 'bigfoot'
    long_name = 'Big Foot Justice'
    url = 'http://bigfootjustice.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the comic image urls and a title built from the image titles."""
        comic_div = soup.find('div', id='comic')
        pictures = comic_div.find_all('img')
        assert all(picture['title'] == picture['alt'] for picture in pictures)
        # The title is the space-joined titles of all the images.
        image_titles = [picture['title'] for picture in pictures]
        title = ' '.join(image_titles)
        return {
            'img': [picture['src'] for picture in pictures],
            'title': title,
        }
1767 View Code Duplication
1768
1769
class RespawnComic(GenericNavigableComic):
    """Class to retrieve Respawn Comic."""
    # Also on http://respawncomic.tumblr.com
    name = 'respawn'
    long_name = 'Respawn Comic'
    # Fixed: the literal used to end with a stray space, giving a malformed url.
    url = 'http://respawncomic.com'
    _categories = ('RESPAWN', )
    get_navi_link = get_a_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://respawncomic.com/comic/c0001/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        Returns a dict with the title, author, publication date (day, month,
        year) and the list of image urls read from the page's meta tags.
        """
        title = soup.find('meta', property='og:title')['content']
        author = soup.find('meta', attrs={'name': 'shareaholic:article_author_name'})['content']
        date_str = soup.find('meta', attrs={'name': 'shareaholic:article_published_time'})['content']
        # Only the leading "YYYY-MM-DD" part of the timestamp is parsed.
        day = string_to_date(date_str[:10], "%Y-%m-%d")
        imgs = soup.find_all('meta', property='og:image')
        # og:image urls excluded from the result; presumably site artwork
        # rather than comic images (both live under ".../site/") - TODO confirm.
        skip_imgs = {
            'http://respawncomic.com/wp-content/uploads/2016/03/site/HAROLD2.png',
            'http://respawncomic.com/wp-content/uploads/2016/03/site/DEVA.png'
        }
        return {
            'title': title,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [i['content'] for i in imgs if i['content'] not in skip_imgs],
        }
1801
1802
1803
class SafelyEndangered(GenericNavigableComic):
    """Retriever for the Safely Endangered comics."""
    # Also on http://tumblr.safelyendangered.com
    name = 'endangered'
    long_name = 'Safely Endangered'
    url = 'http://www.safelyendangered.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.safelyendangered.com/comic/ignored/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the post date, image urls, title and first image's alt text."""
        heading = soup.find('h2', class_='post-title')
        title = heading.string
        date_span = soup.find('span', class_='post-date')
        published = string_to_date(date_span.string, '%B %d, %Y')
        pictures = soup.find('div', id='comic').find_all('img')
        # The alt text comes from the first image, read before the check below.
        first_picture = pictures[0]
        alt = first_picture['alt']
        assert all(picture['alt'] == picture['title'] for picture in pictures)
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [picture['src'] for picture in pictures],
            'title': title,
            'alt': alt,
        }
1830
1831
1832
class PicturesInBoxes(GenericNavigableComic):
    """Retriever for the Pictures In Boxes comics."""
    # Also on http://picturesinboxescomic.tumblr.com
    name = 'picturesinboxes'
    long_name = 'Pictures in Boxes'
    url = 'http://www.picturesinboxes.com'
    get_navi_link = get_a_navi_navinext
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.picturesinboxes.com/2013/10/26/tetris/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the post date, image urls, title and author.

        At least one image is required, and each image is asserted to use the
        post title as both its ``title`` and ``alt`` attributes.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        date_span = soup.find('span', class_='post-date')
        published = string_to_date(date_span.string, '%B %d, %Y')
        pane = soup.find('div', class_='comicpane')
        pictures = pane.find_all('img')
        assert pictures
        assert all(picture['title'] == picture['alt'] == title for picture in pictures)
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [picture['src'] for picture in pictures],
            'title': title,
            'author': author,
        }
1860
1861
1862
class Penmen(GenericEmptyComic):
    """Placeholder for the Penmen comics, built on the empty comic base class."""
    name = 'penmen'
    long_name = 'Penmen'
    url = 'http://penmen.com'
1867
1868
1869
class TheDoghouseDiaries(GenericNavigableComic):
    """Retriever for The Dog House Diaries comics."""
    name = 'doghouse'
    long_name = 'The Dog House Diaries'
    url = 'http://thedoghousediaries.com'

    @classmethod
    def get_first_comic_link(cls):
        """Return the home page's ``a#firstlink`` element."""
        home = get_soup_at_url(cls.url)
        return home.find('a', id='firstlink')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the ``a#nextlink`` or ``a#previouslink`` element."""
        if next_:
            anchor_id = 'nextlink'
        else:
            anchor_id = 'previouslink'
        return last_soup.find('a', id=anchor_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return both titles, the alt text, the image url and the comic number."""
        picture = soup.find('img', src=re.compile('^dhdcomics/.*'))
        comic_url = cls.get_url_from_link(link)
        title = soup.find('h2', id='titleheader').string
        subtitle = soup.find('div', id='subtext').string
        alt = picture.get('title')
        # The image src is stripped of surrounding whitespace before joining.
        image_url = urljoin_wrapper(comic_url, picture['src'].strip())
        # The number is the last path component of the comic url.
        num = int(comic_url.rsplit('/', 1)[-1])
        return {
            'title': title,
            'title2': subtitle,
            'alt': alt,
            'img': [image_url],
            'num': num,
        }
1898
1899
1900
class InvisibleBread(GenericListableComic):
    """Class to retrieve Invisible Bread comics."""
    # Also on http://www.gocomics.com/invisible-bread
    name = 'invisiblebread'
    long_name = 'Invisible Bread'
    url = 'http://invisiblebread.com'

    @classmethod
    def get_archive_elements(cls):
        """Return the archive's title cells, in reversed page order."""
        archive_url = urljoin_wrapper(cls.url, 'archives/')
        return reversed(get_soup_at_url(archive_url).find_all('td', class_='archive-title'))

    @classmethod
    def get_url_from_archive_element(cls, td):
        """Get url corresponding to an archive element."""
        return td.find('a')['href']

    @classmethod
    def get_comic_info(cls, soup, td):
        """Get information about a particular comic.

        The title comes from the archive cell, the month and day from the
        cell's previous sibling and the year from the comic url.
        """
        url = cls.get_url_from_archive_element(td)
        title = td.find('a').string
        month_and_day = td.previous_sibling.string
        # Escape the site url so that its dots only match literal dots.
        link_re = re.compile('^%s/([0-9]+)/' % re.escape(cls.url))
        year = link_re.match(url).group(1)
        date_str = month_and_day + ' ' + year
        day = string_to_date(date_str, '%b %d %Y')
        img = soup.find('div', id='comic').find('img')
        # The single comic image is expected to be titled after the comic.
        assert img['title'] == img['alt'] == title
        return {
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [urljoin_wrapper(cls.url, img['src'])],
            'title': title,
        }
1936
1937
1938
class DiscoBleach(GenericEmptyComic):
    """Placeholder for the Disco Bleach comics.

    Retrieval does not work anymore, hence the empty comic base class.
    """
    name = 'discobleach'
    long_name = 'Disco Bleach'
    url = 'http://discobleach.com'
1943
1944
1945
class TubeyToons(GenericEmptyComic):
    """Placeholder for the Tubey Toons comics.

    Retrieval does not work anymore, hence the empty comic base class.
    """
    # Also on http://tapastic.com/series/Tubey-Toons
    # Also on http://tubeytoons.tumblr.com
    name = 'tubeytoons'
    long_name = 'Tubey Toons'
    url = 'http://tubeytoons.com'
    # NOTE(review): 'TUNEYTOONS' looks like a typo for 'TUBEYTOONS'; kept
    # as-is because the value is runtime data - confirm before renaming.
    _categories = ('TUNEYTOONS', )
1953
1954
1955
class CompletelySeriousComics(GenericNavigableComic):
    """Retriever for the Completely Serious comics."""
    name = 'completelyserious'
    long_name = 'Completely Serious Comics'
    url = 'http://completelyseriouscomics.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the post date, image urls, title, alt text and author.

        At least one image is required; all images are asserted to share the
        first image's title as both ``title`` and ``alt``.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find('span', class_='post-author')
        # The author name is the second child node of the author span.
        author = author_span.contents[1].string
        date_span = soup.find('span', class_='post-date')
        published = string_to_date(date_span.string, '%B %d, %Y')
        pictures = soup.find('div', class_='comicpane').find_all('img')
        assert pictures
        alt = pictures[0]['title']
        assert all(picture['title'] == picture['alt'] == alt for picture in pictures)
        return {
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [picture['src'] for picture in pictures],
            'title': title,
            'alt': alt,
            'author': author,
        }
1983
1984
1985
class PoorlyDrawnLines(GenericListableComic):
    """Class to retrieve Poorly Drawn Lines comics."""
    # Also on http://pdlcomics.tumblr.com
    name = 'poorlydrawn'
    long_name = 'Poorly Drawn Lines'
    url = 'http://poorlydrawnlines.com'
    _categories = ('POORLYDRAWN', )
    get_url_from_archive_element = get_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        Returns the image urls found in the post (at most one is expected)
        and the image's title attribute, or empty values when the post has
        no image.
        """
        imgs = soup.find('div', class_='post').find_all('img')
        assert len(imgs) <= 1
        return {
            'img': [i['src'] for i in imgs],
            'title': imgs[0].get('title', "") if imgs else "",
        }

    @classmethod
    def get_archive_elements(cls):
        """Return the archive links pointing to comics, in reversed page order."""
        archive_url = urljoin_wrapper(cls.url, 'archive')
        # Escape the site url so that its dots only match literal dots.
        url_re = re.compile('^%s/comic/.' % re.escape(cls.url))
        return reversed(get_soup_at_url(archive_url).find_all('a', href=url_re))
2009
2010
2011
class LoadingComics(GenericNavigableComic):
    """Retriever for the Loading Artist comics."""
    name = 'loadingartist'
    long_name = 'Loading Artist'
    url = 'http://www.loadingartist.com/latest'

    @classmethod
    def get_first_comic_link(cls):
        """Return the <a> titled "First" on the latest comic page."""
        latest = get_soup_at_url(cls.url)
        return latest.find('a', title="First")

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the <a> titled "Next" or "Previous"."""
        wanted_title = 'Next' if next_ else 'Previous'
        return last_soup.find('a', title=wanted_title)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the title, image urls and publication date of a comic."""
        title = soup.find('h1').string
        date_span = soup.find('span', class_='date')
        # The date text is stripped of surrounding whitespace before parsing.
        published = string_to_date(date_span.string.strip(), "%B %d, %Y")
        comic_div = soup.find('div', class_='comic')
        # Only images with empty alt and title attributes are kept.
        pictures = comic_div.find_all('img', alt='', title='')
        return {
            'title': title,
            'img': [picture['src'] for picture in pictures],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2041
2042
2043
class ChuckleADuck(GenericNavigableComic):
    """Retriever for the Chuckle-A-Duck comics."""
    name = 'chuckleaduck'
    long_name = 'Chuckle-A-duck'
    url = 'http://chuckleaduck.com'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the post date, image urls, title and author.

        Pages without a ``div#comic`` element (or without images in it) give
        an empty image list and an empty title.
        """
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(remove_st_nd_rd_th_from_date(raw_date), "%B %d, %Y")
        author = soup.find('span', class_='post-author').string
        comic_div = soup.find('div', id='comic')
        if comic_div:
            pictures = comic_div.find_all('img')
        else:
            pictures = []
        # The title is the first image's title attribute, when there is one.
        if pictures:
            title = pictures[0]['title']
        else:
            title = ""
        assert all(picture['title'] == picture['alt'] == title for picture in pictures)
        return {
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [picture['src'] for picture in pictures],
            'title': title,
            'author': author,
        }
2069
2070
2071
class DepressedAlien(GenericNavigableComic):
    """Retriever for the Depressed Alien comics."""
    name = 'depressedalien'
    long_name = 'Depressed Alien'
    url = 'http://depressedalien.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the parent of the home page image named ``beginArrow``."""
        home = get_soup_at_url(cls.url)
        arrow = home.find('img', attrs={'name': 'beginArrow'})
        return arrow.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the parent of the ``rightArrow``/``leftArrow`` image."""
        arrow_name = 'rightArrow' if next_ else 'leftArrow'
        arrow = last_soup.find('img', attrs={'name': arrow_name})
        return arrow.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the twitter:title and the og:image urls of a comic page."""
        title_meta = soup.find('meta', attrs={'name': 'twitter:title'})
        title = title_meta['content']
        image_metas = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'img': [meta['content'] for meta in image_metas],
        }
2097
2098
2099
class ThingsInSquares(GenericListableComic):
    """Retriever for the Things In Squares comics, driven by the archive table."""
    # This can be retrieved in other languages
    # Also on https://tapastic.com/series/Things-in-Squares
    name = 'squares'
    long_name = 'Things in squares'
    url = 'http://www.thingsinsquares.com'

    @classmethod
    def get_comic_info(cls, soup, tr):
        """Return the date, titles, description, tags, images and alt text.

        The archive row must hold exactly three cells: the second provides
        the link (and title), the third the date.
        """
        _, link_cell, date_cell = tr.find_all('td')
        anchor = link_cell.find('a')
        # Archive dates are formatted as month.day.two-digit-year.
        published = string_to_date(date_cell.string, "%m.%d.%y")
        title = anchor.string
        title2 = soup.find('meta', property='og:title')['content']
        description_meta = soup.find('meta', property='og:description')
        if description_meta:
            description = description_meta['content']
        else:
            description = ''
        tag_metas = soup.find_all('meta', property='article:tag')
        tags = ' '.join(meta['content'] for meta in tag_metas)
        pictures = soup.find('div', class_='entry-content').find_all('img')
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'title': title,
            'title2': title2,
            'description': description,
            'tags': tags,
            'img': [picture['src'] for picture in pictures],
            'alt': ' '.join(picture['alt'] for picture in pictures),
        }

    @classmethod
    def get_url_from_archive_element(cls, tr):
        """Return the href of the link held in the row's second cell."""
        _, link_cell, _ = tr.find_all('td')
        anchor = link_cell.find('a')
        return anchor['href']

    @classmethod
    def get_archive_elements(cls):
        """Return the rows of the archive table body, in reversed page order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive-2'))
        rows = archive_soup.find('tbody').find_all('tr')
        return reversed(rows)
2141
2142
2143
class HappleTea(GenericNavigableComic):
    """Retriever for the Happle Tea comics."""
    name = 'happletea'
    long_name = 'Happle Tea'
    url = 'http://www.happletea.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the title, image urls, alt text, post date and author."""
        pictures = soup.find('div', id='comic').find_all('img')
        # Title, author and date are all read from the post content block.
        post = soup.find('div', class_='post-content')
        title = post.find('h2', class_='post-title').string
        author = post.find('a', rel='author').string
        date_span = post.find('span', class_='post-date')
        published = string_to_date(date_span.string, "%B %d, %Y")
        assert all(picture['alt'] == picture['title'] for picture in pictures)
        # Alt texts are concatenated without any separator.
        alt = ''.join(picture['alt'] for picture in pictures)
        return {
            'title': title,
            'img': [picture['src'] for picture in pictures],
            'alt': alt,
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'author': author,
        }
2170
2171
2172
class FatAwesomeComics(GenericNavigableComic):
    """Retriever for the Fat Awesome comics."""
    # Also on http://fatawesomecomedy.tumblr.com
    name = 'fatawesome'
    long_name = 'Fat Awesome'
    url = 'http://fatawesome.com/comics'
    get_navi_link = get_a_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://fatawesome.com/shortbus/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the title, description, tags, alt text, image url and date.

        Exactly one image carrying ``data-recalc-dims="1"`` is expected.
        """
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        description = soup.find('meta', attrs={'name': 'description'})['content']
        tag_meta = soup.find('meta', property='article:tag')
        if tag_meta:
            tags = tag_meta['content']
        else:
            tags = ""
        published_meta = soup.find('meta', property='article:published_time')
        # Only the leading "YYYY-MM-DD" part of the timestamp is parsed.
        published = string_to_date(published_meta['content'][:10], "%Y-%m-%d")
        pictures = soup.find_all('img', attrs={'data-recalc-dims': "1"})
        assert len(pictures) == 1
        alt = "".join(picture['alt'] for picture in pictures)
        # The query string (after the last '?') is dropped from the image url.
        sources = [picture['src'].rsplit('?', 1)[0] for picture in pictures]
        return {
            'title': title,
            'description': description,
            'tags': tags,
            'alt': alt,
            'img': sources,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2203
2204
2205
class AnythingComic(GenericListableComic):
    """Retriever for the Anything Comic comics, driven by the archive table."""
    # Also on http://tapastic.com/series/anything
    name = 'anythingcomic'
    long_name = 'Anything Comic'
    url = 'http://www.anythingcomic.com'

    @classmethod
    def get_archive_elements(cls):
        """Return the comic rows of the archive's chapter table."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive/'))
        rows = archive_soup.find('table', id='chapter_table').find_all('tr')
        # The first 2 <tr>'s do not correspond to comics
        return rows[2:]

    @classmethod
    def get_url_from_archive_element(cls, tr):
        """Return the absolute url of the comic linked from an archive row."""
        _, comic_cell, _, _ = tr.find_all('td')
        anchor = comic_cell.find('a')
        return urljoin_wrapper(cls.url, anchor['href'])

    @classmethod
    def get_comic_info(cls, soup, tr):
        """Return the number, title, alt text, image url and date of a comic.

        The archive row must hold exactly four cells; number, title and date
        come from the first three. Exactly one ``img#comic_image`` is expected.
        """
        num_cell, comic_cell, date_cell, _ = tr.find_all('td')
        num = int(num_cell.string)
        title = comic_cell.find('a').string
        pictures = soup.find_all('img', id='comic_image')
        cleaned_date = remove_st_nd_rd_th_from_date(date_cell.string)
        published = string_to_date(cleaned_date, "%B %d, %Y, %I:%M %p")
        assert len(pictures) == 1
        assert all(picture.get('alt') == picture.get('title') for picture in pictures)
        return {
            'num': num,
            'title': title,
            'alt': pictures[0].get('alt', ''),
            'img': [picture['src'] for picture in pictures],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2246
2247
2248
class LonnieMillsap(GenericNavigableComic):
    """Retriever for Lonnie Millsap's comics."""
    name = 'millsap'
    long_name = 'Lonnie Millsap'
    url = 'http://www.lonniemillsap.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.lonniemillsap.com/?p=42'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the title, author, image urls and date of a post."""
        title = soup.find('h2', class_='post-title').string
        # Author, date and images are all read from the post content block.
        post = soup.find('div', class_='post-content')
        author_span = post.find("span", class_="post-author")
        author = author_span.find("a").string
        date_span = post.find("span", class_="post-date")
        published = string_to_date(date_span.string, "%B %d, %Y")
        entry = post.find("div", class_="entry")
        pictures = entry.find_all("img")
        return {
            'title': title,
            'author': author,
            'img': [picture['src'] for picture in pictures],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2274
2275
2276
class LinsEditions(GenericNavigableComic):
    """Class to retrieve L.I.N.S. Editions comics."""
    # Also on http://linscomics.tumblr.com
    # Now on https://warandpeas.com
    name = 'lins'
    long_name = 'L.I.N.S. Editions'
    url = 'https://linsedition.com'
    _categories = ('LINS', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'https://linsedition.com/2011/09/07/l-i-n-s/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for a comic page from its <meta> tags.

        `link` is not used by this implementation.
        Returns a dict with keys 'title', 'img', 'month', 'year', 'day'.
        """
        og_title = soup.find('meta', property='og:title')['content']
        image_metas = soup.find_all('meta', property='og:image')
        published_time = soup.find('meta', property='article:published_time')['content']
        # Only the first 10 characters are kept; they are parsed as YYYY-MM-DD.
        published = string_to_date(published_time[:10], "%Y-%m-%d")
        return {
            'title': og_title,
            'img': [meta['content'] for meta in image_metas],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2302
2303
2304
class ThorsThundershack(GenericNavigableComic):
    """Class to retrieve Thor's Thundershack comics."""
    # Also on http://tapastic.com/series/Thors-Thundershac
    name = 'thor'
    long_name = "Thor's Thundershack"
    url = 'http://www.thorsthundershack.com'
    _categories = ('THOR', )
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'first navlink' anchor found on the site's main page."""
        home_page = get_soup_at_url(cls.url)
        return home_page.find('a', class_='first navlink')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the next (or previous) comic link, or None if there is none.

        Anchors whose href is exactly '/comic' are skipped.
        """
        wanted_rel = 'next' if next_ else 'prev'
        candidates = (
            anchor
            for anchor in last_soup.find_all('a', rel=wanted_rel)
            if anchor['href'] != '/comic'
        )
        return next(candidates, None)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for the comic page parsed in `soup`.

        `link` is not used by this implementation.
        Returns a dict with keys 'img', 'month', 'year', 'day', 'author',
        'title', 'alt' and 'description'.
        """
        page_title = soup.find('meta', attrs={'name': 'description'})["content"]
        body_text = soup.find('div', itemprop='articleBody').text
        writer = soup.find('span', itemprop='author copyrightHolder').string
        images = soup.find_all('img', itemprop='image')
        # Every image is expected to carry identical title and alt texts.
        for image in images:
            assert image['title'] == image['alt']
        if images:
            alt_text = images[0]['alt']
        else:
            alt_text = ""
        timestamp = soup.find('time', itemprop='datePublished')["datetime"]
        published = string_to_date(timestamp, "%Y-%m-%d %H:%M:%S")
        return {
            # Image sources are joined to the site URL.
            'img': [urljoin_wrapper(cls.url, image['src']) for image in images],
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'author': writer,
            'title': page_title,
            'alt': alt_text,
            'description': body_text,
        }
2347
2348
2349
class GerbilWithAJetpack(GenericNavigableComic):
    """Class to retrieve GerbilWithAJetpack comics."""
    name = 'gerbil'
    long_name = 'Gerbil With A Jetpack'
    url = 'http://gerbilwithajetpack.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for the comic page parsed in `soup`.

        `link` is not used by this implementation.
        Returns a dict with keys 'img', 'title', 'alt', 'author', 'day',
        'month', 'year'. Raises IndexError when the comic div has no image.
        """
        heading = soup.find('h2', class_='post-title').string
        writer = soup.find("span", class_="post-author").find("a").string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        comic_div = soup.find("div", id="comic")
        images = comic_div.find_all("img")
        alt_text = images[0]['alt']
        # All images must share the same alt/title text as the first one.
        for image in images:
            assert image['alt'] == image['title'] == alt_text
        return {
            'img': [image['src'] for image in images],
            'title': heading,
            'alt': alt_text,
            'author': writer,
            'day': published.day,
            'month': published.month,
            'year': published.year,
        }
2376
2377
2378
class EveryDayBlues(GenericNavigableComic):
    """Class to retrieve EveryDayBlues Comics."""
    name = "blues"
    long_name = "Every Day Blues"
    url = "http://everydayblues.net"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for the comic page parsed in `soup`.

        `link` is not used by this implementation.
        Returns a dict with keys 'img', 'title', 'author', 'day', 'month', 'year'.
        """
        heading = soup.find("h2", class_="post-title").string
        writer = soup.find("span", class_="post-author").find("a").string
        raw_date = soup.find("span", class_="post-date").string
        # The date is parsed with the "de_DE.utf8" locale passed to string_to_date.
        published = string_to_date(raw_date, "%d. %B %Y", "de_DE.utf8")
        comic_div = soup.find("div", id="comic")
        images = comic_div.find_all("img")
        # Each image's alt and title texts must both equal the post title.
        for image in images:
            assert image['alt'] == image['title'] == heading
        assert len(images) <= 1
        return {
            'img': [image['src'] for image in images],
            'title': heading,
            'author': writer,
            'day': published.day,
            'month': published.month,
            'year': published.year,
        }
2404
2405
2406
class BiterComics(GenericNavigableComic):
    """Class to retrieve Biter Comics."""
    name = "biter"
    long_name = "Biter Comics"
    url = "http://www.bitercomics.com"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for the comic page parsed in `soup`.

        `link` is not used by this implementation.
        Returns a dict with keys 'img', 'title', 'alt', 'author', 'day',
        'month', 'year'. Exactly one image is expected in the comic div.
        """
        heading = soup.find("h1", class_="entry-title").string
        writer = soup.find("span", class_="author vcard").find("a").string
        raw_date = soup.find("span", class_="entry-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        comic_div = soup.find("div", id="comic")
        images = comic_div.find_all("img")
        for image in images:
            assert image['alt'] == image['title']
        assert len(images) == 1
        alt_text = images[0]['alt']
        return {
            'img': [image['src'] for image in images],
            'title': heading,
            'alt': alt_text,
            'author': writer,
            'day': published.day,
            'month': published.month,
            'year': published.year,
        }
2434
2435
2436
class TheAwkwardYeti(GenericNavigableComic):
    """Class to retrieve The Awkward Yeti comics."""
    # Also on http://www.gocomics.com/the-awkward-yeti
    # Also on http://larstheyeti.tumblr.com
    # Also on https://tapastic.com/series/TheAwkwardYeti
    name = 'yeti'
    long_name = 'The Awkward Yeti'
    url = 'http://theawkwardyeti.com'
    _categories = ('YETI', )
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for the comic page parsed in `soup`.

        `link` is not used by this implementation.
        Returns a dict with keys 'img', 'title', 'day', 'month', 'year'.
        """
        heading = soup.find('h2', class_='post-title').string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        comic_div = soup.find("div", id="comic")
        images = comic_div.find_all("img")
        # Only the first image is required to have matching alt and title.
        assert all(image['alt'] == image['title'] for image in images[:1])
        return {
            'img': [image['src'] for image in images],
            'title': heading,
            'day': published.day,
            'month': published.month,
            'year': published.year,
        }
2463
2464
2465
class PleasantThoughts(GenericNavigableComic):
    """Class to retrieve Pleasant Thoughts comics."""
    name = 'pleasant'
    long_name = 'Pleasant Thoughts'
    url = 'http://pleasant-thoughts.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for the comic page parsed in `soup`.

        `link` is not used by this implementation.
        Returns a dict with keys 'title' and 'img'.
        """
        content = soup.find('div', class_='post-content')
        # Title and images are both searched inside the post content div.
        heading = content.find('h2', class_='post-title').string
        entry = content.find("div", class_="entry")
        images = entry.find_all("img")
        return {
            'title': heading,
            'img': [image['src'] for image in images],
        }
2483
2484
2485
class MisterAndMe(GenericNavigableComic):
    """Class to retrieve Mister & Me Comics."""
    # Also on http://www.gocomics.com/mister-and-me
    # Also on https://tapastic.com/series/Mister-and-Me
    name = 'mister'
    long_name = 'Mister & Me'
    url = 'http://www.mister-and-me.com'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for the comic page parsed in `soup`.

        `link` is not used by this implementation.
        Returns a dict with keys 'img', 'title', 'alt', 'author', 'day',
        'month', 'year'. At most one image is expected in the comic div.
        """
        heading = soup.find('h2', class_='post-title').string
        writer = soup.find("span", class_="post-author").find("a").string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        comic_div = soup.find("div", id="comic")
        images = comic_div.find_all("img")
        for image in images:
            assert image['alt'] == image['title']
        assert len(images) <= 1
        # Pages without an image get an empty alt text.
        if images:
            alt_text = images[0]['alt']
        else:
            alt_text = ""
        return {
            'img': [image['src'] for image in images],
            'title': heading,
            'alt': alt_text,
            'author': writer,
            'day': published.day,
            'month': published.month,
            'year': published.year,
        }
2515
2516
2517
class LastPlaceComics(GenericNavigableComic):
    """Class to retrieve Last Place Comics."""
    name = 'lastplace'
    long_name = 'Last Place Comics'
    url = "http://lastplacecomics.com"
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for the comic page parsed in `soup`.

        `link` is not used by this implementation.
        Returns a dict with keys 'img', 'title', 'alt', 'author', 'day',
        'month', 'year'. At most one image is expected in the comic div.
        """
        heading = soup.find('h2', class_='post-title').string
        writer = soup.find("span", class_="post-author").find("a").string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        comic_div = soup.find("div", id="comic")
        images = comic_div.find_all("img")
        for image in images:
            assert image['alt'] == image['title']
        assert len(images) <= 1
        # Pages without an image get an empty alt text.
        if images:
            alt_text = images[0]['alt']
        else:
            alt_text = ""
        return {
            'img': [image['src'] for image in images],
            'title': heading,
            'alt': alt_text,
            'author': writer,
            'day': published.day,
            'month': published.month,
            'year': published.year,
        }
2545
2546
2547
class TalesOfAbsurdity(GenericNavigableComic):
    """Class to retrieve Tales Of Absurdity comics."""
    # Also on http://tapastic.com/series/Tales-Of-Absurdity
    # Also on http://talesofabsurdity.tumblr.com
    name = 'absurdity'
    long_name = 'Tales of Absurdity'
    url = 'http://talesofabsurdity.com'
    _categories = ('ABSURDITY', )
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for the comic page parsed in `soup`.

        `link` is not used by this implementation.
        Returns a dict with keys 'img', 'title', 'alt', 'author', 'day',
        'month', 'year'.
        """
        heading = soup.find('h2', class_='post-title').string
        writer = soup.find("span", class_="post-author").find("a").string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        comic_div = soup.find("div", id="comic")
        images = comic_div.find_all("img")
        for image in images:
            assert image['alt'] == image['title']
        # Pages without an image get an empty alt text.
        if images:
            alt_text = images[0]['alt']
        else:
            alt_text = ""
        return {
            'img': [image['src'] for image in images],
            'title': heading,
            'alt': alt_text,
            'author': writer,
            'day': published.day,
            'month': published.month,
            'year': published.year,
        }
2577
2578
2579
class EndlessOrigami(GenericEmptyComic, GenericNavigableComic):
    """Class to retrieve Endless Origami Comics."""
    name = "origami"
    long_name = "Endless Origami"
    url = "http://endlessorigami.com"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for the comic page parsed in `soup`.

        `link` is not used by this implementation.
        Returns a dict with keys 'img', 'title', 'alt', 'author', 'day',
        'month', 'year'.
        """
        heading = soup.find('h2', class_='post-title').string
        writer = soup.find("span", class_="post-author").find("a").string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        comic_div = soup.find("div", id="comic")
        images = comic_div.find_all("img")
        for image in images:
            assert image['alt'] == image['title']
        # Pages without an image get an empty alt text.
        if images:
            alt_text = images[0]['alt']
        else:
            alt_text = ""
        return {
            'img': [image['src'] for image in images],
            'title': heading,
            'alt': alt_text,
            'author': writer,
            'day': published.day,
            'month': published.month,
            'year': published.year,
        }
2606
2607
2608
class PlanC(GenericNavigableComic):
    """Class to retrieve Plan C comics."""
    name = 'planc'
    long_name = 'Plan C'
    url = 'http://www.plancomic.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for the comic page parsed in `soup`.

        `link` is not used by this implementation.
        Returns a dict with keys 'title', 'img', 'month', 'year', 'day'.
        """
        heading = soup.find('h2', class_='post-title').string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        comic_div = soup.find('div', id='comic')
        images = comic_div.find_all('img')
        return {
            'title': heading,
            'img': [image['src'] for image in images],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2630 View Code Duplication
2631
2632
class BuniComic(GenericNavigableComic):
    """Class to retrieve Buni Comics."""
    name = 'buni'
    long_name = 'BuniComics'
    url = 'http://www.bunicomic.com'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for the comic page parsed in `soup`.

        `link` is not used by this implementation.
        Returns a dict with keys 'img' and 'title'; the title is taken from
        the single comic image's title attribute.
        """
        comic_div = soup.find('div', id='comic')
        images = comic_div.find_all('img')
        for image in images:
            assert image['alt'] == image['title']
        assert len(images) == 1
        only_image = images[0]
        return {
            'img': [image['src'] for image in images],
            'title': only_image['title'],
        }
2650
2651
2652
class GenericCommitStrip(GenericNavigableComic):
    """Generic class to retrieve Commit Strips in different languages."""
    get_navi_link = get_a_rel_next
    get_first_comic_link = simulate_first_link
    # Placeholder: concrete subclasses assign the URL of their first comic.
    first_url = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for the comic page parsed in `soup`.

        `link` is not used by this implementation.
        Returns a dict with keys 'title', 'title2', 'description', 'img'.
        """
        og_description = soup.find('meta', property='og:description')['content']
        og_title = soup.find('meta', property='og:title')['content']
        entry = soup.find('div', class_='entry-content')
        images = entry.find_all('img')
        # Secondary title: image title attributes joined by spaces
        # (images without a title contribute an empty string).
        image_titles = [image.get('title', '') for image in images]
        joined_titles = ' '.join(image_titles)
        image_urls = []
        for image in images:
            ascii_src = convert_iri_to_plain_ascii_uri(image['src'])
            image_urls.append(urljoin_wrapper(cls.url, ascii_src))
        return {
            'title': og_title,
            'title2': joined_titles,
            'description': og_description,
            'img': image_urls,
        }
2671
2672
2673
class CommitStripFr(GenericCommitStrip):
    """Class to retrieve the French edition of Commit Strip."""
    name = "commit_fr"
    long_name = "Commit Strip (Fr)"
    url = "http://www.commitstrip.com/fr"
    _categories = ("FRANCAIS",)
    # Page used as the first comic of the French edition.
    first_url = "http://www.commitstrip.com/fr/2012/02/22/interview/"
2680
2681
2682
class CommitStripEn(GenericCommitStrip):
    """Class to retrieve the English edition of Commit Strip."""
    name = "commit_en"
    long_name = "Commit Strip (En)"
    url = "http://www.commitstrip.com/en"
    # Page used as the first comic of the English edition.
    first_url = "http://www.commitstrip.com/en/2012/02/22/interview/"
2688
2689
2690
class GenericBoumerie(GenericNavigableComic):
    """Generic class to retrieve Boumeries comics in different languages."""
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next
    # Placeholders: concrete subclasses assign the date format and locale
    # that get_comic_info passes to string_to_date.
    date_format = NotImplemented
    lang = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for the comic page parsed in `soup`.

        `link` is not used by this implementation.
        Returns a dict with keys 'short_url', 'img', 'title', 'author',
        'month', 'year', 'day'.
        """
        heading = soup.find('h2', class_='post-title').string
        shortlink = soup.find('link', rel='shortlink')['href']
        writer = soup.find("span", class_="post-author").find("a").string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, cls.date_format, cls.lang)
        comic_div = soup.find('div', id='comic')
        images = comic_div.find_all('img')
        for image in images:
            assert image['alt'] == image['title']
        return {
            'short_url': shortlink,
            'img': [image['src'] for image in images],
            'title': heading,
            'author': writer,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2716
2717
2718
class BoumerieEn(GenericBoumerie):
    """Class to retrieve the English edition of Boumeries comics."""
    name = "boumeries_en"
    long_name = "Boumeries (En)"
    url = "http://comics.boumerie.com"
    # Date format and locale used by the inherited get_comic_info.
    date_format = "%B %d, %Y"
    lang = "en_GB.UTF-8"
2725
2726
2727
class BoumerieFr(GenericBoumerie):
    """Class to retrieve the French edition of Boumeries comics."""
    name = "boumeries_fr"
    long_name = "Boumeries (Fr)"
    url = "http://bd.boumerie.com"
    _categories = ("FRANCAIS",)
    # Date format and locale used by the inherited get_comic_info.
    date_format = "%A, %d %B %Y"
    lang = "fr_FR.utf8"
2735
2736
2737
class UnearthedComics(GenericNavigableComic):
    """Class to retrieve Unearthed comics."""
    # Also on http://tapastic.com/series/UnearthedComics
    # Also on http://unearthedcomics.tumblr.com
    name = 'unearthed'
    long_name = 'Unearthed Comics'
    url = 'http://unearthedcomics.com'
    _categories = ('UNEARTHED', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://unearthedcomics.com/comics/world-with-turn-signals/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for the comic page parsed in `soup`.

        `link` is not used by this implementation.
        Returns a dict with keys 'title', 'description', 'url2', 'img',
        'month', 'year', 'day'. 'title' and 'description' fall back to an
        empty string when the corresponding element is missing.
        """
        short_url = soup.find('link', rel='shortlink')['href']
        title_elt = soup.find('h1') or soup.find('h2')
        title = title_elt.string if title_elt else ""
        # Store the text of the og:description meta tag rather than the tag
        # object itself, and tolerate pages that do not have the tag.
        desc_elt = soup.find('meta', property='og:description')
        desc = desc_elt['content'] if desc_elt is not None else ""
        date_str = soup.find('time', class_='published updated hidden')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        post = soup.find('div', class_="entry content entry-content type-portfolio")
        imgs = post.find_all('img')
        return {
            'title': title,
            'description': desc,
            'url2': short_url,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
2769
2770
2771
class Optipess(GenericNavigableComic):
    """Class to retrieve Optipess comics."""
    name = 'optipess'
    long_name = 'Optipess'
    url = 'http://www.optipess.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for the comic page parsed in `soup`.

        `link` is not used by this implementation.
        Returns a dict with keys 'title', 'alt', 'author', 'img', 'month',
        'year', 'day'. A page without a comic div yields no images and an
        empty alt text.
        """
        heading = soup.find('h2', class_='post-title').string
        writer = soup.find("span", class_="post-author").find("a").string
        comic_div = soup.find('div', id='comic')
        if comic_div:
            images = comic_div.find_all('img')
        else:
            images = []
        if images:
            alt_text = images[0]['title']
        else:
            alt_text = ""
        # All images must share the alt/title text of the first one.
        for image in images:
            assert image['alt'] == image['title'] == alt_text
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, "%B %d, %Y")
        return {
            'title': heading,
            'alt': alt_text,
            'author': writer,
            'img': [image['src'] for image in images],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2799
2800
2801
class PainTrainComic(GenericNavigableComic):
    """Class to retrieve Pain Train Comics."""
    name = 'paintrain'
    long_name = 'Pain Train Comics'
    url = 'http://paintraincomic.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for the comic page parsed in `soup`.

        `link` is not used by this implementation.
        Returns a dict with keys 'short_url', 'num', 'img', 'month', 'year',
        'day', 'alt', 'title'. 'num' is the number found after '?p=' in the
        page's shortlink, which must start with the class URL.
        """
        title = soup.find('h2', class_='post-title').string
        short_url = soup.find('link', rel='shortlink')['href']
        # The site URL is escaped so that characters such as '.' are matched
        # literally instead of being interpreted as regex metacharacters.
        short_url_re = re.compile('^%s/\\?p=([0-9]*)' % re.escape(cls.url))
        num = int(short_url_re.match(short_url).group(1))
        imgs = soup.find('div', id='comic').find_all('img')
        alt = imgs[0]['title']
        # All images must share the alt/title text of the first one.
        assert all(i['alt'] == i['title'] == alt for i in imgs)
        date_str = soup.find('span', class_='post-date').string
        day = string_to_date(date_str, "%d/%m/%Y")
        return {
            'short_url': short_url,
            'num': num,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'alt': alt,
            'title': title,
        }
2831
2832
2833
class MoonBeard(GenericNavigableComic):
    """Class to retrieve MoonBeard comics."""
    # Also on http://blog.squiresjam.es/moonbeard
    # Also on http://www.webtoons.com/en/comedy/moon-beard/list?title_no=471
    name = 'moonbeard'
    long_name = 'Moon Beard'
    url = 'http://moonbeard.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for the comic page parsed in `soup`.

        `link` is not used by this implementation.
        Returns a dict with keys 'short_url', 'num', 'img', 'month', 'year',
        'day', 'title', 'tags', 'alt', 'author'. 'num' is the number found
        after '?p=' in the page's shortlink, which must start with the class
        URL; 'tags' is the space-joined content of the article:tag metas.
        """
        title = soup.find('h2', class_='post-title').string
        short_url = soup.find('link', rel='shortlink')['href']
        # The site URL is escaped so that characters such as '.' are matched
        # literally instead of being interpreted as regex metacharacters.
        short_url_re = re.compile('^%s/\\?p=([0-9]*)' % re.escape(cls.url))
        num = int(short_url_re.match(short_url).group(1))
        imgs = soup.find('div', id='comic').find_all('img')
        alt = imgs[0]['title']
        # All images must share the alt/title text of the first one.
        assert all(i['alt'] == i['title'] == alt for i in imgs)
        date_str = soup.find('span', class_='post-date').string
        day = string_to_date(date_str, "%B %d, %Y")
        tags = ' '.join(t['content'] for t in soup.find_all('meta', property='article:tag'))
        author = soup.find('span', class_='post-author').string
        return {
            'short_url': short_url,
            'num': num,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'title': title,
            'tags': tags,
            'alt': alt,
            'author': author,
        }
2869 View Code Duplication
2870
2871
class AHamADay(GenericNavigableComic):
    """Class to retrieve class A Ham A Day comics."""
    name = 'ham'
    long_name = 'A Ham A Day'
    url = 'http://www.ahammaday.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.ahammaday.com/today/3/6/french'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns None when the page has no matching navigation item, instead
        of failing with AttributeError on the missing <li>.
        """
        # prev is next / next is prev
        li = last_soup.find('li', class_='previous' if next_ else 'next')
        return li.find('a') if li is not None else None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for the comic page parsed in `soup`.

        `link` is not used by this implementation.
        Returns a dict with keys 'img', 'title', 'author', 'day', 'month', 'year'.
        """
        date_str = soup.find('time', class_='published')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        author = soup.find('span', class_='blog-author').find('a').string
        title = soup.find('meta', property='og:title')['content']
        # Image URLs come from <meta itemprop="image"> tags, not <img> tags.
        imgs = soup.find_all('meta', itemprop='image')
        return {
            'img': [i['content'] for i in imgs],
            'title': title,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
        }
2902
2903
2904
class LittleLifeLines(GenericNavigableComic):
    """Class to retrieve Little Life Lines comics."""
    # Also on https://little-life-lines.tumblr.com
    name = 'life'
    long_name = 'Little Life Lines'
    url = 'http://www.littlelifelines.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.littlelifelines.com/comics/well-done'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor leading to the next (or previous) comic, or None."""
        # The site's labels are inverted: 'prev' leads to the next comic.
        item_class = 'prev' if next_ else 'next'
        nav_item = last_soup.find('li', class_=item_class)
        if nav_item:
            return nav_item.find('a')
        return None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for the comic page parsed in `soup`.

        `link` is not used by this implementation.
        Returns a dict with keys 'title', 'alt', 'description', 'author',
        'day', 'month', 'year', 'img'. Raises IndexError when no image with
        a src attribute is found.
        """
        og_title = soup.find('meta', property='og:title')['content']
        og_description = soup.find('meta', property='og:description')['content']
        raw_date = soup.find('time', class_='published')['datetime']
        published = string_to_date(raw_date, "%Y-%m-%d")
        writer = soup.find('a', rel='author').string
        body = soup.find('div', class_="body entry-content")
        # Keep only the images that actually have a src attribute.
        images = [
            image
            for image in body.find_all('img')
            if image.get('src') is not None
        ]
        alt_text = images[0]['alt']
        return {
            'title': og_title,
            'alt': alt_text,
            'description': og_description,
            'author': writer,
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [image['src'] for image in images],
        }
2943
2944
2945
class GenericWordPressInkblot(GenericNavigableComic):
    """Generic class to retrieve comics using WordPress with Inkblot."""
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the first-webcomic anchor found on the site's main page."""
        first_link_class = 'webcomic-link webcomic1-link first-webcomic-link first-webcomic1-link'
        home_page = get_soup_at_url(cls.url)
        return home_page.find('a', class_=first_link_class)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for the comic page parsed in `soup`.

        `link` is not used by this implementation.
        Returns a dict with keys 'title', 'day', 'month', 'year', 'img'.
        """
        og_title = soup.find('meta', property='og:title')['content']
        image_div = soup.find('div', class_='webcomic-image')
        images = image_div.find_all('img')
        published_time = soup.find('meta', property='article:published_time')['content']
        # Only the first 10 characters are kept; they are parsed as YYYY-MM-DD.
        published = string_to_date(published_time[:10], "%Y-%m-%d")
        return {
            'title': og_title,
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [image['src'] for image in images],
        }
2968
2969
2970
class EverythingsStupid(GenericWordPressInkblot):
    """Class to retrieve Everything's Stupid comics."""
    # Also on http://tapastic.com/series/EverythingsStupid
    # Also on http://www.webtoons.com/en/challenge/everythings-stupid/list?title_no=14591
    # Also on http://everythingsstupidcomics.tumblr.com
    name = "stupid"
    long_name = "Everything's Stupid"
    url = "http://everythingsstupid.net"
2978
2979
2980
class TheIsmComics(GenericWordPressInkblot):
    """Class to retrieve The Ism comics."""
    # Also on https://tapastic.com/series/TheIsm (?)
    name = "theism"
    long_name = "The Ism"
    url = "http://www.theism-comics.com"
2986
2987
2988
class WoodenPlankStudios(GenericWordPressInkblot):
    """Class to retrieve the comics published by Wooden Plank Studios."""
    name = "woodenplank"
    long_name = "Wooden Plank Studios"
    url = "http://woodenplankstudios.com"
2993
2994
2995
class ElectricBunnyComic(GenericNavigableComic):
    """Class to retrieve Electric Bunny Comics."""
    # Also on http://electricbunnycomics.tumblr.com
    name = 'bunny'
    long_name = 'Electric Bunny Comic'
    url = 'http://www.electricbunnycomics.com/View/Comic/153/Welcome+to+Hell'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the parent element of the image whose alt text is 'First'."""
        start_page = get_soup_at_url(cls.url)
        first_button = start_page.find('img', alt='First')
        return first_button.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the parent of the 'Next' (or 'Back') image, or None if absent."""
        button_alt = 'Next' if next_ else 'Back'
        button = last_soup.find('img', alt=button_alt)
        if button:
            return button.parent
        return None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for a comic page from its <meta> tags.

        `link` is not used by this implementation.
        Returns a dict with keys 'title' and 'img'.
        """
        og_title = soup.find('meta', property='og:title')['content']
        image_metas = soup.find_all('meta', property='og:image')
        return {
            'title': og_title,
            'img': [meta['content'] for meta in image_metas],
        }
3023
3024
3025
class SheldonComics(GenericNavigableComic):
    """Class to retrieve Sheldon comics."""
    # Also on http://www.gocomics.com/sheldon
    name = 'sheldon'
    long_name = 'Sheldon Comics'
    url = 'http://www.sheldoncomics.com'

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics."""
        return get_soup_at_url(cls.url).find("a", id="nav-first")

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns the first matching navigation anchor whose href is not the
        site's own URL, or None when there is no such anchor.
        """
        for link in last_soup.find_all("a", id="nav-next" if next_ else "nav-prev"):
            # Compare against cls.url rather than a duplicated literal so the
            # check stays in sync with the class attribute.
            if link['href'] != cls.url:
                return link
        return None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for the comic page parsed in `soup`.

        `link` is not used by this implementation.
        Returns a dict with keys 'title' and 'img'; the title is taken from
        the single image found in the 'comic-foot' div.
        """
        imgs = soup.find("div", id="comic-foot").find_all("img")
        assert all(i['alt'] == i['title'] for i in imgs)
        assert len(imgs) == 1
        title = imgs[0]['title']
        return {
            'title': title,
            'img': [i['src'] for i in imgs],
        }
3056
3057
3058
class Ubertool(GenericNavigableComic):
    """Retrieve the Ubertool comics from ubertoolcomic.com."""
    # Also on http://ubertool.tumblr.com
    # Also on https://tapastic.com/series/ubertool
    name = 'ubertool'
    long_name = 'Ubertool'
    url = 'http://ubertoolcomic.com'
    _categories = ('UBERTOOL', )
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_a_comicnavbase_comicnavnext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, publication date and images of the comic on the page.

        The date is read from the 'post-date' span, formatted as "%B %d, %Y".
        """
        post_title = soup.find('h2', class_='post-title').string
        post_date = soup.find('span', class_='post-date').string
        published = string_to_date(post_date, "%B %d, %Y")
        comic_imgs = soup.find('div', id='comic').find_all('img')
        info = {'img': [img['src'] for img in comic_imgs]}
        info['title'] = post_title
        info['month'] = published.month
        info['year'] = published.year
        info['day'] = published.day
        return info
3083
3084
3085
class EarthExplodes(GenericNavigableComic):
    """Retrieve The Earth Explodes comics from earthexplodes.com."""
    name = 'earthexplodes'
    long_name = 'The Earth Explodes'
    url = 'http://www.earthexplodes.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.earthexplodes.com/comics/000/'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor whose id is 'next' or 'prev'."""
        anchor_id = 'next' if next_ else 'prev'
        return last_soup.find('a', id=anchor_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, alternative text and images of the comic on the page.

        The alternative text is the 'title' attribute of the first image
        (empty string when the attribute is missing); image sources are made
        absolute using the class url.
        """
        page_title = soup.find('title').string
        comic_imgs = soup.find('div', id='image').find_all('img')
        hover_text = comic_imgs[0].get('title', '')
        absolute_srcs = [urljoin_wrapper(cls.url, img['src']) for img in comic_imgs]
        return {'img': absolute_srcs, 'title': page_title, 'alt': hover_text}
3110
3111
3112
class CubeDrone(GenericNavigableComic):
    """Class to retrieve Cube Drone comics."""
    name = 'cubedrone'
    long_name = 'Cube Drone'
    url = 'http://cube-drone.com/comics'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        The link is the parent of the 'glyphicon-backward' span found on the
        page at cls.url.
        """
        return get_soup_at_url(cls.url).find('span', class_='glyphicon glyphicon-backward').parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        The link is the parent of the chevron span ('right' for next, 'left'
        for previous). Returns None when the page has no such span instead of
        failing on the missing element.
        """
        class_ = 'glyphicon glyphicon-chevron-' + ('right' if next_ else 'left')
        span = last_soup.find('span', class_=class_)
        # find() returns None when nothing matches: report "no link" to the caller
        return None if span is None else span.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Title and secondary url come from the 'twitter:title' and
        'twitter:url' meta tags; 'title2' and 'alt' come from the first comic
        image. `link` is not used.
        """
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        url2 = soup.find('meta', attrs={'name': 'twitter:url'})['content']
        # date_str = soup.find('h2', class_='comic_title').find('small').string
        # day = string_to_date(date_str, "%B %d, %Y, %I:%M %p")
        imgs = soup.find_all('img', class_='comic img-responsive')
        title2 = imgs[0]['title']
        alt = imgs[0]['alt']
        return {
            'url2': url2,
            'title': title,
            'title2': title2,
            'alt': alt,
            'img': [i['src'] for i in imgs],
        }
3147
3148
3149
class MakeItStoopid(GenericNavigableComic):
    """Retrieve the Make It Stoopid comics from makeitstoopid.com."""
    name = 'stoopid'
    long_name = 'Make it stoopid'
    url = 'http://makeitstoopid.com/comic.php'

    @classmethod
    def get_nav(cls, soup):
        """Return the navigation elements of the page as a list.

        The page is expected to hold the same 'cnav' elements twice; only the
        first five are returned, in the order begin, prev, archive, next, end.
        Elements without an href are replaced by None.
        """
        nav_elts = soup.find_all(class_='cnav')
        first_bar = nav_elts[:5]
        second_bar = nav_elts[5:]
        assert first_bar == second_bar
        return [elt if elt.get('href') is not None else None for elt in first_bar]

    @classmethod
    def get_first_comic_link(cls):
        """Return the first navigation element of the page at cls.url."""
        main_page = get_soup_at_url(cls.url)
        return cls.get_nav(main_page)[0]

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the navigation element at index 3 (next) or 1 (previous)."""
        position = 3 if next_ else 1
        return cls.get_nav(last_soup)[position]

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the title (taken from the link) and the 'comicimg' images."""
        info = {'title': link['title']}
        comic_imgs = soup.find_all('img', id='comicimg')
        info['img'] = [img['src'] for img in comic_imgs]
        return info
3183
3184
3185
class TuMourrasMoinsBete(GenericNavigableComic):
    """Retrieve the Tu Mourras Moins Bete comics from its blogspot site."""
    name = 'mourrasmoinsbete'
    long_name = 'Tu Mourras Moins Bete'
    url = 'http://tumourrasmoinsbete.blogspot.fr'
    _categories = ('FRANCAIS', )
    get_first_comic_link = simulate_first_link
    first_url = 'http://tumourrasmoinsbete.blogspot.fr/2008/06/essai.html'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the blog pager anchor: 'newer' for next, 'older' for previous."""
        if next_:
            pager_id = 'Blog1_blog-pager-newer-link'
        else:
            pager_id = 'Blog1_blog-pager-older-link'
        return last_soup.find('a', id=pager_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the images, author and title of the post shown on the page."""
        page_title = soup.find('title').string
        body_div = soup.find('div', itemprop='description articleBody')
        post_imgs = body_div.find_all('img')
        author_name = soup.find('span', itemprop='author').string
        info = {'img': [img['src'] for img in post_imgs]}
        info['author'] = author_name
        info['title'] = page_title
        return info
3210
3211
3212
class GeekAndPoke(GenericNavigableComic):
    """Class to retrieve Geek And Poke comics."""
    name = 'geek'
    long_name = 'Geek And Poke'
    url = 'http://geek-and-poke.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://geek-and-poke.com/geekandpoke/2006/8/27/a-new-place-for-a-not-so-old-blog.html'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        NOTE(review): the 'prev-item' class is used for the next comic and
        'next-item' for the previous one; presumably the site orders posts
        from newest to oldest - confirm against the live pages.
        """
        return last_soup.find('a', class_='prev-item' if next_ else 'next-item')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Title and description come from the Open Graph meta tags, the date
        from the 'published' time element and the images from the entry
        content (falling back on the 'special-content' div). Image sources are
        made absolute using the class url. `link` is not used.
        """
        title = soup.find('meta', property='og:title')['content']
        desc = soup.find('meta', property='og:description')['content']
        date_str = soup.find('time', class_='published')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        author = soup.find('a', rel='author').string
        div_content = (soup.find('div', class_="body entry-content") or
                       soup.find('div', class_="special-content"))
        imgs = div_content.find_all('img')
        # Images without a source cannot be downloaded: ignore them
        imgs = [i for i in imgs if i.get('src') is not None]
        # NOTE(review): if `i` is a BeautifulSoup Tag, `'title' not in i` tests the
        # tag's children rather than its attributes - confirm whether
        # `not i.has_attr('title')` was intended.
        assert all('title' not in i or i['alt'] == i['title'] for i in imgs)
        # 'alt' is always a string, even for an entry without any image
        alt = imgs[0].get('alt', "") if imgs else ""
        return {
            'title': title,
            'alt': alt,
            'description': desc,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
        }
3250
3251
3252
class GloryOwlComix(GenericNavigableComic):
    """Retrieve the Glory Owl comics from its blogspot site."""
    name = 'gloryowl'
    long_name = 'Glory Owl'
    url = 'http://gloryowlcomix.blogspot.fr'
    _categories = ('NSFW', 'FRANCAIS')
    get_first_comic_link = simulate_first_link
    first_url = 'http://gloryowlcomix.blogspot.fr/2013/02/1_7.html'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the blog pager anchor: 'newer' for next, 'older' for previous."""
        if next_:
            pager_id = 'Blog1_blog-pager-newer-link'
        else:
            pager_id = 'Blog1_blog-pager-older-link'
        return last_soup.find('a', id=pager_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the images, author and title of the post shown on the page.

        Images are the targets of the 'image_src' link elements.
        """
        page_title = soup.find('title').string
        image_links = soup.find_all('link', rel='image_src')
        author_name = soup.find('a', rel='author').string
        info = {'img': [elt['href'] for elt in image_links]}
        info['author'] = author_name
        info['title'] = page_title
        return info
3277
3278
3279
class GenericTumblrV1(GenericComic):
    """Generic retriever for comics published on Tumblr, based on the V1 API."""
    _categories = ('TUMBLR', )

    @classmethod
    def get_next_comic(cls, last_comic):
        """Yield the comics published after last_comic.

        Posts for which get_comic_info returns None are skipped.
        """
        for post in cls.get_posts(last_comic):
            info = cls.get_comic_info(post)
            if info is None:
                continue
            yield info

    @classmethod
    def get_url_from_post(cls, post):
        """Return the 'url' attribute of a post."""
        return post['url']

    @classmethod
    def get_api_url(cls):
        """Return the address of the '/api/read/' endpoint for cls.url."""
        return urljoin_wrapper(cls.url, '/api/read/')

    @classmethod
    def get_comic_info(cls, post):
        """Return the description of a post, or None if it is not a photo post."""
        if post['type'] != 'photo':
            return None
        post_id = int(post['id'])
        single_post_api_url = cls.get_api_url() + '?id=%d' % (post_id)
        timestamp = int(post['unix-timestamp'])
        posted_on = datetime.datetime.fromtimestamp(timestamp).date()
        caption = post.find('photo-caption')
        title = caption.string if caption else ""
        tags = ' '.join(tag.string for tag in post.find_all('tag'))
        # Photos may appear in 'photo' tags and/or straight in the post
        photos = post.find_all('photo') or [post]
        # Images are in multiple resolutions - taking the first one
        url_tags = [photo.find('photo-url') for photo in photos]
        return {
            'url': cls.get_url_from_post(post),
            'url2': post['url-with-slug'],
            'day': posted_on.day,
            'month': posted_on.month,
            'year': posted_on.year,
            'title': title,
            'tags': tags,
            'img': [url_tag.string for url_tag in url_tags],
            'tumblr-id': post_id,
            'api_url': single_post_api_url,
        }

    @classmethod
    def get_posts(cls, last_comic, nb_post_per_call=10):
        """Get posts using API. nb_post_per_call is max 50.

        The API lists posts from newer to older; they are collected until the
        post corresponding to last_comic is met and handed back in
        chronological order. Nothing is returned when the previous post
        cannot be found.
        """
        if last_comic:
            stop_url = last_comic['url']
        else:
            stop_url = None
        collected = []
        if last_comic is not None:
            # Sometimes, tumblr posts are deleted. When previous post is deleted, we
            # might end up spending a lot of time looking for something that
            # doesn't exist. Failing early and clearly might be a better option.
            previous_api_url = last_comic['api_url']
            try:
                get_soup_at_url(previous_api_url)
            except urllib.error.HTTPError:
                try:
                    get_soup_at_url(cls.url)
                except urllib.error.HTTPError:
                    print("Did not find previous post nor main url %s" % cls.url)
                else:
                    print("Did not find previous post %s : it might have been deleted" % previous_api_url)
                return reversed(collected)
        base_api_url = cls.get_api_url()
        summary = get_soup_at_url(base_api_url).find('posts')
        start, total = int(summary['start']), int(summary['total'])
        assert start == 0
        for offset in range(0, total, nb_post_per_call):
            page_url = base_api_url + '?start=%d&num=%d' % (offset, nb_post_per_call)
            page = get_soup_at_url(page_url).find('posts')
            page_start, page_total = int(page['start']), int(page['total'])
            assert offset == page_start, "%d != %d" % (offset, page_start)
            # This may happen and should be handled in the future
            assert total == page_total, "%d != %d" % (total, page_total)
            for post in page.find_all('post'):
                if stop_url and stop_url == cls.get_url_from_post(post):
                    return reversed(collected)
                collected.append(post)
        if stop_url is not None:
            print("Did not find %s : there might be a problem" % stop_url)
            return []
        return reversed(collected)
3372
3373
3374
class SaturdayMorningBreakfastCerealTumblr(GenericTumblrV1):
    """Retrieve the Saturday Morning Breakfast Cereal comics via the generic Tumblr retriever."""
    # Also on http://www.gocomics.com/saturday-morning-breakfast-cereal
    # Also on http://www.smbc-comics.com
    name = "smbc-tumblr"
    long_name = "Saturday Morning Breakfast Cereal (from Tumblr)"
    url = "http://smbc-comics.tumblr.com"
    _categories = ("SMBC",)
3382
3383
3384
class IrwinCardozo(GenericTumblrV1):
    """Retrieve the Irwin Cardozo comics via the generic Tumblr retriever."""
    name = "irwinc"
    long_name = "Irwin Cardozo"
    url = "http://irwincardozocomics.tumblr.com"
3389
3390
3391
class AccordingToDevin(GenericTumblrV1):
    """Retrieve the According To Devin comics via the generic Tumblr retriever."""
    name = "devin"
    long_name = "According To Devin"
    url = "http://accordingtodevin.tumblr.com"
3396
3397
3398
class ItsTheTieTumblr(GenericTumblrV1):
    """Retrieve the It's the tie comics via the generic Tumblr retriever."""
    # Also on http://itsthetie.com
    # Also on https://tapastic.com/series/itsthetie
    name = "tie-tumblr"
    long_name = "It's the tie (from Tumblr)"
    url = 'http://itsthetie.tumblr.com'
    _categories = ("TIE",)
3406
3407
3408
class OctopunsTumblr(GenericTumblrV1):
    """Retrieve the Octopuns comics via the generic Tumblr retriever."""
    # Also on http://www.octopuns.net
    name = "octopuns-tumblr"
    long_name = "Octopuns (from Tumblr)"
    url = "http://octopuns.tumblr.com"
3414
3415
3416
class PicturesInBoxesTumblr(GenericTumblrV1):
    """Retrieve the Pictures In Boxes comics via the generic Tumblr retriever."""
    # Also on http://www.picturesinboxes.com
    name = "picturesinboxes-tumblr"
    long_name = "Pictures in Boxes (from Tumblr)"
    url = "http://picturesinboxescomic.tumblr.com"
3422
3423
3424
class TubeyToonsTumblr(GenericTumblrV1):
    """Retrieve the TubeyToons comics via the generic Tumblr retriever."""
    # Also on http://tapastic.com/series/Tubey-Toons
    # Also on http://tubeytoons.com
    name = "tubeytoons-tumblr"
    long_name = "Tubey Toons (from Tumblr)"
    url = "http://tubeytoons.tumblr.com"
    # NOTE(review): 'TUNEYTOONS' looks like a misspelling of 'TUBEYTOONS'; kept
    # as is because other classes may share this category value - confirm.
    _categories = ("TUNEYTOONS",)
3432
3433
3434
class UnearthedComicsTumblr(GenericTumblrV1):
    """Retrieve the Unearthed comics via the generic Tumblr retriever."""
    # Also on http://tapastic.com/series/UnearthedComics
    # Also on http://unearthedcomics.com
    name = "unearthed-tumblr"
    long_name = "Unearthed Comics (from Tumblr)"
    url = "http://unearthedcomics.tumblr.com"
    _categories = ("UNEARTHED",)
3442
3443
3444
class PieComic(GenericTumblrV1):
    """Retrieve the Pie Comic comics via the generic Tumblr retriever."""
    name = "pie"
    long_name = "Pie Comic"
    url = 'http://piecomic.tumblr.com'
3449
3450
3451
class MrEthanDiamond(GenericTumblrV1):
    """Retrieve the Mr Ethan Diamond comics via the generic Tumblr retriever."""
    name = "diamond"
    long_name = "Mr Ethan Diamond"
    url = "http://mrethandiamond.tumblr.com"
3456
3457
3458
class Flocci(GenericTumblrV1):
    """Retrieve the floccinaucinihilipilification comics via the generic Tumblr retriever."""
    name = "flocci"
    long_name = "floccinaucinihilipilification"
    url = 'http://floccinaucinihilipilificationa.tumblr.com'
3463
3464
3465
class UpAndOut(GenericTumblrV1):
    """Retrieve the Up & Out comics via the generic Tumblr retriever."""
    # Also on http://tapastic.com/series/UP-and-OUT
    name = "upandout"
    long_name = "Up And Out (from Tumblr)"
    url = "http://upandoutcomic.tumblr.com"
3471
3472
3473
class Pundemonium(GenericTumblrV1):
    """Retrieve the Pundemonium comics via the generic Tumblr retriever."""
    name = "pundemonium"
    long_name = "Pundemonium"
    url = "http://monstika.tumblr.com"
3478
3479
3480
class PoorlyDrawnLinesTumblr(GenericTumblrV1):
    """Retrieve the Poorly Drawn Lines comics via the generic Tumblr retriever."""
    # Also on http://poorlydrawnlines.com
    name = "poorlydrawn-tumblr"
    long_name = "Poorly Drawn Lines (from Tumblr)"
    url = "http://pdlcomics.tumblr.com"
    _categories = ("POORLYDRAWN",)
3487
3488
3489
class PearShapedComics(GenericTumblrV1):
    """Retrieve the Pear Shaped Comics via the generic Tumblr retriever."""
    name = "pearshaped"
    long_name = "Pear-Shaped Comics"
    url = "http://pearshapedcomics.com"
3494
3495
3496
class PondScumComics(GenericTumblrV1):
    """Retrieve the Pond Scum Comics via the generic Tumblr retriever."""
    name = "pond"
    long_name = "Pond Scum"
    url = "http://pondscumcomic.tumblr.com"
3501
3502
3503
class MercworksTumblr(GenericTumblrV1):
    """Retrieve the Mercworks comics via the generic Tumblr retriever."""
    # Also on http://mercworks.net
    name = "mercworks-tumblr"
    long_name = "Mercworks (from Tumblr)"
    url = "http://mercworks.tumblr.com"
3509
3510
3511
class OwlTurdTumblr(GenericTumblrV1):
    """Retrieve the Owl Turd comics via the generic Tumblr retriever."""
    # Also on http://tapastic.com/series/Owl-Turd-Comix
    name = "owlturd-tumblr"
    long_name = "Owl Turd (from Tumblr)"
    url = "http://owlturd.com"
    _categories = ("OWLTURD",)
3518
3519
3520
class VectorBelly(GenericTumblrV1):
    """Retrieve the Vector Belly comics via the generic Tumblr retriever."""
    # Also on http://vectorbelly.com
    name = "vector"
    long_name = "Vector Belly"
    url = "http://vectorbelly.tumblr.com"
3526
3527
3528
class GoneIntoRapture(GenericTumblrV1):
    """Retrieve the Gone Into Rapture comics via the generic Tumblr retriever."""
    # Also on http://goneintorapture.tumblr.com
    # Also on http://tapastic.com/series/Goneintorapture
    name = "rapture"
    long_name = "Gone Into Rapture"
    url = "http://www.goneintorapture.com"
3535
3536
3537
class TheOatmealTumblr(GenericTumblrV1):
    """Retrieve The Oatmeal comics via the generic Tumblr retriever."""
    # Also on http://theoatmeal.com
    name = "oatmeal-tumblr"
    long_name = "The Oatmeal (from Tumblr)"
    url = "http://oatmeal.tumblr.com"
3543
3544
3545
class HeckIfIKnowComicsTumblr(GenericTumblrV1):
    """Retrieve the Heck If I Know Comics via the generic Tumblr retriever."""
    # Also on http://tapastic.com/series/Regular
    name = "heck-tumblr"
    long_name = "Heck if I Know comics (from Tumblr)"
    url = "http://heckifiknowcomics.com"
3551
3552
3553
class MyJetPack(GenericTumblrV1):
    """Retrieve the My Jet Pack comics via the generic Tumblr retriever."""
    name = "jetpack"
    long_name = "My Jet Pack"
    url = "http://myjetpack.tumblr.com"
3558
3559
3560
class CheerUpEmoKidTumblr(GenericTumblrV1):
    """Retrieve the CheerUpEmoKid comics via the generic Tumblr retriever."""
    # Also on http://www.cheerupemokid.com
    # Also on http://tapastic.com/series/CUEK
    name = "cuek-tumblr"
    long_name = "Cheer Up Emo Kid (from Tumblr)"
    url = "http://enzocomics.tumblr.com"
3567
3568
3569
class ForLackOfABetterComic(GenericTumblrV1):
    """Retrieve the For Lack Of A Better Comic comics via the generic Tumblr retriever."""
    # Also on http://forlackofabettercomic.com
    name = "lack"
    long_name = "For Lack Of A Better Comic"
    url = "http://forlackofabettercomic.tumblr.com"
3575
3576
3577
class ZenPencilsTumblr(GenericTumblrV1):
    """Retrieve the ZenPencils comics via the generic Tumblr retriever."""
    # Also on http://zenpencils.com
    # Also on http://www.gocomics.com/zen-pencils
    name = "zenpencils-tumblr"
    long_name = "Zen Pencils (from Tumblr)"
    url = "http://zenpencils.tumblr.com"
    _categories = ("ZENPENCILS",)
3585
3586
3587
class ThreeWordPhraseTumblr(GenericTumblrV1):
    """Retrieve the Three Word Phrase comics via the generic Tumblr retriever."""
    # Also on http://threewordphrase.com
    name = "threeword-tumblr"
    long_name = "Three Word Phrase (from Tumblr)"
    url = "http://www.threewordphrase.tumblr.com"
3593
3594
3595
class TimeTrabbleTumblr(GenericTumblrV1):
    """Retrieve the Time Trabble comics via the generic Tumblr retriever."""
    # Also on http://timetrabble.com
    name = "timetrabble-tumblr"
    long_name = "Time Trabble (from Tumblr)"
    url = "http://timetrabble.tumblr.com"
3601
3602
3603
class SafelyEndangeredTumblr(GenericTumblrV1):
    """Retrieve the Safely Endangered comics via the generic Tumblr retriever."""
    # Also on http://www.safelyendangered.com
    name = "endangered-tumblr"
    long_name = "Safely Endangered (from Tumblr)"
    url = "http://tumblr.safelyendangered.com"
3609
3610
3611
class MouseBearComedyTumblr(GenericTumblrV1):
    """Retrieve the Mouse Bear Comedy comics via the generic Tumblr retriever."""
    # Also on http://www.mousebearcomedy.com
    name = "mousebear-tumblr"
    long_name = "Mouse Bear Comedy (from Tumblr)"
    url = "http://mousebearcomedy.tumblr.com"
3617
3618
3619
class BouletCorpTumblr(GenericTumblrV1):
    """Retrieve the BouletCorp comics via the generic Tumblr retriever."""
    # Also on http://www.bouletcorp.com
    name = "boulet-tumblr"
    long_name = "Boulet Corp (from Tumblr)"
    url = "http://bouletcorp.tumblr.com"
    _categories = ("BOULET",)
3626
3627
3628
class TheAwkwardYetiTumblr(GenericTumblrV1):
    """Retrieve The Awkward Yeti comics via the generic Tumblr retriever."""
    # Also on http://www.gocomics.com/the-awkward-yeti
    # Also on http://theawkwardyeti.com
    # Also on https://tapastic.com/series/TheAwkwardYeti
    name = "yeti-tumblr"
    long_name = "The Awkward Yeti (from Tumblr)"
    url = "http://larstheyeti.tumblr.com"
    _categories = ("YETI",)
3637
3638
3639
class NellucNhoj(GenericTumblrV1):
    """Retrieve the NellucNhoj comics via the generic Tumblr retriever."""
    name = "nhoj"
    long_name = "Nelluc Nhoj"
    url = "http://nellucnhoj.com"
3644
3645
3646
class DownTheUpwardSpiralTumblr(GenericTumblrV1):
    """Retrieve the Down The Upward Spiral comics via the generic Tumblr retriever."""
    # Also on http://www.downtheupwardspiral.com
    name = "spiral-tumblr"
    long_name = "Down the Upward Spiral (from Tumblr)"
    url = "http://downtheupwardspiral.tumblr.com"
3652
3653
3654
class AsPerUsualTumblr(GenericTumblrV1):
    """Class to retrieve As Per Usual comics."""
    # Also on https://tapastic.com/series/AsPerUsual
    name = 'usual-tumblr'
    long_name = 'As Per Usual (from Tumblr)'
    url = 'http://as-per-usual.tumblr.com'
    # Spelled with a leading underscore, like the category attribute of the
    # other comic classes in this file (it was 'categories' before).
    _categories = ('DAMILEE', )
3661
3662
3663
class HotComicsForCoolPeopleTumblr(GenericTumblrV1):
    """Class to retrieve Hot Comics For Cool People."""
    # Also on https://tapastic.com/series/Hot-Comics-For-Cool-People
    # Also on http://hotcomics.biz (links to tumblr)
    # Also on http://hcfcp.com (links to tumblr)
    name = 'hotcomics-tumblr'
    long_name = 'Hot Comics For Cool People (from Tumblr)'
    url = 'http://hotcomicsforcoolpeople.tumblr.com'
    # Spelled with a leading underscore, like the category attribute of the
    # other comic classes in this file (it was 'categories' before).
    _categories = ('DAMILEE', )
3672
3673
3674
class OneOneOneOneComicTumblr(GenericTumblrV1):
    """Retrieve the 1111 Comics via the generic Tumblr retriever."""
    # Also on http://www.1111comics.me
    # Also on https://tapastic.com/series/1111-Comics
    name = "1111-tumblr"
    long_name = "1111 Comics (from Tumblr)"
    url = "http://comics1111.tumblr.com"
    _categories = ("ONEONEONEONE",)
3682
3683
3684
class JhallComicsTumblr(GenericTumblrV1):
    """Retrieve the Jhall Comics via the generic Tumblr retriever."""
    # Also on http://jhallcomics.com
    name = "jhall-tumblr"
    long_name = "Jhall Comics (from Tumblr)"
    url = "http://jhallcomics.tumblr.com"
3690
3691
3692
class BerkeleyMewsTumblr(GenericTumblrV1):
    """Retrieve the Berkeley Mews comics via the generic Tumblr retriever."""
    # Also on http://www.gocomics.com/berkeley-mews
    # Also on http://www.berkeleymews.com
    name = "berkeley-tumblr"
    long_name = "Berkeley Mews (from Tumblr)"
    url = "http://mews.tumblr.com"
    _categories = ("BERKELEY",)
3700
3701
3702
class JoanCornellaTumblr(GenericTumblrV1):
    """Retrieve the Joan Cornella comics via the generic Tumblr retriever."""
    # Also on http://joancornella.net
    name = "cornella-tumblr"
    long_name = "Joan Cornella (from Tumblr)"
    url = "http://cornellajoan.tumblr.com"
3708
3709
3710
class RespawnComicTumblr(GenericTumblrV1):
    """Retrieve the Respawn Comic via the generic Tumblr retriever."""
    # Also on http://respawncomic.com
    name = "respawn-tumblr"
    long_name = "Respawn Comic (from Tumblr)"
    url = "http://respawncomic.tumblr.com"
3716
3717
3718
class ChrisHallbeckTumblr(GenericTumblrV1):
    """Retrieve the Chris Hallbeck comics via the generic Tumblr retriever."""
    # Also on https://tapastic.com/ChrisHallbeck
    # Also on http://maximumble.com
    # Also on http://minimumble.com
    # Also on http://thebookofbiff.com
    name = "hallbeck-tumblr"
    # NOTE(review): 'Hallback' below differs from the 'Hallbeck' spelling used in
    # the class name and url; values kept as is because the category may be
    # shared with other classes - confirm.
    long_name = "Chris Hallback (from Tumblr)"
    url = "http://chrishallbeck.tumblr.com"
    _categories = ("HALLBACK",)
3728
3729
3730
class ComicNuggets(GenericTumblrV1):
    """Retrieve the Comic Nuggets comics via the generic Tumblr retriever."""
    name = "nuggets"
    long_name = "Comic Nuggets"
    url = "http://comicnuggets.com"
3735
3736
3737
class PigeonGazetteTumblr(GenericTumblrV1):
    """Retrieve The Pigeon Gazette comics via the generic Tumblr retriever."""
    # Also on https://tapastic.com/series/The-Pigeon-Gazette
    name = "pigeon-tumblr"
    long_name = "The Pigeon Gazette (from Tumblr)"
    url = "http://thepigeongazette.tumblr.com"
3743
3744
3745
class CancerOwl(GenericTumblrV1):
    """Retrieve the Cancer Owl comics via the generic Tumblr retriever."""
    # Also on http://cancerowl.com
    name = "cancerowl-tumblr"
    long_name = "Cancer Owl (from Tumblr)"
    url = "http://cancerowl.tumblr.com"
3751
3752
3753
class FowlLanguageTumblr(GenericTumblrV1):
    """Retrieve the Fowl Language comics via the generic Tumblr retriever."""
    # Also on http://www.fowllanguagecomics.com
    # Also on http://tapastic.com/series/Fowl-Language-Comics
    # Also on http://www.gocomics.com/fowl-language
    name = "fowllanguage-tumblr"
    long_name = "Fowl Language Comics (from Tumblr)"
    url = "http://fowllanguagecomics.tumblr.com"
    _categories = ("FOWLLANGUAGE",)
3762
3763
3764
class TheOdd1sOutTumblr(GenericTumblrV1):
    """Retrieve The Odd 1s Out comics via the generic Tumblr retriever."""
    # Also on http://theodd1sout.com
    # Also on https://tapastic.com/series/Theodd1sout
    name = "theodd-tumblr"
    long_name = "The Odd 1s Out (from Tumblr)"
    url = "http://theodd1sout.tumblr.com"
3771
3772
3773
class TheUnderfoldTumblr(GenericTumblrV1):
    """Retrieve The Underfold comics via the generic Tumblr retriever."""
    # Also on http://theunderfold.com
    name = "underfold-tumblr"
    long_name = "The Underfold (from Tumblr)"
    url = "http://theunderfold.tumblr.com"
3779
3780
3781
class LolNeinTumblr(GenericTumblrV1):
    """Retrieve the Lol Nein comics via the generic Tumblr retriever."""
    # Also on http://lolnein.com
    name = "lolnein-tumblr"
    long_name = "Lol Nein (from Tumblr)"
    url = "http://lolneincom.tumblr.com"
3787
3788
3789
class FatAwesomeComicsTumblr(GenericTumblrV1):
    """Retrieve the Fat Awesome Comics via the generic Tumblr retriever."""
    # Also on http://fatawesome.com/comics
    name = "fatawesome-tumblr"
    long_name = "Fat Awesome (from Tumblr)"
    url = "http://fatawesomecomedy.tumblr.com"
3795
3796
3797
class TheWorldIsFlatTumblr(GenericTumblrV1):
    """Retrieve The World Is Flat comics via the generic Tumblr retriever."""
    # Also on https://tapastic.com/series/The-World-is-Flat
    name = "flatworld-tumblr"
    long_name = "The World Is Flat (from Tumblr)"
    url = "http://theworldisflatcomics.tumblr.com"
3803
3804
3805
class DorrisMc(GenericTumblrV1):
    """Retrieve the Dorris Mc comics via the generic Tumblr retriever."""
    # Also on http://www.gocomics.com/dorris-mccomics
    name = "dorrismc"
    long_name = "Dorris Mc"
    url = "http://dorrismccomics.com"
3811
3812
3813
class LeleozTumblr(GenericEmptyComic, GenericTumblrV1):
    """Retrieve the Leleoz comics; GenericEmptyComic comes first in the bases."""
    # Also on https://tapastic.com/series/Leleoz
    name = "leleoz-tumblr"
    long_name = "Leleoz (from Tumblr)"
    url = "http://leleozcomics.tumblr.com"
3819
3820
3821
class MoonBeardTumblr(GenericTumblrV1):
    """Retrieve the MoonBeard comics via the generic Tumblr retriever."""
    # Also on http://moonbeard.com
    # Also on http://www.webtoons.com/en/comedy/moon-beard/list?title_no=471
    name = "moonbeard-tumblr"
    long_name = "Moon Beard (from Tumblr)"
    url = "http://blog.squiresjam.es/moonbeard"
3828
3829
3830
class AComik(GenericTumblrV1):
    """Retrieve the A Comik comics via the generic Tumblr retriever."""
    name = "comik"
    long_name = "A Comik"
    url = "http://acomik.com"
3835
3836
3837
class ClassicRandy(GenericTumblrV1):
    """Retrieve the Classic Randy comics via the generic Tumblr retriever."""
    name = "randy"
    long_name = "Classic Randy"
    url = "http://classicrandy.tumblr.com"
3842
3843
3844
class DagssonTumblr(GenericTumblrV1):
    """Retrieve the Dagsson comics via the generic Tumblr retriever."""
    # Also on http://www.dagsson.com
    name = "dagsson-tumblr"
    long_name = "Dagsson Hugleikur (from Tumblr)"
    url = "http://hugleikurdagsson.tumblr.com"
3850
3851
3852
class LinsEditionsTumblr(GenericTumblrV1):
    """Retrieve the L.I.N.S. Editions comics via the generic Tumblr retriever."""
    # Also on https://linsedition.com
    # Now on http://warandpeas.tumblr.com
    name = "lins-tumblr"
    long_name = "L.I.N.S. Editions (from Tumblr)"
    url = "http://linscomics.tumblr.com"
    _categories = ("LINS",)
3860
3861
3862
class WarAndPeasTumblr(GenericTumblrV1):
    """Retrieve the War And Peas comics via the generic Tumblr retriever."""
    # Was on http://linscomics.tumblr.com
    name = "warandpeas-tumblr"
    long_name = "War And Peas (from Tumblr)"
    url = "http://warandpeas.tumblr.com"
    _categories = ("WARANDPEAS",)
3869
3870
3871
class OrigamiHotDish(GenericTumblrV1):
    """Retriever for the Origami Hot Dish comics."""
    long_name = 'Origami Hot Dish'
    name = 'origamihotdish'
    url = 'http://origamihotdish.com'
3876
3877
3878
class HitAndMissComicsTumblr(GenericTumblrV1):
    """Retriever for Hit and Miss Comics."""
    long_name = 'Hit and Miss Comics'
    name = 'hitandmiss'
    url = 'http://hitandmisscomics.tumblr.com'
3883
3884
3885
class HMBlanc(GenericTumblrV1):
    """Retriever for the HM Blanc comics."""
    long_name = 'HM Blanc'
    name = 'hmblanc'
    url = 'http://hmblanc.tumblr.com'
3890
3891
3892
class TalesOfAbsurdityTumblr(GenericTumblrV1):
    """Retriever for the Tales Of Absurdity comics."""
    # Also published at http://talesofabsurdity.com
    # Also published at http://tapastic.com/series/Tales-Of-Absurdity
    long_name = 'Tales of Absurdity (from Tumblr)'
    name = 'absurdity-tumblr'
    url = 'http://talesofabsurdity.tumblr.com'
    _categories = ('ABSURDITY', )
3900
3901
3902
class RobbieAndBobby(GenericTumblrV1):
    """Retriever for the Robbie And Bobby comics."""
    # Also published at http://robbieandbobby.com
    long_name = 'Robbie And Bobby (from Tumblr)'
    name = 'robbie-tumblr'
    url = 'http://robbieandbobby.tumblr.com'
3908
3909
3910
class ElectricBunnyComicTumblr(GenericTumblrV1):
    """Retriever for the Electric Bunny Comics."""
    # Also published at http://www.electricbunnycomics.com/View/Comic/153/Welcome+to+Hell
    long_name = 'Electric Bunny Comic (from Tumblr)'
    name = 'bunny-tumblr'
    url = 'http://electricbunnycomics.tumblr.com'
3916
3917
3918
class Hoomph(GenericTumblrV1):
    """Retriever for the Hoomph comics."""
    long_name = 'Hoomph'
    name = 'hoomph'
    url = 'http://hoom.ph'
3923
3924
3925
class BFGFSTumblr(GenericTumblrV1):
    """Retriever for the BFGFS comics."""
    # Also published at https://tapastic.com/series/BFGFS
    # Also published at http://bfgfs.com
    long_name = 'BFGFS (from Tumblr)'
    name = 'bfgfs-tumblr'
    url = 'http://bfgfs.tumblr.com'
3932
3933
3934
class DoodleForFood(GenericTumblrV1):
    """Retriever for the Doodle For Food comics."""
    # Also published at http://doodleforfood.com
    long_name = 'Doodle For Food'
    name = 'doodle'
    url = 'http://doodleforfood.com'
3940
3941
3942
class CassandraCalinTumblr(GenericTumblrV1):
    """Retriever for the C. Cassandra comics."""
    # Also published at http://cassandracalin.com
    # Also published at https://tapastic.com/series/C-Cassandra-comics
    long_name = 'Cassandra Calin (from Tumblr)'
    name = 'cassandra-tumblr'
    url = 'http://c-cassandra.tumblr.com'
3949
3950
3951
class DougWasTaken(GenericTumblrV1):
    """Retriever for the Doug Was Taken comics."""
    long_name = 'Doug Was Taken'
    name = 'doug'
    url = 'http://dougwastaken.tumblr.com'
3956
3957
3958
class MandatoryRollerCoaster(GenericTumblrV1):
    """Retriever for the Mandatory Roller Coaster comics."""
    long_name = 'Mandatory Roller Coaster'
    name = 'rollercoaster'
    url = 'http://mandatoryrollercoaster.com'
3963
3964
3965
class CEstPasEnRegardantSesPompes(GenericTumblrV1):
    """Retriever for the C'Est Pas En Regardant Ses Pompes (...) comics."""
    long_name = 'C Est Pas En Regardant Ses Pompes (...)'
    name = 'cperspqccltt'
    url = 'http://cperspqccltt.tumblr.com'
3970
3971
3972
class TheGrohlTroll(GenericTumblrV1):
    """Retriever for The Grohl Troll comics."""
    long_name = 'The Grohl Troll'
    name = 'grohltroll'
    url = 'http://thegrohltroll.com'
3977
3978
3979
class WebcomicName(GenericTumblrV1):
    """Retriever for the Webcomic Name comics."""
    long_name = 'Webcomic Name'
    name = 'webcomicname'
    url = 'http://webcomicname.com'
3984
3985
3986
class BooksOfAdam(GenericTumblrV1):
    """Retriever for the Books of Adam comics."""
    # Also published at http://www.booksofadam.com
    long_name = 'Books of Adam'
    name = 'booksofadam'
    url = 'http://booksofadam.tumblr.com'
3992
3993
3994
class HarkAVagrant(GenericTumblrV1):
    """Retriever for the Hark A Vagrant comics."""
    # Also published at http://www.harkavagrant.com
    long_name = 'Hark A Vagrant (from Tumblr)'
    name = 'hark-tumblr'
    url = 'http://beatonna.tumblr.com'
4000
4001
4002
class OurSuperAdventureTumblr(GenericTumblrV1):
    """Retriever for the Our Super Adventure comics."""
    # Also published at https://tapastic.com/series/Our-Super-Adventure
    # Also published at http://www.oursuperadventure.com
    # See also http://sarahgraley.com
    long_name = 'Our Super Adventure (from Tumblr)'
    name = 'superadventure-tumblr'
    url = 'http://sarahssketchbook.tumblr.com'
4010
4011
4012
class JakeLikesOnions(GenericTumblrV1):
    """Retriever for the Jake Likes Onions comics."""
    long_name = 'Jake Likes Onions'
    name = 'jake'
    url = 'http://jakelikesonions.com'
4017
4018
4019
class InYourFaceCake(GenericTumblrV1):
    """Retriever for the In Your Face Cake comics."""
    long_name = 'In Your Face Cake (from Tumblr)'
    name = 'inyourfacecake-tumblr'
    url = 'http://in-your-face-cake.tumblr.com'
4024
4025
4026
class Robospunk(GenericTumblrV1):
    """Retriever for the Robospunk comics."""
    long_name = 'Robospunk'
    name = 'robospunk'
    url = 'http://robospunk.com'
4031
4032
4033
class BananaTwinky(GenericTumblrV1):
    """Retriever for the Banana Twinky comics."""
    long_name = 'Banana Twinky'
    name = 'banana'
    url = 'http://bananatwinky.tumblr.com'
4038
4039
4040
class YesterdaysPopcornTumblr(GenericTumblrV1):
    """Retriever for the Yesterday's Popcorn comics."""
    # Also published at http://www.yesterdayspopcorn.com
    # Also published at https://tapastic.com/series/Yesterdays-Popcorn
    long_name = 'Yesterday\'s Popcorn (from Tumblr)'
    name = 'popcorn-tumblr'
    url = 'http://yesterdayspopcorn.tumblr.com'
4047
4048
4049
class TwistedDoodles(GenericTumblrV1):
    """Retriever for the Twisted Doodles comics."""
    long_name = 'Twisted Doodles'
    name = 'twisted'
    url = 'http://www.twisteddoodles.com'
4054
4055
4056
class UbertoolTumblr(GenericTumblrV1):
    """Retriever for the Ubertool comics."""
    # Also published at http://ubertoolcomic.com
    # Also published at https://tapastic.com/series/ubertool
    long_name = 'Ubertool (from Tumblr)'
    name = 'ubertool-tumblr'
    url = 'http://ubertool.tumblr.com'
    _categories = ('UBERTOOL', )
4064
4065
4066
class LittleLifeLinesTumblr(GenericTumblrV1):
    """Retriever for the Little Life Lines comics."""
    # Also published at http://www.littlelifelines.com
    long_name = 'Little Life Lines (from Tumblr)'
    name = 'life-tumblr'
    url = 'https://little-life-lines.tumblr.com'
4072
4073
4074
class TheyCanTalk(GenericTumblrV1):
    """Retriever for the They Can Talk comics."""
    long_name = 'They Can Talk'
    name = 'theycantalk'
    url = 'http://theycantalk.com'
4079
4080
4081
class Will5NeverCome(GenericTumblrV1):
    """Retriever for the Will 5:00 Never Come comics."""
    long_name = 'Will 5:00 Never Come ?'
    name = 'will5'
    url = 'http://will5nevercome.com'
4086
4087
4088
class Sephko(GenericTumblrV1):
    """Retriever for the Sephko comics."""
    # Also published at http://www.sephko.com
    long_name = 'Sephko'
    name = 'sephko'
    url = 'http://sephko.tumblr.com'
4094
4095
4096
class BlazersAtDawn(GenericTumblrV1):
    """Retriever for the Blazers At Dawn comics."""
    long_name = 'Blazers At Dawn'
    name = 'blazers'
    url = 'http://blazersatdawn.tumblr.com'
4101
4102
4103
class ArtByMoga(GenericEmptyComic, GenericTumblrV1):
    """Retriever for the Art By Moga comics."""
    # Deactivated (hence the GenericEmptyComic base): it downloads too many things.
    long_name = 'Art By Moga'
    name = 'moga'
    url = 'http://artbymoga.tumblr.com'
4108
4109
4110
class VerbalVomitTumblr(GenericTumblrV1):
    """Retriever for the Verbal Vomit comics."""
    # Also published at http://www.verbal-vomit.com
    long_name = 'Verbal Vomit (from Tumblr)'
    name = 'vomit-tumblr'
    url = 'http://verbalvomits.tumblr.com'
4116
4117
4118
class LibraryComic(GenericTumblrV1):
    """Retriever for the LibraryComic comics."""
    # Also published at http://librarycomic.com
    long_name = 'LibraryComic (from Tumblr)'
    name = 'library-tumblr'
    url = 'http://librarycomic.tumblr.com'
4124
4125
4126
class HorovitzComics(GenericListableComic):
    """Shared base for the comics hosted on horovitzcomics.com.

    Concrete classes are expected to override ``link_re`` with a compiled
    pattern matching the archive hrefs of their series; its first group is
    read as the comic number.
    """
    _categories = ('HOROVITZ', )
    url = 'http://www.horovitzcomics.com'
    # Placeholder only: calling the methods below needs a real pattern.
    link_re = NotImplemented
    # The three captured groups are read as year, month and day, in that order.
    img_re = re.compile('.*comics/([0-9]*)/([0-9]*)/([0-9]*)/.*$')
    get_url_from_archive_element = join_cls_url_to_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Describe one comic from its page and its archive link.

        ``soup`` is the parsed comic page, ``link`` the archive anchor that
        leads to it. The number comes from the link's href, the title from
        the link text and the date from the image's src path. Exactly one
        ``<img id="comic">`` is expected on the page.
        """
        comic_number = int(cls.link_re.match(link['href']).group(1))
        comic_title = link.string
        comic_imgs = soup.find_all('img', id='comic')
        assert len(comic_imgs) == 1
        date_match = cls.img_re.match(comic_imgs[0]['src'])
        year, month, day = map(int, date_match.groups())
        return {
            'title': comic_title,
            'day': day,
            'month': month,
            'year': year,
            'img': [img['src'] for img in comic_imgs],
            'num': comic_number,
        }

    @classmethod
    def get_archive_elements(cls):
        """Return the archive anchors whose href matches ``link_re``, in reverse page order."""
        archive_soup = get_soup_at_url('http://www.horovitzcomics.com/comics/archive/')
        matching_links = archive_soup.find_all('a', href=cls.link_re)
        return reversed(matching_links)
4157
4158
4159
class HorovitzNew(HorovitzComics):
    """Retriever for the Horovitz new comics."""
    # Archive hrefs of this series; the captured group is the comic number.
    link_re = re.compile('^/comics/new/([0-9]+)$')
    long_name = 'Horovitz New'
    name = 'horovitznew'
4164
4165
4166
class HorovitzClassic(HorovitzComics):
    """Retriever for the Horovitz classic comics."""
    # Archive hrefs of this series; the captured group is the comic number.
    link_re = re.compile('^/comics/classic/([0-9]+)$')
    long_name = 'Horovitz Classic'
    name = 'horovitzclassic'
4171
4172
4173
class GenericGoComic(GenericNavigableComic):
    """Shared logic for the comics published on gocomics.com."""
    _categories = ('GOCOMIC', )

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'fa-backward' button anchor found on the page at ``cls.url``."""
        home_soup = get_soup_at_url(cls.url)
        return home_soup.find(
            'a', class_='fa btn btn-outline-default btn-circle fa-backward sm ')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the navigation anchor of ``last_soup``.

        The 'fa-caret-right' button is looked up when ``next_`` is truthy,
        the 'fa-caret-left' one otherwise.
        """
        if next_:
            wanted_class = 'fa btn btn-outline-default btn-circle fa-caret-right sm '
        else:
            wanted_class = 'fa btn btn-outline-default btn-circle fa-caret-left sm '
        return last_soup.find('a', class_=wanted_class)

    @classmethod
    def get_url_from_link(cls, link):
        """Join the link's href to the gocomics.com root (not to ``cls.url``)."""
        return urljoin_wrapper('http://www.gocomics.com', link['href'])

    @classmethod
    def get_comic_info(cls, soup, link):
        """Describe one comic from its parsed page.

        The date, author and tags are read from the page's ``article:*``
        meta properties; the images are the ``<img>`` elements inside the
        'img-fluid item-comic-image' picture. ``link`` is not used.
        """
        published = soup.find('meta', property='article:published_time')['content']
        comic_date = string_to_date(published, "%Y-%m-%d")
        picture = soup.find('picture', class_='img-fluid item-comic-image')
        comic_imgs = picture.find_all('img')
        comic_author = soup.find('meta', property='article:author')['content']
        comic_tags = soup.find('meta', property='article:tag')['content']
        return {
            'day': comic_date.day,
            'month': comic_date.month,
            'year': comic_date.year,
            'img': [img['src'] for img in comic_imgs],
            'author': comic_author,
            'tags': comic_tags,
        }
4210
4211
4212
class PearlsBeforeSwine(GenericGoComic):
    """Retriever for the Pearls Before Swine comics."""
    long_name = 'Pearls Before Swine'
    name = 'pearls'
    url = 'http://www.gocomics.com/pearlsbeforeswine'
4217
4218
4219
class Peanuts(GenericGoComic):
    """Retriever for the Peanuts comics."""
    long_name = 'Peanuts'
    name = 'peanuts'
    url = 'http://www.gocomics.com/peanuts'
4224
4225
4226
class MattWuerker(GenericGoComic):
    """Retriever for the Matt Wuerker comics."""
    long_name = 'Matt Wuerker'
    name = 'wuerker'
    url = 'http://www.gocomics.com/mattwuerker'
4231
4232
4233
class TomToles(GenericGoComic):
    """Retriever for the Tom Toles comics."""
    long_name = 'Tom Toles'
    name = 'toles'
    url = 'http://www.gocomics.com/tomtoles'
4238
4239
4240
class BreakOfDay(GenericGoComic):
    """Retriever for the Break Of Day comics."""
    long_name = 'Break Of Day'
    name = 'breakofday'
    url = 'http://www.gocomics.com/break-of-day'
4245
4246
4247
class Brevity(GenericGoComic):
    """Retriever for the Brevity comics."""
    long_name = 'Brevity'
    name = 'brevity'
    url = 'http://www.gocomics.com/brevitypanel'
4252
4253
4254
class MichaelRamirez(GenericGoComic):
    """Retriever for the Michael Ramirez comics."""
    long_name = 'Michael Ramirez'
    name = 'ramirez'
    url = 'http://www.gocomics.com/michaelramirez'
4259
4260
4261
class MikeLuckovich(GenericGoComic):
    """Retriever for the Mike Luckovich comics."""
    long_name = 'Mike Luckovich'
    name = 'luckovich'
    url = 'http://www.gocomics.com/mikeluckovich'
4266
4267
4268
class JimBenton(GenericGoComic):
    """Retriever for the Jim Benton comics."""
    # Also published at http://jimbenton.tumblr.com
    long_name = 'Jim Benton'
    name = 'benton'
    url = 'http://www.gocomics.com/jim-benton-cartoons'
4274
4275
4276
class TheArgyleSweater(GenericGoComic):
    """Retriever for the Argyle Sweater comics."""
    long_name = 'Argyle Sweater'
    name = 'argyle'
    url = 'http://www.gocomics.com/theargylesweater'
4281
4282
4283
class SunnyStreet(GenericGoComic):
    """Retriever for the Sunny Street comics."""
    # Also published at http://www.sunnystreetcomics.com
    long_name = 'Sunny Street'
    name = 'sunny'
    url = 'http://www.gocomics.com/sunny-street'
4289
4290
4291
class OffTheMark(GenericGoComic):
    """Retriever for the Off The Mark comics."""
    # Also published at https://www.offthemark.com
    long_name = 'Off The Mark'
    name = 'offthemark'
    url = 'http://www.gocomics.com/offthemark'
4297
4298
4299
class WuMo(GenericGoComic):
    """Retriever for the WuMo comics."""
    # Also published at http://wumo.com
    long_name = 'WuMo'
    name = 'wumo'
    url = 'http://www.gocomics.com/wumo'
4305
4306
4307
class LunarBaboon(GenericGoComic):
    """Retriever for the Lunar Baboon comics."""
    # Also published at http://www.lunarbaboon.com
    # Also published at https://tapastic.com/series/Lunarbaboon
    long_name = 'Lunar Baboon'
    name = 'lunarbaboon'
    url = 'http://www.gocomics.com/lunarbaboon'
4314
4315
4316
class SandersenGocomic(GenericGoComic):
    """Retriever for the Sarah Andersen comics."""
    # Also published at http://sarahcandersen.com
    # Also published at http://tapastic.com/series/Doodle-Time
    long_name = 'Sarah Andersen (from GoComics)'
    name = 'sandersen-goc'
    url = 'http://www.gocomics.com/sarahs-scribbles'
4323
4324
4325
class SaturdayMorningBreakfastCerealGoComic(GenericGoComic):
    """Retriever for the Saturday Morning Breakfast Cereal comics."""
    # Also published at http://smbc-comics.tumblr.com
    # Also published at http://www.smbc-comics.com
    long_name = 'Saturday Morning Breakfast Cereal (from GoComics)'
    name = 'smbc-goc'
    url = 'http://www.gocomics.com/saturday-morning-breakfast-cereal'
    _categories = ('SMBC', )
4333
4334
4335
class CalvinAndHobbesGoComic(GenericGoComic):
    """Retriever for the Calvin and Hobbes comics."""
    # Source is gocomics, not http://marcel-oehler.marcellosendos.ch/comics/ch/
    long_name = 'Calvin and Hobbes (from GoComics)'
    name = 'calvin-goc'
    url = 'http://www.gocomics.com/calvinandhobbes'
4341
4342
4343
class RallGoComic(GenericGoComic):
    """Retriever for the Ted Rall comics."""
    # Also published at http://rall.com/comic
    long_name = "Ted Rall (from GoComics)"
    name = 'rall-goc'
    url = "http://www.gocomics.com/ted-rall"
    _categories = ('RALL', )
4350
4351
4352
class TheAwkwardYetiGoComic(GenericGoComic):
    """Retriever for The Awkward Yeti comics."""
    # Also published at http://larstheyeti.tumblr.com
    # Also published at http://theawkwardyeti.com
    # Also published at https://tapastic.com/series/TheAwkwardYeti
    long_name = 'The Awkward Yeti (from GoComics)'
    name = 'yeti-goc'
    url = 'http://www.gocomics.com/the-awkward-yeti'
    _categories = ('YETI', )
4361
4362
4363
class BerkeleyMewsGoComics(GenericGoComic):
    """Retriever for the Berkeley Mews comics."""
    # Also published at http://mews.tumblr.com
    # Also published at http://www.berkeleymews.com
    long_name = 'Berkeley Mews (from GoComics)'
    name = 'berkeley-goc'
    url = 'http://www.gocomics.com/berkeley-mews'
    _categories = ('BERKELEY', )
4371
4372
4373
class SheldonGoComics(GenericGoComic):
    """Retriever for the Sheldon comics."""
    # Also published at http://www.sheldoncomics.com
    long_name = 'Sheldon Comics (from GoComics)'
    name = 'sheldon-goc'
    url = 'http://www.gocomics.com/sheldon'
4379
4380
4381
class FowlLanguageGoComics(GenericGoComic):
    """Retriever for the Fowl Language comics."""
    # Also published at http://www.fowllanguagecomics.com
    # Also published at http://tapastic.com/series/Fowl-Language-Comics
    # Also published at http://fowllanguagecomics.tumblr.com
    long_name = 'Fowl Language Comics (from GoComics)'
    name = 'fowllanguage-goc'
    url = 'http://www.gocomics.com/fowl-language'
    _categories = ('FOWLLANGUAGE', )
4390
4391
4392
class NickAnderson(GenericGoComic):
    """Retriever for the Nick Anderson comics."""
    long_name = 'Nick Anderson'
    name = 'nickanderson'
    url = 'http://www.gocomics.com/nickanderson'
4397
4398
4399
class GarfieldGoComics(GenericGoComic):
    """Retriever for the Garfield comics."""
    # Also published at http://garfield.com
    long_name = 'Garfield (from GoComics)'
    name = 'garfield-goc'
    url = 'http://www.gocomics.com/garfield'
    _categories = ('GARFIELD', )
4406
4407
4408
class DorrisMcGoComics(GenericGoComic):
    """Retriever for the Dorris Mc Comics."""
    # Also published at http://dorrismccomics.com
    long_name = 'Dorris Mc (from GoComics)'
    name = 'dorrismc-goc'
    url = 'http://www.gocomics.com/dorris-mccomics'
4414
4415
4416
class FoxTrot(GenericGoComic):
    """Retriever for the FoxTrot comics."""
    long_name = 'FoxTrot'
    name = 'foxtrot'
    url = 'http://www.gocomics.com/foxtrot'
4421
4422
4423
class FoxTrotClassics(GenericGoComic):
    """Retriever for the FoxTrot Classics comics."""
    long_name = 'FoxTrot Classics'
    name = 'foxtrot-classics'
    url = 'http://www.gocomics.com/foxtrotclassics'
4428
4429
4430
class MisterAndMeGoComics(GenericGoComic):
    """Retriever for the Mister & Me comics."""
    # Also published at http://www.mister-and-me.com
    # Also published at https://tapastic.com/series/Mister-and-Me
    long_name = 'Mister & Me (from GoComics)'
    name = 'mister-goc'
    url = 'http://www.gocomics.com/mister-and-me'
4437
4438
4439
class NonSequitur(GenericGoComic):
    """Retriever for the Non Sequitur (Wiley Miller) comics."""
    long_name = 'Non Sequitur'
    name = 'nonsequitur'
    url = 'http://www.gocomics.com/nonsequitur'
4444
4445
4446
class GenericTapasticComic(GenericListableComic):
    """Shared logic for the comics published on tapastic.com."""
    _categories = ('TAPASTIC', )

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Describe one episode from its parsed page and its archive entry.

        ``archive_elt`` is one item of the list returned by
        ``get_archive_elements``; its 'publishDate' is divided by 1000
        before being handed to ``fromtimestamp`` (presumably a millisecond
        timestamp). Returns None, after printing a notice, when the page
        holds no 'art-image' picture.
        """
        millis = int(archive_elt['publishDate'])
        published = datetime.datetime.fromtimestamp(millis / 1000.0).date()
        pictures = soup.find_all('img', class_='art-image')
        if pictures:
            return {
                'day': published.day,
                'year': published.year,
                'month': published.month,
                'img': [pic['src'] for pic in pictures],
                'title': archive_elt['title'],
            }
        episode_url = cls.get_url_from_archive_element(archive_elt)
        print("Comic %s is being uploaded, retry later" % episode_url)
        return None

    @classmethod
    def get_url_from_archive_element(cls, archive_elt):
        """Return the episode URL built from the archive entry's 'id'."""
        return ''.join(('http://tapastic.com/episode/', str(archive_elt['id'])))

    @classmethod
    def get_archive_elements(cls):
        """Return the episode list embedded in the javascript of the series page.

        There is no clean way known to get it, so the page at ``cls.url`` is
        scanned line by line for ``episodeList : <json>,`` and the JSON part
        of the first such line is decoded. Raises IndexError when no line
        matches.
        """
        prefix = 'episodeList : '
        suffix = ','
        candidates = []
        for raw_line in urlopen_wrapper(cls.url).readlines():
            line = raw_line.decode('utf-8').strip()
            if line.startswith(prefix) and line.endswith(suffix):
                candidates.append(line[len(prefix):-len(suffix)])
        return json.loads(candidates[0])
4479
4480
4481
class VegetablesForDessert(GenericTapasticComic):
    """Retriever for the Vegetables For Dessert comics."""
    # Also published at http://vegetablesfordessert.tumblr.com
    long_name = 'Vegetables For Dessert'
    name = 'vegetables'
    url = 'http://tapastic.com/series/vegetablesfordessert'
4487
4488
4489
class FowlLanguageTapa(GenericTapasticComic):
    """Retriever for the Fowl Language comics."""
    # Also published at http://www.fowllanguagecomics.com
    # Also published at http://fowllanguagecomics.tumblr.com
    # Also published at http://www.gocomics.com/fowl-language
    long_name = 'Fowl Language Comics (from Tapastic)'
    name = 'fowllanguage-tapa'
    url = 'http://tapastic.com/series/Fowl-Language-Comics'
    _categories = ('FOWLLANGUAGE', )
4498
4499
4500
class OscillatingProfundities(GenericTapasticComic):
    """Retriever for the Oscillating Profundities comics."""
    long_name = 'Oscillating Profundities'
    name = 'oscillating'
    url = 'http://tapastic.com/series/oscillatingprofundities'
4505
4506
4507
class ZnoflatsComics(GenericTapasticComic):
    """Retriever for the Znoflats comics."""
    long_name = 'Znoflats Comics'
    name = 'znoflats'
    url = 'http://tapastic.com/series/Znoflats-Comics'
4512
4513
4514
class SandersenTapastic(GenericTapasticComic):
    """Retriever for the Sarah Andersen comics."""
    # Also published at http://sarahcandersen.com
    # Also published at http://www.gocomics.com/sarahs-scribbles
    long_name = 'Sarah Andersen (from Tapastic)'
    name = 'sandersen-tapa'
    url = 'http://tapastic.com/series/Doodle-Time'
4521
4522
4523
class TubeyToonsTapastic(GenericTapasticComic):
    """Retriever for the TubeyToons comics."""
    # Also published at http://tubeytoons.com
    # Also published at http://tubeytoons.tumblr.com
    long_name = 'Tubey Toons (from Tapastic)'
    name = 'tubeytoons-tapa'
    url = 'http://tapastic.com/series/Tubey-Toons'
    # NOTE(review): 'TUNEYTOONS' looks like a typo for 'TUBEYTOONS'; kept as is
    # because other classes may share the same tag - confirm before renaming.
    _categories = ('TUNEYTOONS', )
4531
4532
4533
class AnythingComicTapastic(GenericTapasticComic):
    """Retriever for the Anything Comics."""
    # Also published at http://www.anythingcomic.com
    long_name = 'Anything Comic (from Tapastic)'
    name = 'anythingcomic-tapa'
    url = 'http://tapastic.com/series/anything'
4539
4540
4541
class UnearthedComicsTapastic(GenericTapasticComic):
    """Retriever for the Unearthed comics."""
    # Also published at http://unearthedcomics.com
    # Also published at http://unearthedcomics.tumblr.com
    long_name = 'Unearthed Comics (from Tapastic)'
    name = 'unearthed-tapa'
    url = 'http://tapastic.com/series/UnearthedComics'
    _categories = ('UNEARTHED', )
4549
4550
4551
class EverythingsStupidTapastic(GenericTapasticComic):
    """Retriever for the Everything's Stupid comics."""
    # Also published at http://www.webtoons.com/en/challenge/everythings-stupid/list?title_no=14591
    # Also published at http://everythingsstupid.net
    long_name = "Everything's Stupid (from Tapastic)"
    name = 'stupid-tapa'
    url = 'http://tapastic.com/series/EverythingsStupid'
4558
4559
4560
class JustSayEhTapastic(GenericTapasticComic):
    """Retriever for the Just Say Eh comics."""
    # Also published at http://www.justsayeh.com
    long_name = 'Just Say Eh (from Tapastic)'
    name = 'justsayeh-tapa'
    url = 'http://tapastic.com/series/Just-Say-Eh'
4566
4567
4568
class ThorsThundershackTapastic(GenericTapasticComic):
    """Retriever for the Thor's Thundershack comics."""
    # Also published at http://www.thorsthundershack.com
    long_name = 'Thor\'s Thundershack (from Tapastic)'
    name = 'thor-tapa'
    url = 'http://tapastic.com/series/Thors-Thundershac'
    _categories = ('THOR', )
4575
4576
4577
class OwlTurdTapastic(GenericTapasticComic):
    """Retriever for the Owl Turd comics."""
    # Also published at http://owlturd.com
    long_name = 'Owl Turd (from Tapastic)'
    name = 'owlturd-tapa'
    url = 'http://tapastic.com/series/Owl-Turd-Comix'
    _categories = ('OWLTURD', )
4584
4585
4586
class GoneIntoRaptureTapastic(GenericTapasticComic):
    """Retriever for the Gone Into Rapture comics."""
    # Also published at http://goneintorapture.tumblr.com
    # Also published at http://www.goneintorapture.com
    long_name = 'Gone Into Rapture (from Tapastic)'
    name = 'rapture-tapa'
    url = 'http://tapastic.com/series/Goneintorapture'
4593
4594
4595
class HeckIfIKnowComicsTapa(GenericTapasticComic):
    """Retriever for the Heck If I Know Comics."""
    # Also published at http://heckifiknowcomics.com
    long_name = 'Heck if I Know comics (from Tapastic)'
    name = 'heck-tapa'
    url = 'http://tapastic.com/series/Regular'
4601
4602
4603
class CheerUpEmoKidTapa(GenericTapasticComic):
    """Retriever for the CheerUpEmoKid comics."""
    # Also published at http://www.cheerupemokid.com
    # Also published at http://enzocomics.tumblr.com
    long_name = 'Cheer Up Emo Kid (from Tapastic)'
    name = 'cuek-tapa'
    url = 'http://tapastic.com/series/CUEK'
4610
4611
4612
class BigFootJusticeTapa(GenericTapasticComic):
    """Retriever for the Big Foot Justice comics."""
    # Also published at http://bigfootjustice.com
    long_name = 'Big Foot Justice (from Tapastic)'
    name = 'bigfoot-tapa'
    url = 'http://tapastic.com/series/bigfoot-justice'
4618
4619
4620
class UpAndOutTapa(GenericTapasticComic):
    """Retriever for the Up & Out comics."""
    # Also published at http://upandoutcomic.tumblr.com
    long_name = 'Up And Out (from Tapastic)'
    name = 'upandout-tapa'
    url = 'http://tapastic.com/series/UP-and-OUT'
4626
4627
4628
class ToonHoleTapa(GenericTapasticComic):
    """Retriever for the Toon Hole comics."""
    # Also published at http://www.toonhole.com
    long_name = 'Toon Hole (from Tapastic)'
    name = 'toonhole-tapa'
    url = 'http://tapastic.com/series/TOONHOLE'
4634
4635
4636
class AngryAtNothingTapa(GenericTapasticComic):
    """Retriever for the Angry at Nothing comics."""
    # Also published at http://www.angryatnothing.net
    long_name = 'Angry At Nothing (from Tapastic)'
    name = 'angry-tapa'
    url = 'http://tapastic.com/series/Comics-yeah-definitely-comics-'
4642
4643
4644
class LeleozTapa(GenericTapasticComic):
    """Retriever for the Leleoz comics."""
    # Also published at http://leleozcomics.tumblr.com
    long_name = 'Leleoz (from Tapastic)'
    name = 'leleoz-tapa'
    url = 'https://tapastic.com/series/Leleoz'
4650
4651
4652
class TheAwkwardYetiTapa(GenericTapasticComic):
    """Retriever for The Awkward Yeti comics."""
    # Also published at http://www.gocomics.com/the-awkward-yeti
    # Also published at http://theawkwardyeti.com
    # Also published at http://larstheyeti.tumblr.com
    long_name = 'The Awkward Yeti (from Tapastic)'
    name = 'yeti-tapa'
    url = 'https://tapastic.com/series/TheAwkwardYeti'
    _categories = ('YETI', )
4661
4662
4663
class AsPerUsualTapa(GenericTapasticComic):
    """Class to retrieve As Per Usual comics."""
    # Also on http://as-per-usual.tumblr.com
    name = 'usual-tapa'
    long_name = 'As Per Usual (from Tapastic)'
    url = 'https://tapastic.com/series/AsPerUsual'
    # Was spelled `categories`; every other comic class declares its tags
    # through `_categories`, so the tag was presumably never picked up.
    _categories = ('DAMILEE', )
4670
4671
4672
class HotComicsForCoolPeopleTapa(GenericTapasticComic):
    """Class to retrieve Hot Comics For Cool People."""
    # Also on http://hotcomicsforcoolpeople.tumblr.com
    # Also on http://hotcomics.biz (links to tumblr)
    # Also on http://hcfcp.com (links to tumblr)
    name = 'hotcomics-tapa'
    long_name = 'Hot Comics For Cool People (from Tapastic)'
    url = 'https://tapastic.com/series/Hot-Comics-For-Cool-People'
    # Was spelled `categories`; every other comic class declares its tags
    # through `_categories`, so the tag was presumably never picked up.
    _categories = ('DAMILEE', )
4681
4682
4683
class OneOneOneOneComicTapa(GenericTapasticComic):
    """Retriever for the 1111 Comics."""
    # Also published at http://www.1111comics.me
    # Also published at http://comics1111.tumblr.com
    long_name = '1111 Comics (from Tapastic)'
    name = '1111-tapa'
    url = 'https://tapastic.com/series/1111-Comics'
    _categories = ('ONEONEONEONE', )
4691
4692
4693
class TumbleDryTapa(GenericTapasticComic):
    """Class to retrieve Tumble Dry comics."""
    # Also on http://tumbledrycomics.com
    name = 'tumbledry-tapa'
    # Display name used to read 'Tumblr Dry'; the comic is called Tumble Dry.
    long_name = 'Tumble Dry (from Tapastic)'
    url = 'https://tapastic.com/series/TumbleDryComics'
4699
4700
4701
class DeadlyPanelTapa(GenericTapasticComic):
    """Retriever for the Deadly Panel comics."""
    # Also published at http://www.deadlypanel.com
    long_name = 'Deadly Panel (from Tapastic)'
    name = 'deadly-tapa'
    url = 'https://tapastic.com/series/deadlypanel'
4707
4708
4709
class ChrisHallbeckMaxiTapa(GenericTapasticComic):
    """Retriever for the Chris Hallbeck comics (Maximumble series)."""
    # Also published at http://chrishallbeck.tumblr.com
    # Also published at http://maximumble.com
    # NOTE(review): 'Hallback' / 'HALLBACK' below differ from the author's
    # name (Hallbeck); kept unchanged - confirm before renaming.
    long_name = 'Chris Hallback - Maximumble (from Tapastic)'
    name = 'hallbeckmaxi-tapa'
    url = 'https://tapastic.com/series/Maximumble'
    _categories = ('HALLBACK', )
4717
4718
4719
class ChrisHallbeckMiniTapa(GenericTapasticComic):
    """Class to retrieve Chris Hallbeck's Minimumble comics from Tapastic."""
    # Also on http://chrishallbeck.tumblr.com
    # Also on http://minimumble.com
    name = 'hallbeckmini-tapa'
    # Display name: the artist is spelled "Hallbeck" (see class name and URLs).
    long_name = 'Chris Hallbeck - Minimumble (from Tapastic)'
    url = 'https://tapastic.com/series/Minimumble'
    # The category key keeps its existing spelling on purpose: it is an
    # identifier, presumably shared with classes outside this block - verify
    # before renaming it.
    _categories = ('HALLBACK', )
4727
4728
4729
class ChrisHallbeckBiffTapa(GenericTapasticComic):
    """Class to retrieve Chris Hallbeck's The Book of Biff comics from Tapastic."""
    # Also on http://chrishallbeck.tumblr.com
    # Also on http://thebookofbiff.com
    name = 'hallbeckbiff-tapa'
    # Display name: the artist is spelled "Hallbeck" (see class name and URLs).
    long_name = 'Chris Hallbeck - The Book of Biff (from Tapastic)'
    url = 'https://tapastic.com/series/Biff'
    # The category key keeps its existing spelling on purpose: it is an
    # identifier, presumably shared with classes outside this block - verify
    # before renaming it.
    _categories = ('HALLBACK', )
4737
4738
4739
class RandoWisTapa(GenericTapasticComic):
    """Retrieve RandoWis comics from its Tapastic series page."""
    # Other place where the comic is published:
    #  - https://randowis.com
    url = "https://tapastic.com/series/RandoWis"
    name = "randowis-tapa"
    long_name = "RandoWis (from Tapastic)"
4745
4746
4747
class PigeonGazetteTapa(GenericTapasticComic):
    """Retrieve The Pigeon Gazette comics from its Tapastic series page."""
    # Other place where the comic is published:
    #  - http://thepigeongazette.tumblr.com
    url = "https://tapastic.com/series/The-Pigeon-Gazette"
    name = "pigeon-tapa"
    long_name = "The Pigeon Gazette (from Tapastic)"
4753
4754
4755
class TheOdd1sOutTapa(GenericTapasticComic):
    """Retrieve The Odd 1s Out comics from its Tapastic series page."""
    # Other places where the comic is published:
    #  - http://theodd1sout.com
    #  - http://theodd1sout.tumblr.com
    url = "https://tapastic.com/series/Theodd1sout"
    name = "theodd-tapa"
    long_name = "The Odd 1s Out (from Tapastic)"
4762
4763
4764
class TheWorldIsFlatTapa(GenericTapasticComic):
    """Retrieve The World Is Flat comics from its Tapastic series page."""
    # Other place where the comic is published:
    #  - http://theworldisflatcomics.tumblr.com
    url = "https://tapastic.com/series/The-World-is-Flat"
    name = "flatworld-tapa"
    long_name = "The World Is Flat (from Tapastic)"
4770
4771
4772
class MisterAndMeTapa(GenericTapasticComic):
    """Retrieve Mister & Me comics from its Tapastic series page."""
    # Other places where the comic is published:
    #  - http://www.mister-and-me.com
    #  - http://www.gocomics.com/mister-and-me
    url = "https://tapastic.com/series/Mister-and-Me"
    name = "mister-tapa"
    long_name = "Mister & Me (from Tapastic)"
4779
4780
4781
class TalesOfAbsurdityTapa(GenericTapasticComic):
    """Class to retrieve Tales Of Absurdity comics from its Tapastic series."""
    # Also on http://talesofabsurdity.com
    # Also on http://talesofabsurdity.tumblr.com
    name = 'absurdity-tapa'
    long_name = 'Tales of Absurdity (from Tapastic)'
    # Uses https like every other Tapastic series URL in this file
    # (it was the only one still pointing at plain http).
    url = 'https://tapastic.com/series/Tales-Of-Absurdity'
    _categories = ('ABSURDITY', )
4789
4790
4791
class BFGFSTapa(GenericTapasticComic):
    """Retrieve BFGFS comics from its Tapastic series page."""
    # Other places where the comic is published:
    #  - http://bfgfs.com
    #  - http://bfgfs.tumblr.com
    url = "https://tapastic.com/series/BFGFS"
    name = "bfgfs-tapa"
    long_name = "BFGFS (from Tapastic)"
4798
4799
4800
class DoodleForFoodTapa(GenericTapasticComic):
    """Retrieve Doodle For Food comics from its Tapastic series page."""
    # Other place where the comic is published:
    #  - http://doodleforfood.com
    url = "https://tapastic.com/series/Doodle-for-Food"
    name = "doodle-tapa"
    long_name = "Doodle For Food (from Tapastic)"
4806
4807
4808
class MrLovensteinTapa(GenericTapasticComic):
    """Retrieve Mr Lovenstein comics from its Tapastic series page."""
    # NOTE(review): the previous "Also on" comment pointed at this very
    # Tapastic URL (https://tapastic.com/series/MrLovenstein); the comic's
    # main site is presumably http://www.mrlovenstein.com - confirm.
    url = "https://tapastic.com/series/MrLovenstein"
    name = "mrlovenstein-tapa"
    long_name = "Mr. Lovenstein (from Tapastic)"
4814
4815
4816
class CassandraCalinTapa(GenericTapasticComic):
    """Retrieve C. Cassandra comics from its Tapastic series page."""
    # Other places where the comic is published:
    #  - http://cassandracalin.com
    #  - http://c-cassandra.tumblr.com
    url = "https://tapastic.com/series/C-Cassandra-comics"
    name = "cassandra-tapa"
    long_name = "Cassandra Calin (from Tapastic)"
4823
4824
4825
class WafflesAndPancakes(GenericTapasticComic):
    """Retrieve Waffles And Pancakes comics from its Tapastic series page."""
    # Other place where the comic is published:
    #  - http://wandpcomic.com
    url = "https://tapastic.com/series/Waffles-and-Pancakes"
    name = "waffles"
    long_name = "Waffles And Pancakes"
4831
4832
4833
class YesterdaysPopcornTapastic(GenericTapasticComic):
    """Retrieve Yesterday's Popcorn comics from its Tapastic series page."""
    # Other places where the comic is published:
    #  - http://www.yesterdayspopcorn.com
    #  - http://yesterdayspopcorn.tumblr.com
    url = "https://tapastic.com/series/Yesterdays-Popcorn"
    name = "popcorn-tapa"
    long_name = "Yesterday's Popcorn (from Tapastic)"
4840
4841
4842
class OurSuperAdventureTapastic(GenericTapasticComic):
    """Retrieve Our Super Adventure comics from its Tapastic series page."""
    # Other place where the comic is published:
    #  - http://www.oursuperadventure.com
    # Further links recorded alongside it:
    #  - http://sarahssketchbook.tumblr.com
    #  - http://sarahgraley.com
    url = "https://tapastic.com/series/Our-Super-Adventure"
    name = "superadventure-tapastic"
    long_name = "Our Super Adventure (from Tapastic)"
4850
4851
4852
class NamelessPCs(GenericTapasticComic):
    """Retrieve Nameless PCs comics from its Tapastic series page."""
    # Other place where the comic is published:
    #  - http://namelesspcs.com
    url = "https://tapastic.com/series/NamelessPC"
    name = "namelesspcs-tapa"
    long_name = "NamelessPCs (from Tapastic)"
4858
4859
4860
class UbertoolTapa(GenericTapasticComic):
    """Retrieve Ubertool comics from its Tapastic series page."""
    # Other places where the comic is published:
    #  - http://ubertoolcomic.com
    #  - http://ubertool.tumblr.com
    _categories = ("UBERTOOL",)
    url = "https://tapastic.com/series/ubertool"
    name = "ubertool-tapa"
    long_name = "Ubertool (from Tapastic)"
4868
4869
4870
class SmallBlueYonderTapa(GenericTapasticComic):
    """Retrieve Small Blue Yonder comics from its Tapastic series page."""
    # Other place where the comic is published:
    #  - http://www.smallblueyonder.com
    url = "https://tapastic.com/series/Small-Blue-Yonder"
    name = "smallblue-tapa"
    long_name = "Small Blue Yonder (from Tapastic)"
4876
4877
4878
def get_subclasses(klass):
    """Gets the list of direct/indirect subclasses of a class.

    Direct subclasses come first, followed by the subclasses found by
    recursing into each of them in turn. A class that can be reached through
    several inheritance paths (multiple inheritance) is listed only once, at
    the position of its first occurrence.

    The class itself is not part of the result; a class without subclasses
    yields an empty list."""
    direct = klass.__subclasses__()
    collected = list(direct)
    for derived in direct:
        collected.extend(get_subclasses(derived))
    # Drop the duplicates introduced by diamond-shaped hierarchies while
    # keeping the discovery order stable.
    seen = set()
    unique = []
    for sub in collected:
        if sub not in seen:
            seen.add(sub)
            unique.append(sub)
    return unique
4884
4885
4886
def remove_st_nd_rd_th_from_date(string):
    """Function to transform 1st/2nd/3rd/4th in a parsable date format.

    Only an ordinal suffix ('st', 'nd', 'rd' or 'th') placed right after a
    digit is removed, so that words which merely contain these letters
    ('August', 'Monday', 'Sunday', ...) are left untouched.

    Returns the string with the ordinal suffixes stripped, e.g.
    '1st August 2015' becomes '1 August 2015'."""
    # The lookbehind anchors the suffix to a preceding digit without
    # consuming that digit.
    return re.sub(r'(?<=\d)(?:st|nd|rd|th)', '', string)
4894
4895
4896
def string_to_date(string, date_format, local=DEFAULT_LOCAL):
    """Function to convert string to date object.

    Wrapper around datetime.datetime.strptime which temporarily switches the
    process locale (LC_ALL) to `local` while parsing, so that locale-dependent
    directives are interpreted in that locale.

    string: text to parse.
    date_format: strptime format string.
    local: locale name to use while parsing.

    Returns a datetime.date.
    Raises ValueError if the string does not match the format and
    locale.Error if the requested locale cannot be set. The previous locale
    is restored even when parsing fails."""
    # format described in https://docs.python.org/3/library/datetime.html#strftime-and-strptime-behavior
    # Calling setlocale without a value only queries the current setting.
    prev_locale = locale.setlocale(locale.LC_ALL)
    if local == prev_locale:
        # Already in the right locale: nothing to switch, nothing to restore.
        return datetime.datetime.strptime(string, date_format).date()
    locale.setlocale(locale.LC_ALL, local)
    try:
        return datetime.datetime.strptime(string, date_format).date()
    finally:
        # Always put the previous locale back, including when strptime raises,
        # so a bad input does not leave the process in the wrong locale.
        locale.setlocale(locale.LC_ALL, prev_locale)
4907
4908
4909
# Registry of comic classes, built once when the module is imported.
# Every direct or indirect subclass of GenericComic:
COMICS = set(get_subclasses(GenericComic))
# Only the classes which define a name:
VALID_COMICS = [comic for comic in COMICS if comic.name is not None]
# Lookup from comic name to class; the check fails if two classes share a name.
COMIC_NAMES = dict((comic.name, comic) for comic in VALID_COMICS)
assert len(COMIC_NAMES) == len(VALID_COMICS)
# Python class names; the check fails if two classes share a class name.
CLASS_NAMES = set(comic.__name__ for comic in VALID_COMICS)
assert len(CLASS_NAMES) == len(VALID_COMICS)
4915