Completed
Push — master ( 2f6ce5...b9f2cc )
by De
01:04
created

comics.py (2 issues)

1
#! /usr/bin/python3
2
# vim: set expandtab tabstop=4 shiftwidth=4 :
3
"""Module to retrieve webcomics"""
4
5
from comic_abstract import GenericComic, get_date_for_comic
6
import re
7
from datetime import date, timedelta
8
import datetime
9
from urlfunctions import get_soup_at_url, urljoin_wrapper,\
10
    convert_iri_to_plain_ascii_uri, load_json_at_url, urlopen_wrapper
11
import json
12
import locale
13
import urllib
14
15
# Locale identifier (British English, UTF-8 encoding).
# NOTE(review): presumably the locale used by default when parsing dates
# elsewhere in this file - confirm against the code that reads it.
DEFAULT_LOCAL = 'en_GB.UTF-8'
16
17
18
class Xkcd(GenericComic):
    """Class to retrieve Xkcd comics through the site's JSON files."""
    name = 'xkcd'
    long_name = 'xkcd'
    url = 'http://xkcd.com'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generator yielding the comics following `last_comic`.

        Implementation of GenericComic's abstract method. The number of
        the most recent comic is read from `info.0.json`, then the JSON
        description of each comic numbered after `last_comic['num']`
        (or after 0 when there is no last comic) is loaded, completed
        with a few extra keys and yielded.
        """
        previous_num = last_comic['num'] if last_comic else 0
        latest_info = load_json_at_url(urljoin_wrapper(cls.url, 'info.0.json'))
        newest_num = latest_info['num']

        for num in range(previous_num + 1, newest_num + 1):
            if num == 404:
                # Number 404 is deliberately skipped: nothing is requested
                # for it (presumably because that comic does not exist).
                continue
            json_url = urljoin_wrapper(cls.url, '%d/info.0.json' % num)
            comic = load_json_at_url(json_url)
            # Images are stored as a list, even when there is a single one.
            comic['img'] = [comic['img']]
            comic['prefix'] = '%d-' % num
            comic['json_url'] = json_url
            comic['url'] = urljoin_wrapper(cls.url, str(num))
            # Date components are converted to integers.
            for date_key in ('day', 'month', 'year'):
                comic[date_key] = int(comic[date_key])
            assert comic['num'] == num
            yield comic
44
45
46
# Helper functions corresponding to get_url_from_link/get_url_from_archive_element
47
48
49
@classmethod
def get_href(cls, link):
    """Implementation of get_url_from_link/get_url_from_archive_element.

    Return the value stored under the 'href' key of `link`, unchanged.
    """
    href = link['href']
    return href
53
54
55
@classmethod
def join_cls_url_to_href(cls, link):
    """Implementation of get_url_from_link/get_url_from_archive_element.

    Return the 'href' value of `link` joined to the url of the class
    with urljoin_wrapper.
    """
    href = link['href']
    return urljoin_wrapper(cls.url, href)
59
60
61
class GenericNavigableComic(GenericComic):
    """Generic class for "navigable" comics : with first/next arrows.

    This class applies to comics where previous and next comics can be
    accessed from a given comic. Once given a starting point (either
    the first comic or the last comic retrieved), it will handle the
    navigation, the retrieval of the soup object and the setting of
    the 'url' attribute on retrieved comics. This limits a lot the
    amount of boilerplate code in the different implementation classes.

    The `get_next_comic` method is implemented in terms of new
    more specialized methods to be implemented/overridden:
        - get_first_comic_link
        - get_navi_link
        - get_comic_info
        - get_url_from_link
    """
    _categories = ('NAVIGABLE', )

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        Sometimes this can be retrieved from any comic page, sometimes on
        the archive page, sometimes it doesn't exist at all and one has
        to iterate backward to find it before hardcoding the result found.
        """
        raise NotImplementedError

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next (or previous - for dev purposes) comic.

        `next_` is truthy to get the next link, falsy to get the previous one.
        """
        raise NotImplementedError

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The returned dict must not contain a 'url' key (it is set by
        `get_next_comic`); returning None skips the comic.
        """
        raise NotImplementedError

    @classmethod
    def get_url_from_link(cls, link):
        """Get url corresponding to a link. Default implementation is similar to get_href."""
        return link['href']

    @classmethod
    def get_next_link(cls, last_soup):
        """Get link to next comic."""
        return cls.get_navi_link(last_soup, True)

    @classmethod
    def get_prev_link(cls, last_soup):
        """Get link to previous comic."""
        return cls.get_navi_link(last_soup, False)

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generic implementation of get_next_comic for navigable comics.

        Starts from the link following `last_comic` (or from the first
        comic link when there is no last comic) and follows "next" links
        until there is none left or until a link leads to the url that
        has just been handled.
        """
        url = last_comic['url'] if last_comic else None
        next_comic = \
            cls.get_next_link(get_soup_at_url(url)) \
            if url else \
            cls.get_first_comic_link()
        cls.log("next/first comic will be %s (url is %s)" % (str(next_comic), url))
        while next_comic:
            prev_url, url = url, cls.get_url_from_link(next_comic)
            if prev_url == url:
                # A "next" link pointing to the current page would loop forever.
                cls.log("got same url %s" % url)
                break
            cls.log("about to get %s (%s)" % (url, str(next_comic)))
            soup = get_soup_at_url(url)
            comic = cls.get_comic_info(soup, next_comic)
            if comic is not None:
                assert 'url' not in comic
                comic['url'] = url
                yield comic
            next_comic = cls.get_next_link(soup)
            cls.log("next comic will be %s" % str(next_comic))

    @classmethod
    def check_first_link(cls):
        """Check that navigation to first comic seems to be working - for dev purposes.

        Return True when a first link is found and its url can be
        retrieved, False otherwise (a diagnostic is then printed).
        """
        # `import urllib` alone does not guarantee that the `urllib.error`
        # submodule is loaded: import it explicitly before referring to it.
        import urllib.error
        cls.log("about to check first link")
        ok = True
        firstlink = cls.get_first_comic_link()
        if firstlink is None:
            print("From %s : no first link" % cls.url)
            ok = False
        else:
            firsturl = cls.get_url_from_link(firstlink)
            try:
                get_soup_at_url(firsturl)
            except urllib.error.HTTPError:
                print("From %s : invalid first url" % cls.url)
                ok = False
        cls.log("checked first link -> returned %d" % ok)
        return ok

    @classmethod
    def check_prev_next_links(cls, url):
        """Check that navigation to prev/next from a given URL seems to be working - for dev purposes.

        Going backward then forward (and forward then backward) from
        `url` is expected to lead back to `url`. Return True when this
        holds, False otherwise (a diagnostic is then printed).
        """
        cls.log("about to check prev/next from %s" % url)
        ok = True
        if url is None:
            prevlink, nextlink = None, None
        else:
            soup = get_soup_at_url(url)
            prevlink, nextlink = cls.get_prev_link(soup), cls.get_next_link(soup)
        if prevlink is None and nextlink is None:
            print("From %s : no previous nor next" % url)
            ok = False
        else:
            if prevlink:
                prevurl = cls.get_url_from_link(prevlink)
                prevsoup = get_soup_at_url(prevurl)
                # The previous page may have no "next" link at all: report
                # it as a navigation problem instead of failing on None.
                prevnextlink = cls.get_next_link(prevsoup)
                prevnext = cls.get_url_from_link(prevnextlink) if prevnextlink else None
                if prevnext != url:
                    print("From %s, going backward then forward leads to %s" % (url, prevnext))
                    ok = False
            if nextlink:
                nexturl = cls.get_url_from_link(nextlink)
                if nexturl != url:
                    nextsoup = get_soup_at_url(nexturl)
                    # Same precaution for a next page without "previous" link.
                    nextprevlink = cls.get_prev_link(nextsoup)
                    nextprev = cls.get_url_from_link(nextprevlink) if nextprevlink else None
                    if nextprev != url:
                        print("From %s, going forward then backward leads to %s" % (url, nextprev))
                        ok = False
        cls.log("checked prev/next from %s -> returned %d" % (url, ok))
        return ok

    @classmethod
    def check_navigation(cls, url):
        """Check that navigation functions seem to be working - for dev purposes.

        Both checks are always performed; the result is True only when
        both of them succeed.
        """
        cls.log("about to check navigation from %s" % url)
        first = cls.check_first_link()
        prevnext = cls.check_prev_next_links(url)
        ok = first and prevnext
        cls.log("checked navigation from %s -> returned %d" % (url, ok))
        return ok
199
200
201
class GenericListableComic(GenericComic):
    """Generic class for "listable" comics : with a list of comics (aka 'archive')

    The `get_next_comic` method is written on top of more specialized
    methods that subclasses have to implement/override:
        - get_archive_elements
        - get_url_from_archive_element
        - get_comic_info
    """
    _categories = ('LISTABLE', )

    @classmethod
    def get_archive_elements(cls):
        """Return an iterable over the elements of the archive."""
        raise NotImplementedError

    @classmethod
    def get_url_from_archive_element(cls, archive_elt):
        """Return the url corresponding to an archive element."""
        raise NotImplementedError

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Return the information about a particular comic (or None to skip it)."""
        raise NotImplementedError

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generic implementation of get_next_comic for listable comics.

        Archive elements are ignored until the url of `last_comic` is
        met; the elements after it are retrieved and yielded. When there
        is no last comic, every element is retrieved. A message is
        printed if the url of `last_comic` is never met.
        """
        waiting_for_url = last_comic['url'] if last_comic else None
        for element in cls.get_archive_elements():
            url = cls.get_url_from_archive_element(element)
            cls.log("considering %s" % url)
            if waiting_for_url is not None:
                # Still looking for the last retrieved comic: nothing is
                # fetched, the search just ends once its url is met.
                if waiting_for_url and waiting_for_url == url:
                    waiting_for_url = None
                continue
            cls.log("about to get %s (%s)" % (url, str(element)))
            comic = cls.get_comic_info(get_soup_at_url(url), element)
            if comic is None:
                continue
            assert 'url' not in comic
            comic['url'] = url
            yield comic
        if waiting_for_url is not None:
            print("Did not find %s : there might be a problem" % waiting_for_url)
246
247
# Helper functions corresponding to get_first_comic_link/get_navi_link
248
249
250
@classmethod
def get_link_rel_next(cls, last_soup, next_):
    """Implementation of get_navi_link.

    Look in `last_soup` for a 'link' element whose rel is 'next'
    (when `next_` is truthy) or 'prev' (otherwise).
    """
    if next_:
        rel_value = 'next'
    else:
        rel_value = 'prev'
    return last_soup.find('link', rel=rel_value)
254
255
256
@classmethod
def get_a_rel_next(cls, last_soup, next_):
    """Implementation of get_navi_link.

    Look in `last_soup` for an 'a' element whose rel is 'next'
    (when `next_` is truthy) or 'prev' (otherwise).
    """
    if next_:
        rel_value = 'next'
    else:
        rel_value = 'prev'
    return last_soup.find('a', rel=rel_value)
260
261
262
@classmethod
def get_a_navi_navinext(cls, last_soup, next_):
    """Implementation of get_navi_link.

    Look in `last_soup` for an 'a' element with class 'navi navi-next'
    (when `next_` is truthy) or 'navi navi-prev' (otherwise).
    """
    if next_:
        wanted_class = 'navi navi-next'
    else:
        wanted_class = 'navi navi-prev'
    return last_soup.find('a', class_=wanted_class)
266
267
268
@classmethod
def get_a_navi_comicnavnext_navinext(cls, last_soup, next_):
    """Implementation of get_navi_link.

    Look in `last_soup` for an 'a' element with class
    'navi comic-nav-next navi-next' (when `next_` is truthy) or
    'navi comic-nav-previous navi-prev' (otherwise).
    """
    if next_:
        wanted_class = 'navi comic-nav-next navi-next'
    else:
        wanted_class = 'navi comic-nav-previous navi-prev'
    return last_soup.find('a', class_=wanted_class)
272
273
274
@classmethod
def get_a_comicnavbase_comicnavnext(cls, last_soup, next_):
    """Implementation of get_navi_link.

    Look in `last_soup` for an 'a' element with class
    'comic-nav-base comic-nav-next' (when `next_` is truthy) or
    'comic-nav-base comic-nav-previous' (otherwise).
    """
    if next_:
        wanted_class = 'comic-nav-base comic-nav-next'
    else:
        wanted_class = 'comic-nav-base comic-nav-previous'
    return last_soup.find('a', class_=wanted_class)
278
279
280
@classmethod
def get_a_navi_navifirst(cls):
    """Implementation of get_first_comic_link.

    Retrieve the page at the url of the class and look in it for an
    'a' element with class 'navi navi-first'.
    """
    home_soup = get_soup_at_url(cls.url)
    return home_soup.find('a', class_='navi navi-first')
284
285
286
@classmethod
def get_div_navfirst_a(cls):
    """Implementation of get_first_comic_link.

    Retrieve the page at the url of the class, look in it for a 'div'
    element with class "nav-first" and return the 'a' element found
    inside that div.

    Return None when the page has no such div, consistently with the
    other link getters which return None when no link is found.
    """
    div = get_soup_at_url(cls.url).find('div', class_="nav-first")
    # find() gives None when nothing matches: do not chain on it blindly.
    return div.find('a') if div else None
290
291
292
@classmethod
def get_a_comicnavbase_comicnavfirst(cls):
    """Implementation of get_first_comic_link.

    Retrieve the page at the url of the class and look in it for an
    'a' element with class 'comic-nav-base comic-nav-first'.
    """
    home_soup = get_soup_at_url(cls.url)
    return home_soup.find('a', class_='comic-nav-base comic-nav-first')
296
297
298
@classmethod
def simulate_first_link(cls):
    """Implementation of get_first_comic_link building a link-like object.

    The returned dict has a single 'href' key holding the `first_url`
    attribute provided by the class.
    """
    fake_link = dict(href=cls.first_url)
    return fake_link
303
304
305
@classmethod
def navigate_to_first_comic(cls):
    """Implementation of get_first_comic_link navigating from a user provided
    URL to the first comic.

    Sometimes, the first comic cannot be reached directly, so the only
    way to find it is to follow the "previous" links until a page has
    none. Once the resulting URL is known, it is better to hardcode it
    but, for development purposes, it is convenient to have an
    automatic way to find it.

    Each visited URL is printed; the result is a link-like dict whose
    'href' is the last URL reached.
    """
    current_url = input("Get starting URL: ")
    while True:
        print(current_url)
        prev_link = cls.get_prev_link(get_soup_at_url(current_url))
        if not prev_link:
            # No previous comic: this page is considered as the first one.
            return {'href': current_url}
        current_url = cls.get_url_from_link(prev_link)
324
325
326
class GenericEmptyComic(GenericComic):
    """Generic class for comics where nothing is to be done.

    It can be used to temporarily deactivate a comic that does not work
    properly, by replacing `class MyComic(GenericWhateverComic)` with
    `class MyComic(GenericEmptyComic, GenericWhateverComic)`."""
    _categories = ('EMPTY', )

    @classmethod
    def get_next_comic(cls, last_comic):
        """Implementation of get_next_comic: log a message and return no comics."""
        cls.log("comic is considered as empty - returning no comic")
        no_comics = []
        return no_comics
339
340
341
class ExtraFabulousComics(GenericNavigableComic):
    """Class to retrieve Extra Fabulous Comics."""
    name = 'efc'
    long_name = 'Extra Fabulous Comics'
    url = 'http://extrafabulouscomics.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The images kept are the 'img' elements whose src starts with
        the upload directory of the site; the title and the date are
        read from the 'og:title' and 'article:published_time' metas.
        """
        # The url is escaped so that its dots only match literal dots.
        img_src_re = re.compile('^%s/wp-content/uploads/' % re.escape(cls.url))
        imgs = soup.find_all('img', src=img_src_re)
        title = soup.find('meta', property='og:title')['content']
        # Only the first 10 characters (the "%Y-%m-%d" part) are parsed.
        date_str = soup.find('meta', property='article:published_time')['content'][:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        return {
            'title': title,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'prefix': title + '-'
        }
365
366
367
class GenericLeMondeBlog(GenericNavigableComic):
    """Generic class to retrieve comics from Le Monde blogs.

    The first comic link is simulated from `first_url`, which is left
    as NotImplemented here and has to be provided by the subclasses.
    """
    _categories = ('LEMONDE', 'FRANCAIS')
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Title, short url, date and images are all read from the page
        (`soup`); the date is parsed as a French "%d %B %Y" string.
        """
        short_link = soup.find('link', rel='shortlink')
        url2 = short_link['href']
        title_meta = soup.find('meta', property='og:title')
        title = title_meta['content']
        date_str = soup.find("span", class_="entry-date").string
        day = string_to_date(date_str, "%d %B %Y", "fr_FR.utf8")
        image_metas = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'url2': url2,
            'img': [convert_iri_to_plain_ascii_uri(meta['content']) for meta in image_metas],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
390
391
392
class ZepWorld(GenericLeMondeBlog):
    """Class to retrieve Zep World comics (Le Monde blog)."""
    url = 'http://zepworld.blog.lemonde.fr'
    # Page used to simulate the link to the first comic.
    first_url = 'http://zepworld.blog.lemonde.fr/2014/10/31/bientot-le-blog-de-zep/'
    name = 'zep'
    long_name = 'Zep World'
398
399
400
class Vidberg(GenericLeMondeBlog):
    """Class to retrieve Vidberg comics (Le Monde blog)."""
    url = 'http://vidberg.blog.lemonde.fr'
    # This is not the real first comic: no efficient way to retrieve
    # it was found.
    first_url = 'http://vidberg.blog.lemonde.fr/2012/02/09/revue-de-campagne-la-campagne-du-modem-semballe/'
    name = 'vidberg'
    long_name = "Vidberg - l'actu en patates"
407
408
409
class Plantu(GenericLeMondeBlog):
    """Class to retrieve Plantu comics (Le Monde blog)."""
    url = 'http://plantu.blog.lemonde.fr'
    # Page used to simulate the link to the first comic.
    first_url = 'http://plantu.blog.lemonde.fr/2014/10/28/stress-test-a-bruxelles/'
    name = 'plantu'
    long_name = 'Plantu'
415
416
417
class XavierGorce(GenericLeMondeBlog):
    """Class to retrieve Xavier Gorce comics (Le Monde blog)."""
    url = 'http://xaviergorce.blog.lemonde.fr'
    # Page used to simulate the link to the first comic.
    first_url = 'http://xaviergorce.blog.lemonde.fr/2015/01/09/distinction/'
    name = 'gorce'
    long_name = 'Xavier Gorce'
423
424
425
class CartooningForPeace(GenericLeMondeBlog):
    """Class to retrieve Cartooning For Peace comics (Le Monde blog)."""
    url = 'http://cartooningforpeace.blog.lemonde.fr'
    # Page used to simulate the link to the first comic.
    first_url = 'http://cartooningforpeace.blog.lemonde.fr/2014/12/15/bado/'
    name = 'forpeace'
    long_name = 'Cartooning For Peace'
431
432
433
class Aurel(GenericLeMondeBlog):
    """Class to retrieve Aurel comics (Le Monde blog)."""
    url = 'http://aurel.blog.lemonde.fr'
    # Page used to simulate the link to the first comic.
    first_url = 'http://aurel.blog.lemonde.fr/2014/09/29/le-senat-repasse-a-droite/'
    name = 'aurel'
    long_name = 'Aurel'
439
440
441
class LesCulottees(GenericLeMondeBlog):
    """Class to retrieve Les Culottees comics (Le Monde blog)."""
    url = 'http://lesculottees.blog.lemonde.fr'
    # Page used to simulate the link to the first comic.
    first_url = 'http://lesculottees.blog.lemonde.fr/2016/01/11/clementine-delait-femme-a-barbe/'
    name = 'culottees'
    long_name = 'Les Culottees'
447
448
449
class UneAnneeAuLycee(GenericLeMondeBlog):
    """Class to retrieve Une Annee Au Lycee comics (Le Monde blog)."""
    url = 'http://uneanneeaulycee.blog.lemonde.fr'
    # Page used to simulate the link to the first comic.
    first_url = 'http://uneanneeaulycee.blog.lemonde.fr/2016/06/13/la-semaine-du-bac-est-arrivee/'
    name = 'lycee'
    long_name = 'Une Annee au Lycee'
455
456
457
class Rall(GenericNavigableComic):
    """Class to retrieve Ted Rall comics."""
    # Also on http://www.gocomics.com/tedrall
    name = 'rall'
    long_name = 'Ted Rall'
    url = 'http://rall.com/comic'
    _categories = ('RALL', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    # This is not the real first comic: no efficient way to retrieve
    # it was found.
    first_url = 'http://rall.com/2014/01/30/los-angeles-times-cartoon-well-miss-those-california-flowers'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Everything is read from the page (`soup`): metas for the title
        and the description, spans for the author and the date, and
        the images of the 'entry-content' div.
        """
        title = soup.find('meta', property='og:title')['content']
        author_span = soup.find("span", class_="author vcard")
        author = author_span.find("a").string
        date_str = soup.find("span", class_="entry-date").string
        day = string_to_date(date_str, "%B %d, %Y")
        desc = soup.find('meta', property='og:description')['content']
        content_imgs = soup.find('div', class_='entry-content').find_all('img')
        # The last 7 images are dropped (social media buttons).
        comic_imgs = content_imgs[:-7]
        return {
            'title': title,
            'author': author,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'description': desc,
            'img': [img['src'] for img in comic_imgs],
        }
488
489
490
class Dilem(GenericNavigableComic):
    """Class to retrieve Ali Dilem comics."""
    name = 'dilem'
    long_name = 'Ali Dilem'
    url = 'http://information.tv5monde.com/dilem'
    _categories = ('FRANCAIS', )
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://information.tv5monde.com/dilem/2004-06-26'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic (None when there is none).

        The 'li' classes are used the other way round: the next comic
        is looked for under 'prev' and the previous one under 'next'.
        """
        # prev is next / next is prev
        li_class = 'prev' if next_ else 'next'
        item = last_soup.find('li', class_=li_class)
        if not item:
            return None
        return item.find('a')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Short url, title, images and date are all read from the page
        (`soup`).
        """
        short_url = soup.find('link', rel='shortlink')['href']
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        image_metas = soup.find_all('meta', property='og:image')
        full_date = soup.find('span', property='dc:date')['content']
        # Only the first 10 characters (the "%Y-%m-%d" part) are parsed.
        day = string_to_date(full_date[:10], "%Y-%m-%d")
        return {
            'short_url': short_url,
            'title': title,
            'img': [meta['content'] for meta in image_metas],
            'day': day.day,
            'month': day.month,
            'year': day.year,
        }
524
525
526
class SpaceAvalanche(GenericNavigableComic):
    """Class to retrieve Space Avalanche comics."""
    name = 'avalanche'
    long_name = 'Space Avalanche'
    url = 'http://www.spaceavalanche.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics (hardcoded link-like dict)."""
        first_link = {
            'href': "http://www.spaceavalanche.com/2009/02/02/irish-sea/",
            'title': "Irish Sea",
        }
        return first_link

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The title comes from the link, the date from the three numeric
        parts of the url and the images from the 'entry' div of the page.
        """
        url_date_re = re.compile('.*/([0-9]*)/([0-9]*)/([0-9]*)/.*$')
        title = link['title']
        url = cls.get_url_from_link(link)
        date_parts = url_date_re.match(url).groups()
        year, month, day = map(int, date_parts)
        entry_imgs = soup.find("div", class_="entry").find_all("img")
        return {
            'title': title,
            'day': day,
            'month': month,
            'year': year,
            'img': [img['src'] for img in entry_imgs],
        }
554
555
556
class ZenPencils(GenericNavigableComic):
    """Class to retrieve ZenPencils comics."""
    # Also on http://zenpencils.tumblr.com
    # Also on http://www.gocomics.com/zen-pencils
    name = 'zenpencils'
    long_name = 'Zen Pencils'
    url = 'http://zenpencils.com'
    _categories = ('ZENPENCILS', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://zenpencils.com/comic/1-ralph-waldo-emerson-make-them-cry/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Images come from the 'comic' div, author and date from the
        'post-content' div, title and description from metas. The
        image urls are joined to the url of the class.
        """
        comic_imgs = soup.find('div', id='comic').find_all('img')
        # imgs2 = soup.find_all('meta', property='og:image')
        post = soup.find('div', class_='post-content')
        author_span = post.find("span", class_="post-author")
        author = author_span.find("a").string
        title = soup.find('meta', property='og:title')['content']
        date_str = post.find('span', class_='post-date').string
        day = string_to_date(date_str, "%B %d, %Y")
        # Sanity checks: at least one image, whose alt and title texts
        # are identical and either empty or equal to the comic title.
        assert comic_imgs
        assert all(img['alt'] == img['title'] for img in comic_imgs)
        assert all(img['alt'] in (title, "") for img in comic_imgs)
        desc = soup.find('meta', property='og:description')['content']
        return {
            'title': title,
            'description': desc,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [urljoin_wrapper(cls.url, img['src']) for img in comic_imgs],
        }
591
592
593
class ItsTheTie(GenericNavigableComic):
    """Class to retrieve It's the tie comics."""
    # Also on http://itsthetie.tumblr.com
    # Also on https://tapastic.com/series/itsthetie
    name = 'tie'
    long_name = "It's the tie"
    url = 'http://itsthetie.com'
    _categories = ('TIE', )
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The images are those of the 'og:image' metas followed by the
        bonus images (`data-oversrc` attributes) not already listed;
        the generic "bonus-panel.png" image is left out. Tags are an
        empty string when the page has no 'article:tag' meta.
        """
        title = soup.find('h1', class_='comic-title').find('a').string
        header = soup.find('header', class_='comic-meta entry-meta')
        date_str = header.find('a').string
        day = string_to_date(date_str, "%B %d, %Y")
        # Bonus images may or may not be in meta og:image.
        main_srcs = [meta['content'] for meta in soup.find_all('meta', property='og:image')]
        bonus_srcs = [img['data-oversrc'] for img in soup.find_all('img', attrs={'data-oversrc': True})]
        candidates = list(main_srcs)
        for src in bonus_srcs:
            if src not in main_srcs:
                candidates.append(src)
        kept_srcs = []
        for src in candidates:
            if src.endswith("/2016/01/bonus-panel.png"):
                continue
            kept_srcs.append(src)
        tag_meta = soup.find('meta', property='article:tag')
        if tag_meta:
            tags = tag_meta['content']
        else:
            tags = ""
        return {
            'title': title,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': kept_srcs,
            'tags': tags,
        }
627
628
629
class PenelopeBagieu(GenericNavigableComic):
    """Class to retrieve comics from Penelope Bagieu's blog."""
    name = 'bagieu'
    long_name = 'Ma vie est tout a fait fascinante (Bagieu)'
    url = 'http://www.penelope-jolicoeur.com'
    _categories = ('FRANCAIS', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.penelope-jolicoeur.com/2007/02/ma-vie-mon-oeuv.html'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The date (a French "%A %d %B %Y" string), the images of the
        'entry-body' div and the title are all read from the page.
        """
        date_header = soup.find('h2', class_='date-header')
        day = string_to_date(date_header.string, "%A %d %B %Y", "fr_FR.utf8")
        body_imgs = soup.find('div', class_='entry-body').find_all('img')
        title = soup.find('h3', class_='entry-header').string
        return {
            'title': title,
            'img': [img['src'] for img in body_imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
653
654
655
class OneOneOneOneComic(GenericNavigableComic):
    """Class to retrieve 1111 Comics."""
    # Also on http://comics1111.tumblr.com
    # Also on https://tapastic.com/series/1111-Comics
    name = '1111'
    long_name = '1111 Comics'
    url = 'http://www.1111comics.me'
    _categories = ('ONEONEONEONE', )
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Title and date are read from the comic header of the page, the
        images from its 'og:image' metas.
        """
        title_heading = soup.find('h1', class_='comic-title')
        title = title_heading.find('a').string
        meta_header = soup.find('header', class_='comic-meta entry-meta')
        day = string_to_date(meta_header.find('a').string, "%B %d, %Y")
        image_metas = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [meta['content'] for meta in image_metas],
        }
680
681
682
class AngryAtNothing(GenericNavigableComic):
    """Class to retrieve Angry at Nothing comics."""
    # Also on http://tapastic.com/series/Comics-yeah-definitely-comics-
    name = 'angry'
    long_name = 'Angry At Nothing'
    url = 'http://www.angryatnothing.net'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Title and date are read from the comic header of the page, the
        images from its 'og:image' metas.
        """
        title_heading = soup.find('h1', class_='comic-title')
        title = title_heading.find('a').string
        meta_header = soup.find('header', class_='comic-meta entry-meta')
        day = string_to_date(meta_header.find('a').string, "%B %d, %Y")
        image_metas = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [meta['content'] for meta in image_metas],
        }
705
706
707
class NeDroid(GenericNavigableComic):
    """Class to retrieve NeDroid comics."""
    name = 'nedroid'
    long_name = 'NeDroid'
    url = 'http://nedroid.com'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The comic number is extracted from the short url of the page,
        the date from the src of the (single) image of the 'comic' div
        and the titles from the alt and title texts of that image.
        """
        # Literal parts of the patterns are escaped so that dots only
        # match dots.
        short_url_re = re.compile(r'^%s/\?p=([0-9]*)' % re.escape(cls.url))
        comic_url_re = re.compile(r'//nedroid\.com/comics/([0-9]*)-([0-9]*)-([0-9]*).*')
        short_url = cls.get_url_from_link(soup.find('link', rel='shortlink'))
        num = int(short_url_re.match(short_url).groups()[0])
        imgs = soup.find('div', id='comic').find_all('img')
        # Check the number of images before relying on the first one.
        assert len(imgs) == 1
        img = imgs[0]
        year, month, day = [int(s) for s in comic_url_re.match(img['src']).groups()]
        title = img['alt']
        title2 = img['title']
        return {
            'short_url': short_url,
            'title': title,
            'title2': title2,
            'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
            'day': day,
            'month': month,
            'year': year,
            'num': num,
        }
738
739
740
class Garfield(GenericNavigableComic):
    """Class to retrieve Garfield comics."""
    # Also on http://www.gocomics.com/garfield
    name = 'garfield'
    long_name = 'Garfield'
    url = 'https://garfield.com'
    _categories = ('GARFIELD', )
    get_first_comic_link = simulate_first_link
    first_url = 'https://garfield.com/comic/1978/06/19'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic."""
        return last_soup.find('a', class_='comic-arrow-right' if next_ else 'comic-arrow-left')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The date is extracted from the url of the link (expected to be
        of the form `<url>/comic/<year>/<month>/<day>`), the images
        from the 'comic-display' div of the page.
        """
        url = cls.get_url_from_link(link)
        # The url is escaped so that its dots only match literal dots.
        date_re = re.compile('^%s/comic/([0-9]*)/([0-9]*)/([0-9]*)' % re.escape(cls.url))
        year, month, day = [int(s) for s in date_re.match(url).groups()]
        imgs = soup.find('div', class_='comic-display').find_all('img', class_='img-responsive')
        return {
            'month': month,
            'year': year,
            'day': day,
            'img': [i['src'] for i in imgs],
        }
768
769
770
class Dilbert(GenericNavigableComic):
    """Retriever for the Dilbert comic strips."""
    # Also on http://www.gocomics.com/dilbert-classics
    name = 'dilbert'
    long_name = 'Dilbert'
    url = 'http://dilbert.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://dilbert.com/strip/1989-04-16'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor inside the next/previous navigation div, or None when the div is absent."""
        nav_class = 'nav-comic nav-right' if next_ else 'nav-comic nav-left'
        nav_div = last_soup.find('div', class_=nav_class)
        if not nav_div:
            return None
        return nav_div.find('a')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for one strip from the page's <meta> tags."""
        def meta_content(prop):
            """Return the content of the first <meta> tag with the given property."""
            return soup.find('meta', property=prop)['content']

        title = meta_content('og:title')
        image_metas = soup.find_all('meta', property='og:image')
        description = meta_content('og:description')
        published = string_to_date(meta_content('article:publish_date'), "%B %d, %Y")
        author = meta_content('article:author')
        tags = meta_content('article:tag')
        return {
            'title': title,
            'description': description,
            'img': [meta['content'] for meta in image_metas],
            'author': author,
            'tags': tags,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
806
807
808
class VictimsOfCircumsolar(GenericNavigableComic):
    """Retriever for the Victims Of Circumsolar comics."""
    name = 'circumsolar'
    long_name = 'Victims Of Circumsolar'
    url = 'http://www.victimsofcircumsolar.com'
    get_navi_link = get_a_navi_comicnavnext_navinext
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.victimsofcircumsolar.com/comic/modern-addiction'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title, description, image urls) for one comic page."""
        # Date is on the archive page
        def last_meta_content(prop):
            """Return the content of the last <meta> tag with the given property."""
            return soup.find_all('meta', property=prop)[-1]['content']

        title = last_meta_content('og:title')
        description = last_meta_content('og:description')
        comic_imgs = soup.find('div', id='comic').find_all('img')
        # Every comic image is expected to repeat the page title in both attributes.
        assert all(img['title'] == img['alt'] == title for img in comic_imgs)
        return {
            'title': title,
            'description': description,
            'img': [img['src'] for img in comic_imgs],
        }
830
831
832
class ThreeWordPhrase(GenericNavigableComic):
    """Retriever for the Three Word Phrase comics."""
    # Also on http://www.threewordphrase.tumblr.com
    name = 'threeword'
    long_name = 'Three Word Phrase'
    url = 'http://threewordphrase.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the element wrapping the 'first' button image on the home page."""
        first_button = get_soup_at_url(cls.url).find('img', src='/firstlink.gif')
        return first_button.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the element wrapping the next/previous button, or None if it has no href."""
        button_src = '/nextlink.gif' if next_ else '/prevlink.gif'
        anchor = last_soup.find('img', src=button_src).parent
        if anchor.get('href') is None:
            return None
        return anchor

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (titles and image urls) for one comic page."""
        page_title = soup.find('title')
        # Images whose source ends with one of these are filtered out of the result.
        excluded_suffixes = ('link.gif', '32.png', 'twpbookad.jpg',
                             'merchad.jpg', 'header.gif', 'tipjar.jpg')
        comic_imgs = [tag for tag in soup.find_all('img')
                      if not tag['src'].endswith(excluded_suffixes)]
        alt_texts = [tag.get('alt') for tag in comic_imgs if tag.get('alt')]
        return {
            'title': page_title.string if page_title else None,
            'title2': '  '.join(alt_texts),
            'img': [urljoin_wrapper(cls.url, tag['src']) for tag in comic_imgs],
        }
864
865
866
class DeadlyPanel(GenericEmptyComic, GenericNavigableComic):
    """Retriever for the Deadly Panel comics."""
    # Also on https://tapastic.com/series/deadlypanel
    name = 'deadly'
    long_name = 'Deadly Panel'
    url = 'http://www.deadlypanel.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (image urls only) for one comic page."""
        comic_div = soup.find('div', id='comic')
        comic_imgs = comic_div.find_all('img')
        # Sanity check: alt and title are expected to be identical on every image.
        assert all(tag['alt'] == tag['title'] for tag in comic_imgs)
        return {
            'img': [tag['src'] for tag in comic_imgs],
        }
883
884
885 View Code Duplication
class TheGentlemanArmchair(GenericNavigableComic):
    """Retriever for The Gentleman Armchair comics."""
    name = 'gentlemanarmchair'
    long_name = 'The Gentleman Armchair'
    url = 'http://thegentlemansarmchair.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title, author, date, image urls) for one comic page."""
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        posted_on = string_to_date(soup.find('span', class_='post-date').string, "%B %d, %Y")
        comic_imgs = soup.find('div', id='comic').find_all('img')
        return {
            'img': [tag['src'] for tag in comic_imgs],
            'title': title,
            'author': author,
            'month': posted_on.month,
            'year': posted_on.year,
            'day': posted_on.day,
        }
909
910
911
class MyExtraLife(GenericNavigableComic):
    """Retriever for the My Extra Life comics."""
    name = 'extralife'
    long_name = 'My Extra Life'
    url = 'http://www.myextralife.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'first comic' navigation anchor found on the home page."""
        home = get_soup_at_url(cls.url)
        return home.find('a', class_='comic_nav_link first_comic_link')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title, date, image urls) for one comic page."""
        title = soup.find("h1", class_="comic_title").string
        posted_on = string_to_date(soup.find("span", class_="comic_date").string, "%B %d, %Y")
        comic_imgs = soup.find_all("img", class_="comic")
        # Sanity check: alt and title of every comic image are expected to equal the page title.
        assert all(tag['alt'] == tag['title'] == title for tag in comic_imgs)
        return {
            'title': title,
            # Images with an empty src are left out.
            'img': [tag['src'] for tag in comic_imgs if tag["src"]],
            'day': posted_on.day,
            'month': posted_on.month,
            'year': posted_on.year
        }
938
939
940
class SaturdayMorningBreakfastCereal(GenericNavigableComic):
    """Retriever for the Saturday Morning Breakfast Cereal comics."""
    # Also on http://www.gocomics.com/saturday-morning-breakfast-cereal
    # Also on http://smbc-comics.tumblr.com
    name = 'smbc'
    long_name = 'Saturday Morning Breakfast Cereal'
    url = 'http://www.smbc-comics.com'
    _categories = ('SMBC', )
    get_navi_link = get_a_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the rel='start' anchor found on the home page."""
        home = get_soup_at_url(cls.url)
        return home.find('a', rel='start')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title, date, image urls) for one comic page."""
        main_img = soup.find('img', id='cc-comic')
        img_urls = [main_img['src']]
        # An optional second image lives in the 'aftercomic' div; it is kept only when non-empty.
        after_div = soup.find('div', id='aftercomic')
        if after_div:
            after_url = after_div.find('img')['src']
            if after_url:
                img_urls.append(after_url)
        published = soup.find('div', class_='cc-publishtime').contents[0]
        posted_on = string_to_date(published, "%B %d, %Y")
        return {
            'title': main_img['title'],
            'img': [urljoin_wrapper(cls.url, src) for src in img_urls],
            'day': posted_on.day,
            'month': posted_on.month,
            'year': posted_on.year
        }
972
973
974
class PerryBibleFellowship(GenericListableComic):
    """Retriever for the Perry Bible Fellowship comics."""
    name = 'pbf'
    long_name = 'Perry Bible Fellowship'
    url = 'http://pbfcomics.com'
    get_url_from_archive_element = join_cls_url_to_href

    @classmethod
    def get_archive_elements(cls):
        """Return the home page anchors whose href looks like '/<digits>/', in reversed page order."""
        numbered_links = get_soup_at_url(cls.url).find_all('a', href=re.compile('^/[0-9]*/$'))
        return reversed(numbered_links)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (number, name, image url, prefix) for one comic page."""
        comic_url = cls.get_url_from_archive_element(link)
        img_src_re = re.compile('^/archive_b/PBF.*')
        name = link.string
        num = int(link['name'])
        # Sanity check: the link's href is expected to be built from its 'name' attribute.
        assert link['href'] == '/%d/' % num
        comic_imgs = soup.find_all('img', src=img_src_re)
        assert len(comic_imgs) == 1
        assert comic_imgs[0]['alt'] == name
        return {
            'num': num,
            'name': name,
            'img': [urljoin_wrapper(comic_url, tag['src']) for tag in comic_imgs],
            'prefix': '%d-' % num,
        }
1004
1005
1006
class Mercworks(GenericNavigableComic):
    """Retriever for the Mercworks comics."""
    # Also on http://mercworks.tumblr.com
    name = 'mercworks'
    long_name = 'Mercworks'
    url = 'http://mercworks.net'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for one comic page from its <meta> tags."""
        title = soup.find('meta', property='og:title')['content']
        # The description meta tag may be missing; fall back to an empty string.
        desc_meta = soup.find('meta', property='og:description')
        if desc_meta:
            desc = desc_meta['content']
        else:
            desc = ""
        author = soup.find('meta', attrs={'name': 'shareaholic:article_author_name'})['content']
        published = soup.find('meta', attrs={'name': 'shareaholic:article_published_time'})['content']
        # Only the first 10 characters are parsed, as a %Y-%m-%d date.
        posted_on = string_to_date(published[:10], "%Y-%m-%d")
        image_metas = soup.find_all('meta', property='og:image')
        return {
            'img': [meta['content'] for meta in image_metas],
            'title': title,
            'author': author,
            'desc': desc,
            'day': posted_on.day,
            'month': posted_on.month,
            'year': posted_on.year
        }
1035
1036
1037
class BerkeleyMews(GenericListableComic):
    """Retriever for the Berkeley Mews comics."""
    # Also on http://mews.tumblr.com
    # Also on http://www.gocomics.com/berkeley-mews
    name = 'berkeley'
    long_name = 'Berkeley Mews'
    url = 'http://www.berkeleymews.com'
    _categories = ('BERKELEY', )
    get_url_from_archive_element = get_href
    comic_num_re = re.compile('%s/\\?p=([0-9]*)$' % url)

    @classmethod
    def get_archive_elements(cls):
        """Return the archive page anchors matching comic_num_re, in reversed page order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, "?page_id=2"))
        return reversed(archive_soup.find_all('a', href=cls.comic_num_re))

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (number, titles, image url, date) for one comic page."""
        img_date_re = re.compile('.*/([0-9]*)-([0-9]*)-([0-9]*)-.*')
        comic_url = cls.get_url_from_archive_element(link)
        num = int(cls.comic_num_re.match(comic_url).group(1))
        comic_img = soup.find('div', id='comic').find('img')
        assert comic_img['alt'] == comic_img['title']
        title2 = comic_img['title']
        img_url = comic_img['src']
        # The date is read from the three dash-separated numbers in the image url.
        year, month, day = map(int, img_date_re.match(img_url).groups())
        return {
            'num': num,
            'title': link.string,
            'title2': title2,
            'img': [img_url],
            'year': year,
            'month': month,
            'day': day,
        }
1073
1074
1075
class GenericBouletCorp(GenericNavigableComic):
    """Shared retriever logic for the BouletCorp comics in different languages."""
    # Also on http://bouletcorp.tumblr.com
    _categories = ('BOULET', )
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the first anchor of the 'centered_nav' div on the home page."""
        nav_div = get_soup_at_url(cls.url).find('div', id='centered_nav')
        return nav_div.find_all('a')[0]

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (image urls, title, image texts, date) for one comic page."""
        # The year/month/day are read from the comic url itself.
        comic_url = cls.get_url_from_link(link)
        url_match = re.match('^%s/([0-9]*)/([0-9]*)/([0-9]*)/' % cls.url, comic_url)
        year, month, day = map(int, url_match.groups())
        story = soup.find('div', id='notes').find('div', class_='storycontent')
        story_imgs = story.find_all('img')
        img_titles = [tag.get('title') for tag in story_imgs]
        texts = '  '.join(text for text in img_titles if text)
        title = soup.find('title').string
        return {
            'img': [convert_iri_to_plain_ascii_uri(tag['src'])
                    for tag in story_imgs if tag.get('src') is not None],
            'title': title,
            'texts': texts,
            'year': year,
            'month': month,
            'day': day,
        }
1103
1104
1105
class BouletCorp(GenericBouletCorp):
    """Retriever for the BouletCorp comics hosted at www.bouletcorp.com."""
    url = 'http://www.bouletcorp.com'
    name = 'boulet'
    long_name = 'Boulet Corp'
    _categories = ('FRANCAIS', )
1111
1112
1113
class BouletCorpEn(GenericBouletCorp):
    """Retriever for the BouletCorp comics hosted at english.bouletcorp.com."""
    url = 'http://english.bouletcorp.com'
    name = 'boulet_en'
    long_name = 'Boulet Corp English'
1118
1119
1120
class AmazingSuperPowers(GenericNavigableComic):
    """Retriever for the Amazing Super Powers comics."""
    name = 'asp'
    long_name = 'Amazing Super Powers'
    url = 'http://www.amazingsuperpowers.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title, author, image urls, date) for one comic page."""
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        posted_on = string_to_date(soup.find('span', class_='post-date').string, "%B %d, %Y")
        comic_imgs = soup.find('div', id='comic').find_all('img')
        # The title is the space-joined 'title' attributes of the comic images.
        title = ' '.join(tag['title'] for tag in comic_imgs)
        assert all(tag['alt'] == tag['title'] for tag in comic_imgs)
        return {
            'title': title,
            'author': author,
            'img': [tag['src'] for tag in comic_imgs],
            'day': posted_on.day,
            'month': posted_on.month,
            'year': posted_on.year
        }
1145
1146
1147
class ToonHole(GenericListableComic):
    """Retriever for the Toon Hole comics."""
    # Also on http://tapastic.com/series/TOONHOLE
    name = 'toonhole'
    long_name = 'Toon Hole'
    url = 'http://www.toonhole.com'
    get_url_from_archive_element = get_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title, date, image urls) for one comic page."""
        title = link.string
        raw_date = soup.find('div', class_='comicdate').string.strip()
        posted_on = string_to_date(remove_st_nd_rd_th_from_date(raw_date), "%B %d, %Y")
        comic_imgs = soup.find('div', id='comic').find_all('img')
        # Sanity check: alt and title of every comic image are expected to equal the link text.
        assert all(tag['alt'] == tag['title'] == title for tag in comic_imgs)
        return {
            'title': title,
            'month': posted_on.month,
            'year': posted_on.year,
            'day': posted_on.day,
            'img': [convert_iri_to_plain_ascii_uri(tag['src']) for tag in comic_imgs],
        }

    @classmethod
    def get_archive_elements(cls):
        """Return the rel='bookmark' anchors of the archive page, in reversed page order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive/'))
        return reversed(archive_soup.find_all('a', rel='bookmark'))
1175
1176
1177
class Channelate(GenericNavigableComic):
    """Retriever for the Channelate comics."""
    name = 'channelate'
    long_name = 'Channelate'
    url = 'http://www.channelate.com'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for one comic page, including images from the extra panel page if any."""
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        posted_on = string_to_date(soup.find('span', class_='post-date').string, '%Y/%m/%d')
        title = soup.find('meta', property='og:title')['content']
        comic_div = soup.find('div', id='comic')
        if comic_div:
            all_imgs = comic_div.find_all('img')
        else:
            all_imgs = []
        # When an extra panel button exists, fetch the page it links to and
        # append that page's extra panel images to the list.
        extra_url = None
        extra_button = soup.find('div', id='extrapanelbutton')
        if extra_button:
            extra_url = extra_button.find('a')['href']
            extra_page = get_soup_at_url(extra_url)
            all_imgs.extend(extra_page.find_all('img', class_='extrapanelimage'))
        return {
            'url_extra': extra_url,
            'title': title,
            'author': author,
            'month': posted_on.month,
            'year': posted_on.year,
            'day': posted_on.day,
            'img': [urljoin_wrapper(cls.url, tag['src']) for tag in all_imgs],
        }
1211
1212
1213
class CyanideAndHappiness(GenericNavigableComic):
    """Class to retrieve Cyanide And Happiness comics."""
    name = 'cyanide'
    long_name = 'Cyanide and Happiness'
    url = 'http://explosm.net'
    _categories = ('NSFW', )
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics (the home page anchor titled 'Oldest comic')."""
        return get_soup_at_url(cls.url).find('a', title='Oldest comic')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns the navigation anchor, or None when the page has no such
        anchor or when the anchor carries no href.
        """
        # Search for the bare class token (no trailing whitespace) so the
        # lookup does not depend on how the class attribute was tokenised.
        link = last_soup.find('a', class_='next-comic' if next_ else 'previous-comic')
        # find() returns None when nothing matches: do not dereference it.
        if link is None or link.get('href') is None:
            return None
        return link

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with the comic number, author, publication date,
        file name prefix and image urls.
        """
        # The comic number is the second-to-last '/'-separated part of the og:url value.
        url2 = soup.find('meta', property='og:url')['content']
        num = int(url2.split('/')[-2])
        date_str = soup.find('h3').find('a').string
        day = string_to_date(date_str, '%Y.%m.%d')
        author = soup.find('small', class_="author-credit-name").string
        # The credit text is expected to read 'by <author>'; strip the prefix.
        assert author.startswith('by ')
        author = author[3:]
        imgs = soup.find_all('img', id='main-comic')
        return {
            'num': num,
            'author': author,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'prefix': '%d-' % num,
            'img': [convert_iri_to_plain_ascii_uri(urljoin_wrapper(cls.url, i['src'])) for i in imgs]
        }
1252
1253
1254
class MrLovenstein(GenericComic):
    """Retriever for the Mr Lovenstein comics."""
    # Also on https://tapastic.com/series/MrLovenstein
    name = 'mrlovenstein'
    long_name = 'Mr. Lovenstein'
    url = 'http://www.mrlovenstein.com'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generator yielding the comics after last_comic. Implementation of GenericComic's abstract method."""
        # TODO: more info from http://www.mrlovenstein.com/archive
        comic_num_re = re.compile('^/comic/([0-9]*)$')
        comic_img_re = re.compile('^/images/comics/')
        # The range of comic numbers is taken from the links found on the home page.
        home_links = get_soup_at_url(cls.url).find_all('a', href=comic_num_re)
        nums = [int(comic_num_re.match(anchor['href']).group(1)) for anchor in home_links]
        first, last = min(nums), max(nums)
        if last_comic:
            first = last_comic['num'] + 1
        for num in range(first, last + 1):
            url = urljoin_wrapper(cls.url, '/comic/%d' % num)
            soup = get_soup_at_url(url)
            # Images are kept in the reverse of their order on the page.
            imgs = list(soup.find_all('img', src=comic_img_re))
            imgs.reverse()
            description = soup.find('meta', attrs={'name': 'description'})['content']
            img_titles = [tag.get('title') for tag in imgs]
            yield {
                'url': url,
                'num': num,
                'texts': '  '.join(text for text in img_titles if text),
                'img': [urljoin_wrapper(url, tag['src']) for tag in imgs],
                'description': description,
            }
1284
1285
1286
class DinosaurComics(GenericListableComic):
    """Retriever for the Dinosaur Comics comics."""
    name = 'dinosaur'
    long_name = 'Dinosaur Comics'
    url = 'http://www.qwantz.com'
    get_url_from_archive_element = get_href
    comic_link_re = re.compile('^%s/index.php\\?comic=([0-9]*)$' % url)

    @classmethod
    def get_archive_elements(cls):
        """Return the comic anchors of the archive page (minus the first one), in reversed page order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive.php'))
        comic_links = archive_soup.find_all('a', href=cls.comic_link_re)
        # first link is random -> skip it
        return reversed(comic_links[1:])

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (date, image url, title, text, number) for one comic page."""
        comic_url = cls.get_url_from_archive_element(link)
        num = int(cls.comic_link_re.match(comic_url).group(1))
        # The archive link text holds the date; the node following the link holds the text.
        raw_date = link.string
        text = link.next_sibling.string
        posted_on = string_to_date(remove_st_nd_rd_th_from_date(raw_date), "%B %d, %Y")
        comic_img = soup.find('img', src=re.compile('^%s/comics/' % cls.url))
        return {
            'month': posted_on.month,
            'year': posted_on.year,
            'day': posted_on.day,
            'img': [comic_img.get('src')],
            'title': comic_img.get('title'),
            'text': text,
            'num': num,
        }
1319
1320
1321
class ButterSafe(GenericListableComic):
    """Retriever for the Butter Safe comics."""
    name = 'butter'
    long_name = 'ButterSafe'
    url = 'http://buttersafe.com'
    get_url_from_archive_element = get_href
    comic_link_re = re.compile('^%s/([0-9]*)/([0-9]*)/([0-9]*)/.*' % url)

    @classmethod
    def get_archive_elements(cls):
        """Return the archive page anchors matching comic_link_re, in reversed page order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive/'))
        return reversed(archive_soup.find_all('a', href=cls.comic_link_re))

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title, date, image url) for one comic page."""
        comic_url = cls.get_url_from_archive_element(link)
        title = link.string
        # The year/month/day are read from the comic url itself.
        year, month, day = map(int, cls.comic_link_re.match(comic_url).groups())
        comic_img = soup.find('div', id='comic').find('img')
        assert comic_img['alt'] == title
        return {
            'title': title,
            'day': day,
            'month': month,
            'year': year,
            'img': [comic_img['src']],
        }
1349
1350
1351
class CalvinAndHobbes(GenericComic):
    """Class to retrieve Calvin and Hobbes comics."""
    # Also on http://www.gocomics.com/calvinandhobbes/
    name = 'calvin'
    long_name = 'Calvin and Hobbes'
    # This is not through any official webpage but eh...
    url = 'http://marcel-oehler.marcellosendos.ch/comics/ch/'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generator to get the next comic. Implementation of GenericComic's abstract method.

        Walks the '<year>/<month>/' links of the index page, opens each month
        page that may still contain unseen comics and yields one dict per
        image dated strictly after the last retrieved comic.
        """
        last_date = get_date_for_comic(last_comic) if last_comic else date(1985, 11, 1)
        link_re = re.compile('^([0-9]*)/([0-9]*)/')
        for link in get_soup_at_url(cls.url).find_all('a', href=link_re):
            url = link['href']
            # Keep the raw strings: they are reused verbatim in the image
            # pattern and in the image url below.
            year_str, month_str = link_re.match(url).groups()
            year, month = int(year_str), int(month_str)
            # Coarse month-level filter: skip the month page when even
            # (first day + 31 days) is before the last retrieved date.
            if date(year, month, 1) + timedelta(days=31) < last_date:
                continue
            img_re = re.compile('^%s%s([0-9]*)' % (year_str, month_str))
            month_url = urljoin_wrapper(cls.url, url)
            for img in get_soup_at_url(month_url).find_all('img', src=img_re):
                img_src = img['src']
                # The digits following year and month in the file name are the day.
                day = int(img_re.match(img_src).group(1))
                comic_date = date(year, month, day)
                if comic_date > last_date:
                    yield {
                        'url': month_url,
                        'year': year,
                        'month': month,
                        'day': day,
                        'img': ['%s%s/%s/%s' % (cls.url, year_str, month_str, img_src)],
                    }
                    last_date = comic_date
1385
1386
1387
class AbstruseGoose(GenericListableComic):
    """Retriever for the Abstruse Goose comics."""
    name = 'abstruse'
    long_name = 'Abstruse Goose'
    url = 'http://abstrusegoose.com'
    get_url_from_archive_element = get_href
    comic_url_re = re.compile('^%s/([0-9]*)$' % url)
    comic_img_re = re.compile('^%s/strips/.*' % url)

    @classmethod
    def get_archive_elements(cls):
        """Return the archive page anchors matching comic_url_re, in page order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive'))
        return archive_soup.find_all('a', href=cls.comic_url_re)

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Build the info dict (number, title, image url) for one comic page."""
        comic_url = cls.get_url_from_archive_element(archive_elt)
        # The comic number is the numeric tail of the comic url.
        url_match = cls.comic_url_re.match(comic_url)
        num = int(url_match.group(1))
        return {
            'num': num,
            'title': archive_elt.string,
            'img': [soup.find('img', src=cls.comic_img_re)['src']]
        }
1410
1411
1412
class PhDComics(GenericNavigableComic):
    """Retriever for the PhD Comics."""
    name = 'phd'
    long_name = 'PhD Comics'
    url = 'http://phdcomics.com/comics/archive.php'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the element wrapping the 'first' button image."""
        first_button = get_soup_at_url(cls.url).find('img', src='images/first_button.gif')
        return first_button.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the element wrapping the next/previous button image, or None when the button is absent."""
        button_src = 'images/next_button.gif' if next_ else 'images/prev_button.gif'
        button = last_soup.find('img', src=button_src)
        if button is None:
            return None
        return button.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (date, image url, title) for one comic page."""
        date_font = soup.find('font', face='Arial,Helvetica,Geneva,Swiss,SunSans-Regular', color='white')
        date_str = date_font.string.strip()
        try:
            posted_on = string_to_date(date_str, '%m/%d/%Y')
        except ValueError:
            # Unparseable date: report it and fall back to today's date.
            print("Invalid date %s" % date_str)
            posted_on = date.today()
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        return {
            'year': posted_on.year,
            'month': posted_on.month,
            'day': posted_on.day,
            'img': [soup.find('img', id='comic')['src']],
            'title': title,
        }
1447
1448
1449
class Octopuns(GenericNavigableComic):
    """Retriever for the Octopuns comics."""
    # Also on http://octopuns.tumblr.com
    name = 'octopuns'
    long_name = 'Octopuns'
    url = 'http://www.octopuns.net'

    @classmethod
    def get_first_comic_link(cls):
        """Return the element wrapping the 'First' button image on the home page."""
        first_button = get_soup_at_url(cls.url).find('img', src=re.compile('.*/First.png'))
        return first_button.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the element wrapping the Next/Back button image, or None if it has no href."""
        button_re = re.compile('.*/Next.png' if next_ else '.*/Back.png')
        anchor = last_soup.find('img', src=button_re).parent
        if anchor.get('href') is None:
            return None
        return anchor

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (image urls, title, date) for one comic page."""
        title = soup.find('h3', class_='post-title entry-title').string
        header_date = soup.find('h2', class_='date-header').string
        posted_on = string_to_date(header_date, "%A, %B %d, %Y")
        image_links = soup.find_all('link', rel='image_src')
        return {
            'img': [tag['href'] for tag in image_links],
            'title': title,
            'day': posted_on.day,
            'month': posted_on.month,
            'year': posted_on.year,
        }
1481
1482
1483
class Quarktees(GenericNavigableComic):
    """Retriever for the Quarktees comics."""
    name = 'quarktees'
    long_name = 'Quarktees'
    url = 'http://www.quarktees.com/blogs/news'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.quarktees.com/blogs/news/12486621-coming-soon'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor with id 'article-next' (or 'article-prev')."""
        anchor_id = 'article-next' if next_ else 'article-prev'
        return last_soup.find('a', id=anchor_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title and image urls) for one article page."""
        title = soup.find('meta', property='og:title')['content']
        article_div = soup.find('div', class_='single-article')
        article_imgs = article_div.find_all('img')
        return {
            'title': title,
            'img': [urljoin_wrapper(cls.url, tag['src']) for tag in article_imgs],
        }
1507
1508
1509
class OverCompensating(GenericNavigableComic):
    """Retriever for the Over Compensating comics."""
    name = 'compensating'
    long_name = 'Over Compensating'
    url = 'http://www.overcompensating.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the home page anchor whose href ends with 'comic=1'."""
        home = get_soup_at_url(cls.url)
        return home.find('a', href=re.compile('comic=1$'))

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor leading to the next (or previous) comic."""
        wanted_title = 'next comic' if next_ else 'go back already'
        return last_soup.find('a', title=wanted_title)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary (number, image, title) for a comic page.

        The comic number is read from the trailing 'comic=<digits>' part of
        the comic url.
        """
        comic_url = cls.get_url_from_link(link)
        num_match = re.compile('.*comic=([0-9]*)$').match(comic_url)
        num = int(num_match.group(1))
        strip = soup.find('img', src=re.compile('^/oc/comics/.*'))
        return {
            'num': num,
            'img': [urljoin_wrapper(comic_url, strip['src'])],
            'title': strip.get('title')
        }
1539
1540
1541
class Oglaf(GenericNavigableComic):
    """Retriever for the Oglaf comics (categorised as NSFW)."""
    name = 'oglaf'
    long_name = 'Oglaf [NSFW]'
    url = 'http://oglaf.com'
    _categories = ('NSFW', )
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the parent element of the div with id 'st' on the home page."""
        home = get_soup_at_url(cls.url)
        return home.find("div", id="st").parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the parent of the 'nx' (or 'pvs') div, None if it is absent."""
        div_id = "nx" if next_ else "pvs"
        div = last_soup.find("div", id=div_id)
        if not div:
            return None
        return div.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary (title, images, description) for a page.

        Exactly one title image (inside the 'tt' div) and one strip image
        (id 'strip') are expected; their 'title' attributes are joined to
        form the description.
        """
        page_title = soup.find('title').string
        title_imgs = soup.find('div', id='tt').find_all('img')
        assert len(title_imgs) == 1
        strip_imgs = soup.find_all('img', id='strip')
        assert len(strip_imgs) == 1
        all_imgs = list(title_imgs)
        all_imgs.extend(strip_imgs)
        description = ' '.join(tag['title'] for tag in all_imgs)
        return {
            'title': page_title,
            'img': [tag['src'] for tag in all_imgs],
            'description': description,
        }
1575
1576
1577
class ScandinaviaAndTheWorld(GenericNavigableComic):
    """Retriever for the Scandinavia And The World comics."""
    name = 'satw'
    long_name = 'Scandinavia And The World'
    url = 'http://satwcomic.com'
    get_first_comic_link = simulate_first_link
    first_url = 'http://satwcomic.com/sweden-denmark-and-norway'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor with accesskey 'n' (next) or 'p' (previous)."""
        if next_:
            key = 'n'
        else:
            key = 'p'
        return last_soup.find('a', accesskey=key)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary (title, description, images) for a page."""
        label = soup.find('meta', attrs={'name': 'twitter:label1'})
        title = label['content']
        og_desc = soup.find('meta', property='og:description')
        description = og_desc['content']
        image_urls = [tag['src'] for tag in soup.find_all('img', itemprop="image")]
        return {
            'title': title,
            'description': description,
            'img': image_urls,
        }
1601
1602
1603
class SomethingOfThatIlk(GenericEmptyComic):
    """Placeholder for the Something Of That Ilk comics.

    The site does not exist anymore, hence the GenericEmptyComic base.
    """
    long_name = 'Something Of That Ilk'
    name = 'somethingofthatilk'
    url = 'http://www.somethingofthatilk.com'
1608
1609
1610
class InfiniteMonkeyBusiness(GenericNavigableComic):
    """Retriever for the Infinite Monkey Business comics."""
    name = 'monkey'
    long_name = 'Infinite Monkey Business'
    url = 'http://infinitemonkeybusiness.net'
    get_navi_link = get_a_navi_comicnavnext_navinext
    get_first_comic_link = simulate_first_link
    first_url = 'http://infinitemonkeybusiness.net/comic/pillory/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary (title, images) for a comic page."""
        og_title = soup.find('meta', property='og:title')
        title = og_title['content']
        comic_div = soup.find('div', id='comic')
        return {
            'title': title,
            'img': [tag['src'] for tag in comic_div.find_all('img')],
        }
1628
1629
1630
class Wondermark(GenericListableComic):
    """Retriever for the Wondermark comics."""
    name = 'wondermark'
    long_name = 'Wondermark'
    url = 'http://wondermark.com'
    get_url_from_archive_element = get_href

    @classmethod
    def get_archive_elements(cls):
        """Return the rel='bookmark' anchors of the archive page, reversed."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive/'))
        bookmarks = archive_soup.find_all('a', rel='bookmark')
        return reversed(bookmarks)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary (date, image, title, alt, tags) for a page.

        Pages without a 'comic' div yield an empty image list and empty
        title/alt strings.
        """
        raw_date = soup.find('div', class_='postdate').find('em').string
        day = string_to_date(remove_st_nd_rd_th_from_date(raw_date), "%B %d, %Y")
        # Defaults used when the page holds no comic div.
        img_src, alt, title = [], '', ''
        comic_div = soup.find('div', id='comic')
        if comic_div:
            img = comic_div.find('img')
            img_src = [img['src']]
            alt = img['alt']
            assert alt == img['title']
            title = soup.find('meta', property='og:title')['content']
        tag_links = soup.find('div', class_='postmeta').find_all('a', rel='tag')
        return {
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': img_src,
            'title': title,
            'alt': alt,
            'tags': ' '.join(t.string for t in tag_links),
        }
1667
1668
1669
class WarehouseComic(GenericNavigableComic):
    """Retriever for the Warehouse Comic comics."""
    name = 'warehouse'
    long_name = 'Warehouse Comic'
    url = 'http://warehousecomic.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary (images, title, date) for a comic page."""
        post_title = soup.find('h2', class_='post-title').string
        published = string_to_date(
            soup.find('span', class_='post-date').string,
            "%B %d, %Y")
        comic_imgs = soup.find('div', id='comic').find_all('img')
        return {
            'img': [tag['src'] for tag in comic_imgs],
            'title': post_title,
            'day': published.day,
            'month': published.month,
            'year': published.year,
        }
1691
1692
1693
class JustSayEh(GenericNavigableComic):
    """Retriever for the Just Say Eh comics."""
    # Also on http://tapastic.com/series/Just-Say-Eh
    name = 'justsayeh'
    long_name = 'Just Say Eh'
    url = 'http://www.justsayeh.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary (images, title, alt) for a comic page.

        Every comic image is expected to carry identical 'alt' and 'title'
        attributes; the alt text of the first image is the one reported.
        """
        post_title = soup.find('h2', class_='post-title').string
        comic_imgs = soup.find("div", id="comic").find_all("img")
        assert all(tag['alt'] == tag['title'] for tag in comic_imgs)
        first_alt = comic_imgs[0]['alt']
        return {
            'img': [tag['src'] for tag in comic_imgs],
            'title': post_title,
            'alt': first_alt,
        }
1714
1715
1716
class MouseBearComedy(GenericNavigableComic):
    """Retriever for the Mouse Bear Comedy comics."""
    # Also on http://mousebearcomedy.tumblr.com
    name = 'mousebear'
    long_name = 'Mouse Bear Comedy'
    url = 'http://www.mousebearcomedy.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary (date, images, title, author) for a page.

        Each comic image is expected to have 'alt' and 'title' attributes
        both equal to the post title.
        """
        post_title = soup.find('h2', class_='post-title').string
        author_name = soup.find("span", class_="post-author").find("a").string
        published = string_to_date(
            soup.find("span", class_="post-date").string,
            '%B %d, %Y')
        comic_imgs = soup.find("div", id="comic").find_all("img")
        assert all(tag['alt'] == tag['title'] == post_title for tag in comic_imgs)
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [tag['src'] for tag in comic_imgs],
            'title': post_title,
            'author': author_name,
        }
1742
1743
1744 View Code Duplication
class BigFootJustice(GenericNavigableComic):
    """Retriever for the Big Foot Justice comics."""
    # Also on http://tapastic.com/series/bigfoot-justice
    name = 'bigfoot'
    long_name = 'Big Foot Justice'
    url = 'http://bigfootjustice.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary (images, title) for a comic page.

        The title is made of the space-joined 'title' attributes of the
        comic images, which must match their 'alt' attributes.
        """
        comic_imgs = soup.find('div', id='comic').find_all('img')
        assert all(tag['title'] == tag['alt'] for tag in comic_imgs)
        joined_title = ' '.join(tag['title'] for tag in comic_imgs)
        return {
            'img': [tag['src'] for tag in comic_imgs],
            'title': joined_title,
        }
1763
1764
1765
class RespawnComic(GenericNavigableComic):
    """Class to retrieve Respawn Comic."""
    # Also on http://respawncomic.tumblr.com
    name = 'respawn'
    long_name = 'Respawn Comic'
    # No trailing whitespace: a space is not a valid url character and would
    # end up in any url built from this value.
    url = 'http://respawncomic.com'
    _categories = ('RESPAWN', )
    get_navi_link = get_a_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://respawncomic.com/comic/c0001/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Title and images are read from the Open Graph <meta> elements,
        author and publication date from the 'shareaholic' <meta> elements.
        Returns a dictionary with the keys 'title', 'author', 'day', 'month',
        'year' and 'img'.
        """
        title = soup.find('meta', property='og:title')['content']
        author = soup.find('meta', attrs={'name': 'shareaholic:article_author_name'})['content']
        date_str = soup.find('meta', attrs={'name': 'shareaholic:article_published_time'})['content']
        # Only the leading 'YYYY-MM-DD' part of the timestamp is parsed.
        date_str = date_str[:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        imgs = soup.find_all('meta', property='og:image')
        # og:image values that are filtered out of the result.
        skip_imgs = {
            'http://respawncomic.com/wp-content/uploads/2016/03/site/HAROLD2.png',
            'http://respawncomic.com/wp-content/uploads/2016/03/site/DEVA.png'
        }
        return {
            'title': title,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [i['content'] for i in imgs if i['content'] not in skip_imgs],
        }
1797
1798
1799
class SafelyEndangered(GenericNavigableComic):
    """Retriever for the Safely Endangered comics."""
    # Also on http://tumblr.safelyendangered.com
    name = 'endangered'
    long_name = 'Safely Endangered'
    url = 'http://www.safelyendangered.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.safelyendangered.com/comic/ignored/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary (date, images, title, alt) for a page.

        The alt text of the first comic image is reported; all images are
        expected to have matching 'alt' and 'title' attributes.
        """
        post_title = soup.find('h2', class_='post-title').string
        published = string_to_date(
            soup.find('span', class_='post-date').string,
            '%B %d, %Y')
        comic_imgs = soup.find('div', id='comic').find_all('img')
        first_alt = comic_imgs[0]['alt']
        assert all(tag['alt'] == tag['title'] for tag in comic_imgs)
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [tag['src'] for tag in comic_imgs],
            'title': post_title,
            'alt': first_alt,
        }
1826
1827
1828
class PicturesInBoxes(GenericNavigableComic):
    """Retriever for the Pictures In Boxes comics."""
    # Also on http://picturesinboxescomic.tumblr.com
    name = 'picturesinboxes'
    long_name = 'Pictures in Boxes'
    url = 'http://www.picturesinboxes.com'
    get_navi_link = get_a_navi_navinext
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.picturesinboxes.com/2013/10/26/tetris/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary (date, images, title, author) for a page.

        At least one image is expected in the 'comicpane' div, each with
        'title' and 'alt' attributes equal to the post title.
        """
        post_title = soup.find('h2', class_='post-title').string
        author_name = soup.find("span", class_="post-author").find("a").string
        published = string_to_date(
            soup.find('span', class_='post-date').string,
            '%B %d, %Y')
        pane_imgs = soup.find('div', class_='comicpane').find_all('img')
        assert pane_imgs
        assert all(tag['title'] == tag['alt'] == post_title for tag in pane_imgs)
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [tag['src'] for tag in pane_imgs],
            'title': post_title,
            'author': author_name,
        }
1856
1857
1858
class Penmen(GenericEmptyComic):
    """Placeholder for the Penmen comics (based on GenericEmptyComic)."""
    long_name = 'Penmen'
    name = 'penmen'
    url = 'http://penmen.com'
1863
1864
1865
class TheDoghouseDiaries(GenericNavigableComic):
    """Retriever for The Dog House Diaries comics."""
    name = 'doghouse'
    long_name = 'The Dog House Diaries'
    url = 'http://thedoghousediaries.com'

    @classmethod
    def get_first_comic_link(cls):
        """Return the anchor with id 'firstlink' found on the home page."""
        home = get_soup_at_url(cls.url)
        return home.find('a', id='firstlink')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor with id 'nextlink' (or 'previouslink')."""
        if next_:
            anchor_id = 'nextlink'
        else:
            anchor_id = 'previouslink'
        return last_soup.find('a', id=anchor_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary (titles, alt, image, number) for a page.

        The comic number is the last '/'-separated component of the comic
        url.
        """
        strip = soup.find('img', src=re.compile('^dhdcomics/.*'))
        comic_url = cls.get_url_from_link(link)
        title = soup.find('h2', id='titleheader').string
        subtitle = soup.find('div', id='subtext').string
        alt = strip.get('title')
        # The src attribute may be padded with whitespace, hence the strip().
        image_url = urljoin_wrapper(comic_url, strip['src'].strip())
        num = int(comic_url.rsplit('/', 1)[-1])
        return {
            'title': title,
            'title2': subtitle,
            'alt': alt,
            'img': [image_url],
            'num': num,
        }
1894
1895
1896
class InvisibleBread(GenericListableComic):
    """Class to retrieve Invisible Bread comics."""
    # Also on http://www.gocomics.com/invisible-bread
    name = 'invisiblebread'
    long_name = 'Invisible Bread'
    url = 'http://invisiblebread.com'

    @classmethod
    def get_archive_elements(cls):
        """Get the 'archive-title' cells of the archive page, reversed."""
        archive_url = urljoin_wrapper(cls.url, 'archives/')
        return reversed(get_soup_at_url(archive_url).find_all('td', class_='archive-title'))

    @classmethod
    def get_url_from_archive_element(cls, td):
        """Get url corresponding to an archive element."""
        return td.find('a')['href']

    @classmethod
    def get_comic_info(cls, soup, td):
        """Get information about a particular comics.

        Title, month and day come from the archive cell `td` (and its
        previous sibling); the year is the first numeric path component of
        the comic url. Returns a dictionary with the keys 'month', 'year',
        'day', 'img' and 'title'.
        """
        url = cls.get_url_from_archive_element(td)
        title = td.find('a').string
        month_and_day = td.previous_sibling.string
        # The site url is escaped so that its dots only match literal dots.
        link_re = re.compile('^%s/([0-9]+)/' % re.escape(cls.url))
        year = link_re.match(url).groups()[0]
        date_str = month_and_day + ' ' + year
        day = string_to_date(date_str, '%b %d %Y')
        # A single image is taken from the 'comic' div.
        imgs = [soup.find('div', id='comic').find('img')]
        assert all(i['title'] == i['alt'] == title for i in imgs)
        return {
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
            'title': title,
        }
1932 View Code Duplication
1933
1934
class DiscoBleach(GenericEmptyComic):
    """Placeholder for the Disco Bleach comics.

    Retrieval does not work anymore, hence the GenericEmptyComic base.
    """
    long_name = 'Disco Bleach'
    name = 'discobleach'
    url = 'http://discobleach.com'
1939
1940
1941
class TubeyToons(GenericEmptyComic):
    """Placeholder for the TubeyToons comics.

    Retrieval does not work anymore, hence the GenericEmptyComic base.
    """
    # Also on http://tapastic.com/series/Tubey-Toons
    # Also on http://tubeytoons.tumblr.com
    long_name = 'Tubey Toons'
    name = 'tubeytoons'
    url = 'http://tubeytoons.com'
    # NOTE(review): 'TUNEYTOONS' looks like a typo for 'TUBEYTOONS' - confirm
    # how the category is used elsewhere before renaming it.
    _categories = ('TUNEYTOONS', )
1949
1950
1951
class CompletelySeriousComics(GenericNavigableComic):
    """Retriever for the Completely Serious comics."""
    name = 'completelyserious'
    long_name = 'Completely Serious Comics'
    url = 'http://completelyseriouscomics.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary (date, images, title, alt, author).

        At least one image is expected in the 'comicpane' div; all images
        must share the same 'title' and 'alt' text, reported as 'alt'.
        """
        post_title = soup.find('h2', class_='post-title').string
        # The author is the second child node of the 'post-author' span.
        author_name = soup.find('span', class_='post-author').contents[1].string
        published = string_to_date(
            soup.find('span', class_='post-date').string,
            '%B %d, %Y')
        pane_imgs = soup.find('div', class_='comicpane').find_all('img')
        assert pane_imgs
        shared_alt = pane_imgs[0]['title']
        assert all(tag['title'] == tag['alt'] == shared_alt for tag in pane_imgs)
        return {
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [tag['src'] for tag in pane_imgs],
            'title': post_title,
            'alt': shared_alt,
            'author': author_name,
        }
1979
1980
1981
class PoorlyDrawnLines(GenericListableComic):
    """Class to retrieve Poorly Drawn Lines comics."""
    # Also on http://pdlcomics.tumblr.com
    name = 'poorlydrawn'
    long_name = 'Poorly Drawn Lines'
    url = 'http://poorlydrawnlines.com'
    _categories = ('POORLYDRAWN', )
    get_url_from_archive_element = get_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        At most one image is expected in the 'post' div. Returns a
        dictionary with the keys 'img' and 'title'; the title is the image
        'title' attribute, or an empty string when there is no image or no
        such attribute.
        """
        imgs = soup.find('div', class_='post').find_all('img')
        assert len(imgs) <= 1
        return {
            'img': [i['src'] for i in imgs],
            'title': imgs[0].get('title', "") if imgs else "",
        }

    @classmethod
    def get_archive_elements(cls):
        """Get the archive page anchors pointing to a comic, reversed."""
        archive_url = urljoin_wrapper(cls.url, 'archive')
        # The site url is escaped so that its dots only match literal dots.
        url_re = re.compile('^%s/comic/.' % re.escape(cls.url))
        return reversed(get_soup_at_url(archive_url).find_all('a', href=url_re))
2005
2006
2007
class LoadingComics(GenericNavigableComic):
    """Retriever for the Loading Artist comics."""
    name = 'loadingartist'
    long_name = 'Loading Artist'
    url = 'http://www.loadingartist.com/latest'

    @classmethod
    def get_first_comic_link(cls):
        """Return the anchor titled 'First' found at the class url."""
        latest = get_soup_at_url(cls.url)
        return latest.find('a', title="First")

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor titled 'Next' (or 'Previous')."""
        if next_:
            wanted_title = 'Next'
        else:
            wanted_title = 'Previous'
        return last_soup.find('a', title=wanted_title)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary (title, images, date) for a comic page.

        Only the images of the 'comic' div with empty 'alt' and 'title'
        attributes are kept.
        """
        heading = soup.find('h1').string
        raw_date = soup.find('span', class_='date').string.strip()
        published = string_to_date(raw_date, "%B %d, %Y")
        comic_div = soup.find('div', class_='comic')
        comic_imgs = comic_div.find_all('img', alt='', title='')
        return {
            'title': heading,
            'img': [tag['src'] for tag in comic_imgs],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2037
2038
2039
class ChuckleADuck(GenericNavigableComic):
    """Retriever for the Chuckle-A-Duck comics."""
    name = 'chuckleaduck'
    long_name = 'Chuckle-A-duck'
    url = 'http://chuckleaduck.com'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary (date, images, title, author) for a page.

        Pages without a 'comic' div (or without image) yield an empty image
        list and an empty title.
        """
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(remove_st_nd_rd_th_from_date(raw_date), "%B %d, %Y")
        author_name = soup.find('span', class_='post-author').string
        comic_div = soup.find('div', id='comic')
        if comic_div:
            comic_imgs = comic_div.find_all('img')
        else:
            comic_imgs = []
        if comic_imgs:
            title = comic_imgs[0]['title']
        else:
            title = ""
        assert all(tag['title'] == tag['alt'] == title for tag in comic_imgs)
        return {
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [tag['src'] for tag in comic_imgs],
            'title': title,
            'author': author_name,
        }
2065
2066
2067
class DepressedAlien(GenericNavigableComic):
    """Retriever for the Depressed Alien comics."""
    name = 'depressedalien'
    long_name = 'Depressed Alien'
    url = 'http://depressedalien.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the parent of the image named 'beginArrow' on the home page."""
        home = get_soup_at_url(cls.url)
        arrow = home.find('img', attrs={'name': 'beginArrow'})
        return arrow.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the parent of the 'rightArrow' (or 'leftArrow') image."""
        arrow_name = 'rightArrow' if next_ else 'leftArrow'
        arrow = last_soup.find('img', attrs={'name': arrow_name})
        return arrow.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary (title, images) for a comic page."""
        twitter_title = soup.find('meta', attrs={'name': 'twitter:title'})
        title = twitter_title['content']
        og_images = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'img': [meta['content'] for meta in og_images],
        }
2093
2094
2095
class ThingsInSquares(GenericListableComic):
    """Retriever for the Things In Squares comics."""
    # This can be retrieved in other languages
    # Also on https://tapastic.com/series/Things-in-Squares
    name = 'squares'
    long_name = 'Things in squares'
    url = 'http://www.thingsinsquares.com'

    @classmethod
    def get_comic_info(cls, soup, tr):
        """Build the info dictionary for a comic from its page and archive row.

        The archive row `tr` must hold exactly three cells: the second one
        contains the link (and title), the third one the date.
        """
        _, link_cell, date_cell = tr.find_all('td')
        anchor = link_cell.find('a')
        published = string_to_date(date_cell.string, "%m.%d.%y")
        title = anchor.string
        og_title = soup.find('meta', property='og:title')['content']
        og_desc = soup.find('meta', property='og:description')
        if og_desc:
            description = og_desc['content']
        else:
            description = ''
        tag_metas = soup.find_all('meta', property='article:tag')
        tags = ' '.join(meta['content'] for meta in tag_metas)
        content_imgs = soup.find('div', class_='entry-content').find_all('img')
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'title': title,
            'title2': og_title,
            'description': description,
            'tags': tags,
            'img': [tag['src'] for tag in content_imgs],
            'alt': ' '.join(tag['alt'] for tag in content_imgs),
        }

    @classmethod
    def get_url_from_archive_element(cls, tr):
        """Return the href of the link held by the second cell of the row."""
        _, link_cell, _ = tr.find_all('td')
        anchor = link_cell.find('a')
        return anchor['href']

    @classmethod
    def get_archive_elements(cls):
        """Return the rows of the archive page table body, reversed."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive-2'))
        rows = archive_soup.find('tbody').find_all('tr')
        return reversed(rows)
2137
2138
2139
class HappleTea(GenericNavigableComic):
    """Retriever for the Happle Tea comics."""
    name = 'happletea'
    long_name = 'Happle Tea'
    url = 'http://www.happletea.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary (title, images, alt, date, author).

        Title, author and date are read from the 'post-content' div; the
        'alt' value is the concatenation of the alt texts of the images.
        """
        comic_imgs = soup.find('div', id='comic').find_all('img')
        post = soup.find('div', class_='post-content')
        post_title = post.find('h2', class_='post-title').string
        author_name = post.find('a', rel='author').string
        published = string_to_date(
            post.find('span', class_='post-date').string,
            "%B %d, %Y")
        assert all(tag['alt'] == tag['title'] for tag in comic_imgs)
        return {
            'title': post_title,
            'img': [tag['src'] for tag in comic_imgs],
            'alt': ''.join(tag['alt'] for tag in comic_imgs),
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'author': author_name,
        }
2166
2167
2168
class FatAwesomeComics(GenericNavigableComic):
    """Retriever for the Fat Awesome comics."""
    # Also on http://fatawesomecomedy.tumblr.com
    name = 'fatawesome'
    long_name = 'Fat Awesome'
    url = 'http://fatawesome.com/comics'
    get_navi_link = get_a_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://fatawesome.com/shortbus/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary from the <meta> elements and the image.

        Exactly one image with data-recalc-dims="1" is expected; whatever
        follows the last '?' of its src is dropped from the image url.
        """
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        description = soup.find('meta', attrs={'name': 'description'})['content']
        tag_meta = soup.find('meta', property='article:tag')
        if tag_meta:
            tags = tag_meta['content']
        else:
            tags = ""
        published_meta = soup.find('meta', property='article:published_time')
        # Only the leading 'YYYY-MM-DD' part of the timestamp is parsed.
        published = string_to_date(published_meta['content'][:10], "%Y-%m-%d")
        comic_imgs = soup.find_all('img', attrs={'data-recalc-dims': "1"})
        assert len(comic_imgs) == 1
        return {
            'title': title,
            'description': description,
            'tags': tags,
            'alt': "".join(tag['alt'] for tag in comic_imgs),
            'img': [tag['src'].rsplit('?', 1)[0] for tag in comic_imgs],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2199
2200
2201
class AnythingComic(GenericListableComic):
    """Retriever for the Anything Comic comics."""
    # Also on http://tapastic.com/series/anything
    name = 'anythingcomic'
    long_name = 'Anything Comic'
    url = 'http://www.anythingcomic.com'

    @classmethod
    def get_archive_elements(cls):
        """Return the rows of the 'chapter_table' table of the archive page."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive/'))
        rows = archive_soup.find('table', id='chapter_table').find_all('tr')
        # The first 2 <tr>'s do not correspond to comics
        return rows[2:]

    @classmethod
    def get_url_from_archive_element(cls, tr):
        """Return the absolute url of the comic linked from an archive row."""
        _, comic_cell, _, _ = tr.find_all('td')
        anchor = comic_cell.find('a')
        return urljoin_wrapper(cls.url, anchor['href'])

    @classmethod
    def get_comic_info(cls, soup, tr):
        """Build the info dictionary for a comic from its page and archive row.

        The archive row `tr` must hold exactly four cells: number, comic
        link and date come from the first three. Exactly one image with id
        'comic_image' is expected in the page.
        """
        num_cell, comic_cell, date_cell, _ = tr.find_all('td')
        num = int(num_cell.string)
        title = comic_cell.find('a').string
        comic_imgs = soup.find_all('img', id='comic_image')
        published = string_to_date(date_cell.string, '%d %b %Y %I:%M %p')
        assert len(comic_imgs) == 1
        assert all(tag.get('alt') == tag.get('title') for tag in comic_imgs)
        return {
            'num': num,
            'title': title,
            'alt': comic_imgs[0].get('alt', ''),
            'img': [tag['src'] for tag in comic_imgs],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2241
2242
2243 View Code Duplication
class LonnieMillsap(GenericNavigableComic):
    """Retriever for Lonnie Millsap's comics."""
    name = 'millsap'
    long_name = 'Lonnie Millsap'
    url = 'http://www.lonniemillsap.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.lonniemillsap.com/?p=42'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary (title, author, images, date) for a page.

        Author, date and images are looked up inside the 'post-content' div.
        """
        post_title = soup.find('h2', class_='post-title').string
        post = soup.find('div', class_='post-content')
        author_name = post.find("span", class_="post-author").find("a").string
        published = string_to_date(
            post.find("span", class_="post-date").string,
            "%B %d, %Y")
        entry_imgs = post.find("div", class_="entry").find_all("img")
        return {
            'title': post_title,
            'author': author_name,
            'img': [tag['src'] for tag in entry_imgs],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2269
2270
2271
class LinsEditions(GenericNavigableComic):
    """Retriever for the L.I.N.S. Editions comics."""
    # Also on http://linscomics.tumblr.com
    name = "lins"
    long_name = "L.I.N.S. Editions"
    url = "https://linsedition.com"
    _categories = ("LINS", )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = "https://linsedition.com/2011/09/07/l-i-n-s/"

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic description from the page's <meta> tags."""
        og_title = soup.find("meta", property="og:title")["content"]
        og_images = soup.find_all("meta", property="og:image")
        published_meta = soup.find("meta", property="article:published_time")
        # Only the first 10 characters (the "%Y-%m-%d" part) are parsed.
        published = string_to_date(published_meta["content"][:10], "%Y-%m-%d")
        return {
            "title": og_title,
            "img": [meta["content"] for meta in og_images],
            "month": published.month,
            "year": published.year,
            "day": published.day,
        }
2296
2297
2298
class ThorsThundershack(GenericNavigableComic):
    """Retriever for the Thor's Thundershack comics."""
    # Also on http://tapastic.com/series/Thors-Thundershac
    name = "thor"
    long_name = "Thor's Thundershack"
    url = "http://www.thorsthundershack.com"
    _categories = ("THOR", )
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'first navlink' anchor found on the home page."""
        home = get_soup_at_url(cls.url)
        return home.find("a", class_="first navlink")

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the next (or previous) link, or None if there is none.

        Anchors whose href is '/comic' are skipped.
        """
        rel = "next" if next_ else "prev"
        usable = (anchor for anchor in last_soup.find_all("a", rel=rel)
                  if anchor["href"] != "/comic")
        return next(usable, None)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect images, date, author, title, alt text and description."""
        heading = soup.find("meta", attrs={"name": "description"})["content"]
        body_text = soup.find("div", itemprop="articleBody").text
        writer = soup.find("span", itemprop="author copyrightHolder").string
        pictures = soup.find_all("img", itemprop="image")
        # Sanity check: every image carries the same text as title and alt.
        for pic in pictures:
            assert pic["title"] == pic["alt"]
        alt_text = ""
        if pictures:
            alt_text = pictures[0]["alt"]
        raw_date = soup.find("time", itemprop="datePublished")["datetime"]
        published = string_to_date(raw_date, "%Y-%m-%d %H:%M:%S")
        return {
            "img": [urljoin_wrapper(cls.url, pic["src"]) for pic in pictures],
            "month": published.month,
            "year": published.year,
            "day": published.day,
            "author": writer,
            "title": heading,
            "alt": alt_text,
            "description": body_text,
        }
2341
2342
2343
class GerbilWithAJetpack(GenericNavigableComic):
    """Retriever for the Gerbil With A Jetpack comics."""
    name = "gerbil"
    long_name = "Gerbil With A Jetpack"
    url = "http://gerbilwithajetpack.com"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect images, title, alt text, author and date of a comic page."""
        heading = soup.find("h2", class_="post-title").string
        writer = soup.find("span", class_="post-author").find("a").string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find("div", id="comic").find_all("img")
        alt_text = pictures[0]["alt"]
        # Sanity check: all images share one text for both alt and title.
        for pic in pictures:
            assert pic["alt"] == pic["title"] == alt_text
        return {
            "img": [pic["src"] for pic in pictures],
            "title": heading,
            "alt": alt_text,
            "author": writer,
            "day": published.day,
            "month": published.month,
            "year": published.year
        }
2370
2371
2372
class EveryDayBlues(GenericNavigableComic):
    """Retriever for the Every Day Blues comics."""
    name = 'blues'
    long_name = 'Every Day Blues'
    url = 'http://everydayblues.net'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect images, title, author and date of a comic page.

        The date string is parsed with the 'de_DE.utf8' locale.
        """
        heading = soup.find('h2', class_='post-title').string
        writer = soup.find('span', class_='post-author').find('a').string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, '%d. %B %Y', 'de_DE.utf8')
        pictures = soup.find('div', id='comic').find_all('img')
        # Sanity checks: alt and title repeat the post title, one image at most.
        for pic in pictures:
            assert pic['alt'] == pic['title'] == heading
        assert len(pictures) < 2
        return {
            'img': [pic['src'] for pic in pictures],
            'title': heading,
            'author': writer,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2398
2399
2400
class BiterComics(GenericNavigableComic):
    """Retriever for the Biter Comics."""
    name = 'biter'
    long_name = 'Biter Comics'
    url = 'http://www.bitercomics.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect image, title, alt text, author and date of a comic page."""
        heading = soup.find('h1', class_='entry-title').string
        writer = soup.find('span', class_='author vcard').find('a').string
        raw_date = soup.find('span', class_='entry-date').string
        published = string_to_date(raw_date, '%B %d, %Y')
        pictures = soup.find('div', id='comic').find_all('img')
        # Sanity checks: alt equals title, and exactly one image is expected.
        for pic in pictures:
            assert pic['alt'] == pic['title']
        assert len(pictures) == 1
        alt_text = pictures[0]['alt']
        return {
            'img': [pic['src'] for pic in pictures],
            'title': heading,
            'alt': alt_text,
            'author': writer,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2428
2429
2430
class TheAwkwardYeti(GenericNavigableComic):
    """Retriever for The Awkward Yeti comics."""
    # Also on http://www.gocomics.com/the-awkward-yeti
    # Also on http://larstheyeti.tumblr.com
    # Also on https://tapastic.com/series/TheAwkwardYeti
    name = "yeti"
    long_name = "The Awkward Yeti"
    url = "http://theawkwardyeti.com"
    _categories = ("YETI", )
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect images, title and date of a comic page."""
        heading = soup.find("h2", class_="post-title").string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find("div", id="comic").find_all("img")
        # Sanity check: only the first image is required to have alt == title.
        if pictures:
            assert pictures[0]["alt"] == pictures[0]["title"]
        return {
            "img": [pic["src"] for pic in pictures],
            "title": heading,
            "day": published.day,
            "month": published.month,
            "year": published.year
        }
2457
2458
2459
class PleasantThoughts(GenericNavigableComic):
    """Retriever for the Pleasant Thoughts comics."""
    name = "pleasant"
    long_name = "Pleasant Thoughts"
    url = "http://pleasant-thoughts.com"
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Read the title and the images out of the 'post-content' div."""
        content = soup.find("div", class_="post-content")
        heading = content.find("h2", class_="post-title").string
        pictures = content.find("div", class_="entry").find_all("img")
        return {
            "title": heading,
            "img": [pic["src"] for pic in pictures],
        }
2477
2478
2479
class MisterAndMe(GenericNavigableComic):
    """Retriever for the Mister & Me comics."""
    # Also on http://www.gocomics.com/mister-and-me
    # Also on https://tapastic.com/series/Mister-and-Me
    name = "mister"
    long_name = "Mister & Me"
    url = "http://www.mister-and-me.com"
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect image, title, alt text, author and date of a comic page."""
        heading = soup.find("h2", class_="post-title").string
        writer = soup.find("span", class_="post-author").find("a").string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find("div", id="comic").find_all("img")
        # Sanity checks: alt equals title, and at most one image is expected.
        for pic in pictures:
            assert pic["alt"] == pic["title"]
        assert len(pictures) < 2
        alt_text = ""
        if pictures:
            alt_text = pictures[0]["alt"]
        return {
            "img": [pic["src"] for pic in pictures],
            "title": heading,
            "alt": alt_text,
            "author": writer,
            "day": published.day,
            "month": published.month,
            "year": published.year
        }
2509
2510
2511
class LastPlaceComics(GenericNavigableComic):
    """Retriever for the Last Place Comics."""
    name = "lastplace"
    long_name = "Last Place Comics"
    url = 'http://lastplacecomics.com'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect image, title, alt text, author and date of a comic page."""
        heading = soup.find("h2", class_="post-title").string
        writer = soup.find('span', class_='post-author').find('a').string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, '%B %d, %Y')
        pictures = soup.find('div', id='comic').find_all('img')
        # Sanity checks: alt equals title, and at most one image is expected.
        for pic in pictures:
            assert pic["alt"] == pic["title"]
        assert len(pictures) < 2
        alt_text = ""
        if pictures:
            alt_text = pictures[0]["alt"]
        return {
            "img": [pic["src"] for pic in pictures],
            "title": heading,
            "alt": alt_text,
            "author": writer,
            "day": published.day,
            "month": published.month,
            "year": published.year
        }
2539
2540
2541
class TalesOfAbsurdity(GenericNavigableComic):
    """Retriever for the Tales Of Absurdity comics."""
    # Also on http://tapastic.com/series/Tales-Of-Absurdity
    # Also on http://talesofabsurdity.tumblr.com
    name = "absurdity"
    long_name = "Tales of Absurdity"
    url = "http://talesofabsurdity.com"
    _categories = ("ABSURDITY", )
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect images, title, alt text, author and date of a comic page."""
        heading = soup.find("h2", class_="post-title").string
        writer = soup.find("span", class_="post-author").find("a").string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find("div", id="comic").find_all("img")
        # Sanity check: alt and title are expected to match on every image.
        for pic in pictures:
            assert pic["alt"] == pic["title"]
        alt_text = ""
        if pictures:
            alt_text = pictures[0]["alt"]
        return {
            "img": [pic["src"] for pic in pictures],
            "title": heading,
            "alt": alt_text,
            "author": writer,
            "day": published.day,
            "month": published.month,
            "year": published.year
        }
2571
2572
2573
class EndlessOrigami(GenericNavigableComic):
    """Retriever for the Endless Origami comics."""
    name = 'origami'
    long_name = 'Endless Origami'
    url = 'http://endlessorigami.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect images, title, alt text, author and date of a comic page."""
        heading = soup.find("h2", class_="post-title").string
        writer = soup.find('span', class_='post-author').find('a').string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, '%B %d, %Y')
        pictures = soup.find('div', id='comic').find_all('img')
        # Sanity check: alt and title are expected to match on every image.
        for pic in pictures:
            assert pic["alt"] == pic["title"]
        alt_text = ""
        if pictures:
            alt_text = pictures[0]["alt"]
        return {
            "img": [pic["src"] for pic in pictures],
            "title": heading,
            "alt": alt_text,
            "author": writer,
            "day": published.day,
            "month": published.month,
            "year": published.year
        }
2600
2601
2602
class PlanC(GenericNavigableComic):
    """Retriever for the Plan C comics."""
    name = "planc"
    long_name = "Plan C"
    url = "http://www.plancomic.com"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect title, images and date of a comic page."""
        heading = soup.find("h2", class_="post-title").string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, '%B %d, %Y')
        pictures = soup.find("div", id="comic").find_all("img")
        return {
            "title": heading,
            "img": [pic["src"] for pic in pictures],
            "month": published.month,
            "year": published.year,
            "day": published.day,
        }
2624
2625
2626 View Code Duplication
class BuniComic(GenericNavigableComic):
    """Retriever for the Buni comics."""
    name = "buni"
    long_name = "BuniComics"
    url = "http://www.bunicomic.com"
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the image and its title (used as the comic title)."""
        pictures = soup.find("div", id="comic").find_all("img")
        # Sanity checks: alt equals title, and exactly one image is expected.
        for pic in pictures:
            assert pic["alt"] == pic["title"]
        assert len(pictures) == 1
        return {
            "img": [pic["src"] for pic in pictures],
            "title": pictures[0]["title"],
        }
2644
2645
2646
class GenericCommitStrip(GenericNavigableComic):
    """Common base for the Commit Strip retrievers (one subclass per language)."""
    get_navi_link = get_a_rel_next
    get_first_comic_link = simulate_first_link
    first_url = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect titles, description and image URLs of a comic page.

        'title2' is the space-joined 'title' attributes of the images (an
        empty string stands in for a missing attribute).  Image sources are
        converted to plain ASCII URIs and joined to the class URL.
        """
        og_description = soup.find("meta", property="og:description")["content"]
        og_title = soup.find("meta", property="og:title")["content"]
        pictures = soup.find("div", class_="entry-content").find_all("img")
        hover_texts = [pic.get("title", "") for pic in pictures]
        image_urls = [
            urljoin_wrapper(cls.url, convert_iri_to_plain_ascii_uri(pic["src"]))
            for pic in pictures
        ]
        return {
            "title": og_title,
            "title2": " ".join(hover_texts),
            "description": og_description,
            "img": image_urls,
        }
2665
2666
2667
class CommitStripFr(GenericCommitStrip):
    """Retriever for the French edition of Commit Strip."""
    name = "commit_fr"
    long_name = "Commit Strip (Fr)"
    url = "http://www.commitstrip.com/fr"
    _categories = ("FRANCAIS", )
    first_url = "http://www.commitstrip.com/fr/2012/02/22/interview/"
2674
2675
2676
class CommitStripEn(GenericCommitStrip):
    """Retriever for the English edition of Commit Strip."""
    name = "commit_en"
    long_name = "Commit Strip (En)"
    url = "http://www.commitstrip.com/en"
    first_url = "http://www.commitstrip.com/en/2012/02/22/interview/"
2682
2683
2684
class GenericBoumerie(GenericNavigableComic):
    """Common base for the Boumeries retrievers (one subclass per language).

    Subclasses provide 'date_format' and 'lang', which are passed to
    string_to_date when the post date is parsed.
    """
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next
    date_format = NotImplemented
    lang = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect short URL, images, title, author and date of a comic page."""
        heading = soup.find("h2", class_="post-title").string
        shortlink = soup.find("link", rel="shortlink")["href"]
        writer = soup.find("span", class_="post-author").find("a").string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, cls.date_format, cls.lang)
        pictures = soup.find("div", id="comic").find_all("img")
        # Sanity check: alt and title are expected to match on every image.
        for pic in pictures:
            assert pic["alt"] == pic["title"]
        return {
            "short_url": shortlink,
            "img": [pic["src"] for pic in pictures],
            "title": heading,
            "author": writer,
            "month": published.month,
            "year": published.year,
            "day": published.day,
        }
2710
2711
2712
class BoumerieEn(GenericBoumerie):
    """Retriever for the English edition of Boumeries."""
    name = "boumeries_en"
    long_name = "Boumeries (En)"
    url = "http://comics.boumerie.com"
    date_format = '%B %d, %Y'
    lang = "en_GB.UTF-8"
2719
2720
2721
class BoumerieFr(GenericBoumerie):
    """Retriever for the French edition of Boumeries."""
    name = "boumeries_fr"
    long_name = "Boumeries (Fr)"
    url = "http://bd.boumerie.com"
    _categories = ("FRANCAIS", )
    date_format = '%A, %d %B %Y'
    lang = 'fr_FR.utf8'
2729
2730
2731
class UnearthedComics(GenericNavigableComic):
    """Class to retrieve Unearthed comics."""
    # Also on http://tapastic.com/series/UnearthedComics
    # Also on http://unearthedcomics.tumblr.com
    name = 'unearthed'
    long_name = 'Unearthed Comics'
    url = 'http://unearthedcomics.com'
    _categories = ('UNEARTHED', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://unearthedcomics.com/comics/world-with-turn-signals/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        Args:
            soup: parsed page of the comic.
            link: link that led to the page (unused here).

        Returns:
            A dict with 'title' (empty string when the page has neither an
            <h1> nor an <h2>), 'description' (content of the og:description
            meta tag, or None when the tag is absent), 'url2' (shortlink),
            'img' (list of image sources) and 'month'/'year'/'day'.
        """
        short_url = soup.find('link', rel='shortlink')['href']
        title_elt = soup.find('h1') or soup.find('h2')
        title = title_elt.string if title_elt else ""
        # Store the text of the meta tag rather than the tag object itself,
        # like the other classes reading og:description do.
        desc_elt = soup.find('meta', property='og:description')
        desc = desc_elt['content'] if desc_elt is not None else None
        date_str = soup.find('time', class_='published updated hidden')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        post = soup.find('div', class_="entry content entry-content type-portfolio")
        imgs = post.find_all('img')
        return {
            'title': title,
            'description': desc,
            'url2': short_url,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
2763
2764
2765
class Optipess(GenericNavigableComic):
    """Retriever for the Optipess comics."""
    name = "optipess"
    long_name = "Optipess"
    url = "http://www.optipess.com"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect title, alt text, author, images and date of a comic page.

        A page without a 'comic' div yields an empty image list and an
        empty alt text.
        """
        heading = soup.find("h2", class_="post-title").string
        writer = soup.find('span', class_='post-author').find('a').string
        comic_div = soup.find("div", id="comic")
        if comic_div:
            pictures = comic_div.find_all("img")
        else:
            pictures = []
        hover_text = ""
        if pictures:
            hover_text = pictures[0]["title"]
        # Sanity check: all images share one text for both alt and title.
        for pic in pictures:
            assert pic["alt"] == pic["title"] == hover_text
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, '%B %d, %Y')
        return {
            "title": heading,
            "alt": hover_text,
            "author": writer,
            "img": [pic["src"] for pic in pictures],
            "month": published.month,
            "year": published.year,
            "day": published.day,
        }
2793
2794
2795
class PainTrainComic(GenericNavigableComic):
    """Class to retrieve Pain Train Comics."""
    name = 'paintrain'
    long_name = 'Pain Train Comics'
    url = 'http://paintraincomic.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        Args:
            soup: parsed page of the comic.
            link: link that led to the page (unused here).

        Returns:
            A dict with 'short_url', 'num' (the number found after '?p=' in
            the short URL), 'img', 'month'/'year'/'day', 'alt' and 'title'.
        """
        title = soup.find('h2', class_='post-title').string
        short_url = soup.find('link', rel='shortlink')['href']
        # The URL is escaped so that its dots only match literal dots.
        short_url_re = re.compile(r'^%s/\?p=([0-9]*)' % re.escape(cls.url))
        num = int(short_url_re.match(short_url).groups()[0])
        imgs = soup.find('div', id='comic').find_all('img')
        alt = imgs[0]['title']
        # Sanity check: all images share one text for both alt and title.
        assert all(i['alt'] == i['title'] == alt for i in imgs)
        date_str = soup.find('span', class_='post-date').string
        day = string_to_date(date_str, "%d/%m/%Y")
        return {
            'short_url': short_url,
            'num': num,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'alt': alt,
            'title': title,
        }
2825
2826
2827
class MoonBeard(GenericNavigableComic):
    """Class to retrieve MoonBeard comics."""
    # Also on http://blog.squiresjam.es/moonbeard
    # Also on http://www.webtoons.com/en/comedy/moon-beard/list?title_no=471
    name = 'moonbeard'
    long_name = 'Moon Beard'
    url = 'http://moonbeard.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        Args:
            soup: parsed page of the comic.
            link: link that led to the page (unused here).

        Returns:
            A dict with 'short_url', 'num' (the number found after '?p=' in
            the short URL), 'img', 'month'/'year'/'day', 'title', 'tags'
            (space-joined article:tag meta contents), 'alt' and 'author'.
        """
        title = soup.find('h2', class_='post-title').string
        short_url = soup.find('link', rel='shortlink')['href']
        # The URL is escaped so that its dots only match literal dots.
        short_url_re = re.compile(r'^%s/\?p=([0-9]*)' % re.escape(cls.url))
        num = int(short_url_re.match(short_url).groups()[0])
        imgs = soup.find('div', id='comic').find_all('img')
        alt = imgs[0]['title']
        # Sanity check: all images share one text for both alt and title.
        assert all(i['alt'] == i['title'] == alt for i in imgs)
        date_str = soup.find('span', class_='post-date').string
        day = string_to_date(date_str, "%B %d, %Y")
        tags = ' '.join(t['content'] for t in soup.find_all('meta', property='article:tag'))
        author = soup.find('span', class_='post-author').string
        return {
            'short_url': short_url,
            'num': num,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'title': title,
            'tags': tags,
            'alt': alt,
            'author': author,
        }
2863
2864
2865
class AHamADay(GenericNavigableComic):
    """Class to retrieve A Ham A Day comics."""
    name = 'ham'
    long_name = 'A Ham A Day'
    url = 'http://www.ahammaday.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.ahammaday.com/today/3/6/french'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Args:
            last_soup: parsed page of the comic to navigate from.
            next_: True to get the next comic, False for the previous one.

        Returns:
            The <a> element inside the matching <li>, or None when the page
            has no such <li> or the <li> contains no link.
        """
        # prev is next / next is prev
        li = last_soup.find('li', class_='previous' if next_ else 'next')
        # A missing <li> means there is nowhere to go, not an error.
        return li.find('a') if li is not None else None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        Args:
            soup: parsed page of the comic.
            link: link that led to the page (unused here).

        Returns:
            A dict with 'img' (contents of the itemprop=image meta tags),
            'title', 'author' and 'day'/'month'/'year'.
        """
        date_str = soup.find('time', class_='published')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        author = soup.find('span', class_='blog-author').find('a').string
        title = soup.find('meta', property='og:title')['content']
        imgs = soup.find_all('meta', itemprop='image')
        return {
            'img': [i['content'] for i in imgs],
            'title': title,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
        }
2896
2897
2898
class LittleLifeLines(GenericNavigableComic):
    """Retriever for the Little Life Lines comics."""
    # Also on https://little-life-lines.tumblr.com
    name = "life"
    long_name = "Little Life Lines"
    url = "http://www.littlelifelines.com"
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = "http://www.littlelifelines.com/latest/well-done"

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the next (or previous) link, or None without a matching <li>.

        The site's labels are inverted: its 'prev' item leads to the next
        comic and its 'next' item to the previous one.
        """
        wanted = "prev" if next_ else "next"
        item = last_soup.find("li", class_=wanted)
        if item:
            return item.find("a")
        return None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect title, alt text, description, author, date and images."""
        og_title = soup.find("meta", property="og:title")["content"]
        og_description = soup.find("meta", property="og:description")["content"]
        raw_date = soup.find("time", class_="published")["datetime"]
        published = string_to_date(raw_date, '%Y-%m-%d')
        writer = soup.find("a", rel="author").string
        body = soup.find("div", class_='body entry-content')
        # Images without a 'src' attribute are ignored.
        pictures = [pic for pic in body.find_all("img")
                    if pic.get("src") is not None]
        alt_text = pictures[0]["alt"]
        return {
            "title": og_title,
            "alt": alt_text,
            "description": og_description,
            "author": writer,
            "day": published.day,
            "month": published.month,
            "year": published.year,
            "img": [pic["src"] for pic in pictures],
        }
2937
2938
2939
class GenericWordPressInkblot(GenericNavigableComic):
    """Common base for comics published with WordPress and Inkblot."""
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'first webcomic' anchor found on the home page."""
        home = get_soup_at_url(cls.url)
        return home.find(
            "a",
            class_="webcomic-link webcomic1-link first-webcomic-link first-webcomic1-link")

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect title, date and images of a comic page."""
        og_title = soup.find("meta", property="og:title")["content"]
        pictures = soup.find("div", class_="webcomic-image").find_all("img")
        published_meta = soup.find("meta", property="article:published_time")
        # Only the first 10 characters (the "%Y-%m-%d" part) are parsed.
        published = string_to_date(published_meta["content"][:10], '%Y-%m-%d')
        return {
            "title": og_title,
            "day": published.day,
            "month": published.month,
            "year": published.year,
            "img": [pic["src"] for pic in pictures],
        }
2962
2963
2964
class EverythingsStupid(GenericWordPressInkblot):
    """Retriever for the Everything's Stupid comics."""
    # Also on http://tapastic.com/series/EverythingsStupid
    # Also on http://www.webtoons.com/en/challenge/everythings-stupid/list?title_no=14591
    # Also on http://everythingsstupidcomics.tumblr.com
    name = "stupid"
    long_name = 'Everything\'s Stupid'
    url = "http://everythingsstupid.net"
2972
2973
2974
class TheIsmComics(GenericWordPressInkblot):
    """Retriever for The Ism comics."""
    # Also on https://tapastic.com/series/TheIsm (?)
    name = "theism"
    long_name = 'The Ism'
    url = "http://www.theism-comics.com"
2980
2981
2982
class WoodenPlankStudios(GenericWordPressInkblot):
    """Retriever for the Wooden Plank Studios comics."""
    name = "woodenplank"
    long_name = "Wooden Plank Studios"
    url = "http://woodenplankstudios.com"
2987
2988
2989
class ElectricBunnyComic(GenericNavigableComic):
    """Retriever for the Electric Bunny comics."""
    # Also on http://electricbunnycomics.tumblr.com
    name = "bunny"
    long_name = "Electric Bunny Comic"
    url = "http://www.electricbunnycomics.com/View/Comic/153/Welcome+to+Hell"
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the parent element of the image whose alt text is 'First'."""
        first_button = get_soup_at_url(cls.url).find("img", alt="First")
        return first_button.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the parent of the 'Next'/'Back' image, or None if absent."""
        label = "Next" if next_ else "Back"
        button = last_soup.find("img", alt=label)
        if button:
            return button.parent
        return None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Read the title and the images from the Open Graph meta tags."""
        og_title = soup.find("meta", property="og:title")["content"]
        og_images = soup.find_all("meta", property="og:image")
        return {
            "title": og_title,
            "img": [meta["content"] for meta in og_images],
        }
3017
3018
3019
class SheldonComics(GenericNavigableComic):
    """Retriever for the Sheldon comics."""
    # Also on http://www.gocomics.com/sheldon
    name = "sheldon"
    long_name = "Sheldon Comics"
    url = "http://www.sheldoncomics.com"

    @classmethod
    def get_first_comic_link(cls):
        """Return the anchor with id 'nav-first' found on the home page."""
        home = get_soup_at_url(cls.url)
        return home.find('a', id='nav-first')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the next (or previous) link, or None if there is none.

        Anchors pointing at 'http://www.sheldoncomics.com' are skipped.
        """
        wanted_id = 'nav-next' if next_ else 'nav-prev'
        usable = (anchor for anchor in last_soup.find_all('a', id=wanted_id)
                  if anchor["href"] != "http://www.sheldoncomics.com")
        return next(usable, None)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the image and its title (used as the comic title)."""
        pictures = soup.find('div', id='comic-foot').find_all('img')
        # Sanity checks: alt equals title, and exactly one image is expected.
        for pic in pictures:
            assert pic["alt"] == pic["title"]
        assert len(pictures) == 1
        heading = pictures[0]["title"]
        return {
            "title": heading,
            "img": [pic["src"] for pic in pictures],
        }
3050
3051
3052
class CubeDrone(GenericNavigableComic):
    """Class to retrieve Cube Drone comics."""
    name = 'cubedrone'
    long_name = 'Cube Drone'
    url = 'http://cube-drone.com/comics'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        The link is the parent of the 'glyphicon-backward' span found on
        the main page.
        """
        return get_soup_at_url(cls.url).find('span', class_='glyphicon glyphicon-backward').parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns the parent of the chevron span (right for next, left for
        previous), or None when the page has no such span.
        """
        class_ = 'glyphicon glyphicon-chevron-' + ('right' if next_ else 'left')
        span = last_soup.find('span', class_=class_)
        # find() returns None when nothing matches; do not dereference it.
        return span.parent if span else None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Title and secondary url come from the 'twitter:*' meta tags; the
        secondary title and alt text come from the first comic image.
        """
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        url2 = soup.find('meta', attrs={'name': 'twitter:url'})['content']
        # date_str = soup.find('h2', class_='comic_title').find('small').string
        # day = string_to_date(date_str, "%B %d, %Y, %I:%M %p")
        imgs = soup.find_all('img', class_='comic img-responsive')
        title2 = imgs[0]['title']
        alt = imgs[0]['alt']
        return {
            'url2': url2,
            'title': title,
            'title2': title2,
            'alt': alt,
            'img': [i['src'] for i in imgs],
        }
3087
3088
3089
class MakeItStoopid(GenericNavigableComic):
    """Retrieve the Make It Stoopid comics through the page navigation bar."""
    name = 'stoopid'
    long_name = 'Make it stoopid'
    url = 'http://makeitstoopid.com/comic.php'

    @classmethod
    def get_nav(cls, soup):
        """Return the navigation elements of soup as a list.

        The elements of class 'cnav' are split after the fifth one and
        both parts are asserted to be equal. Entries of the first part
        without an href are replaced by None.
        """
        nav_items = soup.find_all(class_='cnav')
        first_bar = nav_items[:5]
        second_bar = nav_items[5:]
        assert first_bar == second_bar
        # Order in the bar: begin, prev, archive, next_, end
        links = []
        for item in first_bar:
            links.append(item if item.get('href') is not None else None)
        return links

    @classmethod
    def get_first_comic_link(cls):
        """Return the first navigation entry of the main page."""
        home = get_soup_at_url(cls.url)
        return cls.get_nav(home)[0]

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the 'next' (index 3) or 'prev' (index 1) navigation entry."""
        position = 3 if next_ else 1
        return cls.get_nav(last_soup)[position]

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the title (taken from the link) and the comic images."""
        comic_title = link['title']
        comic_imgs = soup.find_all('img', id='comicimg')
        return {'title': comic_title, 'img': [i['src'] for i in comic_imgs]}
3123
3124 View Code Duplication
3125
class TuMourrasMoinsBete(GenericNavigableComic):
    """Retrieve the Tu Mourras Moins Bete comics from the blog pages."""
    name = 'mourrasmoinsbete'
    long_name = 'Tu Mourras Moins Bete'
    url = 'http://tumourrasmoinsbete.blogspot.fr'
    _categories = ('FRANCAIS', )
    get_first_comic_link = simulate_first_link
    first_url = 'http://tumourrasmoinsbete.blogspot.fr/2008/06/essai.html'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the blog pager anchor: 'newer' for next, 'older' otherwise."""
        if next_:
            pager_id = 'Blog1_blog-pager-newer-link'
        else:
            pager_id = 'Blog1_blog-pager-older-link'
        return last_soup.find('a', id=pager_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return images, author and title of the post shown in soup."""
        page_title = soup.find('title').string
        body = soup.find('div', itemprop='description articleBody')
        post_imgs = body.find_all('img')
        post_author = soup.find('span', itemprop='author').string
        info = {'img': [i['src'] for i in post_imgs]}
        info['author'] = post_author
        info['title'] = page_title
        return info
3150
3151
3152
class GeekAndPoke(GenericNavigableComic):
    """Class to retrieve Geek And Poke comics."""
    name = 'geek'
    long_name = 'Geek And Poke'
    url = 'http://geek-and-poke.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://geek-and-poke.com/geekandpoke/2006/8/27/a-new-place-for-a-not-so-old-blog.html'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic."""
        # NOTE(review): 'prev-item' is used for the next comic and
        # 'next-item' for the previous one; presumably the site names its
        # links in reverse chronological order - confirm against the pages.
        return last_soup.find('a', class_='prev-item' if next_ else 'next-item')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Title and description come from the Open Graph meta tags, the date
        from the 'published' time element and the images from the post
        content div. 'alt' is the alt text of the first image, or an empty
        string when the post has no image (or the image has no alt).
        """
        title = soup.find('meta', property='og:title')['content']
        desc = soup.find('meta', property='og:description')['content']
        date_str = soup.find('time', class_='published')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        author = soup.find('a', rel='author').string
        div_content = (soup.find('div', class_="body entry-content") or
                       soup.find('div', class_="special-content"))
        imgs = div_content.find_all('img')
        # Images without a src cannot be downloaded: ignore them.
        imgs = [i for i in imgs if i.get('src') is not None]
        # NOTE(review): `'title' not in i` on a BeautifulSoup tag presumably
        # tests the tag's children rather than its attributes, which would
        # make this check always pass - confirm before tightening it.
        assert all('title' not in i or i['alt'] == i['title'] for i in imgs)
        # Keep 'alt' a string in every case (it used to be [] without image).
        alt = imgs[0].get('alt', "") if imgs else ""
        return {
            'title': title,
            'alt': alt,
            'description': desc,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
        }
3190
3191 View Code Duplication
3192
class GloryOwlComix(GenericNavigableComic):
    """Retrieve the Glory Owl comics from the blog pages."""
    name = 'gloryowl'
    long_name = 'Glory Owl'
    url = 'http://gloryowlcomix.blogspot.fr'
    _categories = ('NSFW', 'FRANCAIS')
    get_first_comic_link = simulate_first_link
    first_url = 'http://gloryowlcomix.blogspot.fr/2013/02/1_7.html'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the blog pager anchor: 'newer' for next, 'older' otherwise."""
        if next_:
            pager_id = 'Blog1_blog-pager-newer-link'
        else:
            pager_id = 'Blog1_blog-pager-older-link'
        return last_soup.find('a', id=pager_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return images, author and title of the post shown in soup.

        Images are the href of every <link rel="image_src"> element.
        """
        page_title = soup.find('title').string
        image_links = soup.find_all('link', rel='image_src')
        post_author = soup.find('a', rel='author').string
        info = {'img': [i['href'] for i in image_links]}
        info['author'] = post_author
        info['title'] = page_title
        return info
3217
3218
3219
class GenericTumblrV1(GenericComic):
    """Generic class to retrieve comics from Tumblr using the V1 API."""
    _categories = ('TUMBLR', )

    @classmethod
    def get_next_comic(cls, last_comic):
        """Yield the comic built from each new post, skipping ignored posts."""
        for post in cls.get_posts(last_comic):
            info = cls.get_comic_info(post)
            if info is None:
                continue
            yield info

    @classmethod
    def get_url_from_post(cls, post):
        """Return the 'url' attribute of a post element."""
        return post['url']

    @classmethod
    def get_api_url(cls):
        """Return the '/api/read/' url joined to the comic url."""
        return urljoin_wrapper(cls.url, '/api/read/')

    @classmethod
    def get_comic_info(cls, post):
        """Return the comic description for a post, or None if not a photo."""
        if post['type'] != 'photo':
            return None
        tumblr_id = int(post['id'])
        api_url = cls.get_api_url() + '?id=%d' % (tumblr_id)
        timestamp = int(post['unix-timestamp'])
        day = datetime.datetime.fromtimestamp(timestamp).date()
        caption = post.find('photo-caption')
        if caption:
            title = caption.string
        else:
            title = ""
        tags = ' '.join(t.string for t in post.find_all('tag'))
        # Photos may appear in 'photo' tags and/or straight in the post
        photo_tags = post.find_all('photo') or [post]
        # Images are in multiple resolutions - taking the first one
        photo_urls = [photo.find('photo-url') for photo in photo_tags]
        return {
            'url': cls.get_url_from_post(post),
            'url2': post['url-with-slug'],
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'title': title,
            'tags': tags,
            'img': [u.string for u in photo_urls],
            'tumblr-id': tumblr_id,
            'api_url': api_url,  # for debug purposes
        }

    @classmethod
    def get_posts(cls, last_comic, nb_post_per_call=10):
        """Get posts using API. nb_post_per_call is max 50.

        Posts are retrieved from newer to older as per the tumblr v1 api
        but are returned in chronological order. Nothing is returned when
        the post of last_comic cannot be fetched or is not met while
        walking through the posts."""
        waiting_for_url = last_comic['url'] if last_comic else None
        collected = []
        if last_comic is not None:
            # Sometimes, tumblr posts are deleted. If the previous post is
            # deleted, we might end up spending a lot of time looking for
            # something that doesn't exist. Failing early and clearly might
            # be a better option.
            last_api_url = last_comic['api_url']
            try:
                get_soup_at_url(last_api_url)
            except urllib.error.HTTPError:
                try:
                    get_soup_at_url(cls.url)
                    message = "Did not find previous post %s : it might have been deleted" % last_api_url
                except urllib.error.HTTPError:
                    message = "Did not find previous post nor main url %s" % cls.url
                print(message)
                return reversed(collected)
        api_url = cls.get_api_url()
        summary = get_soup_at_url(api_url).find('posts')
        start, total = int(summary['start']), int(summary['total'])
        assert start == 0
        for offset in range(0, total, nb_post_per_call):
            page_url = api_url + '?start=%d&num=%d' % (offset, nb_post_per_call)
            page = get_soup_at_url(page_url).find('posts')
            page_start, page_total = int(page['start']), int(page['total'])
            assert offset == page_start, "%d != %d" % (offset, page_start)
            # This may happen and should be handled in the future
            assert total == page_total, "%d != %d" % (total, page_total)
            for post in page.find_all('post'):
                if waiting_for_url and waiting_for_url == cls.get_url_from_post(post):
                    # Reached the last known post: everything newer is collected.
                    return reversed(collected)
                collected.append(post)
        if waiting_for_url is None:
            return reversed(collected)
        print("Did not find %s : there might be a problem" % waiting_for_url)
        return []
3315
3316
3317
class SaturdayMorningBreakfastCerealTumblr(GenericEmptyComic, GenericTumblrV1):
    """Saturday Morning Breakfast Cereal comics (Tumblr V1 source)."""
    # NOTE(review): GenericEmptyComic comes first in the MRO; presumably it
    # disables retrieval for this comic - confirm.
    # Also on http://www.gocomics.com/saturday-morning-breakfast-cereal
    # Also on http://www.smbc-comics.com
    name = 'smbc-tumblr'
    long_name = 'Saturday Morning Breakfast Cereal (from Tumblr)'
    url = 'http://smbc-comics.tumblr.com'
    _categories = ('SMBC', )
3325
3326
3327
class IrwinCardozo(GenericTumblrV1):
    """Irwin Cardozo comics, retrieved through the Tumblr V1 API."""
    name = 'irwinc'
    long_name = 'Irwin Cardozo'
    url = 'http://irwincardozocomics.tumblr.com'
3332
3333
3334
class AccordingToDevin(GenericTumblrV1):
    """According To Devin comics, retrieved through the Tumblr V1 API."""
    name = 'devin'
    long_name = 'According To Devin'
    url = 'http://accordingtodevin.tumblr.com'
3339
3340
3341
class ItsTheTieTumblr(GenericTumblrV1):
    """It's the tie comics, retrieved through the Tumblr V1 API."""
    # Also on http://itsthetie.com
    # Also on https://tapastic.com/series/itsthetie
    name = 'tie-tumblr'
    long_name = "It's the tie (from Tumblr)"
    url = "http://itsthetie.tumblr.com"
    _categories = ('TIE', )
3349
3350
3351
class OctopunsTumblr(GenericTumblrV1):
    """Octopuns comics, retrieved through the Tumblr V1 API."""
    # Also on http://www.octopuns.net
    name = 'octopuns-tumblr'
    long_name = 'Octopuns (from Tumblr)'
    url = 'http://octopuns.tumblr.com'
3357
3358
3359
class PicturesInBoxesTumblr(GenericTumblrV1):
    """Pictures In Boxes comics, retrieved through the Tumblr V1 API."""
    # Also on http://www.picturesinboxes.com
    name = 'picturesinboxes-tumblr'
    long_name = 'Pictures in Boxes (from Tumblr)'
    url = 'http://picturesinboxescomic.tumblr.com'
3365
3366
3367
class TubeyToonsTumblr(GenericTumblrV1):
    """Tubey Toons comics, retrieved through the Tumblr V1 API."""
    # Also on http://tapastic.com/series/Tubey-Toons
    # Also on http://tubeytoons.com
    name = 'tubeytoons-tumblr'
    long_name = 'Tubey Toons (from Tumblr)'
    url = 'http://tubeytoons.tumblr.com'
    # NOTE(review): 'TUNEYTOONS' looks like a typo for TUBEYTOONS; kept
    # as-is because other sources may share this exact category string.
    _categories = ('TUNEYTOONS', )
3375
3376
3377
class UnearthedComicsTumblr(GenericTumblrV1):
    """Unearthed Comics, retrieved through the Tumblr V1 API."""
    # Also on http://tapastic.com/series/UnearthedComics
    # Also on http://unearthedcomics.com
    name = 'unearthed-tumblr'
    long_name = 'Unearthed Comics (from Tumblr)'
    url = 'http://unearthedcomics.tumblr.com'
    _categories = ('UNEARTHED', )
3385
3386
3387
class PieComic(GenericTumblrV1):
    """Pie Comic comics, retrieved through the Tumblr V1 API."""
    name = 'pie'
    long_name = 'Pie Comic'
    url = "http://piecomic.tumblr.com"
3392
3393
3394
class MrEthanDiamond(GenericTumblrV1):
    """Mr Ethan Diamond comics, retrieved through the Tumblr V1 API."""
    name = 'diamond'
    long_name = 'Mr Ethan Diamond'
    url = 'http://mrethandiamond.tumblr.com'
3399
3400
3401
class Flocci(GenericTumblrV1):
    """floccinaucinihilipilification comics, retrieved through the Tumblr V1 API."""
    name = 'flocci'
    long_name = 'floccinaucinihilipilification'
    url = "http://floccinaucinihilipilificationa.tumblr.com"
3406
3407
3408
class UpAndOut(GenericTumblrV1):
    """Up & Out comics, retrieved through the Tumblr V1 API."""
    # Also on http://tapastic.com/series/UP-and-OUT
    name = 'upandout'
    long_name = 'Up And Out (from Tumblr)'
    url = 'http://upandoutcomic.tumblr.com'
3414
3415
3416
class Pundemonium(GenericTumblrV1):
    """Pundemonium comics, retrieved through the Tumblr V1 API."""
    name = 'pundemonium'
    long_name = 'Pundemonium'
    url = 'http://monstika.tumblr.com'
3421
3422
3423
class PoorlyDrawnLinesTumblr(GenericEmptyComic, GenericTumblrV1):
    """Poorly Drawn Lines comics (Tumblr V1 source)."""
    # NOTE(review): GenericEmptyComic comes first in the MRO; presumably it
    # disables retrieval for this comic - confirm.
    # Also on http://poorlydrawnlines.com
    name = 'poorlydrawn-tumblr'
    long_name = 'Poorly Drawn Lines (from Tumblr)'
    url = 'http://pdlcomics.tumblr.com'
    _categories = ('POORLYDRAWN', )
3430
3431
3432
class PearShapedComics(GenericTumblrV1):
    """Pear-Shaped Comics, retrieved through the Tumblr V1 API."""
    name = 'pearshaped'
    long_name = 'Pear-Shaped Comics'
    url = 'http://pearshapedcomics.com'
3437
3438
3439
class PondScumComics(GenericTumblrV1):
    """Pond Scum comics, retrieved through the Tumblr V1 API."""
    name = 'pond'
    long_name = 'Pond Scum'
    url = 'http://pondscumcomic.tumblr.com'
3444
3445
3446
class MercworksTumblr(GenericTumblrV1):
    """Mercworks comics, retrieved through the Tumblr V1 API."""
    # Also on http://mercworks.net
    name = 'mercworks-tumblr'
    long_name = 'Mercworks (from Tumblr)'
    url = 'http://mercworks.tumblr.com'
3452
3453
3454
class OwlTurdTumblr(GenericEmptyComic, GenericTumblrV1):
    """Owl Turd comics (Tumblr V1 source)."""
    # NOTE(review): GenericEmptyComic comes first in the MRO; presumably it
    # disables retrieval for this comic - confirm.
    # Also on http://tapastic.com/series/Owl-Turd-Comix
    name = 'owlturd-tumblr'
    long_name = 'Owl Turd (from Tumblr)'
    url = 'http://owlturd.com'
    _categories = ('OWLTURD', )
3461
3462
3463
class VectorBelly(GenericTumblrV1):
    """Vector Belly comics, retrieved through the Tumblr V1 API."""
    # Also on http://vectorbelly.com
    name = 'vector'
    long_name = 'Vector Belly'
    url = 'http://vectorbelly.tumblr.com'
3469
3470
3471
class GoneIntoRapture(GenericTumblrV1):
    """Gone Into Rapture comics, retrieved through the Tumblr V1 API."""
    # Also on http://goneintorapture.tumblr.com
    # Also on http://tapastic.com/series/Goneintorapture
    name = 'rapture'
    long_name = 'Gone Into Rapture'
    url = 'http://www.goneintorapture.com'
3478
3479
3480
class TheOatmealTumblr(GenericTumblrV1):
    """The Oatmeal comics, retrieved through the Tumblr V1 API."""
    # Also on http://theoatmeal.com
    name = 'oatmeal-tumblr'
    long_name = 'The Oatmeal (from Tumblr)'
    url = 'http://oatmeal.tumblr.com'
3486
3487
3488
class HeckIfIKnowComicsTumblr(GenericTumblrV1):
    """Heck If I Know comics, retrieved through the Tumblr V1 API."""
    # Also on http://tapastic.com/series/Regular
    name = 'heck-tumblr'
    long_name = 'Heck if I Know comics (from Tumblr)'
    url = 'http://heckifiknowcomics.com'
3494
3495
3496
class MyJetPack(GenericTumblrV1):
    """My Jet Pack comics, retrieved through the Tumblr V1 API."""
    name = 'jetpack'
    long_name = 'My Jet Pack'
    url = 'http://myjetpack.tumblr.com'
3501
3502
3503
class CheerUpEmoKidTumblr(GenericTumblrV1):
    """Cheer Up Emo Kid comics, retrieved through the Tumblr V1 API."""
    # Also on http://www.cheerupemokid.com
    # Also on http://tapastic.com/series/CUEK
    name = 'cuek-tumblr'
    long_name = 'Cheer Up Emo Kid (from Tumblr)'
    url = 'http://enzocomics.tumblr.com'
3510
3511
3512
class ForLackOfABetterComic(GenericEmptyComic, GenericTumblrV1):
    """For Lack Of A Better Comic comics (Tumblr V1 source)."""
    # NOTE(review): GenericEmptyComic comes first in the MRO; presumably it
    # disables retrieval for this comic - confirm.
    # Also on http://forlackofabettercomic.com
    name = 'lack'
    long_name = 'For Lack Of A Better Comic'
    url = 'http://forlackofabettercomic.tumblr.com'
3518
3519
3520
class ZenPencilsTumblr(GenericTumblrV1):
    """Zen Pencils comics, retrieved through the Tumblr V1 API."""
    # Also on http://zenpencils.com
    # Also on http://www.gocomics.com/zen-pencils
    name = 'zenpencils-tumblr'
    long_name = 'Zen Pencils (from Tumblr)'
    url = 'http://zenpencils.tumblr.com'
    _categories = ('ZENPENCILS', )
3528
3529
3530
class ThreeWordPhraseTumblr(GenericTumblrV1):
    """Three Word Phrase comics, retrieved through the Tumblr V1 API."""
    # Also on http://threewordphrase.com
    name = 'threeword-tumblr'
    long_name = 'Three Word Phrase (from Tumblr)'
    url = 'http://www.threewordphrase.tumblr.com'
3536
3537
3538
class TimeTrabbleTumblr(GenericTumblrV1):
    """Time Trabble comics, retrieved through the Tumblr V1 API."""
    # Also on http://timetrabble.com
    name = 'timetrabble-tumblr'
    long_name = 'Time Trabble (from Tumblr)'
    url = 'http://timetrabble.tumblr.com'
3544
3545
3546
class SafelyEndangeredTumblr(GenericTumblrV1):
    """Safely Endangered comics, retrieved through the Tumblr V1 API."""
    # Also on http://www.safelyendangered.com
    name = 'endangered-tumblr'
    long_name = 'Safely Endangered (from Tumblr)'
    url = 'http://tumblr.safelyendangered.com'
3552
3553
3554
class MouseBearComedyTumblr(GenericTumblrV1):
    """Mouse Bear Comedy comics, retrieved through the Tumblr V1 API."""
    # Also on http://www.mousebearcomedy.com
    name = 'mousebear-tumblr'
    long_name = 'Mouse Bear Comedy (from Tumblr)'
    url = 'http://mousebearcomedy.tumblr.com'
3560
3561
3562
class BouletCorpTumblr(GenericTumblrV1):
    """BouletCorp comics, retrieved through the Tumblr V1 API."""
    # Also on http://www.bouletcorp.com
    name = 'boulet-tumblr'
    long_name = 'Boulet Corp (from Tumblr)'
    url = 'http://bouletcorp.tumblr.com'
    _categories = ('BOULET', )
3569
3570
3571
class TheAwkwardYetiTumblr(GenericEmptyComic, GenericTumblrV1):
    """The Awkward Yeti comics (Tumblr V1 source)."""
    # NOTE(review): GenericEmptyComic comes first in the MRO; presumably it
    # disables retrieval for this comic - confirm.
    # Also on http://www.gocomics.com/the-awkward-yeti
    # Also on http://theawkwardyeti.com
    # Also on https://tapastic.com/series/TheAwkwardYeti
    name = 'yeti-tumblr'
    long_name = 'The Awkward Yeti (from Tumblr)'
    url = 'http://larstheyeti.tumblr.com'
    _categories = ('YETI', )
3580
3581
3582
class NellucNhoj(GenericTumblrV1):
    """Nelluc Nhoj comics, retrieved through the Tumblr V1 API."""
    name = 'nhoj'
    long_name = 'Nelluc Nhoj'
    url = 'http://nellucnhoj.com'
3587
3588
3589
class DownTheUpwardSpiralTumblr(GenericTumblrV1):
    """Down The Upward Spiral comics, retrieved through the Tumblr V1 API."""
    # Also on http://www.downtheupwardspiral.com
    name = 'spiral-tumblr'
    long_name = 'Down the Upward Spiral (from Tumblr)'
    url = 'http://downtheupwardspiral.tumblr.com'
3595
3596
3597
class AsPerUsualTumblr(GenericTumblrV1):
    """As Per Usual comics, retrieved through the Tumblr V1 API."""
    # Also on https://tapastic.com/series/AsPerUsual
    name = 'usual-tumblr'
    long_name = 'As Per Usual (from Tumblr)'
    url = 'http://as-per-usual.tumblr.com'
3603
3604
3605
class OneOneOneOneComicTumblr(GenericTumblrV1):
    """1111 Comics, retrieved through the Tumblr V1 API."""
    # Also on http://www.1111comics.me
    # Also on https://tapastic.com/series/1111-Comics
    name = '1111-tumblr'
    long_name = '1111 Comics (from Tumblr)'
    url = 'http://comics1111.tumblr.com'
    _categories = ('ONEONEONEONE', )
3613
3614
3615
class JhallComicsTumblr(GenericTumblrV1):
    """Jhall Comics, retrieved through the Tumblr V1 API."""
    # Also on http://jhallcomics.com
    name = 'jhall-tumblr'
    long_name = 'Jhall Comics (from Tumblr)'
    url = 'http://jhallcomics.tumblr.com'
3621
3622
3623
class BerkeleyMewsTumblr(GenericTumblrV1):
    """Berkeley Mews comics, retrieved through the Tumblr V1 API."""
    # Also on http://www.gocomics.com/berkeley-mews
    # Also on http://www.berkeleymews.com
    name = 'berkeley-tumblr'
    long_name = 'Berkeley Mews (from Tumblr)'
    url = 'http://mews.tumblr.com'
    _categories = ('BERKELEY', )
3631
3632
3633
class JoanCornellaTumblr(GenericTumblrV1):
    """Joan Cornella comics, retrieved through the Tumblr V1 API."""
    # Also on http://joancornella.net
    name = 'cornella-tumblr'
    long_name = 'Joan Cornella (from Tumblr)'
    url = 'http://cornellajoan.tumblr.com'
3639
3640
3641
class RespawnComicTumblr(GenericTumblrV1):
    """Respawn Comic, retrieved through the Tumblr V1 API."""
    # Also on http://respawncomic.com
    name = 'respawn-tumblr'
    long_name = 'Respawn Comic (from Tumblr)'
    url = 'http://respawncomic.tumblr.com'
3647
3648
3649
class ChrisHallbeckTumblr(GenericEmptyComic, GenericTumblrV1):
    """Chris Hallbeck comics (Tumblr V1 source)."""
    # NOTE(review): GenericEmptyComic comes first in the MRO; presumably it
    # disables retrieval for this comic - confirm.
    # Also on https://tapastic.com/ChrisHallbeck
    # Also on http://maximumble.com
    # Also on http://minimumble.com
    # Also on http://thebookofbiff.com
    name = 'hallbeck-tumblr'
    # NOTE(review): 'Hallback'/'HALLBACK' below look like typos for
    # Hallbeck; kept as-is because they are runtime values.
    long_name = 'Chris Hallback (from Tumblr)'
    url = 'http://chrishallbeck.tumblr.com'
    _categories = ('HALLBACK', )
3659
3660
3661
class ComicNuggets(GenericTumblrV1):
    """Comic Nuggets comics, retrieved through the Tumblr V1 API."""
    name = 'nuggets'
    long_name = 'Comic Nuggets'
    url = 'http://comicnuggets.com'
3666
3667
3668
class PigeonGazetteTumblr(GenericTumblrV1):
    """The Pigeon Gazette comics, retrieved through the Tumblr V1 API."""
    # Also on https://tapastic.com/series/The-Pigeon-Gazette
    name = 'pigeon-tumblr'
    long_name = 'The Pigeon Gazette (from Tumblr)'
    url = 'http://thepigeongazette.tumblr.com'
3674
3675
3676
class CancerOwl(GenericTumblrV1):
    """Cancer Owl comics, retrieved through the Tumblr V1 API."""
    # Also on http://cancerowl.com
    name = 'cancerowl-tumblr'
    long_name = 'Cancer Owl (from Tumblr)'
    url = 'http://cancerowl.tumblr.com'
3682
3683
3684
class FowlLanguageTumblr(GenericTumblrV1):
    """Fowl Language comics, retrieved through the Tumblr V1 API."""
    # Also on http://www.fowllanguagecomics.com
    # Also on http://tapastic.com/series/Fowl-Language-Comics
    # Also on http://www.gocomics.com/fowl-language
    name = 'fowllanguage-tumblr'
    long_name = 'Fowl Language Comics (from Tumblr)'
    url = 'http://fowllanguagecomics.tumblr.com'
    _categories = ('FOWLLANGUAGE', )
3693
3694
3695
class TheOdd1sOutTumblr(GenericTumblrV1):
    """The Odd 1s Out comics, retrieved through the Tumblr V1 API."""
    # Also on http://theodd1sout.com
    # Also on https://tapastic.com/series/Theodd1sout
    name = 'theodd-tumblr'
    long_name = 'The Odd 1s Out (from Tumblr)'
    url = 'http://theodd1sout.tumblr.com'
3702
3703
3704
class TheUnderfoldTumblr(GenericTumblrV1):
    """The Underfold comics, retrieved through the Tumblr V1 API."""
    # Also on http://theunderfold.com
    name = 'underfold-tumblr'
    long_name = 'The Underfold (from Tumblr)'
    url = 'http://theunderfold.tumblr.com'
3710
3711
3712
class LolNeinTumblr(GenericTumblrV1):
    """Lol Nein comics, retrieved through the Tumblr V1 API."""
    # Also on http://lolnein.com
    name = 'lolnein-tumblr'
    long_name = 'Lol Nein (from Tumblr)'
    url = 'http://lolneincom.tumblr.com'
3718
3719
3720
class FatAwesomeComicsTumblr(GenericTumblrV1):
    """Fat Awesome Comics, retrieved through the Tumblr V1 API."""
    # Also on http://fatawesome.com/comics
    name = 'fatawesome-tumblr'
    long_name = 'Fat Awesome (from Tumblr)'
    url = 'http://fatawesomecomedy.tumblr.com'
3726
3727
3728
class TheWorldIsFlatTumblr(GenericTumblrV1):
    """The World Is Flat comics, retrieved through the Tumblr V1 API."""
    # Also on https://tapastic.com/series/The-World-is-Flat
    name = 'flatworld-tumblr'
    long_name = 'The World Is Flat (from Tumblr)'
    url = 'http://theworldisflatcomics.tumblr.com'
3734
3735
3736
class DorrisMc(GenericEmptyComic, GenericTumblrV1):
    """Dorris Mc comics (Tumblr V1 source)."""
    # NOTE(review): GenericEmptyComic comes first in the MRO; presumably it
    # disables retrieval for this comic - confirm.
    # Also on http://www.gocomics.com/dorris-mccomics
    name = 'dorrismc'
    long_name = 'Dorris Mc'
    url = 'http://dorrismccomics.com'
3742
3743
3744
class LeleozTumblr(GenericEmptyComic, GenericTumblrV1):
    """Leleoz comics (Tumblr V1 source)."""
    # NOTE(review): GenericEmptyComic comes first in the MRO; presumably it
    # disables retrieval for this comic - confirm.
    # Also on https://tapastic.com/series/Leleoz
    name = 'leleoz-tumblr'
    long_name = 'Leleoz (from Tumblr)'
    url = 'http://leleozcomics.tumblr.com'
3750
3751
3752
class MoonBeardTumblr(GenericTumblrV1):
    """Moon Beard comics, retrieved through the Tumblr V1 API."""
    # Also on http://moonbeard.com
    # Also on http://www.webtoons.com/en/comedy/moon-beard/list?title_no=471
    name = 'moonbeard-tumblr'
    long_name = 'Moon Beard (from Tumblr)'
    url = 'http://blog.squiresjam.es/moonbeard'
3759
3760
3761
class AComik(GenericTumblrV1):
    """A Comik comics, retrieved through the Tumblr V1 API."""
    name = 'comik'
    long_name = 'A Comik'
    url = 'http://acomik.com'
3766
3767
3768
class ClassicRandy(GenericTumblrV1):
    """Classic Randy comics, retrieved through the Tumblr V1 API."""
    name = 'randy'
    long_name = 'Classic Randy'
    url = 'http://classicrandy.tumblr.com'
3773
3774
3775
class DagssonTumblr(GenericTumblrV1):
    """Dagsson comics, retrieved through the Tumblr V1 API."""
    # Also on http://www.dagsson.com
    name = 'dagsson-tumblr'
    long_name = 'Dagsson Hugleikur (from Tumblr)'
    url = 'http://hugleikurdagsson.tumblr.com'
3781
3782
3783
class LinsEditionsTumblr(GenericTumblrV1):
    """L.I.N.S. Editions comics, retrieved through the Tumblr V1 API."""
    # Also on https://linsedition.com
    name = 'lins-tumblr'
    long_name = 'L.I.N.S. Editions (from Tumblr)'
    url = 'http://linscomics.tumblr.com'
    _categories = ('LINS', )
3790
3791
3792
class OrigamiHotDish(GenericTumblrV1):
    """Origami Hot Dish comics, retrieved through the Tumblr V1 API."""
    name = 'origamihotdish'
    long_name = 'Origami Hot Dish'
    url = 'http://origamihotdish.com'
3797
3798
3799
class HitAndMissComicsTumblr(GenericTumblrV1):
    """Hit and Miss Comics, retrieved through the Tumblr V1 API."""
    name = 'hitandmiss'
    long_name = 'Hit and Miss Comics'
    url = 'http://hitandmisscomics.tumblr.com'
3804
3805
3806
class HMBlanc(GenericTumblrV1):
    """HM Blanc comics, retrieved through the Tumblr V1 API."""
    name = 'hmblanc'
    long_name = 'HM Blanc'
    url = 'http://hmblanc.tumblr.com'
3811
3812
3813
class TalesOfAbsurdityTumblr(GenericTumblrV1):
    """Tales Of Absurdity comics, retrieved through the Tumblr V1 API."""
    # Also on http://talesofabsurdity.com
    # Also on http://tapastic.com/series/Tales-Of-Absurdity
    name = 'absurdity-tumblr'
    long_name = 'Tales of Absurdity (from Tumblr)'
    url = 'http://talesofabsurdity.tumblr.com'
    _categories = ('ABSURDITY', )
3821
3822
3823
class RobbieAndBobby(GenericTumblrV1):
    """Robbie And Bobby comics, retrieved through the Tumblr V1 API."""
    # Also on http://robbieandbobby.com
    name = 'robbie-tumblr'
    long_name = 'Robbie And Bobby (from Tumblr)'
    url = 'http://robbieandbobby.tumblr.com'
3829
3830
3831
class ElectricBunnyComicTumblr(GenericTumblrV1):
    """Electric Bunny Comics, retrieved through the Tumblr V1 API."""
    # Also on http://www.electricbunnycomics.com/View/Comic/153/Welcome+to+Hell
    name = 'bunny-tumblr'
    long_name = 'Electric Bunny Comic (from Tumblr)'
    url = 'http://electricbunnycomics.tumblr.com'
3837
3838
3839
class Hoomph(GenericTumblrV1):
    """Hoomph comics, retrieved through the Tumblr V1 API."""
    name = 'hoomph'
    long_name = 'Hoomph'
    url = 'http://hoom.ph'
3844
3845
3846
class BFGFSTumblr(GenericTumblrV1):
    """BFGFS comics, retrieved through the Tumblr V1 API."""
    # Also on https://tapastic.com/series/BFGFS
    # Also on http://bfgfs.com
    name = 'bfgfs-tumblr'
    long_name = 'BFGFS (from Tumblr)'
    url = 'http://bfgfs.tumblr.com'
3853
3854
3855
class DoodleForFood(GenericTumblrV1):
    """Doodle For Food comics, retrieved through the Tumblr V1 API."""
    # Also on http://doodleforfood.com
    name = 'doodle'
    long_name = 'Doodle For Food'
    url = 'http://doodleforfood.com'
3861
3862
3863
class CassandraCalinTumblr(GenericEmptyComic, GenericTumblrV1):
    """C. Cassandra comics (Tumblr V1 source)."""
    # NOTE(review): GenericEmptyComic comes first in the MRO; presumably it
    # disables retrieval for this comic - confirm.
    # Also on http://cassandracalin.com
    # Also on https://tapastic.com/series/C-Cassandra-comics
    name = 'cassandra-tumblr'
    long_name = 'Cassandra Calin (from Tumblr)'
    url = 'http://c-cassandra.tumblr.com'
3870
3871
3872
class DougWasTaken(GenericTumblrV1):
    """Retrieve Doug Was Taken comics from their Tumblr blog."""
    name = "doog"
    long_name = "Doug Was Taken"
    url = "http://dougwastaken.tumblr.com"
3877
3878
3879
class MandatoryRollerCoaster(GenericEmptyComic, GenericTumblrV1):
    """Mandatory Roller Coaster comics (Tumblr V1 logic)."""
    # NOTE(review): GenericEmptyComic is the first base - presumably this
    # disables retrieval; confirm.
    name = "rollercoaster"
    long_name = "Mandatory Roller Coaster"
    url = "http://mandatoryrollercoaster.com"
3884
3885
3886
class CEstPasEnRegardantSesPompes(GenericEmptyComic, GenericTumblrV1):
    """C'Est Pas En Regardant Ses Pompes (...) comics as published on Tumblr."""
    # NOTE(review): GenericEmptyComic is the first base - presumably this
    # disables retrieval; confirm.
    name = "cperspqccltt"
    long_name = "C Est Pas En Regardant Ses Pompes (...)"
    url = "http://cperspqccltt.tumblr.com"
3891
3892
3893
class TheGrohlTroll(GenericEmptyComic, GenericTumblrV1):
    """The Grohl Troll comics (Tumblr V1 logic)."""
    # NOTE(review): GenericEmptyComic is the first base - presumably this
    # disables retrieval; confirm.
    name = "grohltroll"
    long_name = "The Grohl Troll"
    url = "http://thegrohltroll.com"
3898
3899
3900
class WebcomicName(GenericEmptyComic, GenericTumblrV1):
    """Webcomic Name comics (Tumblr V1 logic)."""
    # NOTE(review): GenericEmptyComic is the first base - presumably this
    # disables retrieval; confirm.
    name = "webcomicname"
    long_name = "Webcomic Name"
    url = "http://webcomicname.com"
3905
3906
3907
class BooksOfAdam(GenericEmptyComic, GenericTumblrV1):
    """Books of Adam comics as published on Tumblr."""
    # NOTE(review): GenericEmptyComic is the first base - presumably this
    # disables retrieval; confirm.
    # Mirror: http://www.booksofadam.com
    name = "booksofadam"
    long_name = "Books of Adam"
    url = "http://booksofadam.tumblr.com"
3913
3914
3915
class HarkAVagrant(GenericEmptyComic, GenericTumblrV1):
    """Hark A Vagrant comics as published on Tumblr."""
    # NOTE(review): GenericEmptyComic is the first base - presumably this
    # disables retrieval; confirm.
    # Mirror: http://www.harkavagrant.com
    name = "hark-tumblr"
    long_name = "Hark A Vagrant (from Tumblr)"
    url = "http://beatonna.tumblr.com"
3921
3922
3923
class OurSuperAdventureTumblr(GenericEmptyComic, GenericTumblrV1):
    """Our Super Adventure comics as published on Tumblr."""
    # NOTE(review): GenericEmptyComic is the first base - presumably this
    # disables retrieval; confirm.
    # Related sites:
    #  - https://tapastic.com/series/Our-Super-Adventure
    #  - http://www.oursuperadventure.com
    #  - http://sarahgraley.com
    name = "superadventure-tumblr"
    long_name = "Our Super Adventure (from Tumblr)"
    url = "http://sarahssketchbook.tumblr.com"
3931
3932
3933
class JakeLikesOnions(GenericTumblrV1):
    """Retrieve Jake Likes Onions comics (handled by the Tumblr V1 logic)."""
    name = "jake"
    long_name = "Jake Likes Onions"
    url = "http://jakelikesonions.com"
3938
3939
3940
class InYourFaceCake(GenericEmptyComic, GenericTumblrV1):
    """In Your Face Cake comics as published on Tumblr."""
    # NOTE(review): GenericEmptyComic is the first base - presumably this
    # disables retrieval; confirm.
    name = "inyourfacecake-tumblr"
    long_name = "In Your Face Cake (from Tumblr)"
    url = "http://in-your-face-cake.tumblr.com"
3945
3946
3947
class Robospunk(GenericTumblrV1):
    """Retrieve Robospunk comics (handled by the Tumblr V1 logic)."""
    name = "robospunk"
    long_name = "Robospunk"
    url = "http://robospunk.com"
3952
3953
3954
class BananaTwinky(GenericTumblrV1):
    """Retrieve Banana Twinky comics from their Tumblr blog."""
    name = "banana"
    long_name = "Banana Twinky"
    url = "http://bananatwinky.tumblr.com"
3959
3960
3961
class YesterdaysPopcornTumblr(GenericTumblrV1):
    """Retrieve Yesterday's Popcorn comics from their Tumblr blog."""
    # Mirrors:
    #  - http://www.yesterdayspopcorn.com
    #  - https://tapastic.com/series/Yesterdays-Popcorn
    name = "popcorn-tumblr"
    long_name = "Yesterday's Popcorn (from Tumblr)"
    url = "http://yesterdayspopcorn.tumblr.com"
3968
3969
3970
class TwistedDoodles(GenericEmptyComic, GenericTumblrV1):
    """Twisted Doodles comics (Tumblr V1 logic)."""
    # NOTE(review): GenericEmptyComic is the first base - presumably this
    # disables retrieval; confirm.
    name = "twisted"
    long_name = "Twisted Doodles"
    url = "http://www.twisteddoodles.com"
3975
3976
3977
class LittleLifeLinesTumblr(GenericTumblrV1):
    """Retrieve Little Life Lines comics from their Tumblr blog."""
    # Mirror: http://www.littlelifelines.com
    name = "life-tumblr"
    long_name = "Little Life Lines (from Tumblr)"
    url = "https://little-life-lines.tumblr.com"
3983
3984
3985
class HorovitzComics(GenericListableComic):
    """Shared logic for the comic series hosted on horovitzcomics.com.

    Subclasses are expected to set `link_re` to a compiled pattern that
    matches the archive links of their series and captures the comic number
    as its first group.
    """
    url = "http://www.horovitzcomics.com"
    _categories = ("HOROVITZ",)
    # Image URLs carry the date as .../comics/<year>/<month>/<day>/...
    img_re = re.compile(".*comics/([0-9]*)/([0-9]*)/([0-9]*)/.*$")
    link_re = NotImplemented
    get_url_from_archive_element = join_cls_url_to_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dict for the comic behind an archive link.

        `link` is the archive anchor (its href gives the comic number, its
        text the title); `soup` is the parsed comic page, which must contain
        exactly one <img id="comic"> whose src embeds the date.
        """
        number = int(cls.link_re.match(link["href"]).group(1))
        comic_title = link.string
        comic_imgs = soup.find_all("img", id="comic")
        assert len(comic_imgs) == 1
        date_match = cls.img_re.match(comic_imgs[0]["src"])
        year, month, day = (int(part) for part in date_match.groups())
        return {
            "title": comic_title,
            "day": day,
            "month": month,
            "year": year,
            "img": [img["src"] for img in comic_imgs],
            "num": number,
        }

    @classmethod
    def get_archive_elements(cls):
        """Return the archive links matching `link_re`, in reversed page order."""
        archive_soup = get_soup_at_url("http://www.horovitzcomics.com/comics/archive/")
        series_links = archive_soup.find_all("a", href=cls.link_re)
        return reversed(series_links)
4016
4017
4018
class HorovitzNew(HorovitzComics):
    """Retrieve the 'new' Horovitz comics."""
    name = "horovitznew"
    long_name = "Horovitz New"
    # First group captures the comic number.
    link_re = re.compile(r"^/comics/new/([0-9]+)$")
4023
4024
4025
class HorovitzClassic(HorovitzComics):
    """Retrieve the 'classic' Horovitz comics."""
    name = "horovitzclassic"
    long_name = "Horovitz Classic"
    # First group captures the comic number.
    link_re = re.compile(r"^/comics/classic/([0-9]+)$")
4030
4031
4032
class GenericGoComic(GenericNavigableComic):
    """Shared logic for the comics hosted on gocomics.com."""
    _categories = ("GOCOMIC",)
    # Comic URLs end with the date: .../<year>/<month>/<day>
    url_date_re = re.compile(".*/([0-9]*)/([0-9]*)/([0-9]*)$")

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'beginning' link found on the comic's main page."""
        main_page = get_soup_at_url(cls.url)
        return main_page.find("a", class_="beginning")

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the dated 'next' link if `next_` is true, else the 'prev' one."""
        direction = "next" if next_ else "prev"
        return last_soup.find("a", class_=direction, href=cls.url_date_re)

    @classmethod
    def get_url_from_link(cls, link):
        """Return the link's href joined onto the gocomics.com root."""
        return urljoin_wrapper("http://www.gocomics.com", link["href"])

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dict for a comic page.

        The date comes from the link URL; the image is the last
        <img class="strip"> of the page and the author is read from the
        "author" <meta> tag.
        """
        date_parts = cls.url_date_re.match(cls.get_url_from_link(link)).groups()
        year, month, day = map(int, date_parts)
        strips = soup.find_all("img", class_="strip")
        author_meta = soup.find("meta", attrs={"name": "author"})
        return {
            "day": day,
            "month": month,
            "year": year,
            "img": [strips[-1]["src"]],
            "author": author_meta["content"],
        }
4065
4066
4067
class PearlsBeforeSwine(GenericGoComic):
    """Retrieve Pearls Before Swine comics from GoComics."""
    name = "pearls"
    long_name = "Pearls Before Swine"
    url = "http://www.gocomics.com/pearlsbeforeswine"
4072
4073
4074
class Peanuts(GenericGoComic):
    """Retrieve Peanuts comics from GoComics."""
    name = "peanuts"
    long_name = "Peanuts"
    url = "http://www.gocomics.com/peanuts"
4079
4080
4081
class MattWuerker(GenericGoComic):
    """Retrieve Matt Wuerker comics from GoComics."""
    name = "wuerker"
    long_name = "Matt Wuerker"
    url = "http://www.gocomics.com/mattwuerker"
4086
4087
4088
class TomToles(GenericGoComic):
    """Retrieve Tom Toles comics from GoComics."""
    name = "toles"
    long_name = "Tom Toles"
    url = "http://www.gocomics.com/tomtoles"
4093
4094
4095
class BreakOfDay(GenericGoComic):
    """Retrieve Break Of Day comics from GoComics."""
    name = "breakofday"
    long_name = "Break Of Day"
    url = "http://www.gocomics.com/break-of-day"
4100
4101
4102
class Brevity(GenericGoComic):
    """Retrieve Brevity comics from GoComics."""
    name = "brevity"
    long_name = "Brevity"
    url = "http://www.gocomics.com/brevity"
4107
4108
4109
class MichaelRamirez(GenericGoComic):
    """Retrieve Michael Ramirez comics from GoComics."""
    name = "ramirez"
    long_name = "Michael Ramirez"
    url = "http://www.gocomics.com/michaelramirez"
4114
4115
4116
class MikeLuckovich(GenericGoComic):
    """Retrieve Mike Luckovich comics from GoComics."""
    name = "luckovich"
    long_name = "Mike Luckovich"
    url = "http://www.gocomics.com/mikeluckovich"
4121
4122
4123
class JimBenton(GenericGoComic):
    """Retrieve Jim Benton comics from GoComics."""
    # Mirror: http://jimbenton.tumblr.com
    name = "benton"
    long_name = "Jim Benton"
    url = "http://www.gocomics.com/jim-benton-cartoons"
4129
4130
4131
class TheArgyleSweater(GenericGoComic):
    """Retrieve The Argyle Sweater comics from GoComics."""
    name = "argyle"
    long_name = "Argyle Sweater"
    url = "http://www.gocomics.com/theargylesweater"
4136
4137
4138
class SunnyStreet(GenericGoComic):
    """Retrieve Sunny Street comics from GoComics."""
    # Mirror: http://www.sunnystreetcomics.com
    name = "sunny"
    long_name = "Sunny Street"
    url = "http://www.gocomics.com/sunny-street"
4144
4145
4146
class OffTheMark(GenericGoComic):
    """Retrieve Off The Mark comics from GoComics."""
    # Mirror: https://www.offthemark.com
    name = "offthemark"
    long_name = "Off The Mark"
    url = "http://www.gocomics.com/offthemark"
4152
4153
4154
class WuMo(GenericGoComic):
    """Retrieve WuMo comics from GoComics."""
    # Mirror: http://wumo.com
    name = "wumo"
    long_name = "WuMo"
    url = "http://www.gocomics.com/wumo"
4160
4161
4162
class LunarBaboon(GenericGoComic):
    """Retrieve Lunar Baboon comics from GoComics."""
    # Mirrors:
    #  - http://www.lunarbaboon.com
    #  - https://tapastic.com/series/Lunarbaboon
    name = "lunarbaboon"
    long_name = "Lunar Baboon"
    url = "http://www.gocomics.com/lunarbaboon"
4169
4170
4171
class SandersenGocomic(GenericGoComic):
    """Retrieve Sarah Andersen comics from GoComics."""
    # Mirrors:
    #  - http://sarahcandersen.com
    #  - http://tapastic.com/series/Doodle-Time
    name = "sandersen-goc"
    long_name = "Sarah Andersen (from GoComics)"
    url = "http://www.gocomics.com/sarahs-scribbles"
4178
4179
4180
class SaturdayMorningBreakfastCerealGoComic(GenericGoComic):
    """Retrieve Saturday Morning Breakfast Cereal comics from GoComics."""
    # Mirrors:
    #  - http://smbc-comics.tumblr.com
    #  - http://www.smbc-comics.com
    name = "smbc-goc"
    long_name = "Saturday Morning Breakfast Cereal (from GoComics)"
    url = "http://www.gocomics.com/saturday-morning-breakfast-cereal"
    _categories = ("SMBC",)
4188
4189
4190
class CalvinAndHobbesGoComic(GenericGoComic):
    """Retrieve Calvin and Hobbes comics from GoComics."""
    # Source is GoComics, not http://marcel-oehler.marcellosendos.ch/comics/ch/
    name = "calvin-goc"
    long_name = "Calvin and Hobbes (from GoComics)"
    url = "http://www.gocomics.com/calvinandhobbes"
4196
4197
4198
class RallGoComic(GenericGoComic):
    """Retrieve Ted Rall comics from GoComics."""
    # Mirror: http://rall.com/comic
    name = "rall-goc"
    long_name = 'Ted Rall (from GoComics)'
    url = 'http://www.gocomics.com/tedrall'
    _categories = ("RALL",)
4205
4206
4207
class TheAwkwardYetiGoComic(GenericGoComic):
    """Retrieve The Awkward Yeti comics from GoComics."""
    # Mirrors:
    #  - http://larstheyeti.tumblr.com
    #  - http://theawkwardyeti.com
    #  - https://tapastic.com/series/TheAwkwardYeti
    name = "yeti-goc"
    long_name = "The Awkward Yeti (from GoComics)"
    url = "http://www.gocomics.com/the-awkward-yeti"
    _categories = ("YETI",)
4216
4217
4218
class BerkeleyMewsGoComics(GenericGoComic):
    """Retrieve Berkeley Mews comics from GoComics."""
    # Mirrors:
    #  - http://mews.tumblr.com
    #  - http://www.berkeleymews.com
    name = "berkeley-goc"
    long_name = "Berkeley Mews (from GoComics)"
    url = "http://www.gocomics.com/berkeley-mews"
    _categories = ("BERKELEY",)
4226
4227
4228
class SheldonGoComics(GenericGoComic):
    """Retrieve Sheldon comics from GoComics."""
    # Mirror: http://www.sheldoncomics.com
    name = "sheldon-goc"
    long_name = "Sheldon Comics (from GoComics)"
    url = "http://www.gocomics.com/sheldon"
4234
4235
4236
class FowlLanguageGoComics(GenericGoComic):
    """Retrieve Fowl Language comics from GoComics."""
    # Mirrors:
    #  - http://www.fowllanguagecomics.com
    #  - http://tapastic.com/series/Fowl-Language-Comics
    #  - http://fowllanguagecomics.tumblr.com
    name = "fowllanguage-goc"
    long_name = "Fowl Language Comics (from GoComics)"
    url = "http://www.gocomics.com/fowl-language"
    _categories = ("FOWLLANGUAGE",)
4245
4246
4247
class NickAnderson(GenericGoComic):
    """Retrieve Nick Anderson comics from GoComics."""
    name = "nickanderson"
    long_name = "Nick Anderson"
    url = "http://www.gocomics.com/nickanderson"
4252
4253
4254
class GarfieldGoComics(GenericGoComic):
    """Retrieve Garfield comics from GoComics."""
    # Mirror: http://garfield.com
    name = "garfield-goc"
    long_name = "Garfield (from GoComics)"
    url = "http://www.gocomics.com/garfield"
    _categories = ("GARFIELD",)
4261
4262
4263
class DorrisMcGoComics(GenericGoComic):
    """Retrieve Dorris Mc Comics from GoComics."""
    # Mirror: http://dorrismccomics.com
    name = "dorrismc-goc"
    long_name = "Dorris Mc (from GoComics)"
    url = "http://www.gocomics.com/dorris-mccomics"
4269
4270
4271
class FoxTrot(GenericGoComic):
    """Retrieve FoxTrot comics from GoComics."""
    name = "foxtrot"
    long_name = "FoxTrot"
    url = "http://www.gocomics.com/foxtrot"
4276
4277
4278
class FoxTrotClassics(GenericGoComic):
    """Retrieve FoxTrot Classics comics from GoComics."""
    name = "foxtrot-classics"
    long_name = "FoxTrot Classics"
    url = "http://www.gocomics.com/foxtrotclassics"
4283
4284
4285
class MisterAndMeGoComics(GenericGoComic):
    """Retrieve Mister & Me comics from GoComics."""
    # Mirrors:
    #  - http://www.mister-and-me.com
    #  - https://tapastic.com/series/Mister-and-Me
    name = "mister-goc"
    long_name = "Mister & Me (from GoComics)"
    url = "http://www.gocomics.com/mister-and-me"
4292
4293
4294
class NonSequitur(GenericGoComic):
    """Retrieve Non Sequitur (Wiley Miller) comics from GoComics."""
    name = "nonsequitur"
    long_name = "Non Sequitur"
    url = "http://www.gocomics.com/nonsequitur"
4299
4300
4301
class GenericTapasticComic(GenericListableComic):
    """Shared logic for the comics hosted on tapastic.com."""
    _categories = ("TAPASTIC",)

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Build the information dict for one episode.

        `archive_elt` is an entry of the episode list ('publishDate', 'title'
        and 'id' keys are read); `soup` is the parsed episode page. Returns
        None, after printing a notice, when the page has no image yet.
        """
        # publishDate is divided by 1000 before being used as a timestamp
        # (i.e. it is treated as milliseconds); conversion uses local time.
        seconds = int(archive_elt["publishDate"]) / 1000.0
        published = datetime.datetime.fromtimestamp(seconds).date()
        art_imgs = soup.find_all("img", class_="art-image")
        if not art_imgs:
            episode_url = cls.get_url_from_archive_element(archive_elt)
            print("Comic %s is being uploaded, retry later" % episode_url)
            return None
        assert len(art_imgs) > 0
        return {
            "day": published.day,
            "year": published.year,
            "month": published.month,
            "img": [img["src"] for img in art_imgs],
            "title": archive_elt["title"],
        }

    @classmethod
    def get_url_from_archive_element(cls, archive_elt):
        """Return the episode URL built from the element's 'id'."""
        episode_id = str(archive_elt["id"])
        return "http://tapastic.com/episode/" + episode_id

    @classmethod
    def get_archive_elements(cls):
        """Return the episode list embedded in the series page.

        The list lives in the page's JavaScript on a line of the form
        "episodeList : <json>,": the first such line is stripped of its
        prefix/suffix and parsed as JSON. Raises IndexError when no line
        matches.
        """
        prefix, suffix = "episodeList : ", ","
        payloads = []
        for raw_line in urlopen_wrapper(cls.url).readlines():
            line = raw_line.decode("utf-8").strip()
            if line.startswith(prefix) and line.endswith(suffix):
                payloads.append(line[len(prefix):-len(suffix)])
        return json.loads(payloads[0])
4334
4335
4336
class VegetablesForDessert(GenericTapasticComic):
    """Retrieve Vegetables For Dessert comics from Tapastic."""
    # Mirror: http://vegetablesfordessert.tumblr.com
    name = "vegetables"
    long_name = "Vegetables For Dessert"
    url = "http://tapastic.com/series/vegetablesfordessert"
4342
4343
4344
class FowlLanguageTapa(GenericTapasticComic):
    """Retrieve Fowl Language comics from Tapastic."""
    # Mirrors:
    #  - http://www.fowllanguagecomics.com
    #  - http://fowllanguagecomics.tumblr.com
    #  - http://www.gocomics.com/fowl-language
    name = "fowllanguage-tapa"
    long_name = "Fowl Language Comics (from Tapastic)"
    url = "http://tapastic.com/series/Fowl-Language-Comics"
    _categories = ("FOWLLANGUAGE",)
4353
4354
4355
class OscillatingProfundities(GenericTapasticComic):
    """Retrieve Oscillating Profundities comics from Tapastic."""
    name = "oscillating"
    long_name = "Oscillating Profundities"
    url = "http://tapastic.com/series/oscillatingprofundities"
4360
4361
4362
class ZnoflatsComics(GenericTapasticComic):
    """Retrieve Znoflats comics from Tapastic."""
    name = "znoflats"
    long_name = "Znoflats Comics"
    url = "http://tapastic.com/series/Znoflats-Comics"
4367
4368
4369
class SandersenTapastic(GenericTapasticComic):
    """Retrieve Sarah Andersen comics from Tapastic."""
    # Mirrors:
    #  - http://sarahcandersen.com
    #  - http://www.gocomics.com/sarahs-scribbles
    name = "sandersen-tapa"
    long_name = "Sarah Andersen (from Tapastic)"
    url = "http://tapastic.com/series/Doodle-Time"
4376
4377
4378
class TubeyToonsTapastic(GenericTapasticComic):
    """Retrieve TubeyToons comics from Tapastic."""
    # Mirrors:
    #  - http://tubeytoons.com
    #  - http://tubeytoons.tumblr.com
    name = "tubeytoons-tapa"
    long_name = "Tubey Toons (from Tapastic)"
    url = "http://tapastic.com/series/Tubey-Toons"
    # NOTE(review): spelled 'TUNEY', not 'TUBEY' - kept as is because other
    # classes may use the same key; confirm before renaming.
    _categories = ("TUNEYTOONS",)
4386
4387
4388
class AnythingComicTapastic(GenericTapasticComic):
    """Retrieve Anything Comics from Tapastic."""
    # Mirror: http://www.anythingcomic.com
    name = "anythingcomic-tapa"
    long_name = "Anything Comic (from Tapastic)"
    url = "http://tapastic.com/series/anything"
4394
4395
4396
class UnearthedComicsTapastic(GenericTapasticComic):
    """Retrieve Unearthed comics from Tapastic."""
    # Mirrors:
    #  - http://unearthedcomics.com
    #  - http://unearthedcomics.tumblr.com
    name = "unearthed-tapa"
    long_name = "Unearthed Comics (from Tapastic)"
    url = "http://tapastic.com/series/UnearthedComics"
    _categories = ("UNEARTHED",)
4404
4405
4406
class EverythingsStupidTapastic(GenericTapasticComic):
    """Retrieve Everything's Stupid comics from Tapastic."""
    # Mirrors:
    #  - http://www.webtoons.com/en/challenge/everythings-stupid/list?title_no=14591
    #  - http://everythingsstupid.net
    name = "stupid-tapa"
    long_name = 'Everything\'s Stupid (from Tapastic)'
    url = "http://tapastic.com/series/EverythingsStupid"
4413
4414
4415
class JustSayEhTapastic(GenericTapasticComic):
    """Retrieve Just Say Eh comics from Tapastic."""
    # Mirror: http://www.justsayeh.com
    name = "justsayeh-tapa"
    long_name = "Just Say Eh (from Tapastic)"
    url = "http://tapastic.com/series/Just-Say-Eh"
4421
4422
4423
class ThorsThundershackTapastic(GenericTapasticComic):
    """Retrieve Thor's Thundershack comics from Tapastic."""
    # Mirror: http://www.thorsthundershack.com
    name = "thor-tapa"
    long_name = "Thor's Thundershack (from Tapastic)"
    url = "http://tapastic.com/series/Thors-Thundershac"
    _categories = ("THOR",)
4430
4431
4432
class OwlTurdTapastic(GenericTapasticComic):
    """Retrieve Owl Turd comics from Tapastic."""
    # Mirror: http://owlturd.com
    name = "owlturd-tapa"
    long_name = "Owl Turd (from Tapastic)"
    url = "http://tapastic.com/series/Owl-Turd-Comix"
    _categories = ("OWLTURD",)
4439
4440
4441
class GoneIntoRaptureTapastic(GenericTapasticComic):
    """Retrieve Gone Into Rapture comics from Tapastic."""
    # Mirrors:
    #  - http://goneintorapture.tumblr.com
    #  - http://www.goneintorapture.com
    name = "rapture-tapa"
    long_name = "Gone Into Rapture (from Tapastic)"
    url = "http://tapastic.com/series/Goneintorapture"
4448
4449
4450
class HeckIfIKnowComicsTapa(GenericTapasticComic):
    """Retrieve Heck If I Know Comics from Tapastic."""
    # Mirror: http://heckifiknowcomics.com
    name = "heck-tapa"
    long_name = "Heck if I Know comics (from Tapastic)"
    url = "http://tapastic.com/series/Regular"
4456
4457
4458
class CheerUpEmoKidTapa(GenericTapasticComic):
    """Retrieve Cheer Up Emo Kid comics from Tapastic."""
    # Mirrors:
    #  - http://www.cheerupemokid.com
    #  - http://enzocomics.tumblr.com
    name = "cuek-tapa"
    long_name = "Cheer Up Emo Kid (from Tapastic)"
    url = "http://tapastic.com/series/CUEK"
4465
4466
4467
class BigFootJusticeTapa(GenericTapasticComic):
    """Retrieve Big Foot Justice comics from Tapastic."""
    # Mirror: http://bigfootjustice.com
    name = "bigfoot-tapa"
    long_name = "Big Foot Justice (from Tapastic)"
    url = "http://tapastic.com/series/bigfoot-justice"
4473
4474
4475
class UpAndOutTapa(GenericTapasticComic):
    """Retrieve Up & Out comics from Tapastic."""
    # Mirror: http://upandoutcomic.tumblr.com
    name = "upandout-tapa"
    long_name = "Up And Out (from Tapastic)"
    url = "http://tapastic.com/series/UP-and-OUT"
4481
4482
4483
class ToonHoleTapa(GenericTapasticComic):
    """Retrieve Toon Hole comics from Tapastic."""
    # Mirror: http://www.toonhole.com
    name = "toonhole-tapa"
    long_name = "Toon Hole (from Tapastic)"
    url = "http://tapastic.com/series/TOONHOLE"
4489
4490
4491
class AngryAtNothingTapa(GenericTapasticComic):
    """Retrieve Angry At Nothing comics from Tapastic."""
    # Mirror: http://www.angryatnothing.net
    name = "angry-tapa"
    long_name = "Angry At Nothing (from Tapastic)"
    url = "http://tapastic.com/series/Comics-yeah-definitely-comics-"
4497
4498
4499
class LeleozTapa(GenericTapasticComic):
    """Retrieve Leleoz comics from Tapastic."""
    # Mirror: http://leleozcomics.tumblr.com
    name = "leleoz-tapa"
    long_name = "Leleoz (from Tapastic)"
    url = "https://tapastic.com/series/Leleoz"
4505
4506
4507
class TheAwkwardYetiTapa(GenericTapasticComic):
    """Retrieve The Awkward Yeti comics from Tapastic."""
    # Mirrors:
    #  - http://www.gocomics.com/the-awkward-yeti
    #  - http://theawkwardyeti.com
    #  - http://larstheyeti.tumblr.com
    name = "yeti-tapa"
    long_name = "The Awkward Yeti (from Tapastic)"
    url = "https://tapastic.com/series/TheAwkwardYeti"
    _categories = ("YETI",)
4516
4517
4518
class AsPerUsualTapa(GenericTapasticComic):
    """Retrieve As Per Usual comics from Tapastic."""
    # Mirror: http://as-per-usual.tumblr.com
    name = "usual-tapa"
    long_name = "As Per Usual (from Tapastic)"
    url = "https://tapastic.com/series/AsPerUsual"
4524
4525
4526
class OneOneOneOneComicTapa(GenericTapasticComic):
    """Retrieve 1111 Comics from Tapastic."""
    # Mirrors:
    #  - http://www.1111comics.me
    #  - http://comics1111.tumblr.com
    name = "1111-tapa"
    long_name = "1111 Comics (from Tapastic)"
    url = "https://tapastic.com/series/1111-Comics"
    _categories = ("ONEONEONEONE",)
4534
4535
4536
class TumbleDryTapa(GenericTapasticComic):
    """Retrieve Tumble Dry comics from Tapastic."""
    # Mirror: http://tumbledrycomics.com
    name = 'tumbledry-tapa'
    # Display name previously read 'Tumblr Dry', a typo for the comic's title.
    long_name = 'Tumble Dry (from Tapastic)'
    url = 'https://tapastic.com/series/TumbleDryComics'
4542
4543
4544
class DeadlyPanelTapa(GenericTapasticComic):
    """Retrieve Deadly Panel comics from Tapastic."""
    # Mirror: http://www.deadlypanel.com
    name = "deadly-tapa"
    long_name = "Deadly Panel (from Tapastic)"
    url = "https://tapastic.com/series/deadlypanel"
4550
4551
4552
class ChrisHallbeckMaxiTapa(GenericTapasticComic):
    """Retrieve Chris Hallbeck's Maximumble comics from Tapastic."""
    # Also on http://chrishallbeck.tumblr.com
    # Also on http://maximumble.com
    name = 'hallbeckmaxi-tapa'
    # Author's name is Hallbeck (display name used to read 'Hallback').
    long_name = 'Chris Hallbeck - Maximumble (from Tapastic)'
    url = 'https://tapastic.com/series/Maximumble'
    # Category key keeps its historical spelling: other classes may share it.
    _categories = ('HALLBACK', )
4560
4561
4562
class ChrisHallbeckMiniTapa(GenericTapasticComic):
    """Retrieve Chris Hallbeck's Minimumble comics from Tapastic."""
    # Also on http://chrishallbeck.tumblr.com
    # Also on http://minimumble.com
    name = 'hallbeckmini-tapa'
    # Author's name is Hallbeck (display name used to read 'Hallback').
    long_name = 'Chris Hallbeck - Minimumble (from Tapastic)'
    url = 'https://tapastic.com/series/Minimumble'
    # Category key keeps its historical spelling: other classes may share it.
    _categories = ('HALLBACK', )
4570
4571
4572
class ChrisHallbeckBiffTapa(GenericTapasticComic):
    """Retrieve Chris Hallbeck's The Book of Biff comics from Tapastic."""
    # Also on http://chrishallbeck.tumblr.com
    # Also on http://thebookofbiff.com
    name = 'hallbeckbiff-tapa'
    # Author's name is Hallbeck (display name used to read 'Hallback').
    long_name = 'Chris Hallbeck - The Book of Biff (from Tapastic)'
    url = 'https://tapastic.com/series/Biff'
    # Category key keeps its historical spelling: other classes may share it.
    _categories = ('HALLBACK', )
4580
4581
4582
class RandoWisTapa(GenericTapasticComic):
    """Retrieve RandoWis comics from Tapastic."""
    # Mirror: https://randowis.com
    name = "randowis-tapa"
    long_name = "RandoWis (from Tapastic)"
    url = "https://tapastic.com/series/RandoWis"
4588
4589
4590
class PigeonGazetteTapa(GenericTapasticComic):
    """Retrieve The Pigeon Gazette comics from Tapastic."""
    # Mirror: http://thepigeongazette.tumblr.com
    name = "pigeon-tapa"
    long_name = "The Pigeon Gazette (from Tapastic)"
    url = "https://tapastic.com/series/The-Pigeon-Gazette"
4596
4597
4598
class TheOdd1sOutTapa(GenericTapasticComic):
    """Retrieve The Odd 1s Out comics from Tapastic."""
    # Mirrors:
    #  - http://theodd1sout.com
    #  - http://theodd1sout.tumblr.com
    name = "theodd-tapa"
    long_name = "The Odd 1s Out (from Tapastic)"
    url = "https://tapastic.com/series/Theodd1sout"
4605
4606
4607
class TheWorldIsFlatTapa(GenericTapasticComic):
    """Retrieve The World Is Flat comics from Tapastic."""
    # Mirror: http://theworldisflatcomics.tumblr.com
    name = "flatworld-tapa"
    long_name = "The World Is Flat (from Tapastic)"
    url = "https://tapastic.com/series/The-World-is-Flat"
4613
4614
4615
class MisterAndMeTapa(GenericTapasticComic):
    """Retrieve Mister & Me comics from Tapastic."""
    # Mirrors:
    #  - http://www.mister-and-me.com
    #  - http://www.gocomics.com/mister-and-me
    name = "mister-tapa"
    long_name = "Mister & Me (from Tapastic)"
    url = "https://tapastic.com/series/Mister-and-Me"
4622
4623
4624
class TalesOfAbsurdityTapa(GenericTapasticComic):
    """Retrieve Tales Of Absurdity comics from Tapastic."""
    # Mirrors:
    #  - http://talesofabsurdity.com
    #  - http://talesofabsurdity.tumblr.com
    name = "absurdity-tapa"
    long_name = "Tales of Absurdity (from Tapastic)"
    url = "http://tapastic.com/series/Tales-Of-Absurdity"
    _categories = ("ABSURDITY",)
4632
4633
4634
class BFGFSTapa(GenericTapasticComic):
    """Retrieve BFGFS comics from Tapastic."""
    # Mirrors:
    #  - http://bfgfs.com
    #  - http://bfgfs.tumblr.com
    name = "bfgfs-tapa"
    long_name = "BFGFS (from Tapastic)"
    url = "https://tapastic.com/series/BFGFS"
4641
4642
4643
class DoodleForFoodTapa(GenericTapasticComic):
    """Retrieve Doodle For Food comics from Tapastic."""
    # Mirror: http://doodleforfood.com
    name = "doodle-tapa"
    long_name = "Doodle For Food (from Tapastic)"
    url = "https://tapastic.com/series/Doodle-for-Food"
4649
4650
4651
class MrLovensteinTapa(GenericTapasticComic):
    """Retrieve Mr. Lovenstein comics from Tapastic."""
    # Listed location: https://tapastic.com/series/MrLovenstein
    name = "mrlovenstein-tapa"
    long_name = "Mr. Lovenstein (from Tapastic)"
    url = "https://tapastic.com/series/MrLovenstein"
4657
4658
4659
class CassandraCalinTapa(GenericTapasticComic):
    """Retrieve C. Cassandra comics from Tapastic."""
    # Mirrors:
    #  - http://cassandracalin.com
    #  - http://c-cassandra.tumblr.com
    name = "cassandra-tapa"
    long_name = "Cassandra Calin (from Tapastic)"
    url = "https://tapastic.com/series/C-Cassandra-comics"
4666
4667
4668
class WafflesAndPancakes(GenericTapasticComic):
    """Retrieve Waffles And Pancakes comics from Tapastic."""
    # Mirror: http://wandpcomic.com
    name = "waffles"
    long_name = "Waffles And Pancakes"
    url = "https://tapastic.com/series/Waffles-and-Pancakes"
4674
4675
4676
class YesterdaysPopcornTapastic(GenericTapasticComic):
    """Retrieve Yesterday's Popcorn comics from Tapastic."""
    # Mirrors:
    #  - http://www.yesterdayspopcorn.com
    #  - http://yesterdayspopcorn.tumblr.com
    name = "popcorn-tapa"
    long_name = "Yesterday's Popcorn (from Tapastic)"
    url = "https://tapastic.com/series/Yesterdays-Popcorn"
4683
4684
4685
class OurSuperAdventureTapastic(GenericTapasticComic):
    """Retrieve Our Super Adventure comics from Tapastic."""
    # Related sites:
    #  - http://www.oursuperadventure.com
    #  - http://sarahssketchbook.tumblr.com
    #  - http://sarahgraley.com
    name = "superadventure-tapastic"
    long_name = "Our Super Adventure (from Tapastic)"
    url = "https://tapastic.com/series/Our-Super-Adventure"
4693
4694
4695
class NamelessPCs(GenericTapasticComic):
    """Retrieve Nameless PCs comics from Tapastic."""
    # Mirror: http://namelesspcs.com
    name = "namelesspcs-tapa"
    long_name = "NamelessPCs (from Tapastic)"
    url = "https://tapastic.com/series/NamelessPC"
4701
4702
4703
def get_subclasses(klass):
    """Return every direct and indirect subclass of `klass` as a list.

    Direct subclasses come first, followed by the descendants of each of
    them in turn. A class reachable through several parents appears once
    per path.
    """
    direct_children = klass.__subclasses__()
    descendants = list(direct_children)
    for child in direct_children:
        descendants += get_subclasses(child)
    return descendants
4709
4710
4711
def remove_st_nd_rd_th_from_date(string):
    """Strip English ordinal suffixes (1st/2nd/3rd/4th) from a date string.

    Only a 'st', 'nd', 'rd' or 'th' that directly follows a digit and ends
    the word is removed, so the rest of the text (e.g. 'Monday', 'Saturday',
    'August', 'month') is left untouched and remains parsable.

    Returns the cleaned string.
    """
    # Look-behind keeps the digit; \b avoids cutting into longer words.
    return re.sub(r'(?<=[0-9])(?:st|nd|rd|th)\b', '', string)
4719
4720
4721
def string_to_date(string, date_format, local=DEFAULT_LOCAL):
    """Convert a string to a date object.

    Wrapper around datetime.datetime.strptime which temporarily switches the
    process locale (LC_ALL) to `local` so that locale-dependent directives
    (month/day names) are parsed consistently.

    string: text to parse.
    date_format: strptime format, see
        https://docs.python.org/3/library/datetime.html#strftime-and-strptime-behavior
    local: locale name to use while parsing.

    Returns a datetime.date. Raises ValueError when the string does not match
    the format and locale.Error when the locale cannot be set; in both cases
    the previous locale is back in place when the exception propagates.
    """
    prev_locale = locale.setlocale(locale.LC_ALL)
    if local == prev_locale:
        return datetime.datetime.strptime(string, date_format).date()
    locale.setlocale(locale.LC_ALL, local)
    try:
        return datetime.datetime.strptime(string, date_format).date()
    finally:
        # Restore the caller's locale even when parsing fails.
        locale.setlocale(locale.LC_ALL, prev_locale)
4732
4733
4734
# Every class deriving, directly or indirectly, from GenericComic.
COMICS = set(get_subclasses(GenericComic))
# Classes whose name is None are left out of the lookup tables below.
VALID_COMICS = [comic for comic in COMICS if comic.name is not None]
COMIC_NAMES = {comic.name: comic for comic in VALID_COMICS}
# Two classes sharing a name would collapse into a single dict entry.
assert len(COMIC_NAMES) == len(VALID_COMICS)
CLASS_NAMES = {comic.__name__ for comic in VALID_COMICS}
# Same check for the Python class names.
assert len(CLASS_NAMES) == len(VALID_COMICS)
4740