Completed
Push — master ( 8c5d47...22bd4c )
by De
01:27
created

comics.py (2 issues)

1
#! /usr/bin/python3
2
# vim: set expandtab tabstop=4 shiftwidth=4 :
3
"""Module to retrieve webcomics"""
4
5
from comic_abstract import GenericComic, get_date_for_comic
6
import re
7
from datetime import date, timedelta
8
import datetime
9
from urlfunctions import get_soup_at_url, urljoin_wrapper,\
10
    convert_iri_to_plain_ascii_uri, load_json_at_url, urlopen_wrapper
11
import json
12
import locale
13
import urllib
14
15
DEFAULT_LOCAL = 'en_GB.UTF-8'
16
17
18
class Xkcd(GenericComic):
    """Retriever for xkcd comics, driven by the JSON documents of the site."""
    name = 'xkcd'
    long_name = 'xkcd'
    url = 'http://xkcd.com'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Yield the comics following `last_comic` (GenericComic's abstract method).

        Comics are numbered: iteration starts right after the 'num' of
        `last_comic` (or at 1 when `last_comic` is falsy) and stops at the
        'num' reported by the site's 'info.0.json' document.
        """
        json_url = urljoin_wrapper(cls.url, 'info.0.json')
        start = (last_comic['num'] if last_comic else 0) + 1
        stop = load_json_at_url(json_url)['num'] + 1
        for num in range(start, stop):
            comic = cls.get_comic_info(num)
            if comic is None:
                # Some numbers are deliberately skipped by get_comic_info.
                continue
            yield comic

    @classmethod
    def get_comic_info(cls, num):
        """Return a dict describing comic number `num`, or None for number 404."""
        if num == 404:
            return None
        json_url = urljoin_wrapper(cls.url, '%d/info.0.json' % num)
        comic_json = load_json_at_url(json_url)
        assert comic_json['num'] == num, json_url
        info = {
            'json_url': json_url,
            'num': num,
            'url': urljoin_wrapper(cls.url, str(num)),
            'prefix': '%d-' % num,
            'img': [comic_json['img']],
        }
        # Date components are converted to integers.
        for key in ('day', 'month', 'year'):
            info[key] = int(comic_json[key])
        # The remaining fields are copied verbatim from the JSON document.
        for key in ('link', 'news', 'safe_title', 'transcript', 'alt', 'title'):
            info[key] = comic_json[key]
        return info
60
61
62
# Helper functions corresponding to get_url_from_link/get_url_from_archive_element
63
64
65
@classmethod
def get_href(cls, link):
    """Implementation of get_url_from_link/get_url_from_archive_element.

    Returns the 'href' entry of `link` as is.
    """
    href = link['href']
    return href
69
70
71
@classmethod
def join_cls_url_to_href(cls, link):
    """Implementation of get_url_from_link/get_url_from_archive_element.

    Returns the 'href' entry of `link` joined to the `url` of the class
    with `urljoin_wrapper`.
    """
    href = link['href']
    return urljoin_wrapper(cls.url, href)
75
76
77
class GenericNavigableComic(GenericComic):
    """Generic class for "navigable" comics: with first/next arrows.

    This class applies to comics where previous and next comics can be
    accessed from a given comic. Once given a starting point (either
    the first comic or the last comic retrieved), it will handle the
    navigation, the retrieval of the soup object and the setting of
    the 'url' attribute on retrieved comics. This greatly limits the
    amount of boilerplate code in the different implementation classes.

    The `get_next_comic` method is implemented in terms of new,
    more specialized methods to be implemented/overridden:
        - get_first_comic_link
        - get_navi_link
        - get_comic_info
        - get_url_from_link
    """
    _categories = ('NAVIGABLE', )

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comic.

        Sometimes this can be retrieved from any comic page, sometimes from
        the archive page, sometimes it doesn't exist at all and one has
        to iterate backward to find it before hardcoding the result found.
        """
        raise NotImplementedError

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next (or previous - for dev purposes) comic."""
        raise NotImplementedError

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic."""
        raise NotImplementedError

    @classmethod
    def get_url_from_link(cls, link):
        """Get url corresponding to a link. Default implementation is similar to get_href."""
        return link['href']

    @classmethod
    def get_next_link(cls, last_soup):
        """Get link to next comic (logged wrapper around get_navi_link)."""
        link = cls.get_navi_link(last_soup, True)
        cls.log("Next link is %s" % link)
        return link

    @classmethod
    def get_prev_link(cls, last_soup):
        """Get link to previous comic (logged wrapper around get_navi_link)."""
        link = cls.get_navi_link(last_soup, False)
        cls.log("Prev link is %s" % link)
        return link

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generic implementation of get_next_comic for navigable comics.

        Starts from the link following `last_comic` (or from the first
        comic link when `last_comic` is falsy) and yields the dict returned
        by `get_comic_info` for each page reached, with its 'url' set.
        Stops when there is no next link or when the next link leads to
        the URL that was just handled.
        """
        url = last_comic['url'] if last_comic else None
        cls.log("starting 'get_next_comic' from %s" % url)
        next_comic = \
            cls.get_next_link(get_soup_at_url(url)) \
            if url else \
            cls.get_first_comic_link()
        cls.log("next/first comic will be %s (url is %s)" % (str(next_comic), url))
        # cls.check_navigation(url)
        while next_comic:
            prev_url, url = url, cls.get_url_from_link(next_comic)
            if prev_url == url:
                # A page linking to itself would loop forever.
                cls.log("got same url %s" % url)
                break
            cls.log("about to get %s (%s)" % (url, str(next_comic)))
            soup = get_soup_at_url(url)
            comic = cls.get_comic_info(soup, next_comic)
            if comic is not None:
                # The 'url' key is owned by this method, not by get_comic_info.
                assert 'url' not in comic
                comic['url'] = url
                yield comic
            next_comic = cls.get_next_link(soup)
            cls.log("next comic will be %s" % str(next_comic))

    @classmethod
    def check_first_link(cls):
        """Check that navigation to first comic seems to be working - for dev purposes.

        Returns True when a first link exists and its URL can be fetched
        without an HTTP error, False otherwise.
        """
        # A plain `import urllib` does not guarantee that the `urllib.error`
        # submodule is loaded; import it explicitly before referencing it.
        import urllib.error
        cls.log("about to check first link")
        ok = True
        firstlink = cls.get_first_comic_link()
        if firstlink is None:
            print("From %s : no first link" % cls.url)
            ok = False
        else:
            firsturl = cls.get_url_from_link(firstlink)
            try:
                get_soup_at_url(firsturl)
            except urllib.error.HTTPError:
                print("From %s : invalid first url" % cls.url)
                ok = False
        cls.log("checked first link -> returned %d" % ok)
        return ok

    @classmethod
    def check_prev_next_links(cls, url):
        """Check that navigation to prev/next from a given URL seems to be working - for dev purposes.

        Going backward then forward, and forward then backward, is
        expected to lead back to `url`. Returns True when the checks pass.
        """
        cls.log("about to check prev/next from %s" % url)
        ok = True
        if url is None:
            prevlink, nextlink = None, None
        else:
            soup = get_soup_at_url(url)
            prevlink, nextlink = cls.get_prev_link(soup), cls.get_next_link(soup)
        if prevlink is None and nextlink is None:
            print("From %s : no previous nor next" % url)
            ok = False
        else:
            if prevlink:
                prevurl = cls.get_url_from_link(prevlink)
                prevsoup = get_soup_at_url(prevurl)
                prevnextlink = cls.get_next_link(prevsoup)
                prevnext = cls.get_url_from_link(prevnextlink) if prevnextlink is not None else "NO URL"
                if prevnext != url:
                    print("From %s, going backward then forward leads to %s" % (url, prevnext))
                    ok = False
            if nextlink:
                nexturl = cls.get_url_from_link(nextlink)
                # A "next" link pointing to the current page is not followed.
                if nexturl != url:
                    nextsoup = get_soup_at_url(nexturl)
                    nextprevlink = cls.get_prev_link(nextsoup)
                    nextprev = cls.get_url_from_link(nextprevlink) if nextprevlink is not None else "NO URL"
                    if nextprev != url:
                        print("From %s, going forward then backward leads to %s" % (url, nextprev))
                        ok = False
        cls.log("checked prev/next from %s -> returned %d" % (url, ok))
        return ok

    @classmethod
    def check_navigation(cls, url):
        """Check that navigation functions seem to be working - for dev purposes."""
        cls.log("about to check navigation from %s" % url)
        first = cls.check_first_link()
        prevnext = cls.check_prev_next_links(url)
        ok = first and prevnext
        cls.log("checked navigation from %s -> returned %d" % (url, ok))
        return ok
223
224
225
class GenericListableComic(GenericComic):
    """Generic class for "listable" comics: with a list of comics (aka 'archive').

    The `get_next_comic` method is implemented in terms of new,
    more specialized methods to be implemented/overridden:
        - get_archive_elements
        - get_url_from_archive_element
        - get_comic_info
    """
    _categories = ('LISTABLE', )

    @classmethod
    def get_archive_elements(cls):
        """Return an iterable over the elements of the archive."""
        raise NotImplementedError

    @classmethod
    def get_url_from_archive_element(cls, archive_elt):
        """Return the url matching an element of the archive."""
        raise NotImplementedError

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Return information about a particular comic."""
        raise NotImplementedError

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generic implementation of get_next_comic for listable comics.

        Archive elements are skipped until the url of `last_comic` has been
        seen; every element after it is fetched and yielded. When
        `last_comic` is falsy, every element is fetched.
        """
        waiting_for_url = last_comic['url'] if last_comic else None
        archive_elts = list(cls.get_archive_elements())
        for archive_elt in archive_elts:
            url = cls.get_url_from_archive_element(archive_elt)
            cls.log("considering %s" % url)
            if waiting_for_url is not None:
                # Still looking for the last comic retrieved.
                if waiting_for_url == url:
                    waiting_for_url = None
                continue
            cls.log("about to get %s (%s)" % (url, str(archive_elt)))
            comic = cls.get_comic_info(get_soup_at_url(url), archive_elt)
            if comic is None:
                continue
            assert 'url' not in comic
            comic['url'] = url
            yield comic
        if waiting_for_url is not None:
            print("Did not find %s in the %d comics: there might be a problem" %
                  (waiting_for_url, len(archive_elts)))
272
273
# Helper functions corresponding to get_first_comic_link/get_navi_link
274
275
276
@classmethod
def get_link_rel_next(cls, last_soup, next_):
    """Implementation of get_navi_link: 'link' tag with rel 'next' or 'prev'."""
    rel = 'next' if next_ else 'prev'
    return last_soup.find('link', rel=rel)
280
281
282
@classmethod
def get_a_rel_next(cls, last_soup, next_):
    """Implementation of get_navi_link: 'a' tag with rel 'next' or 'prev'."""
    rel = 'next' if next_ else 'prev'
    return last_soup.find('a', rel=rel)
286
287
288
@classmethod
def get_a_navi_navinext(cls, last_soup, next_):
    """Implementation of get_navi_link: 'a' tag with class 'navi navi-next'/'navi navi-prev'."""
    # ComicPress (WordPress plugin)
    if next_:
        wanted_class = 'navi navi-next'
    else:
        wanted_class = 'navi navi-prev'
    return last_soup.find('a', class_=wanted_class)
293
294
295
@classmethod
def get_a_navi_comicnavnext_navinext(cls, last_soup, next_):
    """Implementation of get_navi_link based on the 'navi comic-nav-... navi-...' classes."""
    if next_:
        wanted_class = 'navi comic-nav-next navi-next'
    else:
        wanted_class = 'navi comic-nav-previous navi-prev'
    return last_soup.find('a', class_=wanted_class)
299
300
301
@classmethod
def get_a_comicnavbase_comicnavnext(cls, last_soup, next_):
    """Implementation of get_navi_link based on the 'comic-nav-base comic-nav-...' classes."""
    if next_:
        wanted_class = 'comic-nav-base comic-nav-next'
    else:
        wanted_class = 'comic-nav-base comic-nav-previous'
    return last_soup.find('a', class_=wanted_class)
305
306
307
@classmethod
def get_a_navi_navifirst(cls):
    """Implementation of get_first_comic_link: 'a' tag with class 'navi navi-first'."""
    # ComicPress (WordPress plugin)
    home_soup = get_soup_at_url(cls.url)
    return home_soup.find('a', class_='navi navi-first')
312
313
314
@classmethod
def get_div_navfirst_a(cls):
    """Implementation of get_first_comic_link.

    Returns the first 'a' tag found inside the 'div' of class 'nav-first'
    on the page at `cls.url`, or None when there is no such 'div' (so that
    callers can handle a missing first link instead of hitting an
    AttributeError on None).
    """
    div = get_soup_at_url(cls.url).find('div', class_="nav-first")
    if div is None:
        return None
    return div.find('a')
318
319
320
@classmethod
def get_a_comicnavbase_comicnavfirst(cls):
    """Implementation of get_first_comic_link: 'a' tag with class 'comic-nav-base comic-nav-first'."""
    home_soup = get_soup_at_url(cls.url)
    return home_soup.find('a', class_='comic-nav-base comic-nav-first')
324
325
326
@classmethod
def simulate_first_link(cls):
    """Implementation of get_first_comic_link building a link-like object
    from the `first_url` attribute of the class.

    Note: the first URL can easily be found using:
    `get_first_comic_link = navigate_to_first_comic`.
    """
    fake_link = {'href': cls.first_url}
    return fake_link
335
336
337
@classmethod
def navigate_to_first_comic(cls):
    """Implementation of get_first_comic_link walking backward from a
    starting URL until no previous comic is found.

    The starting URL is the `first_url` attribute of the class when it is
    set; otherwise it is asked interactively. Each URL visited is printed.
    The URL reached is stored in `cls.first_url` and returned as a
    link-like dict.

    This is convenient during development; once the URL is known, it is
    better to hardcode it and use `simulate_first_link`.
    """
    url = getattr(cls, 'first_url', None)
    if url is None or url == NotImplemented:
        url = input("Get starting URL for %s (%s):" % (cls.name, cls.url))
    while True:
        print(url)
        prev_link = cls.get_prev_link(get_soup_at_url(url))
        if not prev_link:
            break
        url = cls.get_url_from_link(prev_link)
    cls.first_url = url
    return {'href': url}
362
363
364
class GenericEmptyComic(GenericComic):
    """Generic class for comics where nothing is to be done.

    It can be used to temporarily deactivate a comic that does not work
    properly, by replacing `class MyComic(GenericWhateverComic)` with
    `class MyComic(GenericEmptyComic, GenericWhateverComic)`."""
    _categories = ('EMPTY', )

    @classmethod
    def get_next_comic(cls, last_comic):
        """Implementation of get_next_comic returning no comics."""
        cls.log("comic is considered as empty - returning no comic")
        no_comics = []
        return no_comics
377
378
379
class GenericComicNotWorking(GenericEmptyComic):
    """Subclass of GenericEmptyComic used when a comic is not working.

    This is more explicit than GenericEmptyComic as it highlights that
    only the implementation is broken and that it can be fixed."""
    _categories = ('NOTWORKING',)
385
386
387
class GenericUnavailableComic(GenericEmptyComic):
    """Subclass of GenericEmptyComic used when a comic is not available.

    This is more explicit than GenericEmptyComic as it highlights that
    the source of the comic is not available but is expected to be back
    soonish. See also GenericDeletedComic."""
    _categories = ('UNAVAILABLE',)
394
395
396
class GenericDeletedComic(GenericEmptyComic):
    """Subclass of GenericEmptyComic used when a comic does not exist anymore.

    This is more explicit than GenericEmptyComic as it highlights that
    the source of the comic is gone and probably cannot be fixed. The
    corresponding classes are kept as the downloaded data can still be
    used. See also GenericUnavailableComic."""
    _categories = ('DELETED',)
404
405
406
class ExtraFabulousComics(GenericNavigableComic):
    """Class to retrieve Extra Fabulous Comics."""
    # Also on https://extrafabulouscomics.tumblr.com
    name = 'efc'
    long_name = 'Extra Fabulous Comics'
    url = 'http://extrafabulouscomics.com'
    _categories = ('EFC', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://extrafabulouscomics.com/comic/buttfly/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        Returns a dict with the title, the sources of the images located
        under '<url>/wp-content/uploads/', the publication date
        (day/month/year) and a file prefix built from the title.
        """
        # The site URL is escaped so that its dots only match literal dots.
        img_src_re = re.compile('^%s/wp-content/uploads/' % re.escape(cls.url))
        imgs = soup.find_all('img', src=img_src_re)
        title = soup.find('meta', property='og:title')['content']
        # Only the leading 'YYYY-MM-DD' part of the timestamp is used.
        date_str = soup.find('meta', property='article:published_time')['content'][:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        return {
            'title': title,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'prefix': title + '-'
        }
433
434
435
class GenericLeMondeBlog(GenericNavigableComic):
    """Generic class to retrieve comics from Le Monde blogs.

    Subclasses provide `first_url` and may override `date_format`.
    """
    _categories = ('LEMONDE', 'FRANCAIS')
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = NotImplemented
    date_format = "%d %B %Y"

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, short link, images and date found on a blog page."""
        shortlink = soup.find('link', rel='shortlink')
        url2 = shortlink['href']
        og_title = soup.find('meta', property='og:title')
        title = og_title['content']
        date_span = soup.find("span", class_="entry-date")
        # Dates are parsed with the French locale.
        day = string_to_date(date_span.string, cls.date_format, "fr_FR.utf8")
        img_srcs = []
        for meta in soup.find_all('meta', property='og:image'):
            img_srcs.append(convert_iri_to_plain_ascii_uri(meta['content']))
        return {
            'title': title,
            'url2': url2,
            'img': img_srcs,
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
459
460
461
class ZepWorld(GenericLeMondeBlog):
    """Retriever for the Zep World comics (Le Monde blog)."""
    name = 'zep'
    long_name = 'Zep World'
    url = 'http://zepworld.blog.lemonde.fr'
    first_url = 'http://zepworld.blog.lemonde.fr/2014/10/31/bientot-le-blog-de-zep/'
467
468
469
class Vidberg(GenericLeMondeBlog):
    """Retriever for the Vidberg comics (Le Monde blog)."""
    name = "vidberg"
    long_name = "Vidberg - l'actu en patates"
    url = 'http://vidberg.blog.lemonde.fr'
    # Not the first but I didn't find an efficient way to retrieve it
    first_url = 'http://vidberg.blog.lemonde.fr/2012/02/09/revue-de-campagne-la-campagne-du-modem-semballe/'
476
477
478
class Plantu(GenericLeMondeBlog):
    """Retriever for the Plantu comics (Le Monde blog)."""
    name = "plantu"
    long_name = 'Plantu'
    url = 'http://plantu.blog.lemonde.fr'
    first_url = 'http://plantu.blog.lemonde.fr/2014/10/28/stress-test-a-bruxelles/'
484
485
486
class XavierGorce(GenericLeMondeBlog):
    """Retriever for the Xavier Gorce comics (Le Monde blog)."""
    name = "gorce"
    long_name = 'Xavier Gorce'
    url = 'http://xaviergorce.blog.lemonde.fr'
    first_url = 'http://xaviergorce.blog.lemonde.fr/2015/01/09/distinction/'
492
493
494
class CartooningForPeace(GenericLeMondeBlog):
    """Retriever for the Cartooning For Peace comics (Le Monde blog)."""
    name = "forpeace"
    long_name = 'Cartooning For Peace'
    url = 'http://cartooningforpeace.blog.lemonde.fr'
    first_url = 'http://cartooningforpeace.blog.lemonde.fr/2014/12/15/bado/'
500
501
502
class Aurel(GenericLeMondeBlog):
    """Retriever for the Aurel comics (Le Monde blog)."""
    name = "aurel"
    long_name = 'Aurel'
    url = 'http://aurel.blog.lemonde.fr'
    first_url = 'http://aurel.blog.lemonde.fr/2014/09/29/le-senat-repasse-a-droite/'
508
509
510
class LesCulottees(GenericLeMondeBlog):
    """Retriever for the Les Culottees comics (Le Monde blog)."""
    name = "culottees"
    long_name = "Les Culottees"
    url = 'http://lesculottees.blog.lemonde.fr'
    first_url = 'http://lesculottees.blog.lemonde.fr/2016/01/11/clementine-delait-femme-a-barbe/'
516
517
518
class UneAnneeAuLycee(GenericLeMondeBlog):
    """Retriever for the Une Annee Au Lycee comics (Le Monde blog)."""
    name = "lycee"
    long_name = "Une Annee au Lycee"
    url = "http://uneanneeaulycee.blog.lemonde.fr"
    first_url = 'http://uneanneeaulycee.blog.lemonde.fr/2016/06/13/la-semaine-du-bac-est-arrivee/'
524
525
526
class LisaMandel(GenericLeMondeBlog):
    """Retriever for the Lisa Mandel comics (Le Monde blog)."""
    name = "mandel"
    long_name = "Lisa Mandel (HP, hors-service)"
    url = "http://lisamandel.blog.lemonde.fr"
    first_url = "http://lisamandel.blog.lemonde.fr/2016/02/23/premiers-jours-a-calais/"
532
533
534
class Avventura(GenericLeMondeBlog):
    """Retriever for the L'Avventura comics (Le Monde blog).

    Dates on this blog use the numeric 'day/month/year' format.
    """
    name = "avventura"
    long_name = "Avventura"
    url = "http://lavventura.blog.lemonde.fr"
    first_url = "http://lavventura.blog.lemonde.fr/2013/11/23/roma-paris-aller-simple/"
    date_format = '%d/%m/%Y'
541
542
543
class MorganNavarro(GenericLeMondeBlog):
    """Retriever for the Morgan Navarro comics (Le Monde blog)."""
    name = "navarro"
    long_name = "Morgan Navarro (Ma vie de reac)"
    url = "http://morgannavarro.blog.lemonde.fr"
    first_url = "http://morgannavarro.blog.lemonde.fr/2015/09/09/le-doute/"
549
550
551
class Rall(GenericComicNotWorking, GenericNavigableComic):
    """Retriever for the Ted Rall comics."""
    # Also on http://www.gocomics.com/tedrall
    name = 'rall'
    long_name = 'Ted Rall'
    url = 'http://rall.com/comic'
    _categories = ('RALL',)
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    # Not the first but I didn't find an efficient way to retrieve it
    first_url = 'http://rall.com/2014/01/30/los-angeles-times-cartoon-well-miss-those-california-flowers'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, author, date, description and images of a comic page."""
        title = soup.find('meta', property='og:title')['content']
        author_span = soup.find("span", class_="author vcard")
        author = author_span.find("a").string
        date_span = soup.find("span", class_="entry-date")
        day = string_to_date(date_span.string, "%B %d, %Y")
        desc = soup.find('meta', property='og:description')['content']
        content_imgs = soup.find('div', class_='entry-content').find_all('img')
        # The last 7 images are dropped: remove social media buttons
        img_srcs = [img['src'] for img in content_imgs[:-7]]
        return {
            'title': title,
            'author': author,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'description': desc,
            'img': img_srcs,
        }
582
583
584
class Dilem(GenericNavigableComic):
    """Retriever for the Ali Dilem comics."""
    name = 'dilem'
    long_name = 'Ali Dilem'
    url = 'http://information.tv5monde.com/dilem'
    _categories = ('FRANCAIS',)
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://information.tv5monde.com/dilem/2004-06-26'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the link to the next or previous comic, None if missing."""
        # prev is next / next is prev
        li_class = 'prev' if next_ else 'next'
        li = last_soup.find('li', class_=li_class)
        if not li:
            return None
        return li.find('a')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return short url, title, images and date of a comic page."""
        short_url = soup.find('link', rel='shortlink')['href']
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        img_metas = soup.find_all('meta', property='og:image')
        # Only the leading 'YYYY-MM-DD' part of the date is parsed.
        raw_date = soup.find('span', property='dc:date')['content']
        day = string_to_date(raw_date[:10], "%Y-%m-%d")
        return {
            'short_url': short_url,
            'title': title,
            'img': [meta['content'] for meta in img_metas],
            'day': day.day,
            'month': day.month,
            'year': day.year,
        }
618
619
620
class SpaceAvalanche(GenericNavigableComic):
    """Retriever for the Space Avalanche comics."""
    name = 'avalanche'
    long_name = 'Space Avalanche'
    url = 'http://www.spaceavalanche.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return a hardcoded link-like dict for the first comic."""
        first = {
            'href': "http://www.spaceavalanche.com/2009/02/02/irish-sea/",
            'title': "Irish Sea",
        }
        return first

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, date and images of a comic page.

        The title comes from the link and the date from the
        '/year/month/day/' components of the url.
        """
        url_date_re = re.compile('.*/([0-9]*)/([0-9]*)/([0-9]*)/.*$')
        title = link['title']
        url = cls.get_url_from_link(link)
        year, month, day = map(int, url_date_re.match(url).groups())
        entry = soup.find("div", class_="entry")
        img_srcs = [img['src'] for img in entry.find_all("img")]
        return {
            'title': title,
            'day': day,
            'month': month,
            'year': year,
            'img': img_srcs,
        }
648
649
650
class ZenPencils(GenericNavigableComic):
    """Retriever for the ZenPencils comics."""
    # Also on http://zenpencils.tumblr.com
    # Also on http://www.gocomics.com/zen-pencils
    name = 'zenpencils'
    long_name = 'Zen Pencils'
    url = 'http://zenpencils.com'
    _categories = ('ZENPENCILS',)
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://zenpencils.com/comic/1-ralph-waldo-emerson-make-them-cry/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, author, date and images of a comic page."""
        comic_div = soup.find('div', id='comic')
        imgs = comic_div.find_all('img')
        # imgs2 = soup.find_all('meta', property='og:image')
        post = soup.find('div', class_='post-content')
        author = post.find("span", class_="post-author").find("a").string
        title = soup.find('h2', class_='post-title').string
        day = string_to_date(post.find('span', class_='post-date').string, "%B %d, %Y")
        # Sanity checks on the images found.
        assert imgs
        assert all(i['alt'] == i['title'] for i in imgs)
        assert all(i['alt'] in (title, "") for i in imgs)
        img_urls = [urljoin_wrapper(cls.url, i['src']) for i in imgs]
        return {
            'title': title,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': img_urls,
        }
683
684
685
class ItsTheTie(GenericDeletedComic, GenericNavigableComic):
    """Retriever for the It's the tie comics."""
    # Also on http://itsthetie.tumblr.com
    # Also on https://tapastic.com/series/itsthetie
    name = 'tie'
    long_name = "It's the tie"
    url = 'http://itsthetie.com'
    _categories = ('TIE',)
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, date, images (bonus ones included) and tags of a comic page."""
        title = soup.find('h1', class_='comic-title').find('a').string
        meta_header = soup.find('header', class_='comic-meta entry-meta')
        day = string_to_date(meta_header.find('a').string, "%B %d, %Y")
        # Bonus images may or may not be in meta og:image.
        og_srcs = [m['content'] for m in soup.find_all('meta', property='og:image')]
        bonus_srcs = [b['data-oversrc']
                      for b in soup.find_all('img', attrs={'data-oversrc': True})]
        all_imgs_src = []
        for src in og_srcs + [s for s in bonus_srcs if s not in og_srcs]:
            if src.endswith("/2016/01/bonus-panel.png"):
                continue
            all_imgs_src.append(src)
        tag_meta = soup.find('meta', property='article:tag')
        if tag_meta:
            tags = tag_meta['content']
        else:
            tags = ""
        return {
            'title': title,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': all_imgs_src,
            'tags': tags,
        }
719
720
721
class PenelopeBagieu(GenericNavigableComic):
    """Retriever for the comics of Penelope Bagieu's blog."""
    name = 'bagieu'
    long_name = 'Ma vie est tout a fait fascinante (Bagieu)'
    url = 'http://www.penelope-jolicoeur.com'
    _categories = ('FRANCAIS',)
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.penelope-jolicoeur.com/2007/02/ma-vie-mon-oeuv.html'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, images and date of a blog entry."""
        date_header = soup.find('h2', class_='date-header')
        # Dates are parsed with the French locale.
        day = string_to_date(date_header.string, "%A %d %B %Y", "fr_FR.utf8")
        body = soup.find('div', class_='entry-body')
        img_srcs = [img['src'] for img in body.find_all('img')]
        title = soup.find('h3', class_='entry-header').string
        return {
            'title': title,
            'img': img_srcs,
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
745
746
747
class OneOneOneOneComic(GenericComicNotWorking, GenericNavigableComic):
    """Retriever for the 1111 Comics."""
    # Also on http://comics1111.tumblr.com
    # Also on https://tapastic.com/series/1111-Comics
    name = '1111'
    long_name = '1111 Comics'
    url = 'http://www.1111comics.me'
    _categories = ('ONEONEONEONE',)
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, date and images of a comic page."""
        title_link = soup.find('h1', class_='comic-title').find('a')
        title = title_link.string
        meta_header = soup.find('header', class_='comic-meta entry-meta')
        day = string_to_date(meta_header.find('a').string, "%B %d, %Y")
        img_srcs = [meta['content']
                    for meta in soup.find_all('meta', property='og:image')]
        return {
            'title': title,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': img_srcs,
        }
772
773
774
class AngryAtNothing(GenericDeletedComic, GenericNavigableComic):
    """Retriever for the Angry at Nothing webcomic."""
    # Also on http://tapastic.com/series/Comics-yeah-definitely-comics-
    # Also on http://angryatnothing.tumblr.com
    name = 'angry'
    long_name = 'Angry At Nothing'
    url = 'http://www.angryatnothing.net'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, date and Open Graph images from a comic page."""
        title_anchor = soup.find('h1', class_='comic-title').find('a')
        title = title_anchor.string
        date_anchor = soup.find('header', class_='comic-meta entry-meta').find('a')
        when = string_to_date(date_anchor.string, "%B %d, %Y")
        image_urls = [
            tag['content']
            for tag in soup.find_all('meta', property='og:image')
        ]
        return {
            'title': title,
            'month': when.month,
            'year': when.year,
            'day': when.day,
            'img': image_urls,
        }
798
799
800
class NeDroid(GenericNavigableComic):
    """Class to retrieve NeDroid comics."""
    name = 'nedroid'
    long_name = 'NeDroid'
    url = 'http://nedroid.com'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The comic number is read from the ``?p=<num>`` part of the page's
        ``shortlink`` URL; the title and secondary title come from the
        ``alt`` and ``title`` attributes of the single comic image.
        """
        # The site URL is escaped so that its dots only match literal dots.
        short_url_re = re.compile('^%s/\\?p=([0-9]*)' % re.escape(cls.url))
        short_url = cls.get_url_from_link(soup.find('link', rel='shortlink'))
        num = int(short_url_re.match(short_url).group(1))
        imgs = soup.find('div', id='comic').find_all('img')
        # Exactly one image is expected in the comic container.
        assert len(imgs) == 1, imgs
        img = imgs[0]
        return {
            'short_url': short_url,
            'title': img['alt'],
            'title2': img['title'],
            'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
            'num': num,
        }
826
827
828
class Garfield(GenericNavigableComic):
    """Class to retrieve Garfield comics."""
    # Also on http://www.gocomics.com/garfield
    name = 'garfield'
    long_name = 'Garfield'
    url = 'https://garfield.com'
    _categories = ('GARFIELD', )
    get_first_comic_link = simulate_first_link
    first_url = 'https://garfield.com/comic/1978/06/19'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic."""
        arrow_class = 'comic-arrow-right' if next_ else 'comic-arrow-left'
        return last_soup.find('a', class_=arrow_class)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The publication date is parsed from the comic URL, which has the
        form ``<site>/comic/<year>/<month>/<day>``.
        """
        url = cls.get_url_from_link(link)
        # The site URL is escaped so that its dots only match literal dots.
        date_re = re.compile(
            '^%s/comic/([0-9]*)/([0-9]*)/([0-9]*)' % re.escape(cls.url))
        year, month, day = [int(s) for s in date_re.match(url).groups()]
        imgs = soup.find('div', class_='comic-display').find_all('img', class_='img-responsive')
        return {
            'month': month,
            'year': year,
            'day': day,
            'img': [i['src'] for i in imgs],
        }
856
857 View Code Duplication
858
class Dilbert(GenericNavigableComic):
    """Retriever for the Dilbert strips published on dilbert.com."""
    # Also on http://www.gocomics.com/dilbert-classics
    name = 'dilbert'
    long_name = 'Dilbert'
    url = 'http://dilbert.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://dilbert.com/strip/1989-04-16'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor leading to the next (or previous) strip, or None."""
        if next_:
            wrapper_class = 'nav-comic nav-right'
        else:
            wrapper_class = 'nav-comic nav-left'
        wrapper = last_soup.find('div', class_=wrapper_class)
        if not wrapper:
            return None
        return wrapper.find('a')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect the strip metadata exposed through the page's <meta> tags."""

        def meta_content(prop):
            """Content of the first <meta> element carrying this property."""
            return soup.find('meta', property=prop)['content']

        title = meta_content('og:title')
        og_images = soup.find_all('meta', property='og:image')
        description = meta_content('og:description')
        published = string_to_date(meta_content('article:publish_date'), "%B %d, %Y")
        author = meta_content('article:author')
        tags = meta_content('article:tag')
        return {
            'title': title,
            'description': description,
            'img': [meta['content'] for meta in og_images],
            'author': author,
            'tags': tags,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
894
895
896
class VictimsOfCircumsolar(GenericDeletedComic, GenericNavigableComic):
    """Retriever for the Victims Of Circumsolar webcomic."""
    # Also on https://victimsofcomics.tumblr.com
    name = 'circumsolar'
    long_name = 'Victims Of Circumsolar'
    url = 'http://www.victimsofcircumsolar.com'
    get_navi_link = get_a_navi_comicnavnext_navinext
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.victimsofcircumsolar.com/comic/modern-addiction'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, description and image URLs of a comic page."""
        # Date is on the archive page
        # The last og:title / og:description element of the page is used.
        og_titles = soup.find_all('meta', property='og:title')
        title = og_titles[-1]['content']
        og_descriptions = soup.find_all('meta', property='og:description')
        description = og_descriptions[-1]['content']
        pictures = soup.find('div', id='comic').find_all('img')
        # Each image is expected to repeat the title in 'title' and 'alt'.
        for picture in pictures:
            assert picture['title'] == picture['alt'] == title
        return {
            'title': title,
            'description': description,
            'img': [picture['src'] for picture in pictures],
        }
919
920
921
class ThreeWordPhrase(GenericNavigableComic):
    """Class to retrieve Three Word Phrase comics."""
    # Also on http://www.threewordphrase.tumblr.com
    name = 'threeword'
    long_name = 'Three Word Phrase'
    url = 'http://threewordphrase.com'
    get_url_from_link = join_cls_url_to_href

    # File name endings of images that are part of the site layout (buttons,
    # ads, header...) and must not be reported as comic images.
    _ignored_img_suffixes = (
        'link.gif', '32.png', 'twpbookad.jpg',
        'merchad.jpg', 'header.gif', 'tipjar.jpg')

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        Returns None when the "first" button image is not on the page.
        """
        img = get_soup_at_url(cls.url).find('img', src='/firstlink.gif')
        return None if img is None else img.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns None when the button image is missing or when its parent
        element carries no 'href'.
        """
        img = last_soup.find('img', src='/nextlink.gif' if next_ else '/prevlink.gif')
        if img is None:
            return None
        link = img.parent
        return None if link.get('href') is None else link

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Every image of the page is kept except those without a 'src'
        attribute and those whose 'src' ends with one of the layout file
        names listed in `_ignored_img_suffixes`.
        """
        title = soup.find('title')
        imgs = [img for img in soup.find_all('img')
                if img.get('src') is not None
                and not img['src'].endswith(cls._ignored_img_suffixes)]
        return {
            'title': title.string if title else None,
            'title2': '  '.join(img.get('alt') for img in imgs if img.get('alt')),
            'img': [urljoin_wrapper(cls.url, img['src']) for img in imgs],
        }
953
954
955
class DeadlyPanel(GenericComicNotWorking, GenericNavigableComic):  # Not working on my machine
    """Retriever for the Deadly Panel webcomic."""
    # Also on https://tapastic.com/series/deadlypanel
    # Also on https://deadlypanel.tumblr.com
    name = 'deadly'
    long_name = 'Deadly Panel'
    url = 'http://www.deadlypanel.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the image URLs found in the page's comic container."""
        pictures = soup.find('div', id='comic').find_all('img')
        # 'alt' and 'title' are expected to be identical on every image.
        for picture in pictures:
            assert picture['alt'] == picture['title']
        return {
            'img': [picture['src'] for picture in pictures],
        }
973
974 View Code Duplication
975
class TheGentlemanArmchair(GenericNavigableComic):
    """Retriever for The Gentleman Armchair webcomic."""
    name = 'gentlemanarmchair'
    long_name = 'The Gentleman Armchair'
    url = 'http://thegentlemansarmchair.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return images, title, author and publication date of a post."""
        title = soup.find('h2', class_='post-title').string
        author_anchor = soup.find("span", class_="post-author").find("a")
        author = author_anchor.string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find('div', id='comic').find_all('img')
        return {
            'img': [picture['src'] for picture in pictures],
            'title': title,
            'author': author,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
999
1000
1001
class ImogenQuest(GenericNavigableComic):
    """Retriever for the Imogen Quest webcomic."""
    # Also on http://imoquest.tumblr.com
    name = 'imogen'
    long_name = 'Imogen Quest'
    url = 'http://imogenquest.net'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return date, images, titles and author read from a comic page."""
        title = soup.find('h2', class_='post-title').string
        author_anchor = soup.find("span", class_="post-author").find("a")
        author = author_anchor.string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, '%B %d, %Y')
        pictures = soup.find('div', class_='comicpane').find_all('img')
        # 'alt' and 'title' are expected to be identical on every image.
        for picture in pictures:
            assert picture['alt'] == picture['title']
        # The secondary title is the hover text of the first image.
        first_picture = pictures[0]
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [picture['src'] for picture in pictures],
            'title': title,
            'title2': first_picture['title'],
            'author': author,
        }
1029
1030
1031
class MyExtraLife(GenericNavigableComic):
    """Retriever for the My Extra Life webcomic."""
    name = 'extralife'
    long_name = 'My Extra Life'
    url = 'http://www.myextralife.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'first comic' navigation anchor of the home page."""
        home = get_soup_at_url(cls.url)
        return home.find('a', class_='comic_nav_link first_comic_link')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, images and publication date of a comic page."""
        title = soup.find("h1", class_="comic_title").string
        raw_date = soup.find("span", class_="comic_date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find_all("img", class_="comic")
        # Every comic image is expected to repeat the title in 'alt'/'title'.
        for picture in pictures:
            assert picture['alt'] == picture['title'] == title
        # Images with an empty 'src' are left out.
        sources = [picture['src'] for picture in pictures if picture["src"]]
        return {
            'title': title,
            'img': sources,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
1058
1059
1060
class SaturdayMorningBreakfastCereal(GenericNavigableComic):
    """Class to retrieve Saturday Morning Breakfast Cereal comics."""
    # Also on http://www.gocomics.com/saturday-morning-breakfast-cereal
    # Also on http://smbc-comics.tumblr.com
    name = 'smbc'
    long_name = 'Saturday Morning Breakfast Cereal'
    url = 'http://www.smbc-comics.com'
    _categories = ('SMBC', )
    get_navi_link = get_a_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics."""
        return get_soup_at_url(cls.url).find('a', rel='start')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The result holds the main comic image and, when the page has one,
        the image found in the 'aftercomic' container.
        """
        image1 = soup.find('img', id='cc-comic')
        imgs = [image1['src']]
        # The 'aftercomic' container is optional, and so is the image in it:
        # a missing container, image or 'src' simply means no second image.
        aftercomic = soup.find('div', id='aftercomic')
        bonus_img = aftercomic.find('img') if aftercomic is not None else None
        bonus_url = bonus_img.get('src') if bonus_img is not None else None
        if bonus_url:
            imgs.append(bonus_url)
        date_str = soup.find('div', class_='cc-publishtime').contents[0]
        day = string_to_date(date_str, "%B %d, %Y")
        return {
            'title': image1['title'],
            'img': [convert_iri_to_plain_ascii_uri(urljoin_wrapper(cls.url, i)) for i in imgs],
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
1092
1093 View Code Duplication
1094
class PerryBibleFellowship(GenericListableComic):  # Is now navigable too
    """Retriever for the Perry Bible Fellowship webcomic."""
    name = 'pbf'
    long_name = 'Perry Bible Fellowship'
    url = 'http://pbfcomics.com'
    get_url_from_archive_element = join_cls_url_to_href

    @classmethod
    def get_archive_elements(cls):
        """Return the anchors of the thumbnail list, in reversed page order."""
        home = get_soup_at_url(cls.url)
        anchors = home.find('div', id='all_thumbnails').find_all('a')
        return reversed(anchors)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the name and the single Open Graph image of a comic page."""
        og_title = soup.find('meta', property='og:title')
        name = og_title['content']
        og_images = soup.find_all('meta', property='og:image')
        # Exactly one og:image element is expected.
        assert len(og_images) == 1, og_images
        return {
            'name': name,
            'img': [meta['content'] for meta in og_images],
        }
1117
1118
1119
class Mercworks(GenericDeletedComic):  # Moved to Webtoons
    """Retriever for the Mercworks webcomic."""
    # Also on http://mercworks.tumblr.com
    # Also on http://www.webtoons.com/en/comedy/mercworks/list?title_no=426
    # Also on https://tapastic.com/series/MercWorks
    name = 'mercworks'
    long_name = 'Mercworks'
    url = 'http://mercworks.net'
    _categories = ('MERCWORKS', )
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return images, title, description and date from the page metadata."""
        title = soup.find('meta', property='og:title')['content']
        # The description is optional and defaults to an empty string.
        og_description = soup.find('meta', property='og:description')
        if og_description:
            description = og_description['content']
        else:
            description = ""
        published_time = soup.find('meta', property='article:published_time')['content']
        # Only the first 10 characters (the "%Y-%m-%d" part) are parsed.
        published = string_to_date(published_time[:10], "%Y-%m-%d")
        og_images = soup.find_all('meta', property='og:image')
        return {
            'img': [meta['content'] for meta in og_images],
            'title': title,
            'desc': description,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
1148
1149
1150
class BerkeleyMews(GenericListableComic):
    """Class to retrieve Berkeley Mews comics."""
    # Also on http://mews.tumblr.com
    # Also on http://www.gocomics.com/berkeley-mews
    name = 'berkeley'
    long_name = 'Berkeley Mews'
    url = 'http://www.berkeleymews.com'
    _categories = ('BERKELEY', )
    get_url_from_archive_element = get_href
    # Matches comic URLs of the form "<site>/?p=<num>". The site URL is
    # escaped so that its dots only match literal dots.
    comic_num_re = re.compile('%s/\\?p=([0-9]*)$' % re.escape(url))

    @classmethod
    def get_archive_elements(cls):
        """Get the comic links of the archive page, in reversed page order."""
        archive_url = urljoin_wrapper(cls.url, "?page_id=2")
        return reversed(get_soup_at_url(archive_url).find_all('a', href=cls.comic_num_re))

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The comic number comes from the comic URL and the date from the
        "<year>-<month>-<day>-" part of the image URL.
        """
        comic_date_re = re.compile('.*/([0-9]*)-([0-9]*)-([0-9]*)-.*')
        url = cls.get_url_from_archive_element(link)
        num = int(cls.comic_num_re.match(url).group(1))
        img = soup.find('div', id='comic').find('img')
        assert img['alt'] == img['title']
        title2 = img['title']
        img_url = img['src']
        year, month, day = [int(s) for s in comic_date_re.match(img_url).groups()]
        return {
            'num': num,
            'title': link.string,
            'title2': title2,
            'img': [img_url],
            'year': year,
            'month': month,
            'day': day,
        }
1186
1187
1188
class GenericBouletCorp(GenericNavigableComic):
    """Generic class to retrieve BouletCorp comics in different languages."""
    # Also on https://bouletcorp.tumblr.com
    _categories = ('BOULET', )
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics."""
        return get_soup_at_url(cls.url).find('div', id='centered_nav').find_all('a')[0]

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The date is parsed from the comic URL, which has the form
        "<site>/<year>/<month>/<day>/...". Images without a 'src'
        attribute are left out of the 'img' list.
        """
        url = cls.get_url_from_link(link)
        # The site URL is escaped so that its dots only match literal dots.
        date_re = re.compile(
            '^%s/([0-9]*)/([0-9]*)/([0-9]*)/' % re.escape(cls.url))
        year, month, day = [int(s) for s in date_re.match(url).groups()]
        imgs = soup.find('div', id='notes').find('div', class_='storycontent').find_all('img')
        texts = '  '.join(t for t in (i.get('title') for i in imgs) if t)
        title = soup.find('title').string
        return {
            'img': [convert_iri_to_plain_ascii_uri(i['src']) for i in imgs if i.get('src') is not None],
            'title': title,
            'texts': texts,
            'year': year,
            'month': month,
            'day': day,
        }
1216
1217
1218
class BouletCorp(GenericBouletCorp):
    """Retriever for the BouletCorp comics hosted on www.bouletcorp.com."""
    # Listed under the 'FRANCAIS' category.
    name = 'boulet'
    long_name = 'Boulet Corp'
    url = 'http://www.bouletcorp.com'
    _categories = ('FRANCAIS', )
1224
1225
1226
class BouletCorpEn(GenericBouletCorp):
    """Retriever for the BouletCorp comics hosted on english.bouletcorp.com."""
    name = 'boulet_en'
    long_name = 'Boulet Corp English'
    url = 'http://english.bouletcorp.com'
1231
1232
1233
class AmazingSuperPowers(GenericNavigableComic):
    """Retriever for the Amazing Super Powers webcomic."""
    name = 'asp'
    long_name = 'Amazing Super Powers'
    url = 'http://www.amazingsuperpowers.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, author, images and publication date of a comic."""
        author_anchor = soup.find("span", class_="post-author").find("a")
        author = author_anchor.string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find('div', id='comic').find_all('img')
        # The title is made of the hover texts of all the comic images.
        hover_texts = [picture['title'] for picture in pictures]
        title = ' '.join(hover_texts)
        for picture in pictures:
            assert picture['alt'] == picture['title']
        return {
            'title': title,
            'author': author,
            'img': [picture['src'] for picture in pictures],
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
1258
1259
1260
class ToonHole(GenericNavigableComic):
    """Retriever for the Toon Hole webcomic."""
    # Also on http://tapastic.com/series/TOONHOLE
    name = 'toonhole'
    long_name = 'Toon Hole'
    url = 'http://www.toonhole.com'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_a_comicnavbase_comicnavnext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, publication date and image URLs of a comic page."""
        entry_meta = soup.find('div', class_='entry-meta')
        raw_date = entry_meta.contents[0].strip()
        published = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find('div', id='comic').find_all('img')
        # The title comes from the first image; it stays empty without images.
        title = ""
        if pictures:
            first_picture = pictures[0]
            title = first_picture['alt']
            assert first_picture['title'] == title
        return {
            'title': title,
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [convert_iri_to_plain_ascii_uri(picture['src']) for picture in pictures],
        }
1288
1289
1290
class Channelate(GenericNavigableComic):
    """Retriever for the Channelate webcomic."""
    name = 'channelate'
    long_name = 'Channelate'
    url = 'http://www.channelate.com'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the comic metadata, including images from the extra panel.

        When the page has an 'extrapanelbutton' container, the page it links
        to is fetched and its 'extrapanelimage' images are appended.
        """
        author_anchor = soup.find("span", class_="post-author").find("a")
        author = author_anchor.string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, '%Y/%m/%d')
        title = soup.find('meta', property='og:title')['content']
        comic_div = soup.find('div', id='comic')
        if comic_div:
            pictures = comic_div.find_all('img')
        else:
            pictures = []
        bonus_url = None
        bonus_button = soup.find('div', id='extrapanelbutton')
        if bonus_button:
            bonus_url = bonus_button.find('a')['href']
            bonus_page = get_soup_at_url(bonus_url)
            pictures.extend(bonus_page.find_all('img', class_='extrapanelimage'))
        return {
            'url_extra': bonus_url,
            'title': title,
            'author': author,
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [urljoin_wrapper(cls.url, picture['src']) for picture in pictures],
        }
1324
1325
1326
class CyanideAndHappiness(GenericNavigableComic):
    """Class to retrieve Cyanide And Happiness comics."""
    name = 'cyanide'
    long_name = 'Cyanide and Happiness'
    url = 'http://explosm.net'
    _categories = ('NSFW', )
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics."""
        return get_soup_at_url(cls.url).find('a', title='Oldest comic')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns None when the navigation anchor is missing from the page or
        when it carries no 'href'.
        """
        link = last_soup.find('a', class_='nav-next' if next_ else 'nav-previous')
        if link is None or link.get('href') is None:
            return None
        return link

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The comic number is the second-to-last '/'-separated component of
        the og:url value. Date and author are read from the 'comic-author'
        block, split on the "\\nby " separator.
        """
        url2 = soup.find('meta', property='og:url')['content']
        num = int(url2.split('/')[-2])
        date_str, _, author = soup.find('div', id='comic-author').text.strip().partition('\nby ')
        day = string_to_date(date_str, '%Y.%m.%d')
        imgs = soup.find_all('img', id='main-comic')
        return {
            'num': num,
            'author': author,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'prefix': '%d-' % num,
            'img': [convert_iri_to_plain_ascii_uri(urljoin_wrapper(cls.url, i['src'])) for i in imgs]
        }
1362
1363
1364
class MrLovenstein(GenericComic):
    """Class to retrieve Mr Lovenstein comics."""
    # Also on https://tapastic.com/series/MrLovenstein
    name = 'mrlovenstein'
    long_name = 'Mr. Lovenstein'
    url = 'http://www.mrlovenstein.com'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generator to get the next comic. Implementation of GenericComic's abstract method.

        The range of comic numbers is taken from the "/comic/<num>" links of
        the home page; each comic page in that range (starting after
        `last_comic` when given) is then fetched. Nothing is yielded when
        the home page has no such link.
        """
        # TODO: more info from http://www.mrlovenstein.com/archive
        # At least one digit is required so that a bare "/comic/" link is
        # ignored instead of failing in int('').
        comic_num_re = re.compile('^/comic/([0-9]+)$')
        img_src_re = re.compile('^/images/comics/')
        nums = [int(comic_num_re.match(link['href']).group(1))
                for link in get_soup_at_url(cls.url).find_all('a', href=comic_num_re)]
        if not nums:
            return
        first, last = min(nums), max(nums)
        if last_comic:
            first = last_comic['num'] + 1
        for num in range(first, last + 1):
            url = urljoin_wrapper(cls.url, '/comic/%d' % num)
            soup = get_soup_at_url(url)
            # Images are reported in the reverse of their order on the page.
            imgs = list(reversed(soup.find_all('img', src=img_src_re)))
            description = soup.find('meta', attrs={'name': 'description'})['content']
            yield {
                'url': url,
                'num': num,
                'texts': '  '.join(t for t in (i.get('title') for i in imgs) if t),
                'img': [urljoin_wrapper(url, i['src']) for i in imgs],
                'description': description,
            }
1394
1395
1396
class DinosaurComics(GenericListableComic):
    """Class to retrieve Dinosaur Comics comics."""
    name = 'dinosaur'
    long_name = 'Dinosaur Comics'
    url = 'http://www.qwantz.com'
    get_url_from_archive_element = get_href
    # Matches comic URLs of the form "<site>/index.php?comic=<num>". The
    # site URL and the dot of "index.php" are escaped to match literally.
    comic_link_re = re.compile(
        '^%s/index\\.php\\?comic=([0-9]*)$' % re.escape(url))

    @classmethod
    def get_archive_elements(cls):
        """Get the comic links of the archive page, in reversed page order."""
        archive_url = urljoin_wrapper(cls.url, 'archive.php')
        # first link is random -> skip it
        return reversed(get_soup_at_url(archive_url).find_all('a', href=cls.comic_link_re)[1:])

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The date is the text of the archive link (ordinal suffixes removed
        before parsing) and the text is the content following that link.
        """
        url = cls.get_url_from_archive_element(link)
        num = int(cls.comic_link_re.match(url).group(1))
        date_str = link.string
        text = link.next_sibling.string
        day = string_to_date(remove_st_nd_rd_th_from_date(date_str), "%B %d, %Y")
        comic_img_re = re.compile('^%s/comics/' % re.escape(cls.url))
        img = soup.find('img', src=comic_img_re)
        return {
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [img.get('src')],
            'title': img.get('title'),
            'text': text,
            'num': num,
        }
1429
1430
1431
class ButterSafe(GenericListableComic):
    """Class to retrieve Butter Safe comics."""
    name = 'butter'
    long_name = 'ButterSafe'
    url = 'http://buttersafe.com'
    get_url_from_archive_element = get_href
    # Matches comic URLs of the form "<site>/<year>/<month>/<day>/...". The
    # site URL is escaped so that its dots only match literal dots.
    comic_link_re = re.compile(
        '^%s/([0-9]*)/([0-9]*)/([0-9]*)/.*' % re.escape(url))

    @classmethod
    def get_archive_elements(cls):
        """Get the comic links of the archive page, in reversed page order."""
        archive_url = urljoin_wrapper(cls.url, 'archive/')
        return reversed(get_soup_at_url(archive_url).find_all('a', href=cls.comic_link_re))

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The title is the text of the archive link and the date is parsed
        from the comic URL.
        """
        url = cls.get_url_from_archive_element(link)
        title = link.string
        year, month, day = [int(s) for s in cls.comic_link_re.match(url).groups()]
        img = soup.find('div', id='comic').find('img')
        # The image 'alt' text is expected to repeat the archive title.
        assert img['alt'] == title
        return {
            'title': title,
            'day': day,
            'month': month,
            'year': year,
            'img': [img['src']],
        }
1459
1460
1461
class CalvinAndHobbes(GenericComic):
    """Class to retrieve Calvin and Hobbes comics."""
    # Also on http://www.gocomics.com/calvinandhobbes/
    name = 'calvin'
    long_name = 'Calvin and Hobbes'
    # This is not through any official webpage but eh...
    url = 'http://marcel-oehler.marcellosendos.ch/comics/ch/'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generator to get the next comic. Implementation of GenericComic's abstract method.

        The index page links to one page per month ("<year>/<month>/");
        each month page holds images named "<year><month><day>...". Only
        comics dated strictly after the last retrieved one (or after
        1985-11-01 when there is none) are yielded.
        """
        last_date = get_date_for_comic(
            last_comic) if last_comic else date(1985, 11, 1)
        # At least one digit is required in each group so that hrefs such as
        # "//host/..." are not selected and then fail in int('').
        link_re = re.compile('^([0-9]+)/([0-9]+)/')
        for link in get_soup_at_url(cls.url).find_all('a', href=link_re):
            url = link['href']
            # The strings are kept as found (they are reused verbatim in the
            # image pattern and URL); the integers are used for the dates.
            year_str, month_str = link_re.match(url).groups()
            year, month = int(year_str), int(month_str)
            # Skip months lying entirely before the last retrieved comic.
            if date(year, month, 1) + timedelta(days=31) < last_date:
                continue
            img_re = re.compile('^%s%s([0-9]+)' % (year_str, month_str))
            month_url = urljoin_wrapper(cls.url, url)
            for img in get_soup_at_url(month_url).find_all('img', src=img_re):
                img_src = img['src']
                day = int(img_re.match(img_src).group(1))
                comic_date = date(year, month, day)
                if comic_date > last_date:
                    yield {
                        'url': month_url,
                        'year': year,
                        'month': month,
                        'day': day,
                        'img': ['%s%s/%s/%s' % (cls.url, year_str, month_str, img_src)],
                    }
                    last_date = comic_date
1495
1496
1497
class AbstruseGoose(GenericListableComic):
    """Class to retrieve AbstruseGoose Comics."""
    name = 'abstruse'
    long_name = 'Abstruse Goose'
    url = 'http://abstrusegoose.com'
    get_url_from_archive_element = get_href
    # Comic pages are "<site>/<num>" and strip images live under
    # "<site>/strips/". The site URL is escaped so that its dots only match
    # literal dots.
    comic_url_re = re.compile('^%s/([0-9]*)$' % re.escape(url))
    comic_img_re = re.compile('^%s/strips/.*' % re.escape(url))

    @classmethod
    def get_archive_elements(cls):
        """Get the comic links of the archive page, in page order."""
        archive_url = urljoin_wrapper(cls.url, 'archive')
        return get_soup_at_url(archive_url).find_all('a', href=cls.comic_url_re)

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Get information about a particular comics.

        The number comes from the comic URL, the title from the text of the
        archive link and the images from the strip images of the page.
        """
        comic_url = cls.get_url_from_archive_element(archive_elt)
        num = int(cls.comic_url_re.match(comic_url).group(1))
        imgs = soup.find_all('img', src=cls.comic_img_re)
        return {
            'num': num,
            'title': archive_elt.string,
            'img': [i['src'] for i in imgs],
        }
1521
1522
1523
class PhDComics(GenericNavigableComic):
    """Class to retrieve PHD Comics."""
    name = 'phd'
    long_name = 'PhD Comics'
    url = 'http://phdcomics.com/comics/archive.php'

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics (parent of the "first" button image, or None)."""
        button = get_soup_at_url(cls.url).find(
            'img', src='http://phdcomics.com/comics/images/first_button.gif')
        if button is None:
            return None
        return button.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic (parent of the button image, or None)."""
        direction = 'next' if next_ else 'prev'
        button_src = 'http://phdcomics.com/comics/images/%s_button.gif' % direction
        button = last_soup.find('img', src=button_src)
        if button is None:
            return None
        return button.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics from the page's meta tags."""
        twitter_title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        og_images = soup.find_all('meta', property='og:image')
        return {
            'img': [meta['content'] for meta in og_images],
            'title': twitter_title,
        }
1552
1553
1554
class Quarktees(GenericNavigableComic):
    """Class to retrieve the Quarktees comics."""
    name = 'quarktees'
    long_name = 'Quarktees'
    url = 'http://www.quarktees.com/blogs/news'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.quarktees.com/blogs/news/12486621-coming-soon'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic, looked up by the id of the <a> tag."""
        link_id = 'article-next' if next_ else 'article-prev'
        return last_soup.find('a', id=link_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Image sources found in the article are joined to the class url.
        """
        title = soup.find('meta', property='og:title')['content']
        article_imgs = soup.find('div', class_='single-article').find_all('img')
        img_urls = [urljoin_wrapper(cls.url, tag['src']) for tag in article_imgs]
        return {
            'title': title,
            'img': img_urls,
        }
1578
1579
1580
class OverCompensating(GenericNavigableComic):
    """Class to retrieve the Over Compensating comics."""
    name = 'compensating'
    long_name = 'Over Compensating'
    url = 'http://www.overcompensating.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics (the <a> whose href ends with 'comic=1')."""
        first_href_re = re.compile('comic=1$')
        return get_soup_at_url(cls.url).find('a', href=first_href_re)

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic, looked up by the link title."""
        link_title = 'next comic' if next_ else 'go back already'
        return last_soup.find('a', title=link_title)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The number comes from the 'comic=<n>' suffix of the comic URL.
        """
        comic_url = cls.get_url_from_link(link)
        num_match = re.compile('.*comic=([0-9]*)$').match(comic_url)
        num = int(num_match.group(1))
        img = soup.find('img', src=re.compile('^/oc/comics/.*'))
        return {
            'num': num,
            'img': [urljoin_wrapper(comic_url, img['src'])],
            'title': img.get('title')
        }
1610
1611
1612
class Oglaf(GenericNavigableComic):
    """Class to retrieve Oglaf comics."""
    name = 'oglaf'
    long_name = 'Oglaf [NSFW]'
    url = 'http://oglaf.com'
    _categories = ('NSFW', )
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        Returns the parent of <div id="st">, or None when that div is
        missing (same convention as get_navi_link) instead of raising
        AttributeError on None.
        """
        div = get_soup_at_url(cls.url).find("div", id="st")
        return div.parent if div else None

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns the parent of <div id="nx"> (next) or <div id="pvs">
        (previous), or None when the div is missing.
        """
        div = last_soup.find("div", id="nx" if next_ else "pvs")
        return div.parent if div else None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Exactly one title image (inside <div id="tt">) and one strip image
        (<img id="strip">) are expected; the description is built from the
        title attribute of both.
        """
        title = soup.find('title').string
        title_imgs = soup.find('div', id='tt').find_all('img')
        assert len(title_imgs) == 1, title_imgs
        strip_imgs = soup.find_all('img', id='strip')
        assert len(strip_imgs) == 1, strip_imgs
        imgs = title_imgs + strip_imgs
        desc = ' '.join(i['title'] for i in imgs)
        return {
            'title': title,
            'img': [i['src'] for i in imgs],
            'description': desc,
        }
1646
1647
1648
class ScandinaviaAndTheWorld(GenericNavigableComic):
    """Class to retrieve Scandinavia And The World comics."""
    name = 'satw'
    long_name = 'Scandinavia And The World'
    url = 'http://satwcomic.com'
    get_first_comic_link = simulate_first_link
    first_url = 'http://satwcomic.com/sweden-denmark-and-norway'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic, looked up by its accesskey."""
        key = 'n' if next_ else 'p'
        return last_soup.find('a', accesskey=key)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics from meta tags and images."""
        title_meta = soup.find('meta', attrs={'name': 'twitter:label1'})
        title = title_meta['content']
        desc_meta = soup.find('meta', property='og:description')
        desc = desc_meta['content']
        img_tags = soup.find_all('img', itemprop="image")
        return {
            'title': title,
            'description': desc,
            'img': [tag['src'] for tag in img_tags],
        }
1672
1673
1674
class SomethingOfThatIlk(GenericDeletedComic):
    """Class to retrieve the Something Of That Ilk comics.

    Inherits from GenericDeletedComic and only declares identification
    attributes; it defines no retrieval logic of its own.
    """
    name = "somethingofthatilk"
    long_name = "Something Of That Ilk"
    url = "http://www.somethingofthatilk.com"
1679
1680
1681
class MonkeyUser(GenericNavigableComic):
    """Class to retrieve Monkey User comics."""
    name = 'monkeyuser'
    long_name = 'Monkey User'
    url = 'http://www.monkeyuser.com'
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.monkeyuser.com/2016/project-lifecycle/'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns the first <a> inside the navigation <div>, or None when
        that <div> is missing.
        """
        nav_div = last_soup.find('div', title='next' if next_ else 'previous')
        if nav_div is None:
            return None
        return nav_div.find('a')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The publication date is parsed from the <time> tag of the post date.
        """
        title = soup.find('meta', property='og:title')['content']
        desc = soup.find('meta', property='og:description')['content']
        og_images = soup.find_all('meta', property='og:image')
        time_tag = soup.find('span', class_='post-date').find('time')
        day = string_to_date(time_tag.string, "%d %b %Y")
        return {
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [meta['content'] for meta in og_images],
            'title': title,
            'description': desc,
        }
1712
1713
1714
class InfiniteMonkeyBusiness(GenericNavigableComic):
    """Class to retrieve InfiniteMonkeyBusiness comics."""
    name = 'monkey'
    long_name = 'Infinite Monkey Business'
    url = 'http://infinitemonkeybusiness.net'
    get_navi_link = get_a_navi_comicnavnext_navinext
    get_first_comic_link = simulate_first_link
    first_url = 'http://infinitemonkeybusiness.net/comic/pillory/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The title is read from the og:title meta tag, the images from the
        <div id="comic"> element.
        """
        title = soup.find('meta', property='og:title')['content']
        comic_div = soup.find('div', id='comic')
        img_srcs = [tag['src'] for tag in comic_div.find_all('img')]
        return {
            'title': title,
            'img': img_srcs,
        }
1732
1733
1734
class Wondermark(GenericListableComic):
    """Class to retrieve the Wondermark comics."""
    name = 'wondermark'
    long_name = 'Wondermark'
    url = 'http://wondermark.com'
    get_url_from_archive_element = get_href

    @classmethod
    def get_archive_elements(cls):
        """Return the bookmark links of the archive page, in reversed order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive/'))
        return reversed(archive_soup.find_all('a', rel='bookmark'))

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        When the page has no <div id="comic">, the image list, alt text and
        title are left empty.
        """
        raw_date = soup.find('div', class_='postdate').find('em').string
        day = string_to_date(remove_st_nd_rd_th_from_date(raw_date), "%B %d, %Y")
        comic_div = soup.find('div', id='comic')
        if not comic_div:
            img_src, alt, title = [], '', ''
        else:
            img = comic_div.find('img')
            img_src = [img['src']]
            alt = img['alt']
            assert alt == img['title']
            title = soup.find('meta', property='og:title')['content']
        info = {
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': img_src,
            'title': title,
            'alt': alt,
        }
        tag_links = soup.find('div', class_='postmeta').find_all('a', rel='tag')
        info['tags'] = ' '.join(t.string for t in tag_links)
        return info
1771
1772
1773
class WarehouseComic(GenericNavigableComic):
    """Class to retrieve Warehouse Comic comics."""
    name = 'warehouse'
    long_name = 'Warehouse Comic'
    url = 'http://warehousecomic.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics (title, date and images)."""
        title = soup.find('h2', class_='post-title').string
        post_date = soup.find('span', class_='post-date')
        day = string_to_date(post_date.string, "%B %d, %Y")
        comic_div = soup.find('div', id='comic')
        img_srcs = [tag['src'] for tag in comic_div.find_all('img')]
        return {
            'img': img_srcs,
            'title': title,
            'day': day.day,
            'month': day.month,
            'year': day.year,
        }
1795
1796
1797
class JustSayEh(GenericNavigableComic):
    """Class to retrieve Just Say Eh comics."""
    # Also on http://tapastic.com/series/Just-Say-Eh
    name = 'justsayeh'
    long_name = 'Just Say Eh'
    url = 'http://www.justsayeh.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Every image is expected to have identical alt and title attributes;
        the alt text of the first image is reported.
        """
        title = soup.find('h2', class_='post-title').string
        comic_div = soup.find("div", id="comic")
        imgs = comic_div.find_all("img")
        assert all(tag['alt'] == tag['title'] for tag in imgs)
        first_alt = imgs[0]['alt']
        return {
            'img': [tag['src'] for tag in imgs],
            'title': title,
            'alt': first_alt,
        }
1818
1819
1820
class MouseBearComedy(GenericComicNotWorking):  # Website has changed
    """Class to retrieve Mouse Bear Comedy comics."""
    # Also on http://mousebearcomedy.tumblr.com
    name = 'mousebear'
    long_name = 'Mouse Bear Comedy'
    url = 'http://www.mousebearcomedy.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Every image is expected to carry the post title as both its alt and
        title attribute.
        """
        title = soup.find('h2', class_='post-title').string
        author_link = soup.find("span", class_="post-author").find("a")
        author = author_link.string
        post_date = soup.find("span", class_="post-date")
        day = string_to_date(post_date.string, '%B %d, %Y')
        imgs = soup.find("div", id="comic").find_all("img")
        assert all(tag['alt'] == tag['title'] == title for tag in imgs)
        return {
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [tag['src'] for tag in imgs],
            'title': title,
            'author': author,
        }
1846
1847 View Code Duplication
1848
class BigFootJustice(GenericNavigableComic):
    """Class to retrieve Big Foot Justice comics."""
    # Also on http://tapastic.com/series/bigfoot-justice
    name = 'bigfoot'
    long_name = 'Big Foot Justice'
    url = 'http://bigfootjustice.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The title is the space-separated concatenation of the image titles.
        """
        comic_div = soup.find('div', id='comic')
        imgs = comic_div.find_all('img')
        assert all(tag['title'] == tag['alt'] for tag in imgs)
        joined_title = ' '.join(tag['title'] for tag in imgs)
        return {
            'img': [tag['src'] for tag in imgs],
            'title': joined_title,
        }
1867
1868
1869
class RespawnComic(GenericNavigableComic):
    """Class to retrieve Respawn Comic."""
    # Also on https://respawncomic.tumblr.com
    name = 'respawn'
    long_name = 'Respawn Comic'
    # No trailing whitespace: a stray space would end up in the site URL.
    url = 'http://respawncomic.com'
    _categories = ('RESPAWN', )
    get_navi_link = get_a_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://respawncomic.com/comic/c0001/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Title, author and publication date are read from meta tags. The
        og:image entries listed in `skip_imgs` are left out of the result.
        """
        title = soup.find('meta', property='og:title')['content']
        author = soup.find('meta', attrs={'name': 'shareaholic:article_author_name'})['content']
        date_str = soup.find('meta', attrs={'name': 'shareaholic:article_published_time'})['content']
        # Only the first 10 characters (the "%Y-%m-%d" part) are parsed.
        date_str = date_str[:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        imgs = soup.find_all('meta', property='og:image')
        skip_imgs = {
            'http://respawncomic.com/wp-content/uploads/2016/03/site/HAROLD2.png',
            'http://respawncomic.com/wp-content/uploads/2016/03/site/DEVA.png'
        }
        return {
            'title': title,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [i['content'] for i in imgs if i['content'] not in skip_imgs],
        }
1901
1902
1903
class SafelyEndangered(GenericNavigableComic):
    """Class to retrieve Safely Endangered comics."""
    # Also on http://tumblr.safelyendangered.com
    name = 'endangered'
    long_name = 'Safely Endangered'
    url = 'http://www.safelyendangered.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.safelyendangered.com/comic/ignored/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The alt text of the first image is reported; every image is expected
        to have identical alt and title attributes.
        """
        title = soup.find('h2', class_='post-title').string
        post_date = soup.find('span', class_='post-date')
        day = string_to_date(post_date.string, '%B %d, %Y')
        imgs = soup.find('div', id='comic').find_all('img')
        first_alt = imgs[0]['alt']
        assert all(tag['alt'] == tag['title'] for tag in imgs)
        return {
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [tag['src'] for tag in imgs],
            'title': title,
            'alt': first_alt,
        }
1930
1931
1932
class PicturesInBoxes(GenericNavigableComic):
    """Class to retrieve Pictures In Boxes comics."""
    # Also on https://picturesinboxescomic.tumblr.com
    name = 'picturesinboxes'
    long_name = 'Pictures in Boxes'
    url = 'http://www.picturesinboxes.com'
    get_navi_link = get_a_navi_navinext
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.picturesinboxes.com/2013/10/26/tetris/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        At least one image is expected, each carrying the post title as both
        its title and alt attribute.
        """
        title = soup.find('h2', class_='post-title').string
        author_link = soup.find("span", class_="post-author").find("a")
        author = author_link.string
        post_date = soup.find('span', class_='post-date')
        day = string_to_date(post_date.string, '%B %d, %Y')
        imgs = soup.find('div', class_='comicpane').find_all('img')
        assert imgs
        assert all(tag['title'] == tag['alt'] == title for tag in imgs)
        return {
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [tag['src'] for tag in imgs],
            'title': title,
            'author': author,
        }
1960
1961
1962
class Penmen(GenericComicNotWorking, GenericNavigableComic):
    """Class to retrieve Penmen comics."""
    name = 'penmen'
    long_name = 'Penmen'
    url = 'http://penmen.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://penmen.com/index.php/2016/09/12/penmen-announces-grin-big-brand-clothing/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The date is taken from the first 10 characters of the datetime
        attribute of the <time> tag.
        """
        title = soup.find('title').string
        content_imgs = soup.find('div', class_='entry-content').find_all('img')
        short_url = soup.find('link', rel='shortlink')['href']
        tag_links = soup.find_all('a', rel='tag')
        tags = ' '.join(t.string for t in tag_links)
        iso_date = soup.find('time')['datetime'][:10]
        day = string_to_date(iso_date, "%Y-%m-%d")
        return {
            'title': title,
            'short_url': short_url,
            'img': [tag['src'] for tag in content_imgs],
            'tags': tags,
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
1989
1990
1991
class TheDoghouseDiaries(GenericDeletedComic, GenericNavigableComic):
    """Class to retrieve The Dog House Diaries comics."""
    name = 'doghouse'
    long_name = 'The Dog House Diaries'
    url = 'http://thedoghousediaries.com'

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics (the <a id="firstlink"> of the home page)."""
        home_soup = get_soup_at_url(cls.url)
        return home_soup.find('a', id='firstlink')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic, looked up by the id of the <a> tag."""
        link_id = 'nextlink' if next_ else 'previouslink'
        return last_soup.find('a', id=link_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The comic number is the last path component of the comic URL.
        """
        img = soup.find('img', src=re.compile('^dhdcomics/.*'))
        comic_url = cls.get_url_from_link(link)
        return {
            'title': soup.find('h2', id='titleheader').string,
            'title2': soup.find('div', id='subtext').string,
            'alt': img.get('title'),
            'img': [urljoin_wrapper(comic_url, img['src'].strip())],
            'num': int(comic_url.split('/')[-1]),
        }
2020
2021
2022
class InvisibleBread(GenericListableComic):
    """Class to retrieve Invisible Bread comics."""
    # Also on http://www.gocomics.com/invisible-bread
    name = 'invisiblebread'
    long_name = 'Invisible Bread'
    url = 'http://invisiblebread.com'

    @classmethod
    def get_archive_elements(cls):
        """Return the archive-title cells of the archive page, in reversed order."""
        archive_url = urljoin_wrapper(cls.url, 'archives/')
        return reversed(get_soup_at_url(archive_url).find_all('td', class_='archive-title'))

    @classmethod
    def get_url_from_archive_element(cls, td):
        """Return the href of the first link found in the archive cell."""
        return td.find('a')['href']

    @classmethod
    def get_comic_info(cls, soup, td):
        """Get information about a particular comics.

        The month and day come from the sibling preceding `td` in the
        archive table; the year is the first numeric path component of the
        comic URL.
        """
        url = cls.get_url_from_archive_element(td)
        title = td.find('a').string
        month_and_day = td.previous_sibling.string
        # Escape the site URL so that '.' only matches a literal dot.
        link_re = re.compile('^%s/([0-9]+)/' % re.escape(cls.url))
        year = link_re.match(url).group(1)
        date_str = month_and_day + ' ' + year
        day = string_to_date(date_str, '%b %d %Y')
        # Only the first image of the comic div is used.
        imgs = [soup.find('div', id='comic').find('img')]
        assert all(i['title'] == i['alt'] == title for i in imgs)
        return {
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
            'title': title,
        }
2058
2059
2060
class DiscoBleach(GenericDeletedComic):
    """Class to retrieve Disco Bleach Comics.

    Inherits from GenericDeletedComic and only declares identification
    attributes; it defines no retrieval logic of its own.
    """
    name = "discobleach"
    long_name = "Disco Bleach"
    url = "http://discobleach.com"
2065
2066
2067
class TubeyToons(GenericDeletedComic):
    """Class to retrieve TubeyToons comics.

    Inherits from GenericDeletedComic and only declares identification
    attributes; it defines no retrieval logic of its own.
    """
    # Also on http://tapastic.com/series/Tubey-Toons
    # Also on https://tubeytoons.tumblr.com
    name = "tubeytoons"
    long_name = "Tubey Toons"
    url = "http://tubeytoons.com"
    _categories = ("TUNEYTOONS", )
2075
2076
2077
class CompletelySeriousComics(GenericNavigableComic):
    """Class to retrieve Completely Serious comics."""
    name = 'completelyserious'
    long_name = 'Completely Serious Comics'
    url = 'http://completelyseriouscomics.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        At least one image is expected, and all images must share the same
        title/alt text, which is reported as 'alt'.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find('span', class_='post-author')
        author = author_span.contents[1].string
        post_date = soup.find('span', class_='post-date')
        day = string_to_date(post_date.string, '%B %d, %Y')
        imgs = soup.find('div', class_='comicpane').find_all('img')
        assert imgs
        alt = imgs[0]['title']
        assert all(tag['title'] == tag['alt'] == alt for tag in imgs)
        return {
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [tag['src'] for tag in imgs],
            'title': title,
            'alt': alt,
            'author': author,
        }
2105
2106 View Code Duplication
2107
class PoorlyDrawnLines(GenericListableComic):
    """Class to retrieve Poorly Drawn Lines comics."""
    # Also on http://pdlcomics.tumblr.com
    name = 'poorlydrawn'
    long_name = 'Poorly Drawn Lines'
    url = 'https://www.poorlydrawnlines.com'
    _categories = ('POORLYDRAWN', )
    get_url_from_archive_element = get_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        At most one image is expected in the post; the title is that
        image's title attribute, or '' when there is no image or no title.
        """
        imgs = soup.find('div', class_='post').find_all('img')
        assert len(imgs) <= 1, imgs
        return {
            'img': [i['src'] for i in imgs],
            'title': imgs[0].get('title', "") if imgs else "",
        }

    @classmethod
    def get_archive_elements(cls):
        """Return the comic links of the archive page, in reversed order."""
        archive_url = urljoin_wrapper(cls.url, 'archive')
        # Escape the site URL so that '.' only matches a literal dot; the
        # trailing '.' still requires a non-empty comic slug.
        url_re = re.compile('^%s/comic/.' % re.escape(cls.url))
        return reversed(get_soup_at_url(archive_url).find_all('a', href=url_re))
2131
2132
2133
class LoadingComics(GenericNavigableComic):
    """Class to retrieve Loading Artist comics."""
    name = 'loadingartist'
    long_name = 'Loading Artist'
    url = 'http://www.loadingartist.com/latest'

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics (the <a> titled "First")."""
        latest_soup = get_soup_at_url(cls.url)
        return latest_soup.find('a', title="First")

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic, looked up by the link title."""
        link_title = 'Next' if next_ else 'Previous'
        return last_soup.find('a', title=link_title)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Only images with empty alt and title attributes are collected from
        the comic div.
        """
        title = soup.find('h1').string
        raw_date = soup.find('span', class_='date').string
        day = string_to_date(raw_date.strip(), "%B %d, %Y")
        comic_div = soup.find('div', class_='comic')
        imgs = comic_div.find_all('img', alt='', title='')
        return {
            'title': title,
            'img': [tag['src'] for tag in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
2163
2164
2165
class ChuckleADuck(GenericNavigableComic):
    """Class to retrieve Chuckle-A-Duck comics."""
    name = 'chuckleaduck'
    long_name = 'Chuckle-A-duck'
    url = 'http://chuckleaduck.com'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        When the page has no <div id="comic"> or the div has no image, the
        image list and the title are empty.
        """
        raw_date = soup.find('span', class_='post-date').string
        day = string_to_date(remove_st_nd_rd_th_from_date(raw_date), "%B %d, %Y")
        author = soup.find('span', class_='post-author').string
        comic_div = soup.find('div', id='comic')
        if comic_div:
            imgs = comic_div.find_all('img')
        else:
            imgs = []
        if imgs:
            title = imgs[0]['title']
        else:
            title = ""
        assert all(tag['title'] == tag['alt'] == title for tag in imgs)
        return {
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [tag['src'] for tag in imgs],
            'title': title,
            'author': author,
        }
2191
2192
2193
class DepressedAlien(GenericNavigableComic):
    """Class to retrieve Depressed Alien Comics."""
    name = 'depressedalien'
    long_name = 'Depressed Alien'
    url = 'http://depressedalien.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        Returns the parent of the image named 'beginArrow', or None when
        that image is missing (instead of raising AttributeError on None).
        """
        img = get_soup_at_url(cls.url).find('img', attrs={'name': 'beginArrow'})
        return None if img is None else img.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns the parent of the image named 'rightArrow' (next) or
        'leftArrow' (previous), or None when that image is missing.
        """
        img = last_soup.find('img', attrs={'name': 'rightArrow' if next_ else 'leftArrow'})
        return None if img is None else img.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics from the page's meta tags."""
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        imgs = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'img': [i['content'] for i in imgs],
        }
2219
2220 View Code Duplication
2221
class TurnOffUs(GenericListableComic):
    """Class to retrieve TurnOffUs comics."""
    name = 'turnoffus'
    long_name = 'Turn Off Us'
    url = 'http://turnoff.us'
    get_url_from_archive_element = join_cls_url_to_href

    @classmethod
    def get_archive_elements(cls):
        """Return the post links of the 'all' page, in reversed order."""
        all_posts_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'all'))
        post_list = all_posts_soup.find('ul', class_='post-list')
        post_links = post_list.find_all('a', class_='post-link')
        return reversed(post_links)

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Get information about a particular comics from the page's meta tags."""
        title = soup.find('meta', property='og:title')['content']
        og_images = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'img': [meta['content'] for meta in og_images],
        }
2243
2244
2245
class ThingsInSquares(GenericListableComic):
    """Class to retrieve Things In Squares comics."""
    # This can be retrieved in other languages
    # Also on https://tapastic.com/series/Things-in-Squares
    name = 'squares'
    long_name = 'Things in squares'
    url = 'http://www.thingsinsquares.com'

    @classmethod
    def get_comic_info(cls, soup, tr):
        """Get information about a particular comics.

        `tr` is an archive row made of exactly three cells: the second one
        holds the link (and title), the third one the date.
        """
        cells = tr.find_all('td')
        _, link_cell, date_cell = cells
        anchor = link_cell.find('a')
        day = string_to_date(date_cell.string, "%m.%d.%y")
        title = anchor.string
        title2 = soup.find('meta', property='og:title')['content']
        desc_meta = soup.find('meta', property='og:description')
        if desc_meta:
            description = desc_meta['content']
        else:
            description = ''
        tag_metas = soup.find_all('meta', property='article:tag')
        tags = ' '.join(meta['content'] for meta in tag_metas)
        og_images = soup.find_all('meta', property='og:image')
        return {
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'title': title,
            'title2': title2,
            'description': description,
            'tags': tags,
            'img': [meta['content'] for meta in og_images],
        }

    @classmethod
    def get_url_from_archive_element(cls, tr):
        """Return the href of the link held in the second cell of the row."""
        cells = tr.find_all('td')
        _, link_cell, __ = cells
        return link_cell.find('a')['href']

    @classmethod
    def get_archive_elements(cls):
        """Return the rows of the archive table body, in reversed order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive-2'))
        rows = archive_soup.find('tbody').find_all('tr')
        return reversed(rows)
2286
2287
2288
class HappleTea(GenericNavigableComic):
    """Retrieve Happle Tea comics by walking the site's navigation links."""
    name = 'happletea'
    long_name = 'Happle Tea'
    url = 'http://www.happletea.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for the comic on page ``soup``.

        Images come from the ``div#comic`` element; title, author and date
        come from the ``div.post-content`` element. ``link`` is not used.
        """
        images = soup.find('div', id='comic').find_all('img')
        content = soup.find('div', class_='post-content')
        heading = content.find('h2', class_='post-title')
        author_link = content.find('a', rel='author')
        date_span = content.find('span', class_='post-date')
        published = string_to_date(date_span.string, "%B %d, %Y")
        # Sanity check: each image must carry the same text as alt and title
        assert all(img['alt'] == img['title'] for img in images)
        return {
            'title': heading.string,
            'img': [img['src'] for img in images],
            'alt': ''.join(img['alt'] for img in images),
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'author': author_link.string,
        }
2315
2316
2317
class RockPaperScissors(GenericNavigableComic):
    """Retrieve Rock Paper Scissors comics by walking the navigation links."""
    name = 'rps'
    long_name = 'Rock Paper Scissors'
    url = 'http://rps-comics.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for the comic on page ``soup``.

        Reads the <title> element, the ``og:image`` meta tags, the shortlink
        and the ``div#transcript-content`` element. ``link`` is not used.
        """
        page_title = soup.find('title').string
        image_metas = soup.find_all('meta', property='og:image')
        shortlink = soup.find('link', rel='shortlink')['href']
        transcript_div = soup.find('div', id='transcript-content')
        return {
            'title': page_title,
            'transcript': transcript_div.string,
            'short_url': shortlink,
            'img': [meta['content'] for meta in image_metas],
        }
2338
2339
2340
class FatAwesomeComics(GenericNavigableComic):
    """Retrieve Fat Awesome comics, starting from a hard-coded first URL."""
    # Also on http://fatawesomecomedy.tumblr.com
    name = 'fatawesome'
    long_name = 'Fat Awesome'
    url = 'http://fatawesome.com/comics'
    get_navi_link = get_a_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://fatawesome.com/shortbus/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for the comic on page ``soup``.

        Title, description, tags and date are read from <meta> tags; exactly
        one image with ``data-recalc-dims="1"`` is expected. ``link`` is not
        used.
        """
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        description = soup.find('meta', attrs={'name': 'description'})['content']
        # The tag meta is optional: fall back to an empty string
        tag_meta = soup.find('meta', property='article:tag')
        published_meta = soup.find('meta', property='article:published_time')
        # Only the first 10 characters of the timestamp are parsed as a date
        published = string_to_date(published_meta['content'][:10], "%Y-%m-%d")
        images = soup.find_all('img', attrs={'data-recalc-dims': "1"})
        assert len(images) == 1, images
        return {
            'title': title,
            'description': description,
            'tags': tag_meta['content'] if tag_meta else "",
            'alt': "".join(img['alt'] for img in images),
            # Keep only what precedes the last '?' of each image URL
            'img': [img['src'].rsplit('?', 1)[0] for img in images],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2371
2372
2373
class PeterLauris(GenericNavigableComic):
    """Retrieve Peter Lauris comics, starting from a hard-coded first URL."""
    name = 'peterlauris'
    long_name = 'Peter Lauris'
    url = 'http://peterlauris.com/comics'
    get_navi_link = get_a_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://peterlauris.com/comics/just-in-case/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for the comic on page ``soup``.

        Title, publication date and images are all read from <meta> tags.
        ``link`` is not used.
        """
        title_meta = soup.find('meta', attrs={'name': 'twitter:title'})
        published_meta = soup.find('meta', property='article:published_time')
        # Only the first 10 characters of the timestamp are parsed as a date
        published = string_to_date(published_meta['content'][:10], "%Y-%m-%d")
        image_metas = soup.find_all('meta', property='og:image')
        return {
            'title': title_meta['content'],
            'img': [meta['content'] for meta in image_metas],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2396
2397
2398
class RandomCrab(GenericNavigableComic):
    """Retrieve Random Crab comics, starting from a hard-coded first URL."""
    name = 'randomcrab'
    long_name = 'Random Crab'
    url = 'https://randomcrab.com'
    get_navi_link = get_a_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'https://randomcrab.com/natural-elephant/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for the comic on page ``soup``.

        Title, description, date and images are read from <meta> tags; the
        author is the text of the ``a[rel=author]`` link. ``link`` is not
        used.
        """
        title = soup.find('meta', property='og:title')['content']
        description = soup.find('meta', property='og:description')['content']
        published_meta = soup.find('meta', property='article:published_time')
        # Only the first 10 characters of the timestamp are parsed as a date
        published = string_to_date(published_meta['content'][:10], "%Y-%m-%d")
        image_metas = soup.find_all('meta', property='og:image')
        author_link = soup.find('a', rel='author')
        return {
            'title': title,
            'desc': description,
            'img': [meta['content'] for meta in image_metas],
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'author': author_link.string,
        }
2425
2426
2427
class JuliasDrawings(GenericListableComic):
    """Retrieve Julia's Drawings, listed from the links on the home page."""
    name = 'julia'
    long_name = "Julia's Drawings"
    url = 'https://drawings.jvns.ca'
    get_url_from_archive_element = get_href

    @classmethod
    def get_archive_elements(cls):
        """Return the links of the ``div.drawings`` element, in reverse page order."""
        home_soup = get_soup_at_url(cls.url)
        drawings = home_soup.find('div', class_='drawings')
        links = drawings.find_all('a')
        return reversed(links)

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Build the info dict for the drawing on page ``soup``.

        Image sources are joined to the site URL. ``archive_elt`` is not
        used.
        """
        published_meta = soup.find('meta', property='og:article:published_time')
        # Only the first 10 characters of the timestamp are parsed as a date
        published = string_to_date(published_meta['content'][:10], "%Y-%m-%d")
        heading = soup.find('h3', class_='p-post-title')
        images = soup.find('section', class_='post-content').find_all('img')
        return {
            'title': heading.string,
            'img': [urljoin_wrapper(cls.url, img['src']) for img in images],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2453
2454
2455
class AnythingComic(GenericListableComic):
    """Retrieve Anything Comic strips, listed from the site's archive table."""
    # Also on http://tapastic.com/series/anything
    name = 'anythingcomic'
    long_name = 'Anything Comic'
    url = 'http://www.anythingcomic.com'

    @classmethod
    def get_archive_elements(cls):
        """Return the rows of the ``table#chapter_table`` archive table."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive/'))
        rows = archive_soup.find('table', id='chapter_table').find_all('tr')
        # The first 2 <tr>'s do not correspond to comics
        return rows[2:]

    @classmethod
    def get_url_from_archive_element(cls, tr):
        """Return the comic URL for archive row ``tr`` (which must have four cells)."""
        _, comic_cell, _, _ = tr.find_all('td')
        anchor = comic_cell.find('a')
        return urljoin_wrapper(cls.url, anchor['href'])

    @classmethod
    def get_comic_info(cls, soup, tr):
        """Build the info dict for one comic.

        Number, title and date come from the four-cell archive row ``tr``;
        the single ``img#comic_image`` element comes from the page ``soup``.
        """
        num_cell, comic_cell, date_cell, _ = tr.find_all('td')
        number = int(num_cell.string)
        anchor = comic_cell.find('a')
        images = soup.find_all('img', id='comic_image')
        # NOTE(review): helper presumably strips ordinal suffixes from the day - confirm
        cleaned_date = remove_st_nd_rd_th_from_date(date_cell.string)
        published = string_to_date(cleaned_date, "%B %d, %Y, %I:%M %p")
        assert len(images) == 1, images
        # Sanity check: alt and title (possibly both missing) must agree
        assert all(img.get('alt') == img.get('title') for img in images)
        return {
            'num': number,
            'title': anchor.string,
            'alt': images[0].get('alt', ''),
            'img': [urljoin_wrapper(cls.url, img['src']) for img in images],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2496
2497 View Code Duplication
2498
class LonnieMillsap(GenericNavigableComic):
    """Retrieve Lonnie Millsap's comics, starting from a hard-coded first URL."""
    name = 'millsap'
    long_name = 'Lonnie Millsap'
    url = 'http://www.lonniemillsap.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.lonniemillsap.com/?p=42'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for the comic on page ``soup``.

        Author, date and images are searched inside the ``div.post-content``
        element; the title is searched in the whole page. ``link`` is not
        used.
        """
        heading = soup.find('h2', class_='post-title')
        content = soup.find('div', class_='post-content')
        author_link = content.find("span", class_="post-author").find("a")
        date_span = content.find("span", class_="post-date")
        published = string_to_date(date_span.string, "%B %d, %Y")
        images = content.find("div", class_="entry").find_all("img")
        return {
            'title': heading.string,
            'author': author_link.string,
            'img': [img['src'] for img in images],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2524
2525
2526
class LinsEditions(GenericDeletedComic):  # Permanently moved to warandpeas
    """Placeholder for L.I.N.S. Editions comics, kept for its metadata only."""
    # Also on https://linscomics.tumblr.com
    # Now on https://warandpeas.com
    name = 'lins'
    long_name = 'L.I.N.S. Editions'
    url = 'https://linsedition.com'
    # Same categories as the WarAndPeas class
    _categories = ('WARANDPEAS', 'LINS')
2534
2535
2536
class WarAndPeas(GenericNavigableComic):
    """Retrieve War And Peas comics, starting from a hard-coded first URL."""
    name = 'warandpeas'
    long_name = 'War And Peas'
    url = 'https://warandpeas.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'https://warandpeas.com/2011/11/07/565/'
    _categories = ('WARANDPEAS', 'LINS')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for the comic on page ``soup``.

        Title, images and publication date are all read from <meta> tags.
        ``link`` is not used.
        """
        title_meta = soup.find('meta', property='og:title')
        title = title_meta['content']
        image_metas = soup.find_all('meta', property='og:image')
        published_meta = soup.find('meta', property='article:published_time')
        # Only the first 10 characters of the timestamp are parsed as a date
        published = string_to_date(published_meta['content'][:10], "%Y-%m-%d")
        return {
            'title': title,
            'img': [meta['content'] for meta in image_metas],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2560
2561
2562
class ThorsThundershack(GenericNavigableComic):
    """Retrieve Thor's Thundershack comics by walking the navigation links."""
    # Also on http://tapastic.com/series/Thors-Thundershac
    name = 'thor'
    long_name = 'Thor\'s Thundershack'
    url = 'http://www.thorsthundershack.com'
    _categories = ('THOR', )
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the ``a.first.navlink`` element found on the home page."""
        home_soup = get_soup_at_url(cls.url)
        return home_soup.find('a', class_='first navlink')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the next (or previous) link of ``last_soup``, or None.

        Links whose href is exactly '/comic' are skipped; the first remaining
        one is returned.
        """
        rel = 'next' if next_ else 'prev'
        candidates = (
            anchor for anchor in last_soup.find_all('a', rel=rel)
            if anchor['href'] != '/comic'
        )
        return next(candidates, None)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for the comic on page ``soup``.

        Data is read from the ``itemprop`` annotated elements of the page and
        from the description <meta> tag. ``link`` is not used.
        """
        title = soup.find('meta', attrs={'name': 'description'})["content"]
        body_text = soup.find('div', itemprop='articleBody').text
        author_span = soup.find('span', itemprop='author copyrightHolder')
        author = author_span.string
        images = soup.find_all('img', itemprop='image')
        # Sanity check: each image must carry the same text as title and alt
        assert all(img['title'] == img['alt'] for img in images)
        # The alt text of the first image is used; empty when there is no image
        alt_text = images[0]['alt'] if images else ""
        timestamp = soup.find('time', itemprop='datePublished')["datetime"]
        published = string_to_date(timestamp, "%Y-%m-%d %H:%M:%S")
        return {
            'img': [urljoin_wrapper(cls.url, img['src']) for img in images],
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'author': author,
            'title': title,
            'alt': alt_text,
            'description': body_text,
        }
2605
2606
2607
class GerbilWithAJetpack(GenericNavigableComic):
    """Class to retrieve GerbilWithAJetpack comics."""
    name = 'gerbil'
    long_name = 'Gerbil With A Jetpack'
    url = 'http://gerbilwithajetpack.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Title, author and date are read from the post header elements of
        ``soup``; images are read from the ``div#comic`` element. ``link`` is
        not used.

        Returns a dict with keys 'img', 'title', 'alt', 'author', 'day',
        'month' and 'year'. A page without any image yields an empty 'img'
        list and an empty 'alt' string.
        """
        title = soup.find('h2', class_='post-title').string
        author = soup.find("span", class_="post-author").find("a").string
        date_str = soup.find("span", class_="post-date").string
        day = string_to_date(date_str, "%B %d, %Y")
        imgs = soup.find("div", id="comic").find_all("img")
        # Guard against pages without image instead of failing with IndexError
        # (same fallback as in the other classes of this file)
        alt = imgs[0]['alt'] if imgs else ""
        # Sanity check: all images share one text, used as both alt and title
        assert all(i['alt'] == i['title'] == alt for i in imgs)
        return {
            'img': [i['src'] for i in imgs],
            'title': title,
            'alt': alt,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
2634
2635
2636
class EveryDayBlues(GenericDeletedComic, GenericNavigableComic):
    """Retrieve Every Day Blues comics by walking the navigation links."""
    name = "blues"
    long_name = "Every Day Blues"
    url = "http://everydayblues.net"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for the comic on page ``soup``.

        The date is parsed with the "de_DE.utf8" locale. At most one image
        is expected in the ``div#comic`` element. ``link`` is not used.
        """
        title = soup.find("h2", class_="post-title").string
        author_link = soup.find("span", class_="post-author").find("a")
        author = author_link.string
        date_span = soup.find("span", class_="post-date")
        published = string_to_date(date_span.string, "%d. %B %Y", "de_DE.utf8")
        images = soup.find("div", id="comic").find_all("img")
        # Sanity checks: alt and title of each image equal the post title,
        # and there is no more than one image
        assert all(img['alt'] == img['title'] == title for img in images)
        assert len(images) <= 1, images
        return {
            'img': [img['src'] for img in images],
            'title': title,
            'author': author,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2662
2663
2664
class BiterComics(GenericNavigableComic):
    """Retrieve Biter Comics by walking the site's navigation links."""
    name = "biter"
    long_name = "Biter Comics"
    url = "http://www.bitercomics.com"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for the comic on page ``soup``.

        Exactly one image is expected in the ``div#comic`` element; its alt
        text is reported under 'alt'. ``link`` is not used.
        """
        heading = soup.find("h1", class_="entry-title")
        title = heading.string
        author = soup.find("span", class_="author vcard").find("a").string
        date_span = soup.find("span", class_="entry-date")
        published = string_to_date(date_span.string, "%B %d, %Y")
        images = soup.find("div", id="comic").find_all("img")
        # Sanity checks: alt equals title on each image, and only one image
        assert all(img['alt'] == img['title'] for img in images)
        assert len(images) == 1, images
        only_image = images[0]
        return {
            'img': [img['src'] for img in images],
            'title': title,
            'alt': only_image['alt'],
            'author': author,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2692
2693
2694
class TheAwkwardYeti(GenericNavigableComic):
    """Retrieve The Awkward Yeti comics by walking the navigation links."""
    # Also on http://www.gocomics.com/the-awkward-yeti
    # Also on http://larstheyeti.tumblr.com
    # Also on https://tapastic.com/series/TheAwkwardYeti
    name = 'yeti'
    long_name = 'The Awkward Yeti'
    url = 'http://theawkwardyeti.com'
    _categories = ('YETI', )
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for the comic on page ``soup``.

        Title and date come from the post header elements; images come from
        the ``div#comic`` element. ``link`` is not used.
        """
        heading = soup.find('h2', class_='post-title')
        date_span = soup.find("span", class_="post-date")
        published = string_to_date(date_span.string, "%B %d, %Y")
        images = soup.find("div", id="comic").find_all("img")
        # Sanity check: only the first image (if any) must have alt == title
        assert all(img['alt'] == img['title'] for img in images[:1])
        return {
            'img': [img['src'] for img in images],
            'title': heading.string,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2721
2722
2723
class PleasantThoughts(GenericNavigableComic):
    """Retrieve Pleasant Thoughts comics by walking the navigation links."""
    name = 'pleasant'
    long_name = 'Pleasant Thoughts'
    url = 'http://pleasant-thoughts.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for the comic on page ``soup``.

        Both the title and the images are searched inside the
        ``div.post-content`` element. ``link`` is not used.
        """
        content = soup.find('div', class_='post-content')
        heading = content.find('h2', class_='post-title')
        entry = content.find("div", class_="entry")
        images = entry.find_all("img")
        return {
            'title': heading.string,
            'img': [img['src'] for img in images],
        }
2741
2742
2743
class MisterAndMe(GenericNavigableComic):
    """Retrieve Mister & Me comics by walking the site's navigation links."""
    # Also on http://www.gocomics.com/mister-and-me
    # Also on https://tapastic.com/series/Mister-and-Me
    name = 'mister'
    long_name = 'Mister & Me'
    url = 'http://www.mister-and-me.com'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for the comic on page ``soup``.

        At most one image is expected in the ``div#comic`` element; 'alt' is
        its alt text, or an empty string when there is no image. ``link`` is
        not used.
        """
        heading = soup.find('h2', class_='post-title')
        title = heading.string
        author = soup.find("span", class_="post-author").find("a").string
        date_span = soup.find("span", class_="post-date")
        published = string_to_date(date_span.string, "%B %d, %Y")
        images = soup.find("div", id="comic").find_all("img")
        # Sanity checks: alt equals title on each image, at most one image
        assert all(img['alt'] == img['title'] for img in images)
        assert len(images) <= 1, images
        alt_text = images[0]['alt'] if images else ""
        return {
            'img': [img['src'] for img in images],
            'title': title,
            'alt': alt_text,
            'author': author,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2773
2774
2775
class LastPlaceComics(GenericNavigableComic):
    """Retrieve Last Place Comics by walking the site's navigation links."""
    name = 'lastplace'
    long_name = 'Last Place Comics'
    url = "http://lastplacecomics.com"
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for the comic on page ``soup``.

        At most one image is expected in the ``div#comic`` element; 'alt' is
        its alt text, or an empty string when there is no image. ``link`` is
        not used.
        """
        heading = soup.find('h2', class_='post-title')
        title = heading.string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        images = soup.find("div", id="comic").find_all("img")
        # Sanity checks: alt equals title on each image, at most one image
        assert all(img['alt'] == img['title'] for img in images)
        assert len(images) <= 1, images
        alt_text = images[0]['alt'] if images else ""
        return {
            'img': [img['src'] for img in images],
            'title': title,
            'alt': alt_text,
            'author': author,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2803
2804
2805
class TalesOfAbsurdity(GenericNavigableComic):
    """Retrieve Tales Of Absurdity comics by walking the navigation links."""
    # Also on http://tapastic.com/series/Tales-Of-Absurdity
    # Also on http://talesofabsurdity.tumblr.com
    name = 'absurdity'
    long_name = 'Tales of Absurdity'
    url = 'http://talesofabsurdity.com'
    _categories = ('ABSURDITY', )
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for the comic on page ``soup``.

        'alt' is the alt text of the first image of the ``div#comic``
        element, or an empty string when there is no image. ``link`` is not
        used.
        """
        title = soup.find('h2', class_='post-title').string
        author_link = soup.find("span", class_="post-author").find("a")
        author = author_link.string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        images = soup.find("div", id="comic").find_all("img")
        # Sanity check: each image must carry the same text as alt and title
        assert all(img['alt'] == img['title'] for img in images)
        alt_text = images[0]['alt'] if images else ""
        return {
            'img': [img['src'] for img in images],
            'title': title,
            'alt': alt_text,
            'author': author,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2835
2836
2837
class EndlessOrigami(GenericComicNotWorking, GenericNavigableComic):  # Nav not working
    """Retrieve Endless Origami comics by walking the navigation links."""
    name = "origami"
    long_name = "Endless Origami"
    url = "http://endlessorigami.com"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for the comic on page ``soup``.

        'alt' is the alt text of the first image of the ``div#comic``
        element, or an empty string when there is no image. ``link`` is not
        used.
        """
        heading = soup.find('h2', class_='post-title')
        title = heading.string
        author = soup.find("span", class_="post-author").find("a").string
        date_span = soup.find("span", class_="post-date")
        published = string_to_date(date_span.string, "%B %d, %Y")
        images = soup.find("div", id="comic").find_all("img")
        # Sanity check: each image must carry the same text as alt and title
        assert all(img['alt'] == img['title'] for img in images)
        alt_text = images[0]['alt'] if images else ""
        return {
            'img': [img['src'] for img in images],
            'title': title,
            'alt': alt_text,
            'author': author,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2864
2865
2866
class PlanC(GenericNavigableComic):
    """Retrieve Plan C comics by walking the site's navigation links."""
    name = 'planc'
    long_name = 'Plan C'
    url = 'http://www.plancomic.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for the comic on page ``soup``.

        Title and date come from the post header elements; images come from
        the ``div#comic`` element. ``link`` is not used.
        """
        heading = soup.find('h2', class_='post-title')
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        comic_div = soup.find('div', id='comic')
        images = comic_div.find_all('img')
        return {
            'title': heading.string,
            'img': [img['src'] for img in images],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2888
2889 View Code Duplication
2890
class BuniComic(GenericNavigableComic):
    """Retrieve Buni comics by walking the site's navigation links."""
    name = 'buni'
    long_name = 'BuniComics'
    url = 'http://www.bunicomic.com'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for the comic on page ``soup``.

        Exactly one image is expected in the ``div#comic`` element; its
        title attribute is used as the comic title. ``link`` is not used.
        """
        comic_div = soup.find('div', id='comic')
        images = comic_div.find_all('img')
        # Sanity checks: alt equals title on each image, and only one image
        assert all(img['alt'] == img['title'] for img in images)
        assert len(images) == 1, images
        only_image = images[0]
        return {
            'img': [img['src'] for img in images],
            'title': only_image['title'],
        }
2908
2909 View Code Duplication
2910
class GenericCommitStrip(GenericNavigableComic):
    """Shared logic to retrieve Commit Strip comics, whatever the language.

    Subclasses are expected to provide ``first_url`` (left as NotImplemented
    here).
    """
    get_navi_link = get_a_rel_next
    get_first_comic_link = simulate_first_link
    first_url = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for the comic on page ``soup``.

        Title and description come from <meta> tags; images come from the
        ``div.entry-content`` element. ``link`` is not used.
        """
        description = soup.find('meta', property='og:description')['content']
        title = soup.find('meta', property='og:title')['content']
        images = soup.find('div', class_='entry-content').find_all('img')
        # Images without a title attribute contribute an empty string
        image_titles = [img.get('title', '') for img in images]
        # Each source is converted to a plain ASCII URI, then joined to the site URL
        image_urls = [
            urljoin_wrapper(cls.url, convert_iri_to_plain_ascii_uri(img['src']))
            for img in images
        ]
        return {
            'title': title,
            'title2': ' '.join(image_titles),
            'description': description,
            'img': image_urls,
        }
2929
2930
2931
class CommitStripFr(GenericCommitStrip):
    """Retrieve the French edition of Commit Strip."""
    name = 'commit_fr'
    long_name = 'Commit Strip (Fr)'
    url = 'http://www.commitstrip.com/fr'
    _categories = ('FRANCAIS', )
    # Page used as the starting point of the navigation
    first_url = 'http://www.commitstrip.com/fr/2012/02/22/interview/'
2938
2939
2940
class CommitStripEn(GenericCommitStrip):
    """Retrieve the English edition of Commit Strip."""
    name = 'commit_en'
    long_name = 'Commit Strip (En)'
    url = 'http://www.commitstrip.com/en'
    # Page used as the starting point of the navigation
    first_url = 'http://www.commitstrip.com/en/2012/02/22/interview/'
2946
2947
2948
class GenericBoumerie(GenericNavigableComic):
    """Shared logic to retrieve Boumeries comics, whatever the language.

    Subclasses are expected to provide ``date_format`` and ``lang`` (both
    left as NotImplemented here); they are passed to the date parser.
    """
    # Also on http://boumeries.tumblr.com
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next
    date_format = NotImplemented
    lang = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for the comic on page ``soup``.

        The date is parsed using the class-level ``date_format`` and
        ``lang``. ``link`` is not used.
        """
        heading = soup.find('h2', class_='post-title')
        shortlink = soup.find('link', rel='shortlink')['href']
        author_link = soup.find("span", class_="post-author").find("a")
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, cls.date_format, cls.lang)
        images = soup.find('div', id='comic').find_all('img')
        # Sanity check: each image must carry the same text as alt and title
        assert all(img['alt'] == img['title'] for img in images)
        return {
            'short_url': shortlink,
            'img': [img['src'] for img in images],
            'title': heading.string,
            'author': author_link.string,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2975
2976
2977
class BoumerieEn(GenericBoumerie):
    """Retrieve the English edition of Boumeries."""
    name = 'boumeries_en'
    long_name = 'Boumeries (En)'
    url = 'http://comics.boumerie.com'
    _categories = ('BOUMERIES', )
    # Format and locale used to parse the post date
    date_format = "%B %d, %Y"
    lang = 'en_GB.UTF-8'
2985
2986
2987
class BoumerieFr(GenericBoumerie):
    """Retrieve the French edition of Boumeries."""
    name = 'boumeries_fr'
    long_name = 'Boumeries (Fr)'
    url = 'http://bd.boumerie.com'
    _categories = ('BOUMERIES', 'FRANCAIS')
    # Format and locale used to parse the post date
    date_format = "%B %d, %Y"  # Used to be "%A, %d %B %Y"
    lang = "fr_FR.utf8"
2995
2996
2997
class UnearthedComics(GenericNavigableComic):
    """Class to retrieve Unearthed comics."""
    # Also on http://tapastic.com/series/UnearthedComics
    # Also on https://unearthedcomics.tumblr.com
    name = 'unearthed'
    long_name = 'Unearthed Comics'
    url = 'http://unearthedcomics.com'
    _categories = ('UNEARTHED', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://unearthedcomics.com/comics/world-with-turn-signals/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The title is the text of the first <h1> (or, failing that, <h2>)
        element, or an empty string when neither exists. Images are read
        from the portfolio entry element of ``soup``. ``link`` is not used.

        Returns a dict with keys 'title', 'description', 'url2', 'img',
        'month', 'year' and 'day'. 'description' is the content of the
        ``og:description`` meta tag, or None when the page has no such tag.
        """
        short_url = soup.find('link', rel='shortlink')['href']
        title_elt = soup.find('h1') or soup.find('h2')
        title = title_elt.string if title_elt else ""
        # Keep the text of the meta tag, not the tag object itself;
        # a missing tag still yields None, as it always did
        desc_elt = soup.find('meta', property='og:description')
        desc = desc_elt['content'] if desc_elt else None
        date_str = soup.find('time', class_='published updated hidden')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        post = soup.find('div', class_="entry content entry-content type-portfolio")
        imgs = post.find_all('img')
        return {
            'title': title,
            'description': desc,
            'url2': short_url,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
3029
3030
3031
class Optipess(GenericNavigableComic):
    """Retrieve Optipess comics by walking the site's navigation links."""
    name = 'optipess'
    long_name = 'Optipess'
    url = 'http://www.optipess.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for the comic on page ``soup``.

        A page without ``div#comic`` element, or without image in it, yields
        an empty 'img' list and an empty 'alt' string. ``link`` is not used.
        """
        heading = soup.find('h2', class_='post-title')
        title = heading.string
        author = soup.find("span", class_="post-author").find("a").string
        comic_div = soup.find('div', id='comic')
        if comic_div:
            images = comic_div.find_all('img')
        else:
            images = []
        alt_text = images[0]['title'] if images else ""
        # Sanity check: all images share one text, used as both alt and title
        assert all(img['alt'] == img['title'] == alt_text for img in images)
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, "%B %d, %Y")
        return {
            'title': title,
            'alt': alt_text,
            'author': author,
            'img': [img['src'] for img in images],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
3059
3060
3061
class PainTrainComic(GenericNavigableComic):
    """Class to retrieve Pain Train Comics."""
    name = 'paintrain'
    long_name = 'Pain Train Comics'
    url = 'http://paintraincomic.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Reads the post title, the shortlink and the post number embedded in
        it, the images of the 'comic' div (with their shared alt/title text)
        and the publication date, which is parsed as day/month/year.
        """
        title = soup.find('h2', class_='post-title').string
        short_url = soup.find('link', rel='shortlink')['href']
        # The site URL is escaped so that its dots only match literal dots
        # instead of acting as regex wildcards.
        short_url_re = re.compile('^%s/\\?p=([0-9]*)' % re.escape(cls.url))
        num = int(short_url_re.match(short_url).group(1))
        imgs = soup.find('div', id='comic').find_all('img')
        alt = imgs[0]['title']
        # Every image is expected to carry the same alt/title text.
        assert all(i['alt'] == i['title'] == alt for i in imgs)
        date_str = soup.find('span', class_='post-date').string
        day = string_to_date(date_str, "%d/%m/%Y")
        return {
            'short_url': short_url,
            'num': num,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'alt': alt,
            'title': title,
        }
3091
3092
3093
class MoonBeard(GenericNavigableComic):
    """Class to retrieve MoonBeard comics."""
    # Also on http://squireseses.tumblr.com
    # Also on http://www.webtoons.com/en/comedy/moon-beard/list?title_no=471
    name = 'moonbeard'
    long_name = 'Moon Beard'
    url = 'http://moonbeard.com'
    _categories = ('MOONBEARD', )
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Reads the post title, the shortlink and the post number embedded in
        it, the images of the 'comic' div (with their shared alt/title text),
        the publication date, the space-joined 'article:tag' meta values and
        the author.
        """
        title = soup.find('h2', class_='post-title').string
        short_url = soup.find('link', rel='shortlink')['href']
        # The site URL is escaped so that its dots only match literal dots
        # instead of acting as regex wildcards.
        short_url_re = re.compile('^%s/\\?p=([0-9]*)' % re.escape(cls.url))
        num = int(short_url_re.match(short_url).group(1))
        imgs = soup.find('div', id='comic').find_all('img')
        alt = imgs[0]['title']
        # Every image is expected to carry the same alt/title text.
        assert all(i['alt'] == i['title'] == alt for i in imgs)
        date_str = soup.find('span', class_='post-date').string
        day = string_to_date(date_str, "%B %d, %Y")
        tags = ' '.join(t['content'] for t in soup.find_all('meta', property='article:tag'))
        author = soup.find('span', class_='post-author').string
        return {
            'short_url': short_url,
            'num': num,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'title': title,
            'tags': tags,
            'alt': alt,
            'author': author,
        }
3130
3131
3132
class SystemComic(GenericNavigableComic):
    """Retriever for System Comic."""
    name = 'system'
    long_name = 'System Comic'
    url = 'http://www.systemcomic.com'
    get_navi_link = get_a_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the anchor inside the 'first' list item of the home page."""
        home = get_soup_at_url(cls.url)
        first_item = home.find('li', class_='first')
        return first_item.find('a')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title, description, date, images) for one page."""
        og_title = soup.find('meta', property='og:title')['content']
        og_desc = soup.find('meta', property='og:description')['content']
        published = string_to_date(soup.find('time')["datetime"], "%Y-%m-%d")
        pictures = soup.find('figure').find_all('img')
        return {
            'title': og_title,
            'description': og_desc,
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [pic['src'] for pic in pictures],
        }
3160
3161 View Code Duplication
3162
class LittleLifeLines(GenericNavigableComic):
    """Retriever for Little Life Lines comics."""
    # Also on https://little-life-lines.tumblr.com
    name = 'life'
    long_name = 'Little Life Lines'
    url = 'http://www.littlelifelines.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.littlelifelines.com/comics/well-done'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor to the next/previous comic, or None if absent."""
        # The site labels are swapped: its 'prev' item leads to the next
        # comic and its 'next' item to the previous one.
        wanted = 'prev' if next_ else 'next'
        item = last_soup.find('li', class_=wanted)
        if not item:
            return None
        return item.find('a')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title, alt, description, author, date, images)."""
        og_title = soup.find('meta', property='og:title')['content']
        og_desc = soup.find('meta', property='og:description')['content']
        published = string_to_date(
            soup.find('time', class_='published')['datetime'], "%Y-%m-%d")
        writer = soup.find('a', rel='author').string
        body = soup.find('div', class_="body entry-content")
        # Only keep the images that actually have a 'src' attribute.
        pictures = [pic for pic in body.find_all('img') if pic.get('src') is not None]
        first_alt = pictures[0]['alt']
        return {
            'title': og_title,
            'alt': first_alt,
            'description': og_desc,
            'author': writer,
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [pic['src'] for pic in pictures],
        }
3201
3202
3203
class GenericWordPressInkblot(GenericNavigableComic):
    """Shared retriever logic for comics using WordPress with Inkblot."""
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'first webcomic' anchor found on the home page."""
        home = get_soup_at_url(cls.url)
        return home.find('a', class_='webcomic-link webcomic1-link first-webcomic-link first-webcomic1-link')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title, date, images) for one page."""
        og_title = soup.find('meta', property='og:title')['content']
        pictures = soup.find('div', class_='webcomic-image').find_all('img')
        # Only the first 10 characters (the date part) of the timestamp are parsed.
        stamp = soup.find('meta', property='article:published_time')['content']
        published = string_to_date(stamp[:10], "%Y-%m-%d")
        return {
            'title': og_title,
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [pic['src'] for pic in pictures],
        }
3226
3227
3228
class EverythingsStupid(GenericWordPressInkblot):
    """Retriever for Everything's Stupid comics (WordPress/Inkblot site)."""
    # Also on http://tapastic.com/series/EverythingsStupid
    # Also on http://www.webtoons.com/en/challenge/everythings-stupid/list?title_no=14591
    # Also on http://everythingsstupidcomics.tumblr.com
    url = 'http://everythingsstupid.net'
    name = 'stupid'
    long_name = "Everything's Stupid"
3236
3237
3238
class TheIsmComics(GenericDeletedComic, GenericWordPressInkblot):
    """Retriever for The Ism comics (WordPress/Inkblot site, marked as deleted)."""
    # Also on https://tapastic.com/series/TheIsm (?)
    url = 'http://www.theism-comics.com'
    name = 'theism'
    long_name = "The Ism"
3244
3245
3246
class WoodenPlankStudios(GenericWordPressInkblot):
    """Retriever for Wooden Plank Studios comics (WordPress/Inkblot site)."""
    url = 'http://woodenplankstudios.com'
    name = 'woodenplank'
    long_name = 'Wooden Plank Studios'
3251
3252
3253
class ElectricBunnyComic(GenericNavigableComic):
    """Retriever for Electric Bunny comics."""
    # Also on http://electricbunnycomics.tumblr.com
    name = 'bunny'
    long_name = 'Electric Bunny Comic'
    url = 'http://www.electricbunnycomics.com/View/Comic/153/Welcome+to+Hell'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the element wrapping the 'First' navigation image."""
        first_img = get_soup_at_url(cls.url).find('img', alt='First')
        return first_img.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the element wrapping the 'Next'/'Back' image, or None."""
        label = 'Next' if next_ else 'Back'
        nav_img = last_soup.find('img', alt=label)
        if not nav_img:
            return None
        return nav_img.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title, images) from the Open Graph metadata."""
        og_title = soup.find('meta', property='og:title')['content']
        og_images = soup.find_all('meta', property='og:image')
        return {
            'title': og_title,
            'img': [meta['content'] for meta in og_images],
        }
3281
3282
3283
class SheldonComics(GenericNavigableComic):
    """Retriever for Sheldon comics."""
    # Also on http://www.gocomics.com/sheldon
    name = 'sheldon'
    long_name = 'Sheldon Comics'
    url = 'http://www.sheldoncomics.com'

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'nav-first' anchor of the home page."""
        home = get_soup_at_url(cls.url)
        return home.find("a", id="nav-first")

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the first navigation anchor not pointing at the home page.

        Returns None when every matching anchor targets the home page (or
        when there is no matching anchor at all).
        """
        anchor_id = "nav-next" if next_ else "nav-prev"
        usable = (anchor for anchor in last_soup.find_all("a", id=anchor_id)
                  if anchor['href'] != 'http://www.sheldoncomics.com')
        return next(usable, None)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title, images) from the 'comic-foot' div."""
        pictures = soup.find("div", id="comic-foot").find_all("img")
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        # Exactly one image is expected; its title doubles as the comic title.
        assert len(pictures) == 1, pictures
        return {
            'title': pictures[0]['title'],
            'img': [pic['src'] for pic in pictures],
        }
3314
3315
3316
class ManVersusManatee(GenericNavigableComic):
    """Retriever for Man Versus Manatee comics."""
    url = 'http://manvsmanatee.com'
    name = 'manvsmanatee'
    long_name = 'Man Versus Manatee'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_a_comicnavbase_comicnavnext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (images, title, date) for one page."""
        post_title = soup.find('h2', class_='post-title').string
        pictures = soup.find('div', id='comic').find_all('img')
        published = string_to_date(
            soup.find('span', class_='post-date').string, "%B %d, %Y")
        return {
            'img': [pic['src'] for pic in pictures],
            'title': post_title,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
3338
3339
3340
class TheMeerkatguy(GenericNavigableComic):
    """Retriever for The Meerkatguy comics."""
    long_name = 'The Meerkatguy'
    url = 'http://www.themeerkatguy.com'
    name = 'meerkatguy'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_a_comicnavbase_comicnavnext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict from the page title and the og:image metas."""
        page_title = soup.find('title').string
        og_images = soup.find_all('meta', property='og:image')
        return {
            'img': [meta['content'] for meta in og_images],
            'title': page_title,
        }
3357
3358
3359
class Ubertool(GenericNavigableComic):
    """Retriever for Ubertool comics."""
    # Also on https://ubertool.tumblr.com
    # Also on https://tapastic.com/series/ubertool
    name = 'ubertool'
    long_name = 'Ubertool'
    url = 'http://ubertoolcomic.com'
    _categories = ('UBERTOOL', )
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_a_comicnavbase_comicnavnext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (images, title, date) for one page."""
        post_title = soup.find('h2', class_='post-title').string
        published = string_to_date(
            soup.find('span', class_='post-date').string, "%B %d, %Y")
        pictures = soup.find('div', id='comic').find_all('img')
        return {
            'img': [pic['src'] for pic in pictures],
            'title': post_title,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
3384
3385
3386
class EarthExplodes(GenericNavigableComic):
    """Retriever for The Earth Explodes comics."""
    name = 'earthexplodes'
    long_name = 'The Earth Explodes'
    url = 'http://www.earthexplodes.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.earthexplodes.com/comics/000/'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor whose id is 'next' or 'prev'."""
        anchor_id = 'next' if next_ else 'prev'
        return last_soup.find('a', id=anchor_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (images, title, alt) for one page."""
        page_title = soup.find('title').string
        pictures = soup.find('div', id='image').find_all('img')
        # The alt text is the first image's title, or '' if it has none.
        hover_text = pictures[0].get('title', '')
        # Image sources are joined with the site URL before being returned.
        image_urls = [urljoin_wrapper(cls.url, pic['src']) for pic in pictures]
        return {
            'img': image_urls,
            'title': page_title,
            'alt': hover_text,
        }
3411
3412 View Code Duplication
3413
class PomComics(GenericNavigableComic):
    """Retriever for Pom Comics / Piece of Me."""
    name = 'pom'
    long_name = 'Pom Comics / Piece of Me'
    url = 'http://www.pomcomic.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'btn-first' anchor of the home page."""
        home = get_soup_at_url(cls.url)
        return home.find('a', class_='btn-first')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the 'btn-next' or 'btn-prev' anchor of the page."""
        button = 'btn-next' if next_ else 'btn-prev'
        return last_soup.find('a', class_=button)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title, description, tags, images)."""
        heading = soup.find('h1').string
        og_desc = soup.find('meta', property='og:description')['content']
        keywords = soup.find('meta', attrs={'name': 'keywords'})['content']
        pictures = soup.find('div', class_='comic').find_all('img')
        # Image sources are joined with the site URL before being returned.
        image_urls = [urljoin_wrapper(cls.url, pic['src']) for pic in pictures]
        return {
            'title': heading,
            'desc': og_desc,
            'tags': keywords,
            'img': image_urls,
        }
3443
3444
3445
class CubeDrone(GenericComicNotWorking, GenericNavigableComic):  # Website has changed
    """Class to retrieve Cube Drone comics."""
    name = 'cubedrone'
    long_name = 'Cube Drone'
    url = 'http://cube-drone.com/comics'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        The link is the parent of the 'backward' glyph span of the home page.
        """
        return get_soup_at_url(cls.url).find('span', class_='glyphicon glyphicon-backward').parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns the parent of the right/left chevron glyph span, or None when
        the page has no such span (instead of failing on a missing element).
        """
        class_ = 'glyphicon glyphicon-chevron-' + ('right' if next_ else 'left')
        span = last_soup.find('span', class_=class_)
        return span.parent if span is not None else None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Title and secondary URL come from the twitter meta tags; the second
        title and the alt text come from the first comic image.
        """
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        url2 = soup.find('meta', attrs={'name': 'twitter:url'})['content']
        # date_str = soup.find('h2', class_='comic_title').find('small').string
        # day = string_to_date(date_str, "%B %d, %Y, %I:%M %p")
        imgs = soup.find_all('img', class_='comic img-responsive')
        title2 = imgs[0]['title']
        alt = imgs[0]['alt']
        return {
            'url2': url2,
            'title': title,
            'title2': title2,
            'alt': alt,
            'img': [i['src'] for i in imgs],
        }
3480
3481
3482
class MakeItStoopid(GenericDeletedComic, GenericNavigableComic):
    """Retriever for Make It Stoopid comics (marked as deleted)."""
    name = 'stoopid'
    long_name = 'Make it stoopid'
    url = 'http://makeitstoopid.com/comic.php'

    @classmethod
    def get_nav(cls, soup):
        """Return the first five 'cnav' elements, with None for those lacking an href.

        The page is expected to hold the same navigation elements twice; the
        two groups are asserted to be equal.
        """
        nav_elements = soup.find_all(class_='cnav')
        first_group = nav_elements[:5]
        second_group = nav_elements[5:]
        assert first_group == second_group
        # Order in the group: begin, prev, archive, next_, end
        return [elt if elt.get('href') is not None else None for elt in first_group]

    @classmethod
    def get_first_comic_link(cls):
        """Return the first navigation element of the home page."""
        home = get_soup_at_url(cls.url)
        return cls.get_nav(home)[0]

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the navigation element at index 3 (next) or 1 (previous)."""
        position = 3 if next_ else 1
        return cls.get_nav(last_soup)[position]

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict: title from the link, images with id 'comicimg'."""
        pictures = soup.find_all('img', id='comicimg')
        return {
            'title': link['title'],
            'img': [pic['src'] for pic in pictures],
        }
3516
3517
3518
class OffTheLeashDog(GenericNavigableComic):
    """Retriever for Off The Leash Dog comics."""
    # Also on http://rupertfawcettsdoggyblog.tumblr.com
    # Also on http://www.rupertfawcettcartoons.com
    name = 'offtheleash'
    long_name = 'Off The Leash Dog'
    url = 'http://offtheleashdogcartoons.com'
    _categories = ('FAWCETT', )
    get_navi_link = get_a_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://offtheleashdogcartoons.com/uncategorized/can-i-help-you/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title, images) from the entry title and content."""
        entry_title = soup.find("h1", class_="entry-title").string
        content = soup.find('div', class_='entry-content')
        pictures = content.find_all('img')
        return {
            'title': entry_title,
            'img': [pic['src'] for pic in pictures],
        }
3539
3540
3541
class MacadamValley(GenericNavigableComic):
    """Retriever for Macadam Valley comics."""
    name = 'macadamvalley'
    long_name = 'Macadam Valley'
    url = 'http://macadamvalley.com'
    get_navi_link = get_a_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://macadamvalley.com/le-debut-de-la-fin/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title, single image, date, author)."""
        entry_title = soup.find("h1", class_="entry-title").string
        # Only the first image of the entry content is used.
        picture = soup.find('div', class_='entry-content').find('img')
        # Only the first 10 characters (the date part) of the timestamp are parsed.
        stamp = soup.find('time', class_='entry-date')['datetime']
        published = string_to_date(stamp[:10], "%Y-%m-%d")
        writer = soup.find('a', rel='author').string
        return {
            'title': entry_title,
            'img': [picture['src']],
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'author': writer,
        }
3567
3568
3569
class MarketoonistComics(GenericNavigableComic):
    """Retriever for Marketoonist comics."""
    name = 'marketoonist'
    long_name = 'Marketoonist'
    url = 'https://marketoonist.com/cartoons'
    get_first_comic_link = simulate_first_link
    get_navi_link = get_link_rel_next
    first_url = 'https://marketoonist.com/2002/10/the-8-types-of-brand-managers-2.html'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (images, date, title) from the page metadata."""
        og_images = soup.find_all('meta', property='og:image')
        # Only the first 10 characters (the date part) of the timestamp are parsed.
        stamp = soup.find('meta', property='article:published_time')['content']
        published = string_to_date(stamp[:10], "%Y-%m-%d")
        og_title = soup.find('meta', property='og:title')['content']
        return {
            'img': [meta['content'] for meta in og_images],
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'title': og_title,
        }
3592
3593 View Code Duplication
3594
class ConsoliaComics(GenericNavigableComic):
    """Retriever for Consolia comics."""
    name = 'consolia'
    long_name = 'consolia'
    url = 'https://consolia-comic.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'first' anchor of the home page."""
        home = get_soup_at_url(cls.url)
        return home.find('a', class_='first')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the 'next' or 'prev' anchor of the page."""
        direction = 'next' if next_ else 'prev'
        return last_soup.find('a', class_=direction)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title, images, date) for one page."""
        og_title = soup.find('meta', property='og:title')['content']
        published = string_to_date(soup.find('time')["datetime"], "%Y-%m-%d")
        og_images = soup.find_all('meta', property='og:image')
        return {
            'title': og_title,
            'img': [meta['content'] for meta in og_images],
            'day': published.day,
            'month': published.month,
            'year': published.year,
        }
3625
3626
3627
class GenericBlogspotComic(GenericNavigableComic):
    """Shared retriever logic for comics hosted on Blogspot."""
    get_first_comic_link = simulate_first_link
    # Placeholder value: subclasses are expected to provide the real first URL.
    first_url = NotImplemented
    _categories = ('BLOGSPOT', )

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the blog pager anchor: 'newer' for next, 'older' for previous."""
        if next_:
            pager_id = 'Blog1_blog-pager-newer-link'
        else:
            pager_id = 'Blog1_blog-pager-older-link'
        return last_soup.find('a', id=pager_id)
3637
3638 View Code Duplication
3639
class TuMourrasMoinsBete(GenericBlogspotComic):
    """Retriever for Tu Mourras Moins Bete comics."""
    name = 'mourrasmoinsbete'
    long_name = 'Tu Mourras Moins Bete'
    url = 'http://tumourrasmoinsbete.blogspot.fr'
    _categories = ('FRANCAIS', )
    first_url = 'http://tumourrasmoinsbete.blogspot.fr/2008/06/essai.html'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (images, author, title) for one post."""
        page_title = soup.find('title').string
        article = soup.find('div', itemprop='description articleBody')
        pictures = article.find_all('img')
        writer = soup.find('span', itemprop='author').string
        return {
            'img': [pic['src'] for pic in pictures],
            'author': writer,
            'title': page_title,
        }
3658
3659
3660
class Octopuns(GenericBlogspotComic):
    """Retriever for Octopuns comics."""
    # Also on http://octopuns.tumblr.com
    name = 'octopuns'
    long_name = 'Octopuns'
    url = 'http://www.octopuns.net'  # or http://octopuns.blogspot.fr/
    first_url = 'http://octopuns.blogspot.com/2010/12/17122010-always-read-label.html'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (images, title, date) for one post."""
        post_title = soup.find('h3', class_='post-title entry-title').string
        header = soup.find('h2', class_='date-header').string
        published = string_to_date(header, "%A, %B %d, %Y")
        image_links = soup.find_all('link', rel='image_src')
        return {
            'img': [lnk['href'] for lnk in image_links],
            'title': post_title,
            'day': published.day,
            'month': published.month,
            'year': published.year,
        }
3682
3683
3684
class GeekAndPoke(GenericNavigableComic):
    """Class to retrieve Geek And Poke comics."""
    name = 'geek'
    long_name = 'Geek And Poke'
    url = 'http://geek-and-poke.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://geek-and-poke.com/geekandpoke/2006/8/27/a-new-place-for-a-not-so-old-blog.html'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic."""
        # The class names are swapped on purpose: 'prev-item' is looked up
        # for the next comic and 'next-item' for the previous one.
        return last_soup.find('a', class_='prev-item' if next_ else 'next-item')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The description is optional (empty string when the meta tag is
        missing). Images are taken from the 'body entry-content' div or, failing
        that, from the 'special-content' div; images without a 'src' attribute
        are ignored. The alt text is always a string: the first image's 'alt'
        attribute, or "" when there is no image or no such attribute.
        """
        title = soup.find('meta', property='og:title')['content']
        desc = soup.find('meta', property='og:description')
        desc_str = "" if desc is None else desc['content']
        date_str = soup.find('time', class_='published')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        author = soup.find('a', rel='author').string
        div_content = (soup.find('div', class_="body entry-content") or
                       soup.find('div', class_="special-content"))
        imgs = div_content.find_all('img')
        imgs = [i for i in imgs if i.get('src') is not None]
        # NOTE(review): "'title' not in i" looks like it tests the tag's
        # children rather than its attributes, which would make this check
        # always pass - confirm against the Beautiful Soup documentation
        # before tightening it.
        assert all('title' not in i or i['alt'] == i['title'] for i in imgs)
        # Fall back to an empty string (not a list) so 'alt' keeps one type.
        alt = imgs[0].get('alt', "") if imgs else ""
        return {
            'title': title,
            'alt': alt,
            'description': desc_str,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
        }
3723
3724 View Code Duplication
3725
class GloryOwlComix(GenericBlogspotComic):
    """Retriever for Glory Owl comics."""
    name = 'gloryowl'
    long_name = 'Glory Owl'
    url = 'http://gloryowlcomix.blogspot.fr'
    _categories = ('NSFW', 'FRANCAIS')
    first_url = 'http://gloryowlcomix.blogspot.fr/2013/02/1_7.html'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (images, author, title) for one post."""
        page_title = soup.find('title').string
        image_links = soup.find_all('link', rel='image_src')
        writer = soup.find('a', rel='author').string
        return {
            'img': [lnk['href'] for lnk in image_links],
            'author': writer,
            'title': page_title,
        }
3744
3745 View Code Duplication
3746
class GenericSquareSpace(GenericNavigableComic):
    """Shared retriever logic for comics hosted on SquareSpace."""
    _categories = ('SQUARESPACE', )
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the navigation anchor for the next or previous comic."""
        # The ids are swapped: 'prevLink' is looked up for the next comic
        # and 'nextLink' for the previous one.
        anchor_id = 'prevLink' if next_ else 'nextLink'
        return last_soup.find('a', id=anchor_id)

    @classmethod
    def get_images(cls, soup):
        """Return the image URLs of a comic; must be provided by subclasses."""
        raise NotImplementedError

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title, images, date, author, description)."""
        og_title = soup.find('meta', property='og:title')['content']
        og_desc = soup.find('meta', property='og:description')['content']
        stamp = soup.find('time', itemprop='datePublished')["datetime"]
        published = string_to_date(stamp, "%Y-%m-%d")
        writer = soup.find('a', rel='author').string
        return {
            'title': og_title,
            'img': cls.get_images(soup),
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'author': writer,
            'description': og_desc,
        }
3779
3780
3781
class AtRandomComics(GenericSquareSpace):
    """Retriever for At Random Comics."""
    name = 'atrandom'
    long_name = 'At Random Comics'
    url = 'http://www.atrandomcomics.com'
    first_url = 'http://www.atrandomcomics.com/at-random-comics-home/2015/5/5/can-of-worms'

    @classmethod
    def get_images(cls, soup):
        """Return the content of every og:image meta tag of the page."""
        og_images = soup.find_all('meta', property='og:image')
        return [meta['content'] for meta in og_images]
3793
3794
3795
class NothingSuspicious(GenericSquareSpace):
    """Retriever for Nothing Suspicious comics."""
    name = 'nothingsuspicious'
    long_name = 'Nothing Suspicious'
    url = 'https://nothingsuspicio.us'
    first_url = 'https://nothingsuspicio.us/?offset=1483592400908'

    @classmethod
    def get_images(cls, soup):
        """Return a one-element list with the first image of the content wrapper."""
        wrapper = soup.find('div', class_='content-wrapper')
        picture = wrapper.find('img')
        return [picture['src']]
3807
3808
3809
class DeathBulge(GenericComic):
    """Class to retrieve the DeathBulge comics."""
    name = 'deathbulge'
    long_name = 'Death Bulge'
    url = 'http://www.deathbulge.com'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generator to get the next comic. Implementation of GenericComic's abstract method.

        The pagination bounds are read once from 'api/comics/1'; each comic
        is then loaded from its own 'api/comics/<num>' JSON document and
        yielded as an info dict.
        """
        # Local names avoid 'json' so that the imported json module is not
        # shadowed inside this method.
        first_page = load_json_at_url(urljoin_wrapper(cls.url, 'api/comics/1'))
        pagination = first_page['pagination_links']
        first_num = last_comic['num'] if last_comic else pagination['first']
        last_num = pagination['last']
        # NOTE(review): the range skips 'first_num' and excludes 'last_num';
        # it is kept as-is because the API numbering is not visible here -
        # confirm that no comic is missed at either end.
        for num in range(first_num + 1, last_num):
            json_url = urljoin_wrapper(cls.url, 'api/comics/%d' % num)
            comic_json = load_json_at_url(json_url)['comic']
            # Only the first 10 characters (the date part) of the timestamp are parsed.
            date_str = comic_json['timestamp'][:10]
            day = string_to_date(date_str, "%Y-%m-%d")
            comic_id = comic_json['id']  # not exactly 'num' o_O
            yield {
                'json_url': json_url,
                'num': comic_id,
                'url': urljoin_wrapper(cls.url, 'comics/%d' % num),
                'alt': comic_json['alt_text'],
                'title': comic_json['title'],
                'img': [urljoin_wrapper(cls.url, comic_json['comic'])],
                'month': day.month,
                'year': day.year,
                'day': day.day,
            }
3842
3843
3844
class GenericTumblrV1(GenericComic):
    """Base class for comics read from Tumblr through the V1 API.

    All endpoints are derived from `cls.url`, which the concrete
    subclasses define.
    """
    _categories = ('TUMBLR', )

    @classmethod
    def get_next_comic(cls, last_comic):
        """Yield the comics built from the posts following `last_comic`.

        Posts for which get_comic_info returns None are skipped.
        """
        for comic in map(cls.get_comic_info, cls.get_posts(last_comic)):
            if comic is not None:
                yield comic

    @classmethod
    def check_url(cls, url):
        """Print a warning if `url` does not start with `cls.url`; return `url` unchanged."""
        if url.startswith(cls.url):
            return url
        print("url '%s' does not start with '%s'" % (url, cls.url))
        return url

    @classmethod
    def get_url_from_post(cls, post):
        """Return the 'url' attribute of `post` after passing it through check_url."""
        post_url = post['url']
        return cls.check_url(post_url)

    @classmethod
    def get_api_url(cls):
        """Return the '/api/read/' endpoint joined onto `cls.url`."""
        return urljoin_wrapper(cls.url, '/api/read/')

    @classmethod
    def get_api_url_for_id(cls, tumblr_id):
        """Return the API url selecting the single post `tumblr_id`."""
        base = cls.get_api_url()
        return base + '?id=%d' % tumblr_id

    @classmethod
    def get_comic_info(cls, post):
        """Build the comic dict for `post`, or return None if it is not a 'photo' post."""
        if post['type'] != 'photo':
            return None
        tumblr_id = int(post['id'])
        api_url = cls.get_api_url_for_id(tumblr_id)
        timestamp = int(post['unix-timestamp'])
        day = datetime.datetime.fromtimestamp(timestamp).date()
        caption = post.find('photo-caption')
        if caption:
            title = caption.string
        else:
            title = ""
        tags = ' '.join(tag.string for tag in post.find_all('tag'))
        # Pictures live in 'photo' children when present, otherwise
        # directly in the post itself.
        photos = post.find_all('photo') or [post]
        # Several resolutions are listed for each picture: keep the first.
        photo_urls = [photo.find('photo-url') for photo in photos]
        return {
            'url': cls.get_url_from_post(post),
            'url2': post['url-with-slug'],
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'title': title,
            'tags': tags,
            'img': [photo_url.string for photo_url in photo_urls],
            'tumblr-id': tumblr_id,
            'api_url': api_url,
        }

    @classmethod
    def get_posts(cls, last_comic, nb_post_per_call=10):
        """Return the posts published after `last_comic`, oldest first.

        The V1 API is read page by page (`nb_post_per_call` posts per
        request, 50 at most) from newest to oldest; the accumulated posts
        are handed back reversed. When the post of `last_comic` cannot be
        located, a message is printed and an empty result is returned.
        """
        waiting_for_id = last_comic['tumblr-id'] if last_comic else None
        collected = []
        if last_comic is not None:
            cls.check_url(last_comic['api_url'])
            # A deleted previous post would make the scan below go through
            # the whole blog for nothing: probe it first and give up early
            # with a clear message.
            last_api_url = cls.get_api_url_for_id(waiting_for_id)
            try:
                get_soup_at_url(last_api_url)
            except urllib.error.HTTPError:
                try:
                    get_soup_at_url(cls.url)
                    message = "Did not find previous post %s : it might have been deleted" % last_api_url
                except urllib.error.HTTPError:
                    message = "Did not find previous post nor main url %s" % cls.url
                print(message)
                return reversed(collected)
        api_url = cls.get_api_url()
        first_page = get_soup_at_url(api_url).find('posts')
        start, total = int(first_page['start']), int(first_page['total'])
        assert start == 0
        for offset in range(0, total, nb_post_per_call):
            page_url = api_url + '?start=%d&num=%d' % (offset, nb_post_per_call)
            page = get_soup_at_url(page_url).find('posts')
            page_start, page_total = int(page['start']), int(page['total'])
            assert offset == page_start, "%d != %d" % (offset, page_start)
            # This may happen and should be handled in the future
            assert total == page_total, "%d != %d" % (total, page_total)
            for post in page.find_all('post'):
                post_id = int(post['id'])
                if waiting_for_id and waiting_for_id == post_id:
                    # Reached the last known post: everything newer is collected.
                    return reversed(collected)
                collected.append(post)
        if waiting_for_id is not None:
            print("Did not find %s : there might be a problem" % waiting_for_id)
            return []
        return reversed(collected)
3950
3951
3952
class SaturdayMorningBreakfastCerealTumblr(GenericTumblrV1):
    """Retriever for Saturday Morning Breakfast Cereal comics (Tumblr V1 API).

    Also on http://www.gocomics.com/saturday-morning-breakfast-cereal
    Also on http://www.smbc-comics.com
    """
    name = 'smbc-tumblr'
    long_name = 'Saturday Morning Breakfast Cereal (from Tumblr)'
    url = 'http://smbc-comics.tumblr.com'
    _categories = ('SMBC', )
3960
3961
3962
class AHammADay(GenericTumblrV1):
    """Retriever for A Hamm A Day comics (Tumblr V1 API)."""
    name = 'hamm'
    long_name = 'A Hamm A Day'
    url = 'http://www.ahammaday.com'
3967
3968
3969
class IrwinCardozo(GenericTumblrV1):
    """Retriever for Irwin Cardozo comics (Tumblr V1 API)."""
    name = 'irwinc'
    long_name = 'Irwin Cardozo'
    url = 'http://irwincardozocomics.tumblr.com'
3974
3975
3976
class AccordingToDevin(GenericTumblrV1):
    """Retriever for According To Devin comics (Tumblr V1 API)."""
    name = 'devin'
    long_name = 'According To Devin'
    url = 'http://accordingtodevin.tumblr.com'
3981
3982
3983
class ItsTheTieTumblr(GenericTumblrV1):
    """Retriever for It's the tie comics (Tumblr V1 API).

    Also on http://itsthetie.com
    Also on https://tapastic.com/series/itsthetie
    """
    name = 'tie-tumblr'
    long_name = "It's the tie (from Tumblr)"
    url = 'http://itsthetie.tumblr.com'
    _categories = ('TIE', )
3991
3992
3993
class OctopunsTumblr(GenericTumblrV1):
    """Retriever for Octopuns comics (Tumblr V1 API).

    Also on http://www.octopuns.net
    """
    name = 'octopuns-tumblr'
    long_name = 'Octopuns (from Tumblr)'
    url = 'http://octopuns.tumblr.com'
3999
4000
4001
class PicturesInBoxesTumblr(GenericTumblrV1):
    """Retriever for Pictures In Boxes comics (Tumblr V1 API).

    Also on http://www.picturesinboxes.com
    """
    name = 'picturesinboxes-tumblr'
    long_name = 'Pictures in Boxes (from Tumblr)'
    url = 'https://picturesinboxescomic.tumblr.com'
4007
4008
4009
class TubeyToonsTumblr(GenericTumblrV1):
    """Retriever for TubeyToons comics (Tumblr V1 API).

    Also on http://tapastic.com/series/Tubey-Toons
    Also on http://tubeytoons.com
    """
    name = 'tubeytoons-tumblr'
    long_name = 'Tubey Toons (from Tumblr)'
    url = 'https://tubeytoons.tumblr.com'
    # NOTE(review): 'TUNEYTOONS' looks like a misspelling of 'TUBEYTOONS';
    # kept as-is because other classes may share this key - confirm.
    _categories = ('TUNEYTOONS', )
4017
4018
4019
class UnearthedComicsTumblr(GenericTumblrV1):
    """Retriever for Unearthed comics (Tumblr V1 API).

    Also on http://tapastic.com/series/UnearthedComics
    Also on http://unearthedcomics.com
    """
    name = 'unearthed-tumblr'
    long_name = 'Unearthed Comics (from Tumblr)'
    url = 'https://unearthedcomics.tumblr.com'
    _categories = ('UNEARTHED', )
4027
4028
4029
class PieComic(GenericTumblrV1):
    """Retriever for Pie Comic comics (Tumblr V1 API)."""
    name = 'pie'
    long_name = 'Pie Comic'
    url = 'http://piecomic.tumblr.com'
4034
4035
4036
class MrEthanDiamond(GenericTumblrV1):
    """Retriever for Mr Ethan Diamond comics (Tumblr V1 API)."""
    name = 'diamond'
    long_name = 'Mr Ethan Diamond'
    url = 'http://mrethandiamond.tumblr.com'
4041
4042
4043
class Flocci(GenericTumblrV1):
    """Retriever for floccinaucinihilipilification comics (Tumblr V1 API)."""
    name = 'flocci'
    long_name = 'floccinaucinihilipilification'
    url = 'http://floccinaucinihilipilificationa.tumblr.com'
4048
4049
4050
class UpAndOut(GenericTumblrV1):
    """Retriever for Up & Out comics (Tumblr V1 API).

    Also on http://tapastic.com/series/UP-and-OUT
    """
    name = 'upandout'
    long_name = 'Up And Out (from Tumblr)'
    url = 'http://upandoutcomic.tumblr.com'
4056
4057
4058
class Pundemonium(GenericTumblrV1):
    """Retriever for Pundemonium comics (Tumblr V1 API)."""
    name = 'pundemonium'
    long_name = 'Pundemonium'
    url = 'http://monstika.tumblr.com'
4063
4064
4065
class PoorlyDrawnLinesTumblr(GenericTumblrV1):
    """Retriever for Poorly Drawn Lines comics (Tumblr V1 API).

    Also on http://poorlydrawnlines.com
    """
    name = 'poorlydrawn-tumblr'
    long_name = 'Poorly Drawn Lines (from Tumblr)'
    url = 'http://pdlcomics.tumblr.com'
    _categories = ('POORLYDRAWN', )
4072
4073
4074
class PearShapedComics(GenericTumblrV1):
    """Retriever for Pear Shaped Comics (Tumblr V1 API)."""
    name = 'pearshaped'
    long_name = 'Pear-Shaped Comics'
    url = 'http://pearshapedcomics.com'
4079
4080
4081
class PondScumComics(GenericTumblrV1):
    """Retriever for Pond Scum Comics (Tumblr V1 API)."""
    name = 'pond'
    long_name = 'Pond Scum'
    url = 'http://pondscumcomic.tumblr.com'
4086
4087
4088
class MercworksTumblr(GenericTumblrV1):
    """Retriever for Mercworks comics (Tumblr V1 API).

    Also on http://mercworks.net
    Also on http://www.webtoons.com/en/comedy/mercworks/list?title_no=426
    Also on https://tapastic.com/series/MercWorks
    """
    name = 'mercworks-tumblr'
    long_name = 'Mercworks (from Tumblr)'
    url = 'http://mercworks.tumblr.com'
    _categories = ('MERCWORKS', )
4097
4098
4099
class OwlTurdTumblr(GenericTumblrV1):
    """Retriever for Owl Turd / Shen comix (Tumblr V1 API).

    Also on https://tapas.io/series/Shen-Comix
    """
    name = 'owlturd-tumblr'
    long_name = 'Owl Turd / Shen Comix (from Tumblr)'
    url = 'http://shencomix.com'
    _categories = ('OWLTURD', 'SHENCOMIX')
4106
4107
4108
class VectorBelly(GenericTumblrV1):
    """Retriever for Vector Belly comics (Tumblr V1 API).

    Also on http://vectorbelly.com
    """
    name = 'vector'
    long_name = 'Vector Belly'
    url = 'http://vectorbelly.tumblr.com'
4114
4115
4116
class GoneIntoRapture(GenericTumblrV1):
    """Retriever for Gone Into Rapture comics (Tumblr V1 API).

    Also on http://goneintorapture.tumblr.com
    Also on http://tapastic.com/series/Goneintorapture
    """
    name = 'rapture'
    long_name = 'Gone Into Rapture'
    url = 'http://goneintorapture.com'
4123
4124
4125
class TheOatmealTumblr(GenericTumblrV1):
    """Retriever for The Oatmeal comics (Tumblr V1 API).

    Also on http://theoatmeal.com
    """
    name = 'oatmeal-tumblr'
    long_name = 'The Oatmeal (from Tumblr)'
    url = 'http://oatmeal.tumblr.com'
4131
4132
4133
class HeckIfIKnowComicsTumblr(GenericTumblrV1):
    """Retriever for Heck If I Know Comics (Tumblr V1 API).

    Also on http://tapastic.com/series/Regular
    """
    name = 'heck-tumblr'
    long_name = 'Heck if I Know comics (from Tumblr)'
    url = 'http://heckifiknowcomics.com'
4139
4140
4141
class MyJetPack(GenericTumblrV1):
    """Retriever for My Jet Pack comics (Tumblr V1 API)."""
    name = 'jetpack'
    long_name = 'My Jet Pack'
    url = 'http://myjetpack.tumblr.com'
4146
4147
4148
class CheerUpEmoKidTumblr(GenericTumblrV1):
    """Retriever for CheerUpEmoKid comics (Tumblr V1 API).

    Also on http://www.cheerupemokid.com
    Also on http://tapastic.com/series/CUEK
    """
    name = 'cuek-tumblr'
    long_name = 'Cheer Up Emo Kid (from Tumblr)'
    url = 'https://enzocomics.tumblr.com'
4155
4156
4157
class ForLackOfABetterComic(GenericTumblrV1):
    """Retriever for For Lack Of A Better Comic (Tumblr V1 API).

    Also on http://forlackofabettercomic.com
    """
    name = 'lack'
    long_name = 'For Lack Of A Better Comic'
    url = 'http://forlackofabettercomic.tumblr.com'
4163
4164
4165
class ZenPencilsTumblr(GenericTumblrV1):
    """Retriever for ZenPencils comics (Tumblr V1 API).

    Also on http://zenpencils.com
    Also on http://www.gocomics.com/zen-pencils
    """
    name = 'zenpencils-tumblr'
    long_name = 'Zen Pencils (from Tumblr)'
    url = 'http://zenpencils.tumblr.com'
    _categories = ('ZENPENCILS', )
4173
4174
4175
class ThreeWordPhraseTumblr(GenericTumblrV1):
    """Retriever for Three Word Phrase comics (Tumblr V1 API).

    Also on http://threewordphrase.com
    """
    name = 'threeword-tumblr'
    long_name = 'Three Word Phrase (from Tumblr)'
    url = 'http://threewordphrase.tumblr.com'
4181
4182
4183
class TimeTrabbleTumblr(GenericTumblrV1):
    """Retriever for Time Trabble comics (Tumblr V1 API).

    Also on http://timetrabble.com
    """
    name = 'timetrabble-tumblr'
    long_name = 'Time Trabble (from Tumblr)'
    url = 'http://timetrabble.tumblr.com'
4189
4190
4191
class SafelyEndangeredTumblr(GenericTumblrV1):
    """Retriever for Safely Endangered comics (Tumblr V1 API).

    Also on http://www.safelyendangered.com
    """
    name = 'endangered-tumblr'
    long_name = 'Safely Endangered (from Tumblr)'
    url = 'http://tumblr.safelyendangered.com'
4197
4198
4199
class MouseBearComedyTumblr(GenericTumblrV1):
    """Retriever for Mouse Bear Comedy comics (Tumblr V1 API).

    Also on http://www.mousebearcomedy.com
    """
    name = 'mousebear-tumblr'
    long_name = 'Mouse Bear Comedy (from Tumblr)'
    url = 'http://mousebearcomedy.tumblr.com'
4205
4206
4207
class BouletCorpTumblr(GenericTumblrV1):
    """Retriever for BouletCorp comics (Tumblr V1 API).

    Also on http://www.bouletcorp.com
    """
    name = 'boulet-tumblr'
    long_name = 'Boulet Corp (from Tumblr)'
    url = 'https://bouletcorp.tumblr.com'
    _categories = ('BOULET', )
4214
4215
4216
class TheAwkwardYetiTumblr(GenericTumblrV1):
    """Retriever for The Awkward Yeti comics (Tumblr V1 API).

    Also on http://www.gocomics.com/the-awkward-yeti
    Also on http://theawkwardyeti.com
    Also on https://tapastic.com/series/TheAwkwardYeti
    """
    name = 'yeti-tumblr'
    long_name = 'The Awkward Yeti (from Tumblr)'
    url = 'http://larstheyeti.tumblr.com'
    _categories = ('YETI', )
4225
4226
4227
class NellucNhoj(GenericTumblrV1):
    """Retriever for NellucNhoj comics (Tumblr V1 API)."""
    name = 'nhoj'
    long_name = 'Nelluc Nhoj'
    url = 'http://nellucnhoj.com'
4232
4233
4234
class DownTheUpwardSpiralTumblr(GenericTumblrV1):
    """Retriever for Down The Upward Spiral comics (Tumblr V1 API).

    Also on http://www.downtheupwardspiral.com
    Also on https://tapastic.com/series/Down-the-Upward-Spiral
    """
    name = 'spiral-tumblr'
    long_name = 'Down the Upward Spiral (from Tumblr)'
    url = 'http://downtheupwardspiral.tumblr.com'
4241
4242
4243
class AsPerUsualTumblr(GenericTumblrV1):
    """Class to retrieve As Per Usual comics from Tumblr (V1 API).

    Also on https://tapastic.com/series/AsPerUsual
    """
    name = 'usual-tumblr'
    long_name = 'As Per Usual (from Tumblr)'
    url = 'http://as-per-usual.tumblr.com'
    # Category tuples are stored under `_categories` (leading underscore),
    # the attribute name GenericTumblrV1 itself uses.
    _categories = ('DAMILEE', )
4250
4251
4252
class HotComicsForCoolPeopleTumblr(GenericTumblrV1):
    """Class to retrieve Hot Comics For Cool People from Tumblr (V1 API).

    Also on https://tapastic.com/series/Hot-Comics-For-Cool-People
    Also on http://hotcomics.biz (links to tumblr)
    Also on http://hcfcp.com (links to tumblr)
    """
    name = 'hotcomics-tumblr'
    long_name = 'Hot Comics For Cool People (from Tumblr)'
    url = 'http://hotcomicsforcoolpeople.tumblr.com'
    # Category tuples are stored under `_categories` (leading underscore),
    # the attribute name GenericTumblrV1 itself uses.
    _categories = ('DAMILEE', )
4261
4262
4263
class OneOneOneOneComicTumblr(GenericTumblrV1):
    """Retriever for 1111 Comics (Tumblr V1 API).

    Also on http://www.1111comics.me
    Also on https://tapastic.com/series/1111-Comics
    """
    name = '1111-tumblr'
    long_name = '1111 Comics (from Tumblr)'
    url = 'http://comics1111.tumblr.com'
    _categories = ('ONEONEONEONE', )
4271
4272
4273
class JhallComicsTumblr(GenericTumblrV1):
    """Retriever for Jhall Comics (Tumblr V1 API).

    Also on http://jhallcomics.com
    """
    name = 'jhall-tumblr'
    long_name = 'Jhall Comics (from Tumblr)'
    url = 'http://jhallcomics.tumblr.com'
4279
4280
4281
class BerkeleyMewsTumblr(GenericTumblrV1):
    """Retriever for Berkeley Mews comics (Tumblr V1 API).

    Also on http://www.gocomics.com/berkeley-mews
    Also on http://www.berkeleymews.com
    """
    name = 'berkeley-tumblr'
    long_name = 'Berkeley Mews (from Tumblr)'
    url = 'http://mews.tumblr.com'
    _categories = ('BERKELEY', )
4289
4290
4291
class JoanCornellaTumblr(GenericTumblrV1):
    """Retriever for Joan Cornella comics (Tumblr V1 API).

    Also on http://joancornella.net
    """
    name = 'cornella-tumblr'
    long_name = 'Joan Cornella (from Tumblr)'
    url = 'http://cornellajoan.tumblr.com'
4297
4298
4299
class RespawnComicTumblr(GenericTumblrV1):
    """Retriever for Respawn Comic (Tumblr V1 API).

    Also on http://respawncomic.com
    """
    name = 'respawn-tumblr'
    long_name = 'Respawn Comic (from Tumblr)'
    url = 'https://respawncomic.tumblr.com'
4305
4306
4307
class ChrisHallbeckTumblr(GenericTumblrV1):
    """Class to retrieve Chris Hallbeck comics from Tumblr (V1 API).

    Also on https://tapastic.com/ChrisHallbeck
    Also on http://maximumble.com
    Also on http://minimumble.com
    Also on http://thebookofbiff.com
    """
    name = 'hallbeck-tumblr'
    long_name = 'Chris Hallbeck (from Tumblr)'
    url = 'https://chrishallbeck.tumblr.com'
    # NOTE(review): the category key keeps its 'HALLBACK' spelling because
    # other classes presumably share it - confirm before renaming.
    _categories = ('HALLBACK', )
4317
4318
4319
class ComicNuggets(GenericTumblrV1):
    """Retriever for Comic Nuggets (Tumblr V1 API)."""
    name = 'nuggets'
    long_name = 'Comic Nuggets'
    url = 'http://comicnuggets.com'
4324
4325
4326
class PigeonGazetteTumblr(GenericTumblrV1):
    """Retriever for The Pigeon Gazette comics (Tumblr V1 API).

    Also on https://tapastic.com/series/The-Pigeon-Gazette
    """
    name = 'pigeon-tumblr'
    long_name = 'The Pigeon Gazette (from Tumblr)'
    url = 'http://thepigeongazette.tumblr.com'
4332
4333
4334
class CancerOwl(GenericTumblrV1):
    """Retriever for Cancer Owl comics (Tumblr V1 API).

    Also on http://cancerowl.com
    """
    name = 'cancerowl-tumblr'
    long_name = 'Cancer Owl (from Tumblr)'
    url = 'http://cancerowl.tumblr.com'
4340
4341
4342
class FowlLanguageTumblr(GenericTumblrV1):
    """Retriever for Fowl Language comics (Tumblr V1 API).

    Also on http://www.fowllanguagecomics.com
    Also on http://tapastic.com/series/Fowl-Language-Comics
    Also on http://www.gocomics.com/fowl-language
    """
    name = 'fowllanguage-tumblr'
    long_name = 'Fowl Language Comics (from Tumblr)'
    url = 'http://fowllanguagecomics.tumblr.com'
    _categories = ('FOWLLANGUAGE', )
4351
4352
4353
class TheOdd1sOutTumblr(GenericTumblrV1):
    """Retriever for The Odd 1s Out comics (Tumblr V1 API).

    Also on http://theodd1sout.com
    Also on https://tapastic.com/series/Theodd1sout
    """
    name = 'theodd-tumblr'
    long_name = 'The Odd 1s Out (from Tumblr)'
    url = 'http://theodd1sout.tumblr.com'
4360
4361
4362
class TheUnderfoldTumblr(GenericTumblrV1):
    """Retriever for The Underfold comics (Tumblr V1 API).

    Also on http://theunderfold.com
    """
    name = 'underfold-tumblr'
    long_name = 'The Underfold (from Tumblr)'
    url = 'http://theunderfold.tumblr.com'
4368
4369
4370
class LolNeinTumblr(GenericTumblrV1):
    """Retriever for Lol Nein comics (Tumblr V1 API).

    Also on http://lolnein.com
    """
    name = 'lolnein-tumblr'
    long_name = 'Lol Nein (from Tumblr)'
    url = 'http://lolneincom.tumblr.com'
4376
4377
4378
class FatAwesomeComicsTumblr(GenericTumblrV1):
    """Retriever for Fat Awesome Comics (Tumblr V1 API).

    Also on http://fatawesome.com/comics
    """
    name = 'fatawesome-tumblr'
    long_name = 'Fat Awesome (from Tumblr)'
    url = 'http://fatawesomecomedy.tumblr.com'
4384
4385
4386
class TheWorldIsFlatTumblr(GenericTumblrV1):
    """Retriever for The World Is Flat Comics (Tumblr V1 API).

    Also on https://tapastic.com/series/The-World-is-Flat
    """
    name = 'flatworld-tumblr'
    long_name = 'The World Is Flat (from Tumblr)'
    url = 'http://theworldisflatcomics.com'
4392
4393
4394
class DorrisMc(GenericTumblrV1):
    """Retriever for Dorris Mc Comics (Tumblr V1 API).

    Also on http://www.gocomics.com/dorris-mccomics
    """
    name = 'dorrismc'
    long_name = 'Dorris Mc'
    url = 'http://dorrismccomics.com'
4400
4401
4402
class LeleozTumblr(GenericDeletedComic, GenericTumblrV1):
    """Retriever for Leleoz comics (Tumblr V1 API).

    Also on https://tapastic.com/series/Leleoz
    """
    # NOTE(review): GenericDeletedComic comes first in the bases, so it
    # presumably overrides the Tumblr retrieval - confirm.
    name = 'leleoz-tumblr'
    long_name = 'Leleoz (from Tumblr)'
    url = 'http://leleozcomics.tumblr.com'
4408
4409
4410
class MoonBeardTumblr(GenericTumblrV1):
    """Retriever for MoonBeard comics (Tumblr V1 API).

    Also on http://moonbeard.com
    Also on http://www.webtoons.com/en/comedy/moon-beard/list?title_no=471
    """
    name = 'moonbeard-tumblr'
    long_name = 'Moon Beard (from Tumblr)'
    url = 'http://squireseses.tumblr.com'
    _categories = ('MOONBEARD', )
4418
4419
4420
class AComik(GenericTumblrV1):
    """Retriever for A Comik (Tumblr V1 API)."""
    name = 'comik'
    long_name = 'A Comik'
    url = 'http://acomik.com'
4425
4426
4427
class ClassicRandy(GenericTumblrV1):
    """Retriever for Classic Randy comics (Tumblr V1 API)."""
    name = 'randy'
    long_name = 'Classic Randy'
    url = 'http://classicrandy.tumblr.com'
4432
4433
4434
class DagssonTumblr(GenericTumblrV1):
    """Retriever for Dagsson comics (Tumblr V1 API).

    Also on http://www.dagsson.com
    """
    name = 'dagsson-tumblr'
    long_name = 'Dagsson Hugleikur (from Tumblr)'
    url = 'https://hugleikurdagsson.tumblr.com'
4440
4441
4442
class LinsEditionsTumblr(GenericTumblrV1):
    """Retriever for L.I.N.S. Editions comics (Tumblr V1 API).

    Also on https://linsedition.com
    Now on http://warandpeas.tumblr.com
    """
    name = 'lins-tumblr'
    long_name = 'L.I.N.S. Editions (from Tumblr)'
    url = 'https://linscomics.tumblr.com'
    _categories = ('WARANDPEAS', 'LINS')
4450
4451
4452
class WarAndPeasTumblr(GenericTumblrV1):
    """Retriever for War And Peas comics (Tumblr V1 API).

    Was on https://linscomics.tumblr.com
    """
    name = 'warandpeas-tumblr'
    long_name = 'War And Peas (from Tumblr)'
    url = 'http://warandpeas.tumblr.com'
    _categories = ('WARANDPEAS', 'LINS')
4459
4460
4461
class OrigamiHotDish(GenericTumblrV1):
    """Retriever for Origami Hot Dish comics (Tumblr V1 API)."""
    name = 'origamihotdish'
    long_name = 'Origami Hot Dish'
    url = 'http://origamihotdish.com'
4466
4467
4468
class HitAndMissComicsTumblr(GenericTumblrV1):
    """Retriever for Hit and Miss Comics (Tumblr V1 API)."""
    name = 'hitandmiss'
    long_name = 'Hit and Miss Comics'
    url = 'https://hitandmisscomics.tumblr.com'
4473
4474
4475
class HMBlanc(GenericTumblrV1):
    """Retriever for HM Blanc comics (Tumblr V1 API)."""
    name = 'hmblanc'
    long_name = 'HM Blanc'
    url = 'http://hmblanc.tumblr.com'
4480
4481
4482
class TalesOfAbsurdityTumblr(GenericTumblrV1):
    """Retriever for Tales Of Absurdity comics (Tumblr V1 API).

    Also on http://talesofabsurdity.com
    Also on http://tapastic.com/series/Tales-Of-Absurdity
    """
    name = 'absurdity-tumblr'
    long_name = 'Tales of Absurdity (from Tumblr)'
    url = 'http://talesofabsurdity.tumblr.com'
    _categories = ('ABSURDITY', )
4490
4491
4492
class RobbieAndBobby(GenericTumblrV1):
    """Retriever for Robbie And Bobby comics (Tumblr V1 API).

    Also on http://robbieandbobby.com
    """
    name = 'robbie-tumblr'
    long_name = 'Robbie And Bobby (from Tumblr)'
    url = 'http://robbieandbobby.tumblr.com'
4498
4499
4500
class ElectricBunnyComicTumblr(GenericTumblrV1):
    """Retriever for Electric Bunny Comics (Tumblr V1 API).

    Also on http://www.electricbunnycomics.com/View/Comic/153/Welcome+to+Hell
    """
    name = 'bunny-tumblr'
    long_name = 'Electric Bunny Comic (from Tumblr)'
    url = 'http://electricbunnycomics.tumblr.com'
4506
4507
4508
class Hoomph(GenericTumblrV1):
    """Retriever for Hoomph comics (Tumblr V1 API)."""
    name = 'hoomph'
    long_name = 'Hoomph'
    url = 'http://hoom.ph'
4513
4514
4515
class BFGFSTumblr(GenericTumblrV1):
    """Retriever for BFGFS comics (Tumblr V1 API).

    Also on https://tapastic.com/series/BFGFS
    Also on http://bfgfs.com
    """
    name = 'bfgfs-tumblr'
    long_name = 'BFGFS (from Tumblr)'
    url = 'https://bfgfs.tumblr.com'
4522
4523
4524
class DoodleForFood(GenericTumblrV1):
    """Retriever for Doodle For Food comics (Tumblr V1 API).

    Also on https://tapastic.com/series/Doodle-for-Food
    """
    name = 'doodle'
    long_name = 'Doodle For Food'
    url = 'http://www.doodleforfood.com'
4530
4531
4532
class CassandraCalinTumblr(GenericTumblrV1):
    """Retriever for C. Cassandra comics (Tumblr V1 API).

    Also on http://cassandracalin.com
    Also on https://tapastic.com/series/C-Cassandra-comics
    """
    name = 'cassandra-tumblr'
    long_name = 'Cassandra Calin (from Tumblr)'
    url = 'http://c-cassandra.tumblr.com'
4539
4540
4541
class DougWasTaken(GenericTumblrV1):
    """Retriever for Doug Was Taken comics (Tumblr V1 API)."""
    name = 'doug'
    long_name = 'Doug Was Taken'
    url = 'https://dougwastaken.tumblr.com'
4546
4547
4548
class MandatoryRollerCoaster(GenericTumblrV1):
    """Retriever for Mandatory Roller Coaster comics (Tumblr V1 API)."""
    name = 'rollercoaster'
    long_name = 'Mandatory Roller Coaster'
    url = 'http://mandatoryrollercoaster.com'
4553
4554
4555
class CEstPasEnRegardantSesPompes(GenericTumblrV1):
    """Retriever for C'Est Pas En Regardant Ses Pompes (...) comics (Tumblr V1 API)."""
    name = 'cperspqccltt'
    long_name = 'C Est Pas En Regardant Ses Pompes (...)'
    url = 'http://marcoandco.tumblr.com'
4560
4561
4562
class TheGrohlTroll(GenericTumblrV1):
    """Retriever for The Grohl Troll comics (Tumblr V1 API)."""
    name = 'grohltroll'
    long_name = 'The Grohl Troll'
    url = 'http://thegrohltroll.com'
4567
4568
4569
class WebcomicName(GenericTumblrV1):
    """Retriever for Webcomic Name comics (Tumblr V1 API)."""
    name = 'webcomicname'
    long_name = 'Webcomic Name'
    url = 'http://webcomicname.com'
4574
4575
4576
class BooksOfAdam(GenericTumblrV1):
    """Retriever for Books of Adam comics (Tumblr V1 API).

    Also on http://www.booksofadam.com
    """
    name = 'booksofadam'
    long_name = 'Books of Adam'
    url = 'http://booksofadam.tumblr.com'
4582
4583
4584
class HarkAVagrant(GenericTumblrV1):
    """Retriever for Hark A Vagrant comics (Tumblr V1 API).

    Also on http://www.harkavagrant.com
    """
    name = 'hark-tumblr'
    long_name = 'Hark A Vagrant (from Tumblr)'
    url = 'http://beatonna.tumblr.com'
4590
4591
4592
class OurSuperAdventureTumblr(GenericTumblrV1):
    """Retriever for Our Super Adventure comics (Tumblr V1 API).

    Also on https://tapastic.com/series/Our-Super-Adventure
    Also on http://www.oursuperadventure.com
    http://sarahgraley.com
    """
    name = 'superadventure-tumblr'
    long_name = 'Our Super Adventure (from Tumblr)'
    url = 'http://sarahssketchbook.tumblr.com'
4600
4601
4602
class JakeLikesOnions(GenericTumblrV1):
    """Retriever for Jake Likes Onions comics (Tumblr V1 API)."""
    name = 'jake'
    long_name = 'Jake Likes Onions'
    url = 'http://jakelikesonions.com'
4607
4608
4609
class InYourFaceCakeTumblr(GenericTumblrV1):
    """Retriever for In Your Face Cake comics (Tumblr V1 API).

    Also on https://tapas.io/series/In-Your-Face-Cake
    """
    name = 'inyourfacecake-tumblr'
    long_name = 'In Your Face Cake (from Tumblr)'
    url = 'https://in-your-face-cake.tumblr.com'
    _categories = ('INYOURFACECAKE', )
4616
4617
4618
class Robospunk(GenericTumblrV1):
    """Retriever for Robospunk comics (Tumblr V1 API)."""
    name = 'robospunk'
    long_name = 'Robospunk'
    url = 'http://robospunk.com'
4623
4624
4625
class BananaTwinky(GenericTumblrV1):
    """Retriever for Banana Twinky comics (Tumblr V1 API)."""
    name = 'banana'
    long_name = 'Banana Twinky'
    url = 'https://bananatwinky.tumblr.com'
4630
4631
4632
class YesterdaysPopcornTumblr(GenericTumblrV1):
    """Retriever for Yesterday's Popcorn comics (Tumblr V1 API).

    Also on http://www.yesterdayspopcorn.com
    Also on https://tapastic.com/series/Yesterdays-Popcorn
    """
    name = 'popcorn-tumblr'
    long_name = "Yesterday's Popcorn (from Tumblr)"
    url = 'http://yesterdayspopcorn.tumblr.com'
4639
4640
4641
class TwistedDoodles(GenericTumblrV1):
    """Retriever for Twisted Doodles comics (Tumblr V1 API)."""
    name = 'twisted'
    long_name = 'Twisted Doodles'
    url = 'http://www.twisteddoodles.com'
4646
4647
4648
class UbertoolTumblr(GenericTumblrV1):
    """Retriever for Ubertool comics (Tumblr V1 API).

    Also on http://ubertoolcomic.com
    Also on https://tapastic.com/series/ubertool
    """
    name = 'ubertool-tumblr'
    long_name = 'Ubertool (from Tumblr)'
    url = 'https://ubertool.tumblr.com'
    _categories = ('UBERTOOL', )
4656
4657
4658
class LittleLifeLinesTumblr(GenericDeletedComic, GenericTumblrV1):
    """Retriever for Little Life Lines comics (Tumblr V1 API).

    Also on http://www.littlelifelines.com
    """
    # NOTE(review): GenericDeletedComic comes first in the bases, so it
    # presumably overrides the Tumblr retrieval - confirm.
    name = 'life-tumblr'
    long_name = 'Little Life Lines (from Tumblr)'
    url = 'https://little-life-lines.tumblr.com'
4664
4665
4666
class TheyCanTalk(GenericTumblrV1):
    """Retriever for They Can Talk comics (Tumblr V1 API)."""
    name = 'theycantalk'
    long_name = 'They Can Talk'
    url = 'http://theycantalk.com'
4671
4672
4673
class Will5NeverCome(GenericTumblrV1):
    """Retriever for Will 5:00 Never Come comics (Tumblr V1 API)."""
    name = 'will5'
    long_name = 'Will 5:00 Never Come ?'
    url = 'http://will5nevercome.com'
4678
4679
4680
class Sephko(GenericTumblrV1):
    """Retriever for Sephko Comics (Tumblr V1 API).

    Also on http://www.sephko.com
    """
    name = 'sephko'
    long_name = 'Sephko'
    url = 'https://sephko.tumblr.com'
4686
4687
4688
class BlazersAtDawn(GenericTumblrV1):
    """Retriever for Blazers At Dawn Comics (Tumblr V1 API)."""
    name = 'blazers'
    long_name = 'Blazers At Dawn'
    url = 'http://blazersatdawn.tumblr.com'
4693
4694
4695
class ArtByMoga(GenericEmptyComic, GenericTumblrV1):  # Deactivated because it downloads too many things
    """Art By Moga comics from Tumblr (deactivated through GenericEmptyComic)."""
    name = 'moga'
    long_name = 'Art By Moga'
    url = 'http://artbymoga.tumblr.com'
4700
4701
4702
class VerbalVomitTumblr(GenericTumblrV1):
    """Verbal Vomit comics, retrieved from the Tumblr blog."""
    # Also on http://www.verbal-vomit.com
    name = 'vomit-tumblr'
    long_name = 'Verbal Vomit (from Tumblr)'
    url = 'http://verbalvomits.tumblr.com'
4708
4709
4710
class LibraryComic(GenericTumblrV1):
    """LibraryComic comics, retrieved from the Tumblr blog."""
    # Also on http://librarycomic.com
    name = 'library-tumblr'
    long_name = 'LibraryComic (from Tumblr)'
    url = 'https://librarycomic.tumblr.com'
4716
4717
4718
class TizzyStitchBirdTumblr(GenericTumblrV1):
    """Tizzy Stitch Bird comics, retrieved from the Tumblr blog."""
    # Also on http://tizzystitchbird.com
    # Also on https://tapastic.com/series/TizzyStitchbird
    # Also on http://www.webtoons.com/en/challenge/tizzy-stitchbird/list?title_no=50082
    name = 'tizzy-tumblr'
    long_name = 'Tizzy Stitch Bird (from Tumblr)'
    url = 'http://tizzystitchbird.tumblr.com'
4726
4727
4728
class VictimsOfCircumsolarTumblr(GenericTumblrV1):
    """Victims Of Circumsolar comics, retrieved from the Tumblr blog."""
    # Also on http://www.victimsofcircumsolar.com
    name = 'circumsolar-tumblr'
    long_name = 'Victims Of Circumsolar (from Tumblr)'
    url = 'https://victimsofcomics.tumblr.com'
4734
4735
4736
class RockPaperCynicTumblr(GenericTumblrV1):
    """Rock Paper Cynic comics, retrieved from the Tumblr blog."""
    # Also on http://www.rockpapercynic.com
    # Also on https://tapastic.com/series/rockpapercynic
    name = 'rpc-tumblr'
    long_name = 'Rock Paper Cynic (from Tumblr)'
    url = 'http://rockpapercynic.tumblr.com'
4743
4744
4745
class DeadlyPanelTumblr(GenericTumblrV1):
    """Deadly Panel comics, retrieved from the Tumblr blog."""
    # Also on http://www.deadlypanel.com
    # Also on https://tapastic.com/series/deadlypanel
    name = 'deadly-tumblr'
    long_name = 'Deadly Panel (from Tumblr)'
    url = 'https://deadlypanel.tumblr.com'
4752
4753
4754
class CatanaComics(GenericComicNotWorking):  # Not a Tumblr anymore ?
    """Catana comics (currently attached to GenericComicNotWorking)."""
    name = 'catana'
    long_name = 'Catana'
    url = 'http://www.catanacomics.com'
4759
4760
4761
class AngryAtNothingTumblr(GenericTumblrV1):
    """Angry At Nothing comics, retrieved from the Tumblr blog."""
    # Also on http://www.angryatnothing.net
    # Also on http://tapastic.com/series/Comics-yeah-definitely-comics-
    name = 'angry-tumblr'
    long_name = 'Angry At Nothing (from Tumblr)'
    url = 'http://angryatnothing.tumblr.com'
4768
4769
4770
class ShanghaiTango(GenericTumblrV1):
    """Shanghai Tango comics, retrieved from the Tumblr blog."""
    name = 'tango'
    long_name = 'Shanghai Tango'
    url = 'http://tango2010weibo.tumblr.com'
4775
4776
4777
class OffTheLeashDogTumblr(GenericTumblrV1):
    """Off The Leash Dog comics, retrieved from the Tumblr blog."""
    # Also on http://offtheleashdogcartoons.com
    # Also on http://www.rupertfawcettcartoons.com
    name = 'offtheleash-tumblr'
    long_name = 'Off The Leash Dog (from Tumblr)'
    url = 'http://rupertfawcettsdoggyblog.tumblr.com'
    _categories = ('FAWCETT', )
4785
4786
4787
class ImogenQuestTumblr(GenericTumblrV1):
    """Imogen Quest comics, retrieved from the Tumblr blog."""
    # Also on http://imogenquest.net
    name = 'imogen-tumblr'
    long_name = 'Imogen Quest (from Tumblr)'
    url = 'http://imoquest.tumblr.com'
4793
4794
4795
class Shitfest(GenericTumblrV1):
    """Shitfest comics, retrieved through the GenericTumblrV1 logic."""
    name = 'shitfest'
    long_name = 'Shitfest'
    url = 'http://shitfestcomic.com'
4800
4801
4802
class IceCreamSandwichComics(GenericTumblrV1):
    """Ice Cream Sandwich Comics, retrieved through the GenericTumblrV1 logic."""
    name = 'icecream'
    long_name = 'Ice Cream Sandwich Comics'
    url = 'http://icecreamsandwichcomics.com'
4807
4808
4809
class Dustinteractive(GenericTumblrV1):
    """Dustinteractive comics, retrieved through the GenericTumblrV1 logic."""
    name = 'dustinteractive'
    long_name = 'Dustinteractive'
    url = 'http://dustinteractive.com'
4814
4815
4816
class StickyCinemaFloor(GenericTumblrV1):
    """Sticky Cinema Floor comics, retrieved from the Tumblr blog."""
    name = 'stickycinema'
    long_name = 'Sticky Cinema Floor'
    url = 'https://stickycinemafloor.tumblr.com'
4821
4822
4823
class IncidentalComicsTumblr(GenericTumblrV1):
    """Incidental Comics, retrieved from the Tumblr blog."""
    # Also on http://www.incidentalcomics.com
    name = 'incidental-tumblr'
    long_name = 'Incidental Comics (from Tumblr)'
    url = 'http://incidentalcomics.tumblr.com'
4829
4830
4831
class APleasantWasteOfTimeTumblr(GenericTumblrV1):
    """A Pleasant Waste Of Time comics, retrieved from the Tumblr blog."""
    # Also on https://tapas.io/series/A-Pleasant-
    name = 'pleasant-waste-tumblr'
    long_name = 'A Pleasant Waste Of Time (from Tumblr)'
    url = 'https://artjcf.tumblr.com'
    _categories = ('WASTE', )
4838
4839
4840
class HorovitzComicsTumblr(GenericTumblrV1):
    """Horovitz comics, retrieved from the Tumblr blog."""
    # Also on http://www.horovitzcomics.com
    name = 'horovitz-tumblr'
    long_name = 'Horovitz (from Tumblr)'
    url = 'https://horovitzcomics.tumblr.com'
    _categories = ('HOROVITZ', )
4847
4848
4849
class DeepDarkFearsTumblr(GenericTumblrV1):
    """Class to retrieve Deep Dark Fears comics from the Tumblr blog."""
    name = 'deep-dark-fears-tumblr'
    long_name = 'Deep Dark Fears (from Tumblr)'
    url = 'http://deep-dark-fears.tumblr.com'
4854
4855
4856
class DakotaMcDadzean(GenericTumblrV1):
    """Dakota McFadzean comics, retrieved from the Tumblr blog."""
    # NOTE(review): the class name and long_name spell "McDadzean" whereas the
    # blog URL spells "mcfadzean"; the strings are left untouched because they
    # may already be referenced elsewhere - confirm before renaming.
    name = 'dakota'
    long_name = 'Dakota McDadzean'
    url = 'http://dakotamcfadzean.tumblr.com'
4861
4862
4863
class ExtraFabulousComicsTumblr(GenericTumblrV1):
    """Extra Fabulous Comics, retrieved from the Tumblr blog."""
    # Also on http://extrafabulouscomics.com
    name = 'efc-tumblr'
    long_name = 'Extra Fabulous Comics (from Tumblr)'
    url = 'https://extrafabulouscomics.tumblr.com'
    _categories = ('EFC', )
4870
4871
4872
class AlexLevesque(GenericTumblrV1):
    """Alex Levesque comics, retrieved through the GenericTumblrV1 logic."""
    name = 'alevesque'
    long_name = 'Alex Levesque'
    url = 'http://alexlevesque.com'
    _categories = ('FRANCAIS', )
4878
4879
4880
class JamesOfNoTradesTumblr(GenericTumblrV1):
    """James Of No Trades comics, retrieved from the Tumblr blog."""
    # Also on http://jamesofnotrades.com
    # Also on http://www.webtoons.com/en/challenge/james-of-no-trades/list?title_no=43422
    # Also on https://tapas.io/series/James-of-No-Trades
    name = 'jamesofnotrades-tumblr'
    long_name = 'James Of No Trades (from Tumblr)'
    url = 'http://jamesfregan.tumblr.com'
    _categories = ('JAMESOFNOTRADES', )
4889
4890
4891
class InfiniteGuff(GenericTumblrV1):
    """Infinite Guff comics, retrieved through the GenericTumblrV1 logic."""
    name = 'infiniteguff'
    long_name = 'Infinite Guff'
    url = 'http://infiniteguff.com'
4896
4897
4898
class SkeletonClaw(GenericTumblrV1):
    """Skeleton Claw comics, retrieved through the GenericTumblrV1 logic."""
    name = 'skeletonclaw'
    long_name = 'Skeleton Claw'
    url = 'http://skeletonclaw.com'
4903
4904
4905
class MrsFrolleinTumblr(GenericTumblrV1):
    """Mrs Frollein comics, retrieved from the Tumblr blog."""
    # Also on http://www.webtoons.com/en/challenge/mrsfrollein/list?title_no=51710
    name = 'frollein'
    long_name = 'Mrs Frollein (from Tumblr)'
    url = 'https://mrsfrollein.tumblr.com'
4911
4912
4913
class GoodBearComicsTumblr(GenericTumblrV1):
    """Good Bear Comics, retrieved from the Tumblr blog."""
    # Also on https://goodbearcomics.com
    name = 'goodbear-tumblr'
    long_name = 'Good Bear Comics (from Tumblr)'
    url = 'https://goodbearcomics.tumblr.com'
4919
4920
4921
class BrooklynCartoonsTumblr(GenericTumblrV1):
    """Brooklyn Cartoons, retrieved from the Tumblr blog."""
    # Also on https://www.brooklyncartoons.com
    # Also on https://www.instagram.com/brooklyncartoons
    name = 'brooklyn-tumblr'
    long_name = 'Brooklyn Cartoons (from Tumblr)'
    url = 'http://brooklyncartoons.tumblr.com'
4928
4929
4930
class GemmaCorrellTumblr(GenericTumblrV1):
    """Class to retrieve Gemma Correll comics from the Tumblr blog."""
    # Also on http://www.gemmacorrell.com/portfolio/comics/
    name = 'gemma-tumblr'
    long_name = 'Gemma Correll (from Tumblr)'
    url = 'http://gemmacorrell.tumblr.com'
4935
4936
4937
class RobotatertotTumblr(GenericTumblrV1):
    """Robotatertot comics, retrieved from the Tumblr blog."""
    # Also on https://www.instagram.com/robotatertotcomics
    name = 'robotatertot-tumblr'
    long_name = 'Robotatertot (from Tumblr)'
    url = 'https://robotatertot.tumblr.com'
4943
4944
4945
class HuffyPenguin(GenericTumblrV1):
    """Huffy Penguin comics, retrieved from the Tumblr blog."""
    name = 'huffypenguin'
    long_name = 'Huffy Penguin'
    url = 'http://huffy-penguin.tumblr.com'
4950
4951
4952
class CowardlyComicsTumblr(GenericTumblrV1):
    """Cowardly Comics, retrieved from the Tumblr blog."""
    # Also on https://tapas.io/series/CowardlyComics
    # Also on http://www.webtoons.com/en/challenge/cowardly-comics/list?title_no=65893
    name = 'cowardly-tumblr'
    long_name = 'Cowardly Comics (from Tumblr)'
    url = 'http://cowardlycomics.tumblr.com'
4959
4960
4961
class Caw4hwTumblr(GenericTumblrV1):
    """Caw4hw comics, retrieved from the Tumblr blog."""
    # Also on https://tapas.io/series/CAW4HW
    name = 'caw4hw-tumblr'
    long_name = 'Caw4hw (from Tumblr)'
    url = 'https://caw4hw.tumblr.com'
4967
4968
4969
class WeFlapsTumblr(GenericTumblrV1):
    """We Flaps comics, retrieved from the Tumblr blog."""
    name = 'weflaps-tumblr'
    long_name = 'We Flaps (from Tumblr)'
    url = 'https://weflaps.tumblr.com'
4974
4975
4976
class TheseInsideJokesTumblr(GenericTumblrV1):
    """These Inside Jokes comics, retrieved from the Tumblr blog."""
    # Also on http://www.theseinsidejokes.com
    name = 'theseinsidejokes-tumblr'
    long_name = 'These Inside Jokes (from Tumblr)'
    url = 'http://theseinsidejokes.tumblr.com'
4982
4983
4984
class RustledJimmies(GenericTumblrV1):
    """Rustled Jimmies comics, retrieved through the GenericTumblrV1 logic."""
    # NOTE(review): 'restled' looks like a typo for 'rustled'; the value is
    # kept as-is because it may be used as an identifier elsewhere - confirm.
    name = 'restled'
    long_name = 'Rustled Jimmies'
    url = 'http://rustledjimmies.net'
4989
4990
4991
class SinewynTumblr(GenericTumblrV1):
    """Sinewyn comics, retrieved from the Tumblr blog."""
    # Also on https://sinewyn.wordpress.com
    name = 'sinewyn-tumblr'
    long_name = 'Sinewyn (from Tumblr)'
    url = 'https://sinewyn.tumblr.com'
4997
4998
4999
class ItFoolsAMonster(GenericTumblrV1):
    """It Fools A Monster comics, retrieved through the GenericTumblrV1 logic."""
    name = 'itfoolsamonster'
    long_name = 'It Fools A Monster'
    url = 'http://itfoolsamonster.com'
5004
5005
5006
class BoumeriesTumblr(GenericTumblrV1):
    """Boumeries comics, retrieved from the Tumblr blog."""
    # Also on http://bd.boumerie.com
    # Also on http://comics.boumerie.com
    name = 'boumeries-tumblr'
    long_name = 'Boumeries (from Tumblr)'
    url = 'http://boumeries.tumblr.com/'
    _categories = ('BOUMERIES', )
5014
5015
5016
class InfiniteImmortalBensTumblr(GenericTumblrV1):
    """Infinite Immortal Bens comics, retrieved from the Tumblr blog."""
    # Also on http://www.webtoons.com/en/challenge/infinite-immortal-bens/list?title_no=32847
    # Also on https://tapas.io/series/Infinite-Immortal-Bens
    name = 'infiniteimmortal-tumblr'
    long_name = 'Infinite Immortal Bens (from Tumblr)'
    url = 'https://infiniteimmortalbens.tumblr.com'
    _categories = ('INFINITEIMMORTAL', )
5024
5025
5026
class CheeseCornzTumblr(GenericTumblrV1):
    """Cheese Cornz comics, retrieved from the Tumblr blog."""
    name = 'cheesecornz-tumblr'
    long_name = 'Cheese Cornz (from Tumblr)'
    url = 'https://cheesecornz.tumblr.com'
5031
5032
5033
class CinismoIlustrado(GenericTumblrV1):
    """Cinismo Ilustrado comics, retrieved through the GenericTumblrV1 logic."""
    name = 'cinismo'
    long_name = 'Cinismo Ilustrado'
    url = 'http://cinismoilustrado.com'
    _categories = ('ESPANOL', )
5039
5040
5041
class EatMyPaintTumblr(GenericTumblrV1):
    """Eat My Paint comics, retrieved from the Tumblr blog."""
    # Also on https://tapas.io/series/eatmypaint
    name = 'eatmypaint-tumblr'
    long_name = 'Eat My Paint (from Tumblr)'
    url = 'https://eatmypaint.tumblr.com'
    _categories = ('EATMYPAINT', )
5048
5049
5050
class AnomalyTownFromTumblr(GenericTumblrV1):
    """Anomaly Town comics, retrieved from the Tumblr blog."""
    name = 'anomalytown-tumblr'
    long_name = 'Anomaly Town (from Tumblr)'
    url = 'https://anomalytown.tumblr.com'
5055
5056
5057
class RoryTumblr(GenericTumblrV1):
    """Rory comics, retrieved from the Tumblr blog."""
    # Also on https://tapas.io/series/Share-Your-Vulnerability
    name = 'rory-tumblr'
    long_name = 'Rory (from Tumblr)'
    url = 'https://rorycomics.tumblr.com/'
    _categories = ('RORY',)
5064
5065
5066
class HorovitzComics(GenericDeletedComic, GenericListableComic):
    """Shared logic for the comics hosted on horovitzcomics.com.

    `link_re` is a placeholder here (NotImplemented): both methods below call
    it as a compiled pattern whose first group holds the comic number.
    """
    # Also on https://horovitzcomics.tumblr.com
    url = 'http://www.horovitzcomics.com'
    _categories = ('HOROVITZ', )
    # Captures year, month and day (in that order) from the image path.
    img_re = re.compile('.*comics/([0-9]*)/([0-9]*)/([0-9]*)/.*$')
    link_re = NotImplemented
    get_url_from_archive_element = join_cls_url_to_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dict for one comic.

        `link` is the archive anchor (its 'href' and text are read) and
        `soup` the parsed comic page, expected to hold exactly one
        <img id="comic"> element.
        """
        href_match = cls.link_re.match(link['href'])
        comic_num = int(href_match.groups()[0])
        comic_title = link.string
        comic_imgs = soup.find_all('img', id='comic')
        assert len(comic_imgs) == 1, comic_imgs
        # The publication date is encoded in the image path.
        date_match = cls.img_re.match(comic_imgs[0]['src'])
        year, month, day = map(int, date_match.groups())
        return {
            'title': comic_title,
            'day': day,
            'month': month,
            'year': year,
            'img': [img['src'] for img in comic_imgs],
            'num': comic_num,
        }

    @classmethod
    def get_archive_elements(cls):
        """Return the archive links matching `link_re`, in reverse page order."""
        archive_url = 'http://www.horovitzcomics.com/comics/archive/'
        archive_soup = get_soup_at_url(archive_url)
        matching_links = archive_soup.find_all('a', href=cls.link_re)
        return reversed(matching_links)
5098
5099
5100
class HorovitzNew(HorovitzComics):
    """Horovitz comics found under the /comics/new/ section."""
    name = 'horovitznew'
    long_name = 'Horovitz New'
    link_re = re.compile('^/comics/new/([0-9]+)$')
5105
5106
5107
class HorovitzClassic(HorovitzComics):
    """Horovitz comics found under the /comics/classic/ section."""
    name = 'horovitzclassic'
    long_name = 'Horovitz Classic'
    link_re = re.compile('^/comics/classic/([0-9]+)$')
5112
5113
5114
class GenericGoComic(GenericNavigableComic):
    """Shared navigation and parsing logic for comics hosted on gocomics.com."""
    _categories = ('GOCOMIC', )

    @classmethod
    def get_first_comic_link(cls):
        """Return the first anchor of the 'gc-deck gc-deck--cta-1' block on the comic's main page."""
        landing_soup = get_soup_at_url(cls.url)
        deck = landing_soup.find('div', class_='gc-deck gc-deck--cta-1')
        return deck.find('a')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the link to the next (if `next_` is true) or previous comic."""
        # The class strings are matched verbatim, trailing space included.
        if next_:
            button_class = 'fa btn btn-outline-default btn-circle fa-caret-right sm '
        else:
            button_class = 'fa btn btn-outline-default btn-circle fa-caret-left sm js-previous-comic '
        return last_soup.find('a', class_=button_class)

    @classmethod
    def get_url_from_link(cls, link):
        """Resolve the link's 'href' against the gocomics.com root."""
        return urljoin_wrapper('http://www.gocomics.com', link['href'])

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract date, images, author and tags from the page's <meta> tags."""

        def meta_content(prop):
            """Return the 'content' of the first <meta> with the given property."""
            return soup.find('meta', property=prop)['content']

        published = string_to_date(meta_content('article:published_time'), "%Y-%m-%d")
        image_metas = soup.find_all('meta', property='og:image')
        author = meta_content('article:author')
        tags = meta_content('article:tag')
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [meta['content'] for meta in image_metas],
            'author': author,
            'tags': tags,
        }
5152
5153
5154
class PearlsBeforeSwine(GenericGoComic):
    """Pearls Before Swine comics, retrieved from GoComics."""
    name = 'pearls'
    long_name = 'Pearls Before Swine'
    url = 'http://www.gocomics.com/pearlsbeforeswine'
5159
5160
5161
class Peanuts(GenericGoComic):
    """Peanuts comics, retrieved from GoComics."""
    name = 'peanuts'
    long_name = 'Peanuts'
    url = 'http://www.gocomics.com/peanuts'
5166
5167
5168
class MattWuerker(GenericGoComic):
    """Matt Wuerker comics, retrieved from GoComics."""
    name = 'wuerker'
    long_name = 'Matt Wuerker'
    url = 'http://www.gocomics.com/mattwuerker'
5173
5174
5175
class TomToles(GenericGoComic):
    """Tom Toles comics, retrieved from GoComics."""
    name = 'toles'
    long_name = 'Tom Toles'
    url = 'http://www.gocomics.com/tomtoles'
5180
5181
5182
class BreakOfDay(GenericGoComic):
    """Break Of Day comics, retrieved from GoComics."""
    name = 'breakofday'
    long_name = 'Break Of Day'
    url = 'http://www.gocomics.com/break-of-day'
5187
5188
5189
class Brevity(GenericGoComic):
    """Brevity comics, retrieved from GoComics."""
    name = 'brevity'
    long_name = 'Brevity'
    url = 'http://www.gocomics.com/brevity'
5194
5195
5196
class MichaelRamirez(GenericGoComic):
    """Michael Ramirez comics, retrieved from GoComics."""
    name = 'ramirez'
    long_name = 'Michael Ramirez'
    url = 'http://www.gocomics.com/michaelramirez'
5201
5202
5203
class MikeLuckovich(GenericGoComic):
    """Mike Luckovich comics, retrieved from GoComics."""
    name = 'luckovich'
    long_name = 'Mike Luckovich'
    url = 'http://www.gocomics.com/mikeluckovich'
5208
5209
5210
class JimBenton(GenericGoComic):
    """Jim Benton comics, retrieved from GoComics."""
    # Also on http://jimbenton.tumblr.com
    name = 'benton'
    long_name = 'Jim Benton'
    url = 'http://www.gocomics.com/jim-benton-cartoons'
5216
5217
5218
class TheArgyleSweater(GenericGoComic):
    """The Argyle Sweater comics, retrieved from GoComics."""
    name = 'argyle'
    long_name = 'Argyle Sweater'
    url = 'http://www.gocomics.com/theargylesweater'
5223
5224
5225
class SunnyStreet(GenericGoComic):
    """Sunny Street comics, retrieved from GoComics."""
    # Also on http://www.sunnystreetcomics.com
    name = 'sunny'
    long_name = 'Sunny Street'
    url = 'http://www.gocomics.com/sunny-street'
5231
5232
5233
class OffTheMark(GenericGoComic):
    """Off The Mark comics, retrieved from GoComics."""
    # Also on https://www.offthemark.com
    name = 'offthemark'
    long_name = 'Off The Mark'
    url = 'http://www.gocomics.com/offthemark'
5239
5240
5241
class WuMo(GenericGoComic):
    """WuMo comics, retrieved from GoComics."""
    # Also on http://wumo.com
    name = 'wumo'
    long_name = 'WuMo'
    url = 'http://www.gocomics.com/wumo'
5247
5248
5249
class LunarBaboon(GenericGoComic):
    """Lunar Baboon comics, retrieved from GoComics."""
    # Also on http://www.lunarbaboon.com
    # Also on https://tapastic.com/series/Lunarbaboon
    name = 'lunarbaboon'
    long_name = 'Lunar Baboon'
    url = 'http://www.gocomics.com/lunarbaboon'
5256
5257
5258
class SandersenGocomic(GenericGoComic):
    """Sarah Andersen comics, retrieved from GoComics."""
    # Also on http://sarahcandersen.com
    # Also on http://tapastic.com/series/Doodle-Time
    name = 'sandersen-goc'
    long_name = 'Sarah Andersen (from GoComics)'
    url = 'http://www.gocomics.com/sarahs-scribbles'
5265
5266
5267
class SaturdayMorningBreakfastCerealGoComic(GenericGoComic):
    """Saturday Morning Breakfast Cereal comics, retrieved from GoComics."""
    # Also on http://smbc-comics.tumblr.com
    # Also on http://www.smbc-comics.com
    name = 'smbc-goc'
    long_name = 'Saturday Morning Breakfast Cereal (from GoComics)'
    url = 'http://www.gocomics.com/saturday-morning-breakfast-cereal'
    _categories = ('SMBC', )
5275
5276
5277
class CalvinAndHobbesGoComic(GenericGoComic):
    """Calvin and Hobbes comics, retrieved from GoComics."""
    # From gocomics, not http://marcel-oehler.marcellosendos.ch/comics/ch/
    name = 'calvin-goc'
    long_name = 'Calvin and Hobbes (from GoComics)'
    url = 'http://www.gocomics.com/calvinandhobbes'
5283
5284
5285
class RallGoComic(GenericGoComic):
    """Ted Rall comics, retrieved from GoComics."""
    # Also on http://rall.com/comic
    name = 'rall-goc'
    long_name = "Ted Rall (from GoComics)"
    url = "http://www.gocomics.com/ted-rall"
    _categories = ('RALL', )
5292
5293
5294
class TheAwkwardYetiGoComic(GenericGoComic):
    """The Awkward Yeti comics, retrieved from GoComics."""
    # Also on http://larstheyeti.tumblr.com
    # Also on http://theawkwardyeti.com
    # Also on https://tapastic.com/series/TheAwkwardYeti
    name = 'yeti-goc'
    long_name = 'The Awkward Yeti (from GoComics)'
    url = 'http://www.gocomics.com/the-awkward-yeti'
    _categories = ('YETI', )
5303
5304
5305
class BerkeleyMewsGoComics(GenericGoComic):
    """Berkeley Mews comics, retrieved from GoComics."""
    # Also on http://mews.tumblr.com
    # Also on http://www.berkeleymews.com
    name = 'berkeley-goc'
    long_name = 'Berkeley Mews (from GoComics)'
    url = 'http://www.gocomics.com/berkeley-mews'
    _categories = ('BERKELEY', )
5313
5314
5315
class SheldonGoComics(GenericGoComic):
    """Sheldon comics, retrieved from GoComics."""
    # Also on http://www.sheldoncomics.com
    name = 'sheldon-goc'
    long_name = 'Sheldon Comics (from GoComics)'
    url = 'http://www.gocomics.com/sheldon'
5321
5322
5323
class FowlLanguageGoComics(GenericGoComic):
    """Fowl Language comics, retrieved from GoComics."""
    # Also on http://www.fowllanguagecomics.com
    # Also on http://tapastic.com/series/Fowl-Language-Comics
    # Also on http://fowllanguagecomics.tumblr.com
    name = 'fowllanguage-goc'
    long_name = 'Fowl Language Comics (from GoComics)'
    url = 'http://www.gocomics.com/fowl-language'
    _categories = ('FOWLLANGUAGE', )
5332
5333
5334
class NickAnderson(GenericGoComic):
    """Nick Anderson comics, retrieved from GoComics."""
    name = 'nickanderson'
    long_name = 'Nick Anderson'
    url = 'http://www.gocomics.com/nickanderson'
5339
5340
5341
class GarfieldGoComics(GenericGoComic):
    """Garfield comics, retrieved from GoComics."""
    # Also on http://garfield.com
    name = 'garfield-goc'
    long_name = 'Garfield (from GoComics)'
    url = 'http://www.gocomics.com/garfield'
    _categories = ('GARFIELD', )
5348
5349
5350
class DorrisMcGoComics(GenericGoComic):
    """Dorris Mc comics, retrieved from GoComics."""
    # Also on http://dorrismccomics.com
    name = 'dorrismc-goc'
    long_name = 'Dorris Mc (from GoComics)'
    url = 'http://www.gocomics.com/dorris-mccomics'
5356
5357
5358
class FoxTrot(GenericGoComic):
    """FoxTrot comics, retrieved from GoComics."""
    name = 'foxtrot'
    long_name = 'FoxTrot'
    url = 'http://www.gocomics.com/foxtrot'
5363
5364
5365
class FoxTrotClassics(GenericGoComic):
    """FoxTrot Classics comics, retrieved from GoComics."""
    name = 'foxtrot-classics'
    long_name = 'FoxTrot Classics'
    url = 'http://www.gocomics.com/foxtrotclassics'
5370
5371
5372
class MisterAndMeGoComics(GenericDeletedComic, GenericGoComic):
    """Mister & Me comics from GoComics (flagged with GenericDeletedComic)."""
    # Also on http://www.mister-and-me.com
    # Also on https://tapastic.com/series/Mister-and-Me
    name = 'mister-goc'
    long_name = 'Mister & Me (from GoComics)'
    url = 'http://www.gocomics.com/mister-and-me'
5379
5380
5381
class NonSequitur(GenericGoComic):
    """Non Sequitur (Wiley Miller) comics, retrieved from GoComics."""
    name = 'nonsequitur'
    long_name = 'Non Sequitur'
    url = 'http://www.gocomics.com/nonsequitur'
5386
5387
5388
class JoeyAlisonSayers(GenericGoComic):
    """Joey Alison Sayers comics, retrieved from GoComics."""
    name = 'joeyalison'
    long_name = 'Joey Alison Sayers (from GoComics)'
    url = 'http://www.gocomics.com/joey-alison-sayers-comics'
5393
5394
5395
class SavageChickenGoComics(GenericGoComic):
    """Savage Chicken comics, retrieved from GoComics."""
    # Also on http://www.savagechickens.com
    name = 'savage-goc'
    long_name = 'Savage Chicken (from GoComics)'
    url = 'http://www.gocomics.com/savage-chickens'
5401
5402
5403
class GenericTapasticComic(GenericListableComic):
    """Generic class to handle the logic common to comics from tapastic.com."""
    _categories = ('TAPASTIC', )

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Get information about a particular comics.

        Args:
            soup: parsed episode page; its 'art-image' <img> tags are read.
            archive_elt: one entry of the episode list returned by
                get_archive_elements; its 'publishDate' and 'title' keys
                are read.

        Returns:
            A dict with 'day', 'year', 'month', 'img' and 'title', or None
            when the page contains no 'art-image' image.
        """
        # 'publishDate' is scaled down by 1000 to obtain a timestamp in seconds.
        timestamp = int(archive_elt['publishDate']) / 1000.0
        # NOTE(review): fromtimestamp() converts to local time, so the day may
        # depend on the machine's timezone - confirm this is intended.
        day = datetime.datetime.fromtimestamp(timestamp).date()
        imgs = soup.find_all('img', class_='art-image')
        if not imgs:
            # No image on the page: the episode may still be being uploaded,
            # so it is skipped for now.
            return None
        return {
            'day': day.day,
            'year': day.year,
            'month': day.month,
            'img': [i['src'] for i in imgs],
            'title': archive_elt['title'],
        }

    @classmethod
    def get_url_from_archive_element(cls, archive_elt):
        """Build the episode URL from the 'id' of an episode list entry."""
        return 'http://tapastic.com/episode/' + str(archive_elt['id'])

    @classmethod
    def get_archive_elements(cls):
        """Return the episode list embedded in the series page.

        Raises:
            IndexError: when no 'episodeList : ...,' line is found (same
                exception type as the former list-indexing implementation).
        """
        pref, suff = 'episodeList : ', ','
        # Information is stored in the javascript part of the page: the line
        # starting with `pref` and ending with `suff` carries a JSON array.
        # I don't know the clean way to get it so this is the ugly way.
        for raw_line in urlopen_wrapper(cls.url).readlines():
            line = raw_line.decode('utf-8').strip()
            if line.startswith(pref) and line.endswith(suff):
                # The first matching line is the one to use; stop scanning.
                return json.loads(line[len(pref):-len(suff)])
        raise IndexError("no episode list found at %s" % cls.url)
5436
5437
5438
class VegetablesForDessert(GenericTapasticComic):
    """Vegetables For Dessert comics, retrieved from Tapastic."""
    # Also on http://vegetablesfordessert.tumblr.com
    name = 'vegetables'
    long_name = 'Vegetables For Dessert'
    url = 'http://tapastic.com/series/vegetablesfordessert'
5444
5445
5446
class FowlLanguageTapa(GenericTapasticComic):
    """Fowl Language comics, retrieved from Tapastic."""
    # Also on http://www.fowllanguagecomics.com
    # Also on http://fowllanguagecomics.tumblr.com
    # Also on http://www.gocomics.com/fowl-language
    name = 'fowllanguage-tapa'
    long_name = 'Fowl Language Comics (from Tapastic)'
    url = 'http://tapastic.com/series/Fowl-Language-Comics'
    _categories = ('FOWLLANGUAGE', )
5455
5456
5457
class OscillatingProfundities(GenericTapasticComic):
    """Oscillating Profundities comics, retrieved from Tapastic."""
    name = 'oscillating'
    long_name = 'Oscillating Profundities'
    url = 'http://tapastic.com/series/oscillatingprofundities'
5462
5463
5464
class ZnoflatsComics(GenericTapasticComic):
    """Znoflats comics, retrieved from Tapastic."""
    name = 'znoflats'
    long_name = 'Znoflats Comics'
    url = 'http://tapastic.com/series/Znoflats-Comics'
5469
5470
5471
class SandersenTapastic(GenericTapasticComic):
    """Sarah Andersen comics, retrieved from Tapastic."""
    # Also on http://sarahcandersen.com
    # Also on http://www.gocomics.com/sarahs-scribbles
    name = 'sandersen-tapa'
    long_name = 'Sarah Andersen (from Tapastic)'
    url = 'http://tapastic.com/series/Doodle-Time'
5478
5479
5480
class TubeyToonsTapastic(GenericTapasticComic):
    """Tubey Toons comics, retrieved from Tapastic."""
    # Also on http://tubeytoons.com
    # Also on https://tubeytoons.tumblr.com
    name = 'tubeytoons-tapa'
    long_name = 'Tubey Toons (from Tapastic)'
    url = 'http://tapastic.com/series/Tubey-Toons'
    # NOTE(review): 'TUNEYTOONS' is presumably a typo for 'TUBEYTOONS'; check
    # the category used by the other Tubey Toons classes before changing it.
    _categories = ('TUNEYTOONS', )
5488
5489
5490
class AnythingComicTapastic(GenericTapasticComic):
    """Anything Comic comics, retrieved from Tapastic."""
    # Also on http://www.anythingcomic.com
    name = 'anythingcomic-tapa'
    long_name = 'Anything Comic (from Tapastic)'
    url = 'http://tapastic.com/series/anything'
5496
5497
5498
class UnearthedComicsTapastic(GenericTapasticComic):
    """Unearthed Comics, retrieved from Tapastic."""
    # Also on http://unearthedcomics.com
    # Also on https://unearthedcomics.tumblr.com
    name = 'unearthed-tapa'
    long_name = 'Unearthed Comics (from Tapastic)'
    url = 'http://tapastic.com/series/UnearthedComics'
    _categories = ('UNEARTHED', )
5506
5507
5508
class EverythingsStupidTapastic(GenericTapasticComic):
    """Everything's Stupid comics, retrieved from Tapastic."""
    # Also on http://www.webtoons.com/en/challenge/everythings-stupid/list?title_no=14591
    # Also on http://everythingsstupid.net
    name = 'stupid-tapa'
    long_name = "Everything's Stupid (from Tapastic)"
    url = 'http://tapastic.com/series/EverythingsStupid'
5515
5516
5517
class JustSayEhTapastic(GenericTapasticComic):
    """Just Say Eh comics, retrieved from Tapastic."""
    # Also on http://www.justsayeh.com
    name = 'justsayeh-tapa'
    long_name = 'Just Say Eh (from Tapastic)'
    url = 'http://tapastic.com/series/Just-Say-Eh'
5523
5524
5525
class ThorsThundershackTapastic(GenericTapasticComic):
    """Thor's Thundershack comics, retrieved from Tapastic."""
    # Also on http://www.thorsthundershack.com
    name = 'thor-tapa'
    long_name = 'Thor\'s Thundershack (from Tapastic)'
    url = 'http://tapastic.com/series/Thors-Thundershac'
    _categories = ('THOR', )
5532
5533
5534
class OwlTurdTapastic(GenericTapasticComic):
    """Retrieve the Owl Turd / Shen Comix comics published on Tapastic."""
    # Also published at http://shencomix.com
    name = 'owlturd-tapa'
    long_name = 'Owl Turd / Shen Comix (from Tapastic)'
    url = 'https://tapas.io/series/Shen-Comix'
    _categories = (
        'OWLTURD',
        'SHENCOMIX',
    )
5541
5542
5543
class GoneIntoRaptureTapastic(GenericTapasticComic):
    """Retrieve the Gone Into Rapture comics published on Tapastic."""
    # Also published at:
    #   http://goneintorapture.tumblr.com
    #   http://goneintorapture.com
    name = 'rapture-tapa'
    long_name = 'Gone Into Rapture (from Tapastic)'
    url = 'http://tapastic.com/series/Goneintorapture'
5550
5551
5552
class HeckIfIKnowComicsTapa(GenericTapasticComic):
    """Retrieve the Heck If I Know comics published on Tapastic."""
    # Also published at http://heckifiknowcomics.com
    name = 'heck-tapa'
    long_name = 'Heck if I Know comics (from Tapastic)'
    url = 'http://tapastic.com/series/Regular'
5558
5559
5560
class CheerUpEmoKidTapa(GenericTapasticComic):
    """Retrieve the Cheer Up Emo Kid comics published on Tapastic."""
    # Also published at:
    #   http://www.cheerupemokid.com
    #   https://enzocomics.tumblr.com
    name = 'cuek-tapa'
    long_name = 'Cheer Up Emo Kid (from Tapastic)'
    url = 'http://tapastic.com/series/CUEK'
5567
5568
5569
class BigFootJusticeTapa(GenericTapasticComic):
    """Retrieve the Big Foot Justice comics published on Tapastic."""
    # Also published at http://bigfootjustice.com
    name = 'bigfoot-tapa'
    long_name = 'Big Foot Justice (from Tapastic)'
    url = 'http://tapastic.com/series/bigfoot-justice'
5575
5576
5577
class UpAndOutTapa(GenericTapasticComic):
    """Retrieve the Up & Out comics published on Tapastic."""
    # Also published at http://upandoutcomic.tumblr.com
    name = 'upandout-tapa'
    long_name = 'Up And Out (from Tapastic)'
    url = 'http://tapastic.com/series/UP-and-OUT'
5583
5584
5585
class ToonHoleTapa(GenericTapasticComic):
    """Retrieve the Toon Hole comics published on Tapastic."""
    # Also published at http://www.toonhole.com
    name = 'toonhole-tapa'
    long_name = 'Toon Hole (from Tapastic)'
    url = 'http://tapastic.com/series/TOONHOLE'
5591
5592
5593
class AngryAtNothingTapa(GenericTapasticComic):
    """Retrieve the Angry At Nothing comics published on Tapastic."""
    # Also published at:
    #   http://www.angryatnothing.net
    #   http://angryatnothing.tumblr.com
    name = 'angry-tapa'
    long_name = 'Angry At Nothing (from Tapastic)'
    url = 'http://tapastic.com/series/Comics-yeah-definitely-comics-'
5600
5601
5602
class LeleozTapa(GenericTapasticComic):
    """Retrieve the Leleoz comics published on Tapastic."""
    # Also published at http://leleozcomics.tumblr.com
    name = 'leleoz-tapa'
    long_name = 'Leleoz (from Tapastic)'
    url = 'https://tapastic.com/series/Leleoz'
5608
5609
5610
class TheAwkwardYetiTapa(GenericTapasticComic):
    """Retrieve the Awkward Yeti comics published on Tapastic."""
    # Also published at:
    #   http://www.gocomics.com/the-awkward-yeti
    #   http://theawkwardyeti.com
    #   http://larstheyeti.tumblr.com
    name = 'yeti-tapa'
    long_name = 'The Awkward Yeti (from Tapastic)'
    url = 'https://tapastic.com/series/TheAwkwardYeti'
    _categories = ('YETI',)
5619
5620
5621
class AsPerUsualTapa(GenericTapasticComic):
    """Retrieve the As Per Usual comics published on Tapastic.

    Class attributes:
        name: short identifier of the comic.
        long_name: human-readable name of the comic.
        url: address of the series page on Tapastic.
        _categories: category tags attached to this comic.
    """
    # Also published at http://as-per-usual.tumblr.com
    name = 'usual-tapa'
    long_name = 'As Per Usual (from Tapastic)'
    url = 'https://tapastic.com/series/AsPerUsual'
    # Spelled `_categories` (leading underscore) like every sibling comic
    # class; the previous `categories` spelling matched no other class.
    _categories = ('DAMILEE', )
5628
5629
5630
class HotComicsForCoolPeopleTapa(GenericTapasticComic):
    """Retrieve the Hot Comics For Cool People comics published on Tapastic.

    Class attributes:
        name: short identifier of the comic.
        long_name: human-readable name of the comic.
        url: address of the series page on Tapastic.
        _categories: category tags attached to this comic.
    """
    # Also published at:
    #   http://hotcomicsforcoolpeople.tumblr.com
    #   http://hotcomics.biz (links to tumblr)
    #   http://hcfcp.com (links to tumblr)
    name = 'hotcomics-tapa'
    long_name = 'Hot Comics For Cool People (from Tapastic)'
    url = 'https://tapastic.com/series/Hot-Comics-For-Cool-People'
    # Spelled `_categories` (leading underscore) like every sibling comic
    # class; the previous `categories` spelling matched no other class.
    _categories = ('DAMILEE', )
5639
5640
5641
class OneOneOneOneComicTapa(GenericTapasticComic):
    """Retrieve the 1111 Comics published on Tapastic."""
    # Also published at:
    #   http://www.1111comics.me
    #   http://comics1111.tumblr.com
    name = '1111-tapa'
    long_name = '1111 Comics (from Tapastic)'
    url = 'https://tapastic.com/series/1111-Comics'
    _categories = ('ONEONEONEONE',)
5649
5650
5651
class TumbleDryTapa(GenericTapasticComic):
    """Retrieve the Tumble Dry comics published on Tapastic.

    Class attributes:
        name: short identifier of the comic.
        long_name: human-readable name of the comic.
        url: address of the series page on Tapastic.
    """
    # Also published at http://tumbledrycomics.com
    name = 'tumbledry-tapa'
    # The comic is "Tumble Dry" (see name and url); "Tumblr Dry" was a typo.
    long_name = 'Tumble Dry (from Tapastic)'
    url = 'https://tapastic.com/series/TumbleDryComics'
5657
5658
5659
class DeadlyPanelTapa(GenericTapasticComic):
    """Retrieve the Deadly Panel comics published on Tapastic."""
    # Also published at:
    #   http://www.deadlypanel.com
    #   https://deadlypanel.tumblr.com
    name = 'deadly-tapa'
    long_name = 'Deadly Panel (from Tapastic)'
    url = 'https://tapastic.com/series/deadlypanel'
5666
5667
5668
class ChrisHallbeckMaxiTapa(GenericTapasticComic):
    """Retrieve Chris Hallbeck's Maximumble comics published on Tapastic."""
    # Also published at:
    #   https://chrishallbeck.tumblr.com
    #   http://maximumble.com
    # NOTE(review): the strings below spell "Hallback" while the class name
    # says "Hallbeck"; kept as-is because the same spelling may be used by
    # classes outside this view - confirm before renaming.
    name = 'hallbeckmaxi-tapa'
    long_name = 'Chris Hallback - Maximumble (from Tapastic)'
    url = 'https://tapastic.com/series/Maximumble'
    _categories = ('HALLBACK',)
5676
5677
5678
class ChrisHallbeckMiniTapa(GenericDeletedComic, GenericTapasticComic):
    """Retrieve Chris Hallbeck's Minimumble comics published on Tapastic."""
    # Also published at:
    #   https://chrishallbeck.tumblr.com
    #   http://minimumble.com
    # NOTE(review): the strings below spell "Hallback" while the class name
    # says "Hallbeck"; kept as-is because the same spelling may be used by
    # classes outside this view - confirm before renaming.
    name = 'hallbeckmini-tapa'
    long_name = 'Chris Hallback - Minimumble (from Tapastic)'
    url = 'https://tapastic.com/series/Minimumble'
    _categories = ('HALLBACK',)
5686
5687
5688
class ChrisHallbeckBiffTapa(GenericDeletedComic, GenericTapasticComic):
    """Retrieve Chris Hallbeck's Book of Biff comics published on Tapastic."""
    # Also published at:
    #   https://chrishallbeck.tumblr.com
    #   http://thebookofbiff.com
    # NOTE(review): the strings below spell "Hallback" while the class name
    # says "Hallbeck"; kept as-is because the same spelling may be used by
    # classes outside this view - confirm before renaming.
    name = 'hallbeckbiff-tapa'
    long_name = 'Chris Hallback - The Book of Biff (from Tapastic)'
    url = 'https://tapastic.com/series/Biff'
    _categories = ('HALLBACK',)
5696
5697
5698
class RandoWisTapa(GenericTapasticComic):
    """Retrieve the RandoWis comics published on Tapastic."""
    # Also published at https://randowis.com
    name = 'randowis-tapa'
    long_name = 'RandoWis (from Tapastic)'
    url = 'https://tapastic.com/series/RandoWis'
5704
5705
5706
class PigeonGazetteTapa(GenericTapasticComic):
    """Retrieve the Pigeon Gazette comics published on Tapastic."""
    # Also published at http://thepigeongazette.tumblr.com
    name = 'pigeon-tapa'
    long_name = 'The Pigeon Gazette (from Tapastic)'
    url = 'https://tapastic.com/series/The-Pigeon-Gazette'
5712
5713
5714
class TheOdd1sOutTapa(GenericTapasticComic):
    """Retrieve the Odd 1s Out comics published on Tapastic."""
    # Also published at:
    #   http://theodd1sout.com
    #   http://theodd1sout.tumblr.com
    name = 'theodd-tapa'
    long_name = 'The Odd 1s Out (from Tapastic)'
    url = 'https://tapastic.com/series/Theodd1sout'
5721
5722
5723
class TheWorldIsFlatTapa(GenericTapasticComic):
    """Retrieve the World Is Flat comics published on Tapastic."""
    # Also published at http://theworldisflatcomics.tumblr.com
    name = 'flatworld-tapa'
    long_name = 'The World Is Flat (from Tapastic)'
    url = 'https://tapastic.com/series/The-World-is-Flat'
5729
5730
5731
class MisterAndMeTapa(GenericTapasticComic):
    """Retrieve the Mister & Me comics published on Tapastic."""
    # Also published at:
    #   http://www.mister-and-me.com
    #   http://www.gocomics.com/mister-and-me
    name = 'mister-tapa'
    long_name = 'Mister & Me (from Tapastic)'
    url = 'https://tapastic.com/series/Mister-and-Me'
5738
5739
5740
class TalesOfAbsurdityTapa(GenericDeletedComic, GenericTapasticComic):
    """Retrieve the Tales of Absurdity comics published on Tapastic."""
    # Also published at:
    #   http://talesofabsurdity.com
    #   http://talesofabsurdity.tumblr.com
    name = 'absurdity-tapa'
    long_name = 'Tales of Absurdity (from Tapastic)'
    url = 'http://tapastic.com/series/Tales-Of-Absurdity'
    _categories = ('ABSURDITY',)
5748
5749
5750
class BFGFSTapa(GenericTapasticComic):
    """Retrieve the BFGFS comics published on Tapastic."""
    # Also published at:
    #   http://bfgfs.com
    #   https://bfgfs.tumblr.com
    name = 'bfgfs-tapa'
    long_name = 'BFGFS (from Tapastic)'
    url = 'https://tapastic.com/series/BFGFS'
5757
5758
5759
class DoodleForFoodTapa(GenericTapasticComic):
    """Retrieve the Doodle For Food comics published on Tapastic."""
    # Also published at http://www.doodleforfood.com
    name = 'doodle-tapa'
    long_name = 'Doodle For Food (from Tapastic)'
    url = 'https://tapastic.com/series/Doodle-for-Food'
5765
5766
5767
class MrLovensteinTapa(GenericTapasticComic):
    """Retrieve the Mr. Lovenstein comics published on Tapastic."""
    # Also published at https://tapastic.com/series/MrLovenstein
    # NOTE(review): the address above is identical to `url` below, so it
    # presumably should name the comic's own website instead - confirm.
    name = 'mrlovenstein-tapa'
    long_name = 'Mr. Lovenstein (from Tapastic)'
    url = 'https://tapastic.com/series/MrLovenstein'
5773
5774
5775
class CassandraCalinTapa(GenericTapasticComic):
    """Retrieve the C. Cassandra comics published on Tapastic."""
    # Also published at:
    #   http://cassandracalin.com
    #   http://c-cassandra.tumblr.com
    name = 'cassandra-tapa'
    long_name = 'Cassandra Calin (from Tapastic)'
    url = 'https://tapastic.com/series/C-Cassandra-comics'
5782
5783
5784
class WafflesAndPancakes(GenericTapasticComic):
    """Retrieve the Waffles And Pancakes comics published on Tapastic."""
    # Also published at http://wandpcomic.com
    name = 'waffles'
    long_name = 'Waffles And Pancakes'
    url = 'https://tapastic.com/series/Waffles-and-Pancakes'
5790
5791
5792
class YesterdaysPopcornTapastic(GenericTapasticComic):
    """Retrieve the Yesterday's Popcorn comics published on Tapastic."""
    # Also published at:
    #   http://www.yesterdayspopcorn.com
    #   http://yesterdayspopcorn.tumblr.com
    name = 'popcorn-tapa'
    long_name = "Yesterday's Popcorn (from Tapastic)"
    url = 'https://tapastic.com/series/Yesterdays-Popcorn'
5799
5800
5801
class OurSuperAdventureTapastic(GenericDeletedComic, GenericTapasticComic):
    """Retrieve the Our Super Adventure comics published on Tapastic."""
    # Also published at http://www.oursuperadventure.com
    # Related addresses listed by the original author:
    #   http://sarahssketchbook.tumblr.com
    #   http://sarahgraley.com
    name = 'superadventure-tapastic'
    long_name = 'Our Super Adventure (from Tapastic)'
    url = 'https://tapastic.com/series/Our-Super-Adventure'
5809
5810
5811
class NamelessPCs(GenericTapasticComic):
    """Retrieve the Nameless PCs comics published on Tapastic."""
    # Also published at http://namelesspcs.com
    name = 'namelesspcs-tapa'
    long_name = 'NamelessPCs (from Tapastic)'
    url = 'https://tapastic.com/series/NamelessPC'
5817
5818
5819
class DownTheUpwardSpiralTapa(GenericTapasticComic):
    """Retrieve the Down The Upward Spiral comics published on Tapastic."""
    # Also published at:
    #   http://www.downtheupwardspiral.com
    #   http://downtheupwardspiral.tumblr.com
    name = 'spiral-tapa'
    long_name = 'Down the Upward Spiral (from Tapastic)'
    url = 'https://tapastic.com/series/Down-the-Upward-Spiral'
5826
5827
5828
class UbertoolTapa(GenericTapasticComic):
    """Retrieve the Ubertool comics published on Tapastic."""
    # Also published at:
    #   http://ubertoolcomic.com
    #   https://ubertool.tumblr.com
    name = 'ubertool-tapa'
    long_name = 'Ubertool (from Tapastic)'
    url = 'https://tapastic.com/series/ubertool'
    _categories = ('UBERTOOL',)
5836
5837
5838
class BarteNerdsTapa(GenericDeletedComic, GenericTapasticComic):
    """Retrieve the BarteNerds comics published on Tapastic."""
    # Also published at http://www.bartenerds.com
    name = 'bartenerds-tapa'
    long_name = 'BarteNerds (from Tapastic)'
    url = 'https://tapastic.com/series/BarteNERDS'
5844
5845
5846
class SmallBlueYonderTapa(GenericTapasticComic):
    """Retrieve the Small Blue Yonder comics published on Tapastic."""
    # Also published at http://www.smallblueyonder.com
    name = 'smallblue-tapa'
    long_name = 'Small Blue Yonder (from Tapastic)'
    url = 'https://tapastic.com/series/Small-Blue-Yonder'
5852
5853
5854
class TizzyStitchBirdTapa(GenericTapasticComic):
    """Retrieve the Tizzy Stitch Bird comics published on Tapastic."""
    # Also published at:
    #   http://tizzystitchbird.com
    #   http://tizzystitchbird.tumblr.com
    #   http://www.webtoons.com/en/challenge/tizzy-stitchbird/list?title_no=50082
    name = 'tizzy-tapa'
    long_name = 'Tizzy Stitch Bird (from Tapastic)'
    url = 'https://tapastic.com/series/TizzyStitchbird'
5862
5863
5864
class RockPaperCynicTapa(GenericTapasticComic):
    """Retrieve the Rock Paper Cynic comics published on Tapastic."""
    # Also published at:
    #   http://www.rockpapercynic.com
    #   http://rockpapercynic.tumblr.com
    name = 'rpc-tapa'
    long_name = 'Rock Paper Cynic (from Tapastic)'
    url = 'https://tapastic.com/series/rockpapercynic'
5871
5872
5873
class IsItCanonTapa(GenericTapasticComic):
    """Retrieve the Is It Canon comics published on Tapastic."""
    # Also published at http://www.isitcanon.com
    name = 'canon-tapa'
    long_name = 'Is It Canon (from Tapastic)'
    url = 'http://tapastic.com/series/isitcanon'
5879
5880
5881
class ItsTheTieTapa(GenericTapasticComic):
    """Retrieve the It's The Tie comics published on Tapastic."""
    # Also published at:
    #   http://itsthetie.com
    #   http://itsthetie.tumblr.com
    name = 'tie-tapa'
    long_name = "It's the tie (from Tapastic)"
    url = 'https://tapastic.com/series/itsthetie'
    _categories = ('TIE',)
5889
5890
5891
class JamesOfNoTradesTapa(GenericTapasticComic):
    """Retrieve the James Of No Trades comics published on Tapastic."""
    # Also published at:
    #   http://jamesofnotrades.com
    #   http://www.webtoons.com/en/challenge/james-of-no-trades/list?title_no=43422
    #   http://jamesfregan.tumblr.com
    name = 'jamesofnotrades-tapa'
    long_name = 'James Of No Trades (from Tapastic)'
    url = 'https://tapas.io/series/James-of-No-Trades'
    _categories = ('JAMESOFNOTRADES',)
5900
5901
5902
class MomentumTapa(GenericTapasticComic):
    """Retrieve the Momentum comics published on Tapastic."""
    # Also published at http://www.momentumcomic.com
    name = 'momentum-tapa'
    long_name = 'Momentum (from Tapastic)'
    url = 'https://tapastic.com/series/momentum'
5908
5909
5910
class InYourFaceCakeTapa(GenericTapasticComic):
    """Retrieve the In Your Face Cake comics published on Tapastic."""
    # Also published at https://in-your-face-cake.tumblr.com
    name = 'inyourfacecake-tapa'
    long_name = 'In Your Face Cake (from Tapastic)'
    url = 'https://tapas.io/series/In-Your-Face-Cake'
    _categories = ('INYOURFACECAKE',)
5917
5918
5919
class CowardlyComicsTapa(GenericTapasticComic):
    """Retrieve the Cowardly Comics published on Tapastic."""
    # Also published at:
    #   http://cowardlycomics.tumblr.com
    #   http://www.webtoons.com/en/challenge/cowardly-comics/list?title_no=65893
    name = 'cowardly-tapa'
    long_name = 'Cowardly Comics (from Tapastic)'
    url = 'https://tapas.io/series/CowardlyComics'
5926
5927
5928
class Caw4hwTapa(GenericTapasticComic):
    """Retrieve the Caw4hw comics published on Tapastic."""
    # Also published at https://caw4hw.tumblr.com
    name = 'caw4hw-tapa'
    long_name = 'Caw4hw (from Tapastic)'
    url = 'https://tapas.io/series/CAW4HW'
5934
5935
5936
class DontBeDadTapa(GenericTapasticComic):
    """Retrieve the Don't Be Dad comics published on Tapastic."""
    # Also published at:
    #   https://dontbedad.com/
    #   http://www.webtoons.com/en/challenge/dontbedad/list?title_no=123074
    name = 'dontbedad-tapa'
    long_name = "Don't Be Dad (from Tapastic)"
    url = 'https://tapas.io/series/DontBeDad-Comics'
5943
5944
5945
class APleasantWasteOfTimeTapa(GenericTapasticComic):
    """Retrieve the A Pleasant Waste Of Time comics published on Tapastic."""
    # Also published at https://artjcf.tumblr.com
    name = 'pleasant-waste-tapa'
    long_name = 'A Pleasant Waste Of Time (from Tapastic)'
    url = 'https://tapas.io/series/A-Pleasant-'
    _categories = ('WASTE',)
5952
5953
5954
class InfiniteImmortalBensTapa(GenericTapasticComic):
    """Retrieve the Infinite Immortal Bens comics published on Tapastic."""
    # Also published at:
    #   http://www.webtoons.com/en/challenge/infinite-immortal-bens/list?title_no=32847
    #   https://infiniteimmortalbens.tumblr.com
    name = 'infiniteimmortal-tapa'
    long_name = 'Infinite Immortal Bens (from Tapastic)'
    url = 'https://tapas.io/series/Infinite-Immortal-Bens'
    _categories = ('INFINITEIMMORTAL',)
5962
5963
5964
class EatMyPaintTapa(GenericTapasticComic):
    """Retrieve the Eat My Paint comics published on Tapastic."""
    # Also published at https://eatmypaint.tumblr.com
    name = 'eatmypaint-tapa'
    long_name = 'Eat My Paint (from Tapastic)'
    url = 'https://tapas.io/series/eatmypaint'
    _categories = ('EATMYPAINT',)
5971
5972
5973
class RoryTapastic(GenericTapasticComic):
    """Retrieve the Rory comics published on Tapastic."""
    # Also published at https://rorycomics.tumblr.com/
    name = 'rory-tapa'
    long_name = 'Rory (from Tapastic)'
    url = 'https://tapas.io/series/Share-Your-Vulnerability'
    _categories = ('RORY', )
5980
5981
5982
class MercworksTapa(GenericTapasticComic):
    """Retrieve the Mercworks comics published on Tapastic."""
    # Also published at:
    #   http://mercworks.net
    #   http://www.webtoons.com/en/comedy/mercworks/list?title_no=426
    #   http://mercworks.tumblr.com
    name = 'mercworks-tapa'
    long_name = 'Mercworks (from Tapastic)'
    url = 'https://tapastic.com/series/MercWorks'
    _categories = ('MERCWORKS',)
5991
5992
5993
class AbsurdoLapin(GenericNavigableComic):
    """Retrieve the Absurdo comics hosted on absurdo.lapin.org."""
    name = 'absurdo'
    long_name = 'Absurdo'
    url = 'https://absurdo.lapin.org'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_nav(cls, soup):
        """Return the navigation anchors found in a comic page.

        The anchors come back in page order: prev, first, last, next.
        """
        content = soup.find('div', id='content')
        # The page is expected to hold exactly two 'buttons' blocks (the
        # unpacking fails otherwise); only the second one is used.
        _unused, nav_block = content.find_all('div', class_='buttons')
        anchors = []
        for item in nav_block.find_all('li'):
            anchors.append(item.find('a'))
        return anchors

    @classmethod
    def get_first_comic_link(cls):
        """Return the link to the first comic, read from the home page."""
        home_soup = get_soup_at_url(cls.url)
        return cls.get_nav(home_soup)[1]

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the link to the next (or previous) comic."""
        nav = cls.get_nav(last_soup)
        return nav[3] if next_ else nav[0]

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect title, images, tags and author from a comic page."""
        def meta_content(meta_name):
            """Return the 'content' of the <meta> tag with the given name."""
            return soup.find('meta', attrs={'name': meta_name})['content']

        author = meta_content('author')
        tags = meta_content('keywords')
        title = soup.find('title').string
        images = soup.find('div', id='content').find_all('img')
        img_urls = [urljoin_wrapper(cls.url, image['src']) for image in images]
        return {
            'title': title,
            'img': img_urls,
            'tags': tags,
            'author': author,
        }
6031
6032
6033
def get_subclasses(klass):
    """Return every direct and indirect subclass of `klass` as a list.

    Direct subclasses come first, followed by the descendants of each
    direct subclass in turn.
    """
    direct = klass.__subclasses__()
    indirect = [sub for child in direct for sub in get_subclasses(child)]
    return direct + indirect
6039
6040
6041
def remove_st_nd_rd_th_from_date(string):
    """Strip ordinal suffixes (1st/2nd/3rd/4th) so the date can be parsed.

    Only a suffix that directly follows a digit is removed. Stripping the
    letters anywhere in the string would damage day and month names such
    as "Sunday", "Monday", "Saturday" or "August".

    Args:
        string: date text possibly containing ordinals, e.g. "2nd May 2015".

    Returns:
        The same text with the ordinal suffixes removed, e.g. "2 May 2015".
    """
    return re.sub(r'(?<=\d)(?:st|nd|rd|th)', '', string)
6049
6050
6051
def string_to_date(string, date_format, local=DEFAULT_LOCAL):
    """Convert a string to a date object.

    Wrapper around datetime.datetime.strptime that parses under the
    requested locale and then puts the previous locale back.

    Args:
        string: text to parse.
        date_format: strptime format, described in
            https://docs.python.org/2/library/datetime.html#strftime-and-strptime-behavior
        local: locale name to parse under.

    Returns:
        The parsed datetime.date.

    Raises:
        ValueError: if `string` does not match `date_format`.
        locale.Error: if `local` cannot be set.
    """
    prev_locale = locale.setlocale(locale.LC_ALL)
    if local == prev_locale:
        # Already in the requested locale: nothing to switch or restore.
        return datetime.datetime.strptime(string, date_format).date()
    locale.setlocale(locale.LC_ALL, local)
    try:
        return datetime.datetime.strptime(string, date_format).date()
    finally:
        # Restore even when parsing fails, so a bad date string does not
        # leave the whole process in the temporary locale.
        locale.setlocale(locale.LC_ALL, prev_locale)
6062
6063
6064
# Every class deriving, directly or not, from GenericComic.
COMICS = set(get_subclasses(GenericComic))
# Keep only the classes whose `name` attribute is set.
VALID_COMICS = [comic for comic in COMICS if comic.name is not None]
COMIC_NAMES = {comic.name: comic for comic in VALID_COMICS}
CLASS_NAMES = {comic.__name__ for comic in VALID_COMICS}
# A dict/set collapses duplicates, so a size mismatch means that two
# classes share the same `name` or the same class name.
assert len(COMIC_NAMES) == len(VALID_COMICS)
assert len(CLASS_NAMES) == len(VALID_COMICS)
6070