Issues (34)

comics.py (34 issues)

Code
1
#! /usr/bin/python3
2
# vim: set expandtab tabstop=4 shiftwidth=4 :
3
"""Module to retrieve webcomics"""
4
5
from comic_abstract import GenericComic, get_date_for_comic
6
import re
7
from datetime import date, timedelta
8
import datetime
9
from urlfunctions import get_soup_at_url, urljoin_wrapper,\
10
    convert_iri_to_plain_ascii_uri, load_json_at_url, urlopen_wrapper
11
import json
12
import locale
13
import urllib
14
15
DEFAULT_LOCAL = 'en_GB.UTF-8'
16
17
18
class Xkcd(GenericComic):
    """Class to retrieve Xkcd comics."""
    name = 'xkcd'
    long_name = 'xkcd'
    url = 'http://xkcd.com'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generator to get the next comic. Implementation of GenericComic's abstract method.

        Comics are numbered; iteration starts right after `last_comic['num']`
        (or from 1 when `last_comic` is falsy) and stops at the 'num' value
        found in the site-level `info.0.json` document.
        """
        json_url = urljoin_wrapper(cls.url, 'info.0.json')
        first_num = last_comic['num'] if last_comic else 0
        last_num = load_json_at_url(json_url)['num']

        for num in range(first_num + 1, last_num + 1):
            comic = cls.get_comic_info(num)
            # get_comic_info returns None for numbers that must be skipped.
            if comic is not None:
                yield comic

    @classmethod
    def get_comic_info(cls, num):
        """Get information about a particular comics.

        Returns a dict describing comic number `num`, or None when the
        number is skipped.
        """
        # NOTE(review): number 404 is skipped on purpose; presumably there is
        # no JSON document for it - confirm.
        if num == 404:
            return None
        json_url = urljoin_wrapper(cls.url, '%d/info.0.json' % num)
        comic_json = load_json_at_url(json_url)
        # Sanity check: the document must describe the comic we asked for.
        assert comic_json['num'] == num, json_url
        return {
            'json_url': json_url,
            'num': num,
            'url': urljoin_wrapper(cls.url, str(num)),
            'prefix': '%d-' % num,
            'img': [comic_json['img']],
            'day': int(comic_json['day']),
            'month': int(comic_json['month']),
            'year': int(comic_json['year']),
            'link': comic_json['link'],
            'news': comic_json['news'],
            'safe_title': comic_json['safe_title'],
            'transcript': comic_json['transcript'],
            'alt': comic_json['alt'],
            'title': comic_json['title'],
        }
60
61
62
# Helper functions corresponding to get_url_from_link/get_url_from_archive_element
63
64
65
@classmethod
def get_href(cls, link):
    """Implementation of get_url_from_link/get_url_from_archive_element.

    Return the 'href' entry of `link` unchanged.
    """
    return link['href']
69
70
71
@classmethod
def join_cls_url_to_href(cls, link):
    """Implementation of get_url_from_link/get_url_from_archive_element.

    Return the 'href' entry of `link` joined to the class `url` attribute
    via `urljoin_wrapper`.
    """
    return urljoin_wrapper(cls.url, link['href'])
75
76
77
class GenericNavigableComic(GenericComic):
    """Generic class for "navigable" comics : with first/next arrows.

    This class applies to comic where previous and next comics can be
    accessed from a given comic. Once given a starting point (either
    the first comic or the last comic retrieved), it will handle the
    navigation, the retrieval of the soup object and the setting of
    the 'url' attribute on retrieved comics. This limits a lot the
    amount of boilerplate code in the different implementation classes.

    The `get_next_comic` method is implemented in terms of new
    more specialized methods to be implemented/overridden:
        - get_first_comic_link
        - get_navi_link
        - get_comic_info
        - get_url_from_link
    """
    _categories = ('NAVIGABLE', )

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        Sometimes this can be retrieved of any comic page, sometimes on
        the archive page, sometimes it doesn't exist at all and one has
        to iterate backward to find it before hardcoding the result found.
        """
        raise NotImplementedError

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next (or previous - for dev purposes) comic.

        `next_` is truthy to get the next link, falsy for the previous one.
        """
        raise NotImplementedError

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The returned dict must not contain a 'url' key: it is added by
        `get_next_comic`. Returning None skips the comic.
        """
        raise NotImplementedError

    @classmethod
    def get_url_from_link(cls, link):
        """Get url corresponding to a link. Default implementation is similar to get_href."""
        return link['href']

    @classmethod
    def get_next_link(cls, last_soup):
        """Get link to next comic."""
        link = cls.get_navi_link(last_soup, True)
        cls.log("Next link is %s" % link)
        return link

    @classmethod
    def get_prev_link(cls, last_soup):
        """Get link to previous comic."""
        link = cls.get_navi_link(last_soup, False)
        cls.log("Prev link is %s" % link)
        return link

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generic implementation of get_next_comic for navigable comics.

        Starting after `last_comic` (or from the first comic link when
        `last_comic` is falsy), follow the "next" links and yield the dict
        returned by `get_comic_info` for each page, with its 'url' key set.
        Iteration stops when there is no next link or when the next link
        points to the URL that was just visited.
        """
        url = last_comic['url'] if last_comic else None
        cls.log("starting 'get_next_comic' from %s" % url)
        if url:
            next_comic = cls.get_next_link(get_soup_at_url(url))
        else:
            next_comic = cls.get_first_comic_link()
        cls.log("next/first comic will be %s (url is %s)" % (str(next_comic), url))
        # For debugging purposes: cls.check_navigation(url)
        while next_comic:
            prev_url, url = url, cls.get_url_from_link(next_comic)
            if prev_url == url:
                # A "next" link pointing to the current page means the end.
                cls.log("got same url %s" % url)
                break
            cls.log("about to get %s (%s)" % (url, str(next_comic)))
            soup = get_soup_at_url(url)
            comic = cls.get_comic_info(soup, next_comic)
            if comic is not None:
                assert 'url' not in comic
                comic['url'] = url
                yield comic
            next_comic = cls.get_next_link(soup)
            cls.log("next comic will be %s" % str(next_comic))

    @classmethod
    def check_first_link(cls):
        """Check that navigation to first comic seems to be working - for dev purposes.

        Returns True when a first link exists and its URL can be retrieved
        without an HTTP error, False otherwise.
        """
        # Imported here so that `urllib.error` is guaranteed to be loaded:
        # a plain `import urllib` does not import the submodule.
        import urllib.error

        cls.log("about to check first link")
        ok = True
        firstlink = cls.get_first_comic_link()
        if firstlink is None:
            print("From %s : no first link" % cls.url)
            ok = False
        else:
            firsturl = cls.get_url_from_link(firstlink)
            try:
                get_soup_at_url(firsturl)
            except urllib.error.HTTPError:
                print("From %s : invalid first url" % cls.url)
                ok = False
        cls.log("checked first link -> returned %d" % ok)
        return ok

    @classmethod
    def check_prev_next_links(cls, url):
        """Check that navigation to prev/next from a given URL seems to be working - for dev purposes.

        Going backward then forward (and forward then backward) from `url`
        is expected to lead back to `url`. Returns True when the checks
        pass, False otherwise (a message is printed for each problem).
        """
        cls.log("about to check prev/next from %s" % url)
        ok = True
        if url is None:
            prevlink, nextlink = None, None
        else:
            soup = get_soup_at_url(url)
            prevlink, nextlink = cls.get_prev_link(soup), cls.get_next_link(soup)
        if prevlink is None and nextlink is None:
            print("From %s : no previous nor next" % url)
            ok = False
        else:
            if prevlink:
                prevurl = cls.get_url_from_link(prevlink)
                prevsoup = get_soup_at_url(prevurl)
                prevnextlink = cls.get_next_link(prevsoup)
                prevnext = cls.get_url_from_link(prevnextlink) if prevnextlink is not None else "NO URL"
                if prevnext != url:
                    print("From %s, going backward then forward leads to %s" % (url, prevnext))
                    ok = False
            if nextlink:
                nexturl = cls.get_url_from_link(nextlink)
                # A next link pointing to the same page is not followed.
                if nexturl != url:
                    nextsoup = get_soup_at_url(nexturl)
                    nextprevlink = cls.get_prev_link(nextsoup)
                    nextprev = cls.get_url_from_link(nextprevlink) if nextprevlink is not None else "NO URL"
                    if nextprev != url:
                        print("From %s, going forward then backward leads to %s" % (url, nextprev))
                        ok = False
        cls.log("checked prev/next from %s -> returned %d" % (url, ok))
        return ok

    @classmethod
    def check_navigation(cls, url):
        """Check that navigation functions seem to be working - for dev purposes.

        Both checks are always run; the result is True only if both pass.
        """
        cls.log("about to check navigation from %s" % url)
        first = cls.check_first_link()
        prevnext = cls.check_prev_next_links(url)
        ok = first and prevnext
        cls.log("checked navigation from %s -> returned %d" % (url, ok))
        return ok
223
224
225
class GenericListableComic(GenericComic):
    """Generic class for "listable" comics : with a list of comics (aka 'archive')

    The `get_next_comic` method is implemented in terms of new
    more specialized methods to be implemented/overridden:
        - get_archive_elements
        - get_url_from_archive_element
        - get_comic_info
    """
    _categories = ('LISTABLE', )

    @classmethod
    def get_archive_elements(cls):
        """Get the archive elements (iterable)."""
        raise NotImplementedError

    @classmethod
    def get_url_from_archive_element(cls, archive_elt):
        """Get url corresponding to an archive element."""
        raise NotImplementedError

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Get information about a particular comics.

        The returned dict must not contain a 'url' key: it is added by
        `get_next_comic`. Returning None skips the comic.
        """
        raise NotImplementedError

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generic implementation of get_next_comic for listable comics.

        Archive elements are walked in order. Elements are ignored until
        the one whose URL equals `last_comic['url']` has been seen; every
        element after it is retrieved and yielded. When `last_comic` is
        falsy, every element is retrieved.
        """
        waiting_for_url = last_comic['url'] if last_comic else None
        # Materialised because the length is needed for the final message.
        archive_elts = list(cls.get_archive_elements())
        for archive_elt in archive_elts:
            url = cls.get_url_from_archive_element(archive_elt)
            cls.log("considering %s" % url)
            if waiting_for_url is None:
                cls.log("about to get %s (%s)" % (url, str(archive_elt)))
                soup = get_soup_at_url(url)
                comic = cls.get_comic_info(soup, archive_elt)
                if comic is not None:
                    assert 'url' not in comic
                    comic['url'] = url
                    yield comic
            elif waiting_for_url == url:
                # Last retrieved comic found: start yielding from the next one.
                waiting_for_url = None
        if waiting_for_url is not None:
            print("Did not find %s in the %d comics: there might be a problem" %
                  (waiting_for_url, len(archive_elts)))
272
273
# Helper functions corresponding to get_first_comic_link/get_navi_link
274
275
276
@classmethod
def get_link_rel_next(cls, last_soup, next_):
    """Implementation of get_navi_link.

    Look for a 'link' tag whose rel is 'next' (truthy `next_`) or 'prev'.
    """
    return last_soup.find('link', rel='next' if next_ else 'prev')
280
281
282
@classmethod
def get_a_rel_next(cls, last_soup, next_):
    """Implementation of get_navi_link.

    Look for an 'a' tag whose rel is 'next' (truthy `next_`) or 'prev'.
    """
    return last_soup.find('a', rel='next' if next_ else 'prev')
286
287
288
@classmethod
def get_a_navi_navinext(cls, last_soup, next_):
    """Implementation of get_navi_link.

    Look for an 'a' tag with class 'navi navi-next' (truthy `next_`) or
    'navi navi-prev'.
    """
    # ComicPress (WordPress plugin)
    return last_soup.find('a', class_='navi navi-next' if next_ else 'navi navi-prev')
293
294
295
@classmethod
def get_a_navi_comicnavnext_navinext(cls, last_soup, next_):
    """Implementation of get_navi_link.

    Look for an 'a' tag with class 'navi comic-nav-next navi-next'
    (truthy `next_`) or 'navi comic-nav-previous navi-prev'.
    """
    return last_soup.find('a', class_='navi comic-nav-next navi-next' if next_ else 'navi comic-nav-previous navi-prev')
299
300
301
@classmethod
def get_a_comicnavbase_comicnavnext(cls, last_soup, next_):
    """Implementation of get_navi_link.

    Look for an 'a' tag with class 'comic-nav-base comic-nav-next'
    (truthy `next_`) or 'comic-nav-base comic-nav-previous'.
    """
    return last_soup.find('a', class_='comic-nav-base comic-nav-next' if next_ else 'comic-nav-base comic-nav-previous')
305
306
307
@classmethod
def get_a_navi_navifirst(cls):
    """Implementation of get_first_comic_link.

    Look on the page at `cls.url` for an 'a' tag with class
    'navi navi-first'.
    """
    # ComicPress (WordPress plugin)
    return get_soup_at_url(cls.url).find('a', class_='navi navi-first')
312
313
314
@classmethod
def get_div_navfirst_a(cls):
    """Implementation of get_first_comic_link.

    Look on the page at `cls.url` for the first 'a' tag inside the 'div'
    with class 'nav-first'. Returns None when the div (or the link inside
    it) is missing instead of failing on the chained lookup.
    """
    div = get_soup_at_url(cls.url).find('div', class_="nav-first")
    return div.find('a') if div is not None else None
318
319
320
@classmethod
def get_a_comicnavbase_comicnavfirst(cls):
    """Implementation of get_first_comic_link.

    Look on the page at `cls.url` for an 'a' tag with class
    'comic-nav-base comic-nav-first'.
    """
    return get_soup_at_url(cls.url).find('a', class_='comic-nav-base comic-nav-first')
324
325
326
@classmethod
def simulate_first_link(cls):
    """Implementation of get_first_comic_link creating a link-like object from
    an URL provided by the class.

    The class is expected to define a `first_url` attribute; the returned
    dict exposes it under the 'href' key.

    Note: The first URL can easily be found using :
    `get_first_comic_link = navigate_to_first_comic`.
    """
    return {'href': cls.first_url}
335
336
337
@classmethod
def navigate_to_first_comic(cls):
    """Implementation of get_first_comic_link navigating from a user provided
    URL to the first comic.

    Sometimes, the first comic cannot be reached directly so to start
    from the first comic one has to go to the previous comic until
    there is no previous comics. Once this URL is reached, it
    is better to hardcode it but for development purposes, it
    is convenient to have an automatic way to find it.

    Then, the URL found can easily be used via `simulate_first_link`.

    The starting URL is `cls.first_url` when it is set to a real value;
    otherwise it is asked interactively. Every visited URL is printed and
    the URL finally reached is stored back in `cls.first_url`.
    """
    url = getattr(cls, 'first_url', None)
    # NotImplemented is a singleton used as a placeholder: compare by identity.
    if url is None or url is NotImplemented:
        prompt = "Get starting URL for %s (%s):" % (cls.name, cls.url)
        url = input(prompt)
    print(url)
    comic = cls.get_prev_link(get_soup_at_url(url))
    while comic:
        url = cls.get_url_from_link(comic)
        print(url)
        comic = cls.get_prev_link(get_soup_at_url(url))
    cls.first_url = url
    return {'href': url}
362
363
364
class GenericEmptyComic(GenericComic):
    """Generic class for comics where nothing is to be done.

    It can be useful to deactivate temporarily comics that do not work
    properly by replacing `class MyComic(GenericWhateverComic)` with
    `class MyComic(GenericEmptyComic, GenericWhateverComic)`."""
    _categories = ('EMPTY', )

    @classmethod
    def get_next_comic(cls, last_comic):
        """Implementation of get_next_comic returning no comics."""
        cls.log("comic is considered as empty - returning no comic")
        return []
377
378
379
class GenericComicNotWorking(GenericEmptyComic):
    """Subclass of GenericEmptyComic used when comic is not working.

    This is more explicit than GenericEmptyComic as it highlights that
    only the implementation is not working and it can be fixed."""
    _categories = ('NOTWORKING', )
385
386
387
class GenericUnavailableComic(GenericEmptyComic):
    """Subclass of GenericEmptyComic used when a comic is not available.

    This is more explicit than GenericEmptyComic as it highlights that
    the source of the comic is not available but we expect it to be back
    soonish. See also GenericDeletedComic."""
    _categories = ('UNAVAILABLE', )
394
395
396
class GenericDeletedComic(GenericEmptyComic):
    """Subclass of GenericEmptyComic used when a comic does not exist anymore.

    This is more explicit than GenericEmptyComic as it highlights that
    the source of the comic does not exist anymore and it probably cannot
    be fixed. Corresponding classes are kept as we can still use the
    downloaded data. See also GenericUnavailableComic."""
    _categories = ('DELETED', )
404
405
406 View Code Duplication
class ExtraFabulousComics(GenericNavigableComic):
    """Class to retrieve Extra Fabulous Comics."""
    # Also on https://extrafabulouscomics.tumblr.com
    name = 'efc'
    long_name = 'Extra Fabulous Comics'
    url = 'http://extrafabulouscomics.com'
    _categories = ('EFC', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://extrafabulouscomics.com/comic/buttfly/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Images are the 'img' tags whose source lives under the site's
        '/wp-content/uploads/' path; title and date come from meta tags.
        """
        # The URL is escaped so that its dots only match literal dots.
        img_src_re = re.compile('^%s/wp-content/uploads/' % re.escape(cls.url))
        imgs = soup.find_all('img', src=img_src_re)
        title = soup.find('meta', property='og:title')['content']
        # Only the first 10 characters (the YYYY-MM-DD part) are parsed.
        date_str = soup.find('meta', property='article:published_time')['content'][:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        return {
            'title': title,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'prefix': title + '-'
        }
433
434
435 View Code Duplication
class GenericLeMondeBlog(GenericNavigableComic):
    """Generic class to retrieve comics from Le Monde blogs.

    Subclasses must provide `first_url` and may override `date_format`.
    """
    _categories = ('LEMONDE', 'FRANCAIS')
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    # Placeholder: to be overridden by each blog.
    first_url = NotImplemented
    # Format of the text found in the 'entry-date' span.
    date_format = "%d %B %Y"

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Title and images come from the 'og:' meta tags, the short URL from
        the 'shortlink' link tag and the date from the 'entry-date' span.
        """
        url2 = soup.find('link', rel='shortlink')['href']
        title = soup.find('meta', property='og:title')['content']
        date_str = soup.find("span", class_="entry-date").string
        # Third argument is presumably the locale used to parse month names.
        day = string_to_date(date_str, cls.date_format, "fr_FR.utf8")
        imgs = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'url2': url2,
            'img': [convert_iri_to_plain_ascii_uri(i['content']) for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
459
460
461
class ZepWorld(GenericLeMondeBlog):
    """Class to retrieve Zep World comics."""
    name = "zep"
    long_name = "Zep World"
    url = "http://zepworld.blog.lemonde.fr"
    first_url = "http://zepworld.blog.lemonde.fr/2014/10/31/bientot-le-blog-de-zep/"
467
468
469
class Vidberg(GenericLeMondeBlog):
    """Class to retrieve Vidberg comics."""
    name = 'vidberg'
    long_name = "Vidberg - l'actu en patates"
    url = "http://vidberg.blog.lemonde.fr"
    # Not the first but I didn't find an efficient way to retrieve it
    first_url = "http://vidberg.blog.lemonde.fr/2012/02/09/revue-de-campagne-la-campagne-du-modem-semballe/"
476
477
478
class Plantu(GenericLeMondeBlog):
    """Class to retrieve Plantu comics."""
    name = 'plantu'
    long_name = "Plantu"
    url = "http://plantu.blog.lemonde.fr"
    first_url = "http://plantu.blog.lemonde.fr/2014/10/28/stress-test-a-bruxelles/"
484
485
486
class XavierGorce(GenericLeMondeBlog):
    """Class to retrieve Xavier Gorce comics."""
    name = 'gorce'
    long_name = "Xavier Gorce"
    url = "http://xaviergorce.blog.lemonde.fr"
    first_url = "http://xaviergorce.blog.lemonde.fr/2015/01/09/distinction/"
492
493
494
class CartooningForPeace(GenericLeMondeBlog):
    """Class to retrieve Cartooning For Peace comics."""
    name = 'forpeace'
    long_name = "Cartooning For Peace"
    url = "http://cartooningforpeace.blog.lemonde.fr"
    first_url = "http://cartooningforpeace.blog.lemonde.fr/2014/12/15/bado/"
500
501
502
class Aurel(GenericLeMondeBlog):
    """Class to retrieve Aurel comics."""
    name = 'aurel'
    long_name = "Aurel"
    url = "http://aurel.blog.lemonde.fr"
    first_url = "http://aurel.blog.lemonde.fr/2014/09/29/le-senat-repasse-a-droite/"
508
509
510
class LesCulottees(GenericLeMondeBlog):
    """Class to retrieve Les Culottees comics."""
    name = 'culottees'
    long_name = 'Les Culottees'
    url = "http://lesculottees.blog.lemonde.fr"
    first_url = "http://lesculottees.blog.lemonde.fr/2016/01/11/clementine-delait-femme-a-barbe/"
516
517
518
class UneAnneeAuLycee(GenericLeMondeBlog):
    """Class to retrieve Une Annee Au Lycee comics."""
    name = 'lycee'
    long_name = 'Une Annee au Lycee'
    url = 'http://uneanneeaulycee.blog.lemonde.fr'
    first_url = "http://uneanneeaulycee.blog.lemonde.fr/2016/06/13/la-semaine-du-bac-est-arrivee/"
524
525
526
class LisaMandel(GenericLeMondeBlog):
    """Class to retrieve Lisa Mandel comics."""
    name = 'mandel'
    long_name = 'Lisa Mandel (HP, hors-service)'
    url = 'http://lisamandel.blog.lemonde.fr'
    first_url = 'http://lisamandel.blog.lemonde.fr/2016/02/23/premiers-jours-a-calais/'
532
533
534
class Avventura(GenericLeMondeBlog):
    """Class to retrieve L'Avventura comics."""
    name = 'avventura'
    long_name = 'Avventura'
    url = 'http://lavventura.blog.lemonde.fr'
    first_url = 'http://lavventura.blog.lemonde.fr/2013/11/23/roma-paris-aller-simple/'
    # This blog uses a numeric date instead of the default "%d %B %Y".
    date_format = "%d/%m/%Y"
541
542
543
class MorganNavarro(GenericLeMondeBlog):
    """Class to retrieve Morgan Navarro comics."""
    name = 'navarro'
    long_name = 'Morgan Navarro (Ma vie de reac)'
    url = 'http://morgannavarro.blog.lemonde.fr'
    first_url = 'http://morgannavarro.blog.lemonde.fr/2015/09/09/le-doute/'
549
550
551 View Code Duplication
class Rall(GenericComicNotWorking, GenericNavigableComic):
    """Class to retrieve Ted Rall comics."""
    # Also on http://www.gocomics.com/tedrall
    name = 'rall'
    long_name = "Ted Rall"
    url = "http://rall.com/comic"
    _categories = ('RALL', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    # Not the first but I didn't find an efficient way to retrieve it
    first_url = "http://rall.com/2014/01/30/los-angeles-times-cartoon-well-miss-those-california-flowers"

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Title and description come from the 'og:' meta tags; author, date
        and images are read from the page body.
        """
        title = soup.find('meta', property='og:title')['content']
        author = soup.find("span", class_="author vcard").find("a").string
        date_str = soup.find("span", class_="entry-date").string
        day = string_to_date(date_str, "%B %d, %Y")
        desc = soup.find('meta', property='og:description')['content']
        imgs = soup.find('div', class_='entry-content').find_all('img')
        imgs = imgs[:-7]  # remove social media buttons
        return {
            'title': title,
            'author': author,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'description': desc,
            'img': [i['src'] for i in imgs],
        }
582
583
584
class Dilem(GenericNavigableComic):
    """Class to retrieve Ali Dilem comics."""
    name = 'dilem'
    long_name = 'Ali Dilem'
    url = 'http://information.tv5monde.com/dilem'
    _categories = ('FRANCAIS', )
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = "http://information.tv5monde.com/dilem/2004-06-26"

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns the 'a' tag found in the relevant 'li' element, or None
        when that element is missing.
        """
        # prev is next / next is prev
        li = last_soup.find('li', class_='prev' if next_ else 'next')
        return li.find('a') if li else None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Short URL, title, images and date are all read from metadata tags
        of the page.
        """
        short_url = soup.find('link', rel='shortlink')['href']
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        imgs = soup.find_all('meta', property='og:image')
        date_str = soup.find('span', property='dc:date')['content']
        # Only the first 10 characters (the YYYY-MM-DD part) are parsed.
        date_str = date_str[:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        return {
            'short_url': short_url,
            'title': title,
            'img': [i['content'] for i in imgs],
            'day': day.day,
            'month': day.month,
            'year': day.year,
        }
618
619
620
class SpaceAvalanche(GenericNavigableComic):
    """Class to retrieve Space Avalanche comics."""
    name = 'avalanche'
    long_name = 'Space Avalanche'
    url = 'http://www.spaceavalanche.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        The link is hardcoded as a dict exposing the 'href' and 'title'
        keys read by the other methods.
        """
        return {'href': "http://www.spaceavalanche.com/2009/02/02/irish-sea/", 'title': "Irish Sea"}

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The title comes from the link and the date from the three numeric
        path components (year/month/day) of the comic URL.
        """
        url_date_re = re.compile(r'.*/([0-9]*)/([0-9]*)/([0-9]*)/.*$')
        title = link['title']
        url = cls.get_url_from_link(link)
        year, month, day = [int(s)
                            for s in url_date_re.match(url).groups()]
        imgs = soup.find("div", class_="entry").find_all("img")
        return {
            'title': title,
            'day': day,
            'month': month,
            'year': year,
            'img': [i['src'] for i in imgs],
        }
648
649
650
class ZenPencils(GenericNavigableComic):
    """Class to retrieve ZenPencils comics."""
    # Also on http://zenpencils.tumblr.com
    # Also on http://www.gocomics.com/zen-pencils
    name = 'zenpencils'
    long_name = 'Zen Pencils'
    url = 'http://zenpencils.com'
    _categories = ('ZENPENCILS', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = "http://zenpencils.com/comic/1-ralph-waldo-emerson-make-them-cry/"

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Images are taken from the 'comic' div; author, title and date from
        the post content. Image sources are joined to the site URL.
        """
        imgs = soup.find('div', id='comic').find_all('img')
        # imgs2 = soup.find_all('meta', property='og:image')
        post = soup.find('div', class_='post-content')
        author = post.find("span", class_="post-author").find("a").string
        title = soup.find('h2', class_='post-title').string
        date_str = post.find('span', class_='post-date').string
        day = string_to_date(date_str, "%B %d, %Y")
        # Sanity checks on the page structure: at least one image, whose
        # 'alt' and 'title' agree and are either empty or the post title.
        assert imgs
        assert all(i['alt'] == i['title'] for i in imgs)
        assert all(i['alt'] in (title, "") for i in imgs)
        return {
            'title': title,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
        }
683
684
685
class ItsTheTie(GenericDeletedComic, GenericNavigableComic):
    """Class to retrieve It's the tie comics."""
    # Also on http://itsthetie.tumblr.com
    # Also on https://tapastic.com/series/itsthetie
    name = 'tie'
    long_name = "It's the tie"
    url = "http://itsthetie.com"
    _categories = ('TIE', )
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Images are the 'og:image' meta tags followed by the bonus images
        ('data-oversrc' attributes) not already listed, minus the generic
        bonus-panel placeholder.
        """
        title = soup.find('h1', class_='comic-title').find('a').string
        date_str = soup.find('header', class_='comic-meta entry-meta').find('a').string
        day = string_to_date(date_str, "%B %d, %Y")
        # Bonus images may or may not be in meta og:image.
        imgs = soup.find_all('meta', property='og:image')
        imgs_src = [i['content'] for i in imgs]
        bonus = soup.find_all('img', attrs={'data-oversrc': True})
        bonus_src = [b['data-oversrc'] for b in bonus]
        all_imgs_src = imgs_src + [s for s in bonus_src if s not in imgs_src]
        all_imgs_src = [s for s in all_imgs_src if not s.endswith("/2016/01/bonus-panel.png")]
        tag_meta = soup.find('meta', property='article:tag')
        tags = tag_meta['content'] if tag_meta else ""
        return {
            'title': title,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': all_imgs_src,
            'tags': tags,
        }
719
720
721
class PenelopeBagieu(GenericNavigableComic):
    """Class to retrieve comics from Penelope Bagieu's blog."""
    name = 'bagieu'
    long_name = 'Ma vie est tout a fait fascinante (Bagieu)'
    url = 'http://www.penelope-jolicoeur.com'
    _categories = ('FRANCAIS', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.penelope-jolicoeur.com/2007/02/ma-vie-mon-oeuv.html'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Date, images and title are read from the 'date-header',
        'entry-body' and 'entry-header' elements of the page.
        """
        date_str = soup.find('h2', class_='date-header').string
        # Third argument is presumably the locale used to parse day/month names.
        day = string_to_date(date_str, "%A %d %B %Y", "fr_FR.utf8")
        imgs = soup.find('div', class_='entry-body').find_all('img')
        title = soup.find('h3', class_='entry-header').string
        return {
            'title': title,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
745
746
747
class OneOneOneOneComic(GenericComicNotWorking, GenericNavigableComic):
    """Class to retrieve 1111 Comics."""
    # Also on http://comics1111.tumblr.com
    # Also on https://tapastic.com/series/1111-Comics
    name = '1111'
    long_name = '1111 Comics'
    url = 'http://www.1111comics.me'
    _categories = ('ONEONEONEONE', )
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Title and date come from the comic header; images from the
        'og:image' meta tags.
        """
        title = soup.find('h1', class_='comic-title').find('a').string
        date_str = soup.find('header', class_='comic-meta entry-meta').find('a').string
        day = string_to_date(date_str, "%B %d, %Y")
        imgs = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [i['content'] for i in imgs],
        }
772
773
774
class AngryAtNothing(GenericDeletedComic, GenericNavigableComic):
    """Retriever for Angry at Nothing comics."""
    # Also on http://tapastic.com/series/Comics-yeah-definitely-comics-
    # Also on http://angryatnothing.tumblr.com
    name = 'angry'
    long_name = 'Angry At Nothing'
    url = 'http://www.angryatnothing.net'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the title, publication date and image URLs of a comic page.

        Images are taken from the og:image meta tags; the date string found
        in the entry-meta header is parsed with the "%B %d, %Y" format.
        """
        title_link = soup.find('h1', class_='comic-title').find('a')
        title = title_link.string
        date_link = soup.find('header', class_='comic-meta entry-meta').find('a')
        when = string_to_date(date_link.string, "%B %d, %Y")
        metas = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'month': when.month,
            'year': when.year,
            'day': when.day,
            'img': [meta['content'] for meta in metas],
        }
798
799
800
class NeDroid(GenericNavigableComic):
    """Class to retrieve NeDroid comics."""
    name = 'nedroid'
    long_name = 'NeDroid'
    url = 'http://nedroid.com'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        The comic number is parsed out of the page's shortlink, which is
        expected to look like ``<cls.url>/?p=<num>``.  The page must contain
        exactly one image in the 'comic' div; its ``alt`` and ``title``
        attributes provide the two titles.

        Raises AttributeError if the shortlink does not match the expected
        pattern and AssertionError if there is not exactly one comic image.
        """
        # re.escape keeps the dots of the base URL from matching any character.
        short_url_re = re.compile(r'^%s/\?p=([0-9]*)' % re.escape(cls.url))
        short_url = cls.get_url_from_link(soup.find('link', rel='shortlink'))
        num = int(short_url_re.match(short_url).groups()[0])
        imgs = soup.find('div', id='comic').find_all('img')
        assert len(imgs) == 1, imgs
        img = imgs[0]
        return {
            'short_url': short_url,
            'title': img['alt'],
            'title2': img['title'],
            'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
            'num': num,
        }
826
827
828
class Garfield(GenericNavigableComic):
    """Class to retrieve Garfield comics."""
    # Also on http://www.gocomics.com/garfield
    name = 'garfield'
    long_name = 'Garfield'
    url = 'https://garfield.com'
    _categories = ('GARFIELD', )
    get_first_comic_link = simulate_first_link
    first_url = 'https://garfield.com/comic/1978/06/19'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next (if next_ is true) or previous comic.

        Returns whatever ``find`` yields for the matching arrow anchor, i.e.
        None when the page has no such arrow.
        """
        arrow_class = 'comic-arrow-right' if next_ else 'comic-arrow-left'
        return last_soup.find('a', class_=arrow_class)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        The date is parsed from the comic URL, which is expected to look like
        ``<cls.url>/comic/<year>/<month>/<day>``; the images are the
        'img-responsive' images of the 'comic-display' div.
        """
        url = cls.get_url_from_link(link)
        # re.escape keeps the dots of the base URL from matching any character.
        date_re = re.compile(
            r'^%s/comic/([0-9]*)/([0-9]*)/([0-9]*)' % re.escape(cls.url))
        year, month, day = [int(s) for s in date_re.match(url).groups()]
        imgs = soup.find('div', class_='comic-display').find_all('img', class_='img-responsive')
        return {
            'month': month,
            'year': year,
            'day': day,
            'img': [i['src'] for i in imgs],
        }
856
857
858 View Code Duplication
class Dilbert(GenericNavigableComic):
    """Retriever for Dilbert comics."""
    # Also on http://www.gocomics.com/dilbert-classics
    name = 'dilbert'
    long_name = 'Dilbert'
    url = 'http://dilbert.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://dilbert.com/strip/1989-04-16'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor leading to the next/previous strip, or None.

        The anchor is looked up inside the 'nav-right' (next) or 'nav-left'
        (previous) navigation div; None is returned when that div is absent.
        """
        wrapper_class = 'nav-comic nav-right' if next_ else 'nav-comic nav-left'
        wrapper = last_soup.find('div', class_=wrapper_class)
        if not wrapper:
            return None
        return wrapper.find('a')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect strip metadata from the page's <meta property=...> tags."""
        def meta_content(prop):
            """Return the content attribute of the first meta tag for prop."""
            return soup.find('meta', property=prop)['content']

        title = meta_content('og:title')
        og_images = soup.find_all('meta', property='og:image')
        description = meta_content('og:description')
        published = string_to_date(meta_content('article:publish_date'), "%B %d, %Y")
        author = meta_content('article:author')
        tags = meta_content('article:tag')
        return {
            'title': title,
            'description': description,
            'img': [tag['content'] for tag in og_images],
            'author': author,
            'tags': tags,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
894
895
896
class VictimsOfCircumsolar(GenericDeletedComic, GenericNavigableComic):
    """Retriever for Victims Of Circumsolar comics."""
    # Also on https://victimsofcomics.tumblr.com
    name = 'circumsolar'
    long_name = 'Victims Of Circumsolar'
    url = 'http://www.victimsofcircumsolar.com'
    get_navi_link = get_a_navi_comicnavnext_navinext
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.victimsofcircumsolar.com/comic/modern-addiction'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, description and image URLs for one comic page.

        No date is returned: the date is on the archive page only.  The last
        og:title / og:description meta tag of the page is the one used.
        """
        title_metas = soup.find_all('meta', property='og:title')
        title = title_metas[-1]['content']
        desc_metas = soup.find_all('meta', property='og:description')
        description = desc_metas[-1]['content']
        pictures = soup.find('div', id='comic').find_all('img')
        # Sanity check: every image is labelled with the page title.
        assert all(pic['title'] == pic['alt'] == title for pic in pictures)
        return {
            'title': title,
            'description': description,
            'img': [pic['src'] for pic in pictures],
        }
919
920
921
class ThreeWordPhrase(GenericNavigableComic):
    """Class to retrieve Three Word Phrase comics."""
    # Also on http://www.threewordphrase.tumblr.com
    name = 'threeword'
    long_name = 'Three Word Phrase'
    url = 'http://threewordphrase.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comic.

        The link is the parent element of the '/firstlink.gif' image on the
        home page; None is returned when that image is not found.
        """
        img = get_soup_at_url(cls.url).find('img', src='/firstlink.gif')
        return None if img is None else img.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        The link is the parent element of the next/prev button image.
        Returns None when the button image is missing or when its parent
        carries no href attribute.
        """
        img = last_soup.find('img', src='/nextlink.gif' if next_ else '/prevlink.gif')
        if img is None:
            # No button on the page: there is nothing to navigate to.
            return None
        link = img.parent
        return None if link.get('href') is None else link

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        Every image of the page is considered part of the comic except those
        whose src ends with one of the excluded file names listed below.
        """
        title = soup.find('title')
        imgs = [img for img in soup.find_all('img')
                if not img['src'].endswith(
                    ('link.gif', '32.png', 'twpbookad.jpg',
                     'merchad.jpg', 'header.gif', 'tipjar.jpg'))]
        return {
            'title': title.string if title else None,
            'title2': '  '.join(img.get('alt') for img in imgs if img.get('alt')),
            'img': [urljoin_wrapper(cls.url, img['src']) for img in imgs],
        }
953
954
955
class DeadlyPanel(GenericComicNotWorking, GenericNavigableComic):  # Not working on my machine
    """Retriever for Deadly Panel comics."""
    # Also on https://tapastic.com/series/deadlypanel
    # Also on https://deadlypanel.tumblr.com
    name = 'deadly'
    long_name = 'Deadly Panel'
    url = 'http://www.deadlypanel.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the image URLs found in the 'comic' div of the page."""
        comic_div = soup.find('div', id='comic')
        pictures = comic_div.find_all('img')
        # Sanity check: alt and title attributes agree on every image.
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        return {
            'img': [pic['src'] for pic in pictures],
        }
973
974
975 View Code Duplication
class TheGentlemanArmchair(GenericNavigableComic):
    """Retriever for The Gentleman Armchair comics."""
    name = 'gentlemanarmchair'
    long_name = 'The Gentleman Armchair'
    url = 'http://thegentlemansarmchair.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return images, title, author and publication date of a comic page.

        Title, author and date come from the 'post-title', 'post-author' and
        'post-date' elements; the date is parsed with "%B %d, %Y".
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        date_span = soup.find('span', class_='post-date')
        published = string_to_date(date_span.string, "%B %d, %Y")
        pictures = soup.find('div', id='comic').find_all('img')
        return {
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'author': author,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
999
1000
1001 View Code Duplication
class ImogenQuest(GenericNavigableComic):
    """Retriever for Imogen Quest comics."""
    # Also on http://imoquest.tumblr.com
    name = 'imogen'
    long_name = 'Imogen Quest'
    url = 'http://imogenquest.net'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return date, images, both titles and author of a comic page.

        The secondary title ('title2') is the title attribute of the first
        image found in the 'comicpane' div.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        date_span = soup.find('span', class_='post-date')
        published = string_to_date(date_span.string, '%B %d, %Y')
        pictures = soup.find('div', class_='comicpane').find_all('img')
        # Sanity check: alt and title attributes agree on every image.
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        hover_text = pictures[0]['title']
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'title2': hover_text,
            'author': author,
        }
1029
1030
1031
class MyExtraLife(GenericNavigableComic):
    """Retriever for My Extra Life comics."""
    name = 'extralife'
    long_name = 'My Extra Life'
    url = 'http://www.myextralife.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'first comic' navigation anchor of the home page."""
        home = get_soup_at_url(cls.url)
        return home.find('a', class_='comic_nav_link first_comic_link')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, image URLs and publication date of a comic page.

        Images with an empty src attribute are left out of the result.
        """
        title = soup.find("h1", class_="comic_title").string
        date_span = soup.find("span", class_="comic_date")
        published = string_to_date(date_span.string, "%B %d, %Y")
        pictures = soup.find_all("img", class_="comic")
        # Sanity check: every image is labelled with the comic title.
        assert all(pic['alt'] == pic['title'] == title for pic in pictures)
        return {
            'title': title,
            'img': [pic['src'] for pic in pictures if pic["src"]],
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
1058
1059
1060
class SaturdayMorningBreakfastCereal(GenericNavigableComic):
    """Retriever for Saturday Morning Breakfast Cereal comics."""
    # Also on http://www.gocomics.com/saturday-morning-breakfast-cereal
    # Also on http://smbc-comics.tumblr.com
    name = 'smbc'
    long_name = 'Saturday Morning Breakfast Cereal'
    url = 'http://www.smbc-comics.com'
    _categories = ('SMBC', )
    get_navi_link = get_a_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the rel='first' anchor of the home page."""
        home = get_soup_at_url(cls.url)
        return home.find('a', rel='first')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, image URLs and publication date of a comic page.

        The main image is the 'cc-comic' one; when an 'aftercomic' div is
        present its image (if it has a non-empty src) is appended as a
        second image.
        """
        main_img = soup.find('img', id='cc-comic')
        sources = [main_img['src']]
        after_div = soup.find('div', id='aftercomic')
        if after_div:
            bonus_src = after_div.find('img')['src']
            if bonus_src:
                sources.append(bonus_src)
        publish_div = soup.find('div', class_='cc-publishtime')
        published = string_to_date(publish_div.contents[0], "Posted %B %d, %Y at %I:%M %p")
        return {
            'title': main_img['title'],
            'img': [convert_iri_to_plain_ascii_uri(urljoin_wrapper(cls.url, src)) for src in sources],
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
1092
1093
1094 View Code Duplication
class PerryBibleFellowship(GenericListableComic):  # Is now navigable too
    """Retriever for Perry Bible Fellowship comics."""
    name = 'pbf'
    long_name = 'Perry Bible Fellowship'
    url = 'http://pbfcomics.com'
    get_url_from_archive_element = join_cls_url_to_href

    @classmethod
    def get_archive_elements(cls):
        """Return the thumbnail anchors of the home page, in reversed order."""
        home = get_soup_at_url(cls.url)
        anchors = home.find('div', id='all_thumbnails').find_all('a')
        return reversed(anchors)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the name and the single og:image URL of a comic page."""
        name = soup.find('meta', property='og:title')['content']
        og_images = soup.find_all('meta', property='og:image')
        # Exactly one og:image tag is expected per comic page.
        assert len(og_images) == 1, og_images
        return {
            'name': name,
            'img': [tag['content'] for tag in og_images],
        }
1117
1118
1119
class Mercworks(GenericDeletedComic):  # Moved to Webtoons
    """Retriever for Mercworks comics."""
    # Also on http://mercworks.tumblr.com
    # Also on http://www.webtoons.com/en/comedy/mercworks/list?title_no=426
    # Also on https://tapastic.com/series/MercWorks
    name = 'mercworks'
    long_name = 'Mercworks'
    url = 'http://mercworks.net'
    _categories = ('MERCWORKS', )
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return images, title, description and date from the meta tags.

        The description is the empty string when the page has no
        og:description tag; the date is the first 10 characters
        (YYYY-MM-DD) of the article:published_time value.
        """
        title = soup.find('meta', property='og:title')['content']
        desc_meta = soup.find('meta', property='og:description')
        if desc_meta:
            description = desc_meta['content']
        else:
            description = ""
        published_time = soup.find('meta', property='article:published_time')['content']
        published = string_to_date(published_time[:10], "%Y-%m-%d")
        og_images = soup.find_all('meta', property='og:image')
        return {
            'img': [tag['content'] for tag in og_images],
            'title': title,
            'desc': description,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
1148
1149
1150
class BerkeleyMews(GenericListableComic):
    """Class to retrieve Berkeley Mews comics."""
    # Also on http://mews.tumblr.com
    # Also on http://www.gocomics.com/berkeley-mews
    name = 'berkeley'
    long_name = 'Berkeley Mews'
    url = 'http://www.berkeleymews.com'
    _categories = ('BERKELEY', )
    get_url_from_archive_element = get_href
    # Matches comic URLs of the form <url>/?p=<num>; re.escape keeps the dots
    # of the base URL from matching any character.
    comic_num_re = re.compile(r'%s/\?p=([0-9]*)$' % re.escape(url))

    @classmethod
    def get_archive_elements(cls):
        """Return the comic links of the archive page, in reversed order."""
        archive_url = urljoin_wrapper(cls.url, "?page_id=2")
        return reversed(get_soup_at_url(archive_url).find_all('a', href=cls.comic_num_re))

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        The comic number comes from the archive link URL, the title from the
        archive link text and the date from the image file name, which is
        expected to contain ``/<year>-<month>-<day>-``.
        """
        comic_date_re = re.compile(r'.*/([0-9]*)-([0-9]*)-([0-9]*)-.*')
        url = cls.get_url_from_archive_element(link)
        num = int(cls.comic_num_re.match(url).groups()[0])
        img = soup.find('div', id='comic').find('img')
        # Sanity check: alt and title attributes agree on the comic image.
        assert img['alt'] == img['title']
        title2 = img['title']
        img_url = img['src']
        year, month, day = [int(s) for s in comic_date_re.match(img_url).groups()]
        return {
            'num': num,
            'title': link.string,
            'title2': title2,
            'img': [img_url],
            'year': year,
            'month': month,
            'day': day,
        }
1186
1187
1188
class GenericBouletCorp(GenericNavigableComic):
    """Generic class to retrieve BouletCorp comics in different languages.

    Subclasses are expected to provide the ``url`` attribute (and the usual
    ``name`` / ``long_name``), which this class reads through ``cls.url``.
    """
    # Also on https://bouletcorp.tumblr.com
    _categories = ('BOULET', )
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comic: first anchor of the 'centered_nav' div."""
        return get_soup_at_url(cls.url).find('div', id='centered_nav').find_all('a')[0]

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        The date is parsed from the comic URL, which is expected to start
        with ``<cls.url>/<year>/<month>/<day>/``.  Images without a src
        attribute are ignored; 'texts' joins the non-empty title attributes
        of the images.
        """
        url = cls.get_url_from_link(link)
        # re.escape keeps the dots of the base URL from matching any character.
        date_re = re.compile(
            r'^%s/([0-9]*)/([0-9]*)/([0-9]*)/' % re.escape(cls.url))
        year, month, day = [int(s) for s in date_re.match(url).groups()]
        imgs = soup.find('div', id='notes').find('div', class_='storycontent').find_all('img')
        texts = '  '.join(t for t in (i.get('title') for i in imgs) if t)
        title = soup.find('title').string
        return {
            'img': [convert_iri_to_plain_ascii_uri(i['src']) for i in imgs if i.get('src') is not None],
            'title': title,
            'texts': texts,
            'year': year,
            'month': month,
            'day': day,
        }
1216
1217
1218
class BouletCorp(GenericBouletCorp):
    """Retriever for the BouletCorp comics served from www.bouletcorp.com."""
    name = 'boulet'
    long_name = 'Boulet Corp'
    url = 'http://www.bouletcorp.com'
    # NOTE(review): this rebinds _categories, which GenericBouletCorp sets to
    # ('BOULET', ); whether both values end up being used depends on how the
    # base classes read _categories -- confirm.
    _categories = ('FRANCAIS',)
1224
1225
1226
class BouletCorpEn(GenericBouletCorp):
    """Retriever for the BouletCorp comics served from english.bouletcorp.com."""
    name = 'boulet_en'
    long_name = 'Boulet Corp English'
    url = 'http://english.bouletcorp.com'
1231
1232
1233 View Code Duplication
class AmazingSuperPowers(GenericNavigableComic):
    """Retriever for Amazing Super Powers comics."""
    name = 'asp'
    long_name = 'Amazing Super Powers'
    url = 'http://www.amazingsuperpowers.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, author, image URLs and date of a comic page.

        The title is the space-joined title attributes of the images found
        in the 'comic' div.
        """
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        date_span = soup.find('span', class_='post-date')
        published = string_to_date(date_span.string, "%B %d, %Y")
        pictures = soup.find('div', id='comic').find_all('img')
        title = ' '.join(pic['title'] for pic in pictures)
        # Sanity check: alt and title attributes agree on every image.
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        return {
            'title': title,
            'author': author,
            'img': [pic['src'] for pic in pictures],
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
1258
1259
1260 View Code Duplication
class ToonHole(GenericNavigableComic):
    """Retriever for Toon Hole comics."""
    # Also on http://tapastic.com/series/TOONHOLE
    name = 'toonhole'
    long_name = 'Toon Hole'
    url = 'http://www.toonhole.com'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_a_comicnavbase_comicnavnext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, publication date and image URLs of a comic page.

        The title is the alt attribute of the first image of the 'comic'
        div, or the empty string when that div holds no image.
        """
        meta_div = soup.find('div', class_='entry-meta')
        published = string_to_date(meta_div.contents[0].strip(), "%B %d, %Y")
        pictures = soup.find('div', id='comic').find_all('img')
        title = ""
        if pictures:
            first = pictures[0]
            title = first['alt']
            # Sanity check: alt and title attributes agree on the first image.
            assert first['title'] == title
        return {
            'title': title,
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [convert_iri_to_plain_ascii_uri(pic['src']) for pic in pictures],
        }
1288
1289
1290
class Channelate(GenericNavigableComic):
    """Retriever for Channelate comics."""
    name = 'channelate'
    long_name = 'Channelate'
    url = 'http://www.channelate.com'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return metadata and image URLs, including the extra panel if any.

        When the page has an 'extrapanelbutton' div, the page it links to is
        fetched as well and its 'extrapanelimage' images are appended to the
        images of the main comic.
        """
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        date_span = soup.find('span', class_='post-date')
        published = string_to_date(date_span.string, '%Y/%m/%d')
        title = soup.find('meta', property='og:title')['content']
        comic_div = soup.find('div', id='comic')
        pictures = comic_div.find_all('img') if comic_div else []
        bonus_url = None
        bonus_button = soup.find('div', id='extrapanelbutton')
        if bonus_button:
            bonus_url = bonus_button.find('a')['href']
            bonus_page = get_soup_at_url(bonus_url)
            pictures.extend(bonus_page.find_all('img', class_='extrapanelimage'))
        return {
            'url_extra': bonus_url,
            'title': title,
            'author': author,
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [urljoin_wrapper(cls.url, pic['src']) for pic in pictures],
        }
1324
1325
1326
class CyanideAndHappiness(GenericNavigableComic):
    """Class to retrieve Cyanide And Happiness comics."""
    name = 'cyanide'
    long_name = 'Cyanide and Happiness'
    url = 'http://explosm.net'
    _categories = ('NSFW', )
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comic (the anchor titled 'Oldest comic')."""
        return get_soup_at_url(cls.url).find('a', title='Oldest comic')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns None when the navigation anchor is missing from the page or
        when it carries no href attribute.
        """
        link = last_soup.find('a', class_='nav-next' if next_ else 'nav-previous')
        # find() yields None when the anchor is absent; treat that like an
        # anchor without href instead of failing on link.get.
        if link is None or link.get('href') is None:
            return None
        return link

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        The comic number is the second-to-last '/'-separated component of
        the og:url value.  The 'comic-author' div text is split on
        '\\nby ' into the date ('%Y.%m.%d' format) and the author.
        """
        url2 = soup.find('meta', property='og:url')['content']
        num = int(url2.split('/')[-2])
        date_str, _, author = soup.find('div', id='comic-author').text.strip().partition('\nby ')
        day = string_to_date(date_str, '%Y.%m.%d')
        imgs = soup.find_all('img', id='main-comic')
        return {
            'num': num,
            'author': author,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'prefix': '%d-' % num,
            'img': [convert_iri_to_plain_ascii_uri(urljoin_wrapper(cls.url, i['src'])) for i in imgs]
        }
1362
1363
1364
class MrLovenstein(GenericComic):
    """Retriever for Mr Lovenstein comics."""
    # Also on https://tapastic.com/series/MrLovenstein
    name = 'mrlovenstein'
    long_name = 'Mr. Lovenstein'
    url = 'http://www.mrlovenstein.com'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Yield info dicts for the comics following last_comic.

        The range of comic numbers is derived from the '/comic/<num>' links
        of the home page; iteration starts right after last_comic['num']
        when a last comic is given, at the smallest linked number otherwise.
        """
        # TODO: more info from http://www.mrlovenstein.com/archive
        comic_num_re = re.compile('^/comic/([0-9]*)$')
        comic_img_re = re.compile('^/images/comics/')
        home = get_soup_at_url(cls.url)
        nums = [int(comic_num_re.match(anchor['href']).groups()[0])
                for anchor in home.find_all('a', href=comic_num_re)]
        first, last = min(nums), max(nums)
        if last_comic:
            first = last_comic['num'] + 1
        for num in range(first, last + 1):
            url = urljoin_wrapper(cls.url, '/comic/%d' % num)
            page = get_soup_at_url(url)
            pictures = list(reversed(page.find_all('img', src=comic_img_re)))
            description = page.find('meta', attrs={'name': 'description'})['content']
            hover_texts = (pic.get('title') for pic in pictures)
            yield {
                'url': url,
                'num': num,
                'texts': '  '.join(text for text in hover_texts if text),
                'img': [urljoin_wrapper(url, pic['src']) for pic in pictures],
                'description': description,
            }
1394
1395
1396
class DinosaurComics(GenericListableComic):
    """Class to retrieve Dinosaur Comics comics."""
    name = 'dinosaur'
    long_name = 'Dinosaur Comics'
    url = 'http://www.qwantz.com'
    get_url_from_archive_element = get_href
    # Matches comic URLs of the form <url>/index.php?comic=<num>; the dots of
    # the base URL and of 'index.php' are escaped so they only match dots.
    comic_link_re = re.compile(r'^%s/index\.php\?comic=([0-9]*)$' % re.escape(url))

    @classmethod
    def get_archive_elements(cls):
        """Return the comic links of the archive page, in reversed order."""
        archive_url = urljoin_wrapper(cls.url, 'archive.php')
        # first link is random -> skip it
        return reversed(get_soup_at_url(archive_url).find_all('a', href=cls.comic_link_re)[1:])

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        The number comes from the archive link URL, the date from the
        archive link text (ordinal suffixes removed before parsing) and the
        text from the sibling following the archive link.  The image is the
        first one whose src starts with ``<cls.url>/comics/``.
        """
        url = cls.get_url_from_archive_element(link)
        num = int(cls.comic_link_re.match(url).groups()[0])
        date_str = link.string
        text = link.next_sibling.string
        day = string_to_date(remove_st_nd_rd_th_from_date(date_str), "%B %d, %Y")
        # re.escape keeps the dots of the base URL from matching any character.
        comic_img_re = re.compile(r'^%s/comics/' % re.escape(cls.url))
        img = soup.find('img', src=comic_img_re)
        return {
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [img.get('src')],
            'title': img.get('title'),
            'text': text,
            'num': num,
        }
1429
1430
1431 View Code Duplication
class ButterSafe(GenericListableComic):
    """Class to retrieve Butter Safe comics."""
    name = 'butter'
    long_name = 'ButterSafe'
    url = 'http://buttersafe.com'
    get_url_from_archive_element = get_href
    # Matches comic URLs of the form <url>/<year>/<month>/<day>/...; re.escape
    # keeps the dot of the base URL from matching any character.
    comic_link_re = re.compile(r'^%s/([0-9]*)/([0-9]*)/([0-9]*)/.*' % re.escape(url))

    @classmethod
    def get_archive_elements(cls):
        """Return the comic links of the archive page, in reversed order."""
        archive_url = urljoin_wrapper(cls.url, 'archive/')
        return reversed(get_soup_at_url(archive_url).find_all('a', href=cls.comic_link_re))

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        The title is the archive link text and the date is parsed from the
        archive link URL; the image is the first one of the 'comic' div and
        its alt attribute must equal the title.
        """
        url = cls.get_url_from_archive_element(link)
        title = link.string
        year, month, day = [int(s) for s in cls.comic_link_re.match(url).groups()]
        img = soup.find('div', id='comic').find('img')
        assert img['alt'] == title
        return {
            'title': title,
            'day': day,
            'month': month,
            'year': year,
            'img': [img['src']],
        }
1459
1460
1461
class CalvinAndHobbes(GenericComic):
    """Class to retrieve Calvin and Hobbes comics."""
    # Also on http://www.gocomics.com/calvinandhobbes/
    name = 'calvin'
    long_name = 'Calvin and Hobbes'
    # This is not through any official webpage but eh...
    url = 'http://marcel-oehler.marcellosendos.ch/comics/ch/'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generator to get the next comic. Implementation of GenericComic's abstract method.

        The index page links to one page per month ('<year>/<month>/' hrefs);
        each month page holds images whose src starts with
        '<year><month><day>'.  Only comics dated strictly after the last
        retrieved one (or after 1985-11-01 when there is none) are yielded.
        """
        last_date = get_date_for_comic(
            last_comic) if last_comic else date(1985, 11, 1)
        link_re = re.compile('^([0-9]*)/([0-9]*)/')
        for link in get_soup_at_url(cls.url).find_all('a', href=link_re):
            url = link['href']
            # Keep the matched strings for building patterns and URLs, and
            # convert to integers once for the date arithmetic.
            year, month = link_re.match(url).groups()
            year_num, month_num = int(year), int(month)
            # A month page is worth fetching only if it may still hold comics
            # after last_date; 31 days covers the longest possible month.
            if date(year_num, month_num, 1) + timedelta(days=31) < last_date:
                continue
            img_re = re.compile('^%s%s([0-9]*)' % (year, month))
            month_url = urljoin_wrapper(cls.url, url)
            for img in get_soup_at_url(month_url).find_all('img', src=img_re):
                img_src = img['src']
                day = int(img_re.match(img_src).groups()[0])
                comic_date = date(year_num, month_num, day)
                if comic_date > last_date:
                    yield {
                        'url': month_url,
                        'year': year_num,
                        'month': month_num,
                        'day': day,
                        'img': ['%s%s/%s/%s' % (cls.url, year, month, img_src)],
                    }
                    last_date = comic_date
1495
1496
1497 View Code Duplication
class AbstruseGoose(GenericListableComic):
    """Class to retrieve AbstruseGoose Comics."""
    name = 'abstruse'
    long_name = 'Abstruse Goose'
    url = 'http://abstrusegoose.com'
    get_url_from_archive_element = get_href
    # Both patterns are anchored on the site url defined just above.
    comic_url_re = re.compile('^%s/([0-9]*)$' % url)
    comic_img_re = re.compile('^%s/strips/.*' % url)

    @classmethod
    def get_archive_elements(cls):
        """Return the archive page links whose href matches comic_url_re."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive'))
        return archive_soup.find_all('a', href=cls.comic_url_re)

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Get information about a particular comics."""
        url_match = cls.comic_url_re.match(
            cls.get_url_from_archive_element(archive_elt))
        # The comic number is the numeric part of the comic url.
        number = int(url_match.group(1))
        strip_imgs = soup.find_all('img', src=cls.comic_img_re)
        return {
            'num': number,
            'title': archive_elt.string,
            'img': [tag['src'] for tag in strip_imgs],
        }
1521
1522
1523
class PhDComics(GenericNavigableComic):
    """Class to retrieve PHD Comics."""
    name = 'phd'
    long_name = 'PhD Comics'
    url = 'http://phdcomics.com/comics/archive.php'

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics."""
        # The link is the parent element of the "first" button image.
        button = get_soup_at_url(cls.url).find(
            'img', src='http://phdcomics.com/comics/images/first_button.gif')
        if button is None:
            return None
        return button.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic."""
        direction = 'next' if next_ else 'prev'
        button_src = 'http://phdcomics.com/comics/images/%s_button.gif' % direction
        button = last_soup.find('img', src=button_src)
        if button is None:
            return None
        return button.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics."""
        title_meta = soup.find('meta', attrs={'name': 'twitter:title'})
        title = title_meta['content']
        image_metas = soup.find_all('meta', property='og:image')
        return {
            'img': [meta['content'] for meta in image_metas],
            'title': title,
        }
1552
1553
1554
class Quarktees(GenericNavigableComic):
    """Class to retrieve the Quarktees comics."""
    name = 'quarktees'
    long_name = 'Quarktees'
    url = 'http://www.quarktees.com/blogs/news'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.quarktees.com/blogs/news/12486621-coming-soon'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic."""
        anchor_id = 'article-next' if next_ else 'article-prev'
        return last_soup.find('a', id=anchor_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics."""
        title = soup.find('meta', property='og:title')['content']
        article_imgs = soup.find('div', class_='single-article').find_all('img')
        # Image sources are resolved against the class url.
        img_urls = [urljoin_wrapper(cls.url, tag['src']) for tag in article_imgs]
        return {
            'title': title,
            'img': img_urls,
        }
1578
1579
1580
class OverCompensating(GenericNavigableComic):
    """Class to retrieve the Over Compensating comics."""
    name = 'compensating'
    long_name = 'Over Compensating'
    url = 'http://www.overcompensating.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics."""
        first_href_re = re.compile('comic=1$')
        return get_soup_at_url(cls.url).find('a', href=first_href_re)

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic."""
        link_title = 'next comic' if next_ else 'go back already'
        return last_soup.find('a', title=link_title)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics."""
        comic_url = cls.get_url_from_link(link)
        # The comic number is the trailing 'comic=<digits>' part of the url.
        num_match = re.compile('.*comic=([0-9]*)$').match(comic_url)
        number = int(num_match.group(1))
        strip = soup.find('img', src=re.compile('^/oc/comics/.*'))
        return {
            'num': number,
            'img': [urljoin_wrapper(comic_url, strip['src'])],
            'title': strip.get('title')
        }
1610
1611
1612
class Oglaf(GenericNavigableComic):
    """Class to retrieve Oglaf comics."""
    name = 'oglaf'
    long_name = 'Oglaf [NSFW]'
    url = 'http://oglaf.com'
    _categories = ('NSFW', )
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics."""
        start_div = get_soup_at_url(cls.url).find("div", id="st")
        return start_div.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic."""
        arrow_div = last_soup.find("div", id="nx" if next_ else "pvs")
        if not arrow_div:
            return None
        return arrow_div.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics."""
        title = soup.find('title').string
        title_imgs = soup.find('div', id='tt').find_all('img')
        assert len(title_imgs) == 1, title_imgs
        strip_imgs = soup.find_all('img', id='strip')
        assert len(strip_imgs) == 1, strip_imgs
        # Title image first, then the strip itself.
        all_imgs = [*title_imgs, *strip_imgs]
        description = ' '.join([tag['title'] for tag in all_imgs])
        return {
            'title': title,
            'img': [tag['src'] for tag in all_imgs],
            'description': description,
        }
1646
1647
1648
class ScandinaviaAndTheWorld(GenericNavigableComic):
    """Class to retrieve Scandinavia And The World comics."""
    name = 'satw'
    long_name = 'Scandinavia And The World'
    url = 'http://satwcomic.com'
    get_first_comic_link = simulate_first_link
    first_url = 'http://satwcomic.com/sweden-denmark-and-norway'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic."""
        # Navigation anchors are looked up through their accesskey attribute.
        key = 'n' if next_ else 'p'
        return last_soup.find('a', accesskey=key)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics."""
        title_meta = soup.find('meta', attrs={'name': 'twitter:label1'})
        title = title_meta['content']
        desc_meta = soup.find('meta', property='og:description')
        description = desc_meta['content']
        image_tags = soup.find_all('img', itemprop="image")
        return {
            'title': title,
            'description': description,
            'img': [tag['src'] for tag in image_tags],
        }
1672
1673
1674
class SomethingOfThatIlk(GenericDeletedComic):
    """Class to retrieve the Something Of That Ilk comics.

    Only identification attributes are defined here; everything else comes
    from GenericDeletedComic.
    """
    name = 'somethingofthatilk'
    long_name = 'Something Of That Ilk'
    url = 'http://www.somethingofthatilk.com'
1679
1680
1681
class MonkeyUser(GenericNavigableComic):
    """Class to retrieve Monkey User comics."""
    name = 'monkeyuser'
    long_name = 'Monkey User'
    url = 'http://www.monkeyuser.com'
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.monkeyuser.com/2016/project-lifecycle/'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic."""
        # The anchor sits inside a div titled 'next' or 'previous'.
        wrapper = last_soup.find('div', title='next' if next_ else 'previous')
        if wrapper is None:
            return None
        return wrapper.find('a')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics."""
        title = soup.find('meta', property='og:title')['content']
        description = soup.find('meta', property='og:description')['content']
        image_metas = soup.find_all('meta', property='og:image')
        time_tag = soup.find('span', class_='post-date').find('time')
        published = string_to_date(time_tag.string, "%d %b %Y")
        return {
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [meta['content'] for meta in image_metas],
            'title': title,
            'description': description,
        }
1712
1713
1714
class InfiniteMonkeyBusiness(GenericNavigableComic):
    """Class to retrieve InfiniteMonkeyBusiness comics."""
    name = 'monkey'
    long_name = 'Infinite Monkey Business'
    url = 'http://infinitemonkeybusiness.net'
    get_navi_link = get_a_navi_comicnavnext_navinext
    get_first_comic_link = simulate_first_link
    first_url = 'http://infinitemonkeybusiness.net/comic/pillory/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics."""
        title_meta = soup.find('meta', property='og:title')
        title = title_meta['content']
        comic_div = soup.find('div', id='comic')
        img_srcs = [tag['src'] for tag in comic_div.find_all('img')]
        return {
            'title': title,
            'img': img_srcs,
        }
1732
1733
1734
class Wondermark(GenericListableComic):
    """Class to retrieve the Wondermark comics."""
    name = 'wondermark'
    long_name = 'Wondermark'
    url = 'http://wondermark.com'
    get_url_from_archive_element = get_href

    @classmethod
    def get_archive_elements(cls):
        """Return the archive page's rel='bookmark' links, in reverse page order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive/'))
        bookmarks = archive_soup.find_all('a', rel='bookmark')
        return reversed(bookmarks)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics."""
        raw_date = soup.find('div', class_='postdate').find('em').string
        published = string_to_date(
            remove_st_nd_rd_th_from_date(raw_date), "%B %d, %Y")
        comic_div = soup.find('div', id='comic')
        if not comic_div:
            # Page without a comic div: image, alt and title are left empty.
            img_src = []
            alt = ''
            title = ''
        else:
            strip = comic_div.find('img')
            img_src = [strip['src']]
            alt = strip['alt']
            assert alt == strip['title']
            title = soup.find('meta', property='og:title')['content']
        tag_links = soup.find('div', class_='postmeta').find_all('a', rel='tag')
        tags = ' '.join([anchor.string for anchor in tag_links])
        return {
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': img_src,
            'title': title,
            'alt': alt,
            'tags': tags,
        }
1771
1772
1773
class WarehouseComic(GenericNavigableComic):
    """Class to retrieve Warehouse Comic comics."""
    name = 'warehouse'
    long_name = 'Warehouse Comic'
    url = 'http://warehousecomic.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics."""
        title = soup.find('h2', class_='post-title').string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, "%B %d, %Y")
        comic_div = soup.find('div', id='comic')
        img_srcs = [tag['src'] for tag in comic_div.find_all('img')]
        return {
            'img': img_srcs,
            'title': title,
            'day': published.day,
            'month': published.month,
            'year': published.year,
        }
1795
1796
1797
class JustSayEh(GenericNavigableComic):
    """Class to retrieve Just Say Eh comics."""
    # Also on http://tapastic.com/series/Just-Say-Eh
    name = 'justsayeh'
    long_name = 'Just Say Eh'
    url = 'http://www.justsayeh.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics."""
        title = soup.find('h2', class_='post-title').string
        comic_imgs = soup.find("div", id="comic").find_all("img")
        # Every image is expected to carry the same text as alt and title.
        assert all(tag['alt'] == tag['title'] for tag in comic_imgs)
        first_alt = comic_imgs[0]['alt']
        return {
            'img': [tag['src'] for tag in comic_imgs],
            'title': title,
            'alt': first_alt,
        }
1818
1819
1820
class MouseBearComedy(GenericComicNotWorking):  # Website has changed
    """Class to retrieve Mouse Bear Comedy comics."""
    # Also on http://mousebearcomedy.tumblr.com
    name = 'mousebear'
    long_name = 'Mouse Bear Comedy'
    url = 'http://www.mousebearcomedy.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics."""
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, '%B %d, %Y')
        comic_imgs = soup.find("div", id="comic").find_all("img")
        # alt, title attribute and post title must all agree.
        assert all(tag['alt'] == tag['title'] == title for tag in comic_imgs)
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [tag['src'] for tag in comic_imgs],
            'title': title,
            'author': author,
        }
1846
1847
1848 View Code Duplication
class BigFootJustice(GenericNavigableComic):
    """Class to retrieve Big Foot Justice comics."""
    # Also on http://tapastic.com/series/bigfoot-justice
    name = 'bigfoot'
    long_name = 'Big Foot Justice'
    url = 'http://bigfootjustice.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics."""
        comic_imgs = soup.find('div', id='comic').find_all('img')
        assert all(tag['title'] == tag['alt'] for tag in comic_imgs)
        # The comic title is made of the image titles joined with spaces.
        joined_title = ' '.join([tag['title'] for tag in comic_imgs])
        return {
            'img': [tag['src'] for tag in comic_imgs],
            'title': joined_title,
        }
1867
1868
1869
class RespawnComic(GenericNavigableComic):
    """Class to retrieve Respawn Comic."""
    # Also on https://respawncomic.tumblr.com
    name = 'respawn'
    long_name = 'Respawn Comic'
    # The stray trailing space that used to follow the host name is not part
    # of the site address and has been removed.
    url = 'http://respawncomic.com'
    _categories = ('RESPAWN', )
    get_navi_link = get_a_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://respawncomic.com/comic/c0001/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Title, author, publication date and images are all read from <meta>
        tags of the comic page.
        """
        title = soup.find('meta', property='og:title')['content']
        author = soup.find('meta', attrs={'name': 'shareaholic:article_author_name'})['content']
        date_str = soup.find('meta', attrs={'name': 'shareaholic:article_published_time'})['content']
        # Only the leading 'YYYY-MM-DD' part of the timestamp is parsed.
        day = string_to_date(date_str[:10], "%Y-%m-%d")
        imgs = soup.find_all('meta', property='og:image')
        # og:image values that are filtered out of the result.
        skip_imgs = {
            'http://respawncomic.com/wp-content/uploads/2016/03/site/HAROLD2.png',
            'http://respawncomic.com/wp-content/uploads/2016/03/site/DEVA.png'
        }
        return {
            'title': title,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [i['content'] for i in imgs if i['content'] not in skip_imgs],
        }
1901
1902
1903 View Code Duplication
class SafelyEndangered(GenericNavigableComic):
    """Class to retrieve Safely Endangered comics."""
    # Also on http://tumblr.safelyendangered.com
    name = 'endangered'
    long_name = 'Safely Endangered'
    url = 'http://www.safelyendangered.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.safelyendangered.com/comic/ignored/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics."""
        title = soup.find('h2', class_='post-title').string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, '%B %d, %Y')
        comic_imgs = soup.find('div', id='comic').find_all('img')
        # The alt text of the first image is reported for the whole comic.
        first_alt = comic_imgs[0]['alt']
        assert all(tag['alt'] == tag['title'] for tag in comic_imgs)
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [tag['src'] for tag in comic_imgs],
            'title': title,
            'alt': first_alt,
        }
1930
1931
1932
class PicturesInBoxes(GenericNavigableComic):
    """Class to retrieve Pictures In Boxes comics."""
    # Also on https://picturesinboxescomic.tumblr.com
    name = 'picturesinboxes'
    long_name = 'Pictures in Boxes'
    url = 'http://www.picturesinboxes.com'
    get_navi_link = get_a_navi_navinext
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.picturesinboxes.com/2013/10/26/tetris/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics."""
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, '%B %d, %Y')
        pane_imgs = soup.find('div', class_='comicpane').find_all('img')
        assert pane_imgs
        # alt, title attribute and post title must all agree.
        assert all(tag['title'] == tag['alt'] == title for tag in pane_imgs)
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [tag['src'] for tag in pane_imgs],
            'title': title,
            'author': author,
        }
1960
1961
1962 View Code Duplication
class Penmen(GenericComicNotWorking, GenericNavigableComic):
    """Class to retrieve Penmen comics."""
    name = 'penmen'
    long_name = 'Penmen'
    url = 'http://penmen.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://penmen.com/index.php/2016/09/12/penmen-announces-grin-big-brand-clothing/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics."""
        title = soup.find('title').string
        content_imgs = soup.find('div', class_='entry-content').find_all('img')
        short_url = soup.find('link', rel='shortlink')['href']
        tag_links = soup.find_all('a', rel='tag')
        tags = ' '.join([anchor.string for anchor in tag_links])
        # Only the first 10 characters of the 'datetime' attribute are parsed.
        iso_day = soup.find('time')['datetime'][:10]
        published = string_to_date(iso_day, "%Y-%m-%d")
        return {
            'title': title,
            'short_url': short_url,
            'img': [tag['src'] for tag in content_imgs],
            'tags': tags,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
1989
1990
1991
class TheDoghouseDiaries(GenericDeletedComic, GenericNavigableComic):
    """Class to retrieve The Dog House Diaries comics."""
    name = 'doghouse'
    long_name = 'The Dog House Diaries'
    url = 'http://thedoghousediaries.com'

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics."""
        home_soup = get_soup_at_url(cls.url)
        return home_soup.find('a', id='firstlink')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic."""
        anchor_id = 'nextlink' if next_ else 'previouslink'
        return last_soup.find('a', id=anchor_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics."""
        strip = soup.find('img', src=re.compile('^dhdcomics/.*'))
        comic_url = cls.get_url_from_link(link)
        title = soup.find('h2', id='titleheader').string
        subtitle = soup.find('div', id='subtext').string
        alt = strip.get('title')
        # The src value is stripped of surrounding whitespace before joining.
        img_url = urljoin_wrapper(comic_url, strip['src'].strip())
        # The comic number is the last path component of the comic url.
        number = int(comic_url.split('/')[-1])
        return {
            'title': title,
            'title2': subtitle,
            'alt': alt,
            'img': [img_url],
            'num': number,
        }
2020
2021
2022
class InvisibleBread(GenericListableComic):
    """Class to retrieve Invisible Bread comics."""
    # Also on http://www.gocomics.com/invisible-bread
    name = 'invisiblebread'
    long_name = 'Invisible Bread'
    url = 'http://invisiblebread.com'

    @classmethod
    def get_archive_elements(cls):
        """Return the archive page's 'archive-title' cells, in reverse page order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archives/'))
        title_cells = archive_soup.find_all('td', class_='archive-title')
        return reversed(title_cells)

    @classmethod
    def get_url_from_archive_element(cls, td):
        """Return the href of the link found in an archive title cell."""
        anchor = td.find('a')
        return anchor['href']

    @classmethod
    def get_comic_info(cls, soup, td):
        """Get information about a particular comics."""
        comic_url = cls.get_url_from_archive_element(td)
        title = td.find('a').string
        month_and_day = td.previous_sibling.string
        # The year is taken from the first numeric path component of the url.
        year_re = re.compile('^%s/([0-9]+)/' % cls.url)
        year = year_re.match(comic_url).group(1)
        published = string_to_date(' '.join((month_and_day, year)), '%b %d %Y')
        comic_imgs = [soup.find('div', id='comic').find('img')]
        assert len(comic_imgs) == 1, comic_imgs
        assert all(tag['title'] == tag['alt'] == title for tag in comic_imgs)
        return {
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [urljoin_wrapper(cls.url, tag['src']) for tag in comic_imgs],
            'title': title,
        }
2058
2059
2060
class DiscoBleach(GenericDeletedComic):
    """Class to retrieve Disco Bleach Comics.

    Only identification attributes are defined here; everything else comes
    from GenericDeletedComic.
    """
    name = 'discobleach'
    long_name = 'Disco Bleach'
    url = 'http://discobleach.com'
2065
2066
2067
class TubeyToons(GenericDeletedComic):
    """Class to retrieve TubeyToons comics.

    Only identification attributes are defined here; everything else comes
    from GenericDeletedComic.
    """
    # Also on http://tapastic.com/series/Tubey-Toons
    # Also on https://tubeytoons.tumblr.com
    name = 'tubeytoons'
    long_name = 'Tubey Toons'
    url = 'http://tubeytoons.com'
    # NOTE(review): 'TUNEYTOONS' looks like a typo for 'TUBEYTOONS' - check
    # every other user of this category before renaming it.
    _categories = ('TUNEYTOONS', )
2075
2076
2077
class CompletelySeriousComics(GenericNavigableComic):
    """Class to retrieve Completely Serious comics."""
    name = 'completelyserious'
    long_name = 'Completely Serious Comics'
    url = 'http://completelyseriouscomics.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics."""
        title = soup.find('h2', class_='post-title').string
        # The author is read from the second child node of the author span.
        author_span = soup.find('span', class_='post-author')
        author = author_span.contents[1].string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, '%B %d, %Y')
        pane_imgs = soup.find('div', class_='comicpane').find_all('img')
        assert pane_imgs
        alt = pane_imgs[0]['title']
        assert all(tag['title'] == tag['alt'] == alt for tag in pane_imgs)
        return {
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [tag['src'] for tag in pane_imgs],
            'title': title,
            'alt': alt,
            'author': author,
        }
2105
2106
2107 View Code Duplication
class PoorlyDrawnLines(GenericListableComic):
    """Class to retrieve Poorly Drawn Lines comics."""
    # Also on http://pdlcomics.tumblr.com
    name = 'poorlydrawn'
    long_name = 'Poorly Drawn Lines'
    url = 'https://www.poorlydrawnlines.com'
    _categories = ('POORLYDRAWN', )
    get_url_from_archive_element = get_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics."""
        post_imgs = soup.find('div', class_='post').find_all('img')
        assert len(post_imgs) <= 1, post_imgs
        img_srcs = [tag['src'] for tag in post_imgs]
        # A post without image gets an empty title.
        if post_imgs:
            title = post_imgs[0].get('title', "")
        else:
            title = ""
        return {
            'img': img_srcs,
            'title': title,
        }

    @classmethod
    def get_archive_elements(cls):
        """Return the archive page links pointing under '<url>/comic/', in reverse page order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive'))
        comic_href_re = re.compile('^%s/comic/.' % cls.url)
        comic_links = archive_soup.find_all('a', href=comic_href_re)
        return reversed(comic_links)
2131
2132
2133
class LoadingComics(GenericNavigableComic):
    """Class to retrieve Loading Artist comics."""
    name = 'loadingartist'
    long_name = 'Loading Artist'
    url = 'http://www.loadingartist.com/latest'

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics."""
        latest_soup = get_soup_at_url(cls.url)
        return latest_soup.find('a', title="First")

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic."""
        link_title = 'Next' if next_ else 'Previous'
        return last_soup.find('a', title=link_title)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics."""
        title = soup.find('h1').string
        raw_date = soup.find('span', class_='date').string
        published = string_to_date(raw_date.strip(), "%B %d, %Y")
        # Only images with empty alt and title attributes are collected.
        comic_div = soup.find('div', class_='comic')
        strip_imgs = comic_div.find_all('img', alt='', title='')
        return {
            'title': title,
            'img': [tag['src'] for tag in strip_imgs],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2163
2164
2165
class ChuckleADuck(GenericNavigableComic):
    """Class to retrieve Chuckle-A-Duck comics."""
    name = 'chuckleaduck'
    long_name = 'Chuckle-A-duck'
    url = 'http://chuckleaduck.com'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics."""
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(
            remove_st_nd_rd_th_from_date(raw_date), "%B %d, %Y")
        author = soup.find('span', class_='post-author').string
        comic_div = soup.find('div', id='comic')
        # Pages without a comic div yield no image and an empty title.
        if comic_div:
            comic_imgs = comic_div.find_all('img')
        else:
            comic_imgs = []
        if comic_imgs:
            title = comic_imgs[0]['title']
        else:
            title = ""
        assert all(tag['title'] == tag['alt'] == title for tag in comic_imgs)
        return {
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [tag['src'] for tag in comic_imgs],
            'title': title,
            'author': author,
        }
2191
2192
2193
class DepressedAlien(GenericNavigableComic):
    """Class to retrieve Depressed Alien Comics."""
    name = 'depressedalien'
    long_name = 'Depressed Alien'
    url = 'http://depressedalien.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        Returns the parent element of the 'beginArrow' image, or None when
        the image is not found on the page.
        """
        img = get_soup_at_url(cls.url).find('img', attrs={'name': 'beginArrow'})
        # find() returns None when nothing matches; do not dereference it.
        return None if img is None else img.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns the parent element of the 'rightArrow' (next) or 'leftArrow'
        (previous) image, or None when the image is not found on the page.
        """
        arrow_name = 'rightArrow' if next_ else 'leftArrow'
        img = last_soup.find('img', attrs={'name': arrow_name})
        # A missing arrow means no link in that direction instead of an
        # AttributeError on None.
        return None if img is None else img.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Title and images are read from the page's <meta> tags.
        """
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        imgs = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'img': [i['content'] for i in imgs],
        }
2219
2220
2221 View Code Duplication
class TurnOffUs(GenericListableComic):
    """Class to retrieve TurnOffUs comics."""
    name = 'turnoffus'
    long_name = 'Turn Off Us'
    url = 'http://turnoff.us'
    get_url_from_archive_element = join_cls_url_to_href

    @classmethod
    def get_archive_elements(cls):
        """Return the 'post-link' anchors of the archive's post list, in reverse page order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'all'))
        post_links = archive_soup.find('ul', class_='post-list').find_all(
            'a', class_='post-link')
        return reversed(post_links)

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Get information about a particular comics."""
        title_meta = soup.find('meta', property='og:title')
        title = title_meta['content']
        image_metas = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'img': [meta['content'] for meta in image_metas],
        }
2243
2244
2245
class ThingsInSquares(GenericListableComic):
    """Retrieve the Things In Squares comics listed in the site's archive table."""
    # This can be retrieved in other languages
    # Also on https://tapastic.com/series/Things-in-Squares
    name = 'squares'
    long_name = 'Things in squares'
    url = 'http://www.thingsinsquares.com'

    @classmethod
    def get_comic_info(cls, soup, tr):
        """Merge the archive row (title, date) with the comic page's meta tags."""
        _, title_cell, date_cell = tr.find_all('td')
        title_link = title_cell.find('a')
        published = string_to_date(date_cell.string, "%m.%d.%y")
        row_title = title_link.string
        page_title = soup.find('meta', property='og:title')['content']
        desc_meta = soup.find('meta', property='og:description')
        if desc_meta:
            description = desc_meta['content']
        else:
            description = ''
        tag_metas = soup.find_all('meta', property='article:tag')
        tags = ' '.join([meta['content'] for meta in tag_metas])
        image_urls = [meta['content'] for meta in soup.find_all('meta', property='og:image')]
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'title': row_title,
            'title2': page_title,
            'description': description,
            'tags': tags,
            'img': image_urls,
        }

    @classmethod
    def get_url_from_archive_element(cls, tr):
        """Return the href of the link held in the second cell of an archive row."""
        _, link_cell, _unused = tr.find_all('td')
        anchor = link_cell.find('a')
        return anchor['href']

    @classmethod
    def get_archive_elements(cls):
        """Return the rows of the archive table body, in reverse page order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive-2'))
        rows = archive_soup.find('tbody').find_all('tr')
        return reversed(rows)
2286
2287
2288
class HappleTea(GenericNavigableComic):
    """Retrieve the Happle Tea comics by following the site's navigation links."""
    name = 'happletea'
    long_name = 'Happle Tea'
    url = 'http://www.happletea.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic's info dict from the comic images and the post header."""
        comic_imgs = soup.find('div', id='comic').find_all('img')
        post = soup.find('div', class_='post-content')
        title = post.find('h2', class_='post-title').string
        author = post.find('a', rel='author').string
        published = string_to_date(post.find('span', class_='post-date').string, "%B %d, %Y")
        # Each image is expected to carry the same text in 'alt' and 'title'.
        assert all(img['alt'] == img['title'] for img in comic_imgs)
        sources = [img['src'] for img in comic_imgs]
        alt_text = ''.join([img['alt'] for img in comic_imgs])
        return {
            'title': title,
            'img': sources,
            'alt': alt_text,
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'author': author,
        }
2315
2316
2317
class RockPaperScissors(GenericNavigableComic):
    """Retrieve the Rock Paper Scissors comics by following the navigation links."""
    name = 'rps'
    long_name = 'Rock Paper Scissors'
    url = 'http://rps-comics.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic's info dict: title, transcript, short url and image urls."""
        page_title = soup.find('title').string
        og_images = soup.find_all('meta', property='og:image')
        shortlink = soup.find('link', rel='shortlink')
        short_url = shortlink['href']
        transcript_div = soup.find('div', id='transcript-content')
        transcript = transcript_div.string
        image_urls = [meta['content'] for meta in og_images]
        return {
            'title': page_title,
            'transcript': transcript,
            'short_url': short_url,
            'img': image_urls,
        }
2338
2339
2340
class FatAwesomeComics(GenericNavigableComic):
    """Retrieve the Fat Awesome comics, starting from a fixed first url."""
    # Also on http://fatawesomecomedy.tumblr.com
    name = 'fatawesome'
    long_name = 'Fat Awesome'
    url = 'http://fatawesome.com/comics'
    get_navi_link = get_a_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://fatawesome.com/shortbus/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic's info dict from the page's meta tags and its single image."""
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        description = soup.find('meta', attrs={'name': 'description'})['content']
        tag_meta = soup.find('meta', property='article:tag')
        if tag_meta:
            tags = tag_meta['content']
        else:
            tags = ""
        published_time = soup.find('meta', property='article:published_time')['content']
        # Only the leading YYYY-MM-DD part of the timestamp is parsed.
        published = string_to_date(published_time[:10], "%Y-%m-%d")
        comic_imgs = soup.find_all('img', attrs={'data-recalc-dims': "1"})
        assert len(comic_imgs) == 1, comic_imgs
        alt_text = "".join([img['alt'] for img in comic_imgs])
        # Drop the query string that follows the last '?' in each image url.
        sources = [img['src'].rsplit('?', 1)[0] for img in comic_imgs]
        return {
            'title': title,
            'description': description,
            'tags': tags,
            'alt': alt_text,
            'img': sources,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2371
2372
2373
class PeterLauris(GenericNavigableComic):
    """Retrieve the Peter Lauris comics, starting from a fixed first url."""
    name = 'peterlauris'
    long_name = 'Peter Lauris'
    url = 'http://peterlauris.com/comics'
    get_navi_link = get_a_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://peterlauris.com/comics/just-in-case/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic's info dict (title, date, image urls) from the meta tags."""
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        published_time = soup.find('meta', property='article:published_time')['content']
        # Only the leading YYYY-MM-DD part of the timestamp is parsed.
        published = string_to_date(published_time[:10], "%Y-%m-%d")
        image_urls = []
        for og_image in soup.find_all('meta', property='og:image'):
            image_urls.append(og_image['content'])
        return {
            'title': title,
            'img': image_urls,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2396
2397
2398
class RandomCrab(GenericNavigableComic):
    """Retrieve the Random Crab comics, starting from a fixed first url."""
    name = 'randomcrab'
    long_name = 'Random Crab'
    url = 'https://randomcrab.com'
    get_navi_link = get_a_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'https://randomcrab.com/natural-elephant/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic's info dict from the meta tags and the author link."""
        title = soup.find('meta', property='og:title')['content']
        desc = soup.find('meta', property='og:description')['content']
        published_time = soup.find('meta', property='article:published_time')['content']
        # Only the leading YYYY-MM-DD part of the timestamp is parsed.
        published = string_to_date(published_time[:10], "%Y-%m-%d")
        og_images = soup.find_all('meta', property='og:image')
        author_link = soup.find('a', rel='author')
        author = author_link.string
        image_urls = [meta['content'] for meta in og_images]
        return {
            'title': title,
            'desc': desc,
            'img': image_urls,
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'author': author,
        }
2425
2426
2427 View Code Duplication
class JuliasDrawings(GenericListableComic):
    """Retrieve Julia's Drawings from the links listed on the site's main page."""
    name = 'julia'
    long_name = "Julia's Drawings"
    url = 'https://drawings.jvns.ca'
    get_url_from_archive_element = get_href

    @classmethod
    def get_archive_elements(cls):
        """Return the links of the 'drawings' div, in reverse page order."""
        drawings = get_soup_at_url(cls.url).find('div', class_='drawings')
        links = drawings.find_all('a')
        return reversed(links)

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Build the drawing's info dict: title, date and absolute image urls."""
        published_time = soup.find('meta', property='og:article:published_time')['content']
        # Only the leading YYYY-MM-DD part of the timestamp is parsed.
        published = string_to_date(published_time[:10], "%Y-%m-%d")
        title = soup.find('h3', class_='p-post-title').string
        content = soup.find('section', class_='post-content')
        # Image sources are joined with the site url to get full urls.
        image_urls = [urljoin_wrapper(cls.url, img['src']) for img in content.find_all('img')]
        return {
            'title': title,
            'img': image_urls,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2453
2454
2455
class AnythingComic(GenericListableComic):
    """Retrieve the Anything Comic strips listed in the site's chapter table."""
    # Also on http://tapastic.com/series/anything
    name = 'anythingcomic'
    long_name = 'Anything Comic'
    url = 'http://www.anythingcomic.com'

    @classmethod
    def get_archive_elements(cls):
        """Return the rows of the archive's chapter table that describe comics."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive/'))
        rows = archive_soup.find('table', id='chapter_table').find_all('tr')
        # The first 2 <tr>'s do not correspond to comics
        return rows[2:]

    @classmethod
    def get_url_from_archive_element(cls, tr):
        """Get url corresponding to an archive element."""
        # A row must hold exactly four cells; only the second one is used here.
        _, comic_cell, _date_cell, _ = tr.find_all('td')
        anchor = comic_cell.find('a')
        return urljoin_wrapper(cls.url, anchor['href'])

    @classmethod
    def get_comic_info(cls, soup, tr):
        """Merge the archive row (number, title, date) with the page's comic image."""
        num_cell, comic_cell, date_cell, _ = tr.find_all('td')
        num = int(num_cell.string)
        title = comic_cell.find('a').string
        comic_imgs = soup.find_all('img', id='comic_image')
        cleaned_date = remove_st_nd_rd_th_from_date(date_cell.string)
        published = string_to_date(cleaned_date, "%B %d, %Y, %I:%M %p")
        assert len(comic_imgs) == 1, comic_imgs
        assert all(img.get('alt') == img.get('title') for img in comic_imgs)
        alt_text = comic_imgs[0].get('alt', '')
        image_urls = [urljoin_wrapper(cls.url, img['src']) for img in comic_imgs]
        return {
            'num': num,
            'title': title,
            'alt': alt_text,
            'img': image_urls,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2496
2497
2498 View Code Duplication
class LonnieMillsap(GenericNavigableComic):
    """Retrieve Lonnie Millsap's comics, starting from a fixed first url."""
    name = 'millsap'
    long_name = 'Lonnie Millsap'
    url = 'http://www.lonniemillsap.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.lonniemillsap.com/?p=42'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic's info dict from the post title, header and entry images."""
        title = soup.find('h2', class_='post-title').string
        post = soup.find('div', class_='post-content')
        author_span = post.find("span", class_="post-author")
        author = author_span.find("a").string
        date_span = post.find("span", class_="post-date")
        published = string_to_date(date_span.string, "%B %d, %Y")
        entry = post.find("div", class_="entry")
        sources = [img['src'] for img in entry.find_all("img")]
        return {
            'title': title,
            'author': author,
            'img': sources,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2524
2525
2526
class LinsEditions(GenericDeletedComic):  # Permanently moved to warandpeas
    """Placeholder for the former L.I.N.S. Editions comics (declared as deleted)."""
    # Also on https://linscomics.tumblr.com
    # Now on https://warandpeas.com
    name = 'lins'
    long_name = 'L.I.N.S. Editions'
    url = 'https://linsedition.com'
    _categories = ('WARANDPEAS', 'LINS')
2534
2535
2536
class WarAndPeas(GenericNavigableComic):
    """Retrieve the War And Peas comics, starting from a fixed first url."""
    name = 'warandpeas'
    long_name = 'War And Peas'
    url = 'https://warandpeas.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'https://warandpeas.com/2011/11/07/565/'
    _categories = ('WARANDPEAS', 'LINS')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic's info dict (title, image urls, date) from the meta tags."""
        title = soup.find('meta', property='og:title')['content']
        og_images = soup.find_all('meta', property='og:image')
        published_time = soup.find('meta', property='article:published_time')['content']
        # Only the leading YYYY-MM-DD part of the timestamp is parsed.
        published = string_to_date(published_time[:10], "%Y-%m-%d")
        image_urls = [meta['content'] for meta in og_images]
        return {
            'title': title,
            'img': image_urls,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2560
2561
2562
class ThorsThundershack(GenericNavigableComic):
    """Retrieve the Thor's Thundershack comics by following the navigation links."""
    # Also on http://tapastic.com/series/Thors-Thundershac
    name = 'thor'
    long_name = 'Thor\'s Thundershack'
    url = 'http://www.thorsthundershack.com'
    _categories = ('THOR', )
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'first navlink' anchor found on the page at cls.url."""
        front_page = get_soup_at_url(cls.url)
        return front_page.find('a', class_='first navlink')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the first next/prev anchor not pointing to '/comic', or None."""
        rel = 'next' if next_ else 'prev'
        candidates = (
            anchor
            for anchor in last_soup.find_all('a', rel=rel)
            if anchor['href'] != '/comic'
        )
        return next(candidates, None)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic's info dict from the page's meta tag and itemprop elements."""
        title = soup.find('meta', attrs={'name': 'description'})["content"]
        description = soup.find('div', itemprop='articleBody').text
        author = soup.find('span', itemprop='author copyrightHolder').string
        comic_imgs = soup.find_all('img', itemprop='image')
        assert all(img['title'] == img['alt'] for img in comic_imgs)
        if comic_imgs:
            alt_text = comic_imgs[0]['alt']
        else:
            alt_text = ""
        published_time = soup.find('time', itemprop='datePublished')["datetime"]
        published = string_to_date(published_time, "%Y-%m-%d %H:%M:%S")
        # Image sources are joined with the site url to get full urls.
        image_urls = [urljoin_wrapper(cls.url, img['src']) for img in comic_imgs]
        return {
            'img': image_urls,
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'author': author,
            'title': title,
            'alt': alt_text,
            'description': description,
        }
2605
2606
2607 View Code Duplication
class GerbilWithAJetpack(GenericNavigableComic):
    """Class to retrieve GerbilWithAJetpack comics."""
    name = 'gerbil'
    long_name = 'Gerbil With A Jetpack'
    url = 'http://gerbilwithajetpack.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with the image urls, title, alt text, author and
        publication day/month/year read from the comic page.
        """
        title = soup.find('h2', class_='post-title').string
        author = soup.find("span", class_="post-author").find("a").string
        date_str = soup.find("span", class_="post-date").string
        day = string_to_date(date_str, "%B %d, %Y")
        imgs = soup.find("div", id="comic").find_all("img")
        # A page without any comic image yields an empty alt text instead of
        # an IndexError, like the other classes of this file reading the
        # images of the 'comic' div.
        alt = imgs[0]['alt'] if imgs else ""
        # Every image must carry the same text in both 'alt' and 'title'.
        assert all(i['alt'] == i['title'] == alt for i in imgs)
        return {
            'img': [i['src'] for i in imgs],
            'title': title,
            'alt': alt,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
2634
2635
2636
class EveryDayBlues(GenericDeletedComic, GenericNavigableComic):
    """Retrieve the Every Day Blues comics by following the navigation links."""
    name = "blues"
    long_name = "Every Day Blues"
    url = "http://everydayblues.net"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic's info dict from the post header and the comic image."""
        title = soup.find("h2", class_="post-title").string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        date_span = soup.find("span", class_="post-date")
        # The date is parsed with the "de_DE.utf8" locale.
        published = string_to_date(date_span.string, "%d. %B %Y", "de_DE.utf8")
        comic_imgs = soup.find("div", id="comic").find_all("img")
        # Images are expected to use the post title as both 'alt' and 'title'.
        assert all(img['alt'] == img['title'] == title for img in comic_imgs)
        assert len(comic_imgs) <= 1, comic_imgs
        sources = [img['src'] for img in comic_imgs]
        return {
            'img': sources,
            'title': title,
            'author': author,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2662
2663
2664
class BiterComics(GenericNavigableComic):
    """Retrieve the Biter Comics by following the navigation links."""
    name = "biter"
    long_name = "Biter Comics"
    url = "http://www.bitercomics.com"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic's info dict from the entry header and the single comic image."""
        title = soup.find("h1", class_="entry-title").string
        author_span = soup.find("span", class_="author vcard")
        author = author_span.find("a").string
        date_span = soup.find("span", class_="entry-date")
        published = string_to_date(date_span.string, "%B %d, %Y")
        comic_imgs = soup.find("div", id="comic").find_all("img")
        assert all(img['alt'] == img['title'] for img in comic_imgs)
        assert len(comic_imgs) == 1, comic_imgs
        alt_text = comic_imgs[0]['alt']
        sources = [img['src'] for img in comic_imgs]
        return {
            'img': sources,
            'title': title,
            'alt': alt_text,
            'author': author,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2692
2693
2694
class TheAwkwardYeti(GenericNavigableComic):
    """Retrieve The Awkward Yeti comics by following the navigation links."""
    # Also on http://www.gocomics.com/the-awkward-yeti
    # Also on http://larstheyeti.tumblr.com
    # Also on https://tapastic.com/series/TheAwkwardYeti
    name = 'yeti'
    long_name = 'The Awkward Yeti'
    url = 'http://theawkwardyeti.com'
    _categories = ('YETI', )
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic's info dict (image urls, title, date) from the comic page."""
        title = soup.find('h2', class_='post-title').string
        date_span = soup.find("span", class_="post-date")
        published = string_to_date(date_span.string, "%B %d, %Y")
        comic_imgs = soup.find("div", id="comic").find_all("img")
        # Only the first image is checked for matching 'alt' and 'title'.
        assert not comic_imgs or comic_imgs[0]['alt'] == comic_imgs[0]['title']
        sources = [img['src'] for img in comic_imgs]
        return {
            'img': sources,
            'title': title,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2721
2722
2723
class PleasantThoughts(GenericNavigableComic):
    """Retrieve the Pleasant Thoughts comics by following the navigation links."""
    name = 'pleasant'
    long_name = 'Pleasant Thoughts'
    url = 'http://pleasant-thoughts.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic's info dict (title and image urls) from the post content."""
        post = soup.find('div', class_='post-content')
        post_title = post.find('h2', class_='post-title').string
        entry = post.find("div", class_="entry")
        sources = []
        for img in entry.find_all("img"):
            sources.append(img['src'])
        return {'title': post_title, 'img': sources}
2741
2742
2743
class MisterAndMe(GenericNavigableComic):
    """Retrieve the Mister & Me comics by following the navigation links."""
    # Also on http://www.gocomics.com/mister-and-me
    # Also on https://tapastic.com/series/Mister-and-Me
    name = 'mister'
    long_name = 'Mister & Me'
    url = 'http://www.mister-and-me.com'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic's info dict from the post header and the comic image."""
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        date_span = soup.find("span", class_="post-date")
        published = string_to_date(date_span.string, "%B %d, %Y")
        comic_imgs = soup.find("div", id="comic").find_all("img")
        assert all(img['alt'] == img['title'] for img in comic_imgs)
        assert len(comic_imgs) <= 1, comic_imgs
        # Pages without an image get an empty alt text.
        if comic_imgs:
            alt_text = comic_imgs[0]['alt']
        else:
            alt_text = ""
        sources = [img['src'] for img in comic_imgs]
        return {
            'img': sources,
            'title': title,
            'alt': alt_text,
            'author': author,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2773
2774
2775
class LastPlaceComics(GenericNavigableComic):
    """Retrieve the Last Place Comics by following the navigation links."""
    name = 'lastplace'
    long_name = 'Last Place Comics'
    url = "http://lastplacecomics.com"
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic's info dict from the post header and the comic image."""
        post_title = soup.find('h2', class_='post-title').string
        post_author = soup.find("span", class_="post-author").find("a").string
        post_date = soup.find("span", class_="post-date").string
        when = string_to_date(post_date, "%B %d, %Y")
        pictures = soup.find("div", id="comic").find_all("img")
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        assert len(pictures) <= 1, pictures
        # Pages without an image get an empty alt text.
        alt_text = ""
        if pictures:
            alt_text = pictures[0]['alt']
        picture_urls = [pic['src'] for pic in pictures]
        return {
            'img': picture_urls,
            'title': post_title,
            'alt': alt_text,
            'author': post_author,
            'day': when.day,
            'month': when.month,
            'year': when.year
        }
2803
2804
2805
class TalesOfAbsurdity(GenericNavigableComic):
    """Retrieve the Tales Of Absurdity comics by following the navigation links."""
    # Also on http://tapastic.com/series/Tales-Of-Absurdity
    # Also on http://talesofabsurdity.tumblr.com
    name = 'absurdity'
    long_name = 'Tales of Absurdity'
    url = 'http://talesofabsurdity.com'
    _categories = ('ABSURDITY', )
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic's info dict from the post header and the comic images."""
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        date_span = soup.find("span", class_="post-date")
        published = string_to_date(date_span.string, "%B %d, %Y")
        comic_imgs = soup.find("div", id="comic").find_all("img")
        assert all(img['alt'] == img['title'] for img in comic_imgs)
        # The alt text of the first image is kept; empty when there is none.
        if comic_imgs:
            alt_text = comic_imgs[0]['alt']
        else:
            alt_text = ""
        sources = [img['src'] for img in comic_imgs]
        return {
            'img': sources,
            'title': title,
            'alt': alt_text,
            'author': author,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2835
2836
2837
class EndlessOrigami(GenericComicNotWorking, GenericNavigableComic):  # Nav not working
    """Retrieve the Endless Origami comics by following the navigation links."""
    name = "origami"
    long_name = "Endless Origami"
    url = "http://endlessorigami.com"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic's info dict from the post header and the comic images."""
        post_title = soup.find('h2', class_='post-title').string
        post_author = soup.find("span", class_="post-author").find("a").string
        post_date = soup.find("span", class_="post-date").string
        when = string_to_date(post_date, "%B %d, %Y")
        pictures = soup.find("div", id="comic").find_all("img")
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        # The alt text of the first image is kept; empty when there is none.
        alt_text = ""
        if pictures:
            alt_text = pictures[0]['alt']
        picture_urls = [pic['src'] for pic in pictures]
        return {
            'img': picture_urls,
            'title': post_title,
            'alt': alt_text,
            'author': post_author,
            'day': when.day,
            'month': when.month,
            'year': when.year
        }
2864
2865
2866
class PlanC(GenericNavigableComic):
    """Retrieve the Plan C comics by following the navigation links."""
    name = 'planc'
    long_name = 'Plan C'
    url = 'http://www.plancomic.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic's info dict (title, image urls, date) from the comic page."""
        title = soup.find('h2', class_='post-title').string
        date_span = soup.find("span", class_="post-date")
        published = string_to_date(date_span.string, "%B %d, %Y")
        comic_div = soup.find('div', id='comic')
        sources = [img['src'] for img in comic_div.find_all('img')]
        return {
            'title': title,
            'img': sources,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2888
2889
2890 View Code Duplication
class BuniComic(GenericNavigableComic):
    """Retrieve the Buni comics by following the navigation links."""
    name = 'buni'
    long_name = 'BuniComics'
    url = 'http://www.bunicomic.com'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic's info dict from the single image of the 'comic' div."""
        comic_imgs = soup.find('div', id='comic').find_all('img')
        assert all(img['alt'] == img['title'] for img in comic_imgs)
        assert len(comic_imgs) == 1, comic_imgs
        sources = [img['src'] for img in comic_imgs]
        # The comic's title is taken from the image's 'title' attribute.
        image_title = comic_imgs[0]['title']
        return {'img': sources, 'title': image_title}
2908
2909
2910 View Code Duplication
class GenericCommitStrip(GenericNavigableComic):
    """Base class for the Commit Strip comics; subclasses set url and first_url."""
    get_navi_link = get_a_rel_next
    get_first_comic_link = simulate_first_link
    first_url = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic's info dict from the OpenGraph tags and the entry images."""
        description = soup.find('meta', property='og:description')['content']
        title = soup.find('meta', property='og:title')['content']
        entry_imgs = soup.find('div', class_='entry-content').find_all('img')
        # Images without a 'title' attribute contribute an empty string.
        image_titles = ' '.join([img.get('title', '') for img in entry_imgs])
        image_urls = []
        for img in entry_imgs:
            ascii_src = convert_iri_to_plain_ascii_uri(img['src'])
            image_urls.append(urljoin_wrapper(cls.url, ascii_src))
        return {
            'title': title,
            'title2': image_titles,
            'description': description,
            'img': image_urls,
        }
2929
2930
2931
class CommitStripFr(GenericCommitStrip):
    """French edition of the Commit Strip comics."""
    name = 'commit_fr'
    long_name = 'Commit Strip (Fr)'
    url = 'http://www.commitstrip.com/fr'
    _categories = ('FRANCAIS', )
    first_url = 'http://www.commitstrip.com/fr/2012/02/22/interview/'
2938
2939
2940
class CommitStripEn(GenericCommitStrip):
    """English edition of the Commit Strip comics."""
    name = 'commit_en'
    long_name = 'Commit Strip (En)'
    url = 'http://www.commitstrip.com/en'
    first_url = 'http://www.commitstrip.com/en/2012/02/22/interview/'
2946
2947
2948
class GenericBoumerie(GenericNavigableComic):
    """Base class for the Boumeries comics; subclasses set date_format and lang."""
    # Also on http://boumeries.tumblr.com
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next
    date_format = NotImplemented
    lang = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic's info dict from the post header and the comic images."""
        title = soup.find('h2', class_='post-title').string
        shortlink = soup.find('link', rel='shortlink')
        short_url = shortlink['href']
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        date_span = soup.find('span', class_='post-date')
        # The date format and the locale both come from the subclass.
        published = string_to_date(date_span.string, cls.date_format, cls.lang)
        comic_imgs = soup.find('div', id='comic').find_all('img')
        assert all(img['alt'] == img['title'] for img in comic_imgs)
        sources = [img['src'] for img in comic_imgs]
        return {
            'short_url': short_url,
            'img': sources,
            'title': title,
            'author': author,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2975
2976
2977
class BoumerieEn(GenericBoumerie):
    """English edition of the Boumeries comics."""
    name = 'boumeries_en'
    long_name = 'Boumeries (En)'
    url = 'http://comics.boumerie.com'
    _categories = ('BOUMERIES', )
    date_format = "%B %d, %Y"
    lang = 'en_GB.UTF-8'
2985
2986
2987
class BoumerieFr(GenericBoumerie):
    """French edition of the Boumeries comics."""
    name = 'boumeries_fr'
    long_name = 'Boumeries (Fr)'
    url = 'http://bd.boumerie.com'
    _categories = ('BOUMERIES', 'FRANCAIS')
    date_format = "%B %d, %Y"  # Used to be "%A, %d %B %Y"
    lang = "fr_FR.utf8"
2995
2996
2997 View Code Duplication
class UnearthedComics(GenericNavigableComic):
    """Class to retrieve Unearthed comics."""
    # Also on http://tapastic.com/series/UnearthedComics
    # Also on https://unearthedcomics.tumblr.com
    name = 'unearthed'
    long_name = 'Unearthed Comics'
    url = 'http://unearthedcomics.com'
    _categories = ('UNEARTHED', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://unearthedcomics.com/comics/world-with-turn-signals/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        Reads the short link, title, description, publication date and image
        URLs from the parsed page `soup`. `link` is not used here.

        The title falls back to "" when the page has neither an h1 nor an h2;
        the description is None when there is no og:description meta tag.
        """
        short_url = soup.find('link', rel='shortlink')['href']
        title_elt = soup.find('h1') or soup.find('h2')
        title = title_elt.string if title_elt else ""
        desc_elt = soup.find('meta', property='og:description')
        # Keep the text of the meta tag rather than the Tag object itself,
        # as the other classes in this file do for og:description.
        desc = desc_elt.get('content') if desc_elt is not None else None
        date_str = soup.find('time', class_='published updated hidden')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        post = soup.find('div', class_="entry content entry-content type-portfolio")
        imgs = post.find_all('img')
        return {
            'title': title,
            'description': desc,
            'url2': short_url,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
3029
3030
3031
class Optipess(GenericNavigableComic):
    """Scraper for the Optipess comics."""
    name = 'optipess'
    long_name = 'Optipess'
    url = 'http://www.optipess.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary for the comic page parsed in `soup`.

        Pages without a 'comic' div yield an empty image list and an empty
        alt text. `link` is not used by this implementation.
        """
        post_title = soup.find('h2', class_='post-title').string
        post_author = soup.find("span", class_="post-author").find("a").string
        comic_div = soup.find('div', id='comic')
        if comic_div:
            pictures = comic_div.find_all('img')
        else:
            pictures = []
        if pictures:
            hover_text = pictures[0]['title']
        else:
            hover_text = ""
        # Every image must carry the same text in both alt and title.
        for picture in pictures:
            assert picture['alt'] == picture['title'] == hover_text
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, "%B %d, %Y")
        sources = [picture['src'] for picture in pictures]
        return {
            'title': post_title,
            'alt': hover_text,
            'author': post_author,
            'img': sources,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
3059
3060
3061
class PainTrainComic(GenericNavigableComic):
    """Class to retrieve Pain Train Comics."""
    name = 'paintrain'
    long_name = 'Pain Train Comics'
    url = 'http://paintraincomic.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        Reads the title, short link, comic number, images, alt text and date
        from the parsed page `soup`. The comic number is the `p` parameter of
        the short link. `link` is not used here.
        """
        title = soup.find('h2', class_='post-title').string
        short_url = soup.find('link', rel='shortlink')['href']
        # Escape the base URL so that its dots only match literal dots.
        short_url_re = re.compile('^%s/\\?p=([0-9]*)' % re.escape(cls.url))
        num = int(short_url_re.match(short_url).groups()[0])
        imgs = soup.find('div', id='comic').find_all('img')
        alt = imgs[0]['title']
        # Every image must carry the same text in both alt and title.
        assert all(i['alt'] == i['title'] == alt for i in imgs)
        date_str = soup.find('span', class_='post-date').string
        day = string_to_date(date_str, "%d/%m/%Y")
        return {
            'short_url': short_url,
            'num': num,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'alt': alt,
            'title': title,
        }
3091
3092
3093
class MoonBeard(GenericNavigableComic):
    """Class to retrieve MoonBeard comics."""
    # Also on http://squireseses.tumblr.com
    # Also on http://www.webtoons.com/en/comedy/moon-beard/list?title_no=471
    name = 'moonbeard'
    long_name = 'Moon Beard'
    url = 'http://moonbeard.com'
    _categories = ('MOONBEARD', )
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        Reads the title, short link, comic number, images, alt text, date,
        tags and author from the parsed page `soup`. The comic number is the
        `p` parameter of the short link; the tags are the article:tag meta
        values joined with spaces. `link` is not used here.
        """
        title = soup.find('h2', class_='post-title').string
        short_url = soup.find('link', rel='shortlink')['href']
        # Escape the base URL so that its dots only match literal dots.
        short_url_re = re.compile('^%s/\\?p=([0-9]*)' % re.escape(cls.url))
        num = int(short_url_re.match(short_url).groups()[0])
        imgs = soup.find('div', id='comic').find_all('img')
        alt = imgs[0]['title']
        # Every image must carry the same text in both alt and title.
        assert all(i['alt'] == i['title'] == alt for i in imgs)
        date_str = soup.find('span', class_='post-date').string
        day = string_to_date(date_str, "%B %d, %Y")
        tags = ' '.join(t['content'] for t in soup.find_all('meta', property='article:tag'))
        author = soup.find('span', class_='post-author').string
        return {
            'short_url': short_url,
            'num': num,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'title': title,
            'tags': tags,
            'alt': alt,
            'author': author,
        }
3130
3131
3132
class SystemComic(GenericNavigableComic):
    """Scraper for System Comic."""
    name = 'system'
    long_name = 'System Comic'
    url = 'http://www.systemcomic.com'
    get_navi_link = get_a_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the anchor inside the 'first' list item of the home page."""
        home = get_soup_at_url(cls.url)
        first_item = home.find('li', class_='first')
        return first_item.find('a')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary for the comic page parsed in `soup`.

        `link` is not used by this implementation.
        """
        og_title = soup.find('meta', property='og:title')['content']
        og_desc = soup.find('meta', property='og:description')['content']
        raw_date = soup.find('time')["datetime"]
        published = string_to_date(raw_date, "%Y-%m-%d")
        pictures = soup.find('figure').find_all('img')
        sources = [picture['src'] for picture in pictures]
        return {
            'title': og_title,
            'description': og_desc,
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': sources,
        }
3160
3161
3162 View Code Duplication
class LittleLifeLines(GenericNavigableComic):
    """Scraper for the Little Life Lines comics."""
    # Also on https://little-life-lines.tumblr.com
    name = 'life'
    long_name = 'Little Life Lines'
    url = 'http://www.littlelifelines.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.littlelifelines.com/comics/well-done'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor leading to the next (or previous) comic, if any."""
        # prev is next / next is prev
        css_class = 'prev' if next_ else 'next'
        item = last_soup.find('li', class_=css_class)
        if not item:
            return None
        return item.find('a')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary for the comic page parsed in `soup`.

        Only images that have a 'src' attribute are kept; the alt text is
        taken from the first of them. `link` is not used here.
        """
        og_title = soup.find('meta', property='og:title')['content']
        og_desc = soup.find('meta', property='og:description')['content']
        raw_date = soup.find('time', class_='published')['datetime']
        published = string_to_date(raw_date, "%Y-%m-%d")
        writer = soup.find('a', rel='author').string
        body = soup.find('div', class_="body entry-content")
        pictures = []
        for picture in body.find_all('img'):
            if picture.get('src') is not None:
                pictures.append(picture)
        first_alt = pictures[0]['alt']
        sources = [picture['src'] for picture in pictures]
        return {
            'title': og_title,
            'alt': first_alt,
            'description': og_desc,
            'author': writer,
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': sources,
        }
3201
3202
3203
class GenericWordPressInkblot(GenericNavigableComic):
    """Shared scraper for comics published with WordPress and Inkblot."""
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'first webcomic' anchor found on the home page."""
        first_link_class = 'webcomic-link webcomic1-link first-webcomic-link first-webcomic1-link'
        home = get_soup_at_url(cls.url)
        return home.find('a', class_=first_link_class)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary for the comic page parsed in `soup`.

        The date comes from the first 10 characters of the
        article:published_time meta value. `link` is not used here.
        """
        og_title = soup.find('meta', property='og:title')['content']
        pictures = soup.find('div', class_='webcomic-image').find_all('img')
        published_time = soup.find('meta', property='article:published_time')['content']
        published = string_to_date(published_time[:10], "%Y-%m-%d")
        sources = [picture['src'] for picture in pictures]
        return {
            'title': og_title,
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': sources,
        }
3226
3227
3228
class EverythingsStupid(GenericWordPressInkblot):
    """Scraper for the Everything's Stupid comics."""
    # Also on http://tapastic.com/series/EverythingsStupid
    # Also on http://www.webtoons.com/en/challenge/everythings-stupid/list?title_no=14591
    # Also on http://everythingsstupidcomics.tumblr.com
    long_name = "Everything's Stupid"
    name = 'stupid'
    url = 'http://everythingsstupid.net'
3236
3237
3238
class TheIsmComics(GenericDeletedComic, GenericWordPressInkblot):
    """Scraper for The Ism comics."""
    # Also on https://tapastic.com/series/TheIsm (?)
    long_name = "The Ism"
    name = 'theism'
    url = 'http://www.theism-comics.com'
3244
3245
3246
class WoodenPlankStudios(GenericWordPressInkblot):
    """Scraper for the Wooden Plank Studios comics."""
    long_name = 'Wooden Plank Studios'
    name = 'woodenplank'
    url = 'http://woodenplankstudios.com'
3251
3252
3253
class ElectricBunnyComic(GenericNavigableComic):
    """Scraper for the Electric Bunny comics."""
    # Also on http://electricbunnycomics.tumblr.com
    name = 'bunny'
    long_name = 'Electric Bunny Comic'
    url = 'http://www.electricbunnycomics.com/View/Comic/153/Welcome+to+Hell'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the parent element of the 'First' image on the page at cls.url."""
        first_img = get_soup_at_url(cls.url).find('img', alt='First')
        return first_img.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the parent of the 'Next' (or 'Back') image, or None without one."""
        wanted_alt = 'Next' if next_ else 'Back'
        button = last_soup.find('img', alt=wanted_alt)
        if not button:
            return None
        return button.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary from the og:title and og:image meta tags.

        `link` is not used by this implementation.
        """
        og_title = soup.find('meta', property='og:title')['content']
        og_images = soup.find_all('meta', property='og:image')
        sources = [meta['content'] for meta in og_images]
        return {
            'title': og_title,
            'img': sources,
        }
3281
3282
3283
class SheldonComics(GenericNavigableComic):
    """Class to retrieve Sheldon comics."""
    # Also on http://www.gocomics.com/sheldon
    name = 'sheldon'
    long_name = 'Sheldon Comics'
    url = 'http://www.sheldoncomics.com'

    @classmethod
    def get_first_comic_link(cls):
        """Get link to the first comic: the 'nav-first' anchor of the home page."""
        return get_soup_at_url(cls.url).find("a", id="nav-first")

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to the next or previous comic.

        Returns the first 'nav-next' (or 'nav-prev') anchor whose href is not
        the site's home URL, or None when there is no such anchor.
        """
        for link in last_soup.find_all("a", id="nav-next" if next_ else "nav-prev"):
            # Anchors pointing at the home page are not comic links; compare
            # against cls.url instead of repeating the URL literal.
            if link['href'] != cls.url:
                return link
        return None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        Expects exactly one image in the 'comic-foot' div; its title text is
        used as the comic title. `link` is not used here.
        """
        imgs = soup.find("div", id="comic-foot").find_all("img")
        assert all(i['alt'] == i['title'] for i in imgs)
        assert len(imgs) == 1, imgs
        title = imgs[0]['title']
        return {
            'title': title,
            'img': [i['src'] for i in imgs],
        }
3314
3315
3316
class ManVersusManatee(GenericNavigableComic):
    """Scraper for the Man Versus Manatee comics."""
    url = 'http://manvsmanatee.com'
    name = 'manvsmanatee'
    long_name = 'Man Versus Manatee'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_a_comicnavbase_comicnavnext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary for the comic page parsed in `soup`.

        `link` is not used by this implementation.
        """
        post_title = soup.find('h2', class_='post-title').string
        pictures = soup.find('div', id='comic').find_all('img')
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, "%B %d, %Y")
        sources = [picture['src'] for picture in pictures]
        return {
            'img': sources,
            'title': post_title,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
3338
3339
3340
class TheMeerkatguy(GenericNavigableComic):
    """Scraper for The Meerkatguy comics."""
    long_name = 'The Meerkatguy'
    url = 'http://www.themeerkatguy.com'
    name = 'meerkatguy'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_a_comicnavbase_comicnavnext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary from the page title and og:image metas.

        `link` is not used by this implementation.
        """
        page_title = soup.find('title').string
        og_images = soup.find_all('meta', property='og:image')
        sources = [meta['content'] for meta in og_images]
        return {
            'img': sources,
            'title': page_title,
        }
3357
3358
3359
class Ubertool(GenericNavigableComic):
    """Scraper for the Ubertool comics."""
    # Also on https://ubertool.tumblr.com
    # Also on https://tapastic.com/series/ubertool
    name = 'ubertool'
    long_name = 'Ubertool'
    url = 'http://ubertoolcomic.com'
    _categories = ('UBERTOOL', )
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_a_comicnavbase_comicnavnext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary for the comic page parsed in `soup`.

        `link` is not used by this implementation.
        """
        post_title = soup.find('h2', class_='post-title').string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find('div', id='comic').find_all('img')
        sources = [picture['src'] for picture in pictures]
        return {
            'img': sources,
            'title': post_title,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
3384
3385
3386
class EarthExplodes(GenericNavigableComic):
    """Scraper for The Earth Explodes comics."""
    name = 'earthexplodes'
    long_name = 'The Earth Explodes'
    url = 'http://www.earthexplodes.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.earthexplodes.com/comics/000/'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor whose id is 'next' (or 'prev')."""
        anchor_id = 'next' if next_ else 'prev'
        return last_soup.find('a', id=anchor_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary for the comic page parsed in `soup`.

        Image sources are joined to the site URL; the alt text is the title
        attribute of the first image, or '' when it has none. `link` is not
        used by this implementation.
        """
        page_title = soup.find('title').string
        pictures = soup.find('div', id='image').find_all('img')
        hover_text = pictures[0].get('title', '')
        sources = [urljoin_wrapper(cls.url, picture['src']) for picture in pictures]
        return {
            'img': sources,
            'title': page_title,
            'alt': hover_text,
        }
3411
3412
3413 View Code Duplication
class PomComics(GenericNavigableComic):
    """Scraper for Pom Comics."""
    name = 'pom'
    long_name = 'Pom Comics / Piece of Me'
    url = 'http://www.pomcomic.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'btn-first' anchor of the home page."""
        home = get_soup_at_url(cls.url)
        return home.find('a', class_='btn-first')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the 'btn-next' (or 'btn-prev') anchor of the page."""
        button_class = 'btn-next' if next_ else 'btn-prev'
        return last_soup.find('a', class_=button_class)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary for the comic page parsed in `soup`.

        Image sources are joined to the site URL. `link` is not used here.
        """
        heading = soup.find('h1').string
        og_desc = soup.find('meta', property='og:description')['content']
        keywords = soup.find('meta', attrs={'name': 'keywords'})['content']
        pictures = soup.find('div', class_='comic').find_all('img')
        sources = [urljoin_wrapper(cls.url, picture['src']) for picture in pictures]
        return {
            'title': heading,
            'desc': og_desc,
            'tags': keywords,
            'img': sources,
        }
3443
3444
3445
class CubeDrone(GenericComicNotWorking, GenericNavigableComic):  # Website has changed
    """Class to retrieve Cube Drone comics."""
    name = 'cubedrone'
    long_name = 'Cube Drone'
    url = 'http://cube-drone.com/comics'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to the first comic: the parent of the 'backward' glyph span."""
        return get_soup_at_url(cls.url).find('span', class_='glyphicon glyphicon-backward').parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to the next or previous comic.

        Returns the parent of the right (or left) chevron span, or None when
        the page has no such span.
        """
        class_ = 'glyphicon glyphicon-chevron-' + ('right' if next_ else 'left')
        span = last_soup.find('span', class_=class_)
        # A missing chevron means there is nothing to navigate to; return
        # None instead of failing on `.parent` of None.
        return span.parent if span is not None else None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        Reads the Twitter title and URL meta tags and the comic images; the
        'title2' and 'alt' entries come from the first image. `link` is not
        used here.
        """
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        url2 = soup.find('meta', attrs={'name': 'twitter:url'})['content']
        # date_str = soup.find('h2', class_='comic_title').find('small').string
        # day = string_to_date(date_str, "%B %d, %Y, %I:%M %p")
        imgs = soup.find_all('img', class_='comic img-responsive')
        title2 = imgs[0]['title']
        alt = imgs[0]['alt']
        return {
            'url2': url2,
            'title': title,
            'title2': title2,
            'alt': alt,
            'img': [i['src'] for i in imgs],
        }
3480
3481
3482
class MakeItStoopid(GenericDeletedComic, GenericNavigableComic):
    """Scraper for the Make It Stoopid comics."""
    name = 'stoopid'
    long_name = 'Make it stoopid'
    url = 'http://makeitstoopid.com/comic.php'

    @classmethod
    def get_nav(cls, soup):
        """Return the navigation elements of the page, None for those without href.

        The page is expected to hold the same 'cnav' elements twice; only the
        first five are returned.
        """
        buttons = soup.find_all(class_='cnav')
        first_half = buttons[:5]
        second_half = buttons[5:]
        assert first_half == second_half
        # begin, prev, archive, next_, end = first_half
        links = []
        for button in first_half:
            links.append(button if button.get('href') is not None else None)
        return links

    @classmethod
    def get_first_comic_link(cls):
        """Return the first navigation element of the page at cls.url."""
        home = get_soup_at_url(cls.url)
        return cls.get_nav(home)[0]

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the navigation element at index 3 (next) or 1 (previous)."""
        position = 3 if next_ else 1
        return cls.get_nav(last_soup)[position]

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary; the title is read from `link`."""
        link_title = link['title']
        pictures = soup.find_all('img', id='comicimg')
        sources = [picture['src'] for picture in pictures]
        return {
            'title': link_title,
            'img': sources,
        }
3516
3517
3518
class OffTheLeashDog(GenericNavigableComic):
    """Scraper for the Off The Leash Dog comics."""
    # Also on http://rupertfawcettsdoggyblog.tumblr.com
    # Also on http://www.rupertfawcettcartoons.com
    name = 'offtheleash'
    long_name = 'Off The Leash Dog'
    url = 'http://offtheleashdogcartoons.com'
    _categories = ('FAWCETT', )
    get_navi_link = get_a_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://offtheleashdogcartoons.com/uncategorized/can-i-help-you/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary from the entry title and content images.

        `link` is not used by this implementation.
        """
        entry_title = soup.find("h1", class_="entry-title").string
        pictures = soup.find('div', class_='entry-content').find_all('img')
        sources = [picture['src'] for picture in pictures]
        return {
            'title': entry_title,
            'img': sources,
        }
3539
3540
3541
class MacadamValley(GenericNavigableComic):
    """Scraper for the Macadam Valley comics."""
    name = 'macadamvalley'
    long_name = 'Macadam Valley'
    url = 'http://macadamvalley.com'
    get_navi_link = get_a_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://macadamvalley.com/le-debut-de-la-fin/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary for the comic page parsed in `soup`.

        Only the first image of the entry content is kept, and the date is
        the first 10 characters of the entry-date datetime attribute.
        `link` is not used by this implementation.
        """
        entry_title = soup.find("h1", class_="entry-title").string
        picture = soup.find('div', class_='entry-content').find('img')
        timestamp = soup.find('time', class_='entry-date')['datetime']
        published = string_to_date(timestamp[:10], "%Y-%m-%d")
        writer = soup.find('a', rel='author').string
        sources = [picture['src']]
        return {
            'title': entry_title,
            'img': sources,
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'author': writer,
        }
3567
3568
3569
class MarketoonistComics(GenericNavigableComic):
    """Scraper for the Marketoonist comics."""
    name = 'marketoonist'
    long_name = 'Marketoonist'
    url = 'https://marketoonist.com/cartoons'
    get_first_comic_link = simulate_first_link
    get_navi_link = get_link_rel_next
    first_url = 'https://marketoonist.com/2002/10/the-8-types-of-brand-managers-2.html'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary from the page's meta tags.

        The date is the first 10 characters of article:published_time.
        `link` is not used by this implementation.
        """
        og_images = soup.find_all('meta', property='og:image')
        published_time = soup.find('meta', property='article:published_time')['content']
        published = string_to_date(published_time[:10], "%Y-%m-%d")
        og_title = soup.find('meta', property='og:title')['content']
        sources = [meta['content'] for meta in og_images]
        return {
            'img': sources,
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'title': og_title,
        }
3592
3593
3594 View Code Duplication
class ConsoliaComics(GenericNavigableComic):
    """Scraper for the Consolia comics."""
    name = 'consolia'
    long_name = 'consolia'
    url = 'https://consolia-comic.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'first' anchor of the home page."""
        home = get_soup_at_url(cls.url)
        return home.find('a', class_='first')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the 'next' (or 'prev') anchor of the page."""
        anchor_class = 'next' if next_ else 'prev'
        return last_soup.find('a', class_=anchor_class)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary for the comic page parsed in `soup`.

        `link` is not used by this implementation.
        """
        og_title = soup.find('meta', property='og:title')['content']
        raw_date = soup.find('time')["datetime"]
        published = string_to_date(raw_date, "%Y-%m-%d")
        og_images = soup.find_all('meta', property='og:image')
        sources = [meta['content'] for meta in og_images]
        return {
            'title': og_title,
            'img': sources,
            'day': published.day,
            'month': published.month,
            'year': published.year,
        }
3625
3626
3627
class GenericBlogspotComic(GenericNavigableComic):
    """Shared scraper for comics hosted on Blogspot.

    Subclasses are expected to provide `first_url`.
    """
    get_first_comic_link = simulate_first_link
    first_url = NotImplemented
    _categories = ('BLOGSPOT', )

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the 'newer' pager anchor when `next_` is true, else the 'older' one."""
        if next_:
            pager_id = 'Blog1_blog-pager-newer-link'
        else:
            pager_id = 'Blog1_blog-pager-older-link'
        return last_soup.find('a', id=pager_id)
3637
3638
3639 View Code Duplication
class TuMourrasMoinsBete(GenericBlogspotComic):
    """Scraper for the Tu Mourras Moins Bete comics."""
    name = 'mourrasmoinsbete'
    long_name = 'Tu Mourras Moins Bete'
    url = 'http://tumourrasmoinsbete.blogspot.fr'
    _categories = ('FRANCAIS', )
    first_url = 'http://tumourrasmoinsbete.blogspot.fr/2008/06/essai.html'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary for the comic page parsed in `soup`.

        `link` is not used by this implementation.
        """
        page_title = soup.find('title').string
        article = soup.find('div', itemprop='description articleBody')
        pictures = article.find_all('img')
        writer = soup.find('span', itemprop='author').string
        sources = [picture['src'] for picture in pictures]
        return {
            'img': sources,
            'author': writer,
            'title': page_title,
        }
3658
3659
3660
class Octopuns(GenericBlogspotComic):
    """Scraper for the Octopuns comics."""
    # Also on http://octopuns.tumblr.com
    name = 'octopuns'
    long_name = 'Octopuns'
    url = 'http://www.octopuns.net'  # or http://octopuns.blogspot.fr/
    first_url = 'http://octopuns.blogspot.com/2010/12/17122010-always-read-label.html'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary for the comic page parsed in `soup`.

        Image URLs are read from the 'image_src' link elements. `link` is
        not used by this implementation.
        """
        post_title = soup.find('h3', class_='post-title entry-title').string
        raw_date = soup.find('h2', class_='date-header').string
        published = string_to_date(raw_date, "%A, %B %d, %Y")
        image_links = soup.find_all('link', rel='image_src')
        sources = [image_link['href'] for image_link in image_links]
        return {
            'img': sources,
            'title': post_title,
            'day': published.day,
            'month': published.month,
            'year': published.year,
        }
3682
3683
3684
class GeekAndPoke(GenericNavigableComic):
    """Class to retrieve Geek And Poke comics."""
    name = 'geek'
    long_name = 'Geek And Poke'
    url = 'http://geek-and-poke.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://geek-and-poke.com/geekandpoke/2006/8/27/a-new-place-for-a-not-so-old-blog.html'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to the next or previous comic.

        The 'prev-item' anchor is used for the next comic and 'next-item'
        for the previous one.
        """
        return last_soup.find('a', class_='prev-item' if next_ else 'next-item')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        Reads the title, description, date, author and images from the
        parsed page `soup`. The description is "" when the page has no
        og:description meta tag, and the alt text is "" when no image with a
        'src' attribute is found. `link` is not used here.
        """
        title = soup.find('meta', property='og:title')['content']
        desc = soup.find('meta', property='og:description')
        desc_str = "" if desc is None else desc['content']
        date_str = soup.find('time', class_='published')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        author = soup.find('a', rel='author').string
        div_content = (soup.find('div', class_="body entry-content") or
                       soup.find('div', class_="special-content"))
        imgs = div_content.find_all('img')
        imgs = [i for i in imgs if i.get('src') is not None]
        # NOTE(review): `in` on a bs4 Tag looks at its children rather than
        # its attributes, so this check may never compare alt and title;
        # confirm before tightening it.
        assert all('title' not in i or i['alt'] == i['title'] for i in imgs)
        # Always a string: fall back to "" (not a list) when no image exists.
        alt = imgs[0].get('alt', "") if imgs else ""
        return {
            'title': title,
            'alt': alt,
            'description': desc_str,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
        }
3723
3724
3725 View Code Duplication
class GloryOwlComix(GenericBlogspotComic):
    """Scraper for the Glory Owl comics."""
    name = 'gloryowl'
    long_name = 'Glory Owl'
    url = 'http://gloryowlcomix.blogspot.fr'
    _categories = ('NSFW', 'FRANCAIS')
    first_url = 'http://gloryowlcomix.blogspot.fr/2013/02/1_7.html'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary for the comic page parsed in `soup`.

        Image URLs are read from the 'image_src' link elements. `link` is
        not used by this implementation.
        """
        page_title = soup.find('title').string
        image_links = soup.find_all('link', rel='image_src')
        writer = soup.find('a', rel='author').string
        sources = [image_link['href'] for image_link in image_links]
        return {
            'img': sources,
            'author': writer,
            'title': page_title,
        }
3744
3745
3746 View Code Duplication
class GenericSquareSpace(GenericNavigableComic):
    """Base retriever for comics published on SquareSpace sites.

    Subclasses must provide get_images."""
    _categories = ('SQUARESPACE', )
    get_first_comic_link = simulate_first_link
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the navigation anchor leading to the next or previous comic."""
        # NOTE(review): the 'prevLink' anchor is used to reach the *next* comic,
        # presumably because the site lists posts newest-first - confirm.
        if next_:
            anchor_id = 'prevLink'
        else:
            anchor_id = 'nextLink'
        return last_soup.find('a', id=anchor_id)

    @classmethod
    def get_images(cls, soup):
        """Return the list of image URLs for a comic (to be overridden)."""
        raise NotImplementedError

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect title, description, date, author and images of a comic page."""
        og_title = soup.find('meta', property='og:title')['content']
        og_description = soup.find('meta', property='og:description')['content']
        published = soup.find('time', itemprop='datePublished')["datetime"]
        pub_date = string_to_date(published, "%Y-%m-%d")
        author_name = soup.find('a', rel='author').string
        info = {'title': og_title}
        info['img'] = cls.get_images(soup)
        info['month'] = pub_date.month
        info['year'] = pub_date.year
        info['day'] = pub_date.day
        info['author'] = author_name
        info['description'] = og_description
        return info
3779
3780
3781
class AtRandomComics(GenericSquareSpace):
    """Retriever for At Random Comics (SquareSpace site)."""
    url = 'http://www.atrandomcomics.com'
    first_url = 'http://www.atrandomcomics.com/at-random-comics-home/2015/5/5/can-of-worms'
    name = 'atrandom'
    long_name = 'At Random Comics'

    @classmethod
    def get_images(cls, soup):
        """Return the URLs found in the page's og:image meta tags."""
        urls = []
        for meta in soup.find_all('meta', property='og:image'):
            urls.append(meta['content'])
        return urls
3793
3794
3795
class NothingSuspicious(GenericSquareSpace):
    """Retriever for the Nothing Suspicious comics (SquareSpace site)."""
    url = 'https://nothingsuspicio.us'
    first_url = 'https://nothingsuspicio.us/?offset=1483592400908'
    name = 'nothingsuspicious'
    long_name = 'Nothing Suspicious'

    @classmethod
    def get_images(cls, soup):
        """Return a one-element list: the first image of the content wrapper."""
        wrapper = soup.find('div', class_='content-wrapper')
        first_img = wrapper.find('img')
        return [first_img['src']]
3807
3808
3809
class DeathBulge(GenericComic):
    """Class to retrieve the DeathBulge comics."""
    name = 'deathbulge'
    long_name = 'Death Bulge'
    url = 'http://www.deathbulge.com'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generator to get the next comic. Implementation of GenericComic's abstract method.

        The first API page ('api/comics/1') is loaded only to read the
        pagination bounds; every comic is then fetched individually from
        'api/comics/<num>' and yielded as a dictionary.
        """
        json_url = urljoin_wrapper(cls.url, 'api/comics/1')
        # Not called 'json' so that the json module imported by this file is
        # not shadowed inside the method.
        first_page = load_json_at_url(json_url)
        pagination = first_page['pagination_links']
        first_num = last_comic['num'] if last_comic else pagination['first']
        last_num = pagination['last']
        # NOTE(review): these bounds skip pagination['first'] on a fresh start
        # and never reach pagination['last']; kept unchanged - confirm against
        # the API whether this is intended.
        for num in range(first_num + 1, last_num):
            json_url = urljoin_wrapper(cls.url, 'api/comics/%d' % num)
            comic_json = load_json_at_url(json_url)['comic']
            # Only the date part (YYYY-MM-DD) of the timestamp is used.
            date_str = comic_json['timestamp'][:10]
            day = string_to_date(date_str, "%Y-%m-%d")
            comic_id = comic_json['id']  # not exactly 'num' o_O
            yield {
                'json_url': json_url,
                'num': comic_id,
                'url': urljoin_wrapper(cls.url, 'comics/%d' % num),
                'alt': comic_json['alt_text'],
                'title': comic_json['title'],
                'img': [urljoin_wrapper(cls.url, comic_json['comic'])],
                'month': day.month,
                'year': day.year,
                'day': day.day,
            }
3842
3843
3844
class GenericTumblrV1(GenericComic):
    """Base retriever for comics published on Tumblr, using the V1 API."""
    _categories = ('TUMBLR', )

    @classmethod
    def get_next_comic(cls, last_comic):
        """Yield the comics built from the posts returned by get_posts.

        Posts for which get_comic_info returns None are skipped."""
        for post in cls.get_posts(last_comic):
            info = cls.get_comic_info(post)
            if info is None:
                continue
            yield info

    @classmethod
    def check_url(cls, url):
        """Print a warning if url does not start with cls.url; return url as is."""
        is_under_site = url.startswith(cls.url)
        if not is_under_site:
            print("url '%s' does not start with '%s'" % (url, cls.url))
        return url

    @classmethod
    def get_url_from_post(cls, post):
        """Return the (checked) url of a post."""
        post_url = post['url']
        return cls.check_url(post_url)

    @classmethod
    def get_api_url(cls):
        """Return the url of the API entry point for this comic."""
        return urljoin_wrapper(cls.url, '/api/read/')

    @classmethod
    def get_api_url_for_id(cls, tumblr_id):
        """Return the API url used to query a single post by its id."""
        entry_point = cls.get_api_url()
        return entry_point + '?id=%d' % (tumblr_id)

    @classmethod
    def get_comic_info(cls, post):
        """Build the comic dictionary for a post; None if it is not a photo post."""
        if post['type'] != 'photo':
            return None
        tumblr_id = int(post['id'])
        api_url = cls.get_api_url_for_id(tumblr_id)
        timestamp = int(post['unix-timestamp'])
        published = datetime.datetime.fromtimestamp(timestamp).date()
        caption = post.find('photo-caption')
        if caption:
            title = caption.string
        else:
            title = ""
        tags = ' '.join(tag.string for tag in post.find_all('tag'))
        # Pictures can be wrapped in 'photo' tags or sit directly in the post
        containers = post.find_all('photo') or [post]
        # Several resolutions are available for each picture: keep the first
        photo_urls = [container.find('photo-url') for container in containers]
        return {
            'url': cls.get_url_from_post(post),
            'url2': post['url-with-slug'],
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'title': title,
            'tags': tags,
            'img': [photo_url.string for photo_url in photo_urls],
            'tumblr-id': tumblr_id,
            'api_url': api_url,
        }

    @classmethod
    def get_posts(cls, last_comic, nb_post_per_call=10):
        """Retrieve posts through the API (nb_post_per_call is 50 at most).

        The tumblr v1 api serves posts from the newest to the oldest; they
        are accumulated until the post of last_comic is met (or until the end
        when there is no last_comic) and handed back in chronological order."""
        wanted_id = last_comic['tumblr-id'] if last_comic else None
        collected = []
        if last_comic is not None:
            cls.check_url(last_comic['api_url'])
            # A deleted previous post would make us browse the whole history
            # looking for something that is not there anymore: checking it
            # first allows to fail early and with a clear message.
            previous_api_url = cls.get_api_url_for_id(wanted_id)
            try:
                get_soup_at_url(previous_api_url)
            except urllib.error.HTTPError:
                try:
                    get_soup_at_url(cls.url)
                except urllib.error.HTTPError:
                    print("Did not find previous post nor main url %s" % cls.url)
                else:
                    print("Did not find previous post %s : it might have been deleted" % previous_api_url)
                return reversed(collected)
        api_url = cls.get_api_url()
        listing = get_soup_at_url(api_url).find('posts')
        if listing is None:
            print("Could not get post info from url %s - problem with GDPR diclaimer?" % api_url)
            return []
        start = int(listing['start'])
        total = int(listing['total'])
        assert start == 0
        for offset in range(0, total, nb_post_per_call):
            page_url = api_url + '?start=%d&num=%d' % (offset, nb_post_per_call)
            page = get_soup_at_url(page_url).find('posts')
            page_start = int(page['start'])
            page_total = int(page['total'])
            assert offset == page_start, "%d != %d" % (offset, page_start)
            # The total may change between calls: not handled for the time being
            assert total == page_total, "%d != %d" % (total, page_total)
            for post in page.find_all('post'):
                post_id = int(post['id'])
                if wanted_id and wanted_id == post_id:
                    return reversed(collected)
                collected.append(post)
        if wanted_id is None:
            return reversed(collected)
        print("Did not find %s : there might be a problem" % wanted_id)
        return []
3954
3955
3956
class SaturdayMorningBreakfastCerealTumblr(GenericTumblrV1):
    """Retriever for Saturday Morning Breakfast Cereal, based on GenericTumblrV1."""
    # Other sources for the same comic:
    #   - http://www.gocomics.com/saturday-morning-breakfast-cereal
    #   - http://www.smbc-comics.com
    url = 'http://smbc-comics.tumblr.com'
    name = 'smbc-tumblr'
    long_name = 'Saturday Morning Breakfast Cereal (from Tumblr)'
    _categories = ('SMBC', )
3964
3965
3966
class AHammADay(GenericTumblrV1):
    """Retriever for the A Hamm A Day comics, based on GenericTumblrV1."""
    url = 'http://www.ahammaday.com'
    name = 'hamm'
    long_name = 'A Hamm A Day'
3971
3972
3973
class IrwinCardozo(GenericTumblrV1):
    """Retriever for the Irwin Cardozo comics, based on GenericTumblrV1."""
    url = 'http://irwincardozocomics.tumblr.com'
    name = 'irwinc'
    long_name = 'Irwin Cardozo'
3978
3979
3980
class AccordingToDevin(GenericTumblrV1):
    """Retriever for the According To Devin comics, based on GenericTumblrV1."""
    url = 'http://accordingtodevin.tumblr.com'
    name = 'devin'
    long_name = 'According To Devin'
3985
3986
3987
class ItsTheTieTumblr(GenericTumblrV1):
    """Retriever for the It's the tie comics, based on GenericTumblrV1."""
    # Other sources for the same comic:
    #   - http://itsthetie.com
    #   - https://tapastic.com/series/itsthetie
    url = "http://itsthetie.tumblr.com"
    name = 'tie-tumblr'
    long_name = "It's the tie (from Tumblr)"
    _categories = ('TIE', )
3995
3996
3997
class OctopunsTumblr(GenericTumblrV1):
    """Retriever for the Octopuns comics, based on GenericTumblrV1."""
    # Other source for the same comic: http://www.octopuns.net
    url = 'http://octopuns.tumblr.com'
    name = 'octopuns-tumblr'
    long_name = 'Octopuns (from Tumblr)'
4003
4004
4005
class PicturesInBoxesTumblr(GenericTumblrV1):
    """Retriever for the Pictures In Boxes comics, based on GenericTumblrV1."""
    # Other source for the same comic: http://www.picturesinboxes.com
    url = 'https://picturesinboxescomic.tumblr.com'
    name = 'picturesinboxes-tumblr'
    long_name = 'Pictures in Boxes (from Tumblr)'
4011
4012
4013
class TubeyToonsTumblr(GenericTumblrV1):
    """Retriever for the TubeyToons comics, based on GenericTumblrV1."""
    # Other sources for the same comic:
    #   - http://tapastic.com/series/Tubey-Toons
    #   - http://tubeytoons.com
    url = 'https://tubeytoons.tumblr.com'
    name = 'tubeytoons-tumblr'
    long_name = 'Tubey Toons (from Tumblr)'
    _categories = ('TUNEYTOONS', )
4021
4022
4023
class UnearthedComicsTumblr(GenericTumblrV1):
    """Retriever for the Unearthed comics, based on GenericTumblrV1."""
    # Other sources for the same comic:
    #   - http://tapastic.com/series/UnearthedComics
    #   - http://unearthedcomics.com
    url = 'https://unearthedcomics.tumblr.com'
    name = 'unearthed-tumblr'
    long_name = 'Unearthed Comics (from Tumblr)'
    _categories = ('UNEARTHED', )
4031
4032
4033
class PieComic(GenericTumblrV1):
    """Retriever for the Pie Comic comics, based on GenericTumblrV1."""
    url = "http://piecomic.tumblr.com"
    name = 'pie'
    long_name = 'Pie Comic'
4038
4039
4040
class MrEthanDiamond(GenericTumblrV1):
    """Retriever for the Mr Ethan Diamond comics, based on GenericTumblrV1."""
    url = 'http://mrethandiamond.tumblr.com'
    name = 'diamond'
    long_name = 'Mr Ethan Diamond'
4045
4046
4047
class Flocci(GenericTumblrV1):
    """Retriever for the floccinaucinihilipilification comics, based on GenericTumblrV1."""
    url = "http://floccinaucinihilipilificationa.tumblr.com"
    name = 'flocci'
    long_name = 'floccinaucinihilipilification'
4052
4053
4054
class UpAndOut(GenericTumblrV1):
    """Retriever for the Up & Out comics, based on GenericTumblrV1."""
    # Other source for the same comic: http://tapastic.com/series/UP-and-OUT
    url = 'http://upandoutcomic.tumblr.com'
    name = 'upandout'
    long_name = 'Up And Out (from Tumblr)'
4060
4061
4062
class Pundemonium(GenericTumblrV1):
    """Retriever for the Pundemonium comics, based on GenericTumblrV1."""
    url = 'http://monstika.tumblr.com'
    name = 'pundemonium'
    long_name = 'Pundemonium'
4067
4068
4069
class PoorlyDrawnLinesTumblr(GenericTumblrV1):
    """Retriever for the Poorly Drawn Lines comics, based on GenericTumblrV1."""
    # Other source for the same comic: http://poorlydrawnlines.com
    url = 'http://pdlcomics.tumblr.com'
    name = 'poorlydrawn-tumblr'
    long_name = 'Poorly Drawn Lines (from Tumblr)'
    _categories = ('POORLYDRAWN', )
4076
4077
4078
class PearShapedComics(GenericTumblrV1):
    """Retriever for the Pear Shaped Comics, based on GenericTumblrV1."""
    url = 'http://pearshapedcomics.com'
    name = 'pearshaped'
    long_name = 'Pear-Shaped Comics'
4083
4084
4085
class PondScumComics(GenericTumblrV1):
    """Retriever for the Pond Scum Comics, based on GenericTumblrV1."""
    url = 'http://pondscumcomic.tumblr.com'
    name = 'pond'
    long_name = 'Pond Scum'
4090
4091
4092
class MercworksTumblr(GenericTumblrV1):
    """Retriever for the Mercworks comics, based on GenericTumblrV1."""
    # Other sources for the same comic:
    #   - http://mercworks.net
    #   - http://www.webtoons.com/en/comedy/mercworks/list?title_no=426
    #   - https://tapastic.com/series/MercWorks
    url = 'http://mercworks.tumblr.com'
    name = 'mercworks-tumblr'
    long_name = 'Mercworks (from Tumblr)'
    _categories = ('MERCWORKS', )
4101
4102
4103
class OwlTurdTumblr(GenericTumblrV1):
    """Retriever for the Owl Turd / Shen comix, based on GenericTumblrV1."""
    # Other source for the same comic: https://tapas.io/series/Shen-Comix
    url = 'http://shencomix.com'
    name = 'owlturd-tumblr'
    long_name = 'Owl Turd / Shen Comix (from Tumblr)'
    _categories = ('OWLTURD', 'SHENCOMIX')
4110
4111
4112
class VectorBelly(GenericTumblrV1):
    """Retriever for the Vector Belly comics, based on GenericTumblrV1."""
    # Other source for the same comic: http://vectorbelly.com
    url = 'http://vectorbelly.tumblr.com'
    name = 'vector'
    long_name = 'Vector Belly'
4118
4119
4120
class GoneIntoRapture(GenericTumblrV1):
    """Retriever for the Gone Into Rapture comics, based on GenericTumblrV1."""
    # Other sources for the same comic:
    #   - http://goneintorapture.tumblr.com
    #   - http://tapastic.com/series/Goneintorapture
    url = 'http://goneintorapture.com'
    name = 'rapture'
    long_name = 'Gone Into Rapture'
4127
4128
4129
class TheOatmealTumblr(GenericTumblrV1):
    """Retriever for The Oatmeal comics, based on GenericTumblrV1."""
    # Other source for the same comic: http://theoatmeal.com
    url = 'http://oatmeal.tumblr.com'
    name = 'oatmeal-tumblr'
    long_name = 'The Oatmeal (from Tumblr)'
4135
4136
4137
class HeckIfIKnowComicsTumblr(GenericTumblrV1):
    """Retriever for the Heck If I Know Comics, based on GenericTumblrV1."""
    # Other source for the same comic: http://tapastic.com/series/Regular
    url = 'http://heckifiknowcomics.com'
    name = 'heck-tumblr'
    long_name = 'Heck if I Know comics (from Tumblr)'
4143
4144
4145
class MyJetPack(GenericTumblrV1):
    """Retriever for the My Jet Pack comics, based on GenericTumblrV1."""
    url = 'http://myjetpack.tumblr.com'
    name = 'jetpack'
    long_name = 'My Jet Pack'
4150
4151
4152
class CheerUpEmoKidTumblr(GenericTumblrV1):
    """Retriever for the CheerUpEmoKid comics, based on GenericTumblrV1."""
    # Other sources for the same comic:
    #   - http://www.cheerupemokid.com
    #   - http://tapastic.com/series/CUEK
    url = 'https://enzocomics.tumblr.com'
    name = 'cuek-tumblr'
    long_name = 'Cheer Up Emo Kid (from Tumblr)'
4159
4160
4161
class ForLackOfABetterComic(GenericTumblrV1):
    """Retriever for the For Lack Of A Better Comics, based on GenericTumblrV1."""
    # Other source for the same comic: http://forlackofabettercomic.com
    url = 'http://forlackofabettercomic.tumblr.com'
    name = 'lack'
    long_name = 'For Lack Of A Better Comic'
4167
4168
4169
class ZenPencilsTumblr(GenericTumblrV1):
    """Retriever for the ZenPencils comics, based on GenericTumblrV1."""
    # Other sources for the same comic:
    #   - http://zenpencils.com
    #   - http://www.gocomics.com/zen-pencils
    url = 'http://zenpencils.tumblr.com'
    name = 'zenpencils-tumblr'
    long_name = 'Zen Pencils (from Tumblr)'
    _categories = ('ZENPENCILS', )
4177
4178
4179
class ThreeWordPhraseTumblr(GenericTumblrV1):
    """Retriever for the Three Word Phrase comics, based on GenericTumblrV1."""
    # Other source for the same comic: http://threewordphrase.com
    url = 'http://threewordphrase.tumblr.com'
    name = 'threeword-tumblr'
    long_name = 'Three Word Phrase (from Tumblr)'
4185
4186
4187
class TimeTrabbleTumblr(GenericTumblrV1):
    """Retriever for the Time Trabble comics, based on GenericTumblrV1."""
    # Other source for the same comic: http://timetrabble.com
    url = 'http://timetrabble.tumblr.com'
    name = 'timetrabble-tumblr'
    long_name = 'Time Trabble (from Tumblr)'
4193
4194
4195
class SafelyEndangeredTumblr(GenericTumblrV1):
    """Retriever for the Safely Endangered comics, based on GenericTumblrV1."""
    # Other source for the same comic: http://www.safelyendangered.com
    url = 'http://tumblr.safelyendangered.com'
    name = 'endangered-tumblr'
    long_name = 'Safely Endangered (from Tumblr)'
4201
4202
4203
class MouseBearComedyTumblr(GenericTumblrV1):
    """Retriever for the Mouse Bear Comedy comics, based on GenericTumblrV1."""
    # Other source for the same comic: http://www.mousebearcomedy.com
    url = 'http://mousebearcomedy.tumblr.com'
    name = 'mousebear-tumblr'
    long_name = 'Mouse Bear Comedy (from Tumblr)'
4209
4210
4211
class BouletCorpTumblr(GenericTumblrV1):
    """Retriever for the BouletCorp comics, based on GenericTumblrV1."""
    # Other source for the same comic: http://www.bouletcorp.com
    url = 'https://bouletcorp.tumblr.com'
    name = 'boulet-tumblr'
    long_name = 'Boulet Corp (from Tumblr)'
    _categories = ('BOULET', )
4218
4219
4220
class TheAwkwardYetiTumblr(GenericTumblrV1):
    """Retriever for The Awkward Yeti comics, based on GenericTumblrV1."""
    # Other sources for the same comic:
    #   - http://www.gocomics.com/the-awkward-yeti
    #   - http://theawkwardyeti.com
    #   - https://tapastic.com/series/TheAwkwardYeti
    url = 'http://larstheyeti.tumblr.com'
    name = 'yeti-tumblr'
    long_name = 'The Awkward Yeti (from Tumblr)'
    _categories = ('YETI', )
4229
4230
4231
class NellucNhoj(GenericTumblrV1):
    """Retriever for the NellucNhoj comics, based on GenericTumblrV1."""
    url = 'http://nellucnhoj.com'
    name = 'nhoj'
    long_name = 'Nelluc Nhoj'
4236
4237
4238
class DownTheUpwardSpiralTumblr(GenericTumblrV1):
    """Retriever for the Down The Upward Spiral comics, based on GenericTumblrV1."""
    # Other sources for the same comic:
    #   - http://www.downtheupwardspiral.com
    #   - https://tapastic.com/series/Down-the-Upward-Spiral
    url = 'http://downtheupwardspiral.tumblr.com'
    name = 'spiral-tumblr'
    long_name = 'Down the Upward Spiral (from Tumblr)'
4245
4246
4247
class AsPerUsualTumblr(GenericTumblrV1):
    """Class to retrieve As Per Usual comics."""
    # Also on https://tapastic.com/series/AsPerUsual
    name = 'usual-tumblr'
    long_name = 'As Per Usual (from Tumblr)'
    url = 'http://as-per-usual.tumblr.com'
    # Declared as '_categories' (leading underscore), the attribute name used
    # by the other comic classes, instead of the former 'categories'.
    _categories = ('DAMILEE', )
4254
4255
4256
class HotComicsForCoolPeopleTumblr(GenericTumblrV1):
    """Class to retrieve Hot Comics For Cool People."""
    # Also on https://tapastic.com/series/Hot-Comics-For-Cool-People
    # Also on http://hotcomics.biz (links to tumblr)
    # Also on http://hcfcp.com (links to tumblr)
    name = 'hotcomics-tumblr'
    long_name = 'Hot Comics For Cool People (from Tumblr)'
    url = 'http://hotcomicsforcoolpeople.tumblr.com'
    # Declared as '_categories' (leading underscore), the attribute name used
    # by the other comic classes, instead of the former 'categories'.
    _categories = ('DAMILEE', )
4265
4266
4267
class OneOneOneOneComicTumblr(GenericTumblrV1):
    """Retriever for the 1111 Comics, based on GenericTumblrV1."""
    # Other sources for the same comic:
    #   - http://www.1111comics.me
    #   - https://tapastic.com/series/1111-Comics
    url = 'http://comics1111.tumblr.com'
    name = '1111-tumblr'
    long_name = '1111 Comics (from Tumblr)'
    _categories = ('ONEONEONEONE', )
4275
4276
4277
class JhallComicsTumblr(GenericTumblrV1):
    """Retriever for the Jhall Comics, based on GenericTumblrV1."""
    # Other source for the same comic: http://jhallcomics.com
    url = 'http://jhallcomics.tumblr.com'
    name = 'jhall-tumblr'
    long_name = 'Jhall Comics (from Tumblr)'
4283
4284
4285
class BerkeleyMewsTumblr(GenericTumblrV1):
    """Retriever for the Berkeley Mews comics, based on GenericTumblrV1."""
    # Other sources for the same comic:
    #   - http://www.gocomics.com/berkeley-mews
    #   - http://www.berkeleymews.com
    url = 'http://mews.tumblr.com'
    name = 'berkeley-tumblr'
    long_name = 'Berkeley Mews (from Tumblr)'
    _categories = ('BERKELEY', )
4293
4294
4295
class JoanCornellaTumblr(GenericTumblrV1):
    """Retriever for the Joan Cornella comics, based on GenericTumblrV1."""
    # Other source for the same comic: http://joancornella.net
    url = 'http://cornellajoan.tumblr.com'
    name = 'cornella-tumblr'
    long_name = 'Joan Cornella (from Tumblr)'
4301
4302
4303
class RespawnComicTumblr(GenericTumblrV1):
    """Retriever for the Respawn Comic, based on GenericTumblrV1."""
    # Other source for the same comic: http://respawncomic.com
    url = 'https://respawncomic.tumblr.com'
    name = 'respawn-tumblr'
    long_name = 'Respawn Comic (from Tumblr)'
4309
4310
4311
class ChrisHallbeckTumblr(GenericTumblrV1):
    """Retriever for the Chris Hallbeck comics, based on GenericTumblrV1."""
    # Other sources for the same comic:
    #   - https://tapastic.com/ChrisHallbeck
    #   - http://maximumble.com
    #   - http://minimumble.com
    #   - http://thebookofbiff.com
    url = 'https://chrishallbeck.tumblr.com'
    name = 'hallbeck-tumblr'
    long_name = 'Chris Hallback (from Tumblr)'
    _categories = ('HALLBACK', )
4321
4322
4323
class ComicNuggets(GenericTumblrV1):
    """Retriever for the Comic Nuggets, based on GenericTumblrV1."""
    url = 'http://comicnuggets.com'
    name = 'nuggets'
    long_name = 'Comic Nuggets'
4328
4329
4330
class PigeonGazetteTumblr(GenericTumblrV1):
    """Retriever for The Pigeon Gazette comics, based on GenericTumblrV1."""
    # Other source for the same comic: https://tapastic.com/series/The-Pigeon-Gazette
    url = 'http://thepigeongazette.tumblr.com'
    name = 'pigeon-tumblr'
    long_name = 'The Pigeon Gazette (from Tumblr)'
4336
4337
4338
class CancerOwl(GenericTumblrV1):
    """Retriever for the Cancer Owl comics, based on GenericTumblrV1."""
    # Other source for the same comic: http://cancerowl.com
    url = 'http://cancerowl.tumblr.com'
    name = 'cancerowl-tumblr'
    long_name = 'Cancer Owl (from Tumblr)'
4344
4345
4346
class FowlLanguageTumblr(GenericTumblrV1):
    """Retriever for the Fowl Language comics, based on GenericTumblrV1."""
    # Other sources for the same comic:
    #   - http://www.fowllanguagecomics.com
    #   - http://tapastic.com/series/Fowl-Language-Comics
    #   - http://www.gocomics.com/fowl-language
    url = 'http://fowllanguagecomics.tumblr.com'
    name = 'fowllanguage-tumblr'
    long_name = 'Fowl Language Comics (from Tumblr)'
    _categories = ('FOWLLANGUAGE', )
4355
4356
4357
class TheOdd1sOutTumblr(GenericTumblrV1):
    """Retriever for The Odd 1s Out comics, based on GenericTumblrV1."""
    # Other sources for the same comic:
    #   - http://theodd1sout.com
    #   - https://tapastic.com/series/Theodd1sout
    url = 'http://theodd1sout.tumblr.com'
    name = 'theodd-tumblr'
    long_name = 'The Odd 1s Out (from Tumblr)'
4364
4365
4366
class TheUnderfoldTumblr(GenericTumblrV1):
    """Retriever for The Underfold comics, based on GenericTumblrV1."""
    # Other source for the same comic: http://theunderfold.com
    url = 'http://theunderfold.tumblr.com'
    name = 'underfold-tumblr'
    long_name = 'The Underfold (from Tumblr)'
4372
4373
4374
class LolNeinTumblr(GenericTumblrV1):
    """Retriever for the Lol Nein comics, based on GenericTumblrV1."""
    # Other source for the same comic: http://lolnein.com
    url = 'http://lolneincom.tumblr.com'
    name = 'lolnein-tumblr'
    long_name = 'Lol Nein (from Tumblr)'
4380
4381
4382
class FatAwesomeComicsTumblr(GenericTumblrV1):
    """Retriever for the Fat Awesome Comics, based on GenericTumblrV1."""
    # Other source for the same comic: http://fatawesome.com/comics
    url = 'http://fatawesomecomedy.tumblr.com'
    name = 'fatawesome-tumblr'
    long_name = 'Fat Awesome (from Tumblr)'
4388
4389
4390
class TheWorldIsFlatTumblr(GenericTumblrV1):
    """Retriever for The World Is Flat Comics, based on GenericTumblrV1."""
    # Other source for the same comic: https://tapastic.com/series/The-World-is-Flat
    url = 'http://theworldisflatcomics.com'
    name = 'flatworld-tumblr'
    long_name = 'The World Is Flat (from Tumblr)'
4396
4397
4398
class DorrisMc(GenericTumblrV1):
    """Retriever for the Dorris Mc Comics, based on GenericTumblrV1."""
    # Other source for the same comic: http://www.gocomics.com/dorris-mccomics
    url = 'http://dorrismccomics.com'
    name = 'dorrismc'
    long_name = 'Dorris Mc'
4404
4405
4406
class LeleozTumblr(GenericDeletedComic, GenericTumblrV1):
    """Retriever for the Leleoz comics (GenericDeletedComic + GenericTumblrV1)."""
    # Other source for the same comic: https://tapastic.com/series/Leleoz
    url = 'http://leleozcomics.tumblr.com'
    name = 'leleoz-tumblr'
    long_name = 'Leleoz (from Tumblr)'
4412
4413
4414
class MoonBeardTumblr(GenericTumblrV1):
    """Retriever for the MoonBeard comics, based on GenericTumblrV1."""
    # Other sources for the same comic:
    #   - http://moonbeard.com
    #   - http://www.webtoons.com/en/comedy/moon-beard/list?title_no=471
    url = 'http://squireseses.tumblr.com'
    name = 'moonbeard-tumblr'
    long_name = 'Moon Beard (from Tumblr)'
    _categories = ('MOONBEARD', )
4422
4423
4424
class AComik(GenericTumblrV1):
    """Retriever for the A Comik comics, based on GenericTumblrV1."""
    url = 'http://acomik.com'
    name = 'comik'
    long_name = 'A Comik'
4429
4430
4431
class ClassicRandy(GenericTumblrV1):
    """Retriever for the Classic Randy comics, based on GenericTumblrV1."""
    url = 'http://classicrandy.tumblr.com'
    name = 'randy'
    long_name = 'Classic Randy'
4436
4437
4438
class DagssonTumblr(GenericTumblrV1):
    """Retriever for the Dagsson comics, based on GenericTumblrV1."""
    # Other source for the same comic: http://www.dagsson.com
    url = 'https://hugleikurdagsson.tumblr.com'
    name = 'dagsson-tumblr'
    long_name = 'Dagsson Hugleikur (from Tumblr)'
4444
4445
4446
class LinsEditionsTumblr(GenericTumblrV1):
    """Retriever for the L.I.N.S. Editions comics, based on GenericTumblrV1."""
    # Other source for the same comic: https://linsedition.com
    # The comic is now on http://warandpeas.tumblr.com
    url = 'https://linscomics.tumblr.com'
    name = 'lins-tumblr'
    long_name = 'L.I.N.S. Editions (from Tumblr)'
    _categories = ('WARANDPEAS', 'LINS')
4454
4455
4456
class WarAndPeasTumblr(GenericTumblrV1):
    """Retriever for the War And Peas comics, based on GenericTumblrV1."""
    # The comic used to be on https://linscomics.tumblr.com
    url = 'http://warandpeas.tumblr.com'
    name = 'warandpeas-tumblr'
    long_name = 'War And Peas (from Tumblr)'
    _categories = ('WARANDPEAS', 'LINS')
4463
4464
4465
class OrigamiHotDish(GenericTumblrV1):
    """Retriever for the Origami Hot Dish comics, based on GenericTumblrV1."""
    url = 'http://origamihotdish.com'
    name = 'origamihotdish'
    long_name = 'Origami Hot Dish'
4470
4471
4472
class HitAndMissComicsTumblr(GenericTumblrV1):
    """Retriever for the Hit and Miss Comics, based on GenericTumblrV1."""
    url = 'https://hitandmisscomics.tumblr.com'
    name = 'hitandmiss'
    long_name = 'Hit and Miss Comics'
4477
4478
4479
class HMBlanc(GenericTumblrV1):
    """Retriever for the HM Blanc comics, based on GenericTumblrV1."""
    url = 'http://hmblanc.tumblr.com'
    name = 'hmblanc'
    long_name = 'HM Blanc'
4484
4485
4486
class TalesOfAbsurdityTumblr(GenericTumblrV1):
    """Retriever for the Tales Of Absurdity comics, based on GenericTumblrV1."""
    # Other sources for the same comic:
    #   - http://talesofabsurdity.com
    #   - http://tapastic.com/series/Tales-Of-Absurdity
    url = 'http://talesofabsurdity.tumblr.com'
    name = 'absurdity-tumblr'
    long_name = 'Tales of Absurdity (from Tumblr)'
    _categories = ('ABSURDITY', )
4494
4495
4496
class RobbieAndBobby(GenericTumblrV1):
    """Retriever for the Robbie And Bobby comics, based on GenericTumblrV1."""
    # Other source for the same comic: http://robbieandbobby.com
    url = 'http://robbieandbobby.tumblr.com'
    name = 'robbie-tumblr'
    long_name = 'Robbie And Bobby (from Tumblr)'
4502
4503
4504
class ElectricBunnyComicTumblr(GenericTumblrV1):
    """Retriever for the Electric Bunny Comics, based on GenericTumblrV1."""
    # Other source for the same comic:
    #   - http://www.electricbunnycomics.com/View/Comic/153/Welcome+to+Hell
    url = 'http://electricbunnycomics.tumblr.com'
    name = 'bunny-tumblr'
    long_name = 'Electric Bunny Comic (from Tumblr)'
4510
4511
4512
class Hoomph(GenericTumblrV1):
    """Retriever for the Hoomph comics, based on GenericTumblrV1."""
    url = 'http://hoom.ph'
    name = 'hoomph'
    long_name = 'Hoomph'
4517
4518
4519
class BFGFSTumblr(GenericTumblrV1):
    """Retriever for the BFGFS comics, based on GenericTumblrV1."""
    # Other sources for the same comic:
    #   - https://tapastic.com/series/BFGFS
    #   - http://bfgfs.com
    url = 'https://bfgfs.tumblr.com'
    name = 'bfgfs-tumblr'
    long_name = 'BFGFS (from Tumblr)'
4526
4527
4528
class DoodleForFood(GenericTumblrV1):
    """Retriever for the Doodle For Food comics, based on GenericTumblrV1."""
    # Other source for the same comic: https://tapastic.com/series/Doodle-for-Food
    url = 'http://www.doodleforfood.com'
    name = 'doodle'
    long_name = 'Doodle For Food'
4534
4535
4536
class CassandraCalinTumblr(GenericTumblrV1):
    """Retriever for the C. Cassandra comics, based on GenericTumblrV1."""
    # Other sources for the same comic:
    #   - http://cassandracalin.com
    #   - https://tapastic.com/series/C-Cassandra-comics
    url = 'http://c-cassandra.tumblr.com'
    name = 'cassandra-tumblr'
    long_name = 'Cassandra Calin (from Tumblr)'
4543
4544
4545
class DougWasTaken(GenericTumblrV1):
    """Retriever for the Doug Was Taken comics, based on GenericTumblrV1."""
    url = 'https://dougwastaken.tumblr.com'
    name = 'doug'
    long_name = 'Doug Was Taken'
4550
4551
4552
class MandatoryRollerCoaster(GenericTumblrV1):
    """Retriever for the Mandatory Roller Coaster comics, based on GenericTumblrV1."""
    url = 'http://mandatoryrollercoaster.com'
    name = 'rollercoaster'
    long_name = 'Mandatory Roller Coaster'
4557
4558
4559
class CEstPasEnRegardantSesPompes(GenericTumblrV1):
    """Retriever for the C'Est Pas En Regardant Ses Pompes (...) comics, based on GenericTumblrV1."""
    url = 'http://marcoandco.tumblr.com'
    name = 'cperspqccltt'
    long_name = 'C Est Pas En Regardant Ses Pompes (...)'
4564
4565
4566
class TheGrohlTroll(GenericTumblrV1):
    """Retriever for The Grohl Troll comics, based on GenericTumblrV1."""
    url = 'http://thegrohltroll.com'
    name = 'grohltroll'
    long_name = 'The Grohl Troll'
4571
4572
4573
class WebcomicName(GenericTumblrV1):
    """Class to retrieve Webcomic Name comics."""
    name = 'webcomicname'
    long_name = 'Webcomic Name'
    url = 'http://webcomicname.com'
4578
4579
4580
class BooksOfAdam(GenericTumblrV1):
    """Class to retrieve Books of Adam comics."""
    # Also on http://www.booksofadam.com
    name = 'booksofadam'
    long_name = 'Books of Adam'
    url = 'http://booksofadam.tumblr.com'
4586
4587
4588
class HarkAVagrant(GenericTumblrV1):
    """Class to retrieve Hark A Vagrant comics."""
    # Also on http://www.harkavagrant.com
    name = 'hark-tumblr'
    long_name = 'Hark A Vagrant (from Tumblr)'
    url = 'http://beatonna.tumblr.com'
4594
4595
4596
class OurSuperAdventureTumblr(GenericTumblrV1):
    """Class to retrieve Our Super Adventure comics."""
    # Also on https://tapastic.com/series/Our-Super-Adventure
    # Also on http://www.oursuperadventure.com
    # http://sarahgraley.com
    name = 'superadventure-tumblr'
    long_name = 'Our Super Adventure (from Tumblr)'
    url = 'http://sarahssketchbook.tumblr.com'
4604
4605
4606
class JakeLikesOnions(GenericTumblrV1):
    """Class to retrieve Jake Likes Onions comics."""
    name = 'jake'
    long_name = 'Jake Likes Onions'
    url = 'http://jakelikesonions.com'
4611
4612
4613
class InYourFaceCakeTumblr(GenericTumblrV1):
    """Class to retrieve In Your Face Cake comics."""
    # Also on https://tapas.io/series/In-Your-Face-Cake
    name = 'inyourfacecake-tumblr'
    long_name = 'In Your Face Cake (from Tumblr)'
    url = 'https://in-your-face-cake.tumblr.com'
    _categories = ('INYOURFACECAKE', )
4620
4621
4622
class Robospunk(GenericTumblrV1):
    """Class to retrieve Robospunk comics."""
    name = 'robospunk'
    long_name = 'Robospunk'
    url = 'http://robospunk.com'
4627
4628
4629
class BananaTwinky(GenericTumblrV1):
    """Class to retrieve Banana Twinky comics."""
    name = 'banana'
    long_name = 'Banana Twinky'
    url = 'https://bananatwinky.tumblr.com'
4634
4635
4636
class YesterdaysPopcornTumblr(GenericTumblrV1):
    """Class to retrieve Yesterday's Popcorn comics."""
    # Also on http://www.yesterdayspopcorn.com
    # Also on https://tapastic.com/series/Yesterdays-Popcorn
    name = 'popcorn-tumblr'
    long_name = 'Yesterday\'s Popcorn (from Tumblr)'
    url = 'http://yesterdayspopcorn.tumblr.com'
4643
4644
4645
class TwistedDoodles(GenericTumblrV1):
    """Class to retrieve Twisted Doodles comics."""
    name = 'twisted'
    long_name = 'Twisted Doodles'
    url = 'http://www.twisteddoodles.com'
4650
4651
4652
class UbertoolTumblr(GenericTumblrV1):
    """Class to retrieve Ubertool comics."""
    # Also on http://ubertoolcomic.com
    # Also on https://tapastic.com/series/ubertool
    name = 'ubertool-tumblr'
    long_name = 'Ubertool (from Tumblr)'
    url = 'https://ubertool.tumblr.com'
    _categories = ('UBERTOOL', )
4660
4661
4662
class LittleLifeLinesTumblr(GenericDeletedComic, GenericTumblrV1):
    """Class to retrieve Little Life Lines comics."""
    # Also on http://www.littlelifelines.com
    name = 'life-tumblr'
    long_name = 'Little Life Lines (from Tumblr)'
    url = 'https://little-life-lines.tumblr.com'
4668
4669
4670
class TheyCanTalk(GenericTumblrV1):
    """Class to retrieve They Can Talk comics."""
    name = 'theycantalk'
    long_name = 'They Can Talk'
    url = 'http://theycantalk.com'
4675
4676
4677
class Will5NeverCome(GenericTumblrV1):
    """Class to retrieve Will 5:00 Never Come comics."""
    name = 'will5'
    long_name = 'Will 5:00 Never Come ?'
    url = 'http://will5nevercome.com'
4682
4683
4684
class Sephko(GenericTumblrV1):
    """Class to retrieve Sephko Comics."""
    # Also on http://www.sephko.com
    name = 'sephko'
    long_name = 'Sephko'
    url = 'https://sephko.tumblr.com'
4690
4691
4692
class BlazersAtDawn(GenericTumblrV1):
    """Class to retrieve Blazers At Dawn Comics."""
    name = 'blazers'
    long_name = 'Blazers At Dawn'
    url = 'http://blazersatdawn.tumblr.com'
4697
4698
4699
class ArtByMoga(GenericEmptyComic, GenericTumblrV1):  # Deactivated because it downloads too many things
    """Class to retrieve Art By Moga Comics."""
    name = 'moga'
    long_name = 'Art By Moga'
    url = 'http://artbymoga.tumblr.com'
4704
4705
4706
class VerbalVomitTumblr(GenericTumblrV1):
    """Class to retrieve Verbal Vomit comics."""
    # Also on http://www.verbal-vomit.com
    name = 'vomit-tumblr'
    long_name = 'Verbal Vomit (from Tumblr)'
    url = 'http://verbalvomits.tumblr.com'
4712
4713
4714
class LibraryComic(GenericTumblrV1):
    """Class to retrieve LibraryComic."""
    # Also on http://librarycomic.com
    name = 'library-tumblr'
    long_name = 'LibraryComic (from Tumblr)'
    url = 'https://librarycomic.tumblr.com'
4720
4721
4722
class TizzyStitchBirdTumblr(GenericTumblrV1):
    """Class to retrieve Tizzy Stitch Bird comics."""
    # Also on http://tizzystitchbird.com
    # Also on https://tapastic.com/series/TizzyStitchbird
    # Also on http://www.webtoons.com/en/challenge/tizzy-stitchbird/list?title_no=50082
    name = 'tizzy-tumblr'
    long_name = 'Tizzy Stitch Bird (from Tumblr)'
    url = 'http://tizzystitchbird.tumblr.com'
4730
4731
4732
class VictimsOfCircumsolarTumblr(GenericTumblrV1):
    """Class to retrieve VictimsOfCircumsolar comics."""
    # Also on http://www.victimsofcircumsolar.com
    name = 'circumsolar-tumblr'
    long_name = 'Victims Of Circumsolar (from Tumblr)'
    url = 'https://victimsofcomics.tumblr.com'
4738
4739
4740
class RockPaperCynicTumblr(GenericTumblrV1):
    """Class to retrieve RockPaperCynic comics."""
    # Also on http://www.rockpapercynic.com
    # Also on https://tapastic.com/series/rockpapercynic
    name = 'rpc-tumblr'
    long_name = 'Rock Paper Cynic (from Tumblr)'
    url = 'http://rockpapercynic.tumblr.com'
4747
4748
4749
class DeadlyPanelTumblr(GenericTumblrV1):
    """Class to retrieve Deadly Panel comics."""
    # Also on http://www.deadlypanel.com
    # Also on https://tapastic.com/series/deadlypanel
    name = 'deadly-tumblr'
    long_name = 'Deadly Panel (from Tumblr)'
    url = 'https://deadlypanel.tumblr.com'
4756
4757
4758
class CatanaComics(GenericComicNotWorking):  # Not a Tumblr anymore ?
    """Class to retrieve Catana comics."""
    name = 'catana'
    long_name = 'Catana'
    url = 'http://www.catanacomics.com'
4763
4764
4765
class AngryAtNothingTumblr(GenericTumblrV1):
    """Class to retrieve Angry at Nothing comics."""
    # Also on http://www.angryatnothing.net
    # Also on http://tapastic.com/series/Comics-yeah-definitely-comics-
    name = 'angry-tumblr'
    long_name = 'Angry At Nothing (from Tumblr)'
    url = 'http://angryatnothing.tumblr.com'
4772
4773
4774
class ShanghaiTango(GenericTumblrV1):
    """Class to retrieve Shanghai Tango comic."""
    name = 'tango'
    long_name = 'Shanghai Tango'
    url = 'http://tango2010weibo.tumblr.com'
4779
4780
4781
class OffTheLeashDogTumblr(GenericTumblrV1):
    """Class to retrieve Off The Leash Dog comics."""
    # Also on http://offtheleashdogcartoons.com
    # Also on http://www.rupertfawcettcartoons.com
    name = 'offtheleash-tumblr'
    long_name = 'Off The Leash Dog (from Tumblr)'
    url = 'http://rupertfawcettsdoggyblog.tumblr.com'
    _categories = ('FAWCETT', )
4789
4790
4791
class ImogenQuestTumblr(GenericTumblrV1):
    """Class to retrieve Imogen Quest comics."""
    # Also on http://imogenquest.net
    name = 'imogen-tumblr'
    long_name = 'Imogen Quest (from Tumblr)'
    url = 'http://imoquest.tumblr.com'
4797
4798
4799
class Shitfest(GenericTumblrV1):
    """Class to retrieve Shitfest comics."""
    name = 'shitfest'
    long_name = 'Shitfest'
    url = 'http://shitfestcomic.com'
4804
4805
4806
class IceCreamSandwichComics(GenericTumblrV1):
    """Class to retrieve Ice Cream Sandwich Comics."""
    name = 'icecream'
    long_name = 'Ice Cream Sandwich Comics'
    url = 'http://icecreamsandwichcomics.com'
4811
4812
4813
class Dustinteractive(GenericTumblrV1):
    """Class to retrieve Dustinteractive comics."""
    name = 'dustinteractive'
    long_name = 'Dustinteractive'
    url = 'http://dustinteractive.com'
4818
4819
4820
class StickyCinemaFloor(GenericTumblrV1):
    """Class to retrieve Sticky Cinema Floor comics."""
    name = 'stickycinema'
    long_name = 'Sticky Cinema Floor'
    url = 'https://stickycinemafloor.tumblr.com'
4825
4826
4827
class IncidentalComicsTumblr(GenericTumblrV1):
    """Class to retrieve Incidental Comics."""
    # Also on http://www.incidentalcomics.com
    name = 'incidental-tumblr'
    long_name = 'Incidental Comics (from Tumblr)'
    url = 'http://incidentalcomics.tumblr.com'
4833
4834
4835
class APleasantWasteOfTimeTumblr(GenericTumblrV1):
    """Class to retrieve A Pleasant Waste Of Time comics."""
    # Also on https://tapas.io/series/A-Pleasant-
    name = 'pleasant-waste-tumblr'
    long_name = 'A Pleasant Waste Of Time (from Tumblr)'
    url = 'https://artjcf.tumblr.com'
    _categories = ('WASTE', )
4842
4843
4844
class HorovitzComicsTumblr(GenericTumblrV1):
    """Class to retrieve Horovitz comics (from Tumblr)."""
    # Also on http://www.horovitzcomics.com
    name = 'horovitz-tumblr'
    long_name = 'Horovitz (from Tumblr)'
    url = 'https://horovitzcomics.tumblr.com'
    _categories = ('HOROVITZ', )
4851
4852
4853
class DeepDarkFearsTumblr(GenericTumblrV1):
    """Class to retrieve Deep Dark Fears comics."""
    name = 'deep-dark-fears-tumblr'
    long_name = 'Deep Dark Fears (from Tumblr)'
    url = 'http://deep-dark-fears.tumblr.com'
4858
4859
4860
class DakotaMcDadzean(GenericTumblrV1):
    """Class to retrieve Dakota McFadzean comics."""
    # NOTE(review): the class name and long_name spell "McDadzean" while the
    # url spells "mcfadzean"; both are left unchanged here because other code
    # may depend on them - confirm before renaming.
    name = 'dakota'
    long_name = 'Dakota McDadzean'
    url = 'http://dakotamcfadzean.tumblr.com'
4865
4866
4867
class ExtraFabulousComicsTumblr(GenericTumblrV1):
    """Class to retrieve Extra Fabulous Comics."""
    # Also on http://extrafabulouscomics.com
    name = 'efc-tumblr'
    long_name = 'Extra Fabulous Comics (from Tumblr)'
    url = 'https://extrafabulouscomics.tumblr.com'
    _categories = ('EFC', )
4874
4875
4876
class AlexLevesque(GenericTumblrV1):
    """Class to retrieve AlexLevesque comics."""
    name = 'alevesque'
    long_name = 'Alex Levesque'
    url = 'http://alexlevesque.com'
    _categories = ('FRANCAIS', )
4882
4883
4884
class JamesOfNoTradesTumblr(GenericTumblrV1):
    """Class to retrieve JamesOfNoTrades comics."""
    # Also on http://jamesofnotrades.com
    # Also on http://www.webtoons.com/en/challenge/james-of-no-trades/list?title_no=43422
    # Also on https://tapas.io/series/James-of-No-Trades
    name = 'jamesofnotrades-tumblr'
    long_name = 'James Of No Trades (from Tumblr)'
    url = 'http://jamesfregan.tumblr.com'
    _categories = ('JAMESOFNOTRADES', )
4893
4894
4895
class InfiniteGuff(GenericTumblrV1):
    """Class to retrieve Infinite Guff comics."""
    name = 'infiniteguff'
    long_name = 'Infinite Guff'
    url = 'http://infiniteguff.com'
4900
4901
4902
class SkeletonClaw(GenericTumblrV1):
    """Class to retrieve Skeleton Claw comics."""
    name = 'skeletonclaw'
    long_name = 'Skeleton Claw'
    url = 'http://skeletonclaw.com'
4907
4908
4909
class MrsFrolleinTumblr(GenericTumblrV1):
    """Class to retrieve Mrs Frollein comics."""
    # Also on http://www.webtoons.com/en/challenge/mrsfrollein/list?title_no=51710
    name = 'frollein'
    long_name = 'Mrs Frollein (from Tumblr)'
    url = 'https://mrsfrollein.tumblr.com'
4915
4916
4917
class GoodBearComicsTumblr(GenericTumblrV1):
    """Class to retrieve GoodBearComics."""
    # Also on https://goodbearcomics.com
    name = 'goodbear-tumblr'
    long_name = 'Good Bear Comics (from Tumblr)'
    url = 'https://goodbearcomics.tumblr.com'
4923
4924
4925
class BrooklynCartoonsTumblr(GenericTumblrV1):
    """Class to retrieve Brooklyn Cartoons."""
    # Also on https://www.brooklyncartoons.com
    # Also on https://www.instagram.com/brooklyncartoons
    name = 'brooklyn-tumblr'
    long_name = 'Brooklyn Cartoons (from Tumblr)'
    url = 'http://brooklyncartoons.tumblr.com'
4932
4933
4934
class GemmaCorrellTumblr(GenericTumblrV1):
    """Class to retrieve Gemma Correll comics."""
    # Also on http://www.gemmacorrell.com/portfolio/comics/
    name = 'gemma-tumblr'
    long_name = 'Gemma Correll (from Tumblr)'
    url = 'http://gemmacorrell.tumblr.com'
4939
4940
4941
class RobotatertotTumblr(GenericTumblrV1):
    """Class to retrieve Robotatertot comics."""
    # Also on https://www.instagram.com/robotatertotcomics
    name = 'robotatertot-tumblr'
    long_name = 'Robotatertot (from Tumblr)'
    url = 'https://robotatertot.tumblr.com'
4947
4948
4949
class HuffyPenguin(GenericTumblrV1):
    """Class to retrieve Huffy Penguin comics."""
    name = 'huffypenguin'
    long_name = 'Huffy Penguin'
    url = 'http://huffy-penguin.tumblr.com'
4954
4955
4956
class CowardlyComicsTumblr(GenericTumblrV1):
    """Class to retrieve Cowardly Comics."""
    # Also on https://tapas.io/series/CowardlyComics
    # Also on http://www.webtoons.com/en/challenge/cowardly-comics/list?title_no=65893
    name = 'cowardly-tumblr'
    long_name = 'Cowardly Comics (from Tumblr)'
    url = 'http://cowardlycomics.tumblr.com'
4963
4964
4965
class Caw4hwTumblr(GenericTumblrV1):
    """Class to retrieve Caw4hw comics."""
    # Also on https://tapas.io/series/CAW4HW
    name = 'caw4hw-tumblr'
    long_name = 'Caw4hw (from Tumblr)'
    url = 'https://caw4hw.tumblr.com'
4971
4972
4973
class WeFlapsTumblr(GenericTumblrV1):
    """Class to retrieve WeFlaps comics."""
    name = 'weflaps-tumblr'
    long_name = 'We Flaps (from Tumblr)'
    url = 'https://weflaps.tumblr.com'
4978
4979
4980
class TheseInsideJokesTumblr(GenericTumblrV1):
    """Class to retrieve These Inside Jokes comics."""
    # Also on http://www.theseinsidejokes.com
    name = 'theseinsidejokes-tumblr'
    long_name = 'These Inside Jokes (from Tumblr)'
    url = 'http://theseinsidejokes.tumblr.com'
4986
4987
4988
class RustledJimmies(GenericTumblrV1):
    """Class to retrieve Rustled Jimmies comics."""
    # NOTE(review): 'restled' looks like a typo for 'rustled'; kept as-is
    # because the identifier may already be relied upon elsewhere - confirm
    # before renaming.
    name = 'restled'
    long_name = 'Rustled Jimmies'
    url = 'http://rustledjimmies.net'
4993
4994
4995
class SinewynTumblr(GenericTumblrV1):
    """Class to retrieve Sinewyn comics."""
    # Also on https://sinewyn.wordpress.com
    name = 'sinewyn-tumblr'
    long_name = 'Sinewyn (from Tumblr)'
    url = 'https://sinewyn.tumblr.com'
5001
5002
5003
class ItFoolsAMonster(GenericTumblrV1):
    """Class to retrieve It Fools A Monster comics."""
    name = 'itfoolsamonster'
    long_name = 'It Fools A Monster'
    url = 'http://itfoolsamonster.com'
5008
5009
5010
class BoumeriesTumblr(GenericTumblrV1):
    """Class to retrieve Boumeries comics."""
    # Also on http://bd.boumerie.com
    # Also on http://comics.boumerie.com
    name = 'boumeries-tumblr'
    long_name = 'Boumeries (from Tumblr)'
    url = 'http://boumeries.tumblr.com/'
    _categories = ('BOUMERIES', )
5018
5019
5020
class InfiniteImmortalBensTumblr(GenericTumblrV1):
    """Class to retrieve Infinite Immortal Bens comics."""
    # Also on http://www.webtoons.com/en/challenge/infinite-immortal-bens/list?title_no=32847
    # Also on https://tapas.io/series/Infinite-Immortal-Bens
    url = 'https://infiniteimmortalbens.tumblr.com'
    name = 'infiniteimmortal-tumblr'
    long_name = 'Infinite Immortal Bens (from Tumblr)'
    _categories = ('INFINITEIMMORTAL', )
5028
5029
5030
class CheeseCornzTumblr(GenericTumblrV1):
    """Class to retrieve Cheese Cornz comics."""
    name = 'cheesecornz-tumblr'
    long_name = 'Cheese Cornz (from Tumblr)'
    url = 'https://cheesecornz.tumblr.com'
5035
5036
5037
class CinismoIlustrado(GenericTumblrV1):
    """Class to retrieve CinismoIlustrado comics."""
    name = 'cinismo'
    long_name = 'Cinismo Ilustrado'
    url = 'http://cinismoilustrado.com'
    _categories = ('ESPANOL', )
5043
5044
5045
class EatMyPaintTumblr(GenericTumblrV1):
    """Class to retrieve Eat My Paint comics."""
    # Also on https://tapas.io/series/eatmypaint
    name = 'eatmypaint-tumblr'
    long_name = 'Eat My Paint (from Tumblr)'
    url = 'https://eatmypaint.tumblr.com'
    _categories = ('EATMYPAINT', )
5052
5053
5054
class AnomalyTownFromTumblr(GenericTumblrV1):
    """Class to retrieve Anomaly Town."""
    name = 'anomalytown-tumblr'
    long_name = 'Anomaly Town (from Tumblr)'
    url = 'https://anomalytown.tumblr.com'
5059
5060
5061
class RoryTumblr(GenericTumblrV1):
    """Class to retrieve Rory comics."""
    # Also on https://tapas.io/series/Share-Your-Vulnerability
    name = 'rory-tumblr'
    long_name = 'Rory (from Tumblr)'
    url = 'https://rorycomics.tumblr.com/'
    _categories = ('RORY',)
5068
5069
5070
class HorovitzComics(GenericDeletedComic, GenericListableComic):
    """Generic class to handle the logic common to the different comics from Horovitz.

    Subclasses are expected to override `link_re` with a compiled pattern
    whose first group is the comic number found in an archive link's href.
    """
    # Also on https://horovitzcomics.tumblr.com
    url = 'http://www.horovitzcomics.com'
    _categories = ('HOROVITZ', )
    # Captures the three numeric path components following 'comics/' in the
    # image URL; get_comic_info reads them as year, month and day.
    img_re = re.compile('.*comics/([0-9]*)/([0-9]*)/([0-9]*)/.*$')
    # Placeholder: get_comic_info and get_archive_elements both use
    # cls.link_re, so a subclass has to provide a real pattern.
    link_re = NotImplemented
    get_url_from_archive_element = join_cls_url_to_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        `soup` is the parsed comic page and `link` the archive anchor that
        led to it. Returns a dict with the title, the date (day, month, year),
        the image urls and the comic number.
        """
        href = link['href']
        num = int(cls.link_re.match(href).group(1))
        title = link.string
        imgs = soup.find_all('img', id='comic')
        assert len(imgs) == 1, imgs
        # The date is not read from the page text: it is taken from the
        # path of the image URL.
        year, month, day = [int(s)
                            for s in cls.img_re.match(imgs[0]['src']).groups()]
        return {
            'title': title,
            'day': day,
            'month': month,
            'year': year,
            'img': [i['src'] for i in imgs],
            'num': num,
        }

    @classmethod
    def get_archive_elements(cls):
        """Get the archive links matching `link_re`, in reverse page order."""
        archive_url = 'http://www.horovitzcomics.com/comics/archive/'
        return reversed(get_soup_at_url(archive_url).find_all('a', href=cls.link_re))
5102
5103
5104
class HorovitzNew(HorovitzComics):
    """Class to retrieve Horovitz new comics."""
    name = 'horovitznew'
    long_name = 'Horovitz New'
    link_re = re.compile('^/comics/new/([0-9]+)$')
5109
5110
5111
class HorovitzClassic(HorovitzComics):
    """Class to retrieve Horovitz classic comics."""
    name = 'horovitzclassic'
    long_name = 'Horovitz Classic'
    link_re = re.compile('^/comics/classic/([0-9]+)$')
5116
5117
5118
class GenericGoComic(GenericNavigableComic):
    """Generic class to handle the logic common to comics from gocomics.com."""
    _categories = ('GOCOMIC', )

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics."""
        div = get_soup_at_url(cls.url).find('div', class_='gc-deck gc-deck--cta-1')
        return div.find('a')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic."""
        # The full class attribute strings are matched as written, trailing
        # space included, so they must stay byte-identical.
        prev_class = 'fa btn btn-outline-default btn-circle fa-caret-left sm js-previous-comic '
        next_class = 'fa btn btn-outline-default btn-circle fa-caret-right sm '
        return last_soup.find('a', class_=next_class if next_ else prev_class)

    @classmethod
    def get_url_from_link(cls, link):
        """Get the url corresponding to a link, resolved against the gocomics.com root."""
        gocomics = 'http://www.gocomics.com'
        return urljoin_wrapper(gocomics, link['href'])

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Everything is read from the page's <meta> tags: publication date,
        images (og:image), author and tags.
        """
        date_str = soup.find('meta', property='article:published_time')['content']
        day = string_to_date(date_str, "%Y-%m-%d")
        imgs = soup.find_all('meta', property='og:image')
        author = soup.find('meta', property='article:author')['content']
        tags = soup.find('meta', property='article:tag')['content']
        return {
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [i['content'] for i in imgs],
            'author': author,
            'tags': tags,
        }
5156
5157
5158
class PearlsBeforeSwine(GenericGoComic):
    """Class to retrieve Pearls Before Swine comics."""
    name = 'pearls'
    long_name = 'Pearls Before Swine'
    url = 'http://www.gocomics.com/pearlsbeforeswine'
5163
5164
5165
class Peanuts(GenericGoComic):
    """Class to retrieve Peanuts comics."""
    name = 'peanuts'
    long_name = 'Peanuts'
    url = 'http://www.gocomics.com/peanuts'
5170
5171
5172
class MattWuerker(GenericGoComic):
    """Class to retrieve Matt Wuerker comics."""
    name = 'wuerker'
    long_name = 'Matt Wuerker'
    url = 'http://www.gocomics.com/mattwuerker'
5177
5178
5179
class TomToles(GenericGoComic):
    """Class to retrieve Tom Toles comics."""
    name = 'toles'
    long_name = 'Tom Toles'
    url = 'http://www.gocomics.com/tomtoles'
5184
5185
5186
class BreakOfDay(GenericGoComic):
    """Class to retrieve Break Of Day comics."""
    name = 'breakofday'
    long_name = 'Break Of Day'
    url = 'http://www.gocomics.com/break-of-day'
5191
5192
5193
class Brevity(GenericGoComic):
    """Class to retrieve Brevity comics."""
    name = 'brevity'
    long_name = 'Brevity'
    url = 'http://www.gocomics.com/brevity'
5198
5199
5200
class MichaelRamirez(GenericGoComic):
    """Class to retrieve Michael Ramirez comics."""
    name = 'ramirez'
    long_name = 'Michael Ramirez'
    url = 'http://www.gocomics.com/michaelramirez'
5205
5206
5207
class MikeLuckovich(GenericGoComic):
    """Class to retrieve Mike Luckovich comics."""
    name = 'luckovich'
    long_name = 'Mike Luckovich'
    url = 'http://www.gocomics.com/mikeluckovich'
5212
5213
5214
class JimBenton(GenericGoComic):
    """Class to retrieve Jim Benton comics."""
    # Also on http://jimbenton.tumblr.com
    name = 'benton'
    long_name = 'Jim Benton'
    url = 'http://www.gocomics.com/jim-benton-cartoons'
5220
5221
5222
class TheArgyleSweater(GenericGoComic):
    """Class to retrieve the Argyle Sweater comics."""
    name = 'argyle'
    long_name = 'Argyle Sweater'
    url = 'http://www.gocomics.com/theargylesweater'
5227
5228
5229
class SunnyStreet(GenericGoComic):
    """Class to retrieve Sunny Street comics."""
    # Also on http://www.sunnystreetcomics.com
    name = 'sunny'
    long_name = 'Sunny Street'
    url = 'http://www.gocomics.com/sunny-street'
5235
5236
5237
class OffTheMark(GenericGoComic):
    """Class to retrieve Off The Mark comics."""
    # Also on https://www.offthemark.com
    name = 'offthemark'
    long_name = 'Off The Mark'
    url = 'http://www.gocomics.com/offthemark'
5243
5244
5245
class WuMo(GenericGoComic):
    """Class to retrieve WuMo comics."""
    # Also on http://wumo.com
    name = 'wumo'
    long_name = 'WuMo'
    url = 'http://www.gocomics.com/wumo'
5251
5252
5253
class LunarBaboon(GenericGoComic):
    """Class to retrieve Lunar Baboon comics."""
    # Also on http://www.lunarbaboon.com
    # Also on https://tapastic.com/series/Lunarbaboon
    name = 'lunarbaboon'
    long_name = 'Lunar Baboon'
    url = 'http://www.gocomics.com/lunarbaboon'
5260
5261
5262
class SandersenGocomic(GenericGoComic):
    """Class to retrieve Sarah Andersen comics."""
    # Also on http://sarahcandersen.com
    # Also on http://tapastic.com/series/Doodle-Time
    name = 'sandersen-goc'
    long_name = 'Sarah Andersen (from GoComics)'
    url = 'http://www.gocomics.com/sarahs-scribbles'
5269
5270
5271
class SaturdayMorningBreakfastCerealGoComic(GenericGoComic):
    """Class to retrieve Saturday Morning Breakfast Cereal comics."""
    # Also on http://smbc-comics.tumblr.com
    # Also on http://www.smbc-comics.com
    name = 'smbc-goc'
    long_name = 'Saturday Morning Breakfast Cereal (from GoComics)'
    url = 'http://www.gocomics.com/saturday-morning-breakfast-cereal'
    _categories = ('SMBC', )
5279
5280
5281
class CalvinAndHobbesGoComic(GenericGoComic):
    """Class to retrieve Calvin and Hobbes comics."""
    # From gocomics, not http://marcel-oehler.marcellosendos.ch/comics/ch/
    name = 'calvin-goc'
    long_name = 'Calvin and Hobbes (from GoComics)'
    url = 'http://www.gocomics.com/calvinandhobbes'
5287
5288
5289
class RallGoComic(GenericGoComic):
    """Class to retrieve Ted Rall comics."""
    # Also on http://rall.com/comic
    name = 'rall-goc'
    long_name = "Ted Rall (from GoComics)"
    url = "http://www.gocomics.com/ted-rall"
    _categories = ('RALL', )
5296
5297
5298
class TheAwkwardYetiGoComic(GenericGoComic):
    """Class to retrieve The Awkward Yeti comics."""
    # Also on http://larstheyeti.tumblr.com
    # Also on http://theawkwardyeti.com
    # Also on https://tapastic.com/series/TheAwkwardYeti
    name = 'yeti-goc'
    long_name = 'The Awkward Yeti (from GoComics)'
    url = 'http://www.gocomics.com/the-awkward-yeti'
    _categories = ('YETI', )
5307
5308
5309
class BerkeleyMewsGoComics(GenericGoComic):
    """Class to retrieve Berkeley Mews comics."""
    # Also on http://mews.tumblr.com
    # Also on http://www.berkeleymews.com
    name = 'berkeley-goc'
    long_name = 'Berkeley Mews (from GoComics)'
    url = 'http://www.gocomics.com/berkeley-mews'
    _categories = ('BERKELEY', )
5317
5318
5319
class SheldonGoComics(GenericGoComic):
    """Class to retrieve Sheldon comics."""
    # Also on http://www.sheldoncomics.com
    name = 'sheldon-goc'
    long_name = 'Sheldon Comics (from GoComics)'
    url = 'http://www.gocomics.com/sheldon'
5325
5326
5327
class FowlLanguageGoComics(GenericGoComic):
    """Class to retrieve Fowl Language comics."""
    # Also on http://www.fowllanguagecomics.com
    # Also on http://tapastic.com/series/Fowl-Language-Comics
    # Also on http://fowllanguagecomics.tumblr.com
    name = 'fowllanguage-goc'
    long_name = 'Fowl Language Comics (from GoComics)'
    url = 'http://www.gocomics.com/fowl-language'
    _categories = ('FOWLLANGUAGE', )
5336
5337
5338
class NickAnderson(GenericGoComic):
    """Class to retrieve Nick Anderson comics."""
    name = 'nickanderson'
    long_name = 'Nick Anderson'
    url = 'http://www.gocomics.com/nickanderson'
5343
5344
5345
class GarfieldGoComics(GenericGoComic):
    """Class to retrieve Garfield comics."""
    # Also on http://garfield.com
    name = 'garfield-goc'
    long_name = 'Garfield (from GoComics)'
    url = 'http://www.gocomics.com/garfield'
    _categories = ('GARFIELD', )
5352
5353
5354
class DorrisMcGoComics(GenericGoComic):
    """Class to retrieve Dorris Mc Comics."""
    # Also on http://dorrismccomics.com
    name = 'dorrismc-goc'
    long_name = 'Dorris Mc (from GoComics)'
    url = 'http://www.gocomics.com/dorris-mccomics'
5360
5361
5362
class FoxTrot(GenericGoComic):
    """Class to retrieve FoxTrot comics."""
    name = 'foxtrot'
    long_name = 'FoxTrot'
    url = 'http://www.gocomics.com/foxtrot'
5367
5368
5369
class FoxTrotClassics(GenericGoComic):
    """Class to retrieve FoxTrot Classics comics."""
    name = 'foxtrot-classics'
    long_name = 'FoxTrot Classics'
    url = 'http://www.gocomics.com/foxtrotclassics'
5374
5375
5376
class MisterAndMeGoComics(GenericDeletedComic, GenericGoComic):
    """Class to retrieve Mister & Me Comics."""
    # Also on http://www.mister-and-me.com
    # Also on https://tapastic.com/series/Mister-and-Me
    name = 'mister-goc'
    long_name = 'Mister & Me (from GoComics)'
    url = 'http://www.gocomics.com/mister-and-me'
5383
5384
5385
class NonSequitur(GenericGoComic):
    """Class to retrieve Non Sequitur (Wiley Miller) comics."""
    name = 'nonsequitur'
    long_name = 'Non Sequitur'
    url = 'http://www.gocomics.com/nonsequitur'
5390
5391
5392
class JoeyAlisonSayers(GenericGoComic):
    """Class to retrieve Joey Alison Sayers comics."""
    name = 'joeyalison'
    long_name = 'Joey Alison Sayers (from GoComics)'
    url = 'http://www.gocomics.com/joey-alison-sayers-comics'
5397
5398
5399
class SavageChickenGoComics(GenericGoComic):
    """Class to retrieve Savage Chicken comics."""
    # Also on http://www.savagechickens.com
    name = 'savage-goc'
    long_name = 'Savage Chicken (from GoComics)'
    url = 'http://www.gocomics.com/savage-chickens'
5405
5406
5407
class GenericTapasticComic(GenericListableComic):
    """Generic class to handle the logic common to comics from tapastic.com."""
    _categories = ('TAPASTIC', )

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Get information about a particular comics.

        `archive_elt` is one entry of the list returned by
        get_archive_elements. Returns None when the page has no image yet,
        otherwise a dict with the date, the image urls and the title.
        """
        # publishDate is divided by 1000 before being used as a POSIX
        # timestamp (presumably it is expressed in milliseconds).
        timestamp = int(archive_elt['publishDate']) / 1000.0
        day = datetime.datetime.fromtimestamp(timestamp).date()
        imgs = soup.find_all('img', class_='art-image')
        if not imgs:
            # No image on the page: per the original author's note the comic
            # is presumably still being uploaded, so it is skipped for now.
            return None
        return {
            'day': day.day,
            'year': day.year,
            'month': day.month,
            'img': [i['src'] for i in imgs],
            'title': archive_elt['title'],
        }

    @classmethod
    def get_url_from_archive_element(cls, archive_elt):
        """Get the episode url corresponding to an archive element."""
        return 'http://tapastic.com/episode/' + str(archive_elt['id'])

    @classmethod
    def get_archive_elements(cls):
        """Get the episode list embedded in the javascript of the series page.

        The page is scanned line by line for the first line of the form
        `episodeList : <json>,` and the JSON part is decoded.

        Raises IndexError (as the previous implementation did) when no such
        line is found.
        """
        pref, suff = 'episodeList : ', ','
        # Information is stored in the javascript part
        # I don't know the clean way to get it so this is the ugly way.
        for raw_line in urlopen_wrapper(cls.url).readlines():
            line = raw_line.decode('utf-8').strip()
            if line.startswith(pref) and line.endswith(suff):
                return json.loads(line[len(pref):-len(suff)])
        raise IndexError("No episode list found at %s" % cls.url)
5440
5441
5442
class VegetablesForDessert(GenericTapasticComic):
    """Retrieve the Vegetables For Dessert comics published on Tapastic."""
    # Also on http://vegetablesfordessert.tumblr.com
    name = 'vegetables'
    long_name = 'Vegetables For Dessert'
    url = 'http://tapastic.com/series/vegetablesfordessert'
5448
5449
5450
class FowlLanguageTapa(GenericTapasticComic):
    """Retrieve the Fowl Language comics published on Tapastic."""
    # Also on http://www.fowllanguagecomics.com
    # Also on http://fowllanguagecomics.tumblr.com
    # Also on http://www.gocomics.com/fowl-language
    name = 'fowllanguage-tapa'
    long_name = 'Fowl Language Comics (from Tapastic)'
    url = 'http://tapastic.com/series/Fowl-Language-Comics'
    _categories = ('FOWLLANGUAGE',)
5459
5460
5461
class OscillatingProfundities(GenericTapasticComic):
    """Retrieve the Oscillating Profundities comics published on Tapastic."""
    name = 'oscillating'
    long_name = 'Oscillating Profundities'
    url = 'http://tapastic.com/series/oscillatingprofundities'
5466
5467
5468
class ZnoflatsComics(GenericTapasticComic):
    """Retrieve the Znoflats comics published on Tapastic."""
    name = 'znoflats'
    long_name = 'Znoflats Comics'
    url = 'http://tapastic.com/series/Znoflats-Comics'
5473
5474
5475
class SandersenTapastic(GenericTapasticComic):
    """Retrieve the Sarah Andersen comics published on Tapastic."""
    # Also on http://sarahcandersen.com
    # Also on http://www.gocomics.com/sarahs-scribbles
    name = 'sandersen-tapa'
    long_name = 'Sarah Andersen (from Tapastic)'
    url = 'http://tapastic.com/series/Doodle-Time'
5482
5483
5484
class TubeyToonsTapastic(GenericTapasticComic):
    """Retrieve the TubeyToons comics published on Tapastic."""
    # Also on http://tubeytoons.com
    # Also on https://tubeytoons.tumblr.com
    name = 'tubeytoons-tapa'
    long_name = 'Tubey Toons (from Tapastic)'
    url = 'http://tapastic.com/series/Tubey-Toons'
    _categories = ('TUNEYTOONS',)
5492
5493
5494
class AnythingComicTapastic(GenericTapasticComic):
    """Retrieve the Anything Comic comics published on Tapastic."""
    # Also on http://www.anythingcomic.com
    name = 'anythingcomic-tapa'
    long_name = 'Anything Comic (from Tapastic)'
    url = 'http://tapastic.com/series/anything'
5500
5501
5502
class UnearthedComicsTapastic(GenericTapasticComic):
    """Retrieve the Unearthed comics published on Tapastic."""
    # Also on http://unearthedcomics.com
    # Also on https://unearthedcomics.tumblr.com
    name = 'unearthed-tapa'
    long_name = 'Unearthed Comics (from Tapastic)'
    url = 'http://tapastic.com/series/UnearthedComics'
    _categories = ('UNEARTHED',)
5510
5511
5512
class EverythingsStupidTapastic(GenericTapasticComic):
    """Retrieve the Everything's Stupid comics published on Tapastic."""
    # Also on http://www.webtoons.com/en/challenge/everythings-stupid/list?title_no=14591
    # Also on http://everythingsstupid.net
    name = 'stupid-tapa'
    long_name = "Everything's Stupid (from Tapastic)"
    url = 'http://tapastic.com/series/EverythingsStupid'
5519
5520
5521
class JustSayEhTapastic(GenericTapasticComic):
    """Retrieve the Just Say Eh comics published on Tapastic."""
    # Also on http://www.justsayeh.com
    name = 'justsayeh-tapa'
    long_name = 'Just Say Eh (from Tapastic)'
    url = 'http://tapastic.com/series/Just-Say-Eh'
5527
5528
5529
class ThorsThundershackTapastic(GenericTapasticComic):
    """Retrieve the Thor's Thundershack comics published on Tapastic."""
    # Also on http://www.thorsthundershack.com
    name = 'thor-tapa'
    long_name = "Thor's Thundershack (from Tapastic)"
    url = 'http://tapastic.com/series/Thors-Thundershac'
    _categories = ('THOR',)
5536
5537
5538
class OwlTurdTapastic(GenericTapasticComic):
    """Retrieve the Owl Turd / Shen comix published on Tapastic."""
    # Also on http://shencomix.com
    name = 'owlturd-tapa'
    long_name = 'Owl Turd / Shen Comix (from Tapastic)'
    url = 'https://tapas.io/series/Shen-Comix'
    _categories = ('OWLTURD', 'SHENCOMIX')
5545
5546
5547
class GoneIntoRaptureTapastic(GenericTapasticComic):
    """Retrieve the Gone Into Rapture comics published on Tapastic."""
    # Also on http://goneintorapture.tumblr.com
    # Also on http://goneintorapture.com
    name = 'rapture-tapa'
    long_name = 'Gone Into Rapture (from Tapastic)'
    url = 'http://tapastic.com/series/Goneintorapture'
5554
5555
5556
class HeckIfIKnowComicsTapa(GenericTapasticComic):
    """Retrieve the Heck If I Know comics published on Tapastic."""
    # Also on http://heckifiknowcomics.com
    name = 'heck-tapa'
    long_name = 'Heck if I Know comics (from Tapastic)'
    url = 'http://tapastic.com/series/Regular'
5562
5563
5564
class CheerUpEmoKidTapa(GenericTapasticComic):
    """Retrieve the CheerUpEmoKid comics published on Tapastic."""
    # Also on http://www.cheerupemokid.com
    # Also on https://enzocomics.tumblr.com
    name = 'cuek-tapa'
    long_name = 'Cheer Up Emo Kid (from Tapastic)'
    url = 'http://tapastic.com/series/CUEK'
5571
5572
5573
class BigFootJusticeTapa(GenericTapasticComic):
    """Retrieve the Big Foot Justice comics published on Tapastic."""
    # Also on http://bigfootjustice.com
    name = 'bigfoot-tapa'
    long_name = 'Big Foot Justice (from Tapastic)'
    url = 'http://tapastic.com/series/bigfoot-justice'
5579
5580
5581
class UpAndOutTapa(GenericTapasticComic):
    """Retrieve the Up & Out comics published on Tapastic."""
    # Also on http://upandoutcomic.tumblr.com
    name = 'upandout-tapa'
    long_name = 'Up And Out (from Tapastic)'
    url = 'http://tapastic.com/series/UP-and-OUT'
5587
5588
5589
class ToonHoleTapa(GenericTapasticComic):
    """Retrieve the Toon Hole comics published on Tapastic."""
    # Also on http://www.toonhole.com
    name = 'toonhole-tapa'
    long_name = 'Toon Hole (from Tapastic)'
    url = 'http://tapastic.com/series/TOONHOLE'
5595
5596
5597
class AngryAtNothingTapa(GenericTapasticComic):
    """Retrieve the Angry at Nothing comics published on Tapastic."""
    # Also on http://www.angryatnothing.net
    # Also on http://angryatnothing.tumblr.com
    name = 'angry-tapa'
    long_name = 'Angry At Nothing (from Tapastic)'
    url = 'http://tapastic.com/series/Comics-yeah-definitely-comics-'
5604
5605
5606
class LeleozTapa(GenericTapasticComic):
    """Retrieve the Leleoz comics published on Tapastic."""
    # Also on http://leleozcomics.tumblr.com
    name = 'leleoz-tapa'
    long_name = 'Leleoz (from Tapastic)'
    url = 'https://tapastic.com/series/Leleoz'
5612
5613
5614
class TheAwkwardYetiTapa(GenericTapasticComic):
    """Retrieve The Awkward Yeti comics published on Tapastic."""
    # Also on http://www.gocomics.com/the-awkward-yeti
    # Also on http://theawkwardyeti.com
    # Also on http://larstheyeti.tumblr.com
    name = 'yeti-tapa'
    long_name = 'The Awkward Yeti (from Tapastic)'
    url = 'https://tapastic.com/series/TheAwkwardYeti'
    _categories = ('YETI',)
5623
5624
5625
class AsPerUsualTapa(GenericTapasticComic):
    """Class to retrieve As Per Usual comics."""
    # Also on http://as-per-usual.tumblr.com
    name = 'usual-tapa'
    long_name = 'As Per Usual (from Tapastic)'
    url = 'https://tapastic.com/series/AsPerUsual'
    # Declared as `_categories`, the attribute name used by the other comic classes.
    _categories = ('DAMILEE', )
5632
5633
5634
class HotComicsForCoolPeopleTapa(GenericTapasticComic):
    """Class to retrieve Hot Comics For Cool People."""
    # Also on http://hotcomicsforcoolpeople.tumblr.com
    # Also on http://hotcomics.biz (links to tumblr)
    # Also on http://hcfcp.com (links to tumblr)
    name = 'hotcomics-tapa'
    long_name = 'Hot Comics For Cool People (from Tapastic)'
    url = 'https://tapastic.com/series/Hot-Comics-For-Cool-People'
    # Declared as `_categories`, the attribute name used by the other comic classes.
    _categories = ('DAMILEE', )
5643
5644
5645
class OneOneOneOneComicTapa(GenericTapasticComic):
    """Retrieve the 1111 Comics published on Tapastic."""
    # Also on http://www.1111comics.me
    # Also on http://comics1111.tumblr.com
    name = '1111-tapa'
    long_name = '1111 Comics (from Tapastic)'
    url = 'https://tapastic.com/series/1111-Comics'
    _categories = ('ONEONEONEONE',)
5653
5654
5655
class TumbleDryTapa(GenericTapasticComic):
    """Class to retrieve Tumble Dry comics."""
    # Also on http://tumbledrycomics.com
    name = 'tumbledry-tapa'
    # The comic is called "Tumble Dry" (see the series url), not "Tumblr Dry".
    long_name = 'Tumble Dry (from Tapastic)'
    url = 'https://tapastic.com/series/TumbleDryComics'
5661
5662
5663
class DeadlyPanelTapa(GenericTapasticComic):
    """Retrieve the Deadly Panel comics published on Tapastic."""
    # Also on http://www.deadlypanel.com
    # Also on https://deadlypanel.tumblr.com
    name = 'deadly-tapa'
    long_name = 'Deadly Panel (from Tapastic)'
    url = 'https://tapastic.com/series/deadlypanel'
5670
5671
5672
class ChrisHallbeckMaxiTapa(GenericTapasticComic):
    """Retrieve the Chris Hallbeck (Maximumble) comics published on Tapastic."""
    # Also on https://chrishallbeck.tumblr.com
    # Also on http://maximumble.com
    name = 'hallbeckmaxi-tapa'
    long_name = 'Chris Hallback - Maximumble (from Tapastic)'
    url = 'https://tapastic.com/series/Maximumble'
    _categories = ('HALLBACK',)
5680
5681
5682
class ChrisHallbeckMiniTapa(GenericDeletedComic, GenericTapasticComic):
    """Retrieve the Chris Hallbeck (Minimumble) comics published on Tapastic."""
    # Also on https://chrishallbeck.tumblr.com
    # Also on http://minimumble.com
    name = 'hallbeckmini-tapa'
    long_name = 'Chris Hallback - Minimumble (from Tapastic)'
    url = 'https://tapastic.com/series/Minimumble'
    _categories = ('HALLBACK',)
5690
5691
5692
class ChrisHallbeckBiffTapa(GenericDeletedComic, GenericTapasticComic):
    """Retrieve the Chris Hallbeck (The Book of Biff) comics published on Tapastic."""
    # Also on https://chrishallbeck.tumblr.com
    # Also on http://thebookofbiff.com
    name = 'hallbeckbiff-tapa'
    long_name = 'Chris Hallback - The Book of Biff (from Tapastic)'
    url = 'https://tapastic.com/series/Biff'
    _categories = ('HALLBACK',)
5700
5701
5702
class RandoWisTapa(GenericTapasticComic):
    """Retrieve the RandoWis comics published on Tapastic."""
    # Also on https://randowis.com
    name = 'randowis-tapa'
    long_name = 'RandoWis (from Tapastic)'
    url = 'https://tapastic.com/series/RandoWis'
5708
5709
5710
class PigeonGazetteTapa(GenericTapasticComic):
    """Retrieve The Pigeon Gazette comics published on Tapastic."""
    # Also on http://thepigeongazette.tumblr.com
    name = 'pigeon-tapa'
    long_name = 'The Pigeon Gazette (from Tapastic)'
    url = 'https://tapastic.com/series/The-Pigeon-Gazette'
5716
5717
5718
class TheOdd1sOutTapa(GenericTapasticComic):
    """Retrieve The Odd 1s Out comics published on Tapastic."""
    # Also on http://theodd1sout.com
    # Also on http://theodd1sout.tumblr.com
    name = 'theodd-tapa'
    long_name = 'The Odd 1s Out (from Tapastic)'
    url = 'https://tapastic.com/series/Theodd1sout'
5725
5726
5727
class TheWorldIsFlatTapa(GenericTapasticComic):
    """Retrieve The World Is Flat comics published on Tapastic."""
    # Also on http://theworldisflatcomics.tumblr.com
    name = 'flatworld-tapa'
    long_name = 'The World Is Flat (from Tapastic)'
    url = 'https://tapastic.com/series/The-World-is-Flat'
5733
5734
5735
class MisterAndMeTapa(GenericTapasticComic):
    """Retrieve the Mister & Me comics published on Tapastic."""
    # Also on http://www.mister-and-me.com
    # Also on http://www.gocomics.com/mister-and-me
    name = 'mister-tapa'
    long_name = 'Mister & Me (from Tapastic)'
    url = 'https://tapastic.com/series/Mister-and-Me'
5742
5743
5744
class TalesOfAbsurdityTapa(GenericDeletedComic, GenericTapasticComic):
    """Retrieve the Tales Of Absurdity comics published on Tapastic."""
    # Also on http://talesofabsurdity.com
    # Also on http://talesofabsurdity.tumblr.com
    name = 'absurdity-tapa'
    long_name = 'Tales of Absurdity (from Tapastic)'
    url = 'http://tapastic.com/series/Tales-Of-Absurdity'
    _categories = ('ABSURDITY',)
5752
5753
5754
class BFGFSTapa(GenericTapasticComic):
    """Retrieve the BFGFS comics published on Tapastic."""
    # Also on http://bfgfs.com
    # Also on https://bfgfs.tumblr.com
    name = 'bfgfs-tapa'
    long_name = 'BFGFS (from Tapastic)'
    url = 'https://tapastic.com/series/BFGFS'
5761
5762
5763
class DoodleForFoodTapa(GenericTapasticComic):
    """Retrieve the Doodle For Food comics published on Tapastic."""
    # Also on http://www.doodleforfood.com
    name = 'doodle-tapa'
    long_name = 'Doodle For Food (from Tapastic)'
    url = 'https://tapastic.com/series/Doodle-for-Food'
5769
5770
5771
class MrLovensteinTapa(GenericTapasticComic):
    """Retrieve the Mr Lovenstein comics published on Tapastic."""
    # Also on http://www.mrlovenstein.com
    name = 'mrlovenstein-tapa'
    long_name = 'Mr. Lovenstein (from Tapastic)'
    url = 'https://tapastic.com/series/MrLovenstein'
5777
5778
5779
class CassandraCalinTapa(GenericTapasticComic):
    """Retrieve the C. Cassandra comics published on Tapastic."""
    # Also on http://cassandracalin.com
    # Also on http://c-cassandra.tumblr.com
    name = 'cassandra-tapa'
    long_name = 'Cassandra Calin (from Tapastic)'
    url = 'https://tapastic.com/series/C-Cassandra-comics'
5786
5787
5788
class WafflesAndPancakes(GenericTapasticComic):
    """Retrieve the Waffles And Pancakes comics published on Tapastic."""
    # Also on http://wandpcomic.com
    name = 'waffles'
    long_name = 'Waffles And Pancakes'
    url = 'https://tapastic.com/series/Waffles-and-Pancakes'
5794
5795
5796
class YesterdaysPopcornTapastic(GenericTapasticComic):
    """Retrieve the Yesterday's Popcorn comics published on Tapastic."""
    # Also on http://www.yesterdayspopcorn.com
    # Also on http://yesterdayspopcorn.tumblr.com
    name = 'popcorn-tapa'
    long_name = "Yesterday's Popcorn (from Tapastic)"
    url = 'https://tapastic.com/series/Yesterdays-Popcorn'
5803
5804
5805
class OurSuperAdventureTapastic(GenericDeletedComic, GenericTapasticComic):
    """Retrieve the Our Super Adventure comics published on Tapastic."""
    # Also on http://www.oursuperadventure.com
    # Also on http://sarahssketchbook.tumblr.com
    # Also on http://sarahgraley.com
    name = 'superadventure-tapastic'
    long_name = 'Our Super Adventure (from Tapastic)'
    url = 'https://tapastic.com/series/Our-Super-Adventure'
5813
5814
5815
class NamelessPCs(GenericTapasticComic):
    """Retrieve the Nameless PCs comics published on Tapastic."""
    # Also on http://namelesspcs.com
    name = 'namelesspcs-tapa'
    long_name = 'NamelessPCs (from Tapastic)'
    url = 'https://tapastic.com/series/NamelessPC'
5821
5822
5823
class DownTheUpwardSpiralTapa(GenericTapasticComic):
    """Retrieve the Down The Upward Spiral comics published on Tapastic."""
    # Also on http://www.downtheupwardspiral.com
    # Also on http://downtheupwardspiral.tumblr.com
    name = 'spiral-tapa'
    long_name = 'Down the Upward Spiral (from Tapastic)'
    url = 'https://tapastic.com/series/Down-the-Upward-Spiral'
5830
5831
5832
class UbertoolTapa(GenericTapasticComic):
    """Retrieve the Ubertool comics published on Tapastic."""
    # Also on http://ubertoolcomic.com
    # Also on https://ubertool.tumblr.com
    name = 'ubertool-tapa'
    long_name = 'Ubertool (from Tapastic)'
    url = 'https://tapastic.com/series/ubertool'
    _categories = ('UBERTOOL',)
5840
5841
5842
class BarteNerdsTapa(GenericDeletedComic, GenericTapasticComic):
    """Retrieve the BarteNerds comics published on Tapastic."""
    # Also on http://www.bartenerds.com
    name = 'bartenerds-tapa'
    long_name = 'BarteNerds (from Tapastic)'
    url = 'https://tapastic.com/series/BarteNERDS'
5848
5849
5850
class SmallBlueYonderTapa(GenericTapasticComic):
    """Retrieve the Small Blue Yonder comics published on Tapastic."""
    # Also on http://www.smallblueyonder.com
    name = 'smallblue-tapa'
    long_name = 'Small Blue Yonder (from Tapastic)'
    url = 'https://tapastic.com/series/Small-Blue-Yonder'
5856
5857
5858
class TizzyStitchBirdTapa(GenericTapasticComic):
    """Retrieve the Tizzy Stitch Bird comics published on Tapastic."""
    # Also on http://tizzystitchbird.com
    # Also on http://tizzystitchbird.tumblr.com
    # Also on http://www.webtoons.com/en/challenge/tizzy-stitchbird/list?title_no=50082
    name = 'tizzy-tapa'
    long_name = 'Tizzy Stitch Bird (from Tapastic)'
    url = 'https://tapastic.com/series/TizzyStitchbird'
5866
5867
5868
class RockPaperCynicTapa(GenericTapasticComic):
    """Retrieve the RockPaperCynic comics published on Tapastic."""
    # Also on http://www.rockpapercynic.com
    # Also on http://rockpapercynic.tumblr.com
    name = 'rpc-tapa'
    long_name = 'Rock Paper Cynic (from Tapastic)'
    url = 'https://tapastic.com/series/rockpapercynic'
5875
5876
5877
class IsItCanonTapa(GenericTapasticComic):
    """Retrieve the Is It Canon comics published on Tapastic."""
    # Also on http://www.isitcanon.com
    name = 'canon-tapa'
    long_name = 'Is It Canon (from Tapastic)'
    url = 'http://tapastic.com/series/isitcanon'
5883
5884
5885
class ItsTheTieTapa(GenericTapasticComic):
    """Retrieve the It's the tie comics published on Tapastic."""
    # Also on http://itsthetie.com
    # Also on http://itsthetie.tumblr.com
    name = 'tie-tapa'
    long_name = "It's the tie (from Tapastic)"
    url = 'https://tapastic.com/series/itsthetie'
    _categories = ('TIE',)
5893
5894
5895
class JamesOfNoTradesTapa(GenericTapasticComic):
    """Retrieve the JamesOfNoTrades comics published on Tapastic."""
    # Also on http://jamesofnotrades.com
    # Also on http://www.webtoons.com/en/challenge/james-of-no-trades/list?title_no=43422
    # Also on http://jamesfregan.tumblr.com
    name = 'jamesofnotrades-tapa'
    long_name = 'James Of No Trades (from Tapastic)'
    url = 'https://tapas.io/series/James-of-No-Trades'
    _categories = ('JAMESOFNOTRADES',)
5904
5905
5906
class MomentumTapa(GenericTapasticComic):
    """Retrieve the Momentum comics published on Tapastic."""
    # Also on http://www.momentumcomic.com
    name = 'momentum-tapa'
    long_name = 'Momentum (from Tapastic)'
    url = 'https://tapastic.com/series/momentum'
5912
5913
5914
class InYourFaceCakeTapa(GenericTapasticComic):
    """Retrieve the In Your Face Cake comics published on Tapastic."""
    # Also on https://in-your-face-cake.tumblr.com
    name = 'inyourfacecake-tapa'
    long_name = 'In Your Face Cake (from Tapastic)'
    url = 'https://tapas.io/series/In-Your-Face-Cake'
    _categories = ('INYOURFACECAKE',)
5921
5922
5923
class CowardlyComicsTapa(GenericTapasticComic):
    """Retrieve the Cowardly Comics published on Tapastic."""
    # Also on http://cowardlycomics.tumblr.com
    # Also on http://www.webtoons.com/en/challenge/cowardly-comics/list?title_no=65893
    name = 'cowardly-tapa'
    long_name = 'Cowardly Comics (from Tapastic)'
    url = 'https://tapas.io/series/CowardlyComics'
5930
5931
5932
class Caw4hwTapa(GenericTapasticComic):
    """Retrieve the Caw4hw comics published on Tapastic."""
    # Also on https://caw4hw.tumblr.com
    name = 'caw4hw-tapa'
    long_name = 'Caw4hw (from Tapastic)'
    url = 'https://tapas.io/series/CAW4HW'
5938
5939
5940
class DontBeDadTapa(GenericTapasticComic):
    """Retrieve the Don't Be Dad comics published on Tapastic."""
    # Also on https://dontbedad.com/
    # Also on http://www.webtoons.com/en/challenge/dontbedad/list?title_no=123074
    name = 'dontbedad-tapa'
    long_name = "Don't Be Dad (from Tapastic)"
    url = 'https://tapas.io/series/DontBeDad-Comics'
5947
5948
5949
class APleasantWasteOfTimeTapa(GenericTapasticComic):
    """Retrieve the A Pleasant Waste Of Time comics published on Tapastic."""
    # Also on https://artjcf.tumblr.com
    name = 'pleasant-waste-tapa'
    long_name = 'A Pleasant Waste Of Time (from Tapastic)'
    url = 'https://tapas.io/series/A-Pleasant-'
    _categories = ('WASTE',)
5956
5957
5958
class InfiniteImmortalBensTapa(GenericTapasticComic):
    """Retrieve the Infinite Immortal Bens comics published on Tapastic."""
    # Also on http://www.webtoons.com/en/challenge/infinite-immortal-bens/list?title_no=32847
    # Also on https://infiniteimmortalbens.tumblr.com
    name = 'infiniteimmortal-tapa'
    long_name = 'Infinite Immortal Bens (from Tapastic)'
    url = 'https://tapas.io/series/Infinite-Immortal-Bens'
    _categories = ('INFINITEIMMORTAL',)
5966
5967
5968
class EatMyPaintTapa(GenericTapasticComic):
    """Retrieve the Eat My Paint comics published on Tapastic."""
    # Also on https://eatmypaint.tumblr.com
    name = 'eatmypaint-tapa'
    long_name = 'Eat My Paint (from Tapastic)'
    url = 'https://tapas.io/series/eatmypaint'
    _categories = ('EATMYPAINT',)
5975
5976
5977
class RoryTapastic(GenericTapasticComic):
    """Retrieve the Rory comics published on Tapastic."""
    # Also on https://rorycomics.tumblr.com/
    name = 'rory-tapa'
    long_name = 'Rory (from Tapastic)'
    url = 'https://tapas.io/series/Share-Your-Vulnerability'
    _categories = ('RORY', )
5984
5985
5986
class MercworksTapa(GenericTapasticComic):
    """Retrieve the Mercworks comics published on Tapastic."""
    # Also on http://mercworks.net
    # Also on http://www.webtoons.com/en/comedy/mercworks/list?title_no=426
    # Also on http://mercworks.tumblr.com
    name = 'mercworks-tapa'
    long_name = 'Mercworks (from Tapastic)'
    url = 'https://tapastic.com/series/MercWorks'
    _categories = ('MERCWORKS',)
5995
5996
5997
class AbsurdoLapin(GenericNavigableComic):
    """Retrieve the Absurdo comics from absurdo.lapin.org."""
    name = 'absurdo'
    long_name = 'Absurdo'
    url = 'https://absurdo.lapin.org'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_nav(cls, soup):
        """Return the navigation links found in the soup object.

        The links come from the second 'buttons' div of the content and are
        ordered as: prev, first, last, next.
        """
        content = soup.find('div', id='content')
        button_bars = content.find_all('div', class_='buttons')
        _, nav_bar = button_bars
        links = []
        for item in nav_bar.find_all('li'):
            links.append(item.find('a'))
        return links

    @classmethod
    def get_first_comic_link(cls):
        """Return the link to the first comic."""
        home_soup = get_soup_at_url(cls.url)
        nav = cls.get_nav(home_soup)
        return nav[1]

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the link to the next (or previous) comic."""
        nav = cls.get_nav(last_soup)
        if next_:
            return nav[3]
        return nav[0]

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the information describing a particular comic."""
        info = {}
        info['author'] = soup.find('meta', attrs={'name': 'author'})['content']
        info['tags'] = soup.find('meta', attrs={'name': 'keywords'})['content']
        info['title'] = soup.find('title').string
        pictures = soup.find('div', id='content').find_all('img')
        info['img'] = [urljoin_wrapper(cls.url, pic['src']) for pic in pictures]
        return info
6035
6036
6037
def get_subclasses(klass):
    """Return the direct subclasses of klass followed by the indirect ones.

    A class reachable through several inheritance paths is listed once per path.
    """
    direct = klass.__subclasses__()
    indirect = [sub for child in direct for sub in get_subclasses(child)]
    return direct + indirect
6043
6044
6045
def remove_st_nd_rd_th_from_date(string):
    """Function to transform 1st/2nd/3rd/4th in a parsable date format.

    Only an ordinal suffix ('st', 'nd', 'rd' or 'th') directly following a
    digit is removed, so that day and month names containing these letters
    ("August", "Monday", "Sunday", ...) are left untouched.
    Returns the cleaned string.
    """
    import re  # local import keeps the function self-contained
    # (?<=\d) : the suffix must follow a digit ; \b : and must end the word.
    return re.sub(r'(?<=\d)(?:st|nd|rd|th)\b', '', string)
6053
6054
6055
def string_to_date(string, date_format, local=DEFAULT_LOCAL):
    """Function to convert string to date object.

    Wrapper around datetime.datetime.strptime which parses `string` according
    to `date_format` while the locale `local` is set, and returns a
    datetime.date.

    The previous locale is always restored, even when the parsing fails.
    Exceptions raised by strptime (ValueError on a mismatch) or by setlocale
    (locale.Error) are propagated to the caller.
    """
    # format described in https://docs.python.org/3/library/datetime.html#strftime-and-strptime-behavior
    prev_locale = locale.setlocale(locale.LC_ALL)
    must_switch = local != prev_locale
    if must_switch:
        locale.setlocale(locale.LC_ALL, local)
    try:
        return datetime.datetime.strptime(string, date_format).date()
    finally:
        # Runs on success and on failure so that the process-wide locale never leaks.
        if must_switch:
            locale.setlocale(locale.LC_ALL, prev_locale)
6066
6067
6068
# Registry of the comic classes defined in this module.
# COMICS: every direct/indirect subclass of GenericComic.
COMICS = set(get_subclasses(GenericComic))
# VALID_COMICS: the classes declaring a name (the others have name set to None).
VALID_COMICS = [c for c in COMICS if c.name is not None]
# COMIC_NAMES: mapping from comic name to class ; names must be unique.
COMIC_NAMES = {c.name: c for c in VALID_COMICS}
assert len(VALID_COMICS) == len(COMIC_NAMES), (
    'Duplicate comic name(s): %s' % sorted(
        {c.name for c in VALID_COMICS if COMIC_NAMES[c.name] is not c}))
# CLASS_NAMES: names of the classes ; they must be unique too.
CLASS_NAMES = {c.__name__ for c in VALID_COMICS}
assert len(VALID_COMICS) == len(CLASS_NAMES), (
    'Duplicate class name(s): %s' % sorted(
        n for n in CLASS_NAMES
        if sum(c.__name__ == n for c in VALID_COMICS) > 1))
6074