Completed
Push — master ( 1e436a...8c5d47 )
by De
28s
created

comics.py (34 issues)

1
#! /usr/bin/python3
2
# vim: set expandtab tabstop=4 shiftwidth=4 :
3
"""Module to retrieve webcomics"""
4
5
from comic_abstract import GenericComic, get_date_for_comic
6
import re
7
from datetime import date, timedelta
8
import datetime
9
from urlfunctions import get_soup_at_url, urljoin_wrapper,\
10
    convert_iri_to_plain_ascii_uri, load_json_at_url, urlopen_wrapper
11
import json
12
import locale
13
import urllib
14
15
DEFAULT_LOCAL = 'en_GB.UTF-8'
16
17
18
class Xkcd(GenericComic):
    """Retriever for xkcd comics, based on the JSON documents of the site."""
    name = 'xkcd'
    long_name = 'xkcd'
    url = 'http://xkcd.com'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Yield the comics following `last_comic` (GenericComic hook).

        `last_comic` is the last comic dict already retrieved (its 'num'
        entry is read) or a falsy value to start from comic number 1.
        """
        json_url = urljoin_wrapper(cls.url, 'info.0.json')
        if last_comic:
            previous_num = last_comic['num']
        else:
            previous_num = 0
        # The JSON at the root of the site gives the number of the latest comic.
        latest_num = load_json_at_url(json_url)['num']

        num = previous_num
        while num < latest_num:
            num += 1
            info = cls.get_comic_info(num)
            if info is None:
                continue
            yield info

    @classmethod
    def get_comic_info(cls, num):
        """Return the dict describing comic number `num` (None for 404)."""
        if num == 404:
            # Number 404 is skipped without requesting anything.
            return None
        json_url = urljoin_wrapper(cls.url, '%d/info.0.json' % num)
        data = load_json_at_url(json_url)
        assert data['num'] == num, json_url
        info = {
            'json_url': json_url,
            'num': num,
            'url': urljoin_wrapper(cls.url, str(num)),
            'prefix': '%d-' % num,
            'img': [data['img']],
        }
        # Date parts are converted to integers.
        for key in ('day', 'month', 'year'):
            info[key] = int(data[key])
        # Remaining fields are copied verbatim from the JSON document.
        for key in ('link', 'news', 'safe_title', 'transcript', 'alt', 'title'):
            info[key] = data[key]
        return info
60
61
62
# Helper functions corresponding to get_url_from_link/get_url_from_archive_element
63
64
65
@classmethod
def get_href(cls, link):
    """Return the 'href' entry of `link` unchanged.

    Can be assigned to get_url_from_link/get_url_from_archive_element.
    """
    href = link['href']
    return href
69
70
71
@classmethod
def join_cls_url_to_href(cls, link):
    """Return the 'href' entry of `link` joined to the url of the class.

    Can be assigned to get_url_from_link/get_url_from_archive_element.
    """
    base_url, href = cls.url, link['href']
    return urljoin_wrapper(base_url, href)
75
76
77
class GenericNavigableComic(GenericComic):
    """Generic class for "navigable" comics: with first/next arrows.

    This class applies to comics where previous and next comics can be
    accessed from a given comic. Once given a starting point (either
    the first comic or the last comic retrieved), it will handle the
    navigation, the retrieval of the soup object and the setting of
    the 'url' attribute on retrieved comics. This limits a lot the
    amount of boilerplate code in the different implementation classes.

    The method `get_next_comic` is implemented in terms of new, more
    specialized methods to be implemented/overridden:
        - get_first_comic_link
        - get_navi_link
        - get_comic_info
        - get_url_from_link
    """
    _categories = ('NAVIGABLE', )

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        Sometimes this can be retrieved of any comic page, sometimes on
        the archive page, sometimes it doesn't exist at all and one has
        to iterate backward to find it before hardcoding the result found.
        """
        raise NotImplementedError

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next (or previous - for dev purposes) comic.

        `next_` is truthy to get the next link, falsy for the previous one.
        """
        raise NotImplementedError

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Implementations return a dict without a 'url' entry (it is added
        by `get_next_comic`) or None when the page is to be skipped.
        """
        raise NotImplementedError

    @classmethod
    def get_url_from_link(cls, link):
        """Get url corresponding to a link. Default implementation is similar to get_href."""
        return link['href']

    @classmethod
    def get_next_link(cls, last_soup):
        """Get link to next comic (and log it)."""
        link = cls.get_navi_link(last_soup, True)
        cls.log("Next link is %s" % link)
        return link

    @classmethod
    def get_prev_link(cls, last_soup):
        """Get link to previous comic (and log it)."""
        link = cls.get_navi_link(last_soup, False)
        cls.log("Prev link is %s" % link)
        return link

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generic implementation of get_next_comic for navigable comics.

        Starts after `last_comic['url']` when a last comic is provided,
        from the first comic link otherwise, then follows the "next" links
        until there is none or until a link leads back to the same url.
        """
        url = last_comic['url'] if last_comic else None
        cls.log("starting 'get_next_comic' from %s" % url)
        next_comic = \
            cls.get_next_link(get_soup_at_url(url)) \
            if url else \
            cls.get_first_comic_link()
        cls.log("next/first comic will be %s (url is %s)" % (str(next_comic), url))
        # Dev helper, disabled by default: cls.check_navigation(url)
        while next_comic:
            prev_url, url = url, cls.get_url_from_link(next_comic)
            if prev_url == url:
                # A "next" link pointing to the current page would loop forever.
                cls.log("got same url %s" % url)
                break
            cls.log("about to get %s (%s)" % (url, str(next_comic)))
            soup = get_soup_at_url(url)
            comic = cls.get_comic_info(soup, next_comic)
            if comic is not None:
                # The 'url' entry is owned by this method.
                assert 'url' not in comic
                comic['url'] = url
                yield comic
            next_comic = cls.get_next_link(soup)
            cls.log("next comic will be %s" % str(next_comic))

    @classmethod
    def check_first_link(cls):
        """Check that navigation to first comic seems to be working - for dev purposes.

        Returns True when a first link exists and its url can be fetched.
        """
        # `import urllib` alone does not guarantee that the `urllib.error`
        # submodule is loaded, hence the explicit import of the exception.
        from urllib.error import HTTPError

        cls.log("about to check first link")
        ok = True
        firstlink = cls.get_first_comic_link()
        if firstlink is None:
            print("From %s : no first link" % cls.url)
            ok = False
        else:
            firsturl = cls.get_url_from_link(firstlink)
            try:
                get_soup_at_url(firsturl)
            except HTTPError:
                print("From %s : invalid first url" % cls.url)
                ok = False
        cls.log("checked first link -> returned %d" % ok)
        return ok

    @classmethod
    def check_prev_next_links(cls, url):
        """Check that navigation to prev/next from a given URL seems to be working - for dev purposes.

        Going backward then forward (and forward then backward) from `url`
        is expected to lead back to `url`. Returns True when it does.
        """
        cls.log("about to check prev/next from %s" % url)
        ok = True
        if url is None:
            prevlink, nextlink = None, None
        else:
            soup = get_soup_at_url(url)
            prevlink, nextlink = cls.get_prev_link(soup), cls.get_next_link(soup)
        if prevlink is None and nextlink is None:
            print("From %s : no previous nor next" % url)
            ok = False
        else:
            if prevlink:
                prevurl = cls.get_url_from_link(prevlink)
                prevsoup = get_soup_at_url(prevurl)
                prevnextlink = cls.get_next_link(prevsoup)
                prevnext = cls.get_url_from_link(prevnextlink) if prevnextlink is not None else "NO URL"
                if prevnext != url:
                    print("From %s, going backward then forward leads to %s" % (url, prevnext))
                    ok = False
            if nextlink:
                nexturl = cls.get_url_from_link(nextlink)
                # A "next" link pointing to the page itself is not followed.
                if nexturl != url:
                    nextsoup = get_soup_at_url(nexturl)
                    nextprevlink = cls.get_prev_link(nextsoup)
                    nextprev = cls.get_url_from_link(nextprevlink) if nextprevlink is not None else "NO URL"
                    if nextprev != url:
                        print("From %s, going forward then backward leads to %s" % (url, nextprev))
                        ok = False
        cls.log("checked prev/next from %s -> returned %d" % (url, ok))
        return ok

    @classmethod
    def check_navigation(cls, url):
        """Check that navigation functions seem to be working - for dev purposes.

        Runs both `check_first_link` and `check_prev_next_links` and
        returns True only when both succeeded.
        """
        cls.log("about to check navigation from %s" % url)
        first = cls.check_first_link()
        prevnext = cls.check_prev_next_links(url)
        ok = first and prevnext
        cls.log("checked navigation from %s -> returned %d" % (url, ok))
        return ok
223
224
225
class GenericListableComic(GenericComic):
    """Generic class for "listable" comics: comics listed on an 'archive'.

    `get_next_comic` is written on top of more specialized methods that
    subclasses implement/override:
        - get_archive_elements
        - get_url_from_archive_element
        - get_comic_info
    """
    _categories = ('LISTABLE', )

    @classmethod
    def get_archive_elements(cls):
        """Return an iterable over the elements of the archive."""
        raise NotImplementedError

    @classmethod
    def get_url_from_archive_element(cls, archive_elt):
        """Return the url an archive element points to."""
        raise NotImplementedError

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Return the dict describing a comic (without its 'url') or None."""
        raise NotImplementedError

    @classmethod
    def get_next_comic(cls, last_comic):
        """Yield the comics listed after `last_comic` in the archive.

        Archive elements are skipped up to (and including) the one whose
        url is `last_comic['url']`; without a last comic, every element is
        retrieved. A message is printed when that url is never found.
        """
        pending_url = last_comic['url'] if last_comic else None
        elements = list(cls.get_archive_elements())
        for element in elements:
            url = cls.get_url_from_archive_element(element)
            cls.log("considering %s" % url)
            if pending_url is not None:
                # Still looking for the last retrieved comic.
                if pending_url == url:
                    pending_url = None
                continue
            cls.log("about to get %s (%s)" % (url, str(element)))
            soup = get_soup_at_url(url)
            comic = cls.get_comic_info(soup, element)
            if comic is None:
                continue
            assert 'url' not in comic
            comic['url'] = url
            yield comic
        if pending_url is not None:
            print("Did not find %s in the %d comics: there might be a problem" %
                  (pending_url, len(elements)))
272
273
# Helper functions corresponding to get_first_comic_link/get_navi_link
274
275
276
@classmethod
def get_link_rel_next(cls, last_soup, next_):
    """Implementation of get_navi_link based on <link rel="next|prev">."""
    if next_:
        rel_value = 'next'
    else:
        rel_value = 'prev'
    return last_soup.find('link', rel=rel_value)
280
281
282
@classmethod
def get_a_rel_next(cls, last_soup, next_):
    """Implementation of get_navi_link based on <a rel="next|prev">."""
    rel_value = ('prev', 'next')[bool(next_)]
    return last_soup.find('a', rel=rel_value)
286
287
288
@classmethod
def get_a_navi_navinext(cls, last_soup, next_):
    """Implementation of get_navi_link for ComicPress (WordPress plugin)."""
    if next_:
        css_class = 'navi navi-next'
    else:
        css_class = 'navi navi-prev'
    return last_soup.find('a', class_=css_class)
293
294
295
@classmethod
def get_a_navi_comicnavnext_navinext(cls, last_soup, next_):
    """Implementation of get_navi_link based on 'navi comic-nav-* navi-*' anchors."""
    css_class = 'navi comic-nav-previous navi-prev'
    if next_:
        css_class = 'navi comic-nav-next navi-next'
    return last_soup.find('a', class_=css_class)
299
300
301
@classmethod
def get_a_comicnavbase_comicnavnext(cls, last_soup, next_):
    """Implementation of get_navi_link based on 'comic-nav-base comic-nav-*' anchors."""
    direction = 'next' if next_ else 'previous'
    return last_soup.find('a', class_='comic-nav-base comic-nav-' + direction)
305
306
307
@classmethod
def get_a_navi_navifirst(cls):
    """Implementation of get_first_comic_link for ComicPress (WordPress plugin).

    Looks for the 'navi navi-first' anchor on the page at the url of the class.
    """
    home_soup = get_soup_at_url(cls.url)
    return home_soup.find('a', class_='navi navi-first')
312
313
314
@classmethod
def get_div_navfirst_a(cls):
    """Implementation of get_first_comic_link.

    Returns the first anchor found in the 'nav-first' div of the page at
    the url of the class, or None when there is no such div (instead of
    failing with an AttributeError) so that callers can handle a missing
    first link the same way as with the other helpers.
    """
    div = get_soup_at_url(cls.url).find('div', class_="nav-first")
    if div is None:
        return None
    return div.find('a')
318
319
320
@classmethod
def get_a_comicnavbase_comicnavfirst(cls):
    """Implementation of get_first_comic_link.

    Looks for the 'comic-nav-base comic-nav-first' anchor on the page at
    the url of the class.
    """
    home_soup = get_soup_at_url(cls.url)
    return home_soup.find('a', class_='comic-nav-base comic-nav-first')
324
325
326
@classmethod
def simulate_first_link(cls):
    """Implementation of get_first_comic_link wrapping `cls.first_url`
    in a link-like dict.

    Note: the first URL can easily be discovered using:
    `get_first_comic_link = navigate_to_first_comic`.
    """
    first_link = dict(href=cls.first_url)
    return first_link
335
336
337
@classmethod
def navigate_to_first_comic(cls):
    """Implementation of get_first_comic_link walking backward from a
    starting URL up to the first comic.

    The starting URL is `cls.first_url` when the class defines it and is
    asked to the user otherwise. "Previous" links are then followed until
    a page has none; every visited URL is printed.

    This is meant for development: once the first URL is known, it is
    better to hardcode it and to use `simulate_first_link`.
    """
    try:
        current_url = cls.first_url
    except AttributeError:
        current_url = input("Get starting URL: ")
    while True:
        print(current_url)
        prev_link = cls.get_prev_link(get_soup_at_url(current_url))
        if not prev_link:
            break
        current_url = cls.get_url_from_link(prev_link)
    return {'href': current_url}
361
362
363
class GenericEmptyComic(GenericComic):
    """Generic class for comics for which nothing is retrieved.

    It is handy to switch off temporarily a comic that does not work
    properly: replace `class MyComic(GenericWhateverComic)` with
    `class MyComic(GenericEmptyComic, GenericWhateverComic)`.
    """
    _categories = ('EMPTY', )

    @classmethod
    def get_next_comic(cls, last_comic):
        """Log that the comic is empty and return no comic at all."""
        cls.log("comic is considered as empty - returning no comic")
        no_comics = []
        return no_comics
376
377
378
class GenericComicNotWorking(GenericEmptyComic):
    """Flavour of GenericEmptyComic for comics whose retrieval is broken.

    More explicit than GenericEmptyComic: it highlights that only the
    implementation does not work and that it can be fixed.
    """
    _categories = ('NOTWORKING', )
384
385
386
class GenericUnavailableComic(GenericEmptyComic):
    """Flavour of GenericEmptyComic for comics that are not available.

    More explicit than GenericEmptyComic: it highlights that the source
    of the comic is not available for now but is expected to be back
    soonish. See also GenericDeletedComic.
    """
    _categories = ('UNAVAILABLE', )
393
394
395
class GenericDeletedComic(GenericEmptyComic):
    """Flavour of GenericEmptyComic for comics that do not exist anymore.

    More explicit than GenericEmptyComic: it highlights that the source
    of the comic is gone and that it probably cannot be fixed. The
    corresponding classes are kept because the data already downloaded
    can still be used. See also GenericUnavailableComic.
    """
    _categories = ('DELETED', )
403
404
405 View Code Duplication
class ExtraFabulousComics(GenericNavigableComic):
    """Class to retrieve Extra Fabulous Comics."""
    # Also on https://extrafabulouscomics.tumblr.com
    name = 'efc'
    long_name = 'Extra Fabulous Comics'
    url = 'http://extrafabulouscomics.com'
    _categories = ('EFC', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://extrafabulouscomics.com/comic/buttfly/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with the title, the sources of the images hosted
        under '<url>/wp-content/uploads/', the publication date (day,
        month, year) and a file prefix built from the title.
        """
        # The url is escaped so that its '.' is matched literally instead
        # of being interpreted as the "any character" regexp wildcard.
        img_src_re = re.compile('^%s/wp-content/uploads/' % re.escape(cls.url))
        imgs = soup.find_all('img', src=img_src_re)
        title = soup.find('meta', property='og:title')['content']
        # Only the leading 'YYYY-MM-DD' part of the timestamp is used.
        date_str = soup.find('meta', property='article:published_time')['content'][:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        return {
            'title': title,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'prefix': title + '-'
        }
432
433
434 View Code Duplication
class GenericLeMondeBlog(GenericNavigableComic):
    """Base class for the comics hosted on the blogs of Le Monde.

    Subclasses provide `name`, `long_name`, `url` and `first_url`; they
    may override `date_format` when the blog shows dates differently.
    """
    _categories = ('LEMONDE', 'FRANCAIS')
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = NotImplemented
    date_format = "%d %B %Y"

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the dict describing the comic found in `soup`."""
        short_link = soup.find('link', rel='shortlink')['href']
        og_title = soup.find('meta', property='og:title')['content']
        raw_date = soup.find("span", class_="entry-date").string
        # Dates on the page are parsed with the French locale.
        published = string_to_date(raw_date, cls.date_format, "fr_FR.utf8")
        og_images = soup.find_all('meta', property='og:image')
        info = {'title': og_title, 'url2': short_link}
        info['img'] = [convert_iri_to_plain_ascii_uri(meta['content'])
                       for meta in og_images]
        info['month'] = published.month
        info['year'] = published.year
        info['day'] = published.day
        return info
458
459
460
class ZepWorld(GenericLeMondeBlog):
    """Retriever for the Zep World blog (Le Monde)."""
    name = 'zep'
    long_name = 'Zep World'
    url = 'http://zepworld.blog.lemonde.fr'
    # Starting point used by simulate_first_link.
    first_url = 'http://zepworld.blog.lemonde.fr/2014/10/31/bientot-le-blog-de-zep/'
466
467
468
class Vidberg(GenericLeMondeBlog):
    """Retriever for the Vidberg blog (Le Monde)."""
    name = "vidberg"
    long_name = "Vidberg - l'actu en patates"
    url = 'http://vidberg.blog.lemonde.fr'
    # Not the very first comic: no efficient way to reach it was found.
    first_url = 'http://vidberg.blog.lemonde.fr/2012/02/09/revue-de-campagne-la-campagne-du-modem-semballe/'
475
476
477
class Plantu(GenericLeMondeBlog):
    """Retriever for the Plantu blog (Le Monde)."""
    name = "plantu"
    long_name = 'Plantu'
    url = 'http://plantu.blog.lemonde.fr'
    # Starting point used by simulate_first_link.
    first_url = 'http://plantu.blog.lemonde.fr/2014/10/28/stress-test-a-bruxelles/'
483
484
485
class XavierGorce(GenericLeMondeBlog):
    """Retriever for the Xavier Gorce blog (Le Monde)."""
    name = "gorce"
    long_name = 'Xavier Gorce'
    url = 'http://xaviergorce.blog.lemonde.fr'
    # Starting point used by simulate_first_link.
    first_url = 'http://xaviergorce.blog.lemonde.fr/2015/01/09/distinction/'
491
492
493
class CartooningForPeace(GenericLeMondeBlog):
    """Retriever for the Cartooning For Peace blog (Le Monde)."""
    name = "forpeace"
    long_name = 'Cartooning For Peace'
    url = 'http://cartooningforpeace.blog.lemonde.fr'
    # Starting point used by simulate_first_link.
    first_url = 'http://cartooningforpeace.blog.lemonde.fr/2014/12/15/bado/'
499
500
501
class Aurel(GenericLeMondeBlog):
    """Retriever for the Aurel blog (Le Monde)."""
    name = "aurel"
    long_name = 'Aurel'
    url = 'http://aurel.blog.lemonde.fr'
    # Starting point used by simulate_first_link.
    first_url = 'http://aurel.blog.lemonde.fr/2014/09/29/le-senat-repasse-a-droite/'
507
508
509
class LesCulottees(GenericLeMondeBlog):
    """Retriever for the Les Culottees blog (Le Monde)."""
    name = "culottees"
    long_name = "Les Culottees"
    url = 'http://lesculottees.blog.lemonde.fr'
    # Starting point used by simulate_first_link.
    first_url = 'http://lesculottees.blog.lemonde.fr/2016/01/11/clementine-delait-femme-a-barbe/'
515
516
517
class UneAnneeAuLycee(GenericLeMondeBlog):
    """Retriever for the Une Annee Au Lycee blog (Le Monde)."""
    name = "lycee"
    long_name = "Une Annee au Lycee"
    url = "http://uneanneeaulycee.blog.lemonde.fr"
    # Starting point used by simulate_first_link.
    first_url = 'http://uneanneeaulycee.blog.lemonde.fr/2016/06/13/la-semaine-du-bac-est-arrivee/'
523
524
525
class LisaMandel(GenericLeMondeBlog):
    """Retriever for the Lisa Mandel blog (Le Monde)."""
    name = "mandel"
    long_name = "Lisa Mandel (HP, hors-service)"
    url = "http://lisamandel.blog.lemonde.fr"
    # Starting point used by simulate_first_link.
    first_url = "http://lisamandel.blog.lemonde.fr/2016/02/23/premiers-jours-a-calais/"
531
532
533
class Avventura(GenericLeMondeBlog):
    """Retriever for the L'Avventura blog (Le Monde)."""
    name = "avventura"
    long_name = "Avventura"
    url = "http://lavventura.blog.lemonde.fr"
    # Starting point used by simulate_first_link.
    first_url = "http://lavventura.blog.lemonde.fr/2013/11/23/roma-paris-aller-simple/"
    # Overrides the default date format of GenericLeMondeBlog.
    date_format = '%d/%m/%Y'
540
541
542
class MorganNavarro(GenericLeMondeBlog):
    """Retriever for the Morgan Navarro blog (Le Monde)."""
    name = "navarro"
    long_name = "Morgan Navarro (Ma vie de reac)"
    url = "http://morgannavarro.blog.lemonde.fr"
    # Starting point used by simulate_first_link.
    first_url = "http://morgannavarro.blog.lemonde.fr/2015/09/09/le-doute/"
548
549
550 View Code Duplication
class Rall(GenericComicNotWorking, GenericNavigableComic):
    """Retriever for Ted Rall comics (currently flagged as not working)."""
    # Also on http://www.gocomics.com/tedrall
    name = 'rall'
    long_name = "Ted Rall"
    url = "http://rall.com/comic"
    _categories = ('RALL', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    # Not the very first comic: no efficient way to reach it was found.
    first_url = "http://rall.com/2014/01/30/los-angeles-times-cartoon-well-miss-those-california-flowers"

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the dict describing the comic found in `soup`."""
        og_title = soup.find('meta', property='og:title')['content']
        author_span = soup.find("span", class_="author vcard")
        author_name = author_span.find("a").string
        raw_date = soup.find("span", class_="entry-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        og_desc = soup.find('meta', property='og:description')['content']
        content_imgs = soup.find('div', class_='entry-content').find_all('img')
        # The last 7 images are dropped: social media buttons.
        comic_imgs = content_imgs[:-7]
        info = {'title': og_title, 'author': author_name}
        info['month'] = published.month
        info['year'] = published.year
        info['day'] = published.day
        info['description'] = og_desc
        info['img'] = [tag['src'] for tag in comic_imgs]
        return info
581
582
583
class Dilem(GenericNavigableComic):
    """Retriever for Ali Dilem comics."""
    name = 'dilem'
    long_name = 'Ali Dilem'
    url = 'http://information.tv5monde.com/dilem'
    _categories = ('FRANCAIS', )
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = "http://information.tv5monde.com/dilem/2004-06-26"

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor leading to the next or previous comic (or None)."""
        # The classes are swapped: 'prev' is used for next, 'next' for prev.
        if next_:
            li_class = 'prev'
        else:
            li_class = 'next'
        item = last_soup.find('li', class_=li_class)
        if not item:
            return None
        return item.find('a')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the dict describing the comic found in `soup`."""
        short_link = soup.find('link', rel='shortlink')['href']
        twitter_title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        og_images = soup.find_all('meta', property='og:image')
        # Only the leading 'YYYY-MM-DD' part of the date is used.
        raw_date = soup.find('span', property='dc:date')['content'][:10]
        published = string_to_date(raw_date, "%Y-%m-%d")
        info = {'short_url': short_link, 'title': twitter_title}
        info['img'] = [meta['content'] for meta in og_images]
        info['day'] = published.day
        info['month'] = published.month
        info['year'] = published.year
        return info
617
618
619
class SpaceAvalanche(GenericNavigableComic):
    """Retriever for Space Avalanche comics."""
    name = 'avalanche'
    long_name = 'Space Avalanche'
    url = 'http://www.spaceavalanche.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return a hardcoded link-like dict for the first comic."""
        first_link = {
            'href': "http://www.spaceavalanche.com/2009/02/02/irish-sea/",
            'title': "Irish Sea",
        }
        return first_link

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the dict describing the comic found in `soup`.

        The title comes from the link, the date from the url of the comic.
        """
        date_in_url = re.compile('.*/([0-9]*)/([0-9]*)/([0-9]*)/.*$')
        link_title = link['title']
        comic_url = cls.get_url_from_link(link)
        # The url contains the date as '/<year>/<month>/<day>/'.
        year, month, day = map(int, date_in_url.match(comic_url).groups())
        entry_imgs = soup.find("div", class_="entry").find_all("img")
        info = {'title': link_title, 'day': day, 'month': month, 'year': year}
        info['img'] = [tag['src'] for tag in entry_imgs]
        return info
647
648
649
class ZenPencils(GenericNavigableComic):
    """Retriever for Zen Pencils comics."""
    # Also on http://zenpencils.tumblr.com
    # Also on http://www.gocomics.com/zen-pencils
    name = 'zenpencils'
    long_name = 'Zen Pencils'
    url = 'http://zenpencils.com'
    _categories = ('ZENPENCILS', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = "http://zenpencils.com/comic/1-ralph-waldo-emerson-make-them-cry/"

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the dict describing the comic found in `soup`."""
        comic_imgs = soup.find('div', id='comic').find_all('img')
        post_content = soup.find('div', class_='post-content')
        author_name = post_content.find("span", class_="post-author").find("a").string
        post_title = soup.find('h2', class_='post-title').string
        raw_date = post_content.find('span', class_='post-date').string
        published = string_to_date(raw_date, "%B %d, %Y")
        # Sanity checks on the images: at least one, with consistent
        # 'alt'/'title' attributes (either empty or equal to the title).
        assert comic_imgs
        assert all(tag['alt'] == tag['title'] for tag in comic_imgs)
        assert all(tag['alt'] in (post_title, "") for tag in comic_imgs)
        info = {'title': post_title, 'author': author_name}
        info['day'] = published.day
        info['month'] = published.month
        info['year'] = published.year
        info['img'] = [urljoin_wrapper(cls.url, tag['src']) for tag in comic_imgs]
        return info
682
683
684
class ItsTheTie(GenericDeletedComic, GenericNavigableComic):
    """Retriever for It's the tie comics (flagged as deleted)."""
    # Also on http://itsthetie.tumblr.com
    # Also on https://tapastic.com/series/itsthetie
    name = 'tie'
    long_name = "It's the tie"
    url = "http://itsthetie.com"
    _categories = ('TIE', )
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the dict describing the comic found in `soup`."""
        comic_title = soup.find('h1', class_='comic-title').find('a').string
        raw_date = soup.find('header', class_='comic-meta entry-meta').find('a').string
        published = string_to_date(raw_date, "%B %d, %Y")
        # Bonus images may or may not be in meta og:image.
        og_srcs = [meta['content']
                   for meta in soup.find_all('meta', property='og:image')]
        bonus_srcs = [tag['data-oversrc']
                      for tag in soup.find_all('img', attrs={'data-oversrc': True})]
        candidates = list(og_srcs)
        for src in bonus_srcs:
            # Bonus images already listed in og:image are not repeated.
            if src not in og_srcs:
                candidates.append(src)
        kept_srcs = []
        for src in candidates:
            if src.endswith("/2016/01/bonus-panel.png"):
                continue
            kept_srcs.append(src)
        tag_meta = soup.find('meta', property='article:tag')
        if tag_meta:
            tags = tag_meta['content']
        else:
            tags = ""
        info = {'title': comic_title}
        info['month'] = published.month
        info['year'] = published.year
        info['day'] = published.day
        info['img'] = kept_srcs
        info['tags'] = tags
        return info
718
719
720
class PenelopeBagieu(GenericNavigableComic):
    """Retriever for the comics of Penelope Bagieu's blog."""
    name = 'bagieu'
    long_name = 'Ma vie est tout a fait fascinante (Bagieu)'
    url = 'http://www.penelope-jolicoeur.com'
    _categories = ('FRANCAIS', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.penelope-jolicoeur.com/2007/02/ma-vie-mon-oeuv.html'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the dict describing the comic found in `soup`."""
        raw_date = soup.find('h2', class_='date-header').string
        # Dates on the page are parsed with the French locale.
        published = string_to_date(raw_date, "%A %d %B %Y", "fr_FR.utf8")
        body_imgs = soup.find('div', class_='entry-body').find_all('img')
        entry_title = soup.find('h3', class_='entry-header').string
        info = {'title': entry_title}
        info['img'] = [tag['src'] for tag in body_imgs]
        info['month'] = published.month
        info['year'] = published.year
        info['day'] = published.day
        return info
744
745
746
class OneOneOneOneComic(GenericComicNotWorking, GenericNavigableComic):
    """Retriever for 1111 Comics (currently flagged as not working)."""
    # Also on http://comics1111.tumblr.com
    # Also on https://tapastic.com/series/1111-Comics
    name = '1111'
    long_name = '1111 Comics'
    url = 'http://www.1111comics.me'
    _categories = ('ONEONEONEONE', )
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the dict describing the comic found in `soup`."""
        comic_title = soup.find('h1', class_='comic-title').find('a').string
        meta_header = soup.find('header', class_='comic-meta entry-meta')
        raw_date = meta_header.find('a').string
        published = string_to_date(raw_date, "%B %d, %Y")
        og_images = soup.find_all('meta', property='og:image')
        info = {'title': comic_title}
        info['month'] = published.month
        info['year'] = published.year
        info['day'] = published.day
        info['img'] = [meta['content'] for meta in og_images]
        return info
771
772
773
class AngryAtNothing(GenericDeletedComic, GenericNavigableComic):
    """Retriever for the Angry at Nothing webcomic."""
    # Also on http://tapastic.com/series/Comics-yeah-definitely-comics-
    # Also on http://angryatnothing.tumblr.com
    name = 'angry'
    long_name = 'Angry At Nothing'
    url = 'http://www.angryatnothing.net'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the title, date and image URLs found on a comic page.

        `soup` is the parsed comic page; `link` is not used here.
        """
        heading = soup.find('h1', class_='comic-title').find('a').string
        meta_header = soup.find('header', class_='comic-meta entry-meta')
        published = string_to_date(meta_header.find('a').string, "%B %d, %Y")
        # Images are taken from the page's og:image meta tags.
        og_images = soup.find_all('meta', property='og:image')
        info = {
            'title': heading,
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [meta['content'] for meta in og_images],
        }
        return info
797
798
799
class NeDroid(GenericNavigableComic):
    """Retriever for the NeDroid webcomic."""
    name = 'nedroid'
    long_name = 'NeDroid'
    url = 'http://nedroid.com'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the short URL, number, titles and image URL of a comic page.

        The comic number is read from the `?p=<num>` part of the page's
        shortlink. Exactly one image is expected inside the 'comic' div.
        """
        shortlink_pattern = re.compile('^%s/\\?p=([0-9]*)' % cls.url)
        shortlink = cls.get_url_from_link(soup.find('link', rel='shortlink'))
        comic_num = int(shortlink_pattern.match(shortlink).group(1))
        pictures = soup.find('div', id='comic').find_all('img')
        assert len(pictures) == 1, pictures
        picture = pictures[0]
        alt_text = picture['alt']
        hover_text = picture['title']
        return {
            'short_url': shortlink,
            'title': alt_text,
            'title2': hover_text,
            'img': [urljoin_wrapper(cls.url, pic['src']) for pic in pictures],
            'num': comic_num,
        }
825
826
827
class Garfield(GenericNavigableComic):
    """Retriever for the Garfield comic strip."""
    # Also on http://www.gocomics.com/garfield
    name = 'garfield'
    long_name = 'Garfield'
    url = 'https://garfield.com'
    _categories = ('GARFIELD', )
    get_first_comic_link = simulate_first_link
    first_url = 'https://garfield.com/comic/1978/06/19'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the arrow link to the next (or previous) comic on the page."""
        if next_:
            arrow_class = 'comic-arrow-right'
        else:
            arrow_class = 'comic-arrow-left'
        return last_soup.find('a', class_=arrow_class)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the date (read from the comic URL) and image URLs of a comic."""
        comic_url = cls.get_url_from_link(link)
        # The URL has the form <url>/comic/<year>/<month>/<day>.
        url_pattern = re.compile('^%s/comic/([0-9]*)/([0-9]*)/([0-9]*)' % cls.url)
        year, month, day = map(int, url_pattern.match(comic_url).groups())
        display = soup.find('div', class_='comic-display')
        pictures = display.find_all('img', class_='img-responsive')
        return {
            'month': month,
            'year': year,
            'day': day,
            'img': [pic['src'] for pic in pictures],
        }
855
856
857 View Code Duplication
class Dilbert(GenericNavigableComic):
    """Retriever for the Dilbert comic strip."""
    # Also on http://www.gocomics.com/dilbert-classics
    name = 'dilbert'
    long_name = 'Dilbert'
    url = 'http://dilbert.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://dilbert.com/strip/1989-04-16'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the link to the next (or previous) comic, or None if absent."""
        wanted_class = 'nav-comic nav-right' if next_ else 'nav-comic nav-left'
        container = last_soup.find('div', class_=wanted_class)
        if not container:
            return None
        return container.find('a')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the metadata of a comic, read from the page's meta tags."""
        def meta_content(prop):
            """Return the 'content' of the first meta tag with this property."""
            return soup.find('meta', property=prop)['content']

        headline = meta_content('og:title')
        og_images = soup.find_all('meta', property='og:image')
        description = meta_content('og:description')
        published = string_to_date(meta_content('article:publish_date'), "%B %d, %Y")
        writer = meta_content('article:author')
        tag_text = meta_content('article:tag')
        return {
            'title': headline,
            'description': description,
            'img': [meta['content'] for meta in og_images],
            'author': writer,
            'tags': tag_text,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
893
894
895
class VictimsOfCircumsolar(GenericDeletedComic, GenericNavigableComic):
    """Retriever for the Victims Of Circumsolar webcomic."""
    # Also on https://victimsofcomics.tumblr.com
    name = 'circumsolar'
    long_name = 'Victims Of Circumsolar'
    url = 'http://www.victimsofcircumsolar.com'
    get_navi_link = get_a_navi_comicnavnext_navinext
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.victimsofcircumsolar.com/comic/modern-addiction'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the title, description and image URLs of a comic page.

        No date is returned: it is only available on the archive page.
        """
        # The last og:title / og:description meta tag on the page is used.
        title_metas = soup.find_all('meta', property='og:title')
        headline = title_metas[-1]['content']
        desc_metas = soup.find_all('meta', property='og:description')
        description = desc_metas[-1]['content']
        pictures = soup.find('div', id='comic').find_all('img')
        assert all(pic['title'] == pic['alt'] == headline for pic in pictures)
        return {
            'title': headline,
            'description': description,
            'img': [pic['src'] for pic in pictures],
        }
918
919
920
class ThreeWordPhrase(GenericNavigableComic):
    """Class to retrieve Three Word Phrase comics."""
    # Also on http://www.threewordphrase.tumblr.com
    name = 'threeword'
    long_name = 'Three Word Phrase'
    url = 'http://threewordphrase.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comic.

        Returns the parent element of the 'first' navigation image, or None
        when that image is not on the home page.
        """
        img = get_soup_at_url(cls.url).find('img', src='/firstlink.gif')
        return None if img is None else img.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns None when the navigation image is missing or when its parent
        element carries no 'href' attribute.
        """
        img = last_soup.find('img', src='/nextlink.gif' if next_ else '/prevlink.gif')
        if img is None:
            # find() returns None when nothing matches; avoid `.parent` on it.
            return None
        link = img.parent
        return None if link.get('href') is None else link

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        Returns a dict with the page title ('title'), the joined alt texts of
        the comic images ('title2') and the absolute image URLs ('img').
        """
        # Images whose source ends with one of these are excluded from the result.
        ignored_suffixes = ('link.gif', '32.png', 'twpbookad.jpg',
                            'merchad.jpg', 'header.gif', 'tipjar.jpg')
        title = soup.find('title')
        imgs = [img for img in soup.find_all('img')
                if not img['src'].endswith(ignored_suffixes)]
        return {
            'title': title.string if title else None,
            'title2': '  '.join(img.get('alt') for img in imgs if img.get('alt')),
            'img': [urljoin_wrapper(cls.url, img['src']) for img in imgs],
        }
952
953
954
class DeadlyPanel(GenericComicNotWorking, GenericNavigableComic):  # Not working on my machine
    """Retriever for the Deadly Panel webcomic."""
    # Also on https://tapastic.com/series/deadlypanel
    # Also on https://deadlypanel.tumblr.com
    name = 'deadly'
    long_name = 'Deadly Panel'
    url = 'http://www.deadlypanel.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the image URLs found in the 'comic' div of a comic page."""
        pictures = soup.find('div', id='comic').find_all('img')
        # Every image is expected to carry identical alt and title texts.
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        sources = [pic['src'] for pic in pictures]
        return {
            'img': sources,
        }
972
973
974 View Code Duplication
class TheGentlemanArmchair(GenericNavigableComic):
    """Retriever for The Gentleman Armchair webcomic."""
    name = 'gentlemanarmchair'
    long_name = 'The Gentleman Armchair'
    url = 'http://thegentlemansarmchair.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the images, title, author and date of a comic page."""
        headline = soup.find('h2', class_='post-title').string
        writer = soup.find("span", class_="post-author").find("a").string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find('div', id='comic').find_all('img')
        info = {
            'img': [pic['src'] for pic in pictures],
            'title': headline,
            'author': writer,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
        return info
998
999
1000 View Code Duplication
class ImogenQuest(GenericNavigableComic):
    """Retriever for the Imogen Quest webcomic."""
    # Also on http://imoquest.tumblr.com
    name = 'imogen'
    long_name = 'Imogen Quest'
    url = 'http://imogenquest.net'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the date, images, titles and author of a comic page.

        'title2' is the title text of the first image in the comic pane.
        """
        headline = soup.find('h2', class_='post-title').string
        writer = soup.find("span", class_="post-author").find("a").string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, '%B %d, %Y')
        pictures = soup.find('div', class_='comicpane').find_all('img')
        # Every image is expected to carry identical alt and title texts.
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        hover_text = pictures[0]['title']
        info = {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [pic['src'] for pic in pictures],
            'title': headline,
            'title2': hover_text,
            'author': writer,
        }
        return info
1028
1029
1030
class MyExtraLife(GenericNavigableComic):
    """Retriever for the My Extra Life webcomic."""
    name = 'extralife'
    long_name = 'My Extra Life'
    url = 'http://www.myextralife.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'first comic' navigation link found on the home page."""
        home = get_soup_at_url(cls.url)
        return home.find('a', class_='comic_nav_link first_comic_link')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the title, image URLs and date of a comic page."""
        headline = soup.find("h1", class_="comic_title").string
        raw_date = soup.find("span", class_="comic_date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find_all("img", class_="comic")
        # alt and title of each image are expected to equal the page title.
        assert all(pic['alt'] == pic['title'] == headline for pic in pictures)
        # Images with an empty source are left out.
        sources = [pic['src'] for pic in pictures if pic["src"]]
        return {
            'title': headline,
            'img': sources,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
1057
1058
1059
class SaturdayMorningBreakfastCereal(GenericNavigableComic):
    """Retriever for the Saturday Morning Breakfast Cereal webcomic."""
    # Also on http://www.gocomics.com/saturday-morning-breakfast-cereal
    # Also on http://smbc-comics.tumblr.com
    name = 'smbc'
    long_name = 'Saturday Morning Breakfast Cereal'
    url = 'http://www.smbc-comics.com'
    _categories = ('SMBC', )
    get_navi_link = get_a_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the rel='start' link found on the home page."""
        home = get_soup_at_url(cls.url)
        return home.find('a', rel='start')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the title, image URLs and date of a comic page.

        The main image is always listed; the image of the 'aftercomic' div
        is appended when that div exists and its source is not empty.
        """
        main_img = soup.find('img', id='cc-comic')
        main_src = main_img['src']
        bonus_div = soup.find('div', id='aftercomic')
        bonus_src = bonus_div.find('img')['src'] if bonus_div else ''
        sources = [main_src]
        if bonus_src:
            sources.append(bonus_src)
        raw_date = soup.find('div', class_='cc-publishtime').contents[0]
        published = string_to_date(raw_date, "%B %d, %Y")
        return {
            'title': main_img['title'],
            'img': [convert_iri_to_plain_ascii_uri(urljoin_wrapper(cls.url, src))
                    for src in sources],
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
1091
1092
1093 View Code Duplication
class PerryBibleFellowship(GenericListableComic):  # Is now navigable too
    """Retriever for the Perry Bible Fellowship webcomic."""
    name = 'pbf'
    long_name = 'Perry Bible Fellowship'
    url = 'http://pbfcomics.com'
    get_url_from_archive_element = join_cls_url_to_href

    @classmethod
    def get_archive_elements(cls):
        """Return the thumbnail links of the home page, in reversed order."""
        home = get_soup_at_url(cls.url)
        gallery = home.find('div', id='all_thumbnails')
        anchors = gallery.find_all('a')
        return reversed(anchors)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the name and image URL of a comic, read from its meta tags."""
        comic_name = soup.find('meta', property='og:title')['content']
        og_images = soup.find_all('meta', property='og:image')
        # Exactly one og:image tag is expected per page.
        assert len(og_images) == 1, og_images
        return {
            'name': comic_name,
            'img': [meta['content'] for meta in og_images],
        }
1116
1117
1118
class Mercworks(GenericDeletedComic):  # Moved to Webtoons
    """Retriever for the Mercworks webcomic."""
    # Also on http://mercworks.tumblr.com
    # Also on http://www.webtoons.com/en/comedy/mercworks/list?title_no=426
    # Also on https://tapastic.com/series/MercWorks
    name = 'mercworks'
    long_name = 'Mercworks'
    url = 'http://mercworks.net'
    _categories = ('MERCWORKS', )
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the images, title, description and date of a comic page.

        The description is an empty string when the page has no
        og:description meta tag.
        """
        headline = soup.find('meta', property='og:title')['content']
        desc_meta = soup.find('meta', property='og:description')
        if desc_meta:
            description = desc_meta['content']
        else:
            description = ""
        published_meta = soup.find('meta', property='article:published_time')
        # Only the first 10 characters are kept and parsed as %Y-%m-%d.
        raw_date = published_meta['content'][:10]
        published = string_to_date(raw_date, "%Y-%m-%d")
        og_images = soup.find_all('meta', property='og:image')
        return {
            'img': [meta['content'] for meta in og_images],
            'title': headline,
            'desc': description,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
1147
1148
1149
class BerkeleyMews(GenericListableComic):
    """Retriever for the Berkeley Mews webcomic."""
    # Also on http://mews.tumblr.com
    # Also on http://www.gocomics.com/berkeley-mews
    name = 'berkeley'
    long_name = 'Berkeley Mews'
    url = 'http://www.berkeleymews.com'
    _categories = ('BERKELEY', )
    get_url_from_archive_element = get_href
    # Matches comic URLs of the form <url>/?p=<num> and captures the number.
    comic_num_re = re.compile('%s/\\?p=([0-9]*)$' % url)

    @classmethod
    def get_archive_elements(cls):
        """Return the comic links of the archive page, in reversed order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, "?page_id=2"))
        anchors = archive_soup.find_all('a', href=cls.comic_num_re)
        return reversed(anchors)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the number, titles, image URL and date of a comic.

        The number comes from the comic URL and the date from the three
        dash-separated numbers in the image URL.
        """
        date_pattern = re.compile('.*/([0-9]*)-([0-9]*)-([0-9]*)-.*')
        comic_url = cls.get_url_from_archive_element(link)
        comic_num = int(cls.comic_num_re.match(comic_url).group(1))
        picture = soup.find('div', id='comic').find('img')
        assert picture['alt'] == picture['title']
        hover_text = picture['title']
        picture_url = picture['src']
        year, month, day = map(int, date_pattern.match(picture_url).groups())
        return {
            'num': comic_num,
            'title': link.string,
            'title2': hover_text,
            'img': [picture_url],
            'year': year,
            'month': month,
            'day': day,
        }
1185
1186
1187
class GenericBouletCorp(GenericNavigableComic):
    """Shared retriever logic for BouletCorp comics in different languages."""
    # Also on https://bouletcorp.tumblr.com
    _categories = ('BOULET', )
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the first link of the 'centered_nav' div of the home page."""
        navigation = get_soup_at_url(cls.url).find('div', id='centered_nav')
        return navigation.find_all('a')[0]

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the images, title, image texts and date of a comic.

        The date is read from the comic URL (<url>/<year>/<month>/<day>/).
        Images without a 'src' attribute are left out of 'img'.
        """
        comic_url = cls.get_url_from_link(link)
        url_pattern = re.compile('^%s/([0-9]*)/([0-9]*)/([0-9]*)/' % cls.url)
        year, month, day = map(int, url_pattern.match(comic_url).groups())
        story = soup.find('div', id='notes').find('div', class_='storycontent')
        pictures = story.find_all('img')
        hover_texts = [pic.get('title') for pic in pictures]
        joined_texts = '  '.join(text for text in hover_texts if text)
        page_title = soup.find('title').string
        return {
            'img': [convert_iri_to_plain_ascii_uri(pic['src'])
                    for pic in pictures if pic.get('src') is not None],
            'title': page_title,
            'texts': joined_texts,
            'year': year,
            'month': month,
            'day': day,
        }
1215
1216
1217
class BouletCorp(GenericBouletCorp):
    """Retriever for the BouletCorp comics at www.bouletcorp.com."""
    name = 'boulet'
    long_name = 'Boulet Corp'
    url = 'http://www.bouletcorp.com'
    # Overrides the category tuple inherited from GenericBouletCorp.
    _categories = ('FRANCAIS', )
1223
1224
1225
class BouletCorpEn(GenericBouletCorp):
    """Retriever for the BouletCorp comics at english.bouletcorp.com."""
    name = 'boulet_en'
    long_name = 'Boulet Corp English'
    url = 'http://english.bouletcorp.com'
1230
1231
1232 View Code Duplication
class AmazingSuperPowers(GenericNavigableComic):
    """Retriever for the Amazing Super Powers webcomic."""
    name = 'asp'
    long_name = 'Amazing Super Powers'
    url = 'http://www.amazingsuperpowers.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the title, author, image URLs and date of a comic page.

        The title is the space-joined title texts of the comic images.
        """
        writer = soup.find("span", class_="post-author").find("a").string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find('div', id='comic').find_all('img')
        headline = ' '.join(pic['title'] for pic in pictures)
        # Every image is expected to carry identical alt and title texts.
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        return {
            'title': headline,
            'author': writer,
            'img': [pic['src'] for pic in pictures],
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
1257
1258
1259 View Code Duplication
class ToonHole(GenericNavigableComic):
    """Retriever for the Toon Hole webcomic."""
    # Also on http://tapastic.com/series/TOONHOLE
    name = 'toonhole'
    long_name = 'Toon Hole'
    url = 'http://www.toonhole.com'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_a_comicnavbase_comicnavnext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the title, date and image URLs of a comic page.

        The title is the alt text of the first image, or an empty string
        when the 'comic' div holds no image.
        """
        raw_date = soup.find('div', class_='entry-meta').contents[0].strip()
        published = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find('div', id='comic').find_all('img')
        headline = ""
        if pictures:
            first_picture = pictures[0]
            headline = first_picture['alt']
            assert first_picture['title'] == headline
        return {
            'title': headline,
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [convert_iri_to_plain_ascii_uri(pic['src']) for pic in pictures],
        }
1287
1288
1289
class Channelate(GenericNavigableComic):
    """Retriever for the Channelate webcomic."""
    name = 'channelate'
    long_name = 'Channelate'
    url = 'http://www.channelate.com'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the metadata and image URLs of a comic page.

        When the page has an 'extrapanelbutton' div, the page it links to is
        fetched as well and its 'extrapanelimage' images are appended.
        'url_extra' is None when there is no such div.
        """
        writer = soup.find("span", class_="post-author").find("a").string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, '%Y/%m/%d')
        headline = soup.find('meta', property='og:title')['content']
        comic_div = soup.find('div', id='comic')
        if comic_div:
            pictures = comic_div.find_all('img')
        else:
            pictures = []
        bonus_url = None
        bonus_div = soup.find('div', id='extrapanelbutton')
        if bonus_div:
            bonus_url = bonus_div.find('a')['href']
            bonus_soup = get_soup_at_url(bonus_url)
            pictures.extend(bonus_soup.find_all('img', class_='extrapanelimage'))
        return {
            'url_extra': bonus_url,
            'title': headline,
            'author': writer,
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [urljoin_wrapper(cls.url, pic['src']) for pic in pictures],
        }
1323
1324
1325
class CyanideAndHappiness(GenericNavigableComic):
    """Class to retrieve Cyanide And Happiness comics."""
    name = 'cyanide'
    long_name = 'Cyanide and Happiness'
    url = 'http://explosm.net'
    _categories = ('NSFW', )
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comic (the anchor titled 'Oldest comic')."""
        return get_soup_at_url(cls.url).find('a', title='Oldest comic')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns None when the navigation anchor is missing from the page or
        when it carries no 'href' attribute.
        """
        link = last_soup.find('a', class_='nav-next' if next_ else 'nav-previous')
        if link is None:
            # find() returns None when nothing matches; avoid `.get` on it.
            return None
        return None if link.get('href') is None else link

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        The comic number is the second-to-last '/'-separated part of the
        og:url meta tag. The 'comic-author' div text is split on '\\nby '
        into the date (before) and the author (after).
        """
        url2 = soup.find('meta', property='og:url')['content']
        num = int(url2.split('/')[-2])
        date_str, _, author = soup.find('div', id='comic-author').text.strip().partition('\nby ')
        day = string_to_date(date_str, '%Y.%m.%d')
        imgs = soup.find_all('img', id='main-comic')
        return {
            'num': num,
            'author': author,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'prefix': '%d-' % num,
            'img': [convert_iri_to_plain_ascii_uri(urljoin_wrapper(cls.url, i['src'])) for i in imgs]
        }
1361
1362
1363
class MrLovenstein(GenericComic):
    """Retriever for the Mr. Lovenstein webcomic."""
    # Also on https://tapastic.com/series/MrLovenstein
    name = 'mrlovenstein'
    long_name = 'Mr. Lovenstein'
    url = 'http://www.mrlovenstein.com'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Yield the comics published after `last_comic`, by increasing number.

        Implementation of GenericComic's abstract method. The range of comic
        numbers is derived from the '/comic/<num>' links on the home page;
        when `last_comic` is given, iteration resumes right after its number.
        """
        # TODO: more info from http://www.mrlovenstein.com/archive
        number_pattern = re.compile('^/comic/([0-9]*)$')
        home_links = get_soup_at_url(cls.url).find_all('a', href=number_pattern)
        known_numbers = [int(number_pattern.match(anchor['href']).group(1))
                         for anchor in home_links]
        start, stop = min(known_numbers), max(known_numbers)
        if last_comic:
            start = last_comic['num'] + 1
        picture_pattern = re.compile('^/images/comics/')
        for comic_num in range(start, stop + 1):
            comic_url = urljoin_wrapper(cls.url, '/comic/%d' % comic_num)
            page = get_soup_at_url(comic_url)
            # Images are reported in the reverse of their order on the page.
            pictures = list(reversed(page.find_all('img', src=picture_pattern)))
            summary = page.find('meta', attrs={'name': 'description'})['content']
            hover_texts = [pic.get('title') for pic in pictures]
            yield {
                'url': comic_url,
                'num': comic_num,
                'texts': '  '.join(text for text in hover_texts if text),
                'img': [urljoin_wrapper(comic_url, pic['src']) for pic in pictures],
                'description': summary,
            }
1393
1394
1395
class DinosaurComics(GenericListableComic):
    """Retriever for the Dinosaur Comics webcomic."""
    name = 'dinosaur'
    long_name = 'Dinosaur Comics'
    url = 'http://www.qwantz.com'
    get_url_from_archive_element = get_href
    # Matches comic URLs and captures the comic number.
    comic_link_re = re.compile('^%s/index.php\\?comic=([0-9]*)$' % url)

    @classmethod
    def get_archive_elements(cls):
        """Return the comic links of the archive page, in reversed order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive.php'))
        anchors = archive_soup.find_all('a', href=cls.comic_link_re)
        # first link is random -> skip it
        return reversed(anchors[1:])

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the date, image, title, text and number of a comic.

        The date is the text of the archive link and 'text' is the string
        of the node that follows that link.
        """
        comic_url = cls.get_url_from_archive_element(link)
        comic_num = int(cls.comic_link_re.match(comic_url).group(1))
        raw_date = link.string
        follow_text = link.next_sibling.string
        published = string_to_date(remove_st_nd_rd_th_from_date(raw_date), "%B %d, %Y")
        picture_pattern = re.compile('^%s/comics/' % cls.url)
        picture = soup.find('img', src=picture_pattern)
        return {
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [picture.get('src')],
            'title': picture.get('title'),
            'text': follow_text,
            'num': comic_num,
        }
1428
1429
1430 View Code Duplication
class ButterSafe(GenericListableComic):
    """Retriever for the ButterSafe webcomic."""
    name = 'butter'
    long_name = 'ButterSafe'
    url = 'http://buttersafe.com'
    get_url_from_archive_element = get_href
    # Matches comic URLs of the form <url>/<year>/<month>/<day>/...
    comic_link_re = re.compile('^%s/([0-9]*)/([0-9]*)/([0-9]*)/.*' % url)

    @classmethod
    def get_archive_elements(cls):
        """Return the comic links of the archive page, in reversed order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive/'))
        anchors = archive_soup.find_all('a', href=cls.comic_link_re)
        return reversed(anchors)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the title, date (read from the URL) and image URL of a comic."""
        comic_url = cls.get_url_from_archive_element(link)
        headline = link.string
        year, month, day = map(int, cls.comic_link_re.match(comic_url).groups())
        picture = soup.find('div', id='comic').find('img')
        # The image alt text is expected to equal the archive link text.
        assert picture['alt'] == headline
        return {
            'title': headline,
            'day': day,
            'month': month,
            'year': year,
            'img': [picture['src']],
        }
1458
1459
1460
class CalvinAndHobbes(GenericComic):
    """Class to retrieve Calvin and Hobbes comics."""
    # Also on http://www.gocomics.com/calvinandhobbes/
    name = 'calvin'
    long_name = 'Calvin and Hobbes'
    # This is not through any official webpage but eh...
    url = 'http://marcel-oehler.marcellosendos.ch/comics/ch/'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generator to get the next comic. Implementation of GenericComic's abstract method.

        Walks the '<year>/<month>/' links of the index page, then the images
        of each month page, and yields every comic dated strictly after the
        last one retrieved (or after 1985-11-01 when `last_comic` is empty).
        """
        last_date = get_date_for_comic(last_comic) if last_comic else date(1985, 11, 1)
        link_re = re.compile('^([0-9]*)/([0-9]*)/')
        for link in get_soup_at_url(cls.url).find_all('a', href=link_re):
            url = link['href']
            # Keep the raw strings: they are reused verbatim in the image
            # regex and in the image URL.
            year_str, month_str = link_re.match(url).groups()
            year, month = int(year_str), int(month_str)
            # Skip months that end before the last retrieved comic.
            if date(year, month, 1) + timedelta(days=31) < last_date:
                continue
            img_re = re.compile('^%s%s([0-9]*)' % (year_str, month_str))
            month_url = urljoin_wrapper(cls.url, url)
            for img in get_soup_at_url(month_url).find_all('img', src=img_re):
                img_src = img['src']
                day = int(img_re.match(img_src).groups()[0])
                comic_date = date(year, month, day)
                if comic_date > last_date:
                    yield {
                        'url': month_url,
                        'year': year,
                        'month': month,
                        'day': day,
                        'img': ['%s%s/%s/%s' % (cls.url, year_str, month_str, img_src)],
                    }
                    last_date = comic_date
1494
1495
1496 View Code Duplication
class AbstruseGoose(GenericListableComic):
    """Class to retrieve AbstruseGoose Comics."""
    name = 'abstruse'
    long_name = 'Abstruse Goose'
    url = 'http://abstrusegoose.com'
    get_url_from_archive_element = get_href
    # The site url is escaped so that its dots only match literal dots, and at
    # least one digit is required so that the captured number is never empty.
    comic_url_re = re.compile('^%s/([0-9]+)$' % re.escape(url))
    comic_img_re = re.compile('^%s/strips/.*' % re.escape(url))

    @classmethod
    def get_archive_elements(cls):
        """Get the archive page links whose href matches comic_url_re."""
        archive_url = urljoin_wrapper(cls.url, 'archive')
        return get_soup_at_url(archive_url).find_all('a', href=cls.comic_url_re)

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Get information about a particular comics.

        The number comes from the comic url, the title from the archive link
        text and the images from the 'img' tags whose src matches comic_img_re.
        """
        comic_url = cls.get_url_from_archive_element(archive_elt)
        num = int(cls.comic_url_re.match(comic_url).groups()[0])
        imgs = soup.find_all('img', src=cls.comic_img_re)
        return {
            'num': num,
            'title': archive_elt.string,
            'img': [i['src'] for i in imgs],
        }
1520
1521
1522
class PhDComics(GenericNavigableComic):
    """Class to retrieve PHD Comics."""
    name = 'phd'
    long_name = 'PhD Comics'
    url = 'http://phdcomics.com/comics/archive.php'

    @classmethod
    def get_first_comic_link(cls):
        """Return the parent of the 'first' button image, or None if absent."""
        button = get_soup_at_url(cls.url).find(
            'img', src='http://phdcomics.com/comics/images/first_button.gif')
        if button is None:
            return None
        return button.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the parent of the 'next' or 'prev' button image, or None if absent."""
        direction = 'next' if next_ else 'prev'
        button_src = 'http://phdcomics.com/comics/images/%s_button.gif' % direction
        button = last_soup.find('img', src=button_src)
        if button is None:
            return None
        return button.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic dictionary from the page's meta tags."""
        title_meta = soup.find('meta', attrs={'name': 'twitter:title'})
        title = title_meta['content']
        image_metas = soup.find_all('meta', property='og:image')
        return {
            'img': [meta['content'] for meta in image_metas],
            'title': title,
        }
1551
1552
1553
class Quarktees(GenericNavigableComic):
    """Class to retrieve the Quarktees comics."""
    name = 'quarktees'
    long_name = 'Quarktees'
    url = 'http://www.quarktees.com/blogs/news'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.quarktees.com/blogs/news/12486621-coming-soon'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the 'a' tag identified as next (or previous) article."""
        link_id = 'article-next' if next_ else 'article-prev'
        return last_soup.find('a', id=link_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic dictionary from the og:title meta and the article images."""
        title = soup.find('meta', property='og:title')['content']
        article_div = soup.find('div', class_='single-article')
        img_urls = [urljoin_wrapper(cls.url, tag['src'])
                    for tag in article_div.find_all('img')]
        return {
            'title': title,
            'img': img_urls,
        }
1577
1578
1579
class OverCompensating(GenericNavigableComic):
    """Class to retrieve the Over Compensating comics."""
    name = 'compensating'
    long_name = 'Over Compensating'
    url = 'http://www.overcompensating.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the link whose href ends with 'comic=1'."""
        first_href_re = re.compile('comic=1$')
        return get_soup_at_url(cls.url).find('a', href=first_href_re)

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the navigation link, looked up by its title attribute."""
        link_title = 'next comic' if next_ else 'go back already'
        return last_soup.find('a', title=link_title)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic dictionary: number from the url, image and title from the page."""
        img_src_re = re.compile('^/oc/comics/.*')
        comic_num_re = re.compile('.*comic=([0-9]*)$')
        comic_url = cls.get_url_from_link(link)
        num_match = comic_num_re.match(comic_url)
        num = int(num_match.group(1))
        img = soup.find('img', src=img_src_re)
        return {
            'num': num,
            'img': [urljoin_wrapper(comic_url, img['src'])],
            'title': img.get('title')
        }
1609
1610
1611
class Oglaf(GenericNavigableComic):
    """Class to retrieve Oglaf comics."""
    name = 'oglaf'
    long_name = 'Oglaf [NSFW]'
    url = 'http://oglaf.com'
    _categories = ('NSFW', )
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        Returns the parent of the div with id 'st', or None when that div is
        not found (same convention as get_navi_link).
        """
        div = get_soup_at_url(cls.url).find("div", id="st")
        return div.parent if div else None

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns the parent of the div with id 'nx' (next) or 'pvs' (previous),
        or None when that div is not found.
        """
        div = last_soup.find("div", id="nx" if next_ else "pvs")
        return div.parent if div else None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Exactly one title image (inside the div with id 'tt') and one strip
        image (id 'strip') are expected; the description is made of the title
        attributes of both images.
        """
        title = soup.find('title').string
        title_imgs = soup.find('div', id='tt').find_all('img')
        assert len(title_imgs) == 1, title_imgs
        strip_imgs = soup.find_all('img', id='strip')
        assert len(strip_imgs) == 1, strip_imgs
        imgs = title_imgs + strip_imgs
        desc = ' '.join(i['title'] for i in imgs)
        return {
            'title': title,
            'img': [i['src'] for i in imgs],
            'description': desc,
        }
1645
1646
1647
class ScandinaviaAndTheWorld(GenericNavigableComic):
    """Class to retrieve Scandinavia And The World comics."""
    name = 'satw'
    long_name = 'Scandinavia And The World'
    url = 'http://satwcomic.com'
    get_first_comic_link = simulate_first_link
    first_url = 'http://satwcomic.com/sweden-denmark-and-norway'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the navigation link, looked up by its accesskey attribute."""
        key = 'n' if next_ else 'p'
        return last_soup.find('a', accesskey=key)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic dictionary from the meta tags and the 'image' itemprop tags."""
        title_meta = soup.find('meta', attrs={'name': 'twitter:label1'})
        title = title_meta['content']
        desc_meta = soup.find('meta', property='og:description')
        desc = desc_meta['content']
        img_tags = soup.find_all('img', itemprop="image")
        return {
            'title': title,
            'description': desc,
            'img': [tag['src'] for tag in img_tags],
        }
1671
1672
1673
class SomethingOfThatIlk(GenericDeletedComic):
    """Class for the Something Of That Ilk comics (declared as a deleted comic)."""
    name = 'somethingofthatilk'
    long_name = 'Something Of That Ilk'
    url = 'http://www.somethingofthatilk.com'
1678
1679
1680
class MonkeyUser(GenericNavigableComic):
    """Class to retrieve Monkey User comics."""
    name = 'monkeyuser'
    long_name = 'Monkey User'
    url = 'http://www.monkeyuser.com'
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.monkeyuser.com/2016/project-lifecycle/'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the link inside the 'next'/'previous' div, or None without such div."""
        div_title = 'next' if next_ else 'previous'
        nav_div = last_soup.find('div', title=div_title)
        if nav_div is None:
            return None
        return nav_div.find('a')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic dictionary from the og: meta tags and the post date."""
        title = soup.find('meta', property='og:title')['content']
        desc = soup.find('meta', property='og:description')['content']
        image_metas = soup.find_all('meta', property='og:image')
        date_span = soup.find('span', class_='post-date')
        date_str = date_span.find('time').string
        day = string_to_date(date_str, "%d %b %Y")
        return {
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [meta['content'] for meta in image_metas],
            'title': title,
            'description': desc,
        }
1711
1712
1713
class InfiniteMonkeyBusiness(GenericNavigableComic):
    """Class to retrieve InfiniteMonkeyBusiness comics."""
    name = 'monkey'
    long_name = 'Infinite Monkey Business'
    url = 'http://infinitemonkeybusiness.net'
    get_navi_link = get_a_navi_comicnavnext_navinext
    get_first_comic_link = simulate_first_link
    first_url = 'http://infinitemonkeybusiness.net/comic/pillory/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic dictionary from the og:title meta and the 'comic' div images."""
        title = soup.find('meta', property='og:title')['content']
        comic_div = soup.find('div', id='comic')
        img_srcs = [tag['src'] for tag in comic_div.find_all('img')]
        return {
            'title': title,
            'img': img_srcs,
        }
1731
1732
1733
class Wondermark(GenericListableComic):
    """Class to retrieve the Wondermark comics."""
    name = 'wondermark'
    long_name = 'Wondermark'
    url = 'http://wondermark.com'
    get_url_from_archive_element = get_href

    @classmethod
    def get_archive_elements(cls):
        """Return the 'bookmark' links of the archive page, in reversed order."""
        archive_url = urljoin_wrapper(cls.url, 'archive/')
        bookmarks = get_soup_at_url(archive_url).find_all('a', rel='bookmark')
        return reversed(bookmarks)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic dictionary; image, alt and title stay empty without a 'comic' div."""
        date_str = soup.find('div', class_='postdate').find('em').string
        day = string_to_date(remove_st_nd_rd_th_from_date(date_str), "%B %d, %Y")
        comic_div = soup.find('div', id='comic')
        # Defaults used when the page has no 'comic' div.
        img_src, alt, title = [], '', ''
        if comic_div:
            img = comic_div.find('img')
            img_src = [img['src']]
            alt = img['alt']
            assert alt == img['title']
            title = soup.find('meta', property='og:title')['content']
        tag_links = soup.find('div', class_='postmeta').find_all('a', rel='tag')
        tags = ' '.join(t.string for t in tag_links)
        return {
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': img_src,
            'title': title,
            'alt': alt,
            'tags': tags,
        }
1770
1771
1772
class WarehouseComic(GenericNavigableComic):
    """Class to retrieve Warehouse Comic comics."""
    name = 'warehouse'
    long_name = 'Warehouse Comic'
    url = 'http://warehousecomic.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic dictionary from the post title, post date and 'comic' div images."""
        title = soup.find('h2', class_='post-title').string
        post_date = soup.find('span', class_='post-date').string
        day = string_to_date(post_date, "%B %d, %Y")
        comic_div = soup.find('div', id='comic')
        img_srcs = [tag['src'] for tag in comic_div.find_all('img')]
        return {
            'img': img_srcs,
            'title': title,
            'day': day.day,
            'month': day.month,
            'year': day.year,
        }
1794
1795
1796
class JustSayEh(GenericNavigableComic):
    """Class to retrieve Just Say Eh comics."""
    # Also on http://tapastic.com/series/Just-Say-Eh
    name = 'justsayeh'
    long_name = 'Just Say Eh'
    url = 'http://www.justsayeh.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The title comes from the post title and the images from the 'comic'
        div; every image is expected to have identical alt and title
        attributes. The alt text is empty when the div contains no image.
        """
        title = soup.find('h2', class_='post-title').string
        imgs = soup.find("div", id="comic").find_all("img")
        assert all(i['alt'] == i['title'] for i in imgs)
        # Do not fail with an IndexError on a page without any image.
        alt = imgs[0]['alt'] if imgs else ''
        return {
            'img': [i['src'] for i in imgs],
            'title': title,
            'alt': alt,
        }
1817
1818
1819
class MouseBearComedy(GenericComicNotWorking):  # Website has changed
    """Class to retrieve Mouse Bear Comedy comics."""
    # Also on http://mousebearcomedy.tumblr.com
    name = 'mousebear'
    long_name = 'Mouse Bear Comedy'
    url = 'http://www.mousebearcomedy.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic dictionary from the post title, author, date and 'comic' div images."""
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        post_date = soup.find("span", class_="post-date").string
        day = string_to_date(post_date, '%B %d, %Y')
        img_tags = soup.find("div", id="comic").find_all("img")
        # Each image is expected to carry the post title as both alt and title.
        assert all(tag['alt'] == tag['title'] == title for tag in img_tags)
        return {
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [tag['src'] for tag in img_tags],
            'title': title,
            'author': author,
        }
1845
1846
1847 View Code Duplication
class BigFootJustice(GenericNavigableComic):
    """Class to retrieve Big Foot Justice comics."""
    # Also on http://tapastic.com/series/bigfoot-justice
    name = 'bigfoot'
    long_name = 'Big Foot Justice'
    url = 'http://bigfootjustice.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic dictionary; the title is made of the image titles joined by spaces."""
        comic_div = soup.find('div', id='comic')
        img_tags = comic_div.find_all('img')
        # Each image is expected to have identical title and alt attributes.
        assert all(tag['title'] == tag['alt'] for tag in img_tags)
        joined_title = ' '.join(tag['title'] for tag in img_tags)
        return {
            'img': [tag['src'] for tag in img_tags],
            'title': joined_title,
        }
1866
1867
1868
class RespawnComic(GenericNavigableComic):
    """Class to retrieve Respawn Comic."""
    # Also on https://respawncomic.tumblr.com
    name = 'respawn'
    long_name = 'Respawn Comic'
    # The literal used to end with a stray space, which is not part of the url.
    url = 'http://respawncomic.com'
    _categories = ('RESPAWN', )
    get_navi_link = get_a_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://respawncomic.com/comic/c0001/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Title, author and publication date are read from meta tags; images
        come from the og:image meta tags, minus the urls listed in skip_imgs.
        """
        title = soup.find('meta', property='og:title')['content']
        author = soup.find('meta', attrs={'name': 'shareaholic:article_author_name'})['content']
        date_str = soup.find('meta', attrs={'name': 'shareaholic:article_published_time'})['content']
        # Only the first 10 characters are parsed, as a "%Y-%m-%d" date.
        date_str = date_str[:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        imgs = soup.find_all('meta', property='og:image')
        skip_imgs = {
            'http://respawncomic.com/wp-content/uploads/2016/03/site/HAROLD2.png',
            'http://respawncomic.com/wp-content/uploads/2016/03/site/DEVA.png'
        }
        return {
            'title': title,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [i['content'] for i in imgs if i['content'] not in skip_imgs],
        }
1900
1901
1902 View Code Duplication
class SafelyEndangered(GenericNavigableComic):
    """Class to retrieve Safely Endangered comics."""
    # Also on http://tumblr.safelyendangered.com
    name = 'endangered'
    long_name = 'Safely Endangered'
    url = 'http://www.safelyendangered.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.safelyendangered.com/comic/ignored/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Title and date come from the post header and the images from the
        'comic' div; every image is expected to have identical alt and title
        attributes. The alt text is empty when the div contains no image.
        """
        title = soup.find('h2', class_='post-title').string
        date_str = soup.find('span', class_='post-date').string
        day = string_to_date(date_str, '%B %d, %Y')
        imgs = soup.find('div', id='comic').find_all('img')
        # Do not fail with an IndexError on a page without any image.
        alt = imgs[0]['alt'] if imgs else ''
        assert all(i['alt'] == i['title'] for i in imgs)
        return {
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [i['src'] for i in imgs],
            'title': title,
            'alt': alt,
        }
1929
1930
1931
class PicturesInBoxes(GenericNavigableComic):
    """Class to retrieve Pictures In Boxes comics."""
    # Also on https://picturesinboxescomic.tumblr.com
    name = 'picturesinboxes'
    long_name = 'Pictures in Boxes'
    url = 'http://www.picturesinboxes.com'
    get_navi_link = get_a_navi_navinext
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.picturesinboxes.com/2013/10/26/tetris/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic dictionary from the post title, author, date and 'comicpane' images."""
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        post_date = soup.find('span', class_='post-date').string
        day = string_to_date(post_date, '%B %d, %Y')
        img_tags = soup.find('div', class_='comicpane').find_all('img')
        # At least one image is expected, each carrying the post title as
        # both title and alt.
        assert img_tags
        assert all(tag['title'] == tag['alt'] == title for tag in img_tags)
        return {
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [tag['src'] for tag in img_tags],
            'title': title,
            'author': author,
        }
1959
1960
1961 View Code Duplication
class Penmen(GenericComicNotWorking, GenericNavigableComic):
    """Class to retrieve Penmen comics."""
    name = 'penmen'
    long_name = 'Penmen'
    url = 'http://penmen.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://penmen.com/index.php/2016/09/12/penmen-announces-grin-big-brand-clothing/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic dictionary from the page title, entry images, shortlink, tags and date."""
        title = soup.find('title').string
        content_div = soup.find('div', class_='entry-content')
        img_tags = content_div.find_all('img')
        short_url = soup.find('link', rel='shortlink')['href']
        tag_links = soup.find_all('a', rel='tag')
        tags = ' '.join(t.string for t in tag_links)
        # Only the first 10 characters of the datetime attribute are parsed.
        iso_day = soup.find('time')['datetime'][:10]
        day = string_to_date(iso_day, "%Y-%m-%d")
        return {
            'title': title,
            'short_url': short_url,
            'img': [tag['src'] for tag in img_tags],
            'tags': tags,
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
1988
1989
1990
class TheDoghouseDiaries(GenericDeletedComic, GenericNavigableComic):
    """Class to retrieve The Dog House Diaries comics."""
    name = 'doghouse'
    long_name = 'The Dog House Diaries'
    url = 'http://thedoghousediaries.com'

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'a' tag with id 'firstlink' from the main page."""
        main_soup = get_soup_at_url(cls.url)
        return main_soup.find('a', id='firstlink')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the 'a' tag with id 'nextlink' or 'previouslink'."""
        link_id = 'nextlink' if next_ else 'previouslink'
        return last_soup.find('a', id=link_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic dictionary; the number is the last path component of the comic url."""
        comic_img_re = re.compile('^dhdcomics/.*')
        img = soup.find('img', src=comic_img_re)
        comic_url = cls.get_url_from_link(link)
        last_component = comic_url.split('/')[-1]
        return {
            'title': soup.find('h2', id='titleheader').string,
            'title2': soup.find('div', id='subtext').string,
            'alt': img.get('title'),
            'img': [urljoin_wrapper(comic_url, img['src'].strip())],
            'num': int(last_component),
        }
2019
2020
2021
class InvisibleBread(GenericListableComic):
    """Class to retrieve Invisible Bread comics."""
    # Also on http://www.gocomics.com/invisible-bread
    name = 'invisiblebread'
    long_name = 'Invisible Bread'
    url = 'http://invisiblebread.com'

    @classmethod
    def get_archive_elements(cls):
        """Get the 'archive-title' cells of the archive page, in reversed order."""
        archive_url = urljoin_wrapper(cls.url, 'archives/')
        return reversed(get_soup_at_url(archive_url).find_all('td', class_='archive-title'))

    @classmethod
    def get_url_from_archive_element(cls, td):
        """Get the href of the link contained in an archive cell."""
        return td.find('a')['href']

    @classmethod
    def get_comic_info(cls, soup, td):
        """Get information about a particular comics.

        The date is rebuilt from the text of the cell preceding td (month and
        day) and from the year found in the comic url. The first image of the
        'comic' div is expected to carry the comic title as title and alt.
        """
        url = cls.get_url_from_archive_element(td)
        title = td.find('a').string
        month_and_day = td.previous_sibling.string
        # The site url is escaped so that its dots only match literal dots.
        link_re = re.compile('^%s/([0-9]+)/' % re.escape(cls.url))
        year = link_re.match(url).groups()[0]
        date_str = month_and_day + ' ' + year
        day = string_to_date(date_str, '%b %d %Y')
        img = soup.find('div', id='comic').find('img')
        assert img['title'] == img['alt'] == title
        return {
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [urljoin_wrapper(cls.url, img['src'])],
            'title': title,
        }
2057
2058
2059
class DiscoBleach(GenericDeletedComic):
    """Class for the Disco Bleach comics (declared as a deleted comic)."""
    name = 'discobleach'
    long_name = 'Disco Bleach'
    url = 'http://discobleach.com'
2064
2065
2066
class TubeyToons(GenericDeletedComic):
    """Class for the TubeyToons comics (declared as a deleted comic)."""
    # Also on http://tapastic.com/series/Tubey-Toons
    # Also on https://tubeytoons.tumblr.com
    name = 'tubeytoons'
    long_name = 'Tubey Toons'
    url = 'http://tubeytoons.com'
    # NOTE(review): the category is spelled 'TUNEYTOONS' (not 'TUBEYTOONS');
    # kept as is since other classes may use the same value - to be confirmed.
    _categories = ('TUNEYTOONS', )
2074
2075
2076
class CompletelySeriousComics(GenericNavigableComic):
    """Class to retrieve Completely Serious comics."""
    name = 'completelyserious'
    long_name = 'Completely Serious Comics'
    url = 'http://completelyseriouscomics.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic dictionary from the post title, author, date and 'comicpane' images."""
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find('span', class_='post-author')
        # The author is read from the second child of the author span.
        author = author_span.contents[1].string
        post_date = soup.find('span', class_='post-date').string
        day = string_to_date(post_date, '%B %d, %Y')
        img_tags = soup.find('div', class_='comicpane').find_all('img')
        assert img_tags
        alt = img_tags[0]['title']
        # All images are expected to share the same title and alt text.
        assert all(tag['title'] == tag['alt'] == alt for tag in img_tags)
        return {
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [tag['src'] for tag in img_tags],
            'title': title,
            'alt': alt,
            'author': author,
        }
2104
2105
2106 View Code Duplication
class PoorlyDrawnLines(GenericListableComic):
    """Class to retrieve Poorly Drawn Lines comics."""
    # Also on http://pdlcomics.tumblr.com
    name = 'poorlydrawn'
    long_name = 'Poorly Drawn Lines'
    url = 'https://www.poorlydrawnlines.com'
    _categories = ('POORLYDRAWN', )
    get_url_from_archive_element = get_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        At most one image is expected in the 'post' div; the title is the
        title attribute of that image, or an empty string when there is no
        image or no title attribute.
        """
        imgs = soup.find('div', class_='post').find_all('img')
        assert len(imgs) <= 1, imgs
        return {
            'img': [i['src'] for i in imgs],
            'title': imgs[0].get('title', "") if imgs else "",
        }

    @classmethod
    def get_archive_elements(cls):
        """Get the comic links of the archive page, in reversed order."""
        archive_url = urljoin_wrapper(cls.url, 'archive')
        # The site url is escaped so that its dots only match literal dots.
        url_re = re.compile('^%s/comic/.' % re.escape(cls.url))
        return reversed(get_soup_at_url(archive_url).find_all('a', href=url_re))
2130
2131
2132
class LoadingComics(GenericNavigableComic):
    """Class to retrieve Loading Artist comics."""
    name = 'loadingartist'
    long_name = 'Loading Artist'
    url = 'http://www.loadingartist.com/latest'

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'a' tag titled "First" from the main page."""
        latest_soup = get_soup_at_url(cls.url)
        return latest_soup.find('a', title="First")

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the 'a' tag titled 'Next' or 'Previous'."""
        link_title = 'Next' if next_ else 'Previous'
        return last_soup.find('a', title=link_title)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic dictionary from the h1 title, the date span and the 'comic' div images."""
        title = soup.find('h1').string
        raw_date = soup.find('span', class_='date').string
        day = string_to_date(raw_date.strip(), "%B %d, %Y")
        comic_div = soup.find('div', class_='comic')
        # Only images with empty alt and title attributes are kept.
        img_tags = comic_div.find_all('img', alt='', title='')
        return {
            'title': title,
            'img': [tag['src'] for tag in img_tags],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
2162
2163
2164
class ChuckleADuck(GenericNavigableComic):
    """Class to retrieve Chuckle-A-Duck comics."""
    name = 'chuckleaduck'
    long_name = 'Chuckle-A-duck'
    url = 'http://chuckleaduck.com'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic dictionary; images and title stay empty without a 'comic' div."""
        post_date = soup.find('span', class_='post-date').string
        day = string_to_date(remove_st_nd_rd_th_from_date(post_date), "%B %d, %Y")
        author = soup.find('span', class_='post-author').string
        comic_div = soup.find('div', id='comic')
        if comic_div:
            img_tags = comic_div.find_all('img')
        else:
            img_tags = []
        if img_tags:
            title = img_tags[0]['title']
        else:
            title = ""
        # All images are expected to share the same title and alt text.
        assert all(tag['title'] == tag['alt'] == title for tag in img_tags)
        return {
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [tag['src'] for tag in img_tags],
            'title': title,
            'author': author,
        }
2190
2191
2192
class DepressedAlien(GenericNavigableComic):
    """Class to retrieve Depressed Alien Comics."""
    name = 'depressedalien'
    long_name = 'Depressed Alien'
    url = 'http://depressedalien.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        Returns the parent of the image named 'beginArrow', or None when that
        image is not found.
        """
        img = get_soup_at_url(cls.url).find('img', attrs={'name': 'beginArrow'})
        return None if img is None else img.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns the parent of the image named 'rightArrow' (next) or
        'leftArrow' (previous), or None when that image is not found.
        """
        img = last_soup.find('img', attrs={'name': 'rightArrow' if next_ else 'leftArrow'})
        return None if img is None else img.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The title comes from the twitter:title meta tag and the images from
        the og:image meta tags.
        """
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        imgs = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'img': [i['content'] for i in imgs],
        }
2218
2219
2220 View Code Duplication
class TurnOffUs(GenericListableComic):
    """Class to retrieve TurnOffUs comics."""
    name = 'turnoffus'
    long_name = 'Turn Off Us'
    url = 'http://turnoff.us'
    get_url_from_archive_element = join_cls_url_to_href

    @classmethod
    def get_archive_elements(cls):
        """Return the 'post-link' anchors of the 'post-list' list, in reversed order."""
        archive_url = urljoin_wrapper(cls.url, 'all')
        archive_soup = get_soup_at_url(archive_url)
        post_list = archive_soup.find('ul', class_='post-list')
        post_links = post_list.find_all('a', class_='post-link')
        return reversed(post_links)

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Build the comic dictionary from the og:title and og:image meta tags."""
        title = soup.find('meta', property='og:title')['content']
        image_metas = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'img': [meta['content'] for meta in image_metas],
        }
2242
2243
2244
class ThingsInSquares(GenericListableComic):
    """Retriever for the 'Things in Squares' comics."""
    # This can be retrieved in other languages
    # Also on https://tapastic.com/series/Things-in-Squares
    name = 'squares'
    long_name = 'Things in squares'
    url = 'http://www.thingsinsquares.com'

    @classmethod
    def get_comic_info(cls, soup, tr):
        """Build the comic record from the comic page (soup) and its archive row (tr).

        The row must contain exactly three cells: the second one holds the
        link whose text is the title, the third one the date ("%m.%d.%y").
        """
        _, link_cell, date_cell = tr.find_all('td')
        anchor = link_cell.find('a')
        raw_date = date_cell.string
        published = string_to_date(raw_date, "%m.%d.%y")
        row_title = anchor.string
        page_title = soup.find('meta', property='og:title')['content']
        desc_meta = soup.find('meta', property='og:description')
        # The description meta tag is optional.
        if desc_meta:
            description = desc_meta['content']
        else:
            description = ''
        tag_metas = soup.find_all('meta', property='article:tag')
        tags = ' '.join(meta['content'] for meta in tag_metas)
        image_metas = soup.find_all('meta', property='og:image')
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'title': row_title,
            'title2': page_title,
            'description': description,
            'tags': tags,
            'img': [meta['content'] for meta in image_metas],
        }

    @classmethod
    def get_url_from_archive_element(cls, tr):
        """Return the href of the link stored in the second cell of an archive row."""
        cells = tr.find_all('td')
        _, link_cell, __ = cells
        anchor = link_cell.find('a')
        return anchor['href']

    @classmethod
    def get_archive_elements(cls):
        """Return the rows of the 'archive-2' page table body, in reversed page order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive-2'))
        rows = archive_soup.find('tbody').find_all('tr')
        return reversed(rows)
2285
2286
2287
class HappleTea(GenericNavigableComic):
    """Retriever for the 'Happle Tea' comics."""
    name = 'happletea'
    long_name = 'Happle Tea'
    url = 'http://www.happletea.com'
    # Navigation relies on helpers defined elsewhere in the file.
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic record from the 'comic' div and the 'post-content' div.

        Every image must carry identical 'alt' and 'title' attributes.
        `link` is not used.
        """
        pictures = soup.find('div', id='comic').find_all('img')
        post_div = soup.find('div', class_='post-content')
        comic_title = post_div.find('h2', class_='post-title').string
        writer = post_div.find('a', rel='author').string
        raw_date = post_div.find('span', class_='post-date').string
        published = string_to_date(raw_date, "%B %d, %Y")
        for picture in pictures:
            assert picture['alt'] == picture['title']
        return {
            'title': comic_title,
            'img': [picture['src'] for picture in pictures],
            'alt': ''.join(picture['alt'] for picture in pictures),
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'author': writer,
        }
2314
2315
2316
class RockPaperScissors(GenericNavigableComic):
    """Retriever for the 'Rock Paper Scissors' comics."""
    name = 'rps'
    long_name = 'Rock Paper Scissors'
    url = 'http://rps-comics.com'
    # Navigation relies on helpers defined elsewhere in the file.
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic record: page title, og:image urls, shortlink and transcript.

        `link` is not used.
        """
        page_title = soup.find('title').string
        image_metas = soup.find_all('meta', property='og:image')
        shortlink = soup.find('link', rel='shortlink')['href']
        transcript_div = soup.find('div', id='transcript-content')
        transcript_text = transcript_div.string
        return {
            'title': page_title,
            'transcript': transcript_text,
            'short_url': shortlink,
            'img': [meta['content'] for meta in image_metas],
        }
2337
2338
2339
class FatAwesomeComics(GenericNavigableComic):
    """Retriever for the 'Fat Awesome' comics."""
    # Also on http://fatawesomecomedy.tumblr.com
    name = 'fatawesome'
    long_name = 'Fat Awesome'
    url = 'http://fatawesome.com/comics'
    get_navi_link = get_a_rel_next
    # The first comic is designated by `first_url` (helper defined elsewhere).
    get_first_comic_link = simulate_first_link
    first_url = 'http://fatawesome.com/shortbus/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic record from the page metadata and its single comic image.

        Exactly one <img> with data-recalc-dims="1" is expected; the query
        string of its 'src' is dropped. `link` is not used.
        """
        comic_title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        summary = soup.find('meta', attrs={'name': 'description'})['content']
        tag_meta = soup.find('meta', property='article:tag')
        # The tag meta is optional.
        if tag_meta:
            tags = tag_meta['content']
        else:
            tags = ""
        published_meta = soup.find('meta', property='article:published_time')
        # Only the leading "YYYY-MM-DD" part of the timestamp is parsed.
        raw_date = published_meta['content'][:10]
        published = string_to_date(raw_date, "%Y-%m-%d")
        pictures = soup.find_all('img', attrs={'data-recalc-dims': "1"})
        assert len(pictures) == 1, pictures
        return {
            'title': comic_title,
            'description': summary,
            'tags': tags,
            'alt': "".join(picture['alt'] for picture in pictures),
            'img': [picture['src'].rsplit('?', 1)[0] for picture in pictures],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2370
2371
2372
class PeterLauris(GenericNavigableComic):
    """Retriever for the 'Peter Lauris' comics."""
    name = 'peterlauris'
    long_name = 'Peter Lauris'
    url = 'http://peterlauris.com/comics'
    get_navi_link = get_a_rel_next
    # The first comic is designated by `first_url` (helper defined elsewhere).
    get_first_comic_link = simulate_first_link
    first_url = 'http://peterlauris.com/comics/just-in-case/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic record from the page metadata.

        Title comes from 'twitter:title', the date from the first 10
        characters of 'article:published_time', the images from 'og:image'.
        `link` is not used.
        """
        comic_title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        published_meta = soup.find('meta', property='article:published_time')
        raw_date = published_meta['content'][:10]
        published = string_to_date(raw_date, "%Y-%m-%d")
        image_metas = soup.find_all('meta', property='og:image')
        return {
            'title': comic_title,
            'img': [meta['content'] for meta in image_metas],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2395
2396
2397
class RandomCrab(GenericNavigableComic):
    """Retriever for the 'Random Crab' comics."""
    name = 'randomcrab'
    long_name = 'Random Crab'
    url = 'https://randomcrab.com'
    get_navi_link = get_a_rel_next
    # The first comic is designated by `first_url` (helper defined elsewhere).
    get_first_comic_link = simulate_first_link
    first_url = 'https://randomcrab.com/natural-elephant/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic record from the Open Graph metadata and the author link.

        `link` is not used.
        """
        comic_title = soup.find('meta', property='og:title')['content']
        summary = soup.find('meta', property='og:description')['content']
        published_meta = soup.find('meta', property='article:published_time')
        # Only the leading "YYYY-MM-DD" part of the timestamp is parsed.
        raw_date = published_meta['content'][:10]
        published = string_to_date(raw_date, "%Y-%m-%d")
        image_metas = soup.find_all('meta', property='og:image')
        writer = soup.find('a', rel='author').string
        return {
            'title': comic_title,
            'desc': summary,
            'img': [meta['content'] for meta in image_metas],
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'author': writer,
        }
2424
2425
2426 View Code Duplication
class JuliasDrawings(GenericListableComic):
    """Retriever for Julia's Drawings."""
    name = 'julia'
    long_name = "Julia's Drawings"
    url = 'https://drawings.jvns.ca'
    # Archive elements are mapped to urls by a helper defined elsewhere.
    get_url_from_archive_element = get_href

    @classmethod
    def get_archive_elements(cls):
        """Return the anchors of the 'drawings' div found at cls.url, in reversed order."""
        home_soup = get_soup_at_url(cls.url)
        drawings_div = home_soup.find('div', class_='drawings')
        anchors = drawings_div.find_all('a')
        return reversed(anchors)

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Build the comic record from the drawing's page.

        Image sources are joined with cls.url. `archive_elt` is not used.
        """
        published_meta = soup.find('meta', property='og:article:published_time')
        # Only the leading "YYYY-MM-DD" part of the timestamp is parsed.
        raw_date = published_meta['content'][:10]
        published = string_to_date(raw_date, "%Y-%m-%d")
        drawing_title = soup.find('h3', class_='p-post-title').string
        content_section = soup.find('section', class_='post-content')
        pictures = content_section.find_all('img')
        return {
            'title': drawing_title,
            'img': [urljoin_wrapper(cls.url, picture['src']) for picture in pictures],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2452
2453
2454
class AnythingComic(GenericListableComic):
    """Retriever for the 'Anything Comic' comics."""
    # Also on http://tapastic.com/series/anything
    name = 'anythingcomic'
    long_name = 'Anything Comic'
    url = 'http://www.anythingcomic.com'

    @classmethod
    def get_archive_elements(cls):
        """Return the comic rows of the 'chapter_table' table on the archive page."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive/'))
        rows = archive_soup.find('table', id='chapter_table').find_all('tr')
        # The first 2 <tr>'s do not correspond to comics
        return rows[2:]

    @classmethod
    def get_url_from_archive_element(cls, tr):
        """Return the comic url for an archive row (which must hold exactly four cells)."""
        _, comic_cell, _date_cell, _ = tr.find_all('td')
        anchor = comic_cell.find('a')
        return urljoin_wrapper(cls.url, anchor['href'])

    @classmethod
    def get_comic_info(cls, soup, tr):
        """Build the comic record from the comic page (soup) and its archive row (tr).

        The row gives the number, the title and the date; the page gives the
        single 'comic_image' image, whose 'alt' and 'title' must match.
        """
        num_cell, comic_cell, date_cell, _ = tr.find_all('td')
        number = int(num_cell.string)
        anchor = comic_cell.find('a')
        comic_title = anchor.string
        pictures = soup.find_all('img', id='comic_image')
        raw_date = date_cell.string
        cleaned_date = remove_st_nd_rd_th_from_date(raw_date)
        published = string_to_date(cleaned_date, "%B %d, %Y, %I:%M %p")
        assert len(pictures) == 1, pictures
        for picture in pictures:
            assert picture.get('alt') == picture.get('title')
        return {
            'num': number,
            'title': comic_title,
            'alt': pictures[0].get('alt', ''),
            'img': [urljoin_wrapper(cls.url, picture['src']) for picture in pictures],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2495
2496
2497 View Code Duplication
class LonnieMillsap(GenericNavigableComic):
    """Retriever for Lonnie Millsap's comics."""
    name = 'millsap'
    long_name = 'Lonnie Millsap'
    url = 'http://www.lonniemillsap.com'
    get_navi_link = get_link_rel_next
    # The first comic is designated by `first_url` (helper defined elsewhere).
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.lonniemillsap.com/?p=42'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic record from the post title and the 'post-content' div.

        `link` is not used.
        """
        comic_title = soup.find('h2', class_='post-title').string
        post_div = soup.find('div', class_='post-content')
        author_span = post_div.find("span", class_="post-author")
        writer = author_span.find("a").string
        raw_date = post_div.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        entry_div = post_div.find("div", class_="entry")
        pictures = entry_div.find_all("img")
        return {
            'title': comic_title,
            'author': writer,
            'img': [picture['src'] for picture in pictures],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2523
2524
2525
class LinsEditions(GenericDeletedComic):  # Permanently moved to warandpeas
    """Entry kept for the L.I.N.S. Editions comics, now registered as deleted."""
    # Also on https://linscomics.tumblr.com
    # Now on https://warandpeas.com
    name = "lins"
    long_name = "L.I.N.S. Editions"
    url = "https://linsedition.com"
    # Same categories as the WarAndPeas class.
    _categories = ("WARANDPEAS", "LINS")
2533
2534
2535
class WarAndPeas(GenericNavigableComic):
    """Retriever for the 'War And Peas' comics."""
    name = 'warandpeas'
    long_name = 'War And Peas'
    url = 'https://warandpeas.com'
    get_navi_link = get_link_rel_next
    # The first comic is designated by `first_url` (helper defined elsewhere).
    get_first_comic_link = simulate_first_link
    first_url = 'https://warandpeas.com/2011/11/07/565/'
    _categories = ('WARANDPEAS', 'LINS')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic record from the page metadata.

        Title comes from 'og:title', images from 'og:image' and the date from
        the first 10 characters of 'article:published_time'. `link` is not used.
        """
        comic_title = soup.find('meta', property='og:title')['content']
        image_metas = soup.find_all('meta', property='og:image')
        published_meta = soup.find('meta', property='article:published_time')
        raw_date = published_meta['content'][:10]
        published = string_to_date(raw_date, "%Y-%m-%d")
        return {
            'title': comic_title,
            'img': [meta['content'] for meta in image_metas],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2559
2560
2561
class ThorsThundershack(GenericNavigableComic):
    """Retriever for the Thor's Thundershack comics."""
    # Also on http://tapastic.com/series/Thors-Thundershac
    name = 'thor'
    long_name = "Thor's Thundershack"
    url = 'http://www.thorsthundershack.com'
    _categories = ('THOR', )
    # Link elements are mapped to urls by a helper defined elsewhere.
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'first navlink' anchor of the page at cls.url."""
        home_soup = get_soup_at_url(cls.url)
        return home_soup.find('a', class_='first navlink')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the first rel=next/prev anchor not pointing to '/comic', or None."""
        relation = 'next' if next_ else 'prev'
        candidates = (
            anchor
            for anchor in last_soup.find_all('a', rel=relation)
            if anchor['href'] != '/comic'
        )
        return next(candidates, None)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic record from the page's metadata and microdata.

        Every image must carry identical 'title' and 'alt' attributes; the
        'alt' of the first image (if any) is stored. Image sources are joined
        with cls.url. `link` is not used.
        """
        comic_title = soup.find('meta', attrs={'name': 'description'})["content"]
        body_text = soup.find('div', itemprop='articleBody').text
        writer = soup.find('span', itemprop='author copyrightHolder').string
        pictures = soup.find_all('img', itemprop='image')
        for picture in pictures:
            assert picture['title'] == picture['alt']
        alt_text = ""
        if pictures:
            alt_text = pictures[0]['alt']
        raw_date = soup.find('time', itemprop='datePublished')["datetime"]
        published = string_to_date(raw_date, "%Y-%m-%d %H:%M:%S")
        return {
            'img': [urljoin_wrapper(cls.url, picture['src']) for picture in pictures],
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'author': writer,
            'title': comic_title,
            'alt': alt_text,
            'description': body_text,
        }
2604
2605
2606 View Code Duplication
class GerbilWithAJetpack(GenericNavigableComic):
    """Retriever for the 'Gerbil With A Jetpack' comics."""
    name = 'gerbil'
    long_name = 'Gerbil With A Jetpack'
    url = 'http://gerbilwithajetpack.com'
    # Navigation relies on helpers defined elsewhere in the file.
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic record from the post header and the 'comic' div.

        At least one image is required; all images must share the same
        'alt' and 'title' text. `link` is not used.
        """
        comic_title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        writer = author_span.find("a").string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find("div", id="comic").find_all("img")
        alt_text = pictures[0]['alt']
        for picture in pictures:
            assert picture['alt'] == picture['title'] == alt_text
        return {
            'img': [picture['src'] for picture in pictures],
            'title': comic_title,
            'alt': alt_text,
            'author': writer,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2633
2634
2635
class EveryDayBlues(GenericDeletedComic, GenericNavigableComic):
    """Retriever for the 'Every Day Blues' comics."""
    name = 'blues'
    long_name = 'Every Day Blues'
    url = 'http://everydayblues.net'
    # Navigation relies on helpers defined elsewhere in the file.
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic record from the post header and the 'comic' div.

        The date is parsed as "%d. %B %Y" with the "de_DE.utf8" locale. At
        most one image is accepted, whose 'alt' and 'title' must both equal
        the post title. `link` is not used.
        """
        comic_title = soup.find("h2", class_="post-title").string
        author_span = soup.find("span", class_="post-author")
        writer = author_span.find("a").string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%d. %B %Y", "de_DE.utf8")
        pictures = soup.find("div", id="comic").find_all("img")
        for picture in pictures:
            assert picture['alt'] == picture['title'] == comic_title
        assert len(pictures) <= 1, pictures
        return {
            'img': [picture['src'] for picture in pictures],
            'title': comic_title,
            'author': writer,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2661
2662
2663
class BiterComics(GenericNavigableComic):
    """Retriever for the 'Biter Comics' comics."""
    name = 'biter'
    long_name = 'Biter Comics'
    url = 'http://www.bitercomics.com'
    # Navigation relies on helpers defined elsewhere in the file.
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic record from the entry header and the 'comic' div.

        Exactly one image is expected, with identical 'alt' and 'title'.
        `link` is not used.
        """
        comic_title = soup.find("h1", class_="entry-title").string
        author_span = soup.find("span", class_="author vcard")
        writer = author_span.find("a").string
        raw_date = soup.find("span", class_="entry-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find("div", id="comic").find_all("img")
        for picture in pictures:
            assert picture['alt'] == picture['title']
        assert len(pictures) == 1, pictures
        alt_text = pictures[0]['alt']
        return {
            'img': [picture['src'] for picture in pictures],
            'title': comic_title,
            'alt': alt_text,
            'author': writer,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2691
2692
2693
class TheAwkwardYeti(GenericNavigableComic):
    """Retriever for 'The Awkward Yeti' comics."""
    # Also on http://www.gocomics.com/the-awkward-yeti
    # Also on http://larstheyeti.tumblr.com
    # Also on https://tapastic.com/series/TheAwkwardYeti
    name = 'yeti'
    long_name = 'The Awkward Yeti'
    url = 'http://theawkwardyeti.com'
    _categories = ('YETI', )
    # Navigation relies on helpers defined elsewhere in the file.
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic record from the post header and the 'comic' div.

        Only the first image (when there is one) is required to have
        identical 'alt' and 'title' attributes. `link` is not used.
        """
        comic_title = soup.find('h2', class_='post-title').string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find("div", id="comic").find_all("img")
        if pictures:
            leading = pictures[0]
            assert leading['alt'] == leading['title']
        return {
            'img': [picture['src'] for picture in pictures],
            'title': comic_title,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2720
2721
2722
class PleasantThoughts(GenericNavigableComic):
    """Retriever for the 'Pleasant Thoughts' comics."""
    name = 'pleasant'
    long_name = 'Pleasant Thoughts'
    url = 'http://pleasant-thoughts.com'
    # Navigation relies on helpers defined elsewhere in the file.
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic record (title and image sources) from the 'post-content' div.

        `link` is not used.
        """
        post_div = soup.find('div', class_='post-content')
        comic_title = post_div.find('h2', class_='post-title').string
        entry_div = post_div.find("div", class_="entry")
        pictures = entry_div.find_all("img")
        return {
            'title': comic_title,
            'img': [picture['src'] for picture in pictures],
        }
2740
2741
2742
class MisterAndMe(GenericNavigableComic):
    """Retriever for the 'Mister & Me' comics."""
    # Also on http://www.gocomics.com/mister-and-me
    # Also on https://tapastic.com/series/Mister-and-Me
    name = 'mister'
    long_name = 'Mister & Me'
    url = 'http://www.mister-and-me.com'
    # Navigation relies on helpers defined elsewhere in the file.
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic record from the post header and the 'comic' div.

        At most one image is accepted, with identical 'alt' and 'title';
        'alt' is the empty string when there is no image. `link` is not used.
        """
        comic_title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        writer = author_span.find("a").string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find("div", id="comic").find_all("img")
        for picture in pictures:
            assert picture['alt'] == picture['title']
        assert len(pictures) <= 1, pictures
        alt_text = ""
        if pictures:
            alt_text = pictures[0]['alt']
        return {
            'img': [picture['src'] for picture in pictures],
            'title': comic_title,
            'alt': alt_text,
            'author': writer,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2772
2773
2774
class LastPlaceComics(GenericNavigableComic):
    """Retriever for the 'Last Place Comics' comics."""
    name = 'lastplace'
    long_name = 'Last Place Comics'
    url = 'http://lastplacecomics.com'
    # Navigation relies on helpers defined elsewhere in the file.
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic record from the post header and the 'comic' div.

        At most one image is accepted, with identical 'alt' and 'title';
        'alt' is the empty string when there is no image. `link` is not used.
        """
        comic_title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        writer = author_span.find("a").string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find("div", id="comic").find_all("img")
        for picture in pictures:
            assert picture['alt'] == picture['title']
        assert len(pictures) <= 1, pictures
        alt_text = ""
        if pictures:
            alt_text = pictures[0]['alt']
        return {
            'img': [picture['src'] for picture in pictures],
            'title': comic_title,
            'alt': alt_text,
            'author': writer,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2802
2803
2804
class TalesOfAbsurdity(GenericNavigableComic):
    """Retriever for the 'Tales of Absurdity' comics."""
    # Also on http://tapastic.com/series/Tales-Of-Absurdity
    # Also on http://talesofabsurdity.tumblr.com
    name = 'absurdity'
    long_name = 'Tales of Absurdity'
    url = 'http://talesofabsurdity.com'
    _categories = ('ABSURDITY', )
    # Navigation relies on helpers defined elsewhere in the file.
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic record from the post header and the 'comic' div.

        Every image must have identical 'alt' and 'title'; the 'alt' of the
        first image is stored, or the empty string when there is no image.
        `link` is not used.
        """
        comic_title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        writer = author_span.find("a").string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find("div", id="comic").find_all("img")
        for picture in pictures:
            assert picture['alt'] == picture['title']
        alt_text = ""
        if pictures:
            alt_text = pictures[0]['alt']
        return {
            'img': [picture['src'] for picture in pictures],
            'title': comic_title,
            'alt': alt_text,
            'author': writer,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2834
2835
2836
class EndlessOrigami(GenericComicNotWorking, GenericNavigableComic):  # Nav not working
    """Retriever for the 'Endless Origami' comics."""
    name = 'origami'
    long_name = 'Endless Origami'
    url = 'http://endlessorigami.com'
    # Navigation relies on helpers defined elsewhere in the file.
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic record from the post header and the 'comic' div.

        Every image must have identical 'alt' and 'title'; the 'alt' of the
        first image is stored, or the empty string when there is no image.
        `link` is not used.
        """
        comic_title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        writer = author_span.find("a").string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find("div", id="comic").find_all("img")
        for picture in pictures:
            assert picture['alt'] == picture['title']
        alt_text = ""
        if pictures:
            alt_text = pictures[0]['alt']
        return {
            'img': [picture['src'] for picture in pictures],
            'title': comic_title,
            'alt': alt_text,
            'author': writer,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2863
2864
2865
class PlanC(GenericNavigableComic):
    """Retriever for the 'Plan C' comics."""
    name = 'planc'
    long_name = 'Plan C'
    url = 'http://www.plancomic.com'
    # Navigation relies on helpers defined elsewhere in the file.
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic record (title, image sources, date) from the comic page.

        `link` is not used.
        """
        comic_title = soup.find('h2', class_='post-title').string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        comic_div = soup.find('div', id='comic')
        pictures = comic_div.find_all('img')
        return {
            'title': comic_title,
            'img': [picture['src'] for picture in pictures],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2887
2888
2889 View Code Duplication
class BuniComic(GenericNavigableComic):
    """Retriever for the Buni comics."""
    name = 'buni'
    long_name = 'BuniComics'
    url = 'http://www.bunicomic.com'
    # Navigation relies on helpers defined elsewhere in the file.
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic record from the single image of the 'comic' div.

        Exactly one image is expected, with identical 'alt' and 'title'; its
        'title' attribute is used as the comic title. `link` is not used.
        """
        comic_div = soup.find('div', id='comic')
        pictures = comic_div.find_all('img')
        for picture in pictures:
            assert picture['alt'] == picture['title']
        assert len(pictures) == 1, pictures
        return {
            'img': [picture['src'] for picture in pictures],
            'title': pictures[0]['title'],
        }
2907
2908
2909 View Code Duplication
class GenericCommitStrip(GenericNavigableComic):
    """Common base of the Commit Strip retrievers, one subclass per language."""
    get_navi_link = get_a_rel_next
    get_first_comic_link = simulate_first_link
    # Placeholder: concrete subclasses provide the url of their first strip.
    first_url = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic record from the Open Graph metadata and the entry images.

        'title2' is the space-joined 'title' attributes of the images (empty
        string for an image without one). Image sources are converted with
        convert_iri_to_plain_ascii_uri and joined with cls.url.
        `link` is not used.
        """
        summary = soup.find('meta', property='og:description')['content']
        comic_title = soup.find('meta', property='og:title')['content']
        content_div = soup.find('div', class_='entry-content')
        pictures = content_div.find_all('img')
        image_titles = ' '.join(picture.get('title', '') for picture in pictures)
        image_urls = [
            urljoin_wrapper(cls.url, convert_iri_to_plain_ascii_uri(picture['src']))
            for picture in pictures
        ]
        return {
            'title': comic_title,
            'title2': image_titles,
            'description': summary,
            'img': image_urls,
        }
2928
2929
2930
class CommitStripFr(GenericCommitStrip):
    """Retriever for the French edition of Commit Strip."""
    name = "commit_fr"
    long_name = "Commit Strip (Fr)"
    url = "http://www.commitstrip.com/fr"
    _categories = ("FRANCAIS", )
    # Url of the first French strip.
    first_url = "http://www.commitstrip.com/fr/2012/02/22/interview/"
2937
2938
2939
class CommitStripEn(GenericCommitStrip):
    """Retriever for the English edition of Commit Strip."""
    name = "commit_en"
    long_name = "Commit Strip (En)"
    url = "http://www.commitstrip.com/en"
    # Url of the first English strip.
    first_url = "http://www.commitstrip.com/en/2012/02/22/interview/"
2945
2946
2947
class GenericBoumerie(GenericNavigableComic):
    """Common base of the Boumeries retrievers, one subclass per language."""
    # Also on http://boumeries.tumblr.com
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next
    # Placeholders: concrete subclasses provide the date format and the
    # locale name, both passed to string_to_date.
    date_format = NotImplemented
    lang = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic record from the post header and the 'comic' div.

        The post date is parsed with cls.date_format and cls.lang. Every
        image must have identical 'alt' and 'title'. `link` is not used.
        """
        comic_title = soup.find('h2', class_='post-title').string
        shortlink = soup.find('link', rel='shortlink')['href']
        author_span = soup.find("span", class_="post-author")
        writer = author_span.find("a").string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, cls.date_format, cls.lang)
        pictures = soup.find('div', id='comic').find_all('img')
        for picture in pictures:
            assert picture['alt'] == picture['title']
        return {
            'short_url': shortlink,
            'img': [picture['src'] for picture in pictures],
            'title': comic_title,
            'author': writer,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2974
2975
2976
class BoumerieEn(GenericBoumerie):
    """Retriever for the English edition of Boumeries."""
    name = "boumeries_en"
    long_name = "Boumeries (En)"
    url = "http://comics.boumerie.com"
    _categories = ("BOUMERIES", )
    # Post dates are parsed with this format and locale.
    date_format = '%B %d, %Y'
    lang = "en_GB.UTF-8"
2984
2985
2986
class BoumerieFr(GenericBoumerie):
    """Retriever for the French edition of Boumeries."""
    name = "boumeries_fr"
    long_name = "Boumeries (Fr)"
    url = "http://bd.boumerie.com"
    _categories = ("BOUMERIES", "FRANCAIS")
    # Post dates are parsed with this format and locale.
    date_format = '%B %d, %Y'  # Used to be "%A, %d %B %Y"
    lang = 'fr_FR.utf8'
2994
2995
2996 View Code Duplication
class UnearthedComics(GenericNavigableComic):
    """Class to retrieve Unearthed comics."""
    # Also on http://tapastic.com/series/UnearthedComics
    # Also on https://unearthedcomics.tumblr.com
    name = 'unearthed'
    long_name = 'Unearthed Comics'
    url = 'http://unearthedcomics.com'
    _categories = ('UNEARTHED', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://unearthedcomics.com/comics/world-with-turn-signals/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dictionary with the title, description, short url,
        image urls and publication date read from the page `soup`.
        The title and the description fall back to an empty string when
        the corresponding element is missing from the page.
        """
        short_url = soup.find('link', rel='shortlink')['href']
        title_elt = soup.find('h1') or soup.find('h2')
        title = title_elt.string if title_elt else ""
        desc_elt = soup.find('meta', property='og:description')
        # Keep the text of the meta tag rather than the tag object itself,
        # as the other comics storing an og:description do.
        desc = "" if desc_elt is None else desc_elt['content']
        date_str = soup.find('time', class_='published updated hidden')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        post = soup.find('div', class_="entry content entry-content type-portfolio")
        imgs = post.find_all('img')
        return {
            'title': title,
            'description': desc,
            'url2': short_url,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
3028
3029
3030
class Optipess(GenericNavigableComic):
    """Retriever for the Optipess comics."""
    long_name = 'Optipess'
    name = 'optipess'
    url = 'http://www.optipess.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = get_a_navi_navifirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract the comic data from the parsed page `soup`."""
        post_title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        post_author = author_span.find("a").string
        comic_div = soup.find('div', id='comic')
        # Pages without a comic div simply yield no image.
        if comic_div:
            pictures = comic_div.find_all('img')
        else:
            pictures = []
        if pictures:
            alt = pictures[0]['title']
        else:
            alt = ""
        # All images must share the same text as both 'alt' and 'title'.
        assert all(pic['alt'] == pic['title'] == alt for pic in pictures)
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, "%B %d, %Y")
        sources = [pic['src'] for pic in pictures]
        return {
            'title': post_title,
            'alt': alt,
            'author': post_author,
            'img': sources,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
3058
3059
3060
class PainTrainComic(GenericNavigableComic):
    """Class to retrieve Pain Train Comics."""
    name = 'paintrain'
    long_name = 'Pain Train Comics'
    url = 'http://paintraincomic.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dictionary with the short url, the comic number taken
        from that short url, the image urls, the publication date, the
        alternative text and the title read from the page `soup`.
        """
        title = soup.find('h2', class_='post-title').string
        short_url = soup.find('link', rel='shortlink')['href']
        # The site url is escaped so that its dots only match literal dots.
        short_url_re = re.compile('^%s/\\?p=([0-9]*)' % re.escape(cls.url))
        num = int(short_url_re.match(short_url).groups()[0])
        imgs = soup.find('div', id='comic').find_all('img')
        alt = imgs[0]['title']
        # All images must share the same text as both 'alt' and 'title'.
        assert all(i['alt'] == i['title'] == alt for i in imgs)
        date_str = soup.find('span', class_='post-date').string
        day = string_to_date(date_str, "%d/%m/%Y")
        return {
            'short_url': short_url,
            'num': num,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'alt': alt,
            'title': title,
        }
3090
3091
3092
class MoonBeard(GenericNavigableComic):
    """Class to retrieve MoonBeard comics."""
    # Also on http://squireseses.tumblr.com
    # Also on http://www.webtoons.com/en/comedy/moon-beard/list?title_no=471
    name = 'moonbeard'
    long_name = 'Moon Beard'
    url = 'http://moonbeard.com'
    _categories = ('MOONBEARD', )
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dictionary with the short url, the comic number taken
        from that short url, the image urls, the publication date, the
        title, the space-separated tags, the alternative text and the
        author read from the page `soup`.
        """
        title = soup.find('h2', class_='post-title').string
        short_url = soup.find('link', rel='shortlink')['href']
        # The site url is escaped so that its dots only match literal dots.
        short_url_re = re.compile('^%s/\\?p=([0-9]*)' % re.escape(cls.url))
        num = int(short_url_re.match(short_url).groups()[0])
        imgs = soup.find('div', id='comic').find_all('img')
        alt = imgs[0]['title']
        # All images must share the same text as both 'alt' and 'title'.
        assert all(i['alt'] == i['title'] == alt for i in imgs)
        date_str = soup.find('span', class_='post-date').string
        day = string_to_date(date_str, "%B %d, %Y")
        tags = ' '.join(t['content'] for t in soup.find_all('meta', property='article:tag'))
        author = soup.find('span', class_='post-author').string
        return {
            'short_url': short_url,
            'num': num,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'title': title,
            'tags': tags,
            'alt': alt,
            'author': author,
        }
3129
3130
3131
class SystemComic(GenericNavigableComic):
    """Retriever for System Comic."""
    long_name = 'System Comic'
    name = 'system'
    url = 'http://www.systemcomic.com'
    get_navi_link = get_a_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the anchor found in the 'first' list item of the home page."""
        home = get_soup_at_url(cls.url)
        first_item = home.find('li', class_='first')
        return first_item.find('a')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract the comic data from the parsed page `soup`."""
        og_title = soup.find('meta', property='og:title')['content']
        og_desc = soup.find('meta', property='og:description')['content']
        raw_date = soup.find('time')["datetime"]
        published = string_to_date(raw_date, "%Y-%m-%d")
        pictures = soup.find('figure').find_all('img')
        sources = [pic['src'] for pic in pictures]
        return {
            'title': og_title,
            'description': og_desc,
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': sources,
        }
3159
3160
3161 View Code Duplication
class LittleLifeLines(GenericNavigableComic):
    """Retriever for the Little Life Lines comics."""
    # Also on https://little-life-lines.tumblr.com
    long_name = 'Little Life Lines'
    name = 'life'
    url = 'http://www.littlelifelines.com'
    first_url = 'http://www.littlelifelines.com/comics/well-done'
    get_first_comic_link = simulate_first_link
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor leading to the next (or previous) comic, if any."""
        # prev is next / next is prev
        wanted = 'prev' if next_ else 'next'
        item = last_soup.find('li', class_=wanted)
        if item:
            return item.find('a')
        return None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract the comic data from the parsed page `soup`."""
        og_title = soup.find('meta', property='og:title')['content']
        og_desc = soup.find('meta', property='og:description')['content']
        raw_date = soup.find('time', class_='published')['datetime']
        published = string_to_date(raw_date, "%Y-%m-%d")
        writer = soup.find('a', rel='author').string
        body = soup.find('div', class_="body entry-content")
        # Only images that actually carry a 'src' attribute are kept.
        pictures = [pic for pic in body.find_all('img')
                    if pic.get('src') is not None]
        alt = pictures[0]['alt']
        sources = [pic['src'] for pic in pictures]
        return {
            'title': og_title,
            'alt': alt,
            'description': og_desc,
            'author': writer,
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': sources,
        }
3200
3201
3202
class GenericWordPressInkblot(GenericNavigableComic):
    """Base class for comics hosted on WordPress with Inkblot."""
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'first webcomic' anchor of the home page."""
        first_link_classes = (
            'webcomic-link webcomic1-link '
            'first-webcomic-link first-webcomic1-link')
        home = get_soup_at_url(cls.url)
        return home.find('a', class_=first_link_classes)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract the comic data from the parsed page `soup`."""
        og_title = soup.find('meta', property='og:title')['content']
        pictures = soup.find('div', class_='webcomic-image').find_all('img')
        published_meta = soup.find('meta', property='article:published_time')
        # Only the first ten characters of the timestamp are parsed.
        raw_date = published_meta['content'][:10]
        published = string_to_date(raw_date, "%Y-%m-%d")
        sources = [pic['src'] for pic in pictures]
        return {
            'title': og_title,
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': sources,
        }
3225
3226
3227
class EverythingsStupid(GenericWordPressInkblot):
    """Retriever for the Everything's Stupid comics."""
    # Also on http://tapastic.com/series/EverythingsStupid
    # Also on http://www.webtoons.com/en/challenge/everythings-stupid/list?title_no=14591
    # Also on http://everythingsstupidcomics.tumblr.com
    long_name = "Everything's Stupid"
    name = 'stupid'
    url = 'http://everythingsstupid.net'
3235
3236
3237
class TheIsmComics(GenericDeletedComic, GenericWordPressInkblot):
    """Retriever for The Ism comics."""
    # Also on https://tapastic.com/series/TheIsm (?)
    long_name = "The Ism"
    name = 'theism'
    url = 'http://www.theism-comics.com'
3243
3244
3245
class WoodenPlankStudios(GenericWordPressInkblot):
    """Retriever for the Wooden Plank Studios comics."""
    long_name = 'Wooden Plank Studios'
    name = 'woodenplank'
    url = 'http://woodenplankstudios.com'
3250
3251
3252
class ElectricBunnyComic(GenericNavigableComic):
    """Retriever for the Electric Bunny comics."""
    # Also on http://electricbunnycomics.tumblr.com
    long_name = 'Electric Bunny Comic'
    name = 'bunny'
    url = 'http://www.electricbunnycomics.com/View/Comic/153/Welcome+to+Hell'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the element wrapping the 'First' navigation image."""
        first_img = get_soup_at_url(cls.url).find('img', alt='First')
        return first_img.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the element wrapping the 'Next' (or 'Back') image, if any."""
        wanted_alt = 'Next' if next_ else 'Back'
        nav_img = last_soup.find('img', alt=wanted_alt)
        if nav_img:
            return nav_img.parent
        return None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract the title and image urls from the page metadata."""
        og_title = soup.find('meta', property='og:title')['content']
        og_images = soup.find_all('meta', property='og:image')
        sources = [meta['content'] for meta in og_images]
        return {
            'title': og_title,
            'img': sources,
        }
3280
3281
3282
class SheldonComics(GenericNavigableComic):
    """Class to retrieve Sheldon comics."""
    # Also on http://www.gocomics.com/sheldon
    name = 'sheldon'
    long_name = 'Sheldon Comics'
    url = 'http://www.sheldoncomics.com'

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics."""
        return get_soup_at_url(cls.url).find("a", id="nav-first")

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns the first matching navigation anchor that does not point
        back at the site root, or None when there is no such anchor.
        """
        wanted_id = "nav-next" if next_ else "nav-prev"
        for link in last_soup.find_all("a", id=wanted_id):
            # Compare against cls.url instead of repeating the literal so
            # the check cannot drift away from the class attribute.
            if link['href'] != cls.url:
                return link
        return None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dictionary with the title and the image url; exactly
        one image is expected in the 'comic-foot' div.
        """
        imgs = soup.find("div", id="comic-foot").find_all("img")
        assert all(i['alt'] == i['title'] for i in imgs)
        assert len(imgs) == 1, imgs
        title = imgs[0]['title']
        return {
            'title': title,
            'img': [i['src'] for i in imgs],
        }
3313
3314
3315
class ManVersusManatee(GenericNavigableComic):
    """Retriever for the Man Versus Manatee comics."""
    long_name = 'Man Versus Manatee'
    name = 'manvsmanatee'
    url = 'http://manvsmanatee.com'
    get_navi_link = get_a_comicnavbase_comicnavnext
    get_first_comic_link = get_a_comicnavbase_comicnavfirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract the comic data from the parsed page `soup`."""
        post_title = soup.find('h2', class_='post-title').string
        comic_div = soup.find('div', id='comic')
        pictures = comic_div.find_all('img')
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, "%B %d, %Y")
        sources = [pic['src'] for pic in pictures]
        return {
            'img': sources,
            'title': post_title,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
3337
3338
3339
class TheMeerkatguy(GenericNavigableComic):
    """Retriever for The Meerkatguy comics."""
    name = 'meerkatguy'
    long_name = 'The Meerkatguy'
    url = 'http://www.themeerkatguy.com'
    get_navi_link = get_a_comicnavbase_comicnavnext
    get_first_comic_link = get_a_comicnavbase_comicnavfirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract the page title and the og:image urls from `soup`."""
        page_title = soup.find('title').string
        og_images = soup.find_all('meta', property='og:image')
        sources = [meta['content'] for meta in og_images]
        return {
            'img': sources,
            'title': page_title,
        }
3356
3357
3358
class Ubertool(GenericNavigableComic):
    """Retriever for the Ubertool comics."""
    # Also on https://ubertool.tumblr.com
    # Also on https://tapastic.com/series/ubertool
    long_name = 'Ubertool'
    name = 'ubertool'
    url = 'http://ubertoolcomic.com'
    _categories = ('UBERTOOL', )
    get_navi_link = get_a_comicnavbase_comicnavnext
    get_first_comic_link = get_a_comicnavbase_comicnavfirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract the comic data from the parsed page `soup`."""
        post_title = soup.find('h2', class_='post-title').string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, "%B %d, %Y")
        comic_div = soup.find('div', id='comic')
        sources = [pic['src'] for pic in comic_div.find_all('img')]
        return {
            'img': sources,
            'title': post_title,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
3383
3384
3385
class EarthExplodes(GenericNavigableComic):
    """Retriever for The Earth Explodes comics."""
    long_name = 'The Earth Explodes'
    name = 'earthexplodes'
    url = 'http://www.earthexplodes.com'
    first_url = 'http://www.earthexplodes.com/comics/000/'
    get_first_comic_link = simulate_first_link
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor whose id is 'next' (or 'prev')."""
        wanted_id = 'next' if next_ else 'prev'
        return last_soup.find('a', id=wanted_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract the title, image urls and alternative text from `soup`."""
        page_title = soup.find('title').string
        pictures = soup.find('div', id='image').find_all('img')
        # The alternative text is the 'title' of the first image, if it has one.
        alt = pictures[0].get('title', '')
        # Image sources are joined to the site url.
        sources = [urljoin_wrapper(cls.url, pic['src']) for pic in pictures]
        return {
            'img': sources,
            'title': page_title,
            'alt': alt,
        }
3410
3411
3412 View Code Duplication
class PomComics(GenericNavigableComic):
    """Retriever for PomComics."""
    long_name = 'Pom Comics / Piece of Me'
    name = 'pom'
    url = 'http://www.pomcomic.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'btn-first' anchor of the home page."""
        home = get_soup_at_url(cls.url)
        return home.find('a', class_='btn-first')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the 'btn-next' (or 'btn-prev') anchor of the page."""
        wanted_class = 'btn-next' if next_ else 'btn-prev'
        return last_soup.find('a', class_=wanted_class)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract the title, description, tags and image urls from `soup`."""
        heading = soup.find('h1').string
        og_desc = soup.find('meta', property='og:description')['content']
        keywords = soup.find('meta', attrs={'name': 'keywords'})['content']
        pictures = soup.find('div', class_='comic').find_all('img')
        # Image sources are joined to the site url.
        sources = [urljoin_wrapper(cls.url, pic['src']) for pic in pictures]
        return {
            'title': heading,
            'desc': og_desc,
            'tags': keywords,
            'img': sources,
        }
3442
3443
3444
class CubeDrone(GenericComicNotWorking, GenericNavigableComic):  # Website has changed
    """Class to retrieve Cube Drone comics."""
    name = 'cubedrone'
    long_name = 'Cube Drone'
    url = 'http://cube-drone.com/comics'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics."""
        return get_soup_at_url(cls.url).find('span', class_='glyphicon glyphicon-backward').parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns the parent element of the chevron icon, or None when the
        page has no such icon.
        """
        class_ = 'glyphicon glyphicon-chevron-' + ('right' if next_ else 'left')
        span = last_soup.find('span', class_=class_)
        # find() gives None when the icon is absent; report "no link"
        # instead of failing on None.parent.
        return None if span is None else span.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dictionary with the twitter url and title, the title
        and alternative text of the first image, and the image urls.
        """
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        url2 = soup.find('meta', attrs={'name': 'twitter:url'})['content']
        # date_str = soup.find('h2', class_='comic_title').find('small').string
        # day = string_to_date(date_str, "%B %d, %Y, %I:%M %p")
        imgs = soup.find_all('img', class_='comic img-responsive')
        title2 = imgs[0]['title']
        alt = imgs[0]['alt']
        return {
            'url2': url2,
            'title': title,
            'title2': title2,
            'alt': alt,
            'img': [i['src'] for i in imgs],
        }
3479
3480
3481
class MakeItStoopid(GenericDeletedComic, GenericNavigableComic):
    """Retriever for the Make It Stoopid comics."""
    long_name = 'Make it stoopid'
    name = 'stoopid'
    url = 'http://makeitstoopid.com/comic.php'

    @classmethod
    def get_nav(cls, soup):
        """Return the first five 'cnav' elements, with None for those lacking an href."""
        all_nav = soup.find_all(class_='cnav')
        first_bar = all_nav[:5]
        second_bar = all_nav[5:]
        # The navigation elements are expected to appear twice on the page.
        assert first_bar == second_bar
        # begin, prev, archive, next_, end = first_bar
        return [elt if elt.get('href') is not None else None
                for elt in first_bar]

    @classmethod
    def get_first_comic_link(cls):
        """Return the first navigation element of the home page."""
        nav = cls.get_nav(get_soup_at_url(cls.url))
        return nav[0]

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the navigation element at index 3 (next) or 1 (previous)."""
        position = 3 if next_ else 1
        return cls.get_nav(last_soup)[position]

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract the title (from `link`) and the image urls (from `soup`)."""
        link_title = link['title']
        pictures = soup.find_all('img', id='comicimg')
        sources = [pic['src'] for pic in pictures]
        return {
            'title': link_title,
            'img': sources,
        }
3515
3516
3517
class OffTheLeashDog(GenericNavigableComic):
    """Retriever for the Off The Leash Dog comics."""
    # Also on http://rupertfawcettsdoggyblog.tumblr.com
    # Also on http://www.rupertfawcettcartoons.com
    long_name = 'Off The Leash Dog'
    name = 'offtheleash'
    url = 'http://offtheleashdogcartoons.com'
    _categories = ('FAWCETT', )
    first_url = 'http://offtheleashdogcartoons.com/uncategorized/can-i-help-you/'
    get_first_comic_link = simulate_first_link
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract the entry title and the image urls from `soup`."""
        entry_title = soup.find("h1", class_="entry-title").string
        content = soup.find('div', class_='entry-content')
        sources = [pic['src'] for pic in content.find_all('img')]
        return {
            'title': entry_title,
            'img': sources,
        }
3538
3539
3540
class MacadamValley(GenericNavigableComic):
    """Retriever for the Macadam Valley comics."""
    long_name = 'Macadam Valley'
    name = 'macadamvalley'
    url = 'http://macadamvalley.com'
    first_url = 'http://macadamvalley.com/le-debut-de-la-fin/'
    get_first_comic_link = simulate_first_link
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract the comic data from the parsed page `soup`."""
        entry_title = soup.find("h1", class_="entry-title").string
        # Only the first image of the entry is used.
        picture = soup.find('div', class_='entry-content').find('img')
        timestamp = soup.find('time', class_='entry-date')['datetime']
        # Only the first ten characters of the timestamp are parsed.
        published = string_to_date(timestamp[:10], "%Y-%m-%d")
        writer = soup.find('a', rel='author').string
        return {
            'title': entry_title,
            'img': [picture['src']],
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'author': writer,
        }
3566
3567
3568
class MarketoonistComics(GenericNavigableComic):
    """Retriever for the Marketoonist comics."""
    long_name = 'Marketoonist'
    name = 'marketoonist'
    url = 'https://marketoonist.com/cartoons'
    first_url = 'https://marketoonist.com/2002/10/the-8-types-of-brand-managers-2.html'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract the image urls, date and title from the page metadata."""
        og_images = soup.find_all('meta', property='og:image')
        published_meta = soup.find('meta', property='article:published_time')
        # Only the first ten characters of the timestamp are parsed.
        raw_date = published_meta['content'][:10]
        published = string_to_date(raw_date, "%Y-%m-%d")
        og_title = soup.find('meta', property='og:title')['content']
        sources = [meta['content'] for meta in og_images]
        return {
            'img': sources,
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'title': og_title,
        }
3591
3592
3593 View Code Duplication
class ConsoliaComics(GenericNavigableComic):
    """Retriever for the Consolia comics."""
    long_name = 'consolia'
    name = 'consolia'
    url = 'https://consolia-comic.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'first' anchor of the home page."""
        home = get_soup_at_url(cls.url)
        return home.find('a', class_='first')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the 'next' (or 'prev') anchor of the page."""
        wanted_class = 'next' if next_ else 'prev'
        return last_soup.find('a', class_=wanted_class)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract the title, image urls and date from `soup`."""
        og_title = soup.find('meta', property='og:title')['content']
        raw_date = soup.find('time')["datetime"]
        published = string_to_date(raw_date, "%Y-%m-%d")
        og_images = soup.find_all('meta', property='og:image')
        sources = [meta['content'] for meta in og_images]
        return {
            'title': og_title,
            'img': sources,
            'day': published.day,
            'month': published.month,
            'year': published.year,
        }
3624
3625
3626
class GenericBlogspotComic(GenericNavigableComic):
    """Base class for comics hosted on Blogspot.

    Subclasses provide `first_url`.
    """
    _categories = ('BLOGSPOT', )
    first_url = NotImplemented
    get_first_comic_link = simulate_first_link

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the 'newer' (next) or 'older' (previous) pager anchor."""
        if next_:
            pager_id = 'Blog1_blog-pager-newer-link'
        else:
            pager_id = 'Blog1_blog-pager-older-link'
        return last_soup.find('a', id=pager_id)
3636
3637
3638 View Code Duplication
class TuMourrasMoinsBete(GenericBlogspotComic):
    """Retriever for the Tu Mourras Moins Bete comics."""
    long_name = 'Tu Mourras Moins Bete'
    name = 'mourrasmoinsbete'
    url = 'http://tumourrasmoinsbete.blogspot.fr'
    first_url = 'http://tumourrasmoinsbete.blogspot.fr/2008/06/essai.html'
    _categories = ('FRANCAIS', )

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract the image urls, author and page title from `soup`."""
        page_title = soup.find('title').string
        article = soup.find('div', itemprop='description articleBody')
        pictures = article.find_all('img')
        writer = soup.find('span', itemprop='author').string
        sources = [pic['src'] for pic in pictures]
        return {
            'img': sources,
            'author': writer,
            'title': page_title,
        }
3657
3658
3659
class Octopuns(GenericBlogspotComic):
    """Retriever for the Octopuns comics."""
    # Also on http://octopuns.tumblr.com
    long_name = 'Octopuns'
    name = 'octopuns'
    url = 'http://www.octopuns.net'  # or http://octopuns.blogspot.fr/
    first_url = 'http://octopuns.blogspot.com/2010/12/17122010-always-read-label.html'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract the image urls, title and date from `soup`."""
        post_title = soup.find('h3', class_='post-title entry-title').string
        raw_date = soup.find('h2', class_='date-header').string
        published = string_to_date(raw_date, "%A, %B %d, %Y")
        image_links = soup.find_all('link', rel='image_src')
        sources = [elt['href'] for elt in image_links]
        return {
            'img': sources,
            'title': post_title,
            'day': published.day,
            'month': published.month,
            'year': published.year,
        }
3681
3682
3683
class GeekAndPoke(GenericNavigableComic):
    """Class to retrieve Geek And Poke comics."""
    name = 'geek'
    long_name = 'Geek And Poke'
    url = 'http://geek-and-poke.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://geek-and-poke.com/geekandpoke/2006/8/27/a-new-place-for-a-not-so-old-blog.html'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic."""
        # The classes are deliberately looked up crosswise: the next comic
        # is behind 'prev-item' and the previous one behind 'next-item'.
        return last_soup.find('a', class_='prev-item' if next_ else 'next-item')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dictionary with the title, alternative text,
        description, author, publication date and image urls read from
        the page `soup`. The description and the alternative text are
        empty strings when the page provides none.
        """
        title = soup.find('meta', property='og:title')['content']
        desc = soup.find('meta', property='og:description')
        desc_str = "" if desc is None else desc['content']
        date_str = soup.find('time', class_='published')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        author = soup.find('a', rel='author').string
        div_content = (soup.find('div', class_="body entry-content") or
                       soup.find('div', class_="special-content"))
        imgs = div_content.find_all('img')
        # Only images that actually carry a 'src' attribute are kept.
        imgs = [i for i in imgs if i.get('src') is not None]
        # NOTE(review): in Beautiful Soup `'title' not in i` appears to test
        # the tag's children rather than its attributes, so this check is
        # probably always true - confirm before tightening it.
        assert all('title' not in i or i['alt'] == i['title'] for i in imgs)
        # Always a string, so 'alt' has the same type with or without images.
        alt = imgs[0].get('alt', "") if imgs else ""
        return {
            'title': title,
            'alt': alt,
            'description': desc_str,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
        }
3722
3723
3724 View Code Duplication
class GloryOwlComix(GenericBlogspotComic):
    """Retriever for the Glory Owl comics."""
    long_name = 'Glory Owl'
    name = 'gloryowl'
    url = 'http://gloryowlcomix.blogspot.fr'
    first_url = 'http://gloryowlcomix.blogspot.fr/2013/02/1_7.html'
    _categories = ('NSFW', 'FRANCAIS')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract the image urls, author and page title from `soup`."""
        page_title = soup.find('title').string
        image_links = soup.find_all('link', rel='image_src')
        writer = soup.find('a', rel='author').string
        sources = [elt['href'] for elt in image_links]
        return {
            'img': sources,
            'author': writer,
            'title': page_title,
        }
3743
3744
3745 View Code Duplication
class GenericSquareSpace(GenericNavigableComic):
    """Common base for comics published with SquareSpace.

    Concrete subclasses must override get_images.
    """
    _categories = ('SQUARESPACE', )
    get_first_comic_link = simulate_first_link
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the navigation anchor towards the next or previous comic."""
        # NOTE(review): the ids look swapped on purpose - presumably the
        # 'prevLink' anchor of these pages leads to the chronologically next
        # comic; confirm against a live page before "fixing" this.
        if next_:
            anchor_id = 'prevLink'
        else:
            anchor_id = 'nextLink'
        return last_soup.find('a', id=anchor_id)

    @classmethod
    def get_images(cls, soup):
        """Return the image URLs of a comic page (to be overridden)."""
        raise NotImplementedError

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for one parsed comic page.

        Title and description are read from the og:* meta tags, the date
        from the <time itemprop="datePublished"> tag and the author from
        the <a rel="author"> anchor; images are delegated to get_images.
        """
        og_title = soup.find('meta', property='og:title')['content']
        og_desc = soup.find('meta', property='og:description')['content']
        published = soup.find('time', itemprop='datePublished')["datetime"]
        pub_date = string_to_date(published, "%Y-%m-%d")
        author_name = soup.find('a', rel='author').string
        images = cls.get_images(soup)
        return {
            'title': og_title,
            'img': images,
            'month': pub_date.month,
            'year': pub_date.year,
            'day': pub_date.day,
            'author': author_name,
            'description': og_desc,
        }
3778
3779
3780
class AtRandomComics(GenericSquareSpace):
    """At Random Comics, a SquareSpace-based comic."""
    first_url = 'http://www.atrandomcomics.com/at-random-comics-home/2015/5/5/can-of-worms'
    url = 'http://www.atrandomcomics.com'
    long_name = 'At Random Comics'
    name = 'atrandom'

    @classmethod
    def get_images(cls, soup):
        """Return the content of every og:image meta tag of the page."""
        return [meta['content']
                for meta in soup.find_all('meta', property='og:image')]
3792
3793
3794
class NothingSuspicious(GenericSquareSpace):
    """Nothing Suspicious, a SquareSpace-based comic."""
    first_url = 'https://nothingsuspicio.us/?offset=1483592400908'
    url = 'https://nothingsuspicio.us'
    long_name = 'Nothing Suspicious'
    name = 'nothingsuspicious'

    @classmethod
    def get_images(cls, soup):
        """Return a one-element list holding the src of the first <img>
        found inside the 'content-wrapper' div."""
        wrapper = soup.find('div', class_='content-wrapper')
        first_img = wrapper.find('img')
        return [first_img['src']]
3806
3807
3808
class DeathBulge(GenericComic):
    """Class to retrieve the DeathBulge comics."""
    name = 'deathbulge'
    long_name = 'Death Bulge'
    url = 'http://www.deathbulge.com'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generator to get the next comic. Implementation of GenericComic's abstract method.

        Reads the pagination bounds from 'api/comics/1', then fetches
        'api/comics/<num>' for every num in the range and yields one info
        dict per comic. `last_comic` is the previously retrieved comic
        (its 'num' is used as the starting point) or a falsy value to start
        from the first page advertised by the API.
        """
        # The JSON payloads get their own names so that the `json` module
        # imported at file level is not shadowed inside this method.
        first_page = load_json_at_url(urljoin_wrapper(cls.url, 'api/comics/1'))
        pagination = first_page['pagination_links']
        first_num = last_comic['num'] if last_comic else pagination['first']
        last_num = pagination['last']
        # NOTE(review): both bounds are excluded (first_num + 1 .. last_num - 1),
        # so on a fresh start the 'first' and 'last' comics are not fetched.
        # Kept as is - confirm against the API before changing the range.
        for num in range(first_num + 1, last_num):
            json_url = urljoin_wrapper(cls.url, 'api/comics/%d' % num)
            comic_json = load_json_at_url(json_url)['comic']
            # Only the first 10 characters (YYYY-MM-DD) of the timestamp are parsed.
            day = string_to_date(comic_json['timestamp'][:10], "%Y-%m-%d")
            comic_id = comic_json['id']  # not exactly 'num' o_O
            yield {
                'json_url': json_url,
                'num': comic_id,
                'url': urljoin_wrapper(cls.url, 'comics/%d' % num),
                'alt': comic_json['alt_text'],
                'title': comic_json['title'],
                'img': [urljoin_wrapper(cls.url, comic_json['comic'])],
                'month': day.month,
                'year': day.year,
                'day': day.day,
            }
3841
3842
3843
class GenericTumblrV1(GenericComic):
    """Generic class to retrieve comics from Tumblr using the V1 API.

    Posts are read from the '/api/read/' endpoint under the class `url`.
    """
    _categories = ('TUMBLR', )

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generic implementation of get_next_comic for Tumblr comics.

        Yields the info dict of every post returned by get_posts, skipping
        posts for which get_comic_info returns None.
        """
        for p in cls.get_posts(last_comic):
            comic = cls.get_comic_info(p)
            if comic is not None:
                yield comic

    @classmethod
    def check_url(cls, url):
        """Print a warning when url does not start with cls.url; return url unchanged."""
        if not url.startswith(cls.url):
            print("url '%s' does not start with '%s'" % (url, cls.url))
        return url

    @classmethod
    def get_url_from_post(cls, post):
        """Get the url of a post from its 'url' attribute (checked with check_url)."""
        return cls.check_url(post['url'])

    @classmethod
    def get_api_url(cls):
        """Get the url of the API entry point for this comic."""
        return urljoin_wrapper(cls.url, '/api/read/')

    @classmethod
    def get_api_url_for_id(cls, tumblr_id):
        """Get the API url querying the post with the given integer id."""
        return cls.get_api_url() + '?id=%d' % (tumblr_id)

    @classmethod
    def get_comic_info(cls, post):
        """Get information about a particular comics.

        Returns None for posts whose type is not 'photo', otherwise a dict
        describing the post (urls, date, title, tags, images, tumblr id).
        """
        type_ = post['type']
        if type_ != 'photo':
            return None
        tumblr_id = int(post['id'])
        api_url = cls.get_api_url_for_id(tumblr_id)
        day = datetime.datetime.fromtimestamp(int(post['unix-timestamp'])).date()
        caption = post.find('photo-caption')
        title = caption.string if caption else ""
        tags = ' '.join(t.string for t in post.find_all('tag'))
        # Photos may appear in 'photo' tags and/or straight in the post
        photo_tags = post.find_all('photo')
        if not photo_tags:
            photo_tags = [post]
        # Images are in multiple resolutions - taking the first one
        imgs = [photo.find('photo-url') for photo in photo_tags]
        return {
            'url': cls.get_url_from_post(post),
            'url2': post['url-with-slug'],
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'title': title,
            'tags': tags,
            'img': [i.string for i in imgs],
            'tumblr-id': tumblr_id,
            'api_url': api_url,
        }

    @classmethod
    def get_posts(cls, last_comic, nb_post_per_call=10):
        """Get posts using API. nb_post_per_call is max 50.

        Posts are retrieved from newer to older as per the tumblr v1 api
        but are returned in chronological order.

        Returns a list holding the posts newer than last_comic (every post
        when last_comic is None), oldest first. The list is empty when the
        previously retrieved post cannot be fetched anymore or is never met
        while paging; a message is printed in both cases.
        """
        # `import urllib` alone does not guarantee that the `urllib.error`
        # submodule is loaded: import it explicitly before catching HTTPError.
        import urllib.error
        waiting_for_id = last_comic['tumblr-id'] if last_comic else None
        posts_acc = []
        if last_comic is not None:
            # cls.check_url(last_comic['url'])
            cls.check_url(last_comic['api_url'])
            # Sometimes, tumblr posts are deleted. When previous post is deleted, we
            # might end up spending a lot of time looking for something that
            # doesn't exist. Failing early and clearly might be a better option.
            last_api_url = cls.get_api_url_for_id(waiting_for_id)
            try:
                get_soup_at_url(last_api_url)
            except urllib.error.HTTPError:
                try:
                    get_soup_at_url(cls.url)
                except urllib.error.HTTPError:
                    print("Did not find previous post nor main url %s" % cls.url)
                else:
                    print("Did not find previous post %s : it might have been deleted" % last_api_url)
                return []
        api_url = cls.get_api_url()
        posts = get_soup_at_url(api_url).find('posts')
        start, total = int(posts['start']), int(posts['total'])
        assert start == 0
        for starting_num in range(0, total, nb_post_per_call):
            api_url2 = api_url + '?start=%d&num=%d' % (starting_num, nb_post_per_call)
            posts2 = get_soup_at_url(api_url2).find('posts')
            start2, total2 = int(posts2['start']), int(posts2['total'])
            assert starting_num == start2, "%d != %d" % (starting_num, start2)
            # This may happen and should be handled in the future
            assert total == total2, "%d != %d" % (total, total2)
            for p in posts2.find_all('post'):
                tumblr_id = int(p['id'])
                if waiting_for_id and waiting_for_id == tumblr_id:
                    # Reached the last post already known: stop paging here.
                    return posts_acc[::-1]
                posts_acc.append(p)
        if waiting_for_id is None:
            return posts_acc[::-1]
        print("Did not find %s : there might be a problem" % waiting_for_id)
        return []
3949
3950
3951
class SaturdayMorningBreakfastCerealTumblr(GenericTumblrV1):
    """Saturday Morning Breakfast Cereal comics, read through the Tumblr V1 API."""
    # Also on http://www.gocomics.com/saturday-morning-breakfast-cereal
    # Also on http://www.smbc-comics.com
    url = 'http://smbc-comics.tumblr.com'
    name = 'smbc-tumblr'
    long_name = 'Saturday Morning Breakfast Cereal (from Tumblr)'
    _categories = ('SMBC', )
3959
3960
3961
class AHammADay(GenericTumblrV1):
    """A Hamm A Day comics, read through the Tumblr V1 API."""
    url = 'http://www.ahammaday.com'
    name = 'hamm'
    long_name = 'A Hamm A Day'
3966
3967
3968
class IrwinCardozo(GenericTumblrV1):
    """Irwin Cardozo comics, read through the Tumblr V1 API."""
    url = 'http://irwincardozocomics.tumblr.com'
    name = 'irwinc'
    long_name = 'Irwin Cardozo'
3973
3974
3975
class AccordingToDevin(GenericTumblrV1):
    """According To Devin comics, read through the Tumblr V1 API."""
    url = 'http://accordingtodevin.tumblr.com'
    name = 'devin'
    long_name = 'According To Devin'
3980
3981
3982
class ItsTheTieTumblr(GenericTumblrV1):
    """It's the tie comics, read through the Tumblr V1 API."""
    # Also on http://itsthetie.com
    # Also on https://tapastic.com/series/itsthetie
    url = "http://itsthetie.tumblr.com"
    name = 'tie-tumblr'
    long_name = "It's the tie (from Tumblr)"
    _categories = ('TIE', )
3990
3991
3992
class OctopunsTumblr(GenericTumblrV1):
    """Octopuns comics, read through the Tumblr V1 API."""
    # Also on http://www.octopuns.net
    url = 'http://octopuns.tumblr.com'
    name = 'octopuns-tumblr'
    long_name = 'Octopuns (from Tumblr)'
3998
3999
4000
class PicturesInBoxesTumblr(GenericTumblrV1):
    """Pictures In Boxes comics, read through the Tumblr V1 API."""
    # Also on http://www.picturesinboxes.com
    url = 'https://picturesinboxescomic.tumblr.com'
    name = 'picturesinboxes-tumblr'
    long_name = 'Pictures in Boxes (from Tumblr)'
4006
4007
4008
class TubeyToonsTumblr(GenericTumblrV1):
    """Tubey Toons comics, read through the Tumblr V1 API."""
    # Also on http://tapastic.com/series/Tubey-Toons
    # Also on http://tubeytoons.com
    url = 'https://tubeytoons.tumblr.com'
    name = 'tubeytoons-tumblr'
    long_name = 'Tubey Toons (from Tumblr)'
    # NOTE(review): spelled 'TUNEYTOONS' (not 'TUBEYTOONS'); kept as is -
    # confirm what the other Tubey Toons classes use before renaming.
    _categories = ('TUNEYTOONS', )
4016
4017
4018
class UnearthedComicsTumblr(GenericTumblrV1):
    """Unearthed Comics, read through the Tumblr V1 API."""
    # Also on http://tapastic.com/series/UnearthedComics
    # Also on http://unearthedcomics.com
    url = 'https://unearthedcomics.tumblr.com'
    name = 'unearthed-tumblr'
    long_name = 'Unearthed Comics (from Tumblr)'
    _categories = ('UNEARTHED', )
4026
4027
4028
class PieComic(GenericTumblrV1):
    """Pie Comic comics, read through the Tumblr V1 API."""
    url = "http://piecomic.tumblr.com"
    name = 'pie'
    long_name = 'Pie Comic'
4033
4034
4035
class MrEthanDiamond(GenericTumblrV1):
    """Mr Ethan Diamond comics, read through the Tumblr V1 API."""
    url = 'http://mrethandiamond.tumblr.com'
    name = 'diamond'
    long_name = 'Mr Ethan Diamond'
4040
4041
4042
class Flocci(GenericTumblrV1):
    """floccinaucinihilipilification comics, read through the Tumblr V1 API."""
    url = "http://floccinaucinihilipilificationa.tumblr.com"
    name = 'flocci'
    long_name = 'floccinaucinihilipilification'
4047
4048
4049
class UpAndOut(GenericTumblrV1):
    """Up & Out comics, read through the Tumblr V1 API."""
    # Also on http://tapastic.com/series/UP-and-OUT
    url = 'http://upandoutcomic.tumblr.com'
    name = 'upandout'
    long_name = 'Up And Out (from Tumblr)'
4055
4056
4057
class Pundemonium(GenericTumblrV1):
    """Pundemonium comics, read through the Tumblr V1 API."""
    url = 'http://monstika.tumblr.com'
    name = 'pundemonium'
    long_name = 'Pundemonium'
4062
4063
4064
class PoorlyDrawnLinesTumblr(GenericTumblrV1):
    """Poorly Drawn Lines comics, read through the Tumblr V1 API."""
    # Also on http://poorlydrawnlines.com
    url = 'http://pdlcomics.tumblr.com'
    name = 'poorlydrawn-tumblr'
    long_name = 'Poorly Drawn Lines (from Tumblr)'
    _categories = ('POORLYDRAWN', )
4071
4072
4073
class PearShapedComics(GenericTumblrV1):
    """Pear-Shaped Comics, read through the Tumblr V1 API."""
    url = 'http://pearshapedcomics.com'
    name = 'pearshaped'
    long_name = 'Pear-Shaped Comics'
4078
4079
4080
class PondScumComics(GenericTumblrV1):
    """Pond Scum comics, read through the Tumblr V1 API."""
    url = 'http://pondscumcomic.tumblr.com'
    name = 'pond'
    long_name = 'Pond Scum'
4085
4086
4087
class MercworksTumblr(GenericTumblrV1):
    """Mercworks comics, read through the Tumblr V1 API."""
    # Also on http://mercworks.net
    # Also on http://www.webtoons.com/en/comedy/mercworks/list?title_no=426
    # Also on https://tapastic.com/series/MercWorks
    url = 'http://mercworks.tumblr.com'
    name = 'mercworks-tumblr'
    long_name = 'Mercworks (from Tumblr)'
    _categories = ('MERCWORKS', )
4096
4097
4098
class OwlTurdTumblr(GenericTumblrV1):
    """Owl Turd / Shen Comix, read through the Tumblr V1 API."""
    # Also on https://tapas.io/series/Shen-Comix
    url = 'http://shencomix.com'
    name = 'owlturd-tumblr'
    long_name = 'Owl Turd / Shen Comix (from Tumblr)'
    _categories = ('OWLTURD', 'SHENCOMIX')
4105
4106
4107
class VectorBelly(GenericTumblrV1):
    """Vector Belly comics, read through the Tumblr V1 API."""
    # Also on http://vectorbelly.com
    url = 'http://vectorbelly.tumblr.com'
    name = 'vector'
    long_name = 'Vector Belly'
4113
4114
4115
class GoneIntoRapture(GenericTumblrV1):
    """Gone Into Rapture comics, read through the Tumblr V1 API."""
    # Also on http://goneintorapture.tumblr.com
    # Also on http://tapastic.com/series/Goneintorapture
    url = 'http://goneintorapture.com'
    name = 'rapture'
    long_name = 'Gone Into Rapture'
4122
4123
4124
class TheOatmealTumblr(GenericTumblrV1):
    """The Oatmeal comics, read through the Tumblr V1 API."""
    # Also on http://theoatmeal.com
    url = 'http://oatmeal.tumblr.com'
    name = 'oatmeal-tumblr'
    long_name = 'The Oatmeal (from Tumblr)'
4130
4131
4132
class HeckIfIKnowComicsTumblr(GenericTumblrV1):
    """Heck If I Know Comics, read through the Tumblr V1 API."""
    # Also on http://tapastic.com/series/Regular
    url = 'http://heckifiknowcomics.com'
    name = 'heck-tumblr'
    long_name = 'Heck if I Know comics (from Tumblr)'
4138
4139
4140
class MyJetPack(GenericTumblrV1):
    """My Jet Pack comics, read through the Tumblr V1 API."""
    url = 'http://myjetpack.tumblr.com'
    name = 'jetpack'
    long_name = 'My Jet Pack'
4145
4146
4147
class CheerUpEmoKidTumblr(GenericTumblrV1):
    """Cheer Up Emo Kid comics, read through the Tumblr V1 API."""
    # Also on http://www.cheerupemokid.com
    # Also on http://tapastic.com/series/CUEK
    url = 'https://enzocomics.tumblr.com'
    name = 'cuek-tumblr'
    long_name = 'Cheer Up Emo Kid (from Tumblr)'
4154
4155
4156
class ForLackOfABetterComic(GenericTumblrV1):
    """For Lack Of A Better Comic, read through the Tumblr V1 API."""
    # Also on http://forlackofabettercomic.com
    url = 'http://forlackofabettercomic.tumblr.com'
    name = 'lack'
    long_name = 'For Lack Of A Better Comic'
4162
4163
4164
class ZenPencilsTumblr(GenericTumblrV1):
    """Zen Pencils comics, read through the Tumblr V1 API."""
    # Also on http://zenpencils.com
    # Also on http://www.gocomics.com/zen-pencils
    url = 'http://zenpencils.tumblr.com'
    name = 'zenpencils-tumblr'
    long_name = 'Zen Pencils (from Tumblr)'
    _categories = ('ZENPENCILS', )
4172
4173
4174
class ThreeWordPhraseTumblr(GenericTumblrV1):
    """Three Word Phrase comics, read through the Tumblr V1 API."""
    # Also on http://threewordphrase.com
    url = 'http://threewordphrase.tumblr.com'
    name = 'threeword-tumblr'
    long_name = 'Three Word Phrase (from Tumblr)'
4180
4181
4182
class TimeTrabbleTumblr(GenericTumblrV1):
    """Time Trabble comics, read through the Tumblr V1 API."""
    # Also on http://timetrabble.com
    url = 'http://timetrabble.tumblr.com'
    name = 'timetrabble-tumblr'
    long_name = 'Time Trabble (from Tumblr)'
4188
4189
4190
class SafelyEndangeredTumblr(GenericTumblrV1):
    """Safely Endangered comics, read through the Tumblr V1 API."""
    # Also on http://www.safelyendangered.com
    url = 'http://tumblr.safelyendangered.com'
    name = 'endangered-tumblr'
    long_name = 'Safely Endangered (from Tumblr)'
4196
4197
4198
class MouseBearComedyTumblr(GenericTumblrV1):
    """Mouse Bear Comedy comics, read through the Tumblr V1 API."""
    # Also on http://www.mousebearcomedy.com
    url = 'http://mousebearcomedy.tumblr.com'
    name = 'mousebear-tumblr'
    long_name = 'Mouse Bear Comedy (from Tumblr)'
4204
4205
4206
class BouletCorpTumblr(GenericTumblrV1):
    """BouletCorp comics, read through the Tumblr V1 API."""
    # Also on http://www.bouletcorp.com
    url = 'https://bouletcorp.tumblr.com'
    name = 'boulet-tumblr'
    long_name = 'Boulet Corp (from Tumblr)'
    _categories = ('BOULET', )
4213
4214
4215
class TheAwkwardYetiTumblr(GenericTumblrV1):
    """The Awkward Yeti comics, read through the Tumblr V1 API."""
    # Also on http://www.gocomics.com/the-awkward-yeti
    # Also on http://theawkwardyeti.com
    # Also on https://tapastic.com/series/TheAwkwardYeti
    url = 'http://larstheyeti.tumblr.com'
    name = 'yeti-tumblr'
    long_name = 'The Awkward Yeti (from Tumblr)'
    _categories = ('YETI', )
4224
4225
4226
class NellucNhoj(GenericTumblrV1):
    """Nelluc Nhoj comics, read through the Tumblr V1 API."""
    url = 'http://nellucnhoj.com'
    name = 'nhoj'
    long_name = 'Nelluc Nhoj'
4231
4232
4233
class DownTheUpwardSpiralTumblr(GenericTumblrV1):
    """Down The Upward Spiral comics, read through the Tumblr V1 API."""
    # Also on http://www.downtheupwardspiral.com
    # Also on https://tapastic.com/series/Down-the-Upward-Spiral
    url = 'http://downtheupwardspiral.tumblr.com'
    name = 'spiral-tumblr'
    long_name = 'Down the Upward Spiral (from Tumblr)'
4240
4241
4242
class AsPerUsualTumblr(GenericTumblrV1):
    """Class to retrieve As Per Usual comics."""
    # Also on https://tapastic.com/series/AsPerUsual
    name = 'usual-tumblr'
    long_name = 'As Per Usual (from Tumblr)'
    url = 'http://as-per-usual.tumblr.com'
    # Every other comic class in this file declares its categories through
    # the `_categories` attribute; the former public `categories` name was
    # inconsistent with them.
    _categories = ('DAMILEE', )
4249
4250
4251
class HotComicsForCoolPeopleTumblr(GenericTumblrV1):
    """Class to retrieve Hot Comics For Cool People."""
    # Also on https://tapastic.com/series/Hot-Comics-For-Cool-People
    # Also on http://hotcomics.biz (links to tumblr)
    # Also on http://hcfcp.com (links to tumblr)
    name = 'hotcomics-tumblr'
    long_name = 'Hot Comics For Cool People (from Tumblr)'
    url = 'http://hotcomicsforcoolpeople.tumblr.com'
    # Every other comic class in this file declares its categories through
    # the `_categories` attribute; the former public `categories` name was
    # inconsistent with them.
    _categories = ('DAMILEE', )
4260
4261
4262
class OneOneOneOneComicTumblr(GenericTumblrV1):
    """1111 Comics, read through the Tumblr V1 API."""
    # Also on http://www.1111comics.me
    # Also on https://tapastic.com/series/1111-Comics
    url = 'http://comics1111.tumblr.com'
    name = '1111-tumblr'
    long_name = '1111 Comics (from Tumblr)'
    _categories = ('ONEONEONEONE', )
4270
4271
4272
class JhallComicsTumblr(GenericTumblrV1):
    """Jhall Comics, read through the Tumblr V1 API."""
    # Also on http://jhallcomics.com
    url = 'http://jhallcomics.tumblr.com'
    name = 'jhall-tumblr'
    long_name = 'Jhall Comics (from Tumblr)'
4278
4279
4280
class BerkeleyMewsTumblr(GenericTumblrV1):
    """Berkeley Mews comics, read through the Tumblr V1 API."""
    # Also on http://www.gocomics.com/berkeley-mews
    # Also on http://www.berkeleymews.com
    url = 'http://mews.tumblr.com'
    name = 'berkeley-tumblr'
    long_name = 'Berkeley Mews (from Tumblr)'
    _categories = ('BERKELEY', )
4288
4289
4290
class JoanCornellaTumblr(GenericTumblrV1):
    """Joan Cornella comics, read through the Tumblr V1 API."""
    # Also on http://joancornella.net
    url = 'http://cornellajoan.tumblr.com'
    name = 'cornella-tumblr'
    long_name = 'Joan Cornella (from Tumblr)'
4296
4297
4298
class RespawnComicTumblr(GenericTumblrV1):
    """Respawn Comic, read through the Tumblr V1 API."""
    # Also on http://respawncomic.com
    url = 'https://respawncomic.tumblr.com'
    name = 'respawn-tumblr'
    long_name = 'Respawn Comic (from Tumblr)'
4304
4305
4306
class ChrisHallbeckTumblr(GenericTumblrV1):
    """Chris Hallbeck comics, read through the Tumblr V1 API."""
    # Also on https://tapastic.com/ChrisHallbeck
    # Also on http://maximumble.com
    # Also on http://minimumble.com
    # Also on http://thebookofbiff.com
    url = 'https://chrishallbeck.tumblr.com'
    name = 'hallbeck-tumblr'
    # NOTE(review): 'Hallback' here and in the category differs from the
    # 'Hallbeck' spelling of the url; kept as is - confirm what the other
    # Chris Hallbeck classes use before renaming.
    long_name = 'Chris Hallback (from Tumblr)'
    _categories = ('HALLBACK', )
4316
4317
4318
class ComicNuggets(GenericTumblrV1):
    """Comic Nuggets, read through the Tumblr V1 API."""
    url = 'http://comicnuggets.com'
    name = 'nuggets'
    long_name = 'Comic Nuggets'
4323
4324
4325
class PigeonGazetteTumblr(GenericTumblrV1):
    """The Pigeon Gazette comics, read through the Tumblr V1 API."""
    # Also on https://tapastic.com/series/The-Pigeon-Gazette
    url = 'http://thepigeongazette.tumblr.com'
    name = 'pigeon-tumblr'
    long_name = 'The Pigeon Gazette (from Tumblr)'
4331
4332
4333
class CancerOwl(GenericTumblrV1):
    """Cancer Owl comics, read through the Tumblr V1 API."""
    # Also on http://cancerowl.com
    url = 'http://cancerowl.tumblr.com'
    name = 'cancerowl-tumblr'
    long_name = 'Cancer Owl (from Tumblr)'
4339
4340
4341
class FowlLanguageTumblr(GenericTumblrV1):
    """Fowl Language comics, read through the Tumblr V1 API."""
    # Also on http://www.fowllanguagecomics.com
    # Also on http://tapastic.com/series/Fowl-Language-Comics
    # Also on http://www.gocomics.com/fowl-language
    url = 'http://fowllanguagecomics.tumblr.com'
    name = 'fowllanguage-tumblr'
    long_name = 'Fowl Language Comics (from Tumblr)'
    _categories = ('FOWLLANGUAGE', )
4350
4351
4352
class TheOdd1sOutTumblr(GenericTumblrV1):
    """The Odd 1s Out comics, read through the Tumblr V1 API."""
    # Also on http://theodd1sout.com
    # Also on https://tapastic.com/series/Theodd1sout
    url = 'http://theodd1sout.tumblr.com'
    name = 'theodd-tumblr'
    long_name = 'The Odd 1s Out (from Tumblr)'
4359
4360
4361
class TheUnderfoldTumblr(GenericTumblrV1):
    """The Underfold comics, read through the Tumblr V1 API."""
    # Also on http://theunderfold.com
    url = 'http://theunderfold.tumblr.com'
    name = 'underfold-tumblr'
    long_name = 'The Underfold (from Tumblr)'
4367
4368
4369
class LolNeinTumblr(GenericTumblrV1):
    """Lol Nein comics, read through the Tumblr V1 API."""
    # Also on http://lolnein.com
    url = 'http://lolneincom.tumblr.com'
    name = 'lolnein-tumblr'
    long_name = 'Lol Nein (from Tumblr)'
4375
4376
4377
class FatAwesomeComicsTumblr(GenericTumblrV1):
    """Fat Awesome Comics, read through the Tumblr V1 API."""
    # Also on http://fatawesome.com/comics
    url = 'http://fatawesomecomedy.tumblr.com'
    name = 'fatawesome-tumblr'
    long_name = 'Fat Awesome (from Tumblr)'
4383
4384
4385
class TheWorldIsFlatTumblr(GenericTumblrV1):
    """The World Is Flat comics, read through the Tumblr V1 API."""
    # Also on https://tapastic.com/series/The-World-is-Flat
    url = 'http://theworldisflatcomics.com'
    name = 'flatworld-tumblr'
    long_name = 'The World Is Flat (from Tumblr)'
4391
4392
4393
class DorrisMc(GenericTumblrV1):
    """Dorris Mc comics, read through the Tumblr V1 API."""
    # Also on http://www.gocomics.com/dorris-mccomics
    url = 'http://dorrismccomics.com'
    name = 'dorrismc'
    long_name = 'Dorris Mc'
4399
4400
4401
class LeleozTumblr(GenericDeletedComic, GenericTumblrV1):
    """Leleoz comics (Tumblr V1 class combined with GenericDeletedComic)."""
    # Also on https://tapastic.com/series/Leleoz
    url = 'http://leleozcomics.tumblr.com'
    name = 'leleoz-tumblr'
    long_name = 'Leleoz (from Tumblr)'
4407
4408
4409
class MoonBeardTumblr(GenericTumblrV1):
    """Moon Beard comics, read through the Tumblr V1 API."""
    # Also on http://moonbeard.com
    # Also on http://www.webtoons.com/en/comedy/moon-beard/list?title_no=471
    url = 'http://squireseses.tumblr.com'
    name = 'moonbeard-tumblr'
    long_name = 'Moon Beard (from Tumblr)'
    _categories = ('MOONBEARD', )
4417
4418
4419
class AComik(GenericTumblrV1):
    """A Comik comics, read through the Tumblr V1 API."""
    url = 'http://acomik.com'
    name = 'comik'
    long_name = 'A Comik'
4424
4425
4426
class ClassicRandy(GenericTumblrV1):
    """Classic Randy comics, read through the Tumblr V1 API."""
    url = 'http://classicrandy.tumblr.com'
    name = 'randy'
    long_name = 'Classic Randy'
4431
4432
4433
class DagssonTumblr(GenericTumblrV1):
    """Dagsson comics, read through the Tumblr V1 API."""
    # Also on http://www.dagsson.com
    url = 'https://hugleikurdagsson.tumblr.com'
    name = 'dagsson-tumblr'
    long_name = 'Dagsson Hugleikur (from Tumblr)'
4439
4440
4441
class LinsEditionsTumblr(GenericTumblrV1):
    """L.I.N.S. Editions comics, read through the Tumblr V1 API."""
    # Also on https://linsedition.com
    # Now on http://warandpeas.tumblr.com
    url = 'https://linscomics.tumblr.com'
    name = 'lins-tumblr'
    long_name = 'L.I.N.S. Editions (from Tumblr)'
    _categories = ('WARANDPEAS', 'LINS')
4449
4450
4451
class WarAndPeasTumblr(GenericTumblrV1):
    """War And Peas comics, read through the Tumblr V1 API."""
    # Was on https://linscomics.tumblr.com
    url = 'http://warandpeas.tumblr.com'
    name = 'warandpeas-tumblr'
    long_name = 'War And Peas (from Tumblr)'
    _categories = ('WARANDPEAS', 'LINS')
4458
4459
4460
class OrigamiHotDish(GenericTumblrV1):
    """Origami Hot Dish comics, read through the Tumblr V1 API."""
    url = 'http://origamihotdish.com'
    name = 'origamihotdish'
    long_name = 'Origami Hot Dish'
4465
4466
4467
class HitAndMissComicsTumblr(GenericTumblrV1):
    """Hit and Miss Comics, read through the Tumblr V1 API."""
    url = 'https://hitandmisscomics.tumblr.com'
    name = 'hitandmiss'
    long_name = 'Hit and Miss Comics'
4472
4473
4474
class HMBlanc(GenericTumblrV1):
    """HM Blanc comics, read through the Tumblr V1 API."""
    url = 'http://hmblanc.tumblr.com'
    name = 'hmblanc'
    long_name = 'HM Blanc'
4479
4480
4481
class TalesOfAbsurdityTumblr(GenericTumblrV1):
    """Tales Of Absurdity comics, read through the Tumblr V1 API."""
    # Also on http://talesofabsurdity.com
    # Also on http://tapastic.com/series/Tales-Of-Absurdity
    url = 'http://talesofabsurdity.tumblr.com'
    name = 'absurdity-tumblr'
    long_name = 'Tales of Absurdity (from Tumblr)'
    _categories = ('ABSURDITY', )
4489
4490
4491
class RobbieAndBobby(GenericTumblrV1):
    """Robbie And Bobby comics, read through the Tumblr V1 API."""
    # Also on http://robbieandbobby.com
    url = 'http://robbieandbobby.tumblr.com'
    name = 'robbie-tumblr'
    long_name = 'Robbie And Bobby (from Tumblr)'
4497
4498
4499
class ElectricBunnyComicTumblr(GenericTumblrV1):
    """Electric Bunny Comics, read through the Tumblr V1 API."""
    # Also on http://www.electricbunnycomics.com/View/Comic/153/Welcome+to+Hell
    url = 'http://electricbunnycomics.tumblr.com'
    name = 'bunny-tumblr'
    long_name = 'Electric Bunny Comic (from Tumblr)'
4505
4506
4507
class Hoomph(GenericTumblrV1):
    """Hoomph comics, read through the Tumblr V1 API."""
    url = 'http://hoom.ph'
    name = 'hoomph'
    long_name = 'Hoomph'
4512
4513
4514
class BFGFSTumblr(GenericTumblrV1):
    """BFGFS comics, read through the Tumblr V1 API."""
    # Also on https://tapastic.com/series/BFGFS
    # Also on http://bfgfs.com
    url = 'https://bfgfs.tumblr.com'
    name = 'bfgfs-tumblr'
    long_name = 'BFGFS (from Tumblr)'
4521
4522
4523
class DoodleForFood(GenericTumblrV1):
    """Doodle For Food comics, read through the Tumblr V1 API."""
    # Also on https://tapastic.com/series/Doodle-for-Food
    url = 'http://www.doodleforfood.com'
    name = 'doodle'
    long_name = 'Doodle For Food'
4529
4530
4531
class CassandraCalinTumblr(GenericTumblrV1):
    """C. Cassandra comics, read through the Tumblr V1 API."""
    # Also on http://cassandracalin.com
    # Also on https://tapastic.com/series/C-Cassandra-comics
    url = 'http://c-cassandra.tumblr.com'
    name = 'cassandra-tumblr'
    long_name = 'Cassandra Calin (from Tumblr)'
4538
4539
4540
class DougWasTaken(GenericTumblrV1):
    """Doug Was Taken comics, read through the Tumblr V1 API."""
    url = 'https://dougwastaken.tumblr.com'
    name = 'doug'
    long_name = 'Doug Was Taken'
4545
4546
4547
class MandatoryRollerCoaster(GenericTumblrV1):
    """Mandatory Roller Coaster comics, read through the Tumblr V1 API."""
    url = 'http://mandatoryrollercoaster.com'
    name = 'rollercoaster'
    long_name = 'Mandatory Roller Coaster'
4552
4553
4554
class CEstPasEnRegardantSesPompes(GenericTumblrV1):
    """C'Est Pas En Regardant Ses Pompes (...) comics, read through the Tumblr V1 API."""
    url = 'http://marcoandco.tumblr.com'
    name = 'cperspqccltt'
    long_name = 'C Est Pas En Regardant Ses Pompes (...)'
4559
4560
4561
class TheGrohlTroll(GenericTumblrV1):
    """The Grohl Troll comics, read through the Tumblr V1 API."""
    url = 'http://thegrohltroll.com'
    name = 'grohltroll'
    long_name = 'The Grohl Troll'
4566
4567
4568
class WebcomicName(GenericTumblrV1):
    """Webcomic Name comics, read through the Tumblr V1 API."""
    url = 'http://webcomicname.com'
    name = 'webcomicname'
    long_name = 'Webcomic Name'
4573
4574
4575
class BooksOfAdam(GenericTumblrV1):
    """Books of Adam comics, retrieved with the inherited GenericTumblrV1 logic."""
    # Also published at http://www.booksofadam.com
    name = 'booksofadam'
    long_name = 'Books of Adam'
    url = 'http://booksofadam.tumblr.com'
4581
4582
4583
class HarkAVagrant(GenericTumblrV1):
    """Hark A Vagrant comics (Tumblr source), retrieved with the inherited GenericTumblrV1 logic."""
    # Also published at http://www.harkavagrant.com
    name = 'hark-tumblr'
    long_name = 'Hark A Vagrant (from Tumblr)'
    url = 'http://beatonna.tumblr.com'
4589
4590
4591
class OurSuperAdventureTumblr(GenericTumblrV1):
    """Our Super Adventure comics (Tumblr source), retrieved with GenericTumblrV1 logic."""
    # Also published at https://tapastic.com/series/Our-Super-Adventure
    # Also published at http://www.oursuperadventure.com
    # Related site: http://sarahgraley.com
    name = 'superadventure-tumblr'
    long_name = 'Our Super Adventure (from Tumblr)'
    url = 'http://sarahssketchbook.tumblr.com'
4599
4600
4601
class JakeLikesOnions(GenericTumblrV1):
    """Jake Likes Onions comics, retrieved with the inherited GenericTumblrV1 logic."""
    name = 'jake'
    long_name = 'Jake Likes Onions'
    url = 'http://jakelikesonions.com'
4606
4607
4608
class InYourFaceCakeTumblr(GenericTumblrV1):
    """In Your Face Cake comics (Tumblr source), retrieved with GenericTumblrV1 logic."""
    # Also published at https://tapas.io/series/In-Your-Face-Cake
    name = 'inyourfacecake-tumblr'
    long_name = 'In Your Face Cake (from Tumblr)'
    url = 'https://in-your-face-cake.tumblr.com'
    _categories = ('INYOURFACECAKE', )
4615
4616
4617
class Robospunk(GenericTumblrV1):
    """Robospunk comics, retrieved with the inherited GenericTumblrV1 logic."""
    name = 'robospunk'
    long_name = 'Robospunk'
    url = 'http://robospunk.com'
4622
4623
4624
class BananaTwinky(GenericTumblrV1):
    """Banana Twinky comics, retrieved with the inherited GenericTumblrV1 logic."""
    name = 'banana'
    long_name = 'Banana Twinky'
    url = 'https://bananatwinky.tumblr.com'
4629
4630
4631
class YesterdaysPopcornTumblr(GenericTumblrV1):
    """Yesterday's Popcorn comics (Tumblr source), retrieved with GenericTumblrV1 logic."""
    # Also published at http://www.yesterdayspopcorn.com
    # Also published at https://tapastic.com/series/Yesterdays-Popcorn
    name = 'popcorn-tumblr'
    long_name = "Yesterday's Popcorn (from Tumblr)"
    url = 'http://yesterdayspopcorn.tumblr.com'
4638
4639
4640
class TwistedDoodles(GenericTumblrV1):
    """Twisted Doodles comics, retrieved with the inherited GenericTumblrV1 logic."""
    name = 'twisted'
    long_name = 'Twisted Doodles'
    url = 'http://www.twisteddoodles.com'
4645
4646
4647
class UbertoolTumblr(GenericTumblrV1):
    """Ubertool comics (Tumblr source), retrieved with the inherited GenericTumblrV1 logic."""
    # Also published at http://ubertoolcomic.com
    # Also published at https://tapastic.com/series/ubertool
    name = 'ubertool-tumblr'
    long_name = 'Ubertool (from Tumblr)'
    url = 'https://ubertool.tumblr.com'
    _categories = ('UBERTOOL', )
4655
4656
4657
class LittleLifeLinesTumblr(GenericDeletedComic, GenericTumblrV1):
    """Little Life Lines comics (Tumblr source).

    GenericDeletedComic precedes GenericTumblrV1 in the bases, so its
    attributes win in the method resolution order.
    """
    # Also published at http://www.littlelifelines.com
    name = 'life-tumblr'
    long_name = 'Little Life Lines (from Tumblr)'
    url = 'https://little-life-lines.tumblr.com'
4663
4664
4665
class TheyCanTalk(GenericTumblrV1):
    """They Can Talk comics, retrieved with the inherited GenericTumblrV1 logic."""
    name = 'theycantalk'
    long_name = 'They Can Talk'
    url = 'http://theycantalk.com'
4670
4671
4672
class Will5NeverCome(GenericTumblrV1):
    """Will 5:00 Never Come comics, retrieved with the inherited GenericTumblrV1 logic."""
    name = 'will5'
    long_name = 'Will 5:00 Never Come ?'
    url = 'http://will5nevercome.com'
4677
4678
4679
class Sephko(GenericTumblrV1):
    """Sephko comics, retrieved with the inherited GenericTumblrV1 logic."""
    # Also published at http://www.sephko.com
    name = 'sephko'
    long_name = 'Sephko'
    url = 'https://sephko.tumblr.com'
4685
4686
4687
class BlazersAtDawn(GenericTumblrV1):
    """Blazers At Dawn comics, retrieved with the inherited GenericTumblrV1 logic."""
    name = 'blazers'
    long_name = 'Blazers At Dawn'
    url = 'http://blazersatdawn.tumblr.com'
4692
4693
4694
class ArtByMoga(GenericEmptyComic, GenericTumblrV1):  # Deactivated because it downloads too many things
    """Art By Moga comics.

    GenericEmptyComic precedes GenericTumblrV1 in the bases, so its
    attributes win in the method resolution order.
    """
    name = 'moga'
    long_name = 'Art By Moga'
    url = 'http://artbymoga.tumblr.com'
4699
4700
4701
class VerbalVomitTumblr(GenericTumblrV1):
    """Verbal Vomit comics (Tumblr source), retrieved with the inherited GenericTumblrV1 logic."""
    # Also published at http://www.verbal-vomit.com
    name = 'vomit-tumblr'
    long_name = 'Verbal Vomit (from Tumblr)'
    url = 'http://verbalvomits.tumblr.com'
4707
4708
4709
class LibraryComic(GenericTumblrV1):
    """LibraryComic comics (Tumblr source), retrieved with the inherited GenericTumblrV1 logic."""
    # Also published at http://librarycomic.com
    name = 'library-tumblr'
    long_name = 'LibraryComic (from Tumblr)'
    url = 'https://librarycomic.tumblr.com'
4715
4716
4717
class TizzyStitchBirdTumblr(GenericTumblrV1):
    """Tizzy Stitch Bird comics (Tumblr source), retrieved with GenericTumblrV1 logic."""
    # Also published at http://tizzystitchbird.com
    # Also published at https://tapastic.com/series/TizzyStitchbird
    # Also published at http://www.webtoons.com/en/challenge/tizzy-stitchbird/list?title_no=50082
    name = 'tizzy-tumblr'
    long_name = 'Tizzy Stitch Bird (from Tumblr)'
    url = 'http://tizzystitchbird.tumblr.com'
4725
4726
4727
class VictimsOfCircumsolarTumblr(GenericTumblrV1):
    """Victims Of Circumsolar comics (Tumblr source), retrieved with GenericTumblrV1 logic."""
    # Also published at http://www.victimsofcircumsolar.com
    name = 'circumsolar-tumblr'
    long_name = 'Victims Of Circumsolar (from Tumblr)'
    url = 'https://victimsofcomics.tumblr.com'
4733
4734
4735
class RockPaperCynicTumblr(GenericTumblrV1):
    """Rock Paper Cynic comics (Tumblr source), retrieved with GenericTumblrV1 logic."""
    # Also published at http://www.rockpapercynic.com
    # Also published at https://tapastic.com/series/rockpapercynic
    name = 'rpc-tumblr'
    long_name = 'Rock Paper Cynic (from Tumblr)'
    url = 'http://rockpapercynic.tumblr.com'
4742
4743
4744
class DeadlyPanelTumblr(GenericTumblrV1):
    """Deadly Panel comics (Tumblr source), retrieved with the inherited GenericTumblrV1 logic."""
    # Also published at http://www.deadlypanel.com
    # Also published at https://tapastic.com/series/deadlypanel
    name = 'deadly-tumblr'
    long_name = 'Deadly Panel (from Tumblr)'
    url = 'https://deadlypanel.tumblr.com'
4751
4752
4753
class CatanaComics(GenericComicNotWorking):  # Not a Tumblr anymore ?
    """Catana comics; currently based on GenericComicNotWorking rather than a Tumblr class."""
    name = 'catana'
    long_name = 'Catana'
    url = 'http://www.catanacomics.com'
4758
4759
4760
class AngryAtNothingTumblr(GenericTumblrV1):
    """Angry at Nothing comics (Tumblr source), retrieved with GenericTumblrV1 logic."""
    # Also published at http://www.angryatnothing.net
    # Also published at http://tapastic.com/series/Comics-yeah-definitely-comics-
    name = 'angry-tumblr'
    long_name = 'Angry At Nothing (from Tumblr)'
    url = 'http://angryatnothing.tumblr.com'
4767
4768
4769
class ShanghaiTango(GenericTumblrV1):
    """Shanghai Tango comics, retrieved with the inherited GenericTumblrV1 logic."""
    name = 'tango'
    long_name = 'Shanghai Tango'
    url = 'http://tango2010weibo.tumblr.com'
4774
4775
4776
class OffTheLeashDogTumblr(GenericTumblrV1):
    """Off The Leash Dog comics (Tumblr source), retrieved with GenericTumblrV1 logic."""
    # Also published at http://offtheleashdogcartoons.com
    # Also published at http://www.rupertfawcettcartoons.com
    name = 'offtheleash-tumblr'
    long_name = 'Off The Leash Dog (from Tumblr)'
    url = 'http://rupertfawcettsdoggyblog.tumblr.com'
    _categories = ('FAWCETT', )
4784
4785
4786
class ImogenQuestTumblr(GenericTumblrV1):
    """Imogen Quest comics (Tumblr source), retrieved with the inherited GenericTumblrV1 logic."""
    # Also published at http://imogenquest.net
    name = 'imogen-tumblr'
    long_name = 'Imogen Quest (from Tumblr)'
    url = 'http://imoquest.tumblr.com'
4792
4793
4794
class Shitfest(GenericTumblrV1):
    """Shitfest comics, retrieved with the inherited GenericTumblrV1 logic."""
    name = 'shitfest'
    long_name = 'Shitfest'
    url = 'http://shitfestcomic.com'
4799
4800
4801
class IceCreamSandwichComics(GenericTumblrV1):
    """Ice Cream Sandwich Comics, retrieved with the inherited GenericTumblrV1 logic."""
    name = 'icecream'
    long_name = 'Ice Cream Sandwich Comics'
    url = 'http://icecreamsandwichcomics.com'
4806
4807
4808
class Dustinteractive(GenericTumblrV1):
    """Dustinteractive comics, retrieved with the inherited GenericTumblrV1 logic."""
    name = 'dustinteractive'
    long_name = 'Dustinteractive'
    url = 'http://dustinteractive.com'
4813
4814
4815
class StickyCinemaFloor(GenericTumblrV1):
    """Sticky Cinema Floor comics, retrieved with the inherited GenericTumblrV1 logic."""
    name = 'stickycinema'
    long_name = 'Sticky Cinema Floor'
    url = 'https://stickycinemafloor.tumblr.com'
4820
4821
4822
class IncidentalComicsTumblr(GenericTumblrV1):
    """Incidental Comics (Tumblr source), retrieved with the inherited GenericTumblrV1 logic."""
    # Also published at http://www.incidentalcomics.com
    name = 'incidental-tumblr'
    long_name = 'Incidental Comics (from Tumblr)'
    url = 'http://incidentalcomics.tumblr.com'
4828
4829
4830
class APleasantWasteOfTimeTumblr(GenericTumblrV1):
    """A Pleasant Waste Of Time comics (Tumblr source), retrieved with GenericTumblrV1 logic."""
    # Also published at https://tapas.io/series/A-Pleasant-
    name = 'pleasant-waste-tumblr'
    long_name = 'A Pleasant Waste Of Time (from Tumblr)'
    url = 'https://artjcf.tumblr.com'
    _categories = ('WASTE', )
4837
4838
4839
class HorovitzComicsTumblr(GenericTumblrV1):
    """Horovitz comics (Tumblr source), retrieved with the inherited GenericTumblrV1 logic."""
    # Also published at http://www.horovitzcomics.com
    name = 'horovitz-tumblr'
    long_name = 'Horovitz (from Tumblr)'
    url = 'https://horovitzcomics.tumblr.com'
    _categories = ('HOROVITZ', )
4846
4847
4848
class DeepDarkFearsTumblr(GenericTumblrV1):
    """Deep Dark Fears comics (Tumblr source), retrieved with GenericTumblrV1 logic."""
    name = 'deep-dark-fears-tumblr'
    long_name = 'Deep Dark Fears (from Tumblr)'
    url = 'http://deep-dark-fears.tumblr.com'
4853
4854
4855
class DakotaMcDadzean(GenericTumblrV1):
    """Dakota McFadzean comics, retrieved with the inherited GenericTumblrV1 logic."""
    # NOTE(review): the URL spells the name "mcfadzean"; the class name and
    # long_name say "McDadzean" - presumably a typo, confirm before renaming.
    name = 'dakota'
    long_name = 'Dakota McDadzean'
    url = 'http://dakotamcfadzean.tumblr.com'
4860
4861
4862
class ExtraFabulousComicsTumblr(GenericTumblrV1):
    """Extra Fabulous Comics (Tumblr source), retrieved with GenericTumblrV1 logic."""
    # Also published at http://extrafabulouscomics.com
    name = 'efc-tumblr'
    long_name = 'Extra Fabulous Comics (from Tumblr)'
    url = 'https://extrafabulouscomics.tumblr.com'
    _categories = ('EFC', )
4869
4870
4871
class AlexLevesque(GenericTumblrV1):
    """Alex Levesque comics, retrieved with the inherited GenericTumblrV1 logic."""
    name = 'alevesque'
    long_name = 'Alex Levesque'
    url = 'http://alexlevesque.com'
    _categories = ('FRANCAIS', )
4877
4878
4879
class JamesOfNoTradesTumblr(GenericTumblrV1):
    """James Of No Trades comics (Tumblr source), retrieved with GenericTumblrV1 logic."""
    # Also published at http://jamesofnotrades.com
    # Also published at http://www.webtoons.com/en/challenge/james-of-no-trades/list?title_no=43422
    # Also published at https://tapas.io/series/James-of-No-Trades
    name = 'jamesofnotrades-tumblr'
    long_name = 'James Of No Trades (from Tumblr)'
    url = 'http://jamesfregan.tumblr.com'
    _categories = ('JAMESOFNOTRADES', )
4888
4889
4890
class InfiniteGuff(GenericTumblrV1):
    """Infinite Guff comics, retrieved with the inherited GenericTumblrV1 logic."""
    name = 'infiniteguff'
    long_name = 'Infinite Guff'
    url = 'http://infiniteguff.com'
4895
4896
4897
class SkeletonClaw(GenericTumblrV1):
    """Skeleton Claw comics, retrieved with the inherited GenericTumblrV1 logic."""
    name = 'skeletonclaw'
    long_name = 'Skeleton Claw'
    url = 'http://skeletonclaw.com'
4902
4903
4904
class MrsFrolleinTumblr(GenericTumblrV1):
    """Mrs Frollein comics (Tumblr source), retrieved with the inherited GenericTumblrV1 logic."""
    # Also published at http://www.webtoons.com/en/challenge/mrsfrollein/list?title_no=51710
    name = 'frollein'
    long_name = 'Mrs Frollein (from Tumblr)'
    url = 'https://mrsfrollein.tumblr.com'
4910
4911
4912
class GoodBearComicsTumblr(GenericTumblrV1):
    """Good Bear Comics (Tumblr source), retrieved with the inherited GenericTumblrV1 logic."""
    # Also published at https://goodbearcomics.com
    name = 'goodbear-tumblr'
    long_name = 'Good Bear Comics (from Tumblr)'
    url = 'https://goodbearcomics.tumblr.com'
4918
4919
4920
class BrooklynCartoonsTumblr(GenericTumblrV1):
    """Brooklyn Cartoons (Tumblr source), retrieved with the inherited GenericTumblrV1 logic."""
    # Also published at https://www.brooklyncartoons.com
    # Also published at https://www.instagram.com/brooklyncartoons
    name = 'brooklyn-tumblr'
    long_name = 'Brooklyn Cartoons (from Tumblr)'
    url = 'http://brooklyncartoons.tumblr.com'
4927
4928
4929
class GemmaCorrellTumblr(GenericTumblrV1):
    """Gemma Correll comics (Tumblr source), retrieved with the inherited GenericTumblrV1 logic."""
    # Also published at http://www.gemmacorrell.com/portfolio/comics/
    name = 'gemma-tumblr'
    long_name = 'Gemma Correll (from Tumblr)'
    url = 'http://gemmacorrell.tumblr.com'
4934
4935
4936
class RobotatertotTumblr(GenericTumblrV1):
    """Robotatertot comics (Tumblr source), retrieved with the inherited GenericTumblrV1 logic."""
    # Also published at https://www.instagram.com/robotatertotcomics
    name = 'robotatertot-tumblr'
    long_name = 'Robotatertot (from Tumblr)'
    url = 'https://robotatertot.tumblr.com'
4942
4943
4944
class HuffyPenguin(GenericTumblrV1):
    """Huffy Penguin comics, retrieved with the inherited GenericTumblrV1 logic."""
    name = 'huffypenguin'
    long_name = 'Huffy Penguin'
    url = 'http://huffy-penguin.tumblr.com'
4949
4950
4951
class CowardlyComicsTumblr(GenericTumblrV1):
    """Cowardly Comics (Tumblr source), retrieved with the inherited GenericTumblrV1 logic."""
    # Also published at https://tapas.io/series/CowardlyComics
    # Also published at http://www.webtoons.com/en/challenge/cowardly-comics/list?title_no=65893
    name = 'cowardly-tumblr'
    long_name = 'Cowardly Comics (from Tumblr)'
    url = 'http://cowardlycomics.tumblr.com'
4958
4959
4960
class Caw4hwTumblr(GenericTumblrV1):
    """Caw4hw comics (Tumblr source), retrieved with the inherited GenericTumblrV1 logic."""
    # Also published at https://tapas.io/series/CAW4HW
    name = 'caw4hw-tumblr'
    long_name = 'Caw4hw (from Tumblr)'
    url = 'https://caw4hw.tumblr.com'
4966
4967
4968
class WeFlapsTumblr(GenericTumblrV1):
    """WeFlaps comics (Tumblr source), retrieved with the inherited GenericTumblrV1 logic."""
    name = 'weflaps-tumblr'
    long_name = 'We Flaps (from Tumblr)'
    url = 'https://weflaps.tumblr.com'
4973
4974
4975
class TheseInsideJokesTumblr(GenericTumblrV1):
    """These Inside Jokes comics (Tumblr source), retrieved with GenericTumblrV1 logic."""
    # Also published at http://www.theseinsidejokes.com
    name = 'theseinsidejokes-tumblr'
    long_name = 'These Inside Jokes (from Tumblr)'
    url = 'http://theseinsidejokes.tumblr.com'
4981
4982
4983
class RustledJimmies(GenericTumblrV1):
    """Rustled Jimmies comics, retrieved with the inherited GenericTumblrV1 logic."""
    # NOTE(review): 'restled' looks like a typo for 'rustled'; left untouched
    # because the short name is a runtime identifier - confirm before changing.
    name = 'restled'
    long_name = 'Rustled Jimmies'
    url = 'http://rustledjimmies.net'
4988
4989
4990
class SinewynTumblr(GenericTumblrV1):
    """Sinewyn comics (Tumblr source), retrieved with the inherited GenericTumblrV1 logic."""
    # Also published at https://sinewyn.wordpress.com
    name = 'sinewyn-tumblr'
    long_name = 'Sinewyn (from Tumblr)'
    url = 'https://sinewyn.tumblr.com'
4996
4997
4998
class ItFoolsAMonster(GenericTumblrV1):
    """It Fools A Monster comics, retrieved with the inherited GenericTumblrV1 logic."""
    name = 'itfoolsamonster'
    long_name = 'It Fools A Monster'
    url = 'http://itfoolsamonster.com'
5003
5004
5005
class BoumeriesTumblr(GenericTumblrV1):
    """Boumeries comics (Tumblr source), retrieved with the inherited GenericTumblrV1 logic."""
    # Also published at http://bd.boumerie.com
    # Also published at http://comics.boumerie.com
    name = 'boumeries-tumblr'
    long_name = 'Boumeries (from Tumblr)'
    url = 'http://boumeries.tumblr.com/'
    _categories = ('BOUMERIES', )
5013
5014
5015
class InfiniteImmortalBensTumblr(GenericTumblrV1):
    """Infinite Immortal Bens comics (Tumblr source), retrieved with GenericTumblrV1 logic."""
    # Also published at http://www.webtoons.com/en/challenge/infinite-immortal-bens/list?title_no=32847
    # Also published at https://tapas.io/series/Infinite-Immortal-Bens
    name = 'infiniteimmortal-tumblr'
    long_name = 'Infinite Immortal Bens (from Tumblr)'
    url = 'https://infiniteimmortalbens.tumblr.com'
    _categories = ('INFINITEIMMORTAL', )
5023
5024
5025
class CheeseCornzTumblr(GenericTumblrV1):
    """Cheese Cornz comics (Tumblr source), retrieved with the inherited GenericTumblrV1 logic."""
    name = 'cheesecornz-tumblr'
    long_name = 'Cheese Cornz (from Tumblr)'
    url = 'https://cheesecornz.tumblr.com'
5030
5031
5032
class CinismoIlustrado(GenericTumblrV1):
    """Cinismo Ilustrado comics, retrieved with the inherited GenericTumblrV1 logic."""
    name = 'cinismo'
    long_name = 'Cinismo Ilustrado'
    url = 'http://cinismoilustrado.com'
    _categories = ('ESPANOL', )
5038
5039
5040
class EatMyPaintTumblr(GenericTumblrV1):
    """Eat My Paint comics (Tumblr source), retrieved with the inherited GenericTumblrV1 logic."""
    # Also published at https://tapas.io/series/eatmypaint
    name = 'eatmypaint-tumblr'
    long_name = 'Eat My Paint (from Tumblr)'
    url = 'https://eatmypaint.tumblr.com'
    _categories = ('EATMYPAINT', )
5047
5048
5049
class AnomalyTownFromTumblr(GenericTumblrV1):
    """Anomaly Town comics (Tumblr source), retrieved with the inherited GenericTumblrV1 logic."""
    name = 'anomalytown-tumblr'
    long_name = 'Anomaly Town (from Tumblr)'
    url = 'https://anomalytown.tumblr.com'
5054
5055
5056
class RoryTumblr(GenericTumblrV1):
    """Rory comics (Tumblr source), retrieved with the inherited GenericTumblrV1 logic."""
    # Also published at https://tapas.io/series/Share-Your-Vulnerability
    name = 'rory-tumblr'
    long_name = 'Rory (from Tumblr)'
    url = 'https://rorycomics.tumblr.com/'
    _categories = ('RORY', )
5063
5064
5065
class HorovitzComics(GenericDeletedComic, GenericListableComic):
    """Shared base for the Horovitz comics: archive listing and per-comic parsing.

    Concrete subclasses must replace ``link_re`` with a compiled pattern whose
    first group captures the comic number found in an archive href.
    """
    # Also published at https://horovitzcomics.tumblr.com
    url = 'http://www.horovitzcomics.com'
    _categories = ('HOROVITZ', )
    # Captures the three numeric path components following "comics/" in an
    # image URL; get_comic_info reads them as year, month and day.
    img_re = re.compile('.*comics/([0-9]*)/([0-9]*)/([0-9]*)/.*$')
    # Placeholder: calling .match on it fails until a subclass overrides it.
    link_re = NotImplemented
    get_url_from_archive_element = join_cls_url_to_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        ``link`` is the archive anchor (its href carries the comic number and
        its string the title); ``soup`` is the parsed comic page, which must
        contain exactly one <img id="comic"> element.
        """
        num = int(cls.link_re.match(link['href']).group(1))
        title = link.string
        imgs = soup.find_all('img', id='comic')
        assert len(imgs) == 1, imgs
        date_match = cls.img_re.match(imgs[0]['src'])
        year, month, day = (int(part) for part in date_match.groups())
        return {
            'title': title,
            'day': day,
            'month': month,
            'year': year,
            'img': [image['src'] for image in imgs],
            'num': num,
        }

    @classmethod
    def get_archive_elements(cls):
        """Return the archive-page anchors whose href matches ``link_re``, in reversed page order."""
        archive_url = 'http://www.horovitzcomics.com/comics/archive/'
        archive_soup = get_soup_at_url(archive_url)
        matching_links = archive_soup.find_all('a', href=cls.link_re)
        return reversed(matching_links)
5097
5098
5099
class HorovitzNew(HorovitzComics):
    """Horovitz "new" comics: archive links of the form /comics/new/<number>."""
    name = 'horovitznew'
    long_name = 'Horovitz New'
    # The single group captures the comic number.
    link_re = re.compile('^/comics/new/([0-9]+)$')
5104
5105
5106
class HorovitzClassic(HorovitzComics):
    """Horovitz "classic" comics: archive links of the form /comics/classic/<number>."""
    name = 'horovitzclassic'
    long_name = 'Horovitz Classic'
    # The single group captures the comic number.
    link_re = re.compile('^/comics/classic/([0-9]+)$')
5111
5112
5113
class GenericGoComic(GenericNavigableComic):
    """Base class gathering the navigation and parsing logic shared by gocomics.com comics.

    Subclasses only provide ``name``, ``long_name`` and ``url``.
    """
    _categories = ('GOCOMIC', )

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics."""
        landing_page = get_soup_at_url(cls.url)
        deck = landing_page.find('div', class_='gc-deck gc-deck--cta-1')
        return deck.find('a')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic."""
        # Keep these class strings byte-for-byte (trailing space included):
        # they are handed unchanged to find().
        if next_:
            css_class = 'fa btn btn-outline-default btn-circle fa-caret-right sm '
        else:
            css_class = 'fa btn btn-outline-default btn-circle fa-caret-left sm js-previous-comic '
        return last_soup.find('a', class_=css_class)

    @classmethod
    def get_url_from_link(cls, link):
        """Build the comic URL by joining the link's href onto the gocomics.com root."""
        return urljoin_wrapper('http://www.gocomics.com', link['href'])

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics."""
        def meta_content(prop):
            """Return the content of the first <meta> tag with the given property."""
            return soup.find('meta', property=prop)['content']

        published = string_to_date(meta_content('article:published_time'), "%Y-%m-%d")
        images = soup.find_all('meta', property='og:image')
        author = meta_content('article:author')
        tags = meta_content('article:tag')
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [image['content'] for image in images],
            'author': author,
            'tags': tags,
        }
5151
5152
5153
class PearlsBeforeSwine(GenericGoComic):
    """Pearls Before Swine comics, retrieved with the inherited GenericGoComic logic."""
    name = 'pearls'
    long_name = 'Pearls Before Swine'
    url = 'http://www.gocomics.com/pearlsbeforeswine'
5158
5159
5160
class Peanuts(GenericGoComic):
    """Peanuts comics, retrieved with the inherited GenericGoComic logic."""
    name = 'peanuts'
    long_name = 'Peanuts'
    url = 'http://www.gocomics.com/peanuts'
5165
5166
5167
class MattWuerker(GenericGoComic):
    """Matt Wuerker comics, retrieved with the inherited GenericGoComic logic."""
    name = 'wuerker'
    long_name = 'Matt Wuerker'
    url = 'http://www.gocomics.com/mattwuerker'
5172
5173
5174
class TomToles(GenericGoComic):
    """Tom Toles comics, retrieved with the inherited GenericGoComic logic."""
    name = 'toles'
    long_name = 'Tom Toles'
    url = 'http://www.gocomics.com/tomtoles'
5179
5180
5181
class BreakOfDay(GenericGoComic):
    """Break Of Day comics, retrieved with the inherited GenericGoComic logic."""
    name = 'breakofday'
    long_name = 'Break Of Day'
    url = 'http://www.gocomics.com/break-of-day'
5186
5187
5188
class Brevity(GenericGoComic):
    """Brevity comics, retrieved with the inherited GenericGoComic logic."""
    name = 'brevity'
    long_name = 'Brevity'
    url = 'http://www.gocomics.com/brevity'
5193
5194
5195
class MichaelRamirez(GenericGoComic):
    """Michael Ramirez comics, retrieved with the inherited GenericGoComic logic."""
    name = 'ramirez'
    long_name = 'Michael Ramirez'
    url = 'http://www.gocomics.com/michaelramirez'
5200
5201
5202
class MikeLuckovich(GenericGoComic):
    """Mike Luckovich comics, retrieved with the inherited GenericGoComic logic."""
    name = 'luckovich'
    long_name = 'Mike Luckovich'
    url = 'http://www.gocomics.com/mikeluckovich'
5207
5208
5209
class JimBenton(GenericGoComic):
    """Jim Benton comics, retrieved with the inherited GenericGoComic logic."""
    # Also published at http://jimbenton.tumblr.com
    name = 'benton'
    long_name = 'Jim Benton'
    url = 'http://www.gocomics.com/jim-benton-cartoons'
5215
5216
5217
class TheArgyleSweater(GenericGoComic):
    """The Argyle Sweater comics, retrieved with the inherited GenericGoComic logic."""
    name = 'argyle'
    long_name = 'Argyle Sweater'
    url = 'http://www.gocomics.com/theargylesweater'
5222
5223
5224
class SunnyStreet(GenericGoComic):
    """Sunny Street comics, retrieved with the inherited GenericGoComic logic."""
    # Also published at http://www.sunnystreetcomics.com
    name = 'sunny'
    long_name = 'Sunny Street'
    url = 'http://www.gocomics.com/sunny-street'
5230
5231
5232
class OffTheMark(GenericGoComic):
    """Off The Mark comics, retrieved with the inherited GenericGoComic logic."""
    # Also published at https://www.offthemark.com
    name = 'offthemark'
    long_name = 'Off The Mark'
    url = 'http://www.gocomics.com/offthemark'
5238
5239
5240
class WuMo(GenericGoComic):
    """WuMo comics, retrieved with the inherited GenericGoComic logic."""
    # Also published at http://wumo.com
    name = 'wumo'
    long_name = 'WuMo'
    url = 'http://www.gocomics.com/wumo'
5246
5247
5248
class LunarBaboon(GenericGoComic):
    """Lunar Baboon comics, retrieved with the inherited GenericGoComic logic."""
    # Also published at http://www.lunarbaboon.com
    # Also published at https://tapastic.com/series/Lunarbaboon
    name = 'lunarbaboon'
    long_name = 'Lunar Baboon'
    url = 'http://www.gocomics.com/lunarbaboon'
5255
5256
5257
class SandersenGocomic(GenericGoComic):
    """Sarah Andersen comics (GoComics source), retrieved with the inherited GenericGoComic logic."""
    # Also published at http://sarahcandersen.com
    # Also published at http://tapastic.com/series/Doodle-Time
    name = 'sandersen-goc'
    long_name = 'Sarah Andersen (from GoComics)'
    url = 'http://www.gocomics.com/sarahs-scribbles'
5264
5265
5266
class SaturdayMorningBreakfastCerealGoComic(GenericGoComic):
    """Saturday Morning Breakfast Cereal comics (GoComics source), using GenericGoComic logic."""
    # Also published at http://smbc-comics.tumblr.com
    # Also published at http://www.smbc-comics.com
    name = 'smbc-goc'
    long_name = 'Saturday Morning Breakfast Cereal (from GoComics)'
    url = 'http://www.gocomics.com/saturday-morning-breakfast-cereal'
    _categories = ('SMBC', )
5274
5275
5276
class CalvinAndHobbesGoComic(GenericGoComic):
    """Calvin and Hobbes comics (GoComics source), retrieved with GenericGoComic logic."""
    # From gocomics, not http://marcel-oehler.marcellosendos.ch/comics/ch/
    name = 'calvin-goc'
    long_name = 'Calvin and Hobbes (from GoComics)'
    url = 'http://www.gocomics.com/calvinandhobbes'
5282
5283
5284
class RallGoComic(GenericGoComic):
    """Ted Rall comics (GoComics source), retrieved with the inherited GenericGoComic logic."""
    # Also published at http://rall.com/comic
    name = 'rall-goc'
    long_name = 'Ted Rall (from GoComics)'
    url = 'http://www.gocomics.com/ted-rall'
    _categories = ('RALL', )
5291
5292
5293
class TheAwkwardYetiGoComic(GenericGoComic):
    """The Awkward Yeti comics (GoComics source), retrieved with GenericGoComic logic."""
    # Also published at http://larstheyeti.tumblr.com
    # Also published at http://theawkwardyeti.com
    # Also published at https://tapastic.com/series/TheAwkwardYeti
    name = 'yeti-goc'
    long_name = 'The Awkward Yeti (from GoComics)'
    url = 'http://www.gocomics.com/the-awkward-yeti'
    _categories = ('YETI', )
5302
5303
5304
class BerkeleyMewsGoComics(GenericGoComic):
    """Berkeley Mews comics (GoComics source), retrieved with the inherited GenericGoComic logic."""
    # Also published at http://mews.tumblr.com
    # Also published at http://www.berkeleymews.com
    name = 'berkeley-goc'
    long_name = 'Berkeley Mews (from GoComics)'
    url = 'http://www.gocomics.com/berkeley-mews'
    _categories = ('BERKELEY', )
5312
5313
5314
class SheldonGoComics(GenericGoComic):
    """Sheldon comics (GoComics source), retrieved with the inherited GenericGoComic logic."""
    # Also published at http://www.sheldoncomics.com
    name = 'sheldon-goc'
    long_name = 'Sheldon Comics (from GoComics)'
    url = 'http://www.gocomics.com/sheldon'
5320
5321
5322
class FowlLanguageGoComics(GenericGoComic):
    """Fowl Language comics (GoComics source), retrieved with the inherited GenericGoComic logic."""
    # Also published at http://www.fowllanguagecomics.com
    # Also published at http://tapastic.com/series/Fowl-Language-Comics
    # Also published at http://fowllanguagecomics.tumblr.com
    name = 'fowllanguage-goc'
    long_name = 'Fowl Language Comics (from GoComics)'
    url = 'http://www.gocomics.com/fowl-language'
    _categories = ('FOWLLANGUAGE', )
5331
5332
5333
class NickAnderson(GenericGoComic):
    """Nick Anderson comics, retrieved with the inherited GenericGoComic logic."""
    name = 'nickanderson'
    long_name = 'Nick Anderson'
    url = 'http://www.gocomics.com/nickanderson'
5338
5339
5340
class GarfieldGoComics(GenericGoComic):
    """Garfield comics (GoComics source), retrieved with the inherited GenericGoComic logic."""
    # Also published at http://garfield.com
    name = 'garfield-goc'
    long_name = 'Garfield (from GoComics)'
    url = 'http://www.gocomics.com/garfield'
    _categories = ('GARFIELD', )
5347
5348
5349
class DorrisMcGoComics(GenericGoComic):
    """Dorris Mc Comics (GoComics source), retrieved with the inherited GenericGoComic logic."""
    # Also published at http://dorrismccomics.com
    name = 'dorrismc-goc'
    long_name = 'Dorris Mc (from GoComics)'
    url = 'http://www.gocomics.com/dorris-mccomics'
5355
5356
5357
class FoxTrot(GenericGoComic):
    """FoxTrot comics, retrieved with the inherited GenericGoComic logic."""
    name = 'foxtrot'
    long_name = 'FoxTrot'
    url = 'http://www.gocomics.com/foxtrot'
5362
5363
5364
class FoxTrotClassics(GenericGoComic):
    """FoxTrot Classics comics, retrieved with the inherited GenericGoComic logic."""
    name = 'foxtrot-classics'
    long_name = 'FoxTrot Classics'
    url = 'http://www.gocomics.com/foxtrotclassics'
5369
5370
5371
class MisterAndMeGoComics(GenericDeletedComic, GenericGoComic):
    """Mister & Me comics (GoComics source).

    GenericDeletedComic precedes GenericGoComic in the bases, so its
    attributes win in the method resolution order.
    """
    # Also published at http://www.mister-and-me.com
    # Also published at https://tapastic.com/series/Mister-and-Me
    name = 'mister-goc'
    long_name = 'Mister & Me (from GoComics)'
    url = 'http://www.gocomics.com/mister-and-me'
5378
5379
5380
class NonSequitur(GenericGoComic):
    """Non Sequitur (Wiley Miller) comics, retrieved with the inherited GenericGoComic logic."""
    name = 'nonsequitur'
    long_name = 'Non Sequitur'
    url = 'http://www.gocomics.com/nonsequitur'
5385
5386
5387
class JoeyAlisonSayers(GenericGoComic):
    """Joey Alison Sayers comics, retrieved with the inherited GenericGoComic logic."""
    name = 'joeyalison'
    long_name = 'Joey Alison Sayers (from GoComics)'
    url = 'http://www.gocomics.com/joey-alison-sayers-comics'
5392
5393
5394
class SavageChickenGoComics(GenericGoComic):
    """Savage Chicken comics (GoComics source), retrieved with the inherited GenericGoComic logic."""
    # Also published at http://www.savagechickens.com
    name = 'savage-goc'
    long_name = 'Savage Chicken (from GoComics)'
    url = 'http://www.gocomics.com/savage-chickens'
5400
5401
5402
class GenericTapasticComic(GenericListableComic):
    """Generic class to handle the logic common to comics from tapastic.com.

    Subclasses only provide ``name``, ``long_name`` and ``url`` (the series
    page, whose embedded javascript lists the episodes).
    """
    _categories = ('TAPASTIC', )

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Get information about a particular comics.

        ``archive_elt`` is one entry of the list returned by
        get_archive_elements; its 'publishDate' (milliseconds since the
        epoch) and 'title' keys are read here. ``soup`` is the parsed
        episode page.

        Returns None when the page holds no 'art-image' picture.
        """
        timestamp = int(archive_elt['publishDate']) / 1000.0
        # fromtimestamp() without a tz argument yields local time.
        day = datetime.datetime.fromtimestamp(timestamp).date()
        imgs = soup.find_all('img', class_='art-image')
        if not imgs:
            # Presumably the comic is still being uploaded: skip it for now
            # so that it can be retried later.
            return None
        return {
            'day': day.day,
            'year': day.year,
            'month': day.month,
            'img': [i['src'] for i in imgs],
            'title': archive_elt['title'],
        }

    @classmethod
    def get_url_from_archive_element(cls, archive_elt):
        """Build the episode URL from the archive entry's 'id'."""
        return 'http://tapastic.com/episode/' + str(archive_elt['id'])

    @classmethod
    def get_archive_elements(cls):
        """Return the episode list decoded from the series page.

        Information is stored in the javascript part of the page: the first
        line of the form ``episodeList : <json>,`` is located and its JSON
        payload decoded.

        Raises IndexError when no such line is found.
        """
        pref, suff = 'episodeList : ', ','
        # I don't know the clean way to get it so this is the ugly way.
        for raw_line in urlopen_wrapper(cls.url).readlines():
            line = raw_line.decode('utf-8').strip()
            if line.startswith(pref) and line.endswith(suff):
                return json.loads(line[len(pref):-len(suff)])
        raise IndexError("no line starting with %r found at %s" % (pref, cls.url))
5435
5436
5437
class VegetablesForDessert(GenericTapasticComic):
    """Retrieve the Vegetables For Dessert comics from their Tapastic series page."""
    # Other source:
    #   http://vegetablesfordessert.tumblr.com
    name = 'vegetables'
    long_name = 'Vegetables For Dessert'
    url = 'http://tapastic.com/series/vegetablesfordessert'
5443
5444
5445
class FowlLanguageTapa(GenericTapasticComic):
    """Retrieve the Fowl Language comics from their Tapastic series page."""
    # Other sources:
    #   http://www.fowllanguagecomics.com
    #   http://fowllanguagecomics.tumblr.com
    #   http://www.gocomics.com/fowl-language
    name = 'fowllanguage-tapa'
    long_name = 'Fowl Language Comics (from Tapastic)'
    url = 'http://tapastic.com/series/Fowl-Language-Comics'
    _categories = ('FOWLLANGUAGE', )
5454
5455
5456
class OscillatingProfundities(GenericTapasticComic):
    """Retrieve the Oscillating Profundities comics from their Tapastic series page."""
    name = 'oscillating'
    long_name = 'Oscillating Profundities'
    url = 'http://tapastic.com/series/oscillatingprofundities'
5461
5462
5463
class ZnoflatsComics(GenericTapasticComic):
    """Retrieve the Znoflats comics from their Tapastic series page."""
    name = 'znoflats'
    long_name = 'Znoflats Comics'
    url = 'http://tapastic.com/series/Znoflats-Comics'
5468
5469
5470
class SandersenTapastic(GenericTapasticComic):
    """Retrieve the Sarah Andersen comics from their Tapastic series page."""
    # Other sources:
    #   http://sarahcandersen.com
    #   http://www.gocomics.com/sarahs-scribbles
    name = 'sandersen-tapa'
    long_name = 'Sarah Andersen (from Tapastic)'
    url = 'http://tapastic.com/series/Doodle-Time'
5477
5478
5479
class TubeyToonsTapastic(GenericTapasticComic):
    """Retrieve the TubeyToons comics from their Tapastic series page."""
    # Other sources:
    #   http://tubeytoons.com
    #   https://tubeytoons.tumblr.com
    name = 'tubeytoons-tapa'
    long_name = 'Tubey Toons (from Tapastic)'
    url = 'http://tapastic.com/series/Tubey-Toons'
    # NOTE(review): the tag reads 'TUNEYTOONS' (not 'TUBEYTOONS'); kept as-is
    # because other classes may share the exact same tag - confirm.
    _categories = ('TUNEYTOONS', )
5487
5488
5489
class AnythingComicTapastic(GenericTapasticComic):
    """Retrieve the Anything Comic comics from their Tapastic series page."""
    # Other source:
    #   http://www.anythingcomic.com
    name = 'anythingcomic-tapa'
    long_name = 'Anything Comic (from Tapastic)'
    url = 'http://tapastic.com/series/anything'
5495
5496
5497
class UnearthedComicsTapastic(GenericTapasticComic):
    """Retrieve the Unearthed comics from their Tapastic series page."""
    # Other sources:
    #   http://unearthedcomics.com
    #   https://unearthedcomics.tumblr.com
    name = 'unearthed-tapa'
    long_name = 'Unearthed Comics (from Tapastic)'
    url = 'http://tapastic.com/series/UnearthedComics'
    _categories = ('UNEARTHED', )
5505
5506
5507
class EverythingsStupidTapastic(GenericTapasticComic):
    """Retrieve the Everything's Stupid comics from their Tapastic series page."""
    # Other sources:
    #   http://www.webtoons.com/en/challenge/everythings-stupid/list?title_no=14591
    #   http://everythingsstupid.net
    name = 'stupid-tapa'
    long_name = "Everything's Stupid (from Tapastic)"
    url = 'http://tapastic.com/series/EverythingsStupid'
5514
5515
5516
class JustSayEhTapastic(GenericTapasticComic):
    """Retrieve the Just Say Eh comics from their Tapastic series page."""
    # Other source:
    #   http://www.justsayeh.com
    name = 'justsayeh-tapa'
    long_name = 'Just Say Eh (from Tapastic)'
    url = 'http://tapastic.com/series/Just-Say-Eh'
5522
5523
5524
class ThorsThundershackTapastic(GenericTapasticComic):
    """Retrieve the Thor's Thundershack comics from their Tapastic series page."""
    # Other source:
    #   http://www.thorsthundershack.com
    name = 'thor-tapa'
    long_name = 'Thor\'s Thundershack (from Tapastic)'
    url = 'http://tapastic.com/series/Thors-Thundershac'
    _categories = ('THOR', )
5531
5532
5533
class OwlTurdTapastic(GenericTapasticComic):
    """Retrieve the Owl Turd / Shen comix from their Tapastic (tapas.io) series page."""
    # Other source:
    #   http://shencomix.com
    name = 'owlturd-tapa'
    long_name = 'Owl Turd / Shen Comix (from Tapastic)'
    url = 'https://tapas.io/series/Shen-Comix'
    _categories = ('OWLTURD', 'SHENCOMIX')
5540
5541
5542
class GoneIntoRaptureTapastic(GenericTapasticComic):
    """Retrieve the Gone Into Rapture comics from their Tapastic series page."""
    # Other sources:
    #   http://goneintorapture.tumblr.com
    #   http://goneintorapture.com
    name = 'rapture-tapa'
    long_name = 'Gone Into Rapture (from Tapastic)'
    url = 'http://tapastic.com/series/Goneintorapture'
5549
5550
5551
class HeckIfIKnowComicsTapa(GenericTapasticComic):
    """Retrieve the Heck If I Know comics from their Tapastic series page."""
    # Other source:
    #   http://heckifiknowcomics.com
    name = 'heck-tapa'
    long_name = 'Heck if I Know comics (from Tapastic)'
    url = 'http://tapastic.com/series/Regular'
5557
5558
5559
class CheerUpEmoKidTapa(GenericTapasticComic):
    """Retrieve the CheerUpEmoKid comics from their Tapastic series page."""
    # Other sources:
    #   http://www.cheerupemokid.com
    #   https://enzocomics.tumblr.com
    name = 'cuek-tapa'
    long_name = 'Cheer Up Emo Kid (from Tapastic)'
    url = 'http://tapastic.com/series/CUEK'
5566
5567
5568
class BigFootJusticeTapa(GenericTapasticComic):
    """Retrieve the Big Foot Justice comics from their Tapastic series page."""
    # Other source:
    #   http://bigfootjustice.com
    name = 'bigfoot-tapa'
    long_name = 'Big Foot Justice (from Tapastic)'
    url = 'http://tapastic.com/series/bigfoot-justice'
5574
5575
5576
class UpAndOutTapa(GenericTapasticComic):
    """Retrieve the Up & Out comics from their Tapastic series page."""
    # Other source:
    #   http://upandoutcomic.tumblr.com
    name = 'upandout-tapa'
    long_name = 'Up And Out (from Tapastic)'
    url = 'http://tapastic.com/series/UP-and-OUT'
5582
5583
5584
class ToonHoleTapa(GenericTapasticComic):
    """Retrieve the Toon Hole comics from their Tapastic series page."""
    # Other source:
    #   http://www.toonhole.com
    name = 'toonhole-tapa'
    long_name = 'Toon Hole (from Tapastic)'
    url = 'http://tapastic.com/series/TOONHOLE'
5590
5591
5592
class AngryAtNothingTapa(GenericTapasticComic):
    """Retrieve the Angry at Nothing comics from their Tapastic series page."""
    # Other sources:
    #   http://www.angryatnothing.net
    #   http://angryatnothing.tumblr.com
    name = 'angry-tapa'
    long_name = 'Angry At Nothing (from Tapastic)'
    url = 'http://tapastic.com/series/Comics-yeah-definitely-comics-'
5599
5600
5601
class LeleozTapa(GenericTapasticComic):
    """Retrieve the Leleoz comics from their Tapastic series page."""
    # Other source:
    #   http://leleozcomics.tumblr.com
    name = 'leleoz-tapa'
    long_name = 'Leleoz (from Tapastic)'
    url = 'https://tapastic.com/series/Leleoz'
5607
5608
5609
class TheAwkwardYetiTapa(GenericTapasticComic):
    """Retrieve the The Awkward Yeti comics from their Tapastic series page."""
    # Other sources:
    #   http://www.gocomics.com/the-awkward-yeti
    #   http://theawkwardyeti.com
    #   http://larstheyeti.tumblr.com
    name = 'yeti-tapa'
    long_name = 'The Awkward Yeti (from Tapastic)'
    url = 'https://tapastic.com/series/TheAwkwardYeti'
    _categories = ('YETI', )
5618
5619
5620
class AsPerUsualTapa(GenericTapasticComic):
    """Class to retrieve As Per Usual comics."""
    # Also on http://as-per-usual.tumblr.com
    name = 'usual-tapa'
    long_name = 'As Per Usual (from Tapastic)'
    url = 'https://tapastic.com/series/AsPerUsual'
    # Spelled `_categories` (with the leading underscore) like in every other
    # comic class: a plain `categories` attribute does not follow that convention.
    _categories = ('DAMILEE', )
5627
5628
5629
class HotComicsForCoolPeopleTapa(GenericTapasticComic):
    """Class to retrieve Hot Comics For Cool People."""
    # Also on http://hotcomicsforcoolpeople.tumblr.com
    # Also on http://hotcomics.biz (links to tumblr)
    # Also on http://hcfcp.com (links to tumblr)
    name = 'hotcomics-tapa'
    long_name = 'Hot Comics For Cool People (from Tapastic)'
    url = 'https://tapastic.com/series/Hot-Comics-For-Cool-People'
    # Spelled `_categories` (with the leading underscore) like in every other
    # comic class: a plain `categories` attribute does not follow that convention.
    _categories = ('DAMILEE', )
5638
5639
5640
class OneOneOneOneComicTapa(GenericTapasticComic):
    """Retrieve the 1111 Comics from their Tapastic series page."""
    # Other sources:
    #   http://www.1111comics.me
    #   http://comics1111.tumblr.com
    name = '1111-tapa'
    long_name = '1111 Comics (from Tapastic)'
    url = 'https://tapastic.com/series/1111-Comics'
    _categories = ('ONEONEONEONE', )
5648
5649
5650
class TumbleDryTapa(GenericTapasticComic):
    """Class to retrieve Tumble Dry comics."""
    # Also on http://tumbledrycomics.com
    name = 'tumbledry-tapa'
    # Display name fixed: the comic is "Tumble Dry", not "Tumblr Dry".
    long_name = 'Tumble Dry (from Tapastic)'
    url = 'https://tapastic.com/series/TumbleDryComics'
5656
5657
5658
class DeadlyPanelTapa(GenericTapasticComic):
    """Retrieve the Deadly Panel comics from their Tapastic series page."""
    # Other sources:
    #   http://www.deadlypanel.com
    #   https://deadlypanel.tumblr.com
    name = 'deadly-tapa'
    long_name = 'Deadly Panel (from Tapastic)'
    url = 'https://tapastic.com/series/deadlypanel'
5665
5666
5667
class ChrisHallbeckMaxiTapa(GenericTapasticComic):
    """Retrieve the Maximumble comics (Chris Hallbeck) from their Tapastic series page."""
    # Other sources:
    #   https://chrishallbeck.tumblr.com
    #   http://maximumble.com
    # NOTE(review): 'Hallback' below (display name and tag) differs from the
    # 'Hallbeck' spelling of the class name and urls; kept as-is - confirm.
    name = 'hallbeckmaxi-tapa'
    long_name = 'Chris Hallback - Maximumble (from Tapastic)'
    url = 'https://tapastic.com/series/Maximumble'
    _categories = ('HALLBACK', )
5675
5676
5677
class ChrisHallbeckMiniTapa(GenericDeletedComic, GenericTapasticComic):
    """Retrieve the Minimumble comics (Chris Hallbeck) from their Tapastic series page."""
    # GenericDeletedComic comes first in the bases, so its attributes take
    # precedence over the Tapastic ones.
    # Other sources:
    #   https://chrishallbeck.tumblr.com
    #   http://minimumble.com
    # NOTE(review): 'Hallback' below (display name and tag) differs from the
    # 'Hallbeck' spelling of the class name and urls; kept as-is - confirm.
    name = 'hallbeckmini-tapa'
    long_name = 'Chris Hallback - Minimumble (from Tapastic)'
    url = 'https://tapastic.com/series/Minimumble'
    _categories = ('HALLBACK', )
5685
5686
5687
class ChrisHallbeckBiffTapa(GenericDeletedComic, GenericTapasticComic):
    """Retrieve the Book of Biff comics (Chris Hallbeck) from their Tapastic series page."""
    # GenericDeletedComic comes first in the bases, so its attributes take
    # precedence over the Tapastic ones.
    # Other sources:
    #   https://chrishallbeck.tumblr.com
    #   http://thebookofbiff.com
    # NOTE(review): 'Hallback' below (display name and tag) differs from the
    # 'Hallbeck' spelling of the class name and urls; kept as-is - confirm.
    name = 'hallbeckbiff-tapa'
    long_name = 'Chris Hallback - The Book of Biff (from Tapastic)'
    url = 'https://tapastic.com/series/Biff'
    _categories = ('HALLBACK', )
5695
5696
5697
class RandoWisTapa(GenericTapasticComic):
    """Retrieve the RandoWis comics from their Tapastic series page."""
    # Other source:
    #   https://randowis.com
    name = 'randowis-tapa'
    long_name = 'RandoWis (from Tapastic)'
    url = 'https://tapastic.com/series/RandoWis'
5703
5704
5705
class PigeonGazetteTapa(GenericTapasticComic):
    """Retrieve the The Pigeon Gazette comics from their Tapastic series page."""
    # Other source:
    #   http://thepigeongazette.tumblr.com
    name = 'pigeon-tapa'
    long_name = 'The Pigeon Gazette (from Tapastic)'
    url = 'https://tapastic.com/series/The-Pigeon-Gazette'
5711
5712
5713
class TheOdd1sOutTapa(GenericTapasticComic):
    """Retrieve the The Odd 1s Out comics from their Tapastic series page."""
    # Other sources:
    #   http://theodd1sout.com
    #   http://theodd1sout.tumblr.com
    name = 'theodd-tapa'
    long_name = 'The Odd 1s Out (from Tapastic)'
    url = 'https://tapastic.com/series/Theodd1sout'
5720
5721
5722
class TheWorldIsFlatTapa(GenericTapasticComic):
    """Retrieve the The World Is Flat comics from their Tapastic series page."""
    # Other source:
    #   http://theworldisflatcomics.tumblr.com
    name = 'flatworld-tapa'
    long_name = 'The World Is Flat (from Tapastic)'
    url = 'https://tapastic.com/series/The-World-is-Flat'
5728
5729
5730
class MisterAndMeTapa(GenericTapasticComic):
    """Retrieve the Mister & Me comics from their Tapastic series page."""
    # Other sources:
    #   http://www.mister-and-me.com
    #   http://www.gocomics.com/mister-and-me
    name = 'mister-tapa'
    long_name = 'Mister & Me (from Tapastic)'
    url = 'https://tapastic.com/series/Mister-and-Me'
5737
5738
5739
class TalesOfAbsurdityTapa(GenericDeletedComic, GenericTapasticComic):
    """Retrieve the Tales Of Absurdity comics from their Tapastic series page."""
    # GenericDeletedComic comes first in the bases, so its attributes take
    # precedence over the Tapastic ones.
    # Other sources:
    #   http://talesofabsurdity.com
    #   http://talesofabsurdity.tumblr.com
    name = 'absurdity-tapa'
    long_name = 'Tales of Absurdity (from Tapastic)'
    url = 'http://tapastic.com/series/Tales-Of-Absurdity'
    _categories = ('ABSURDITY', )
5747
5748
5749
class BFGFSTapa(GenericTapasticComic):
    """Retrieve the BFGFS comics from their Tapastic series page."""
    # Other sources:
    #   http://bfgfs.com
    #   https://bfgfs.tumblr.com
    name = 'bfgfs-tapa'
    long_name = 'BFGFS (from Tapastic)'
    url = 'https://tapastic.com/series/BFGFS'
5756
5757
5758
class DoodleForFoodTapa(GenericTapasticComic):
    """Retrieve the Doodle For Food comics from their Tapastic series page."""
    # Other source:
    #   http://www.doodleforfood.com
    name = 'doodle-tapa'
    long_name = 'Doodle For Food (from Tapastic)'
    url = 'https://tapastic.com/series/Doodle-for-Food'
5764
5765
5766
class MrLovensteinTapa(GenericTapasticComic):
    """Retrieve the Mr Lovenstein comics from their Tapastic series page."""
    # NOTE(review): the previous "Also on" comment pointed at this very
    # Tapastic url; the comic's own site is presumably
    # http://www.mrlovenstein.com - confirm.
    name = 'mrlovenstein-tapa'
    long_name = 'Mr. Lovenstein (from Tapastic)'
    url = 'https://tapastic.com/series/MrLovenstein'
5772
5773
5774
class CassandraCalinTapa(GenericTapasticComic):
    """Retrieve the C. Cassandra comics from their Tapastic series page."""
    # Other sources:
    #   http://cassandracalin.com
    #   http://c-cassandra.tumblr.com
    name = 'cassandra-tapa'
    long_name = 'Cassandra Calin (from Tapastic)'
    url = 'https://tapastic.com/series/C-Cassandra-comics'
5781
5782
5783
class WafflesAndPancakes(GenericTapasticComic):
    """Retrieve the Waffles And Pancakes comics from their Tapastic series page."""
    # Other source:
    #   http://wandpcomic.com
    name = 'waffles'
    long_name = 'Waffles And Pancakes'
    url = 'https://tapastic.com/series/Waffles-and-Pancakes'
5789
5790
5791
class YesterdaysPopcornTapastic(GenericTapasticComic):
    """Retrieve the Yesterday's Popcorn comics from their Tapastic series page."""
    # Other sources:
    #   http://www.yesterdayspopcorn.com
    #   http://yesterdayspopcorn.tumblr.com
    name = 'popcorn-tapa'
    long_name = 'Yesterday\'s Popcorn (from Tapastic)'
    url = 'https://tapastic.com/series/Yesterdays-Popcorn'
5798
5799
5800
class OurSuperAdventureTapastic(GenericDeletedComic, GenericTapasticComic):
    """Retrieve the Our Super Adventure comics from their Tapastic series page."""
    # GenericDeletedComic comes first in the bases, so its attributes take
    # precedence over the Tapastic ones.
    # Other sources:
    #   http://www.oursuperadventure.com
    #   http://sarahssketchbook.tumblr.com
    #   http://sarahgraley.com
    name = 'superadventure-tapastic'
    long_name = 'Our Super Adventure (from Tapastic)'
    url = 'https://tapastic.com/series/Our-Super-Adventure'
5808
5809
5810
class NamelessPCs(GenericTapasticComic):
    """Retrieve the Nameless PCs comics from their Tapastic series page."""
    # Other source:
    #   http://namelesspcs.com
    name = 'namelesspcs-tapa'
    long_name = 'NamelessPCs (from Tapastic)'
    url = 'https://tapastic.com/series/NamelessPC'
5816
5817
5818
class DownTheUpwardSpiralTapa(GenericTapasticComic):
    """Retrieve the Down The Upward Spiral comics from their Tapastic series page."""
    # Other sources:
    #   http://www.downtheupwardspiral.com
    #   http://downtheupwardspiral.tumblr.com
    name = 'spiral-tapa'
    long_name = 'Down the Upward Spiral (from Tapastic)'
    url = 'https://tapastic.com/series/Down-the-Upward-Spiral'
5825
5826
5827
class UbertoolTapa(GenericTapasticComic):
    """Retrieve the Ubertool comics from their Tapastic series page."""
    # Other sources:
    #   http://ubertoolcomic.com
    #   https://ubertool.tumblr.com
    name = 'ubertool-tapa'
    long_name = 'Ubertool (from Tapastic)'
    url = 'https://tapastic.com/series/ubertool'
    _categories = ('UBERTOOL', )
5835
5836
5837
class BarteNerdsTapa(GenericDeletedComic, GenericTapasticComic):
    """Retrieve the BarteNerds comics from their Tapastic series page."""
    # GenericDeletedComic comes first in the bases, so its attributes take
    # precedence over the Tapastic ones.
    # Other source:
    #   http://www.bartenerds.com
    name = 'bartenerds-tapa'
    long_name = 'BarteNerds (from Tapastic)'
    url = 'https://tapastic.com/series/BarteNERDS'
5843
5844
5845
class SmallBlueYonderTapa(GenericTapasticComic):
    """Retrieve the Small Blue Yonder comics from their Tapastic series page."""
    # Other source:
    #   http://www.smallblueyonder.com
    name = 'smallblue-tapa'
    long_name = 'Small Blue Yonder (from Tapastic)'
    url = 'https://tapastic.com/series/Small-Blue-Yonder'
5851
5852
5853
class TizzyStitchBirdTapa(GenericTapasticComic):
    """Retrieve the Tizzy Stitch Bird comics from their Tapastic series page."""
    # Other sources:
    #   http://tizzystitchbird.com
    #   http://tizzystitchbird.tumblr.com
    #   http://www.webtoons.com/en/challenge/tizzy-stitchbird/list?title_no=50082
    name = 'tizzy-tapa'
    long_name = 'Tizzy Stitch Bird (from Tapastic)'
    url = 'https://tapastic.com/series/TizzyStitchbird'
5861
5862
5863
class RockPaperCynicTapa(GenericTapasticComic):
    """Retrieve the RockPaperCynic comics from their Tapastic series page."""
    # Other sources:
    #   http://www.rockpapercynic.com
    #   http://rockpapercynic.tumblr.com
    name = 'rpc-tapa'
    long_name = 'Rock Paper Cynic (from Tapastic)'
    url = 'https://tapastic.com/series/rockpapercynic'
5870
5871
5872
class IsItCanonTapa(GenericTapasticComic):
    """Retrieve the Is It Canon comics from their Tapastic series page."""
    # Other source:
    #   http://www.isitcanon.com
    name = 'canon-tapa'
    long_name = 'Is It Canon (from Tapastic)'
    url = 'http://tapastic.com/series/isitcanon'
5878
5879
5880
class ItsTheTieTapa(GenericTapasticComic):
    """Retrieve the It's the tie comics from their Tapastic series page."""
    # Other sources:
    #   http://itsthetie.com
    #   http://itsthetie.tumblr.com
    name = 'tie-tapa'
    long_name = "It's the tie (from Tapastic)"
    url = "https://tapastic.com/series/itsthetie"
    _categories = ('TIE', )
5888
5889
5890
class JamesOfNoTradesTapa(GenericTapasticComic):
    """Retrieve the JamesOfNoTrades comics from their Tapastic (tapas.io) series page."""
    # Other sources:
    #   http://jamesofnotrades.com
    #   http://www.webtoons.com/en/challenge/james-of-no-trades/list?title_no=43422
    #   http://jamesfregan.tumblr.com
    name = 'jamesofnotrades-tapa'
    long_name = 'James Of No Trades (from Tapastic)'
    url = 'https://tapas.io/series/James-of-No-Trades'
    _categories = ('JAMESOFNOTRADES', )
5899
5900
5901
class MomentumTapa(GenericTapasticComic):
    """Retrieve the Momentum comics from their Tapastic series page."""
    # Other source:
    #   http://www.momentumcomic.com
    name = 'momentum-tapa'
    long_name = 'Momentum (from Tapastic)'
    url = 'https://tapastic.com/series/momentum'
5907
5908
5909
class InYourFaceCakeTapa(GenericTapasticComic):
    """Retrieve the In Your Face Cake comics from their Tapastic (tapas.io) series page."""
    # Other source:
    #   https://in-your-face-cake.tumblr.com
    name = 'inyourfacecake-tapa'
    long_name = 'In Your Face Cake (from Tapastic)'
    url = 'https://tapas.io/series/In-Your-Face-Cake'
    _categories = ('INYOURFACECAKE', )
5916
5917
5918
class CowardlyComicsTapa(GenericTapasticComic):
    """Retrieve the Cowardly Comics from their Tapastic (tapas.io) series page."""
    # Other sources:
    #   http://cowardlycomics.tumblr.com
    #   http://www.webtoons.com/en/challenge/cowardly-comics/list?title_no=65893
    name = 'cowardly-tapa'
    long_name = 'Cowardly Comics (from Tapastic)'
    url = 'https://tapas.io/series/CowardlyComics'
5925
5926
5927
class Caw4hwTapa(GenericTapasticComic):
    """Retrieve the Caw4hw comics from their Tapastic (tapas.io) series page."""
    # Other source:
    #   https://caw4hw.tumblr.com
    name = 'caw4hw-tapa'
    long_name = 'Caw4hw (from Tapastic)'
    url = 'https://tapas.io/series/CAW4HW'
5933
5934
5935
class DontBeDadTapa(GenericTapasticComic):
    """Retrieve the Don't Be Dad comics from their Tapastic (tapas.io) series page."""
    # Other sources:
    #   https://dontbedad.com/
    #   http://www.webtoons.com/en/challenge/dontbedad/list?title_no=123074
    name = 'dontbedad-tapa'
    long_name = "Don't Be Dad (from Tapastic)"
    url = 'https://tapas.io/series/DontBeDad-Comics'
5942
5943
5944
class APleasantWasteOfTimeTapa(GenericTapasticComic):
    """Retrieve the A Pleasant Waste Of Time comics from their Tapastic (tapas.io) series page."""
    # Other source:
    #   https://artjcf.tumblr.com
    name = 'pleasant-waste-tapa'
    long_name = 'A Pleasant Waste Of Time (from Tapastic)'
    url = 'https://tapas.io/series/A-Pleasant-'
    _categories = ('WASTE', )
5951
5952
5953
class InfiniteImmortalBensTapa(GenericTapasticComic):
    """Retrieve the Infinite Immortal Bens comics from their Tapastic (tapas.io) series page."""
    # Other sources:
    #   http://www.webtoons.com/en/challenge/infinite-immortal-bens/list?title_no=32847
    #   https://infiniteimmortalbens.tumblr.com
    name = 'infiniteimmortal-tapa'
    long_name = 'Infinite Immortal Bens (from Tapastic)'
    url = 'https://tapas.io/series/Infinite-Immortal-Bens'
    _categories = ('INFINITEIMMORTAL', )
5961
5962
5963
class EatMyPaintTapa(GenericTapasticComic):
    """Retrieve the Eat My Paint comics from their Tapastic (tapas.io) series page."""
    # Other source:
    #   https://eatmypaint.tumblr.com
    name = 'eatmypaint-tapa'
    long_name = 'Eat My Paint (from Tapastic)'
    url = 'https://tapas.io/series/eatmypaint'
    _categories = ('EATMYPAINT', )
5970
5971
5972
class RoryTapastic(GenericTapasticComic):
    """Retrieve the Rory comics from their Tapastic (tapas.io) series page."""
    # Other source:
    #   https://rorycomics.tumblr.com/
    name = 'rory-tapa'
    long_name = 'Rory (from Tapastic)'
    url = 'https://tapas.io/series/Share-Your-Vulnerability'
    _categories = ('RORY',)
5979
5980
5981
class MercworksTapa(GenericTapasticComic):
    """Retrieve the Mercworks comics from their Tapastic series page."""
    # Other sources:
    #   http://mercworks.net
    #   http://www.webtoons.com/en/comedy/mercworks/list?title_no=426
    #   http://mercworks.tumblr.com
    name = 'mercworks-tapa'
    long_name = 'Mercworks (from Tapastic)'
    url = 'https://tapastic.com/series/MercWorks'
    _categories = ('MERCWORKS', )
5990
5991
5992
class AbsurdoLapin(GenericNavigableComic):
    """Retrieve the Absurdo comics from absurdo.lapin.org."""
    name = 'absurdo'
    long_name = 'Absurdo'
    url = 'https://absurdo.lapin.org'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_nav(cls, soup):
        """Return the navigation anchors found in the soup object.

        The anchors are listed in the order: prev, first, last, next.
        """
        content = soup.find('div', id='content')
        button_blocks = content.find_all('div', class_='buttons')
        # Exactly two 'buttons' blocks are expected; only the second is used.
        _, nav_block = button_blocks
        anchors = []
        for item in nav_block.find_all('li'):
            anchors.append(item.find('a'))
        return anchors

    @classmethod
    def get_first_comic_link(cls):
        """Return the link to the first comic, read from the home page."""
        home_nav = cls.get_nav(get_soup_at_url(cls.url))
        return home_nav[1]

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the link to the next (if next_) or previous comic."""
        nav = cls.get_nav(last_soup)
        if next_:
            return nav[3]
        return nav[0]

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the title, image urls, tags and author of a comic page."""
        def meta_content(meta_name):
            """Content of the <meta> tag with the given name."""
            return soup.find('meta', attrs={'name': meta_name})['content']

        author = meta_content('author')
        keywords = meta_content('keywords')
        page_title = soup.find('title').string
        images = soup.find('div', id='content').find_all('img')
        # Image sources are resolved against the site url.
        img_urls = [urljoin_wrapper(cls.url, img['src']) for img in images]
        return {
            'title': page_title,
            'img': img_urls,
            'tags': keywords,
            'author': author,
        }
6030
6031
6032
def get_subclasses(klass):
    """Return the list of direct and indirect subclasses of a class.

    Direct subclasses come first, followed by the descendants of each of
    them (in the same order)."""
    direct = klass.__subclasses__()
    collected = list(direct)
    for child in direct:
        collected += get_subclasses(child)
    return collected
6038
6039
6040
def remove_st_nd_rd_th_from_date(string):
    """Function to transform 1st/2nd/3rd/4th in a parsable date format.

    Only an ordinal suffix directly following a digit is removed, so that
    day and month names containing 'st', 'nd', 'rd' or 'th' ('August',
    'Monday', 'Saturday', ...) are left untouched.

    >>> remove_st_nd_rd_th_from_date('Monday 2nd August 2015')
    'Monday 2 August 2015'
    """
    return re.sub(r'(?<=\d)(?:st|nd|rd|th)\b', '', string)
6048
6049
6050
def string_to_date(string, date_format, local=DEFAULT_LOCAL):
    """Function to convert string to date object.

    Wrapper around datetime.datetime.strptime: the locale `local` is set
    during the parsing (so that localised day/month names are understood)
    and the previous locale is restored afterwards, even when the parsing
    fails.

    Raises ValueError (from strptime) if `string` does not match
    `date_format`."""
    # format described in https://docs.python.org/3/library/datetime.html#strftime-and-strptime-behavior
    prev_locale = locale.setlocale(locale.LC_ALL)
    switch_locale = local != prev_locale
    if switch_locale:
        locale.setlocale(locale.LC_ALL, local)
    try:
        return datetime.datetime.strptime(string, date_format).date()
    finally:
        # Always restore the locale: it is a process-wide setting.
        if switch_locale:
            locale.setlocale(locale.LC_ALL, prev_locale)
6061
6062
6063
# Every class deriving (directly or not) from GenericComic.
COMICS = set(get_subclasses(GenericComic))
# Classes whose `name` is None are left out (presumably the generic helper
# classes - confirm).
VALID_COMICS = [c for c in COMICS if c.name is not None]
# Lookup of the comic classes by their `name`; names must be unique.
COMIC_NAMES = {c.name: c for c in VALID_COMICS}
assert len(VALID_COMICS) == len(COMIC_NAMES), \
    "Duplicated comic names: %s" % sorted(
        {c.name for c in VALID_COMICS if COMIC_NAMES[c.name] is not c})
# Class names must be unique too.
CLASS_NAMES = {c.__name__ for c in VALID_COMICS}
assert len(VALID_COMICS) == len(CLASS_NAMES), \
    "Duplicated class names: %s" % sorted(
        n for n in CLASS_NAMES
        if sum(c.__name__ == n for c in VALID_COMICS) > 1)
6069