Completed
Push — master ( 7738bb...465ed4 )
by De
01:11
created

comics.py (20 issues)

Code
1
#! /usr/bin/python3
2
# vim: set expandtab tabstop=4 shiftwidth=4 :
3
"""Module to retrieve webcomics"""
4
5
from comic_abstract import GenericComic, get_date_for_comic
6
import re
7
from datetime import date, timedelta
8
import datetime
9
from urlfunctions import get_soup_at_url, urljoin_wrapper,\
10
    convert_iri_to_plain_ascii_uri, load_json_at_url, urlopen_wrapper
11
import json
12
import locale
13
import urllib
14
15
DEFAULT_LOCAL = 'en_GB.UTF-8'
16
17
18
class Xkcd(GenericComic):
    """Class to retrieve Xkcd comics."""
    name = 'xkcd'
    long_name = 'xkcd'
    url = 'http://xkcd.com'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generator to get the next comic. Implementation of GenericComic's abstract method.

        Starts after the number stored in `last_comic['num']` (or from 1 when
        `last_comic` is falsy) and stops at the number reported by the
        site-wide `info.0.json`. Each yielded value is the JSON dict of one
        comic, completed with 'prefix', 'json_url' and 'url', with 'img'
        wrapped in a list and day/month/year converted to int.
        """
        first_num = last_comic['num'] if last_comic else 0
        last_num = load_json_at_url(
            urljoin_wrapper(cls.url, 'info.0.json'))['num']

        for num in range(first_num + 1, last_num + 1):
            # Number 404 is deliberately skipped
            # (presumably there is no such comic page - TODO confirm).
            if num == 404:
                continue
            json_url = urljoin_wrapper(cls.url, '%d/info.0.json' % num)
            comic = load_json_at_url(json_url)
            comic['img'] = [comic['img']]
            comic['prefix'] = '%d-' % num
            comic['json_url'] = json_url
            comic['url'] = urljoin_wrapper(cls.url, str(num))
            comic['day'] = int(comic['day'])
            comic['month'] = int(comic['month'])
            comic['year'] = int(comic['year'])
            assert comic['num'] == num
            yield comic
44
45
46
# Helper functions corresponding to get_url_from_link/get_url_from_archive_element
47
48
49
@classmethod
def get_href(cls, link):
    """Implementation of get_url_from_link/get_url_from_archive_element.

    Return the 'href' entry of `link` unchanged.
    """
    return link['href']
53
54
55
@classmethod
def join_cls_url_to_href(cls, link):
    """Implementation of get_url_from_link/get_url_from_archive_element.

    Return the 'href' entry of `link` joined to the class `url`.
    """
    return urljoin_wrapper(cls.url, link['href'])
59
60
61
class GenericNavigableComic(GenericComic):
    """Generic class for "navigable" comics : with first/next arrows.

    This class applies to comics where previous and next comics can be
    accessed from a given comic. Once given a starting point (either
    the first comic or the last comic retrieved), it will handle the
    navigation, the retrieval of the soup object and the setting of
    the 'url' attribute on retrieved comics. This limits a lot the
    amount of boilerplate code in the different implementation classes.

    The method `get_next_comic` is implemented in terms of new
    more specialized methods to be implemented/overridden:
        - get_first_comic_link
        - get_navi_link
        - get_comic_info
        - get_url_from_link
    """
    _categories = ('NAVIGABLE', )

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        Sometimes this can be retrieved from any comic page, sometimes from
        the archive page, sometimes it doesn't exist at all and one has
        to iterate backward to find it before hardcoding the result found.
        """
        raise NotImplementedError

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next (or previous - for dev purposes) comic."""
        raise NotImplementedError

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic."""
        raise NotImplementedError

    @classmethod
    def get_url_from_link(cls, link):
        """Get url corresponding to a link. Default implementation is similar to get_href."""
        return link['href']

    @classmethod
    def get_next_link(cls, last_soup):
        """Get link to next comic."""
        link = cls.get_navi_link(last_soup, True)
        cls.log("Next link is %s" % link)
        return link

    @classmethod
    def get_prev_link(cls, last_soup):
        """Get link to previous comic."""
        link = cls.get_navi_link(last_soup, False)
        cls.log("Prev link is %s" % link)
        return link

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generic implementation of get_next_comic for navigable comics.

        Starts from the link following `last_comic['url']`, or from the
        first comic link when `last_comic` is falsy, and yields the dict
        returned by `get_comic_info` (with 'url' added) for each page until
        no next link is found or the next link points to the current url.
        """
        url = last_comic['url'] if last_comic else None
        cls.log("starting 'get_next_comic' from %s" % url)
        next_comic = \
            cls.get_next_link(get_soup_at_url(url)) \
            if url else \
            cls.get_first_comic_link()
        cls.log("next/first comic will be %s (url is %s)" % (str(next_comic), url))
        # cls.check_navigation(url)
        while next_comic:
            prev_url, url = url, cls.get_url_from_link(next_comic)
            # A "next" link pointing to the current page would loop forever.
            if prev_url == url:
                cls.log("got same url %s" % url)
                break
            cls.log("about to get %s (%s)" % (url, str(next_comic)))
            soup = get_soup_at_url(url)
            comic = cls.get_comic_info(soup, next_comic)
            # get_comic_info may return None: the page is then skipped
            # but navigation continues from it.
            if comic is not None:
                assert 'url' not in comic
                comic['url'] = url
                yield comic
            next_comic = cls.get_next_link(soup)
            cls.log("next comic will be %s" % str(next_comic))

    @classmethod
    def check_first_link(cls):
        """Check that navigation to first comic seems to be working - for dev purposes."""
        # A bare `import urllib` does not guarantee that the `urllib.error`
        # submodule is loaded, so import the exception explicitly.
        from urllib.error import HTTPError

        cls.log("about to check first link")
        ok = True
        firstlink = cls.get_first_comic_link()
        if firstlink is None:
            print("From %s : no first link" % cls.url)
            ok = False
        else:
            firsturl = cls.get_url_from_link(firstlink)
            try:
                get_soup_at_url(firsturl)
            except HTTPError:
                print("From %s : invalid first url" % cls.url)
                ok = False
        cls.log("checked first link -> returned %d" % ok)
        return ok

    @classmethod
    def check_prev_next_links(cls, url):
        """Check that navigation to prev/next from a given URL seems to be working - for dev purposes."""
        cls.log("about to check prev/next from %s" % url)
        ok = True
        if url is None:
            prevlink, nextlink = None, None
        else:
            soup = get_soup_at_url(url)
            prevlink, nextlink = cls.get_prev_link(soup), cls.get_next_link(soup)
        if prevlink is None and nextlink is None:
            print("From %s : no previous nor next" % url)
            ok = False
        else:
            if prevlink:
                # Going back then forward must lead to the starting url.
                prevurl = cls.get_url_from_link(prevlink)
                prevsoup = get_soup_at_url(prevurl)
                prevnextlink = cls.get_next_link(prevsoup)
                prevnext = cls.get_url_from_link(prevnextlink) if prevnextlink is not None else "NO URL"
                if prevnext != url:
                    print("From %s, going backward then forward leads to %s" % (url, prevnext))
                    ok = False
            if nextlink:
                # Going forward then back must lead to the starting url
                # (unless the next link points to the page itself).
                nexturl = cls.get_url_from_link(nextlink)
                if nexturl != url:
                    nextsoup = get_soup_at_url(nexturl)
                    nextprevlink = cls.get_prev_link(nextsoup)
                    nextprev = cls.get_url_from_link(nextprevlink) if nextprevlink is not None else "NO URL"
                    if nextprev != url:
                        print("From %s, going forward then backward leads to %s" % (url, nextprev))
                        ok = False
        cls.log("checked prev/next from %s -> returned %d" % (url, ok))
        return ok

    @classmethod
    def check_navigation(cls, url):
        """Check that navigation functions seem to be working - for dev purposes."""
        cls.log("about to check navigation from %s" % url)
        first = cls.check_first_link()
        prevnext = cls.check_prev_next_links(url)
        ok = first and prevnext
        cls.log("checked navigation from %s -> returned %d" % (url, ok))
        return ok
207
208
209
class GenericListableComic(GenericComic):
    """Generic class for "listable" comics : with a list of comics (aka 'archive')

    The method `get_next_comic` is implemented in terms of new
    more specialized methods to be implemented/overridden:
        - get_archive_elements
        - get_url_from_archive_element
        - get_comic_info
    """
    _categories = ('LISTABLE', )

    @classmethod
    def get_archive_elements(cls):
        """Get the archive elements (iterable)."""
        raise NotImplementedError

    @classmethod
    def get_url_from_archive_element(cls, archive_elt):
        """Get url corresponding to an archive element."""
        raise NotImplementedError

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Get information about a particular comic."""
        raise NotImplementedError

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generic implementation of get_next_comic for listable comics.

        Walks the archive elements in order. Elements are ignored until the
        one whose url equals `last_comic['url']` has been seen; every
        element after it is fetched and yielded. When `last_comic` is
        falsy, every element is fetched.
        """
        waiting_for_url = last_comic['url'] if last_comic else None
        archive_elts = list(cls.get_archive_elements())
        for archive_elt in archive_elts:
            url = cls.get_url_from_archive_element(archive_elt)
            cls.log("considering %s" % url)
            if waiting_for_url is None:
                cls.log("about to get %s (%s)" % (url, str(archive_elt)))
                soup = get_soup_at_url(url)
                comic = cls.get_comic_info(soup, archive_elt)
                if comic is not None:
                    assert 'url' not in comic
                    comic['url'] = url
                    yield comic
            elif waiting_for_url == url:
                # Last retrieved comic found: following elements are new.
                waiting_for_url = None
        if waiting_for_url is not None:
            print("Did not find %s in the %d comics: there might be a problem" %
                  (waiting_for_url, len(archive_elts)))
256
257
# Helper functions corresponding to get_first_comic_link/get_navi_link
258
259
260
@classmethod
def get_link_rel_next(cls, last_soup, next_):
    """Implementation of get_navi_link.

    Look up the first 'link' tag with rel 'next' (or 'prev' when `next_`
    is false) in `last_soup`.
    """
    return last_soup.find('link', rel='next' if next_ else 'prev')
264
265
266
@classmethod
def get_a_rel_next(cls, last_soup, next_):
    """Implementation of get_navi_link.

    Look up the first 'a' tag with rel 'next' (or 'prev' when `next_`
    is false) in `last_soup`.
    """
    return last_soup.find('a', rel='next' if next_ else 'prev')
270
271
272
@classmethod
def get_a_navi_navinext(cls, last_soup, next_):
    """Implementation of get_navi_link.

    Look up the first 'a' tag with class 'navi navi-next' (or
    'navi navi-prev' when `next_` is false) in `last_soup`.
    """
    return last_soup.find('a', class_='navi navi-next' if next_ else 'navi navi-prev')
276
277
278
@classmethod
def get_a_navi_comicnavnext_navinext(cls, last_soup, next_):
    """Implementation of get_navi_link.

    Look up the first 'a' tag with class 'navi comic-nav-next navi-next'
    (or 'navi comic-nav-previous navi-prev' when `next_` is false).
    """
    return last_soup.find('a', class_='navi comic-nav-next navi-next' if next_ else 'navi comic-nav-previous navi-prev')
282
283
284
@classmethod
def get_a_comicnavbase_comicnavnext(cls, last_soup, next_):
    """Implementation of get_navi_link.

    Look up the first 'a' tag with class 'comic-nav-base comic-nav-next'
    (or 'comic-nav-base comic-nav-previous' when `next_` is false).
    """
    return last_soup.find('a', class_='comic-nav-base comic-nav-next' if next_ else 'comic-nav-base comic-nav-previous')
288
289
290
@classmethod
def get_a_navi_navifirst(cls):
    """Implementation of get_first_comic_link.

    Fetch the page at the class `url` and return its first 'a' tag with
    class 'navi navi-first'.
    """
    return get_soup_at_url(cls.url).find('a', class_='navi navi-first')
294
295
296
@classmethod
def get_div_navfirst_a(cls):
    """Implementation of get_first_comic_link.

    Fetch the page at the class `url` and return the first 'a' tag inside
    the 'div' with class "nav-first", or None when there is no such div
    (instead of failing with an AttributeError on None).
    """
    div = get_soup_at_url(cls.url).find('div', class_="nav-first")
    return div.find('a') if div is not None else None
300
301
302
@classmethod
def get_a_comicnavbase_comicnavfirst(cls):
    """Implementation of get_first_comic_link.

    Fetch the page at the class `url` and return its first 'a' tag with
    class 'comic-nav-base comic-nav-first'.
    """
    return get_soup_at_url(cls.url).find('a', class_='comic-nav-base comic-nav-first')
306
307
308
@classmethod
def simulate_first_link(cls):
    """Implementation of get_first_comic_link creating a link-like object from
    a URL provided by the class (attribute `first_url`).

    Note: The first URL can easily be found using :
    `get_first_comic_link = navigate_to_first_comic`.
    """
    return {'href': cls.first_url}
317
318
319
@classmethod
def navigate_to_first_comic(cls):
    """Implementation of get_first_comic_link navigating from a user provided
    URL to the first comic.

    Sometimes, the first comic cannot be reached directly so to start
    from the first comic one has to go to the previous comic until
    there is no previous comic. Once this URL is reached, it
    is better to hardcode it but for development purposes, it
    is convenient to have an automatic way to find it.

    Then, the URL found can easily be used via `simulate_first_link`.
    """
    # Start from the class attribute when it exists, otherwise ask the user.
    try:
        url = cls.first_url
    except AttributeError:
        url = input("Get starting URL: ")
    print(url)
    comic = cls.get_prev_link(get_soup_at_url(url))
    # Follow "previous" links until a page has none.
    while comic:
        url = cls.get_url_from_link(comic)
        print(url)
        comic = cls.get_prev_link(get_soup_at_url(url))
    return {'href': url}
343
344
345
class GenericEmptyComic(GenericComic):
    """Generic class for comics where nothing is to be done.

    It can be useful to deactivate temporarily comics that do not work
    properly by replacing `class MyComic(GenericWhateverComic)` with
    `class MyComic(GenericEmptyComic, GenericWhateverComic)`."""
    _categories = ('EMPTY', )

    @classmethod
    def get_next_comic(cls, last_comic):
        """Implementation of get_next_comic returning no comics."""
        cls.log("comic is considered as empty - returning no comic")
        return []
358
359
360
class ExtraFabulousComics(GenericNavigableComic):
    """Class to retrieve Extra Fabulous Comics."""
    name = 'efc'
    long_name = 'Extra Fabulous Comics'
    url = 'http://extrafabulouscomics.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        Images are the 'img' tags whose src starts with the site's
        wp-content/uploads path; title and date come from meta tags.
        """
        # Escape the url so that its dots only match literal dots.
        img_src_re = re.compile('^%s/wp-content/uploads/' % re.escape(cls.url))
        imgs = soup.find_all('img', src=img_src_re)
        title = soup.find('meta', property='og:title')['content']
        # Only the first 10 characters (the "%Y-%m-%d" part) are parsed.
        date_str = soup.find('meta', property='article:published_time')['content'][:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        return {
            'title': title,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'prefix': title + '-'
        }
384
385
386
class GenericLeMondeBlog(GenericNavigableComic):
    """Generic class to retrieve comics from Le Monde blogs."""
    _categories = ('LEMONDE', 'FRANCAIS')
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    # To be overridden by each concrete blog class.
    first_url = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic."""
        url2 = soup.find('link', rel='shortlink')['href']
        title = soup.find('meta', property='og:title')['content']
        date_str = soup.find("span", class_="entry-date").string
        # The date is parsed with a French locale ("%d %B %Y").
        day = string_to_date(date_str, "%d %B %Y", "fr_FR.utf8")
        imgs = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'url2': url2,
            'img': [convert_iri_to_plain_ascii_uri(i['content']) for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
409
410
411
class ZepWorld(GenericLeMondeBlog):
    """Class to retrieve Zep World comics."""
    name = "zep"
    long_name = "Zep World"
    url = "http://zepworld.blog.lemonde.fr"
    first_url = "http://zepworld.blog.lemonde.fr/2014/10/31/bientot-le-blog-de-zep/"
417
418
419
class Vidberg(GenericLeMondeBlog):
    """Class to retrieve Vidberg comics."""
    name = 'vidberg'
    long_name = "Vidberg - l'actu en patates"
    url = "http://vidberg.blog.lemonde.fr"
    # Not the first but I didn't find an efficient way to retrieve it
    first_url = "http://vidberg.blog.lemonde.fr/2012/02/09/revue-de-campagne-la-campagne-du-modem-semballe/"
426
427
428
class Plantu(GenericLeMondeBlog):
    """Class to retrieve Plantu comics."""
    name = 'plantu'
    long_name = "Plantu"
    url = "http://plantu.blog.lemonde.fr"
    first_url = "http://plantu.blog.lemonde.fr/2014/10/28/stress-test-a-bruxelles/"
434
435
436
class XavierGorce(GenericLeMondeBlog):
    """Class to retrieve Xavier Gorce comics."""
    name = 'gorce'
    long_name = "Xavier Gorce"
    url = "http://xaviergorce.blog.lemonde.fr"
    first_url = "http://xaviergorce.blog.lemonde.fr/2015/01/09/distinction/"
442
443
444
class CartooningForPeace(GenericLeMondeBlog):
    """Class to retrieve Cartooning For Peace comics."""
    name = 'forpeace'
    long_name = "Cartooning For Peace"
    url = "http://cartooningforpeace.blog.lemonde.fr"
    first_url = "http://cartooningforpeace.blog.lemonde.fr/2014/12/15/bado/"
450
451
452
class Aurel(GenericLeMondeBlog):
    """Class to retrieve Aurel comics."""
    name = 'aurel'
    long_name = "Aurel"
    url = "http://aurel.blog.lemonde.fr"
    first_url = "http://aurel.blog.lemonde.fr/2014/09/29/le-senat-repasse-a-droite/"
458
459
460
class LesCulottees(GenericLeMondeBlog):
    """Class to retrieve Les Culottees comics."""
    name = 'culottees'
    long_name = 'Les Culottees'
    url = "http://lesculottees.blog.lemonde.fr"
    first_url = "http://lesculottees.blog.lemonde.fr/2016/01/11/clementine-delait-femme-a-barbe/"
466
467
468
class UneAnneeAuLycee(GenericLeMondeBlog):
    """Class to retrieve Une Annee Au Lycee comics."""
    name = 'lycee'
    long_name = 'Une Annee au Lycee'
    url = 'http://uneanneeaulycee.blog.lemonde.fr'
    first_url = "http://uneanneeaulycee.blog.lemonde.fr/2016/06/13/la-semaine-du-bac-est-arrivee/"
474
475
476 View Code Duplication
class Rall(GenericNavigableComic):
    """Class to retrieve Ted Rall comics."""
    # Also on http://www.gocomics.com/tedrall
    name = 'rall'
    long_name = "Ted Rall"
    url = "http://rall.com/comic"
    _categories = ('RALL', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    # Not the first but I didn't find an efficient way to retrieve it
    first_url = "http://rall.com/2014/01/30/los-angeles-times-cartoon-well-miss-those-california-flowers"

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic."""
        title = soup.find('meta', property='og:title')['content']
        author = soup.find("span", class_="author vcard").find("a").string
        date_str = soup.find("span", class_="entry-date").string
        day = string_to_date(date_str, "%B %d, %Y")
        desc = soup.find('meta', property='og:description')['content']
        imgs = soup.find('div', class_='entry-content').find_all('img')
        imgs = imgs[:-7]  # remove social media buttons
        return {
            'title': title,
            'author': author,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'description': desc,
            'img': [i['src'] for i in imgs],
        }
507
508
509
class Dilem(GenericNavigableComic):
    """Class to retrieve Ali Dilem comics."""
    name = 'dilem'
    long_name = 'Ali Dilem'
    url = 'http://information.tv5monde.com/dilem'
    _categories = ('FRANCAIS', )
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = "http://information.tv5monde.com/dilem/2004-06-26"

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Return the 'a' tag inside the matching 'li', or None when the
        'li' is absent.
        """
        # prev is next / next is prev
        li = last_soup.find('li', class_='prev' if next_ else 'next')
        return li.find('a') if li else None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic."""
        short_url = soup.find('link', rel='shortlink')['href']
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        imgs = soup.find_all('meta', property='og:image')
        date_str = soup.find('span', property='dc:date')['content']
        # Only the first 10 characters (the "%Y-%m-%d" part) are parsed.
        date_str = date_str[:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        return {
            'short_url': short_url,
            'title': title,
            'img': [i['content'] for i in imgs],
            'day': day.day,
            'month': day.month,
            'year': day.year,
        }
543
544
545
class SpaceAvalanche(GenericNavigableComic):
    """Class to retrieve Space Avalanche comics."""
    name = 'avalanche'
    long_name = 'Space Avalanche'
    url = 'http://www.spaceavalanche.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics (hardcoded link-like dict)."""
        return {'href': "http://www.spaceavalanche.com/2009/02/02/irish-sea/", 'title': "Irish Sea"}

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        The title comes from the link itself and the date is read from the
        year/month/day components of the comic url.
        """
        url_date_re = re.compile(r'.*/([0-9]*)/([0-9]*)/([0-9]*)/.*$')
        title = link['title']
        url = cls.get_url_from_link(link)
        year, month, day = [int(s)
                            for s in url_date_re.match(url).groups()]
        imgs = soup.find("div", class_="entry").find_all("img")
        return {
            'title': title,
            'day': day,
            'month': month,
            'year': year,
            'img': [i['src'] for i in imgs],
        }
573
574
575
class ZenPencils(GenericEmptyComic, GenericNavigableComic):
    """Class to retrieve ZenPencils comics."""
    # Also on http://zenpencils.tumblr.com
    # Also on http://www.gocomics.com/zen-pencils
    name = 'zenpencils'
    long_name = 'Zen Pencils'
    url = 'http://zenpencils.com'
    _categories = ('ZENPENCILS', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = "http://zenpencils.com/comic/1-ralph-waldo-emerson-make-them-cry/"

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic."""
        imgs = soup.find('div', id='comic').find_all('img')
        # imgs2 = soup.find_all('meta', property='og:image')
        post = soup.find('div', class_='post-content')
        author = post.find("span", class_="post-author").find("a").string
        title = soup.find('meta', property='og:title')['content']
        date_str = post.find('span', class_='post-date').string
        day = string_to_date(date_str, "%B %d, %Y")
        # Sanity checks on the page structure.
        assert imgs
        assert all(i['alt'] == i['title'] for i in imgs)
        assert all(i['alt'] in (title, "") for i in imgs)
        desc = soup.find('meta', property='og:description')['content']
        return {
            'title': title,
            'description': desc,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
        }
610
611
612
class ItsTheTie(GenericEmptyComic, GenericNavigableComic):
    """Class to retrieve It's the tie comics."""
    # Also on http://itsthetie.tumblr.com
    # Also on https://tapastic.com/series/itsthetie
    name = 'tie'
    long_name = "It's the tie"
    url = "http://itsthetie.com"
    _categories = ('TIE', )
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic."""
        title = soup.find('h1', class_='comic-title').find('a').string
        date_str = soup.find('header', class_='comic-meta entry-meta').find('a').string
        day = string_to_date(date_str, "%B %d, %Y")
        # Bonus images may or may not be in meta og:image.
        imgs = soup.find_all('meta', property='og:image')
        imgs_src = [i['content'] for i in imgs]
        bonus = soup.find_all('img', attrs={'data-oversrc': True})
        bonus_src = [b['data-oversrc'] for b in bonus]
        # Append only the bonus images not already listed, then drop the
        # generic "bonus-panel.png" image.
        all_imgs_src = imgs_src + [s for s in bonus_src if s not in imgs_src]
        all_imgs_src = [s for s in all_imgs_src if not s.endswith("/2016/01/bonus-panel.png")]
        tag_meta = soup.find('meta', property='article:tag')
        tags = tag_meta['content'] if tag_meta else ""
        return {
            'title': title,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': all_imgs_src,
            'tags': tags,
        }
646
647
648 View Code Duplication
class PenelopeBagieu(GenericNavigableComic):
    """Class to retrieve comics from Penelope Bagieu's blog."""
    name = 'bagieu'
    long_name = 'Ma vie est tout a fait fascinante (Bagieu)'
    url = 'http://www.penelope-jolicoeur.com'
    _categories = ('FRANCAIS', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.penelope-jolicoeur.com/2007/02/ma-vie-mon-oeuv.html'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic."""
        date_str = soup.find('h2', class_='date-header').string
        # The date is parsed with a French locale ("%A %d %B %Y").
        day = string_to_date(date_str, "%A %d %B %Y", "fr_FR.utf8")
        imgs = soup.find('div', class_='entry-body').find_all('img')
        title = soup.find('h3', class_='entry-header').string
        return {
            'title': title,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
672
673
674 View Code Duplication
class OneOneOneOneComic(GenericEmptyComic, GenericNavigableComic):
    """Class to retrieve 1111 Comics."""
    # Also on http://comics1111.tumblr.com
    # Also on https://tapastic.com/series/1111-Comics
    name = '1111'
    long_name = '1111 Comics'
    url = 'http://www.1111comics.me'
    _categories = ('ONEONEONEONE', )
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic."""
        title = soup.find('h1', class_='comic-title').find('a').string
        date_str = soup.find('header', class_='comic-meta entry-meta').find('a').string
        day = string_to_date(date_str, "%B %d, %Y")
        imgs = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [i['content'] for i in imgs],
        }
699
700
701 View Code Duplication
class AngryAtNothing(GenericEmptyComic, GenericNavigableComic):
    """Class to retrieve Angry at Nothing comics."""
    # Also on http://tapastic.com/series/Comics-yeah-definitely-comics-
    # Also on http://angryatnothing.tumblr.com
    name = 'angry'
    long_name = 'Angry At Nothing'
    url = 'http://www.angryatnothing.net'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic."""
        title = soup.find('h1', class_='comic-title').find('a').string
        date_str = soup.find('header', class_='comic-meta entry-meta').find('a').string
        day = string_to_date(date_str, "%B %d, %Y")
        imgs = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [i['content'] for i in imgs],
        }
725
726
727
class NeDroid(GenericNavigableComic):
    """Class to retrieve NeDroid comics."""
    name = 'nedroid'
    long_name = 'NeDroid'
    url = 'http://nedroid.com'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        The comic number is read from the `?p=<num>` part of the page's
        shortlink.
        """
        # Escape the url so that its dots only match literal dots.
        short_url_re = re.compile(r'^%s/\?p=([0-9]*)' % re.escape(cls.url))
        short_url = cls.get_url_from_link(soup.find('link', rel='shortlink'))
        num = int(short_url_re.match(short_url).group(1))
        imgs = soup.find('div', id='comic').find_all('img')
        assert len(imgs) == 1
        title = imgs[0]['alt']
        title2 = imgs[0]['title']
        return {
            'short_url': short_url,
            'title': title,
            'title2': title2,
            'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
            'num': num,
        }
753
754
755
class Garfield(GenericNavigableComic):
    """Class to retrieve Garfield comics."""
    # Also on http://www.gocomics.com/garfield
    name = 'garfield'
    long_name = 'Garfield'
    url = 'https://garfield.com'
    _categories = ('GARFIELD', )
    get_first_comic_link = simulate_first_link
    first_url = 'https://garfield.com/comic/1978/06/19'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic."""
        return last_soup.find('a', class_='comic-arrow-right' if next_ else 'comic-arrow-left')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        The date is read from the year/month/day components of the comic url.
        """
        url = cls.get_url_from_link(link)
        # Escape the url so that its dots only match literal dots.
        date_re = re.compile('^%s/comic/([0-9]*)/([0-9]*)/([0-9]*)' % re.escape(cls.url))
        year, month, day = [int(s) for s in date_re.match(url).groups()]
        imgs = soup.find('div', class_='comic-display').find_all('img', class_='img-responsive')
        return {
            'month': month,
            'year': year,
            'day': day,
            'img': [i['src'] for i in imgs],
        }
783
784
785
class Dilbert(GenericNavigableComic):
    """Retriever for the Dilbert strips published on dilbert.com."""
    # Also on http://www.gocomics.com/dilbert-classics
    name = 'dilbert'
    long_name = 'Dilbert'
    url = 'http://dilbert.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://dilbert.com/strip/1989-04-16'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor to the next (or previous) strip, None if absent."""
        nav_class = 'nav-comic nav-right' if next_ else 'nav-comic nav-left'
        nav_div = last_soup.find('div', class_=nav_class)
        if not nav_div:
            return None
        return nav_div.find('a')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the strip metadata read from the page's ``meta`` tags."""

        def meta_content(prop):
            """Return the content of the first meta tag with that property."""
            return soup.find('meta', property=prop)['content']

        title = meta_content('og:title')
        image_metas = soup.find_all('meta', property='og:image')
        description = meta_content('og:description')
        published = string_to_date(meta_content('article:publish_date'), "%B %d, %Y")
        author = meta_content('article:author')
        tags = meta_content('article:tag')
        return {
            'title': title,
            'description': description,
            'img': [meta['content'] for meta in image_metas],
            'author': author,
            'tags': tags,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
821
822
823
class VictimsOfCircumsolar(GenericEmptyComic, GenericNavigableComic):
    """Retriever for the Victims Of Circumsolar comics."""
    # Also on https://victimsofcomics.tumblr.com
    name = 'circumsolar'
    long_name = 'Victims Of Circumsolar'
    url = 'http://www.victimsofcircumsolar.com'
    get_navi_link = get_a_navi_comicnavnext_navinext
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.victimsofcircumsolar.com/comic/modern-addiction'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, description and images of a comic page.

        Title and description come from the last matching ``og:`` meta tag;
        every image of the ``comic`` div must carry the title as its ``alt``
        and ``title`` attributes.
        """
        # Date is on the archive page
        title_metas = soup.find_all('meta', property='og:title')
        title = title_metas[-1]['content']
        desc_metas = soup.find_all('meta', property='og:description')
        description = desc_metas[-1]['content']
        comic_div = soup.find('div', id='comic')
        images = comic_div.find_all('img')
        for image in images:
            assert image['title'] == image['alt'] == title
        return {
            'title': title,
            'description': description,
            'img': [image['src'] for image in images],
        }
846
847
848
class ThreeWordPhrase(GenericNavigableComic):
    """Retriever for the Three Word Phrase comics."""
    # Also on http://www.threewordphrase.tumblr.com
    name = 'threeword'
    long_name = 'Three Word Phrase'
    url = 'http://threewordphrase.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the element wrapping the ``/firstlink.gif`` image of the home page."""
        home = get_soup_at_url(cls.url)
        return home.find('img', src='/firstlink.gif').parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the element wrapping the next/prev image, None when it has no href."""
        button_src = '/nextlink.gif' if next_ else '/prevlink.gif'
        wrapper = last_soup.find('img', src=button_src).parent
        if wrapper.get('href') is None:
            return None
        return wrapper

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the page title, the joined alt texts and the kept images."""
        # Images whose source ends with one of these suffixes are left out.
        skipped_suffixes = ('link.gif', '32.png', 'twpbookad.jpg',
                            'merchad.jpg', 'header.gif', 'tipjar.jpg')
        title_tag = soup.find('title')
        pictures = []
        for picture in soup.find_all('img'):
            if not picture['src'].endswith(skipped_suffixes):
                pictures.append(picture)
        alt_texts = [picture.get('alt') for picture in pictures if picture.get('alt')]
        return {
            'title': title_tag.string if title_tag else None,
            'title2': '  '.join(alt_texts),
            'img': [urljoin_wrapper(cls.url, picture['src']) for picture in pictures],
        }
880
881
882
class DeadlyPanel(GenericEmptyComic, GenericNavigableComic):
    """Retriever for the Deadly Panel comics."""
    # Also on https://tapastic.com/series/deadlypanel
    # Also on https://deadlypanel.tumblr.com
    name = 'deadly'
    long_name = 'Deadly Panel'
    url = 'http://www.deadlypanel.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the sources of the images found in the ``comic`` div.

        Each image is expected to have identical ``alt`` and ``title``.
        """
        comic_div = soup.find('div', id='comic')
        images = comic_div.find_all('img')
        for image in images:
            assert image['alt'] == image['title']
        sources = [image['src'] for image in images]
        return {
            'img': sources,
        }
900
901
902 View Code Duplication
class TheGentlemanArmchair(GenericNavigableComic):
    """Retriever for The Gentleman Armchair comics."""
    name = 'gentlemanarmchair'
    long_name = 'The Gentleman Armchair'
    url = 'http://thegentlemansarmchair.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return images, title, author and post date of a comic page."""
        heading = soup.find('h2', class_='post-title')
        title = heading.string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        date_span = soup.find('span', class_='post-date')
        posted = string_to_date(date_span.string, "%B %d, %Y")
        comic_div = soup.find('div', id='comic')
        images = comic_div.find_all('img')
        return {
            'img': [image['src'] for image in images],
            'title': title,
            'author': author,
            'month': posted.month,
            'year': posted.year,
            'day': posted.day,
        }
926
927
928 View Code Duplication
class ImogenQuest(GenericNavigableComic):
    """Retriever for the Imogen Quest comics."""
    # Also on http://imoquest.tumblr.com
    name = 'imogen'
    long_name = 'Imogen Quest'
    url = 'http://imogenquest.net'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return date, images, titles and author of a comic page.

        The second title is the ``title`` attribute of the first image of the
        ``comicpane`` div; every image must have identical ``alt`` and ``title``.
        """
        heading = soup.find('h2', class_='post-title')
        title = heading.string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        date_span = soup.find('span', class_='post-date')
        posted = string_to_date(date_span.string, '%B %d, %Y')
        pane = soup.find('div', class_='comicpane')
        images = pane.find_all('img')
        for image in images:
            assert image['alt'] == image['title']
        hover_text = images[0]['title']
        return {
            'day': posted.day,
            'month': posted.month,
            'year': posted.year,
            'img': [image['src'] for image in images],
            'title': title,
            'title2': hover_text,
            'author': author,
        }
956
957
958
class MyExtraLife(GenericNavigableComic):
    """Retriever for the My Extra Life comics."""
    name = 'extralife'
    long_name = 'My Extra Life'
    url = 'http://www.myextralife.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'first comic' navigation anchor of the home page."""
        home = get_soup_at_url(cls.url)
        return home.find('a', class_='comic_nav_link first_comic_link')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, non-empty image sources and date of a comic page.

        Every comic image must carry the title as its ``alt`` and ``title``.
        """
        title = soup.find("h1", class_="comic_title").string
        date_span = soup.find("span", class_="comic_date")
        posted = string_to_date(date_span.string, "%B %d, %Y")
        images = soup.find_all("img", class_="comic")
        for image in images:
            assert image['alt'] == image['title'] == title
        sources = [image['src'] for image in images if image["src"]]
        return {
            'title': title,
            'img': sources,
            'day': posted.day,
            'month': posted.month,
            'year': posted.year
        }
985
986
987
class SaturdayMorningBreakfastCereal(GenericNavigableComic):
    """Class to retrieve Saturday Morning Breakfast Cereal comics."""
    # Also on http://www.gocomics.com/saturday-morning-breakfast-cereal
    # Also on http://smbc-comics.tumblr.com
    name = 'smbc'
    long_name = 'Saturday Morning Breakfast Cereal'
    url = 'http://www.smbc-comics.com'
    _categories = ('SMBC', )
    get_navi_link = get_a_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics (the ``rel="start"`` anchor of the home page)."""
        return get_soup_at_url(cls.url).find('a', rel='start')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with the title (``title`` attribute of the main image),
        the absolute image urls (main image first, then the bonus image when
        there is one) and the publication day/month/year.
        """
        main_img = soup.find('img', id='cc-comic')
        img_urls = [main_img['src']]
        # The bonus panel is optional: the 'aftercomic' div, the image inside
        # it or the image source may be missing, in which case it is skipped.
        aftercomic = soup.find('div', id='aftercomic')
        bonus_img = aftercomic.find('img') if aftercomic else None
        bonus_url = bonus_img.get('src') if bonus_img else None
        if bonus_url:
            img_urls.append(bonus_url)
        date_str = soup.find('div', class_='cc-publishtime').contents[0]
        day = string_to_date(date_str, "%B %d, %Y")
        return {
            'title': main_img['title'],
            'img': [urljoin_wrapper(cls.url, i) for i in img_urls],
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
1019
1020
1021
class PerryBibleFellowship(GenericListableComic):  # Is now navigable too
    """Retriever for the Perry Bible Fellowship comics."""
    name = 'pbf'
    long_name = 'Perry Bible Fellowship'
    url = 'http://pbfcomics.com'
    get_url_from_archive_element = join_cls_url_to_href

    @classmethod
    def get_archive_elements(cls):
        """Return the anchors of the ``all_thumbnails`` div, in reversed page order."""
        home = get_soup_at_url(cls.url)
        thumbnail_div = home.find('div', id='all_thumbnails')
        anchors = thumbnail_div.find_all('a')
        return reversed(anchors)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the name and the single image read from the ``og:`` meta tags."""
        name_meta = soup.find('meta', property='og:title')
        comic_name = name_meta['content']
        image_metas = soup.find_all('meta', property='og:image')
        assert len(image_metas) == 1
        return {
            'name': comic_name,
            'img': [meta['content'] for meta in image_metas],
        }
1044
1045
1046
class Mercworks(GenericNavigableComic):
    """Retriever for the Mercworks comics."""
    # Also on http://mercworks.tumblr.com
    name = 'mercworks'
    long_name = 'Mercworks'
    url = 'http://mercworks.net'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return images, title, description and date read from the meta tags.

        The description is an empty string when the page has no
        ``og:description`` meta tag.
        """
        title = soup.find('meta', property='og:title')['content']
        desc_meta = soup.find('meta', property='og:description')
        if desc_meta:
            description = desc_meta['content']
        else:
            description = ""
        published_meta = soup.find('meta', property='article:published_time')
        # Only the first 10 characters of the timestamp are parsed.
        published = string_to_date(published_meta['content'][:10], "%Y-%m-%d")
        image_metas = soup.find_all('meta', property='og:image')
        return {
            'img': [meta['content'] for meta in image_metas],
            'title': title,
            'desc': description,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
1072
1073
1074
class BerkeleyMews(GenericListableComic):
    """Retriever for the Berkeley Mews comics."""
    # Also on http://mews.tumblr.com
    # Also on http://www.gocomics.com/berkeley-mews
    name = 'berkeley'
    long_name = 'Berkeley Mews'
    url = 'http://www.berkeleymews.com'
    _categories = ('BERKELEY', )
    get_url_from_archive_element = get_href
    # Comic urls end with '?p=<number>'.
    comic_num_re = re.compile('%s/\\?p=([0-9]*)$' % url)

    @classmethod
    def get_archive_elements(cls):
        """Return the comic anchors of the archive page, in reversed page order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, "?page_id=2"))
        anchors = archive_soup.find_all('a', href=cls.comic_num_re)
        return reversed(anchors)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return number, titles, image and date of a comic.

        The number comes from the comic url and the date from the
        ``<year>-<month>-<day>-`` part of the image url.
        """
        date_in_img_re = re.compile('.*/([0-9]*)-([0-9]*)-([0-9]*)-.*')
        comic_url = cls.get_url_from_archive_element(link)
        comic_num = int(cls.comic_num_re.match(comic_url).group(1))
        image = soup.find('div', id='comic').find('img')
        assert image['alt'] == image['title']
        hover_text = image['title']
        image_url = image['src']
        year, month, day = map(int, date_in_img_re.match(image_url).groups())
        return {
            'num': comic_num,
            'title': link.string,
            'title2': hover_text,
            'img': [image_url],
            'year': year,
            'month': month,
            'day': day,
        }
1110
1111
1112
class GenericBouletCorp(GenericNavigableComic):
    """Shared retriever for the BouletCorp comics, whatever the language."""
    # Also on https://bouletcorp.tumblr.com
    _categories = ('BOULET', )
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the first anchor of the ``centered_nav`` div of the home page."""
        home = get_soup_at_url(cls.url)
        navigation = home.find('div', id='centered_nav')
        return navigation.find_all('a')[0]

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return images, title, image hover texts and date of a comic.

        The date is parsed from the ``/<year>/<month>/<day>/`` part of the
        comic url; images without a ``src`` attribute are left out.
        """
        comic_url = cls.get_url_from_link(link)
        date_match = re.match('^%s/([0-9]*)/([0-9]*)/([0-9]*)/' % cls.url, comic_url)
        year, month, day = map(int, date_match.groups())
        story = soup.find('div', id='notes').find('div', class_='storycontent')
        images = story.find_all('img')
        hover_texts = [image.get('title') for image in images]
        texts = '  '.join(text for text in hover_texts if text)
        title = soup.find('title').string
        sources = [convert_iri_to_plain_ascii_uri(image['src'])
                   for image in images
                   if image.get('src') is not None]
        return {
            'img': sources,
            'title': title,
            'texts': texts,
            'year': year,
            'month': month,
            'day': day,
        }
1140
1141
1142
class BouletCorp(GenericBouletCorp):
    """Retriever for the BouletCorp comics hosted on www.bouletcorp.com."""
    name = 'boulet'
    long_name = 'Boulet Corp'
    url = 'http://www.bouletcorp.com'
    _categories = ('FRANCAIS',)
1148
1149
1150
class BouletCorpEn(GenericBouletCorp):
    """Retriever for the BouletCorp comics hosted on english.bouletcorp.com."""
    name = 'boulet_en'
    long_name = 'Boulet Corp English'
    url = 'http://english.bouletcorp.com'
1155
1156
1157 View Code Duplication
class AmazingSuperPowers(GenericNavigableComic):
    """Retriever for the Amazing Super Powers comics."""
    name = 'asp'
    long_name = 'Amazing Super Powers'
    url = 'http://www.amazingsuperpowers.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, author, images and post date of a comic page.

        The title is the space-joined ``title`` attributes of the images of
        the ``comic`` div; each image must have identical ``alt`` and ``title``.
        """
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        date_span = soup.find('span', class_='post-date')
        posted = string_to_date(date_span.string, "%B %d, %Y")
        images = soup.find('div', id='comic').find_all('img')
        title = ' '.join(image['title'] for image in images)
        for image in images:
            assert image['alt'] == image['title']
        return {
            'title': title,
            'author': author,
            'img': [image['src'] for image in images],
            'day': posted.day,
            'month': posted.month,
            'year': posted.year
        }
1182
1183
1184
class ToonHole(GenericNavigableComic):
    """Retriever for the Toon Hole comics."""
    # Also on http://tapastic.com/series/TOONHOLE
    name = 'toonhole'
    long_name = 'Toon Hole'
    url = 'http://www.toonhole.com'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return short url, title, date and images of a comic page.

        The title is the ``alt`` of the first image of the ``comic`` div
        (which must equal its ``title``), or an empty string without images.
        """
        short_url = soup.find('link', rel='shortlink')['href']
        meta_div = soup.find('div', class_='entry-meta')
        posted = string_to_date(meta_div.contents[0].strip(), "%B %d, %Y")
        images = soup.find('div', id='comic').find_all('img')
        title = ""
        if images:
            first_image = images[0]
            title = first_image['alt']
            assert first_image['title'] == title
        return {
            'short_url': short_url,
            'title': title,
            'month': posted.month,
            'year': posted.year,
            'day': posted.day,
            'img': [convert_iri_to_plain_ascii_uri(image['src']) for image in images],
        }
1214
1215
1216
class Channelate(GenericNavigableComic):
    """Retriever for the Channelate comics."""
    name = 'channelate'
    long_name = 'Channelate'
    url = 'http://www.channelate.com'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, author, date and images of a comic page.

        When the page has an ``extrapanelbutton`` div, the page it links to
        is fetched and its ``extrapanelimage`` images are appended.
        """
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        date_span = soup.find('span', class_='post-date')
        posted = string_to_date(date_span.string, '%Y/%m/%d')
        title = soup.find('meta', property='og:title')['content']
        comic_div = soup.find('div', id='comic')
        if comic_div:
            images = comic_div.find_all('img')
        else:
            images = []
        bonus_url = None
        bonus_div = soup.find('div', id='extrapanelbutton')
        if bonus_div:
            bonus_url = bonus_div.find('a')['href']
            bonus_soup = get_soup_at_url(bonus_url)
            images.extend(bonus_soup.find_all('img', class_='extrapanelimage'))
        return {
            'url_extra': bonus_url,
            'title': title,
            'author': author,
            'month': posted.month,
            'year': posted.year,
            'day': posted.day,
            'img': [urljoin_wrapper(cls.url, image['src']) for image in images],
        }
1250
1251
1252
class CyanideAndHappiness(GenericNavigableComic):
    """Class to retrieve Cyanide And Happiness comics."""
    name = 'cyanide'
    long_name = 'Cyanide and Happiness'
    url = 'http://explosm.net'
    _categories = ('NSFW', )
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics (the anchor titled 'Oldest comic')."""
        return get_soup_at_url(cls.url).find('a', title='Oldest comic')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns None when no matching anchor is found or when the anchor
        has no ``href`` attribute.
        """
        # NOTE(review): the trailing space in 'previous-comic ' is kept as
        # found; confirm it is intended, as it may prevent any match.
        link = last_soup.find('a', class_='next-comic' if next_ else 'previous-comic ')
        # find() returns None without a match: do not call .get() on it.
        if link is None or link.get('href') is None:
            return None
        return link

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The number is the second to last '/'-separated part of the
        ``og:url`` meta content; the author credit must start with 'by ',
        which is stripped.
        """
        url2 = soup.find('meta', property='og:url')['content']
        num = int(url2.split('/')[-2])
        date_str = soup.find('h3').find('a').string
        day = string_to_date(date_str, '%Y.%m.%d')
        author = soup.find('small', class_="author-credit-name").string
        assert author.startswith('by ')
        author = author[3:]
        imgs = soup.find_all('img', id='main-comic')
        return {
            'num': num,
            'author': author,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'prefix': '%d-' % num,
            'img': [convert_iri_to_plain_ascii_uri(urljoin_wrapper(cls.url, i['src'])) for i in imgs]
        }
1291
1292
1293
class MrLovenstein(GenericComic):
    """Retriever for the Mr Lovenstein comics."""
    # Also on https://tapastic.com/series/MrLovenstein
    name = 'mrlovenstein'
    long_name = 'Mr. Lovenstein'
    url = 'http://www.mrlovenstein.com'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Yield the comics following last_comic (all of them when it is falsy).

        The range of comic numbers is taken from the ``/comic/<number>`` links
        of the home page; each comic page is then fetched in increasing order.
        """
        # TODO: more info from http://www.mrlovenstein.com/archive
        comic_href_re = re.compile('^/comic/([0-9]*)$')
        home = get_soup_at_url(cls.url)
        known_nums = []
        for anchor in home.find_all('a', href=comic_href_re):
            known_nums.append(int(comic_href_re.match(anchor['href']).group(1)))
        start, stop = min(known_nums), max(known_nums)
        if last_comic:
            start = last_comic['num'] + 1
        for comic_num in range(start, stop + 1):
            comic_url = urljoin_wrapper(cls.url, '/comic/%d' % comic_num)
            comic_soup = get_soup_at_url(comic_url)
            # Images are kept in the reverse of their order in the page.
            images = list(comic_soup.find_all('img', src=re.compile('^/images/comics/')))
            images.reverse()
            description_meta = comic_soup.find('meta', attrs={'name': 'description'})
            description = description_meta['content']
            hover_texts = [image.get('title') for image in images]
            yield {
                'url': comic_url,
                'num': comic_num,
                'texts': '  '.join(text for text in hover_texts if text),
                'img': [urljoin_wrapper(comic_url, image['src']) for image in images],
                'description': description,
            }
1323
1324
1325
class DinosaurComics(GenericListableComic):
    """Retriever for the Dinosaur Comics comics."""
    name = 'dinosaur'
    long_name = 'Dinosaur Comics'
    url = 'http://www.qwantz.com'
    get_url_from_archive_element = get_href
    comic_link_re = re.compile('^%s/index.php\\?comic=([0-9]*)$' % url)

    @classmethod
    def get_archive_elements(cls):
        """Return the comic anchors of the archive page, in reversed page order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive.php'))
        anchors = archive_soup.find_all('a', href=cls.comic_link_re)
        # first link is random -> skip it
        return reversed(anchors[1:])

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return date, image, title, text and number of a comic.

        Number, date and text come from the archive link (its url, its string
        and its next sibling); image and title come from the comic page.
        """
        comic_url = cls.get_url_from_archive_element(link)
        comic_num = int(cls.comic_link_re.match(comic_url).group(1))
        raw_date = link.string
        text = link.next_sibling.string
        posted = string_to_date(remove_st_nd_rd_th_from_date(raw_date), "%B %d, %Y")
        image = soup.find('img', src=re.compile('^%s/comics/' % cls.url))
        return {
            'month': posted.month,
            'year': posted.year,
            'day': posted.day,
            'img': [image.get('src')],
            'title': image.get('title'),
            'text': text,
            'num': comic_num,
        }
1358
1359
1360
class ButterSafe(GenericListableComic):
    """Retriever for the Butter Safe comics."""
    name = 'butter'
    long_name = 'ButterSafe'
    url = 'http://buttersafe.com'
    get_url_from_archive_element = get_href
    comic_link_re = re.compile('^%s/([0-9]*)/([0-9]*)/([0-9]*)/.*' % url)

    @classmethod
    def get_archive_elements(cls):
        """Return the comic anchors of the archive page, in reversed page order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive/'))
        anchors = archive_soup.find_all('a', href=cls.comic_link_re)
        return reversed(anchors)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, date and image of a comic.

        The date is parsed from the comic url and the title is the string of
        the archive link, which must equal the ``alt`` of the comic image.
        """
        comic_url = cls.get_url_from_archive_element(link)
        title = link.string
        year, month, day = map(int, cls.comic_link_re.match(comic_url).groups())
        image = soup.find('div', id='comic').find('img')
        assert image['alt'] == title
        return {
            'title': title,
            'day': day,
            'month': month,
            'year': year,
            'img': [image['src']],
        }
1388
1389
1390
class CalvinAndHobbes(GenericComic):
    """Class to retrieve Calvin and Hobbes comics."""
    # Also on http://www.gocomics.com/calvinandhobbes/
    name = 'calvin'
    long_name = 'Calvin and Hobbes'
    # This is not through any official webpage but eh...
    url = 'http://marcel-oehler.marcellosendos.ch/comics/ch/'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generator to get the next comic. Implementation of GenericComic's abstract method.

        The index page links to one page per month ('<year>/<month>/...'); each
        month page holds images whose source starts with '<year><month><day>'.
        Only comics dated strictly after the last retrieved one (or after
        1985-11-01 when last_comic is falsy) are yielded.
        """
        last_date = get_date_for_comic(last_comic) if last_comic else date(1985, 11, 1)
        link_re = re.compile('^([0-9]*)/([0-9]*)/')
        for link in get_soup_at_url(cls.url).find_all('a', href=link_re):
            url = link['href']
            # The strings are kept as found: they are reused verbatim to
            # build the image pattern and the image url.
            year_str, month_str = link_re.match(url).groups()
            year, month = int(year_str), int(month_str)
            # Skip months that cannot hold a comic more recent than last_date
            # (31 days is an upper bound for the length of a month).
            if date(year, month, 1) + timedelta(days=31) < last_date:
                continue
            img_re = re.compile('^%s%s([0-9]*)' % (year_str, month_str))
            month_url = urljoin_wrapper(cls.url, url)
            for img in get_soup_at_url(month_url).find_all('img', src=img_re):
                img_src = img['src']
                day = int(img_re.match(img_src).groups()[0])
                comic_date = date(year, month, day)
                if comic_date > last_date:
                    yield {
                        'url': month_url,
                        'year': year,
                        'month': month,
                        'day': day,
                        'img': ['%s%s/%s/%s' % (cls.url, year_str, month_str, img_src)],
                    }
                    last_date = comic_date
1424
1425
1426
class AbstruseGoose(GenericListableComic):
    """Retriever for the Abstruse Goose comics."""
    name = 'abstruse'
    long_name = 'Abstruse Goose'
    url = 'http://abstrusegoose.com'
    get_url_from_archive_element = get_href
    comic_url_re = re.compile('^%s/([0-9]*)$' % url)
    comic_img_re = re.compile('^%s/strips/.*' % url)

    @classmethod
    def get_archive_elements(cls):
        """Return the comic anchors of the archive page, in page order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive'))
        return archive_soup.find_all('a', href=cls.comic_url_re)

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Return number, title and image of a comic.

        The number comes from the comic url, the title is the string of the
        archive element and the image is the first one of the page whose
        source matches ``comic_img_re``.
        """
        comic_url = cls.get_url_from_archive_element(archive_elt)
        comic_num = int(cls.comic_url_re.match(comic_url).group(1))
        title = archive_elt.string
        strip = soup.find('img', src=cls.comic_img_re)
        return {
            'num': comic_num,
            'title': title,
            'img': [strip['src']]
        }
1449
1450
1451
class PhDComics(GenericNavigableComic):
    """Retriever for the PhD Comics."""
    name = 'phd'
    long_name = 'PhD Comics'
    url = 'http://phdcomics.com/comics/archive.php'

    @classmethod
    def get_first_comic_link(cls):
        """Return the parent of the 'first' button image, None without button."""
        archive_soup = get_soup_at_url(cls.url)
        button = archive_soup.find(
            'img', src='http://phdcomics.com/comics/images/first_button.gif')
        if button is None:
            return None
        return button.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the parent of the next/prev button image, None without button."""
        direction = 'next' if next_ else 'prev'
        button_src = 'http://phdcomics.com/comics/images/%s_button.gif' % direction
        button = last_soup.find('img', src=button_src)
        if button is None:
            return None
        return button.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the images and the title read from the page's meta tags."""
        title_meta = soup.find('meta', attrs={'name': 'twitter:title'})
        title = title_meta['content']
        image_metas = soup.find_all('meta', property='og:image')
        return {
            'img': [meta['content'] for meta in image_metas],
            'title': title,
        }
1480
1481
1482 View Code Duplication
class Octopuns(GenericEmptyComic, GenericNavigableComic):
    """Retriever for the Octopuns comics."""
    # Also on http://octopuns.tumblr.com
    name = 'octopuns'
    long_name = 'Octopuns'
    url = 'http://www.octopuns.net'

    @classmethod
    def get_first_comic_link(cls):
        """Return the element wrapping the 'First.png' image of the home page."""
        home = get_soup_at_url(cls.url)
        first_button = home.find('img', src=re.compile('.*/First.png'))
        return first_button.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the element wrapping the Next/Back image, None when it has no href."""
        button_pattern = '.*/Next.png' if next_ else '.*/Back.png'
        wrapper = last_soup.find('img', src=re.compile(button_pattern)).parent
        if wrapper.get('href') is None:
            return None
        return wrapper

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return images, title and date of a comic page."""
        heading = soup.find('h3', class_='post-title entry-title')
        title = heading.string
        date_header = soup.find('h2', class_='date-header')
        posted = string_to_date(date_header.string, "%A, %B %d, %Y")
        image_links = soup.find_all('link', rel='image_src')
        return {
            'img': [image_link['href'] for image_link in image_links],
            'title': title,
            'day': posted.day,
            'month': posted.month,
            'year': posted.year,
        }
1514
1515
1516
class Quarktees(GenericNavigableComic):
    """Retriever for the Quarktees comics."""
    name = 'quarktees'
    long_name = 'Quarktees'
    url = 'http://www.quarktees.com/blogs/news'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.quarktees.com/blogs/news/12486621-coming-soon'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor to the next (or previous) article."""
        anchor_id = 'article-next' if next_ else 'article-prev'
        return last_soup.find('a', id=anchor_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the title and the images of the ``single-article`` div."""
        title_meta = soup.find('meta', property='og:title')
        title = title_meta['content']
        article_div = soup.find('div', class_='single-article')
        images = article_div.find_all('img')
        return {
            'title': title,
            'img': [urljoin_wrapper(cls.url, image['src']) for image in images],
        }
1540
1541
1542
class OverCompensating(GenericNavigableComic):
    """Retrieve the Over Compensating comics."""
    name = 'compensating'
    long_name = 'Over Compensating'
    url = 'http://www.overcompensating.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the home page <a> whose href ends with 'comic=1'."""
        home = get_soup_at_url(cls.url)
        return home.find('a', href=re.compile('comic=1$'))

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the <a> titled 'next comic' (or 'go back already')."""
        wanted_title = 'next comic' if next_ else 'go back already'
        return last_soup.find('a', title=wanted_title)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return number, image url and title of a comic.

        The number is read from the trailing 'comic=<digits>' of the comic
        url; the image is the first <img> whose src starts with
        '/oc/comics/'.
        """
        comic_url = cls.get_url_from_link(link)
        num_match = re.match('.*comic=([0-9]*)$', comic_url)
        num = int(num_match.group(1))
        img = soup.find('img', src=re.compile('^/oc/comics/.*'))
        img_url = urljoin_wrapper(comic_url, img['src'])
        return {
            'num': num,
            'img': [img_url],
            'title': img.get('title')
        }
1572
1573
1574
class Oglaf(GenericNavigableComic):
    """Retrieve the Oglaf comics."""
    name = 'oglaf'
    long_name = 'Oglaf [NSFW]'
    url = 'http://oglaf.com'
    _categories = ('NSFW', )
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the parent of the <div id="st"> found on the home page."""
        start_div = get_soup_at_url(cls.url).find("div", id="st")
        return start_div.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the parent of the 'nx' (or 'pvs') <div>, None if absent."""
        div_id = "nx" if next_ else "pvs"
        nav_div = last_soup.find("div", id=div_id)
        if nav_div:
            return nav_div.parent
        return None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, images and description of a comic.

        Exactly one title image (inside <div id="tt">) and one strip image
        (<img id="strip">) are expected; the description joins their
        'title' attributes.
        """
        page_title = soup.find('title').string
        title_imgs = soup.find('div', id='tt').find_all('img')
        assert len(title_imgs) == 1
        strip_imgs = soup.find_all('img', id='strip')
        assert len(strip_imgs) == 1
        all_imgs = [*title_imgs, *strip_imgs]
        description = ' '.join(tag['title'] for tag in all_imgs)
        return dict(
            title=page_title,
            img=[tag['src'] for tag in all_imgs],
            description=description,
        )
1608
1609
1610
class ScandinaviaAndTheWorld(GenericNavigableComic):
    """Retrieve the Scandinavia And The World comics."""
    name = 'satw'
    long_name = 'Scandinavia And The World'
    url = 'http://satwcomic.com'
    get_first_comic_link = simulate_first_link
    first_url = 'http://satwcomic.com/sweden-denmark-and-norway'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the <a> whose accesskey is 'n' (next) or 'p' (previous)."""
        key = 'n' if next_ else 'p'
        return last_soup.find('a', accesskey=key)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, description and images read from the page metadata.

        The title comes from the 'twitter:label1' meta tag, the description
        from 'og:description' and the images are the <img itemprop="image">
        elements.
        """
        title_meta = soup.find('meta', attrs={'name': 'twitter:label1'})
        info = {'title': title_meta['content']}
        info['description'] = soup.find('meta', property='og:description')['content']
        info['img'] = [tag['src'] for tag in soup.find_all('img', itemprop="image")]
        return info
1634
1635
1636
# The Something Of That Ilk site does not exist anymore.
class SomethingOfThatIlk(GenericEmptyComic):
    """Placeholder for the Something Of That Ilk comics."""
    name = 'somethingofthatilk'
    long_name = 'Something Of That Ilk'
    url = 'http://www.somethingofthatilk.com'
1641
1642
1643
class InfiniteMonkeyBusiness(GenericNavigableComic):
    """Retrieve the Infinite Monkey Business comics."""
    name = 'monkey'
    long_name = 'Infinite Monkey Business'
    url = 'http://infinitemonkeybusiness.net'
    get_navi_link = get_a_navi_comicnavnext_navinext
    get_first_comic_link = simulate_first_link
    first_url = 'http://infinitemonkeybusiness.net/comic/pillory/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the og:title and the images found in <div id="comic">."""
        og_title = soup.find('meta', property='og:title')
        title = og_title['content']
        comic_div = soup.find('div', id='comic')
        sources = [tag['src'] for tag in comic_div.find_all('img')]
        return dict(title=title, img=sources)
1661
1662
1663
class Wondermark(GenericListableComic):
    """Retrieve the Wondermark comics."""
    name = 'wondermark'
    long_name = 'Wondermark'
    url = 'http://wondermark.com'
    get_url_from_archive_element = get_href

    @classmethod
    def get_archive_elements(cls):
        """Return the 'bookmark' links of the archive page in reversed order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive/'))
        bookmarks = archive_soup.find_all('a', rel='bookmark')
        return reversed(bookmarks)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return date, image, title, alt text and tags of a comic.

        Pages without a <div id="comic"> yield an empty image list and empty
        title/alt strings; the date and tags are still reported.
        """
        raw_date = soup.find('div', class_='postdate').find('em').string
        published = string_to_date(remove_st_nd_rd_th_from_date(raw_date), "%B %d, %Y")
        # Defaults used when the page holds no comic at all.
        img_src, alt, title = [], '', ''
        comic_div = soup.find('div', id='comic')
        if comic_div:
            img = comic_div.find('img')
            img_src = [img['src']]
            alt = img['alt']
            assert alt == img['title']
            title = soup.find('meta', property='og:title')['content']
        tag_links = soup.find('div', class_='postmeta').find_all('a', rel='tag')
        return {
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': img_src,
            'title': title,
            'alt': alt,
            'tags': ' '.join(t.string for t in tag_links),
        }
1700
1701
1702 View Code Duplication
class WarehouseComic(GenericNavigableComic):
    """Retrieve the Warehouse Comic comics."""
    name = 'warehouse'
    long_name = 'Warehouse Comic'
    url = 'http://warehousecomic.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return images, title and publication date of a comic.

        The date is parsed from the 'post-date' <span> with "%B %d, %Y".
        """
        post_title = soup.find('h2', class_='post-title').string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, "%B %d, %Y")
        comic_imgs = soup.find('div', id='comic').find_all('img')
        return dict(
            img=[tag['src'] for tag in comic_imgs],
            title=post_title,
            day=published.day,
            month=published.month,
            year=published.year,
        )
1724
1725
1726
class JustSayEh(GenericNavigableComic):
    """Retrieve the Just Say Eh comics."""
    # Also on http://tapastic.com/series/Just-Say-Eh
    name = 'justsayeh'
    long_name = 'Just Say Eh'
    url = 'http://www.justsayeh.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return images, title and alt text of a comic.

        Every image in <div id="comic"> must have identical 'alt' and
        'title' attributes; the alt text of the first one is reported.
        """
        post_title = soup.find('h2', class_='post-title').string
        comic_imgs = soup.find("div", id="comic").find_all("img")
        assert all(tag['alt'] == tag['title'] for tag in comic_imgs)
        first_alt = comic_imgs[0]['alt']
        return dict(
            img=[tag['src'] for tag in comic_imgs],
            title=post_title,
            alt=first_alt,
        )
1747
1748
1749 View Code Duplication
class MouseBearComedy(GenericNavigableComic):
    """Retrieve the Mouse Bear Comedy comics."""
    # Also on http://mousebearcomedy.tumblr.com
    name = 'mousebear'
    long_name = 'Mouse Bear Comedy'
    url = 'http://www.mousebearcomedy.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return date, images, title and author of a comic.

        Every image in <div id="comic"> must carry 'alt' and 'title'
        attributes equal to the post title.
        """
        post_title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, '%B %d, %Y')
        comic_imgs = soup.find("div", id="comic").find_all("img")
        assert all(tag['alt'] == tag['title'] == post_title for tag in comic_imgs)
        return dict(
            day=published.day,
            month=published.month,
            year=published.year,
            img=[tag['src'] for tag in comic_imgs],
            title=post_title,
            author=author,
        )
1775
1776
1777
class BigFootJustice(GenericNavigableComic):
    """Retrieve the Big Foot Justice comics."""
    # Also on http://tapastic.com/series/bigfoot-justice
    name = 'bigfoot'
    long_name = 'Big Foot Justice'
    url = 'http://bigfootjustice.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the images of a comic and a title built from them.

        The title joins the 'title' attributes of the images in
        <div id="comic">; each must equal the image's 'alt' attribute.
        """
        comic_imgs = soup.find('div', id='comic').find_all('img')
        assert all(tag['title'] == tag['alt'] for tag in comic_imgs)
        joined_title = ' '.join(tag['title'] for tag in comic_imgs)
        return dict(
            img=[tag['src'] for tag in comic_imgs],
            title=joined_title,
        )
1796
1797
1798
class RespawnComic(GenericNavigableComic):
    """Class to retrieve Respawn Comic."""
    # Also on https://respawncomic.tumblr.com
    name = 'respawn'
    long_name = 'Respawn Comic'
    # The url previously ended with a stray space.
    url = 'http://respawncomic.com'
    _categories = ('RESPAWN', )
    get_navi_link = get_a_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://respawncomic.com/comic/c0001/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Title, author and publication date are read from <meta> tags
        (og:title and the shareaholic article_* entries).  The images are
        the og:image entries, minus two site-wide pictures listed in
        skip_imgs.
        """
        title = soup.find('meta', property='og:title')['content']
        author = soup.find('meta', attrs={'name': 'shareaholic:article_author_name'})['content']
        date_str = soup.find('meta', attrs={'name': 'shareaholic:article_published_time'})['content']
        # Only the leading YYYY-MM-DD part of the timestamp is parsed.
        day = string_to_date(date_str[:10], "%Y-%m-%d")
        imgs = soup.find_all('meta', property='og:image')
        # og:image values that are excluded from the comic's image list.
        skip_imgs = {
            'http://respawncomic.com/wp-content/uploads/2016/03/site/HAROLD2.png',
            'http://respawncomic.com/wp-content/uploads/2016/03/site/DEVA.png'
        }
        return {
            'title': title,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [i['content'] for i in imgs if i['content'] not in skip_imgs],
        }
1830
1831
1832 View Code Duplication
class SafelyEndangered(GenericNavigableComic):
    """Retrieve the Safely Endangered comics."""
    # Also on http://tumblr.safelyendangered.com
    name = 'endangered'
    long_name = 'Safely Endangered'
    url = 'http://www.safelyendangered.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.safelyendangered.com/comic/ignored/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return date, images, title and alt text of a comic.

        The alt text is taken from the first image of <div id="comic">;
        every image must have identical 'alt' and 'title' attributes.
        """
        post_title = soup.find('h2', class_='post-title').string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, '%B %d, %Y')
        comic_imgs = soup.find('div', id='comic').find_all('img')
        first_alt = comic_imgs[0]['alt']
        assert all(tag['alt'] == tag['title'] for tag in comic_imgs)
        return dict(
            day=published.day,
            month=published.month,
            year=published.year,
            img=[tag['src'] for tag in comic_imgs],
            title=post_title,
            alt=first_alt,
        )
1859
1860
1861 View Code Duplication
class PicturesInBoxes(GenericNavigableComic):
    """Retrieve the Pictures In Boxes comics."""
    # Also on https://picturesinboxescomic.tumblr.com
    name = 'picturesinboxes'
    long_name = 'Pictures in Boxes'
    url = 'http://www.picturesinboxes.com'
    get_navi_link = get_a_navi_navinext
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.picturesinboxes.com/2013/10/26/tetris/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return date, images, title and author of a comic.

        At least one image is expected in the 'comicpane' <div>, and each
        must carry 'title' and 'alt' attributes equal to the post title.
        """
        post_title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, '%B %d, %Y')
        pane_imgs = soup.find('div', class_='comicpane').find_all('img')
        assert pane_imgs
        assert all(tag['title'] == tag['alt'] == post_title for tag in pane_imgs)
        return dict(
            day=published.day,
            month=published.month,
            year=published.year,
            img=[tag['src'] for tag in pane_imgs],
            title=post_title,
            author=author,
        )
1889
1890
1891 View Code Duplication
class Penmen(GenericNavigableComic):
    """Retrieve the Penmen comics."""
    name = 'penmen'
    long_name = 'Penmen'
    url = 'http://penmen.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://penmen.com/index.php/2016/09/12/penmen-announces-grin-big-brand-clothing/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, short url, images, tags and date of a comic.

        The date is the first 10 characters (YYYY-MM-DD) of the 'datetime'
        attribute of the first <time> element.
        """
        page_title = soup.find('title').string
        content_imgs = soup.find('div', class_='entry-content').find_all('img')
        shortlink = soup.find('link', rel='shortlink')
        short_url = shortlink['href']
        tag_links = soup.find_all('a', rel='tag')
        tags = ' '.join(t.string for t in tag_links)
        iso_date = soup.find('time')['datetime'][:10]
        published = string_to_date(iso_date, "%Y-%m-%d")
        return dict(
            title=page_title,
            short_url=short_url,
            img=[tag['src'] for tag in content_imgs],
            tags=tags,
            month=published.month,
            year=published.year,
            day=published.day,
        )
1918
1919
1920
class TheDoghouseDiaries(GenericNavigableComic):
    """Retrieve The Dog House Diaries comics."""
    name = 'doghouse'
    long_name = 'The Dog House Diaries'
    url = 'http://thedoghousediaries.com'

    @classmethod
    def get_first_comic_link(cls):
        """Return the <a id="firstlink"> found on the home page."""
        home = get_soup_at_url(cls.url)
        return home.find('a', id='firstlink')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the <a> with id 'nextlink' (or 'previouslink')."""
        wanted_id = 'nextlink' if next_ else 'previouslink'
        return last_soup.find('a', id=wanted_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return titles, alt text, image url and number of a comic.

        The image is the first <img> whose src starts with 'dhdcomics/';
        the number is the last '/'-separated component of the comic url.
        """
        img = soup.find('img', src=re.compile('^dhdcomics/.*'))
        comic_url = cls.get_url_from_link(link)
        header = soup.find('h2', id='titleheader').string
        subtext = soup.find('div', id='subtext').string
        hover_text = img.get('title')
        # The src attribute is stripped of surrounding whitespace before joining.
        img_url = urljoin_wrapper(comic_url, img['src'].strip())
        last_component = comic_url.split('/')[-1]
        return {
            'title': header,
            'title2': subtext,
            'alt': hover_text,
            'img': [img_url],
            'num': int(last_component),
        }
1949
1950
1951
class InvisibleBread(GenericListableComic):
    """Class to retrieve Invisible Bread comics."""
    # Also on http://www.gocomics.com/invisible-bread
    name = 'invisiblebread'
    long_name = 'Invisible Bread'
    url = 'http://invisiblebread.com'

    @classmethod
    def get_archive_elements(cls):
        """Get the 'archive-title' cells of the archive page, in reversed order."""
        archive_url = urljoin_wrapper(cls.url, 'archives/')
        return reversed(get_soup_at_url(archive_url).find_all('td', class_='archive-title'))

    @classmethod
    def get_url_from_archive_element(cls, td):
        """Get url corresponding to an archive element (href of its first <a>)."""
        return td.find('a')['href']

    @classmethod
    def get_comic_info(cls, soup, td):
        """Get information about a particular comics.

        The title comes from the archive cell.  The date combines the text
        of the cell's previous sibling (month and day) with the year read
        from the comic url (first numeric path component after cls.url).
        The image in <div id="comic"> must carry 'title' and 'alt'
        attributes equal to the title.
        """
        url = cls.get_url_from_archive_element(td)
        title = td.find('a').string
        month_and_day = td.previous_sibling.string
        # cls.url is escaped so that its dots only match literal dots.
        link_re = re.compile('^%s/([0-9]+)/' % re.escape(cls.url))
        year = link_re.match(url).group(1)
        day = string_to_date(month_and_day + ' ' + year, '%b %d %Y')
        img = soup.find('div', id='comic').find('img')
        assert img['title'] == img['alt'] == title
        return {
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [urljoin_wrapper(cls.url, img['src'])],
            'title': title,
        }
1987
1988
1989
# Retrieval of Disco Bleach does not work anymore.
class DiscoBleach(GenericEmptyComic):
    """Placeholder for the Disco Bleach comics."""
    name = 'discobleach'
    long_name = 'Disco Bleach'
    url = 'http://discobleach.com'
1994
1995
1996
# Retrieval of Tubey Toons from this site does not work anymore.
class TubeyToons(GenericEmptyComic):
    """Placeholder for the TubeyToons comics."""
    # Also on http://tapastic.com/series/Tubey-Toons
    # Also on https://tubeytoons.tumblr.com
    name = 'tubeytoons'
    long_name = 'Tubey Toons'
    url = 'http://tubeytoons.com'
    # NOTE(review): 'TUNEYTOONS' looks like a misspelling of TUBEYTOONS;
    # kept as is because other classes may share this category -- confirm.
    _categories = ('TUNEYTOONS', )
2004
2005
2006 View Code Duplication
class CompletelySeriousComics(GenericNavigableComic):
    """Retrieve the Completely Serious comics."""
    name = 'completelyserious'
    long_name = 'Completely Serious Comics'
    url = 'http://completelyseriouscomics.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return date, images, title, alt text and author of a comic.

        The author is the string of the second child of the 'post-author'
        <span>.  At least one image is expected in the 'comicpane' <div>;
        all images must share the same 'title' and 'alt' text, which is
        reported as alt.
        """
        post_title = soup.find('h2', class_='post-title').string
        author_span = soup.find('span', class_='post-author')
        author = author_span.contents[1].string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, '%B %d, %Y')
        pane_imgs = soup.find('div', class_='comicpane').find_all('img')
        assert pane_imgs
        shared_alt = pane_imgs[0]['title']
        assert all(tag['title'] == tag['alt'] == shared_alt for tag in pane_imgs)
        return dict(
            month=published.month,
            year=published.year,
            day=published.day,
            img=[tag['src'] for tag in pane_imgs],
            title=post_title,
            alt=shared_alt,
            author=author,
        )
2034
2035
2036
class PoorlyDrawnLines(GenericListableComic):
    """Class to retrieve Poorly Drawn Lines comics."""
    # Also on http://pdlcomics.tumblr.com
    name = 'poorlydrawn'
    long_name = 'Poorly Drawn Lines'
    url = 'https://www.poorlydrawnlines.com'
    _categories = ('POORLYDRAWN', )
    get_url_from_archive_element = get_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        At most one image is expected in the 'post' <div>; the title is
        that image's 'title' attribute, or "" when there is no image or no
        such attribute.
        """
        imgs = soup.find('div', class_='post').find_all('img')
        assert len(imgs) <= 1
        return {
            'img': [i['src'] for i in imgs],
            'title': imgs[0].get('title', "") if imgs else "",
        }

    @classmethod
    def get_archive_elements(cls):
        """Get the archive links pointing to '<url>/comic/...', in reversed order."""
        archive_url = urljoin_wrapper(cls.url, 'archive')
        # cls.url is escaped so that its dots only match literal dots.
        url_re = re.compile('^%s/comic/.' % re.escape(cls.url))
        return reversed(get_soup_at_url(archive_url).find_all('a', href=url_re))
2060
2061
2062 View Code Duplication
class LoadingComics(GenericNavigableComic):
    """Retrieve the Loading Artist comics."""
    name = 'loadingartist'
    long_name = 'Loading Artist'
    url = 'http://www.loadingartist.com/latest'

    @classmethod
    def get_first_comic_link(cls):
        """Return the <a title="First"> found on the page at cls.url."""
        latest = get_soup_at_url(cls.url)
        return latest.find('a', title="First")

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the <a> titled 'Next' (or 'Previous')."""
        wanted_title = 'Next' if next_ else 'Previous'
        return last_soup.find('a', title=wanted_title)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, images and date of a comic.

        Only the images of the 'comic' <div> whose 'alt' and 'title'
        attributes are both empty strings are kept.
        """
        heading = soup.find('h1').string
        raw_date = soup.find('span', class_='date').string.strip()
        published = string_to_date(raw_date, "%B %d, %Y")
        comic_div = soup.find('div', class_='comic')
        comic_imgs = comic_div.find_all('img', alt='', title='')
        return dict(
            title=heading,
            img=[tag['src'] for tag in comic_imgs],
            month=published.month,
            year=published.year,
            day=published.day,
        )
2092
2093
2094 View Code Duplication
class ChuckleADuck(GenericNavigableComic):
    """Retrieve the Chuckle-A-Duck comics."""
    name = 'chuckleaduck'
    long_name = 'Chuckle-A-duck'
    url = 'http://chuckleaduck.com'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return date, images, title and author of a comic.

        Pages without a <div id="comic"> (or without images in it) yield an
        empty image list and an empty title.  Otherwise the title is the
        'title' attribute of the first image, which every image must share
        as both 'title' and 'alt'.
        """
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(remove_st_nd_rd_th_from_date(raw_date), "%B %d, %Y")
        author = soup.find('span', class_='post-author').string
        comic_div = soup.find('div', id='comic')
        if comic_div:
            comic_imgs = comic_div.find_all('img')
        else:
            comic_imgs = []
        if comic_imgs:
            title = comic_imgs[0]['title']
        else:
            title = ""
        assert all(tag['title'] == tag['alt'] == title for tag in comic_imgs)
        return dict(
            month=published.month,
            year=published.year,
            day=published.day,
            img=[tag['src'] for tag in comic_imgs],
            title=title,
            author=author,
        )
2120
2121
2122
class DepressedAlien(GenericNavigableComic):
    """Retrieve the Depressed Alien comics."""
    name = 'depressedalien'
    long_name = 'Depressed Alien'
    url = 'http://depressedalien.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the parent of the home page <img name="beginArrow">."""
        begin_arrow = get_soup_at_url(cls.url).find('img', attrs={'name': 'beginArrow'})
        return begin_arrow.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the parent of the 'rightArrow' (or 'leftArrow') image."""
        arrow_name = 'rightArrow' if next_ else 'leftArrow'
        arrow = last_soup.find('img', attrs={'name': arrow_name})
        return arrow.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the twitter:title and the og:image urls of a comic."""
        title_meta = soup.find('meta', attrs={'name': 'twitter:title'})
        title = title_meta['content']
        image_metas = soup.find_all('meta', property='og:image')
        return dict(
            title=title,
            img=[meta['content'] for meta in image_metas],
        )
2148
2149
2150
class TurnOffUs(GenericListableComic):
    """Retrieve the TurnOffUs comics."""
    name = 'turnoffus'
    long_name = 'Turn Off Us'
    url = 'http://turnoff.us'
    get_url_from_archive_element = join_cls_url_to_href

    @classmethod
    def get_archive_elements(cls):
        """Return the 'post-link' anchors of the post list, in reversed order."""
        all_posts_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'all'))
        post_list = all_posts_soup.find('ul', class_='post-list')
        links = post_list.find_all('a', class_='post-link')
        return reversed(links)

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Return the og:title and the og:image urls of a comic."""
        og_title = soup.find('meta', property='og:title')
        title = og_title['content']
        image_metas = soup.find_all('meta', property='og:image')
        return dict(
            title=title,
            img=[meta['content'] for meta in image_metas],
        )
2172
2173
2174
class ThingsInSquares(GenericListableComic):
    """Retrieve the Things In Squares comics."""
    # This can be retrieved in other languages
    # Also on https://tapastic.com/series/Things-in-Squares
    name = 'squares'
    long_name = 'Things in squares'
    url = 'http://www.thingsinsquares.com'

    @classmethod
    def get_comic_info(cls, soup, tr):
        """Return date, titles, description, tags, images and alt of a comic.

        The archive row must hold exactly three cells: the second contains
        the link (title) and the third the date in "%m.%d.%y" format.  The
        remaining fields are read from the comic page; the description is
        '' when the og:description meta tag is missing.
        """
        _, link_cell, date_cell = tr.find_all('td')
        anchor = link_cell.find('a')
        published = string_to_date(date_cell.string, "%m.%d.%y")
        title = anchor.string
        title2 = soup.find('meta', property='og:title')['content']
        desc_meta = soup.find('meta', property='og:description')
        if desc_meta:
            description = desc_meta['content']
        else:
            description = ''
        tag_metas = soup.find_all('meta', property='article:tag')
        tags = ' '.join(meta['content'] for meta in tag_metas)
        content_imgs = soup.find('div', class_='entry-content').find_all('img')
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'title': title,
            'title2': title2,
            'description': description,
            'tags': tags,
            'img': [tag['src'] for tag in content_imgs],
            'alt': ' '.join(tag['alt'] for tag in content_imgs),
        }

    @classmethod
    def get_url_from_archive_element(cls, tr):
        """Return the href of the link in the second of the row's three cells."""
        _, link_cell, _ = tr.find_all('td')
        anchor = link_cell.find('a')
        return anchor['href']

    @classmethod
    def get_archive_elements(cls):
        """Return the rows of the archive page's <tbody>, in reversed order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive-2'))
        rows = archive_soup.find('tbody').find_all('tr')
        return reversed(rows)
2216
2217
2218 View Code Duplication
class HappleTea(GenericNavigableComic):
    """Retrieve the Happle Tea comics."""
    name = 'happletea'
    long_name = 'Happle Tea'
    url = 'http://www.happletea.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, images, alt text, date and author of a comic.

        Title, author and date are read inside the 'post-content' <div>.
        Every image of <div id="comic"> must have identical 'alt' and
        'title' attributes; the alt texts are concatenated without
        separator.
        """
        comic_imgs = soup.find('div', id='comic').find_all('img')
        post = soup.find('div', class_='post-content')
        post_title = post.find('h2', class_='post-title').string
        author = post.find('a', rel='author').string
        raw_date = post.find('span', class_='post-date').string
        published = string_to_date(raw_date, "%B %d, %Y")
        assert all(tag['alt'] == tag['title'] for tag in comic_imgs)
        return dict(
            title=post_title,
            img=[tag['src'] for tag in comic_imgs],
            alt=''.join(tag['alt'] for tag in comic_imgs),
            month=published.month,
            year=published.year,
            day=published.day,
            author=author,
        )
2245
2246
2247
class RockPaperScissors(GenericNavigableComic):
    """Retrieve the Rock Paper Scissors comics."""
    name = 'rps'
    long_name = 'Rock Paper Scissors'
    url = 'http://rps-comics.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, transcript, short url and images of a comic.

        Images are the og:image meta entries; the transcript is the string
        of <div id="transcript-content">.
        """
        page_title = soup.find('title').string
        image_metas = soup.find_all('meta', property='og:image')
        shortlink = soup.find('link', rel='shortlink')
        short_url = shortlink['href']
        transcript_div = soup.find('div', id='transcript-content')
        transcript = transcript_div.string
        return dict(
            title=page_title,
            transcript=transcript,
            short_url=short_url,
            img=[meta['content'] for meta in image_metas],
        )
2268
2269
2270
class FatAwesomeComics(GenericNavigableComic):
    """Retrieve the Fat Awesome comics."""
    # Also on http://fatawesomecomedy.tumblr.com
    name = 'fatawesome'
    long_name = 'Fat Awesome'
    url = 'http://fatawesome.com/comics'
    get_navi_link = get_a_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://fatawesome.com/shortbus/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, description, tags, alt, image and date of a comic.

        Tags default to "" when no article:tag meta exists.  Exactly one
        <img data-recalc-dims="1"> is expected; anything after the last '?'
        of its src is dropped.
        """
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        description = soup.find('meta', attrs={'name': 'description'})['content']
        tag_meta = soup.find('meta', property='article:tag')
        if tag_meta:
            tags = tag_meta['content']
        else:
            tags = ""
        published_meta = soup.find('meta', property='article:published_time')
        iso_date = published_meta['content'][:10]
        published = string_to_date(iso_date, "%Y-%m-%d")
        comic_imgs = soup.find_all('img', attrs={'data-recalc-dims': "1"})
        assert len(comic_imgs) == 1
        return dict(
            title=title,
            description=description,
            tags=tags,
            alt="".join(tag['alt'] for tag in comic_imgs),
            img=[tag['src'].rsplit('?', 1)[0] for tag in comic_imgs],
            month=published.month,
            year=published.year,
            day=published.day,
        )
2301
2302
2303
class AnythingComic(GenericListableComic):
    """Retriever for Anything Comic, driven by the rows of its archive table."""
    # Also on http://tapastic.com/series/anything
    name = 'anythingcomic'
    long_name = 'Anything Comic'
    url = 'http://www.anythingcomic.com'

    @classmethod
    def get_archive_elements(cls):
        """Return the <tr> rows of the archive table that describe comics."""
        archive_url = urljoin_wrapper(cls.url, 'archive/')
        table = get_soup_at_url(archive_url).find('table', id='chapter_table')
        rows = table.find_all('tr')
        # The first 2 <tr>'s do not correspond to comics
        return rows[2:]

    @classmethod
    def get_url_from_archive_element(cls, tr):
        """Build the absolute comic URL from an archive row."""
        # The unpacking also enforces that the row has exactly 4 cells.
        _, comic_cell, _, _ = tr.find_all('td')
        href = comic_cell.find('a')['href']
        return urljoin_wrapper(cls.url, href)

    @classmethod
    def get_comic_info(cls, soup, tr):
        """Extract the comic's metadata from its page and its archive row.

        Number, title and date are taken from the archive row `tr`; the
        image and its alt text are taken from the comic page `soup`.
        """
        num_cell, comic_cell, date_cell, _ = tr.find_all('td')
        num = int(num_cell.string)
        title = comic_cell.find('a').string
        pictures = soup.find_all('img', id='comic_image')
        # Ordinal suffixes are removed before the date is parsed.
        cleaned_date = remove_st_nd_rd_th_from_date(date_cell.string)
        day = string_to_date(cleaned_date, "%B %d, %Y, %I:%M %p")
        assert len(pictures) == 1
        assert all(pic.get('alt') == pic.get('title') for pic in pictures)
        return {
            'num': num,
            'title': title,
            'alt': pictures[0].get('alt', ''),
            'img': [pic['src'] for pic in pictures],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
2344
2345
2346 View Code Duplication
class LonnieMillsap(GenericNavigableComic):
    """Retriever for Lonnie Millsap's comics."""
    name = 'millsap'
    long_name = 'Lonnie Millsap'
    url = 'http://www.lonniemillsap.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.lonniemillsap.com/?p=42'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, author, images and date from a comic page.

        Everything except the title is looked up inside the
        'post-content' div.
        """
        title = soup.find('h2', class_='post-title').string
        post = soup.find('div', class_='post-content')
        author_span = post.find("span", class_="post-author")
        author = author_span.find("a").string
        posted_on = post.find("span", class_="post-date").string
        day = string_to_date(posted_on, "%B %d, %Y")
        entry = post.find("div", class_="entry")
        pictures = entry.find_all("img")
        return {
            'title': title,
            'author': author,
            'img': [pic['src'] for pic in pictures],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
2372
2373
2374
class LinsEditions(GenericNavigableComic):
    """Retriever for the L.I.N.S. Editions comics."""
    # Also on https://linscomics.tumblr.com
    # Now on https://warandpeas.com
    name = 'lins'
    long_name = 'L.I.N.S. Editions'
    url = 'https://linsedition.com'
    _categories = ('LINS', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'https://linsedition.com/2011/09/07/l-i-n-s/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, images and date from the page's <meta> elements."""
        title = soup.find('meta', property='og:title')['content']
        image_metas = soup.find_all('meta', property='og:image')
        published = soup.find('meta', property='article:published_time')['content']
        # Only the leading YYYY-MM-DD part of the timestamp is parsed.
        day = string_to_date(published[:10], "%Y-%m-%d")
        return {
            'title': title,
            'img': [meta['content'] for meta in image_metas],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
2400
2401
2402
class ThorsThundershack(GenericNavigableComic):
    """Retriever for the Thor's Thundershack comics."""
    # Also on http://tapastic.com/series/Thors-Thundershac
    name = 'thor'
    long_name = 'Thor\'s Thundershack'
    url = 'http://www.thorsthundershack.com'
    _categories = ('THOR', )
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'first' navigation link found on the home page."""
        home = get_soup_at_url(cls.url)
        return home.find('a', class_='first navlink')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the link to the next (or previous) comic, or None.

        Links whose href is exactly '/comic' are skipped.
        """
        rel = 'next' if next_ else 'prev'
        candidates = (
            anchor for anchor in last_soup.find_all('a', rel=rel)
            if anchor['href'] != '/comic'
        )
        return next(candidates, None)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract the comic's metadata from the page's itemprop markup."""
        title = soup.find('meta', attrs={'name': 'description'})["content"]
        description = soup.find('div', itemprop='articleBody').text
        author = soup.find('span', itemprop='author copyrightHolder').string
        pictures = soup.find_all('img', itemprop='image')
        assert all(pic['title'] == pic['alt'] for pic in pictures)
        if pictures:
            alt = pictures[0]['alt']
        else:
            alt = ""
        published = soup.find('time', itemprop='datePublished')["datetime"]
        day = string_to_date(published, "%Y-%m-%d %H:%M:%S")
        return {
            # Image sources are joined to the site URL.
            'img': [urljoin_wrapper(cls.url, pic['src']) for pic in pictures],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'author': author,
            'title': title,
            'alt': alt,
            'description': description,
        }
2445
2446
2447
class GerbilWithAJetpack(GenericNavigableComic):
    """Class to retrieve GerbilWithAJetpack comics."""
    name = 'gerbil'
    long_name = 'Gerbil With A Jetpack'
    url = 'http://gerbilwithajetpack.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        Returns a dict with the image URLs, title, alt text, author and
        publication date read from the page `soup`. When the comic div
        contains no image, 'img' is empty and 'alt' is the empty string.
        """
        title = soup.find('h2', class_='post-title').string
        author = soup.find("span", class_="post-author").find("a").string
        date_str = soup.find("span", class_="post-date").string
        day = string_to_date(date_str, "%B %d, %Y")
        imgs = soup.find("div", id="comic").find_all("img")
        # Guard against pages without any image instead of raising
        # IndexError, as the sibling comic classes do.
        alt = imgs[0]['alt'] if imgs else ""
        assert all(i['alt'] == i['title'] == alt for i in imgs)
        return {
            'img': [i['src'] for i in imgs],
            'title': title,
            'alt': alt,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
2474
2475
2476 View Code Duplication
class EveryDayBlues(GenericEmptyComic, GenericNavigableComic):
    """Retriever for the Every Day Blues comics."""
    name = "blues"
    long_name = "Every Day Blues"
    url = "http://everydayblues.net"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract images, title, author and date from a comic page.

        The date is parsed with the "de_DE.utf8" locale. At most one
        image is expected, and its alt and title must equal the post title.
        """
        title = soup.find("h2", class_="post-title").string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        posted_on = soup.find("span", class_="post-date").string
        day = string_to_date(posted_on, "%d. %B %Y", "de_DE.utf8")
        comic_div = soup.find("div", id="comic")
        pictures = comic_div.find_all("img")
        assert all(pic['alt'] == pic['title'] == title for pic in pictures)
        assert len(pictures) <= 1
        return {
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
2502
2503
2504 View Code Duplication
class BiterComics(GenericNavigableComic):
    """Retriever for the Biter Comics."""
    name = "biter"
    long_name = "Biter Comics"
    url = "http://www.bitercomics.com"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract images, title, alt text, author and date from a page.

        Exactly one image is expected inside the comic div.
        """
        title = soup.find("h1", class_="entry-title").string
        author_span = soup.find("span", class_="author vcard")
        author = author_span.find("a").string
        posted_on = soup.find("span", class_="entry-date").string
        day = string_to_date(posted_on, "%B %d, %Y")
        comic_div = soup.find("div", id="comic")
        pictures = comic_div.find_all("img")
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        assert len(pictures) == 1
        only_picture = pictures[0]
        alt = only_picture['alt']
        return {
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'alt': alt,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
2532
2533
2534 View Code Duplication
class TheAwkwardYeti(GenericNavigableComic):
    """Retriever for The Awkward Yeti comics."""
    # Also on http://www.gocomics.com/the-awkward-yeti
    # Also on http://larstheyeti.tumblr.com
    # Also on https://tapastic.com/series/TheAwkwardYeti
    name = 'yeti'
    long_name = 'The Awkward Yeti'
    url = 'http://theawkwardyeti.com'
    _categories = ('YETI', )
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract images, title and date from a comic page."""
        title = soup.find('h2', class_='post-title').string
        posted_on = soup.find("span", class_="post-date").string
        day = string_to_date(posted_on, "%B %d, %Y")
        pictures = soup.find("div", id="comic").find_all("img")
        # Only the first image (if any) must have matching alt and title.
        assert all(pic['alt'] == pic['title'] for pic in pictures[:1])
        return {
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
2561
2562
2563
class PleasantThoughts(GenericNavigableComic):
    """Retriever for the Pleasant Thoughts comics."""
    name = 'pleasant'
    long_name = 'Pleasant Thoughts'
    url = 'http://pleasant-thoughts.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract the title and images found in the 'post-content' div."""
        post = soup.find('div', class_='post-content')
        title = post.find('h2', class_='post-title').string
        entry = post.find("div", class_="entry")
        pictures = entry.find_all("img")
        return {
            'title': title,
            'img': [pic['src'] for pic in pictures],
        }
2581
2582
2583 View Code Duplication
class MisterAndMe(GenericNavigableComic):
    """Retriever for the Mister & Me comics."""
    # Also on http://www.gocomics.com/mister-and-me
    # Also on https://tapastic.com/series/Mister-and-Me
    name = 'mister'
    long_name = 'Mister & Me'
    url = 'http://www.mister-and-me.com'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract images, title, alt text, author and date from a page.

        At most one image is expected; without one, 'alt' is empty.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        posted_on = soup.find("span", class_="post-date").string
        day = string_to_date(posted_on, "%B %d, %Y")
        pictures = soup.find("div", id="comic").find_all("img")
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        assert len(pictures) <= 1
        if pictures:
            alt = pictures[0]['alt']
        else:
            alt = ""
        return {
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'alt': alt,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
2613
2614
2615 View Code Duplication
class LastPlaceComics(GenericNavigableComic):
    """Retriever for the Last Place Comics."""
    name = 'lastplace'
    long_name = 'Last Place Comics'
    url = "http://lastplacecomics.com"
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract images, title, alt text, author and date from a page.

        At most one image is expected; without one, 'alt' is empty.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        posted_on = soup.find("span", class_="post-date").string
        day = string_to_date(posted_on, "%B %d, %Y")
        comic_div = soup.find("div", id="comic")
        pictures = comic_div.find_all("img")
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        assert len(pictures) <= 1
        alt = ""
        if pictures:
            alt = pictures[0]['alt']
        return {
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'alt': alt,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
2643
2644
2645 View Code Duplication
class TalesOfAbsurdity(GenericNavigableComic):
    """Retriever for the Tales of Absurdity comics."""
    # Also on http://tapastic.com/series/Tales-Of-Absurdity
    # Also on http://talesofabsurdity.tumblr.com
    name = 'absurdity'
    long_name = 'Tales of Absurdity'
    url = 'http://talesofabsurdity.com'
    _categories = ('ABSURDITY', )
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract images, title, alt text, author and date from a page.

        'alt' is the alt text of the first image, or empty without images.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        posted_on = soup.find("span", class_="post-date").string
        day = string_to_date(posted_on, "%B %d, %Y")
        pictures = soup.find("div", id="comic").find_all("img")
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        if pictures:
            alt = pictures[0]['alt']
        else:
            alt = ""
        return {
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'alt': alt,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
2675
2676
2677 View Code Duplication
class EndlessOrigami(GenericEmptyComic, GenericNavigableComic):
    """Retriever for the Endless Origami comics."""
    name = "origami"
    long_name = "Endless Origami"
    url = "http://endlessorigami.com"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract images, title, alt text, author and date from a page.

        'alt' is the alt text of the first image, or empty without images.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        posted_on = soup.find("span", class_="post-date").string
        day = string_to_date(posted_on, "%B %d, %Y")
        comic_div = soup.find("div", id="comic")
        pictures = comic_div.find_all("img")
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        alt = ""
        if pictures:
            alt = pictures[0]['alt']
        return {
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'alt': alt,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
2704
2705
2706 View Code Duplication
class PlanC(GenericNavigableComic):
    """Retriever for the Plan C comics."""
    name = 'planc'
    long_name = 'Plan C'
    url = 'http://www.plancomic.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, images and date from a comic page."""
        title = soup.find('h2', class_='post-title').string
        posted_on = soup.find("span", class_="post-date").string
        day = string_to_date(posted_on, "%B %d, %Y")
        comic_div = soup.find('div', id='comic')
        pictures = comic_div.find_all('img')
        return {
            'title': title,
            'img': [pic['src'] for pic in pictures],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
2728
2729
2730
class BuniComic(GenericNavigableComic):
    """Retriever for the Buni comics."""
    name = 'buni'
    long_name = 'BuniComics'
    url = 'http://www.bunicomic.com'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract the image and title from a comic page.

        Exactly one image is expected in the comic div; its title
        attribute is used as the comic's title.
        """
        comic_div = soup.find('div', id='comic')
        pictures = comic_div.find_all('img')
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        assert len(pictures) == 1
        only_picture = pictures[0]
        return {
            'img': [pic['src'] for pic in pictures],
            'title': only_picture['title'],
        }
2748
2749
2750
class GenericCommitStrip(GenericNavigableComic):
    """Common base for the Commit Strip retrievers of each language.

    Subclasses provide name, long_name, url and first_url.
    """
    get_navi_link = get_a_rel_next
    get_first_comic_link = simulate_first_link
    first_url = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, image titles, description and images from a page.

        'title2' is the space-joined title attributes of the images (an
        empty string stands in for a missing attribute).
        """
        desc = soup.find('meta', property='og:description')['content']
        title = soup.find('meta', property='og:title')['content']
        content_div = soup.find('div', class_='entry-content')
        pictures = content_div.find_all('img')
        title2 = ' '.join(pic.get('title', '') for pic in pictures)
        # Sources are converted to plain ASCII URIs and joined to the site URL.
        sources = [
            urljoin_wrapper(cls.url, convert_iri_to_plain_ascii_uri(pic['src']))
            for pic in pictures
        ]
        return {
            'title': title,
            'title2': title2,
            'description': desc,
            'img': sources,
        }
2769
2770
2771
class CommitStripFr(GenericCommitStrip):
    """Retriever for the French edition of Commit Strip."""
    name = "commit_fr"
    long_name = "Commit Strip (Fr)"
    url = "http://www.commitstrip.com/fr"
    _categories = ("FRANCAIS", )
    first_url = "http://www.commitstrip.com/fr/2012/02/22/interview/"
2778
2779
2780
class CommitStripEn(GenericCommitStrip):
    """Retriever for the English edition of Commit Strip."""
    name = "commit_en"
    long_name = "Commit Strip (En)"
    url = "http://www.commitstrip.com/en"
    first_url = "http://www.commitstrip.com/en/2012/02/22/interview/"
2786
2787
2788 View Code Duplication
class GenericBoumerie(GenericNavigableComic):
    """Common base for the Boumeries retrievers of each language.

    Subclasses provide `date_format` and `lang`, which are passed to
    string_to_date when parsing the post date.
    """
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next
    date_format = NotImplemented
    lang = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract short URL, images, title, author and date from a page."""
        title = soup.find('h2', class_='post-title').string
        short_url = soup.find('link', rel='shortlink')['href']
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        posted_on = soup.find('span', class_='post-date').string
        day = string_to_date(posted_on, cls.date_format, cls.lang)
        comic_div = soup.find('div', id='comic')
        pictures = comic_div.find_all('img')
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        return {
            'short_url': short_url,
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'author': author,
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
2814
2815
2816
class BoumerieEn(GenericBoumerie):
    """Retriever for the English edition of Boumeries."""
    name = "boumeries_en"
    long_name = "Boumeries (En)"
    url = "http://comics.boumerie.com"
    date_format = "%B %d, %Y"
    lang = "en_GB.UTF-8"
2823
2824
2825
class BoumerieFr(GenericBoumerie):
    """Retriever for the French edition of Boumeries."""
    name = "boumeries_fr"
    long_name = "Boumeries (Fr)"
    url = "http://bd.boumerie.com"
    _categories = ("FRANCAIS", )
    date_format = "%A, %d %B %Y"
    lang = "fr_FR.utf8"
2833
2834
2835 View Code Duplication
class UnearthedComics(GenericNavigableComic):
    """Class to retrieve Unearthed comics."""
    # Also on http://tapastic.com/series/UnearthedComics
    # Also on https://unearthedcomics.tumblr.com
    name = 'unearthed'
    long_name = 'Unearthed Comics'
    url = 'http://unearthedcomics.com'
    _categories = ('UNEARTHED', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://unearthedcomics.com/comics/world-with-turn-signals/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        Returns a dict with the title (first <h1>, else first <h2>, else
        empty), the og:description text (None when the page has no such
        <meta>), the short URL, the image URLs and the publication date.
        """
        short_url = soup.find('link', rel='shortlink')['href']
        title_elt = soup.find('h1') or soup.find('h2')
        title = title_elt.string if title_elt else ""
        # Store the text of the description, not the <meta> element itself,
        # so that the value is a plain string like the other fields.
        desc_elt = soup.find('meta', property='og:description')
        desc = desc_elt['content'] if desc_elt is not None else None
        date_str = soup.find('time', class_='published updated hidden')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        post = soup.find('div', class_="entry content entry-content type-portfolio")
        imgs = post.find_all('img')
        return {
            'title': title,
            'description': desc,
            'url2': short_url,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
2867
2868
2869 View Code Duplication
class Optipess(GenericNavigableComic):
    """Retriever for the Optipess comics."""
    name = 'optipess'
    long_name = 'Optipess'
    url = 'http://www.optipess.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, alt text, author, images and date from a page.

        A page without a comic div yields no image and an empty 'alt'.
        All images must share the same alt and title text.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        comic_div = soup.find('div', id='comic')
        if comic_div:
            pictures = comic_div.find_all('img')
        else:
            pictures = []
        if pictures:
            alt = pictures[0]['title']
        else:
            alt = ""
        assert all(pic['alt'] == pic['title'] == alt for pic in pictures)
        posted_on = soup.find('span', class_='post-date').string
        day = string_to_date(posted_on, "%B %d, %Y")
        return {
            'title': title,
            'alt': alt,
            'author': author,
            'img': [pic['src'] for pic in pictures],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
2897
2898
2899
class PainTrainComic(GenericNavigableComic):
    """Class to retrieve Pain Train Comics."""
    name = 'paintrain'
    long_name = 'Pain Train Comics'
    url = 'http://paintraincomic.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        Returns a dict with the short URL, the comic number parsed from
        that short URL ('<url>/?p=<num>'), the image URLs, the publication
        date, the alt text and the title. When the comic div contains no
        image, 'img' is empty and 'alt' is the empty string.
        """
        title = soup.find('h2', class_='post-title').string
        short_url = soup.find('link', rel='shortlink')['href']
        # The site URL is escaped so that its dots only match literal dots.
        short_url_re = re.compile('^%s/\\?p=([0-9]*)' % re.escape(cls.url))
        num = int(short_url_re.match(short_url).groups()[0])
        imgs = soup.find('div', id='comic').find_all('img')
        # Guard against pages without any image instead of raising IndexError.
        alt = imgs[0]['title'] if imgs else ""
        assert all(i['alt'] == i['title'] == alt for i in imgs)
        date_str = soup.find('span', class_='post-date').string
        day = string_to_date(date_str, "%d/%m/%Y")
        return {
            'short_url': short_url,
            'num': num,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'alt': alt,
            'title': title,
        }
2929
2930
2931
class MoonBeard(GenericNavigableComic):
    """Class to retrieve MoonBeard comics."""
    # Also on http://blog.squiresjam.es
    # Also on http://www.webtoons.com/en/comedy/moon-beard/list?title_no=471
    name = 'moonbeard'
    long_name = 'Moon Beard'
    url = 'http://moonbeard.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        Returns a dict with the short URL, the comic number parsed from
        that short URL ('<url>/?p=<num>'), the image URLs, the publication
        date, the title, the space-joined article tags, the alt text and
        the author. When the comic div contains no image, 'img' is empty
        and 'alt' is the empty string.
        """
        title = soup.find('h2', class_='post-title').string
        short_url = soup.find('link', rel='shortlink')['href']
        # The site URL is escaped so that its dots only match literal dots.
        short_url_re = re.compile('^%s/\\?p=([0-9]*)' % re.escape(cls.url))
        num = int(short_url_re.match(short_url).groups()[0])
        imgs = soup.find('div', id='comic').find_all('img')
        # Guard against pages without any image instead of raising IndexError.
        alt = imgs[0]['title'] if imgs else ""
        assert all(i['alt'] == i['title'] == alt for i in imgs)
        date_str = soup.find('span', class_='post-date').string
        day = string_to_date(date_str, "%B %d, %Y")
        tags = ' '.join(t['content'] for t in soup.find_all('meta', property='article:tag'))
        author = soup.find('span', class_='post-author').string
        return {
            'short_url': short_url,
            'num': num,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'title': title,
            'tags': tags,
            'alt': alt,
            'author': author,
        }
2967
2968
2969
class AHammADay(GenericEmptyComic, GenericNavigableComic):
    """Class to retrieve class A Hamm A Day comics."""
    name = 'hamm'
    long_name = 'A Hamm A Day'
    url = 'http://www.ahammaday.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.ahammaday.com/today/3/6/french'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns None when the page has no corresponding navigation item.
        """
        # prev is next / next is prev
        li = last_soup.find('li', class_='previous' if next_ else 'next')
        # find() yields None when the <li> is absent; do not dereference it.
        return li.find('a') if li else None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        Returns a dict with the image URLs (from <meta itemprop="image">),
        the og:title, the author and the publication date.
        """
        date_str = soup.find('time', class_='published')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        author = soup.find('span', class_='blog-author').find('a').string
        title = soup.find('meta', property='og:title')['content']
        imgs = soup.find_all('meta', itemprop='image')
        return {
            'img': [i['content'] for i in imgs],
            'title': title,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
        }
3000
3001
3002
class LittleLifeLines(GenericNavigableComic):
    """Retriever for the Little Life Lines comics."""
    # Also on https://little-life-lines.tumblr.com
    name = 'life'
    long_name = 'Little Life Lines'
    url = 'http://www.littlelifelines.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.littlelifelines.com/comics/well-done'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the link to the next (or previous) comic, or None."""
        # prev is next / next is prev
        css_class = 'prev' if next_ else 'next'
        item = last_soup.find('li', class_=css_class)
        if item:
            return item.find('a')
        return None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, alt text, description, author, date and images.

        Only <img> elements that carry a 'src' attribute are kept; 'alt'
        is the alt text of the first of them.
        """
        title = soup.find('meta', property='og:title')['content']
        desc = soup.find('meta', property='og:description')['content']
        published = soup.find('time', class_='published')['datetime']
        day = string_to_date(published, "%Y-%m-%d")
        author = soup.find('a', rel='author').string
        body = soup.find('div', class_="body entry-content")
        pictures = [pic for pic in body.find_all('img') if pic.get('src') is not None]
        alt = pictures[0]['alt']
        return {
            'title': title,
            'alt': alt,
            'description': desc,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [pic['src'] for pic in pictures],
        }
3041
3042
3043
class GenericWordPressInkblot(GenericNavigableComic):
    """Base class for comics published with WordPress and Inkblot."""
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Find the 'first comic' anchor on the page at cls.url."""
        first_link_classes = 'webcomic-link webcomic1-link first-webcomic-link first-webcomic1-link'
        home = get_soup_at_url(cls.url)
        return home.find('a', class_=first_link_classes)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary (title, date, images) for a comic page."""
        title = soup.find('meta', property='og:title')['content']
        pictures = soup.find('div', class_='webcomic-image').find_all('img')
        published_meta = soup.find('meta', property='article:published_time')
        # Only the first ten characters (the 'YYYY-MM-DD' part) are parsed.
        published = string_to_date(published_meta['content'][:10], "%Y-%m-%d")
        info = {
            'title': title,
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [picture['src'] for picture in pictures],
        }
        return info
3066
3067
3068
class EverythingsStupid(GenericWordPressInkblot):
    """Everything's Stupid comics, retrieved from the WordPress/Inkblot site."""
    # Other sources:
    #  - http://tapastic.com/series/EverythingsStupid
    #  - http://www.webtoons.com/en/challenge/everythings-stupid/list?title_no=14591
    #  - http://everythingsstupidcomics.tumblr.com
    name = 'stupid'
    long_name = "Everything's Stupid"
    url = 'http://everythingsstupid.net'
3076
3077
3078
class TheIsmComics(GenericWordPressInkblot):
    """The Ism comics, retrieved from the WordPress/Inkblot site."""
    # Possibly also on https://tapastic.com/series/TheIsm (unconfirmed)
    name = 'theism'
    long_name = 'The Ism'
    url = 'http://www.theism-comics.com'
3084
3085
3086
class WoodenPlankStudios(GenericEmptyComic, GenericWordPressInkblot):
    """Wooden Plank Studios comics (GenericEmptyComic comes first in the bases)."""
    name = 'woodenplank'
    long_name = 'Wooden Plank Studios'
    url = 'http://woodenplankstudios.com'
3091
3092
3093
class ElectricBunnyComic(GenericNavigableComic):
    """Retriever for the Electric Bunny Comics website."""
    # Other source: http://electricbunnycomics.tumblr.com
    name = 'bunny'
    long_name = 'Electric Bunny Comic'
    url = 'http://www.electricbunnycomics.com/View/Comic/153/Welcome+to+Hell'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the element wrapping the 'First' navigation image."""
        first_img = get_soup_at_url(cls.url).find('img', alt='First')
        return first_img.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the element wrapping the 'Next'/'Back' image, or None."""
        label = 'Next' if next_ else 'Back'
        button = last_soup.find('img', alt=label)
        if not button:
            return None
        return button.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary from the Open Graph meta tags."""
        title = soup.find('meta', property='og:title')['content']
        image_metas = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'img': [meta['content'] for meta in image_metas],
        }
3121
3122
3123
class SheldonComics(GenericNavigableComic):
    """Retriever for the Sheldon webcomic."""
    # Other source: http://www.gocomics.com/sheldon
    name = 'sheldon'
    long_name = 'Sheldon Comics'
    url = 'http://www.sheldoncomics.com'

    @classmethod
    def get_first_comic_link(cls):
        """Return the anchor whose id is 'nav-first' on the page at cls.url."""
        home = get_soup_at_url(cls.url)
        return home.find("a", id="nav-first")

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the next/previous navigation anchor, or None if there is none."""
        nav_id = "nav-next" if next_ else "nav-prev"
        candidates = last_soup.find_all("a", id=nav_id)
        # Anchors whose href is exactly the home page url are skipped.
        return next(
            (anchor for anchor in candidates
             if anchor['href'] != 'http://www.sheldoncomics.com'),
            None)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary from the single image in 'comic-foot'."""
        footer = soup.find("div", id="comic-foot")
        pictures = footer.find_all("img")
        # Sanity checks: alt and title must agree, and exactly one image is expected.
        for picture in pictures:
            assert picture['alt'] == picture['title']
        assert len(pictures) == 1
        return {
            'title': pictures[0]['title'],
            'img': [picture['src'] for picture in pictures],
        }
3154
3155
3156 View Code Duplication
class Ubertool(GenericNavigableComic):
    """Retriever for the Ubertool webcomic."""
    # Other sources:
    #  - https://ubertool.tumblr.com
    #  - https://tapastic.com/series/ubertool
    name = 'ubertool'
    long_name = 'Ubertool'
    url = 'http://ubertoolcomic.com'
    _categories = ('UBERTOOL',)
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_a_comicnavbase_comicnavnext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary (images, title, date) for a comic page."""
        title = soup.find('h2', class_='post-title').string
        # The date is displayed like 'January 31, 2016' (format "%B %d, %Y").
        published = string_to_date(
            soup.find('span', class_='post-date').string, "%B %d, %Y")
        comic_div = soup.find('div', id='comic')
        pictures = comic_div.find_all('img')
        info = {
            'img': [picture['src'] for picture in pictures],
            'title': title,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
        return info
3181
3182
3183
class EarthExplodes(GenericNavigableComic):
    """Retriever for The Earth Explodes webcomic."""
    name = 'earthexplodes'
    long_name = 'The Earth Explodes'
    url = 'http://www.earthexplodes.com'
    first_url = 'http://www.earthexplodes.com/comics/000/'
    get_first_comic_link = simulate_first_link
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor with id 'next' or 'prev' depending on next_."""
        anchor_id = 'next' if next_ else 'prev'
        return last_soup.find('a', id=anchor_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary (images, title, alt) for a comic page."""
        title = soup.find('title').string
        pictures = soup.find('div', id='image').find_all('img')
        # The 'title' attribute of the first image is used as alt text,
        # defaulting to an empty string when the attribute is missing.
        alt = pictures[0].get('title', '')
        # Image sources are resolved against the site url.
        image_urls = [urljoin_wrapper(cls.url, picture['src']) for picture in pictures]
        return {
            'img': image_urls,
            'title': title,
            'alt': alt,
        }
3208
3209
3210
class PomComics(GenericNavigableComic):
    """Retriever for Pom Comics / Piece of Me."""
    name = 'pom'
    long_name = 'Pom Comics / Piece of Me'
    url = 'http://www.pomcomic.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the anchor with class 'btn_first' on the page at cls.url."""
        home = get_soup_at_url(cls.url)
        return home.find('a', class_='btn_first')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the 'btn_next' or 'btn_prev' anchor depending on next_."""
        button_class = 'btn_next' if next_ else 'btn_prev'
        return last_soup.find('a', class_=button_class)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary (title, desc, tags, images) for a comic page."""
        title = soup.find('h1', id="comic-name").string
        description = soup.find('meta', property='og:description')['content']
        # 'name' cannot be passed as a keyword to find(), hence the attrs dict.
        keywords = soup.find('meta', attrs={'name': 'keywords'})['content']
        pictures = soup.find('div', class_='comic').find_all('img')
        # Image sources are resolved against the site url.
        image_urls = [urljoin_wrapper(cls.url, picture['src']) for picture in pictures]
        return {
            'title': title,
            'desc': description,
            'tags': keywords,
            'img': image_urls,
        }
3240
3241
3242
class CubeDrone(GenericEmptyComic, GenericNavigableComic):
    """Class to retrieve Cube Drone comics."""
    name = 'cubedrone'
    long_name = 'Cube Drone'
    url = 'http://cube-drone.com/comics'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        The link is the parent element of the 'backward' glyphicon span."""
        return get_soup_at_url(cls.url).find('span', class_='glyphicon glyphicon-backward').parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        The link is the parent element of the right (next) or left
        (previous) chevron span. None is returned when the page has no
        such span, instead of failing on a missing element."""
        class_ = 'glyphicon glyphicon-chevron-' + ('right' if next_ else 'left')
        span = last_soup.find('span', class_=class_)
        return span.parent if span else None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dictionary with the twitter url and title of the page
        and the title, alt text and sources of the comic images."""
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        url2 = soup.find('meta', attrs={'name': 'twitter:url'})['content']
        # date_str = soup.find('h2', class_='comic_title').find('small').string
        # day = string_to_date(date_str, "%B %d, %Y, %I:%M %p")
        imgs = soup.find_all('img', class_='comic img-responsive')
        # Title and alt text are taken from the first image only.
        title2 = imgs[0]['title']
        alt = imgs[0]['alt']
        return {
            'url2': url2,
            'title': title,
            'title2': title2,
            'alt': alt,
            'img': [i['src'] for i in imgs],
        }
3277
3278
3279
class MakeItStoopid(GenericNavigableComic):
    """Retriever for the Make It Stoopid webcomic."""
    name = 'stoopid'
    long_name = 'Make it stoopid'
    url = 'http://makeitstoopid.com/comic.php'

    @classmethod
    def get_nav(cls, soup):
        """Return the navigation elements of a page as a list.

        Elements of class 'cnav' are split into a first group of five and
        the rest; both groups are expected to be equal. In order, the five
        elements are: begin, prev, archive, next, end. An element without
        an 'href' attribute is replaced by None."""
        elements = soup.find_all(class_='cnav')
        first_bar = elements[:5]
        second_bar = elements[5:]
        assert first_bar == second_bar
        links = []
        for element in first_bar:
            links.append(element if element.get('href') is not None else None)
        return links

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'begin' navigation element of the page at cls.url."""
        navigation = cls.get_nav(get_soup_at_url(cls.url))
        return navigation[0]

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the 'next' (index 3) or 'prev' (index 1) navigation element."""
        position = 3 if next_ else 1
        return cls.get_nav(last_soup)[position]

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary; the title comes from the link itself."""
        pictures = soup.find_all('img', id='comicimg')
        info = {
            'title': link['title'],
            'img': [picture['src'] for picture in pictures],
        }
        return info
3313
3314
3315
class MarketoonistComics(GenericNavigableComic):
    """Retriever for the Marketoonist cartoons."""
    name = 'marketoonist'
    long_name = 'Marketoonist'
    url = 'https://marketoonist.com/cartoons'
    first_url = 'https://marketoonist.com/2002/10/the-8-types-of-brand-managers-2.html'
    get_first_comic_link = simulate_first_link
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary from the page's meta tags."""
        image_metas = soup.find_all('meta', property='og:image')
        published_meta = soup.find('meta', property='article:published_time')
        # Only the first ten characters (the 'YYYY-MM-DD' part) are parsed.
        published = string_to_date(published_meta['content'][:10], "%Y-%m-%d")
        title = soup.find('meta', property='og:title')['content']
        info = {
            'img': [meta['content'] for meta in image_metas],
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'title': title,
        }
        return info
3338
3339
3340
class ConsoliaComics(GenericNavigableComic):
    """Retriever for the Consolia webcomic."""
    name = 'consolia'
    long_name = 'consolia'
    url = 'https://consolia-comic.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the anchor with class 'first' on the page at cls.url."""
        home = get_soup_at_url(cls.url)
        return home.find('a', class_='first')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor with class 'next' or 'prev' depending on next_."""
        anchor_class = 'next' if next_ else 'prev'
        return last_soup.find('a', class_=anchor_class)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary (title, images, date) for a comic page."""
        title = soup.find('meta', property='og:title')['content']
        # The date comes from the 'datetime' attribute of the first <time> tag.
        published = string_to_date(soup.find('time')["datetime"], "%Y-%m-%d")
        image_metas = soup.find_all('meta', property='og:image')
        info = {
            'title': title,
            'img': [meta['content'] for meta in image_metas],
            'day': published.day,
            'month': published.month,
            'year': published.year,
        }
        return info
3371
3372
3373
class TuMourrasMoinsBete(GenericNavigableComic):
    """Retriever for the Tu Mourras Moins Bete blog."""
    name = 'mourrasmoinsbete'
    long_name = 'Tu Mourras Moins Bete'
    url = 'http://tumourrasmoinsbete.blogspot.fr'
    _categories = ('FRANCAIS',)
    first_url = 'http://tumourrasmoinsbete.blogspot.fr/2008/06/essai.html'
    get_first_comic_link = simulate_first_link

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the blog pager anchor: 'newer' for next, 'older' for previous."""
        if next_:
            pager_id = 'Blog1_blog-pager-newer-link'
        else:
            pager_id = 'Blog1_blog-pager-older-link'
        return last_soup.find('a', id=pager_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary (images, author, title) for a blog post."""
        title = soup.find('title').string
        article = soup.find('div', itemprop='description articleBody')
        pictures = article.find_all('img')
        author = soup.find('span', itemprop='author').string
        info = {
            'img': [picture['src'] for picture in pictures],
            'author': author,
            'title': title,
        }
        return info
3398
3399
3400
class GeekAndPoke(GenericNavigableComic):
    """Class to retrieve Geek And Poke comics."""
    name = 'geek'
    long_name = 'Geek And Poke'
    url = 'http://geek-and-poke.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://geek-and-poke.com/geekandpoke/2006/8/27/a-new-place-for-a-not-so-old-blog.html'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic."""
        # The classes are swapped on the site: 'prev-item' is used to reach
        # the next comic and 'next-item' to reach the previous one.
        return last_soup.find('a', class_='prev-item' if next_ else 'next-item')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dictionary with the title, alt text, description, author,
        publication date and image urls. Pages without any usable image
        yield an empty alt text and an empty image list."""
        title = soup.find('meta', property='og:title')['content']
        desc = soup.find('meta', property='og:description')['content']
        date_str = soup.find('time', class_='published')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        author = soup.find('a', rel='author').string
        # The content is found in one of two containers depending on the page.
        div_content = (soup.find('div', class_="body entry-content") or
                       soup.find('div', class_="special-content"))
        imgs = div_content.find_all('img')
        # Images without a 'src' attribute are ignored.
        imgs = [i for i in imgs if i.get('src') is not None]
        # NOTE(review): on a BeautifulSoup Tag, `'title' not in i` tests the
        # tag's children, not its attributes, so this check looks like it
        # always passes for <img> tags. It is kept unchanged because making
        # it effective (e.g. with i.has_attr('title')) could start failing
        # on existing pages - confirm before changing.
        assert all('title' not in i or i['alt'] == i['title'] for i in imgs)
        # Alt text is always a string, even when there is no image.
        alt = imgs[0].get('alt', "") if imgs else ""
        return {
            'title': title,
            'alt': alt,
            'description': desc,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
        }
3438
3439
3440
class GloryOwlComix(GenericNavigableComic):
    """Retriever for the Glory Owl blog."""
    name = 'gloryowl'
    long_name = 'Glory Owl'
    url = 'http://gloryowlcomix.blogspot.fr'
    _categories = ('NSFW', 'FRANCAIS')
    first_url = 'http://gloryowlcomix.blogspot.fr/2013/02/1_7.html'
    get_first_comic_link = simulate_first_link

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the blog pager anchor: 'newer' for next, 'older' for previous."""
        if next_:
            pager_id = 'Blog1_blog-pager-newer-link'
        else:
            pager_id = 'Blog1_blog-pager-older-link'
        return last_soup.find('a', id=pager_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary (images, author, title) for a blog post."""
        title = soup.find('title').string
        # Images are listed in <link rel="image_src"> elements.
        image_links = soup.find_all('link', rel='image_src')
        author = soup.find('a', rel='author').string
        info = {
            'img': [image_link['href'] for image_link in image_links],
            'author': author,
            'title': title,
        }
        return info
3465
3466
3467
class GenericTumblrV1(GenericComic):
    """Generic class to retrieve comics from Tumblr using the V1 API."""
    _categories = ('TUMBLR', )

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generic implementation of get_next_comic for Tumblr comics.

        Posts for which get_comic_info returns None are skipped."""
        for p in cls.get_posts(last_comic):
            comic = cls.get_comic_info(p)
            if comic is not None:
                yield comic

    @classmethod
    def get_url_from_post(cls, post):
        """Get the url of a post, printing a warning if it is not under cls.url."""
        url = post['url']
        if not url.startswith(cls.url):
            print("url '%s' does not start with '%s'" % (url, cls.url))
        return url

    @classmethod
    def get_api_url(cls):
        """Get the url of the API endpoint ('/api/read/' under cls.url)."""
        return urljoin_wrapper(cls.url, '/api/read/')

    @classmethod
    def get_api_url_for_id(cls, tumblr_id):
        """Get the API url for the post with the given (integer) id."""
        return cls.get_api_url() + '?id=%d' % (tumblr_id)

    @classmethod
    def get_comic_info(cls, post):
        """Get information about a particular comics.

        Returns None for posts whose type is not 'photo'."""
        type_ = post['type']
        if type_ != 'photo':
            return None
        tumblr_id = int(post['id'])
        api_url = cls.get_api_url_for_id(tumblr_id)
        # NOTE(review): fromtimestamp() without a timezone uses the local
        # timezone, so the date may depend on the machine settings.
        day = datetime.datetime.fromtimestamp(int(post['unix-timestamp'])).date()
        caption = post.find('photo-caption')
        title = caption.string if caption else ""
        tags = ' '.join(t.string for t in post.find_all('tag'))
        # Photos may appear in 'photo' tags and/or straight in the post
        photo_tags = post.find_all('photo')
        if not photo_tags:
            photo_tags = [post]
        # Images are in multiple resolutions - taking the first one
        imgs = [photo.find('photo-url') for photo in photo_tags]
        return {
            'url': cls.get_url_from_post(post),
            'url2': post['url-with-slug'],
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'title': title,
            'tags': tags,
            'img': [i.string for i in imgs],
            'tumblr-id': tumblr_id,
            'api_url': api_url,
        }

    @classmethod
    def get_posts(cls, last_comic, nb_post_per_call=10):
        """Get posts using API. nb_post_per_call is max 50.

        Posts are retrieved from newer to older as per the tumblr v1 api
        but are returned in chronological order.

        When last_comic is provided, only the posts newer than the one
        identified by its 'tumblr-id' are returned. An empty iterable is
        returned when that post cannot be found."""
        waiting_for_id = last_comic['tumblr-id'] if last_comic else None
        posts_acc = []
        # The check is made on waiting_for_id (and not on last_comic) so
        # that an empty last_comic is handled like a missing one instead
        # of building an API url from None.
        if waiting_for_id is not None:
            # Sometimes, tumblr posts are deleted. When previous post is deleted, we
            # might end up spending a lot of time looking for something that
            # doesn't exist. Failing early and clearly might be a better option.
            last_api_url = cls.get_api_url_for_id(waiting_for_id)
            try:
                get_soup_at_url(last_api_url)
            except urllib.error.HTTPError:
                # Check the main url to tell a deleted post from a missing blog.
                try:
                    get_soup_at_url(cls.url)
                except urllib.error.HTTPError:
                    print("Did not find previous post nor main url %s" % cls.url)
                else:
                    print("Did not find previous post %s : it might have been deleted" % last_api_url)
                return []
        api_url = cls.get_api_url()
        posts = get_soup_at_url(api_url).find('posts')
        start, total = int(posts['start']), int(posts['total'])
        assert start == 0
        for starting_num in range(0, total, nb_post_per_call):
            api_url2 = api_url + '?start=%d&num=%d' % (starting_num, nb_post_per_call)
            posts2 = get_soup_at_url(api_url2).find('posts')
            start2, total2 = int(posts2['start']), int(posts2['total'])
            assert starting_num == start2, "%d != %d" % (starting_num, start2)
            # This may happen and should be handled in the future
            assert total == total2, "%d != %d" % (total, total2)
            for p in posts2.find_all('post'):
                tumblr_id = int(p['id'])
                # Stop as soon as the last retrieved post is reached:
                # everything accumulated so far is newer.
                if waiting_for_id and waiting_for_id == tumblr_id:
                    return reversed(posts_acc)
                posts_acc.append(p)
        if waiting_for_id is None:
            return reversed(posts_acc)
        print("Did not find %s : there might be a problem" % waiting_for_id)
        return []
3568
3569
3570
class SaturdayMorningBreakfastCerealTumblr(GenericTumblrV1):
    """Saturday Morning Breakfast Cereal comics, retrieved through the Tumblr V1 API."""
    # Other sources:
    #  - http://www.gocomics.com/saturday-morning-breakfast-cereal
    #  - http://www.smbc-comics.com
    name = 'smbc-tumblr'
    long_name = 'Saturday Morning Breakfast Cereal (from Tumblr)'
    url = 'http://smbc-comics.tumblr.com'
    _categories = ('SMBC',)
3578
3579
3580
class IrwinCardozo(GenericTumblrV1):
    """Irwin Cardozo comics, retrieved through the Tumblr V1 API."""
    name = 'irwinc'
    long_name = 'Irwin Cardozo'
    url = 'http://irwincardozocomics.tumblr.com'
3585
3586
3587
class AccordingToDevin(GenericTumblrV1):
    """According To Devin comics, retrieved through the Tumblr V1 API."""
    name = 'devin'
    long_name = 'According To Devin'
    url = 'http://accordingtodevin.tumblr.com'
3592
3593
3594
class ItsTheTieTumblr(GenericTumblrV1):
    """It's the tie comics, retrieved through the Tumblr V1 API."""
    # Other sources:
    #  - http://itsthetie.com
    #  - https://tapastic.com/series/itsthetie
    name = 'tie-tumblr'
    long_name = "It's the tie (from Tumblr)"
    url = 'http://itsthetie.tumblr.com'
    _categories = ('TIE',)
3602
3603
3604
class OctopunsTumblr(GenericTumblrV1):
    """Octopuns comics, retrieved through the Tumblr V1 API."""
    # Other source: http://www.octopuns.net
    name = 'octopuns-tumblr'
    long_name = 'Octopuns (from Tumblr)'
    url = 'http://octopuns.tumblr.com'
3610
3611
3612
class PicturesInBoxesTumblr(GenericTumblrV1):
    """Pictures In Boxes comics, retrieved through the Tumblr V1 API."""
    # Other source: http://www.picturesinboxes.com
    name = 'picturesinboxes-tumblr'
    long_name = 'Pictures in Boxes (from Tumblr)'
    url = 'https://picturesinboxescomic.tumblr.com'
3618
3619
3620
class TubeyToonsTumblr(GenericTumblrV1):
    """Tubey Toons comics, retrieved through the Tumblr V1 API."""
    # Other sources:
    #  - http://tapastic.com/series/Tubey-Toons
    #  - http://tubeytoons.com
    name = 'tubeytoons-tumblr'
    long_name = 'Tubey Toons (from Tumblr)'
    url = 'https://tubeytoons.tumblr.com'
    # NOTE(review): the category is spelled 'TUNEY', unlike the comic name;
    # check any other class sharing this category before renaming it.
    _categories = ('TUNEYTOONS',)
3628
3629
3630
class UnearthedComicsTumblr(GenericTumblrV1):
    """Unearthed Comics, retrieved through the Tumblr V1 API."""
    # Other sources:
    #  - http://tapastic.com/series/UnearthedComics
    #  - http://unearthedcomics.com
    name = 'unearthed-tumblr'
    long_name = 'Unearthed Comics (from Tumblr)'
    url = 'https://unearthedcomics.tumblr.com'
    _categories = ('UNEARTHED',)
3638
3639
3640
class PieComic(GenericTumblrV1):
    """Pie Comic comics, retrieved through the Tumblr V1 API."""
    name = 'pie'
    long_name = 'Pie Comic'
    url = 'http://piecomic.tumblr.com'
3645
3646
3647
class MrEthanDiamond(GenericTumblrV1):
    """Mr Ethan Diamond comics, retrieved through the Tumblr V1 API."""
    name = 'diamond'
    long_name = 'Mr Ethan Diamond'
    url = 'http://mrethandiamond.tumblr.com'
3652
3653
3654
class Flocci(GenericTumblrV1):
    """floccinaucinihilipilification comics, retrieved through the Tumblr V1 API."""
    name = 'flocci'
    long_name = 'floccinaucinihilipilification'
    url = 'http://floccinaucinihilipilificationa.tumblr.com'
3659
3660
3661
class UpAndOut(GenericTumblrV1):
    """Up & Out comics, retrieved through the Tumblr V1 API."""
    # Other source: http://tapastic.com/series/UP-and-OUT
    name = 'upandout'
    long_name = 'Up And Out (from Tumblr)'
    url = 'http://upandoutcomic.tumblr.com'
3667
3668
3669
class Pundemonium(GenericTumblrV1):
    """Pundemonium comics, retrieved through the Tumblr V1 API."""
    name = 'pundemonium'
    long_name = 'Pundemonium'
    url = 'http://monstika.tumblr.com'
3674
3675
3676
class PoorlyDrawnLinesTumblr(GenericTumblrV1):
    """Poorly Drawn Lines comics, retrieved through the Tumblr V1 API."""
    # Other source: http://poorlydrawnlines.com
    name = 'poorlydrawn-tumblr'
    long_name = 'Poorly Drawn Lines (from Tumblr)'
    url = 'http://pdlcomics.tumblr.com'
    _categories = ('POORLYDRAWN',)
3683
3684
3685
class PearShapedComics(GenericTumblrV1):
    """Pear-Shaped Comics, retrieved through the Tumblr V1 API."""
    name = 'pearshaped'
    long_name = 'Pear-Shaped Comics'
    url = 'http://pearshapedcomics.com'
3690
3691
3692
class PondScumComics(GenericTumblrV1):
    """Pond Scum comics, retrieved through the Tumblr V1 API."""
    name = 'pond'
    long_name = 'Pond Scum'
    url = 'http://pondscumcomic.tumblr.com'
3697
3698
3699
class MercworksTumblr(GenericTumblrV1):
    """Mercworks comics, retrieved through the Tumblr V1 API."""
    # Other source: http://mercworks.net
    name = 'mercworks-tumblr'
    long_name = 'Mercworks (from Tumblr)'
    url = 'http://mercworks.tumblr.com'
3705
3706
3707
class OwlTurdTumblr(GenericTumblrV1):
    """Owl Turd comics, retrieved through the Tumblr V1 API."""
    # Other source: http://tapastic.com/series/Owl-Turd-Comix
    name = 'owlturd-tumblr'
    long_name = 'Owl Turd (from Tumblr)'
    url = 'http://owlturdcomix.tumblr.com'
    _categories = ('OWLTURD',)
3714
3715
3716
class VectorBelly(GenericTumblrV1):
    """Vector Belly comics, retrieved through the Tumblr V1 API."""
    # Other source: http://vectorbelly.com
    name = 'vector'
    long_name = 'Vector Belly'
    url = 'http://vectorbelly.tumblr.com'
3722
3723
3724
class GoneIntoRapture(GenericTumblrV1):
    """Gone Into Rapture comics, retrieved through the Tumblr V1 API."""
    # Other sources:
    #  - http://goneintorapture.tumblr.com
    #  - http://tapastic.com/series/Goneintorapture
    name = 'rapture'
    long_name = 'Gone Into Rapture'
    url = 'http://goneintorapture.com'
3731
3732
3733
class TheOatmealTumblr(GenericTumblrV1):
    """The Oatmeal comics, retrieved through the Tumblr V1 API."""
    # Other source: http://theoatmeal.com
    name = 'oatmeal-tumblr'
    long_name = 'The Oatmeal (from Tumblr)'
    url = 'http://oatmeal.tumblr.com'
3739
3740
3741
class HeckIfIKnowComicsTumblr(GenericTumblrV1):
    """Heck If I Know comics, retrieved through the Tumblr V1 API."""
    # Other source: http://tapastic.com/series/Regular
    name = 'heck-tumblr'
    long_name = 'Heck if I Know comics (from Tumblr)'
    url = 'http://heckifiknowcomics.com'
3747
3748
3749
class MyJetPack(GenericTumblrV1):
    """My Jet Pack comics, retrieved through the Tumblr V1 API."""
    name = 'jetpack'
    long_name = 'My Jet Pack'
    url = 'http://myjetpack.tumblr.com'
3754
3755
3756
class CheerUpEmoKidTumblr(GenericTumblrV1):
    """Cheer Up Emo Kid comics, retrieved through the Tumblr V1 API."""
    # Other sources:
    #  - http://www.cheerupemokid.com
    #  - http://tapastic.com/series/CUEK
    name = 'cuek-tumblr'
    long_name = 'Cheer Up Emo Kid (from Tumblr)'
    url = 'https://enzocomics.tumblr.com'
3763
3764
3765
class ForLackOfABetterComic(GenericTumblrV1):
    """For Lack Of A Better Comic, retrieved through the Tumblr V1 API."""
    # Other source: http://forlackofabettercomic.com
    name = 'lack'
    long_name = 'For Lack Of A Better Comic'
    url = 'http://forlackofabettercomic.tumblr.com'
3771
3772
3773
class ZenPencilsTumblr(GenericTumblrV1):
    """Zen Pencils comics, retrieved through the Tumblr V1 API."""
    # Other sources:
    #  - http://zenpencils.com
    #  - http://www.gocomics.com/zen-pencils
    name = 'zenpencils-tumblr'
    long_name = 'Zen Pencils (from Tumblr)'
    url = 'http://zenpencils.tumblr.com'
    _categories = ('ZENPENCILS',)
3781
3782
3783
class ThreeWordPhraseTumblr(GenericTumblrV1):
    """Three Word Phrase comics, retrieved through the Tumblr V1 API."""
    # Other source: http://threewordphrase.com
    name = 'threeword-tumblr'
    long_name = 'Three Word Phrase (from Tumblr)'
    url = 'http://threewordphrase.tumblr.com'
3789
3790
3791
class TimeTrabbleTumblr(GenericTumblrV1):
    """Time Trabble comics, retrieved through the Tumblr V1 API."""
    # Other source: http://timetrabble.com
    name = 'timetrabble-tumblr'
    long_name = 'Time Trabble (from Tumblr)'
    url = 'http://timetrabble.tumblr.com'
3797
3798
3799
class SafelyEndangeredTumblr(GenericTumblrV1):
    """Safely Endangered comics, retrieved through the Tumblr V1 API."""
    # Other source: http://www.safelyendangered.com
    name = 'endangered-tumblr'
    long_name = 'Safely Endangered (from Tumblr)'
    url = 'http://tumblr.safelyendangered.com'
3805
3806
3807
class MouseBearComedyTumblr(GenericTumblrV1):
    """Mouse Bear Comedy comics, retrieved through the Tumblr V1 API."""
    # Other source: http://www.mousebearcomedy.com
    name = 'mousebear-tumblr'
    long_name = 'Mouse Bear Comedy (from Tumblr)'
    url = 'http://mousebearcomedy.tumblr.com'
3813
3814
3815
class BouletCorpTumblr(GenericTumblrV1):
    """BouletCorp comics, retrieved through the Tumblr V1 API."""
    # Other source: http://www.bouletcorp.com
    name = 'boulet-tumblr'
    long_name = 'Boulet Corp (from Tumblr)'
    url = 'https://bouletcorp.tumblr.com'
    _categories = ('BOULET',)
3822
3823
3824
class TheAwkwardYetiTumblr(GenericTumblrV1):
    """The Awkward Yeti comics, retrieved through the Tumblr V1 API."""
    # Other sources:
    #  - http://www.gocomics.com/the-awkward-yeti
    #  - http://theawkwardyeti.com
    #  - https://tapastic.com/series/TheAwkwardYeti
    name = 'yeti-tumblr'
    long_name = 'The Awkward Yeti (from Tumblr)'
    url = 'http://larstheyeti.tumblr.com'
    _categories = ('YETI',)
3833
3834
3835
class NellucNhoj(GenericTumblrV1):
    """Nelluc Nhoj comics, retrieved via the GenericTumblrV1 logic."""
    name = 'nhoj'
    long_name = 'Nelluc Nhoj'
    url = 'http://nellucnhoj.com'
3840
3841
3842
class DownTheUpwardSpiralTumblr(GenericTumblrV1):
    """Down The Upward Spiral comics, retrieved via the GenericTumblrV1 logic."""
    # Also available at:
    #  - http://www.downtheupwardspiral.com
    #  - https://tapastic.com/series/Down-the-Upward-Spiral
    name = 'spiral-tumblr'
    long_name = 'Down the Upward Spiral (from Tumblr)'
    url = 'http://downtheupwardspiral.tumblr.com'
3849
3850
3851
class AsPerUsualTumblr(GenericTumblrV1):
    """As Per Usual comics, retrieved via the GenericTumblrV1 logic."""
    # Also available at https://tapastic.com/series/AsPerUsual
    name = 'usual-tumblr'
    long_name = 'As Per Usual (from Tumblr)'
    url = 'http://as-per-usual.tumblr.com'
    # Previously spelled `categories`; comic classes in this module declare
    # their grouping tags in `_categories`, so the tag was presumably ignored.
    _categories = ('DAMILEE', )
3858
3859
3860
class HotComicsForCoolPeopleTumblr(GenericTumblrV1):
    """Hot Comics For Cool People, retrieved via the GenericTumblrV1 logic."""
    # Also available at:
    #  - https://tapastic.com/series/Hot-Comics-For-Cool-People
    #  - http://hotcomics.biz (links to tumblr)
    #  - http://hcfcp.com (links to tumblr)
    name = 'hotcomics-tumblr'
    long_name = 'Hot Comics For Cool People (from Tumblr)'
    url = 'http://hotcomicsforcoolpeople.tumblr.com'
    # Previously spelled `categories`; comic classes in this module declare
    # their grouping tags in `_categories`, so the tag was presumably ignored.
    _categories = ('DAMILEE', )
3869
3870
3871
class OneOneOneOneComicTumblr(GenericTumblrV1):
    """1111 Comics, retrieved via the GenericTumblrV1 logic."""
    # Also available at:
    #  - http://www.1111comics.me
    #  - https://tapastic.com/series/1111-Comics
    name = '1111-tumblr'
    long_name = '1111 Comics (from Tumblr)'
    url = 'http://comics1111.tumblr.com'
    _categories = ('ONEONEONEONE', )
3879
3880
3881
class JhallComicsTumblr(GenericTumblrV1):
    """Jhall Comics, retrieved via the GenericTumblrV1 logic."""
    # Also available at http://jhallcomics.com
    name = 'jhall-tumblr'
    long_name = 'Jhall Comics (from Tumblr)'
    url = 'http://jhallcomics.tumblr.com'
3887
3888
3889
class BerkeleyMewsTumblr(GenericTumblrV1):
    """Berkeley Mews comics, retrieved via the GenericTumblrV1 logic."""
    # Also available at:
    #  - http://www.gocomics.com/berkeley-mews
    #  - http://www.berkeleymews.com
    name = 'berkeley-tumblr'
    long_name = 'Berkeley Mews (from Tumblr)'
    url = 'http://mews.tumblr.com'
    _categories = ('BERKELEY', )
3897
3898
3899
class JoanCornellaTumblr(GenericTumblrV1):
    """Joan Cornella comics, retrieved via the GenericTumblrV1 logic."""
    # Also available at http://joancornella.net
    name = 'cornella-tumblr'
    long_name = 'Joan Cornella (from Tumblr)'
    url = 'http://cornellajoan.tumblr.com'
3905
3906
3907
class RespawnComicTumblr(GenericTumblrV1):
    """Respawn Comic, retrieved via the GenericTumblrV1 logic."""
    # Also available at http://respawncomic.com
    name = 'respawn-tumblr'
    long_name = 'Respawn Comic (from Tumblr)'
    url = 'https://respawncomic.tumblr.com'
3913
3914
3915
class ChrisHallbeckTumblr(GenericTumblrV1):
    """Chris Hallbeck comics, retrieved via the GenericTumblrV1 logic."""
    # Also available at:
    #  - https://tapastic.com/ChrisHallbeck
    #  - http://maximumble.com
    #  - http://minimumble.com
    #  - http://thebookofbiff.com
    name = 'hallbeck-tumblr'
    # Display name fixed: it used to read 'Hallback', unlike the URL and class name.
    long_name = 'Chris Hallbeck (from Tumblr)'
    url = 'https://chrishallbeck.tumblr.com'
    # NOTE(review): the tag keeps its 'HALLBACK' spelling because other classes
    # may share it - confirm before renaming.
    _categories = ('HALLBACK', )
3925
3926
3927
class ComicNuggets(GenericTumblrV1):
    """Comic Nuggets, retrieved via the GenericTumblrV1 logic."""
    name = 'nuggets'
    long_name = 'Comic Nuggets'
    url = 'http://comicnuggets.com'
3932
3933
3934
class PigeonGazetteTumblr(GenericTumblrV1):
    """The Pigeon Gazette comics, retrieved via the GenericTumblrV1 logic."""
    # Also available at https://tapastic.com/series/The-Pigeon-Gazette
    name = 'pigeon-tumblr'
    long_name = 'The Pigeon Gazette (from Tumblr)'
    url = 'http://thepigeongazette.tumblr.com'
3940
3941
3942
class CancerOwl(GenericTumblrV1):
    """Cancer Owl comics, retrieved via the GenericTumblrV1 logic."""
    # Also available at http://cancerowl.com
    name = 'cancerowl-tumblr'
    long_name = 'Cancer Owl (from Tumblr)'
    url = 'http://cancerowl.tumblr.com'
3948
3949
3950
class FowlLanguageTumblr(GenericTumblrV1):
    """Fowl Language comics, retrieved via the GenericTumblrV1 logic."""
    # Also available at:
    #  - http://www.fowllanguagecomics.com
    #  - http://tapastic.com/series/Fowl-Language-Comics
    #  - http://www.gocomics.com/fowl-language
    name = 'fowllanguage-tumblr'
    long_name = 'Fowl Language Comics (from Tumblr)'
    url = 'http://fowllanguagecomics.tumblr.com'
    _categories = ('FOWLLANGUAGE', )
3959
3960
3961
class TheOdd1sOutTumblr(GenericTumblrV1):
    """The Odd 1s Out comics, retrieved via the GenericTumblrV1 logic."""
    # Also available at:
    #  - http://theodd1sout.com
    #  - https://tapastic.com/series/Theodd1sout
    name = 'theodd-tumblr'
    long_name = 'The Odd 1s Out (from Tumblr)'
    url = 'http://theodd1sout.tumblr.com'
3968
3969
3970
class TheUnderfoldTumblr(GenericTumblrV1):
    """The Underfold comics, retrieved via the GenericTumblrV1 logic."""
    # Also available at http://theunderfold.com
    name = 'underfold-tumblr'
    long_name = 'The Underfold (from Tumblr)'
    url = 'http://theunderfold.tumblr.com'
3976
3977
3978
class LolNeinTumblr(GenericTumblrV1):
    """Lol Nein comics, retrieved via the GenericTumblrV1 logic."""
    # Also available at http://lolnein.com
    name = 'lolnein-tumblr'
    long_name = 'Lol Nein (from Tumblr)'
    url = 'http://lolneincom.tumblr.com'
3984
3985
3986
class FatAwesomeComicsTumblr(GenericTumblrV1):
    """Fat Awesome Comics, retrieved via the GenericTumblrV1 logic."""
    # Also available at http://fatawesome.com/comics
    name = 'fatawesome-tumblr'
    long_name = 'Fat Awesome (from Tumblr)'
    url = 'http://fatawesomecomedy.tumblr.com'
3992
3993
3994
class TheWorldIsFlatTumblr(GenericTumblrV1):
    """The World Is Flat comics, retrieved via the GenericTumblrV1 logic."""
    # Also available at https://tapastic.com/series/The-World-is-Flat
    name = 'flatworld-tumblr'
    long_name = 'The World Is Flat (from Tumblr)'
    url = 'http://theworldisflatcomics.com'
4000
4001
4002
class DorrisMc(GenericTumblrV1):
    """Dorris Mc comics, retrieved via the GenericTumblrV1 logic."""
    # Also available at http://www.gocomics.com/dorris-mccomics
    name = 'dorrismc'
    long_name = 'Dorris Mc'
    url = 'http://dorrismccomics.com'
4008
4009
4010
class LeleozTumblr(GenericEmptyComic, GenericTumblrV1):
    """Leleoz comics, declared on top of GenericEmptyComic and GenericTumblrV1."""
    # NOTE(review): the GenericEmptyComic base presumably disables retrieval - confirm.
    # Also available at https://tapastic.com/series/Leleoz
    name = 'leleoz-tumblr'
    long_name = 'Leleoz (from Tumblr)'
    url = 'http://leleozcomics.tumblr.com'
4016
4017
4018
class MoonBeardTumblr(GenericTumblrV1):
    """MoonBeard comics, retrieved via the GenericTumblrV1 logic."""
    # Also available at:
    #  - http://moonbeard.com
    #  - http://www.webtoons.com/en/comedy/moon-beard/list?title_no=471
    name = 'moonbeard-tumblr'
    long_name = 'Moon Beard (from Tumblr)'
    url = 'http://blog.squiresjam.es'
4025
4026
4027
class AComik(GenericTumblrV1):
    """A Comik comics, retrieved via the GenericTumblrV1 logic."""
    name = 'comik'
    long_name = 'A Comik'
    url = 'http://acomik.com'
4032
4033
4034
class ClassicRandy(GenericTumblrV1):
    """Classic Randy comics, retrieved via the GenericTumblrV1 logic."""
    name = 'randy'
    long_name = 'Classic Randy'
    url = 'http://classicrandy.tumblr.com'
4039
4040
4041
class DagssonTumblr(GenericTumblrV1):
    """Dagsson comics, retrieved via the GenericTumblrV1 logic."""
    # Also available at http://www.dagsson.com
    name = 'dagsson-tumblr'
    long_name = 'Dagsson Hugleikur (from Tumblr)'
    url = 'https://hugleikurdagsson.tumblr.com'
4047
4048
4049
class LinsEditionsTumblr(GenericTumblrV1):
    """L.I.N.S. Editions comics, retrieved via the GenericTumblrV1 logic."""
    # Also available at https://linsedition.com
    # The comic is now published on http://warandpeas.tumblr.com
    name = 'lins-tumblr'
    long_name = 'L.I.N.S. Editions (from Tumblr)'
    url = 'https://linscomics.tumblr.com'
    _categories = ('LINS', )
4057
4058
4059
class WarAndPeasTumblr(GenericTumblrV1):
    """War And Peas comics, retrieved via the GenericTumblrV1 logic."""
    # The comic was previously published on https://linscomics.tumblr.com
    name = 'warandpeas-tumblr'
    long_name = 'War And Peas (from Tumblr)'
    url = 'http://warandpeas.tumblr.com'
    _categories = ('WARANDPEAS', )
4066
4067
4068
class OrigamiHotDish(GenericTumblrV1):
    """Origami Hot Dish comics, retrieved via the GenericTumblrV1 logic."""
    name = 'origamihotdish'
    long_name = 'Origami Hot Dish'
    url = 'http://origamihotdish.com'
4073
4074
4075
class HitAndMissComicsTumblr(GenericTumblrV1):
    """Hit and Miss Comics, retrieved via the GenericTumblrV1 logic."""
    name = 'hitandmiss'
    long_name = 'Hit and Miss Comics'
    url = 'https://hitandmisscomics.tumblr.com'
4080
4081
4082
class HMBlanc(GenericTumblrV1):
    """HM Blanc comics, retrieved via the GenericTumblrV1 logic."""
    name = 'hmblanc'
    long_name = 'HM Blanc'
    url = 'http://hmblanc.tumblr.com'
4087
4088
4089
class TalesOfAbsurdityTumblr(GenericTumblrV1):
    """Tales Of Absurdity comics, retrieved via the GenericTumblrV1 logic."""
    # Also available at:
    #  - http://talesofabsurdity.com
    #  - http://tapastic.com/series/Tales-Of-Absurdity
    name = 'absurdity-tumblr'
    long_name = 'Tales of Absurdity (from Tumblr)'
    url = 'http://talesofabsurdity.tumblr.com'
    _categories = ('ABSURDITY', )
4097
4098
4099
class RobbieAndBobby(GenericTumblrV1):
    """Robbie And Bobby comics, retrieved via the GenericTumblrV1 logic."""
    # Also available at http://robbieandbobby.com
    name = 'robbie-tumblr'
    long_name = 'Robbie And Bobby (from Tumblr)'
    url = 'http://robbieandbobby.tumblr.com'
4105
4106
4107
class ElectricBunnyComicTumblr(GenericTumblrV1):
    """Electric Bunny Comics, retrieved via the GenericTumblrV1 logic."""
    # Also available at http://www.electricbunnycomics.com/View/Comic/153/Welcome+to+Hell
    name = 'bunny-tumblr'
    long_name = 'Electric Bunny Comic (from Tumblr)'
    url = 'http://electricbunnycomics.tumblr.com'
4113
4114
4115
class Hoomph(GenericTumblrV1):
    """Hoomph comics, retrieved via the GenericTumblrV1 logic."""
    name = 'hoomph'
    long_name = 'Hoomph'
    url = 'http://hoom.ph'
4120
4121
4122
class BFGFSTumblr(GenericTumblrV1):
    """BFGFS comics, retrieved via the GenericTumblrV1 logic."""
    # Also available at:
    #  - https://tapastic.com/series/BFGFS
    #  - http://bfgfs.com
    name = 'bfgfs-tumblr'
    long_name = 'BFGFS (from Tumblr)'
    url = 'https://bfgfs.tumblr.com'
4129
4130
4131
class DoodleForFood(GenericTumblrV1):
    """Doodle For Food comics, retrieved via the GenericTumblrV1 logic."""
    # Also available at https://tapastic.com/series/Doodle-for-Food
    name = 'doodle'
    long_name = 'Doodle For Food'
    url = 'http://www.doodleforfood.com'
4137
4138
4139
class CassandraCalinTumblr(GenericTumblrV1):
    """C. Cassandra comics, retrieved via the GenericTumblrV1 logic."""
    # Also available at:
    #  - http://cassandracalin.com
    #  - https://tapastic.com/series/C-Cassandra-comics
    name = 'cassandra-tumblr'
    long_name = 'Cassandra Calin (from Tumblr)'
    url = 'http://c-cassandra.tumblr.com'
4146
4147
4148
class DougWasTaken(GenericTumblrV1):
    """Doug Was Taken comics, retrieved via the GenericTumblrV1 logic."""
    name = 'doug'
    long_name = 'Doug Was Taken'
    url = 'https://dougwastaken.tumblr.com'
4153
4154
4155
class MandatoryRollerCoaster(GenericTumblrV1):
    """Mandatory Roller Coaster comics, retrieved via the GenericTumblrV1 logic."""
    name = 'rollercoaster'
    long_name = 'Mandatory Roller Coaster'
    url = 'http://mandatoryrollercoaster.com'
4160
4161
4162
class CEstPasEnRegardantSesPompes(GenericTumblrV1):
    """C'Est Pas En Regardant Ses Pompes (...) comics, retrieved via the GenericTumblrV1 logic."""
    name = 'cperspqccltt'
    long_name = 'C Est Pas En Regardant Ses Pompes (...)'
    url = 'http://cperspqccltt.tumblr.com'
4167
4168
4169
class TheGrohlTroll(GenericTumblrV1):
    """The Grohl Troll comics, retrieved via the GenericTumblrV1 logic."""
    name = 'grohltroll'
    long_name = 'The Grohl Troll'
    url = 'http://thegrohltroll.com'
4174
4175
4176
class WebcomicName(GenericTumblrV1):
    """Webcomic Name comics, retrieved via the GenericTumblrV1 logic."""
    name = 'webcomicname'
    long_name = 'Webcomic Name'
    url = 'http://webcomicname.com'
4181
4182
4183
class BooksOfAdam(GenericTumblrV1):
    """Books of Adam comics, retrieved via the GenericTumblrV1 logic."""
    # Also available at http://www.booksofadam.com
    name = 'booksofadam'
    long_name = 'Books of Adam'
    url = 'http://booksofadam.tumblr.com'
4189
4190
4191
class HarkAVagrant(GenericTumblrV1):
    """Hark A Vagrant comics, retrieved via the GenericTumblrV1 logic."""
    # Also available at http://www.harkavagrant.com
    name = 'hark-tumblr'
    long_name = 'Hark A Vagrant (from Tumblr)'
    url = 'http://beatonna.tumblr.com'
4197
4198
4199
class OurSuperAdventureTumblr(GenericTumblrV1):
    """Our Super Adventure comics, retrieved via the GenericTumblrV1 logic."""
    # Also available at:
    #  - https://tapastic.com/series/Our-Super-Adventure
    #  - http://www.oursuperadventure.com
    # See also http://sarahgraley.com
    name = 'superadventure-tumblr'
    long_name = 'Our Super Adventure (from Tumblr)'
    url = 'http://sarahssketchbook.tumblr.com'
4207
4208
4209
class JakeLikesOnions(GenericTumblrV1):
    """Jake Likes Onions comics, retrieved via the GenericTumblrV1 logic."""
    name = 'jake'
    long_name = 'Jake Likes Onions'
    url = 'http://jakelikesonions.com'
4214
4215
4216
class InYourFaceCake(GenericTumblrV1):
    """In Your Face Cake comics, retrieved via the GenericTumblrV1 logic."""
    name = 'inyourfacecake-tumblr'
    long_name = 'In Your Face Cake (from Tumblr)'
    url = 'https://in-your-face-cake.tumblr.com'
4221
4222
4223
class Robospunk(GenericTumblrV1):
    """Robospunk comics, retrieved via the GenericTumblrV1 logic."""
    name = 'robospunk'
    long_name = 'Robospunk'
    url = 'http://robospunk.com'
4228
4229
4230
class BananaTwinky(GenericTumblrV1):
    """Banana Twinky comics, retrieved via the GenericTumblrV1 logic."""
    name = 'banana'
    long_name = 'Banana Twinky'
    url = 'https://bananatwinky.tumblr.com'
4235
4236
4237
class YesterdaysPopcornTumblr(GenericTumblrV1):
    """Yesterday's Popcorn comics, retrieved via the GenericTumblrV1 logic."""
    # Also available at:
    #  - http://www.yesterdayspopcorn.com
    #  - https://tapastic.com/series/Yesterdays-Popcorn
    name = 'popcorn-tumblr'
    long_name = "Yesterday's Popcorn (from Tumblr)"
    url = 'http://yesterdayspopcorn.tumblr.com'
4244
4245
4246
class TwistedDoodles(GenericTumblrV1):
    """Twisted Doodles comics, retrieved via the GenericTumblrV1 logic."""
    name = 'twisted'
    long_name = 'Twisted Doodles'
    url = 'http://www.twisteddoodles.com'
4251
4252
4253
class UbertoolTumblr(GenericTumblrV1):
    """Ubertool comics, retrieved via the GenericTumblrV1 logic."""
    # Also available at:
    #  - http://ubertoolcomic.com
    #  - https://tapastic.com/series/ubertool
    name = 'ubertool-tumblr'
    long_name = 'Ubertool (from Tumblr)'
    url = 'https://ubertool.tumblr.com'
    _categories = ('UBERTOOL', )
4261
4262
4263
class LittleLifeLinesTumblr(GenericTumblrV1):
    """Little Life Lines comics, retrieved via the GenericTumblrV1 logic."""
    # Also available at http://www.littlelifelines.com
    name = 'life-tumblr'
    long_name = 'Little Life Lines (from Tumblr)'
    url = 'https://little-life-lines.tumblr.com'
4269
4270
4271
class TheyCanTalk(GenericTumblrV1):
    """They Can Talk comics, retrieved via the GenericTumblrV1 logic."""
    name = 'theycantalk'
    long_name = 'They Can Talk'
    url = 'http://theycantalk.com'
4276
4277
4278
class Will5NeverCome(GenericTumblrV1):
    """Will 5:00 Never Come comics, retrieved via the GenericTumblrV1 logic."""
    name = 'will5'
    long_name = 'Will 5:00 Never Come ?'
    url = 'http://will5nevercome.com'
4283
4284
4285
class Sephko(GenericTumblrV1):
    """Sephko comics, retrieved via the GenericTumblrV1 logic."""
    # Also available at http://www.sephko.com
    name = 'sephko'
    long_name = 'Sephko'
    url = 'https://sephko.tumblr.com'
4291
4292
4293
class BlazersAtDawn(GenericTumblrV1):
    """Blazers At Dawn comics, retrieved via the GenericTumblrV1 logic."""
    name = 'blazers'
    long_name = 'Blazers At Dawn'
    url = 'http://blazersatdawn.tumblr.com'
4298
4299
4300
class ArtByMoga(GenericEmptyComic, GenericTumblrV1):
    """Art By Moga comics, declared on top of GenericEmptyComic and GenericTumblrV1."""
    # Deactivated because it downloads too many things
    name = 'moga'
    long_name = 'Art By Moga'
    url = 'http://artbymoga.tumblr.com'
4305
4306
4307
class VerbalVomitTumblr(GenericTumblrV1):
    """Verbal Vomit comics, retrieved via the GenericTumblrV1 logic."""
    # Also available at http://www.verbal-vomit.com
    name = 'vomit-tumblr'
    long_name = 'Verbal Vomit (from Tumblr)'
    url = 'http://verbalvomits.tumblr.com'
4313
4314
4315
class LibraryComic(GenericTumblrV1):
    """LibraryComic comics, retrieved via the GenericTumblrV1 logic."""
    # Also available at http://librarycomic.com
    name = 'library-tumblr'
    long_name = 'LibraryComic (from Tumblr)'
    url = 'https://librarycomic.tumblr.com'
4321
4322
4323
class TizzyStitchBirdTumblr(GenericTumblrV1):
    """Tizzy Stitch Bird comics, retrieved via the GenericTumblrV1 logic."""
    # Also available at:
    #  - http://tizzystitchbird.com
    #  - https://tapastic.com/series/TizzyStitchbird
    #  - http://www.webtoons.com/en/challenge/tizzy-stitchbird/list?title_no=50082
    name = 'tizzy-tumblr'
    long_name = 'Tizzy Stitch Bird (from Tumblr)'
    url = 'http://tizzystitchbird.tumblr.com'
4331
4332
4333
class VictimsOfCircumsolarTumblr(GenericTumblrV1):
    """Victims Of Circumsolar comics, retrieved via the GenericTumblrV1 logic."""
    # Also available at http://www.victimsofcircumsolar.com
    name = 'circumsolar-tumblr'
    long_name = 'Victims Of Circumsolar (from Tumblr)'
    url = 'https://victimsofcomics.tumblr.com'
4339
4340
4341
class RockPaperCynicTumblr(GenericTumblrV1):
    """Rock Paper Cynic comics, retrieved via the GenericTumblrV1 logic."""
    # Also available at:
    #  - http://www.rockpapercynic.com
    #  - https://tapastic.com/series/rockpapercynic
    name = 'rpc-tumblr'
    long_name = 'Rock Paper Cynic (from Tumblr)'
    url = 'http://rockpapercynic.tumblr.com'
4348
4349
4350
class DeadlyPanelTumblr(GenericTumblrV1):
    """Deadly Panel comics, retrieved via the GenericTumblrV1 logic."""
    # Also available at:
    #  - http://www.deadlypanel.com
    #  - https://tapastic.com/series/deadlypanel
    name = 'deadly-tumblr'
    long_name = 'Deadly Panel (from Tumblr)'
    url = 'https://deadlypanel.tumblr.com'
4357
4358
4359
class CatanaComics(GenericTumblrV1):
    """Catana comics, retrieved via the GenericTumblrV1 logic."""
    name = 'catana'
    long_name = 'Catana'
    url = 'http://www.catanacomics.com'
4364
4365
4366
class AngryAtNothingTumblr(GenericTumblrV1):
    """Angry at Nothing comics, retrieved via the GenericTumblrV1 logic."""
    # Also available at:
    #  - http://www.angryatnothing.net
    #  - http://tapastic.com/series/Comics-yeah-definitely-comics-
    name = 'angry-tumblr'
    long_name = 'Angry At Nothing (from Tumblr)'
    url = 'http://angryatnothing.tumblr.com'
4373
4374
4375
class ShanghaiTango(GenericTumblrV1):
    """Shanghai Tango comic, retrieved via the GenericTumblrV1 logic."""
    name = 'tango'
    long_name = 'Shanghai Tango'
    url = 'http://tango2010weibo.tumblr.com'
4380
4381
4382
class OffTheLeashDogTumblr(GenericTumblrV1):
    """Off The Leash Dog comics, retrieved via the GenericTumblrV1 logic."""
    # Also available at:
    #  - http://offtheleashdogcartoons.com
    #  - http://www.rupertfawcettcartoons.com
    name = 'offtheleash-tumblr'
    long_name = 'Off The Leash Dog (from Tumblr)'
    url = 'http://rupertfawcettsdoggyblog.tumblr.com'
    _categories = ('FAWCETT', )
4390
4391
4392
class ImogenQuestTumblr(GenericTumblrV1):
    """Imogen Quest comics, retrieved via the GenericTumblrV1 logic."""
    # Also available at http://imogenquest.net
    name = 'imogen-tumblr'
    long_name = 'Imogen Quest (from Tumblr)'
    url = 'http://imoquest.tumblr.com'
4398
4399
4400
class Shitfest(GenericTumblrV1):
    """Shitfest comics, retrieved via the GenericTumblrV1 logic."""
    name = 'shitfest'
    long_name = 'Shitfest'
    url = 'http://shitfestcomic.com'
4405
4406
4407
class IceCreamSandwichComics(GenericTumblrV1):
    """Ice Cream Sandwich Comics, retrieved via the GenericTumblrV1 logic."""
    name = 'icecream'
    long_name = 'Ice Cream Sandwich Comics'
    url = 'http://icecreamsandwichcomics.com'
4412
4413
4414
class Dustinteractive(GenericTumblrV1):
    """Dustinteractive comics, retrieved via the GenericTumblrV1 logic."""
    name = 'dustinteractive'
    long_name = 'Dustinteractive'
    url = 'http://dustinteractive.com'
4419
4420
4421
class StickyCinemaFloor(GenericTumblrV1):
    """Sticky Cinema Floor comics, retrieved via the GenericTumblrV1 logic."""
    name = 'stickycinema'
    long_name = 'Sticky Cinema Floor'
    url = 'https://stickycinemafloor.tumblr.com'
4426
4427
4428
class HorovitzComics(GenericListableComic):
    """Shared logic for the comic series hosted on horovitzcomics.com.

    Subclasses are expected to override `link_re` with a compiled pattern
    that matches the archive hrefs of their series and captures the comic
    number as its first group.
    """
    url = 'http://www.horovitzcomics.com'
    _categories = ('HOROVITZ', )
    # Three numeric path components of the image URL, read as year/month/day.
    img_re = re.compile('.*comics/([0-9]*)/([0-9]*)/([0-9]*)/.*$')
    # Placeholder: each concrete series provides its own pattern.
    link_re = NotImplemented
    get_url_from_archive_element = join_cls_url_to_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary for the comic behind an archive link.

        The comic number comes from the link href (via `link_re`), the title
        from the link text and the date from the source URL of the single
        `<img id="comic">` element of the page (via `img_re`).
        """
        comic_number = int(cls.link_re.match(link['href']).group(1))
        comic_title = link.string
        comic_imgs = soup.find_all('img', id='comic')
        assert len(comic_imgs) == 1
        date_match = cls.img_re.match(comic_imgs[0]['src'])
        year, month, day = map(int, date_match.groups())
        return {
            'title': comic_title,
            'day': day,
            'month': month,
            'year': year,
            'img': [img['src'] for img in comic_imgs],
            'num': comic_number,
        }

    @classmethod
    def get_archive_elements(cls):
        """Return the archive page links matching `link_re`, in reversed page order."""
        archive_soup = get_soup_at_url('http://www.horovitzcomics.com/comics/archive/')
        series_links = archive_soup.find_all('a', href=cls.link_re)
        return reversed(series_links)
4459
4460
4461
class HorovitzNew(HorovitzComics):
    """Horovitz comics from the 'new' series (hrefs under /comics/new/)."""
    name = 'horovitznew'
    long_name = 'Horovitz New'
    link_re = re.compile('^/comics/new/([0-9]+)$')
4466
4467
4468
class HorovitzClassic(HorovitzComics):
    """Horovitz comics from the 'classic' series (hrefs under /comics/classic/)."""
    name = 'horovitzclassic'
    long_name = 'Horovitz Classic'
    link_re = re.compile('^/comics/classic/([0-9]+)$')
4473
4474
4475
class GenericGoComic(GenericNavigableComic):
    """Shared logic for the comics hosted on gocomics.com."""
    _categories = ('GOCOMIC', )

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'fa-backward' button link found on the page at `cls.url`."""
        first_button_class = 'fa btn btn-outline-default btn-circle fa-backward sm '
        home_soup = get_soup_at_url(cls.url)
        return home_soup.find('a', class_=first_button_class)

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the link to the next comic if `next_` is truthy, else to the previous one."""
        if next_:
            button_class = 'fa btn btn-outline-default btn-circle fa-caret-right hidden-sm-up sm '
        else:
            button_class = 'fa btn btn-outline-default btn-circle fa-caret-left sm '
        return last_soup.find('a', class_=button_class)

    @classmethod
    def get_url_from_link(cls, link):
        """Return the link's href joined onto the gocomics.com root URL."""
        return urljoin_wrapper('http://www.gocomics.com', link['href'])

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary for a comic page.

        Date, author and tags are read from the `article:*` meta elements;
        images are the `<img>` elements of the comic `<picture>` element.
        """
        published = soup.find('meta', property='article:published_time')['content']
        pub_date = string_to_date(published, "%Y-%m-%d")
        picture = soup.find('picture', class_='img-fluid item-comic-image')
        comic_imgs = picture.find_all('img')
        author = soup.find('meta', property='article:author')['content']
        tags = soup.find('meta', property='article:tag')['content']
        img_urls = [urljoin_wrapper(cls.url, img['src']) for img in comic_imgs]
        return {
            'day': pub_date.day,
            'month': pub_date.month,
            'year': pub_date.year,
            'img': img_urls,
            'author': author,
            'tags': tags,
        }
4512
4513
4514
class PearlsBeforeSwine(GenericGoComic):
    """Pearls Before Swine comics, retrieved from gocomics.com via GenericGoComic."""
    name = 'pearls'
    long_name = 'Pearls Before Swine'
    url = 'http://www.gocomics.com/pearlsbeforeswine'
4519
4520
4521
class Peanuts(GenericGoComic):
    """Peanuts comics, retrieved from gocomics.com via GenericGoComic."""
    name = 'peanuts'
    long_name = 'Peanuts'
    url = 'http://www.gocomics.com/peanuts'
4526
4527
4528
class MattWuerker(GenericGoComic):
    """Matt Wuerker comics, retrieved from gocomics.com via GenericGoComic."""
    name = 'wuerker'
    long_name = 'Matt Wuerker'
    url = 'http://www.gocomics.com/mattwuerker'
4533
4534
4535
class TomToles(GenericGoComic):
    """Tom Toles comics, retrieved from gocomics.com via GenericGoComic."""
    name = 'toles'
    long_name = 'Tom Toles'
    url = 'http://www.gocomics.com/tomtoles'
4540
4541
4542
class BreakOfDay(GenericGoComic):
    """Break Of Day comics, retrieved from gocomics.com via GenericGoComic."""
    name = 'breakofday'
    long_name = 'Break Of Day'
    url = 'http://www.gocomics.com/break-of-day'
4547
4548
4549
class Brevity(GenericGoComic):
    """Brevity comics, retrieved from gocomics.com via GenericGoComic."""
    name = 'brevity'
    long_name = 'Brevity'
    url = 'http://www.gocomics.com/brevitypanel'
4554
4555
4556
class MichaelRamirez(GenericGoComic):
    """Michael Ramirez comics, retrieved from gocomics.com via GenericGoComic."""
    name = 'ramirez'
    long_name = 'Michael Ramirez'
    url = 'http://www.gocomics.com/michaelramirez'
4561
4562
4563
class MikeLuckovich(GenericGoComic):
    """Mike Luckovich comics, retrieved from gocomics.com via GenericGoComic."""
    name = 'luckovich'
    long_name = 'Mike Luckovich'
    url = 'http://www.gocomics.com/mikeluckovich'
4568
4569
4570
class JimBenton(GenericGoComic):
    """Jim Benton comics, retrieved from gocomics.com via GenericGoComic."""
    # Also available at http://jimbenton.tumblr.com
    name = 'benton'
    long_name = 'Jim Benton'
    url = 'http://www.gocomics.com/jim-benton-cartoons'
4576
4577
4578
class TheArgyleSweater(GenericGoComic):
    """The Argyle Sweater comics, retrieved from gocomics.com via GenericGoComic."""
    name = 'argyle'
    long_name = 'Argyle Sweater'
    url = 'http://www.gocomics.com/theargylesweater'
4583
4584
4585
class SunnyStreet(GenericGoComic):
    """Sunny Street comics, retrieved from gocomics.com via GenericGoComic."""
    # Also available at http://www.sunnystreetcomics.com
    name = 'sunny'
    long_name = 'Sunny Street'
    url = 'http://www.gocomics.com/sunny-street'
4591
4592
4593
class OffTheMark(GenericGoComic):
    """Off The Mark comics, retrieved from gocomics.com via GenericGoComic."""
    # Also available at https://www.offthemark.com
    name = 'offthemark'
    long_name = 'Off The Mark'
    url = 'http://www.gocomics.com/offthemark'
4599
4600
4601
class WuMo(GenericGoComic):
    """WuMo comics, retrieved from gocomics.com via GenericGoComic."""
    # Also available at http://wumo.com
    name = 'wumo'
    long_name = 'WuMo'
    url = 'http://www.gocomics.com/wumo'
4607
4608
4609
class LunarBaboon(GenericGoComic):
    """Lunar Baboon comics, retrieved from gocomics.com via GenericGoComic."""
    # Also available at:
    #  - http://www.lunarbaboon.com
    #  - https://tapastic.com/series/Lunarbaboon
    name = 'lunarbaboon'
    long_name = 'Lunar Baboon'
    url = 'http://www.gocomics.com/lunarbaboon'
4616
4617
4618
class SandersenGocomic(GenericGoComic):
    """Sarah Andersen comics, retrieved from gocomics.com via GenericGoComic."""
    # Also available at:
    #  - http://sarahcandersen.com
    #  - http://tapastic.com/series/Doodle-Time
    name = 'sandersen-goc'
    long_name = 'Sarah Andersen (from GoComics)'
    url = 'http://www.gocomics.com/sarahs-scribbles'
4625
4626
4627
class SaturdayMorningBreakfastCerealGoComic(GenericGoComic):
    """Saturday Morning Breakfast Cereal comics, retrieved from gocomics.com via GenericGoComic."""
    # Also available at:
    #  - http://smbc-comics.tumblr.com
    #  - http://www.smbc-comics.com
    name = 'smbc-goc'
    long_name = 'Saturday Morning Breakfast Cereal (from GoComics)'
    url = 'http://www.gocomics.com/saturday-morning-breakfast-cereal'
    _categories = ('SMBC', )
4635
4636
4637
class CalvinAndHobbesGoComic(GenericGoComic):
    """Calvin and Hobbes comics, retrieved from gocomics.com via GenericGoComic."""
    # Source is gocomics, not http://marcel-oehler.marcellosendos.ch/comics/ch/
    name = 'calvin-goc'
    long_name = 'Calvin and Hobbes (from GoComics)'
    url = 'http://www.gocomics.com/calvinandhobbes'
4643
4644
4645
class RallGoComic(GenericGoComic):
    """Ted Rall comics, retrieved from gocomics.com via GenericGoComic."""
    # Also available at http://rall.com/comic
    name = 'rall-goc'
    long_name = 'Ted Rall (from GoComics)'
    url = 'http://www.gocomics.com/ted-rall'
    _categories = ('RALL', )
4652
4653
4654
class TheAwkwardYetiGoComic(GenericGoComic):
    """The Awkward Yeti comics, retrieved from gocomics.com via GenericGoComic."""
    # Also available at:
    #  - http://larstheyeti.tumblr.com
    #  - http://theawkwardyeti.com
    #  - https://tapastic.com/series/TheAwkwardYeti
    name = 'yeti-goc'
    long_name = 'The Awkward Yeti (from GoComics)'
    url = 'http://www.gocomics.com/the-awkward-yeti'
    _categories = ('YETI', )
4663
4664
4665
class BerkeleyMewsGoComics(GenericGoComic):
    """Berkeley Mews comics, retrieved from gocomics.com via GenericGoComic."""
    # Also available at:
    #  - http://mews.tumblr.com
    #  - http://www.berkeleymews.com
    name = 'berkeley-goc'
    long_name = 'Berkeley Mews (from GoComics)'
    url = 'http://www.gocomics.com/berkeley-mews'
    _categories = ('BERKELEY', )
4673
4674
4675
class SheldonGoComics(GenericGoComic):
    """Sheldon comics, retrieved from gocomics.com via GenericGoComic."""
    # Also available at http://www.sheldoncomics.com
    name = 'sheldon-goc'
    long_name = 'Sheldon Comics (from GoComics)'
    url = 'http://www.gocomics.com/sheldon'
4681
4682
4683
class FowlLanguageGoComics(GenericGoComic):
    """Fowl Language comics, retrieved from gocomics.com via GenericGoComic."""
    # Also available at:
    #  - http://www.fowllanguagecomics.com
    #  - http://tapastic.com/series/Fowl-Language-Comics
    #  - http://fowllanguagecomics.tumblr.com
    name = 'fowllanguage-goc'
    long_name = 'Fowl Language Comics (from GoComics)'
    url = 'http://www.gocomics.com/fowl-language'
    _categories = ('FOWLLANGUAGE', )
4692
4693
4694
class NickAnderson(GenericGoComic):
    """Nick Anderson comics, retrieved from gocomics.com via GenericGoComic."""
    name = 'nickanderson'
    long_name = 'Nick Anderson'
    url = 'http://www.gocomics.com/nickanderson'
4699
4700
4701
class GarfieldGoComics(GenericGoComic):
    """Garfield comics, retrieved from gocomics.com via GenericGoComic."""
    # Also available at http://garfield.com
    name = 'garfield-goc'
    long_name = 'Garfield (from GoComics)'
    url = 'http://www.gocomics.com/garfield'
    _categories = ('GARFIELD', )
4708
4709
4710
class DorrisMcGoComics(GenericGoComic):
    """Retrieve Dorris Mc comics from GoComics."""
    # Also on http://dorrismccomics.com
    name = 'dorrismc-goc'
    long_name = 'Dorris Mc (from GoComics)'
    url = 'http://www.gocomics.com/dorris-mccomics'
4716
4717
4718
class FoxTrot(GenericGoComic):
    """Retrieve FoxTrot comics from GoComics."""
    name = 'foxtrot'
    long_name = 'FoxTrot'
    url = 'http://www.gocomics.com/foxtrot'
4723
4724
4725
class FoxTrotClassics(GenericGoComic):
    """Retrieve FoxTrot Classics comics from GoComics."""
    name = 'foxtrot-classics'
    long_name = 'FoxTrot Classics'
    url = 'http://www.gocomics.com/foxtrotclassics'
4730
4731
4732
class MisterAndMeGoComics(GenericEmptyComic, GenericGoComic):  # Removed ?
    """Retrieve Mister & Me comics from GoComics."""
    # Also on http://www.mister-and-me.com
    # Also on https://tapastic.com/series/Mister-and-Me
    name = 'mister-goc'
    long_name = 'Mister & Me (from GoComics)'
    url = 'http://www.gocomics.com/mister-and-me'
4739
4740
4741
class NonSequitur(GenericGoComic):
    """Retrieve Non Sequitur (Wiley Miller) comics from GoComics."""
    name = 'nonsequitur'
    long_name = 'Non Sequitur'
    url = 'http://www.gocomics.com/nonsequitur'
4746
4747
4748
class GenericTapasticComic(GenericListableComic):
    """Generic class to handle the logic common to comics from tapastic.com.

    The concrete subclasses in this module only define `name`, `long_name`
    and `url` (the address of the series page), plus optional `_categories`.
    """
    _categories = ('TAPASTIC', )

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Get information about a particular comic.

        `archive_elt` is one entry of the list returned by
        get_archive_elements(); `soup` is presumably the parsed page of the
        corresponding episode (verify against GenericListableComic).

        Returns a dict with the publication date, the image urls and the
        title, or None when the page does not contain any image yet.
        """
        # 'publishDate' is divided by 1000 to get a timestamp in seconds.
        timestamp = int(archive_elt['publishDate']) / 1000.0
        day = datetime.datetime.fromtimestamp(timestamp).date()
        imgs = soup.find_all('img', class_='art-image')
        if not imgs:
            print("Comic %s is being uploaded, retry later" % cls.get_url_from_archive_element(archive_elt))
            return None
        return {
            'day': day.day,
            'year': day.year,
            'month': day.month,
            'img': [i['src'] for i in imgs],
            'title': archive_elt['title'],
        }

    @classmethod
    def get_url_from_archive_element(cls, archive_elt):
        """Build the url of an episode from the 'id' of its archive element."""
        return 'http://tapastic.com/episode/' + str(archive_elt['id'])

    @classmethod
    def get_archive_elements(cls):
        """Return the episode list embedded in the series page.

        The list is looked for on a line of the page which starts with
        'episodeList : ' and ends with ','; the text in between is parsed
        as JSON. Only the first matching line is used.

        Raises IndexError when no line of the page matches.
        """
        pref, suff = 'episodeList : ', ','
        # Information is stored in the javascript part
        # I don't know the clean way to get it so this is the ugly way.
        for raw_line in urlopen_wrapper(cls.url).readlines():
            line = raw_line.decode('utf-8').strip()
            if line.startswith(pref) and line.endswith(suff):
                # Stop at the first match: no need to decode the rest of the page.
                return json.loads(line[len(pref):-len(suff)])
        raise IndexError("No line starting with %r found at %s" % (pref, cls.url))
4781
4782
4783
class VegetablesForDessert(GenericTapasticComic):
    """Retrieve Vegetables For Dessert comics from Tapastic."""
    # Also on http://vegetablesfordessert.tumblr.com
    name = 'vegetables'
    long_name = 'Vegetables For Dessert'
    url = 'http://tapastic.com/series/vegetablesfordessert'
4789
4790
4791
class FowlLanguageTapa(GenericTapasticComic):
    """Retrieve Fowl Language comics from Tapastic."""
    # Also on http://www.fowllanguagecomics.com
    # Also on http://fowllanguagecomics.tumblr.com
    # Also on http://www.gocomics.com/fowl-language
    name = 'fowllanguage-tapa'
    long_name = 'Fowl Language Comics (from Tapastic)'
    url = 'http://tapastic.com/series/Fowl-Language-Comics'
    _categories = ('FOWLLANGUAGE', )
4800
4801
4802
class OscillatingProfundities(GenericTapasticComic):
    """Retrieve Oscillating Profundities comics from Tapastic."""
    name = 'oscillating'
    long_name = 'Oscillating Profundities'
    url = 'http://tapastic.com/series/oscillatingprofundities'
4807
4808
4809
class ZnoflatsComics(GenericTapasticComic):
    """Retrieve Znoflats comics from Tapastic."""
    name = 'znoflats'
    long_name = 'Znoflats Comics'
    url = 'http://tapastic.com/series/Znoflats-Comics'
4814
4815
4816
class SandersenTapastic(GenericTapasticComic):
    """Retrieve Sarah Andersen comics from Tapastic."""
    # Also on http://sarahcandersen.com
    # Also on http://www.gocomics.com/sarahs-scribbles
    name = 'sandersen-tapa'
    long_name = 'Sarah Andersen (from Tapastic)'
    url = 'http://tapastic.com/series/Doodle-Time'
4823
4824
4825
class TubeyToonsTapastic(GenericTapasticComic):
    """Retrieve TubeyToons comics from Tapastic."""
    # Also on http://tubeytoons.com
    # Also on https://tubeytoons.tumblr.com
    name = 'tubeytoons-tapa'
    long_name = 'Tubey Toons (from Tapastic)'
    url = 'http://tapastic.com/series/Tubey-Toons'
    # NOTE(review): the category is spelled 'TUNEYTOONS' (not 'TUBEYTOONS');
    # kept unchanged as it may be shared with other classes - confirm.
    _categories = ('TUNEYTOONS', )
4833
4834
4835
class AnythingComicTapastic(GenericTapasticComic):
    """Retrieve Anything Comic comics from Tapastic."""
    # Also on http://www.anythingcomic.com
    name = 'anythingcomic-tapa'
    long_name = 'Anything Comic (from Tapastic)'
    url = 'http://tapastic.com/series/anything'
4841
4842
4843
class UnearthedComicsTapastic(GenericTapasticComic):
    """Retrieve Unearthed comics from Tapastic."""
    # Also on http://unearthedcomics.com
    # Also on https://unearthedcomics.tumblr.com
    name = 'unearthed-tapa'
    long_name = 'Unearthed Comics (from Tapastic)'
    url = 'http://tapastic.com/series/UnearthedComics'
    _categories = ('UNEARTHED', )
4851
4852
4853
class EverythingsStupidTapastic(GenericTapasticComic):
    """Retrieve Everything's Stupid comics from Tapastic."""
    # Also on http://www.webtoons.com/en/challenge/everythings-stupid/list?title_no=14591
    # Also on http://everythingsstupid.net
    name = 'stupid-tapa'
    long_name = "Everything's Stupid (from Tapastic)"
    url = 'http://tapastic.com/series/EverythingsStupid'
4860
4861
4862
class JustSayEhTapastic(GenericTapasticComic):
    """Retrieve Just Say Eh comics from Tapastic."""
    # Also on http://www.justsayeh.com
    name = 'justsayeh-tapa'
    long_name = 'Just Say Eh (from Tapastic)'
    url = 'http://tapastic.com/series/Just-Say-Eh'
4868
4869
4870
class ThorsThundershackTapastic(GenericTapasticComic):
    """Retrieve Thor's Thundershack comics from Tapastic."""
    # Also on http://www.thorsthundershack.com
    name = 'thor-tapa'
    long_name = "Thor's Thundershack (from Tapastic)"
    # NOTE(review): the series slug ends with 'Thundershac' (no final 'k');
    # kept unchanged as it may be the actual slug - confirm.
    url = 'http://tapastic.com/series/Thors-Thundershac'
    _categories = ('THOR', )
4877
4878
4879
class OwlTurdTapastic(GenericTapasticComic):
    """Retrieve Owl Turd comics from Tapastic."""
    # Also on http://owlturd.com
    name = 'owlturd-tapa'
    long_name = 'Owl Turd (from Tapastic)'
    url = 'http://tapastic.com/series/Owl-Turd-Comix'
    _categories = ('OWLTURD', )
4886
4887
4888
class GoneIntoRaptureTapastic(GenericTapasticComic):
    """Retrieve Gone Into Rapture comics from Tapastic."""
    # Also on http://goneintorapture.tumblr.com
    # Also on http://goneintorapture.com
    name = 'rapture-tapa'
    long_name = 'Gone Into Rapture (from Tapastic)'
    url = 'http://tapastic.com/series/Goneintorapture'
4895
4896
4897
class HeckIfIKnowComicsTapa(GenericTapasticComic):
    """Retrieve Heck If I Know comics from Tapastic."""
    # Also on http://heckifiknowcomics.com
    name = 'heck-tapa'
    long_name = 'Heck if I Know comics (from Tapastic)'
    url = 'http://tapastic.com/series/Regular'
4903
4904
4905
class CheerUpEmoKidTapa(GenericTapasticComic):
    """Retrieve Cheer Up Emo Kid comics from Tapastic."""
    # Also on http://www.cheerupemokid.com
    # Also on https://enzocomics.tumblr.com
    name = 'cuek-tapa'
    long_name = 'Cheer Up Emo Kid (from Tapastic)'
    url = 'http://tapastic.com/series/CUEK'
4912
4913
4914
class BigFootJusticeTapa(GenericTapasticComic):
    """Retrieve Big Foot Justice comics from Tapastic."""
    # Also on http://bigfootjustice.com
    name = 'bigfoot-tapa'
    long_name = 'Big Foot Justice (from Tapastic)'
    url = 'http://tapastic.com/series/bigfoot-justice'
4920
4921
4922
class UpAndOutTapa(GenericTapasticComic):
    """Retrieve Up & Out comics from Tapastic."""
    # Also on http://upandoutcomic.tumblr.com
    name = 'upandout-tapa'
    long_name = 'Up And Out (from Tapastic)'
    url = 'http://tapastic.com/series/UP-and-OUT'
4928
4929
4930
class ToonHoleTapa(GenericTapasticComic):
    """Retrieve Toon Hole comics from Tapastic."""
    # Also on http://www.toonhole.com
    name = 'toonhole-tapa'
    long_name = 'Toon Hole (from Tapastic)'
    url = 'http://tapastic.com/series/TOONHOLE'
4936
4937
4938
class AngryAtNothingTapa(GenericTapasticComic):
    """Retrieve Angry At Nothing comics from Tapastic."""
    # Also on http://www.angryatnothing.net
    # Also on http://angryatnothing.tumblr.com
    name = 'angry-tapa'
    long_name = 'Angry At Nothing (from Tapastic)'
    url = 'http://tapastic.com/series/Comics-yeah-definitely-comics-'
4945
4946
4947
class LeleozTapa(GenericTapasticComic):
    """Retrieve Leleoz comics from Tapastic."""
    # Also on http://leleozcomics.tumblr.com
    name = 'leleoz-tapa'
    long_name = 'Leleoz (from Tapastic)'
    url = 'https://tapastic.com/series/Leleoz'
4953
4954
4955
class TheAwkwardYetiTapa(GenericTapasticComic):
    """Retrieve The Awkward Yeti comics from Tapastic."""
    # Also on http://www.gocomics.com/the-awkward-yeti
    # Also on http://theawkwardyeti.com
    # Also on http://larstheyeti.tumblr.com
    name = 'yeti-tapa'
    long_name = 'The Awkward Yeti (from Tapastic)'
    url = 'https://tapastic.com/series/TheAwkwardYeti'
    _categories = ('YETI', )
4964
4965
4966
class AsPerUsualTapa(GenericTapasticComic):
    """Retrieve As Per Usual comics from Tapastic."""
    # Also on http://as-per-usual.tumblr.com
    name = 'usual-tapa'
    long_name = 'As Per Usual (from Tapastic)'
    url = 'https://tapastic.com/series/AsPerUsual'
    # Categories are declared through `_categories` (leading underscore) like
    # in every other comic class of this module.
    _categories = ('DAMILEE', )
4973
4974
4975
class HotComicsForCoolPeopleTapa(GenericTapasticComic):
    """Retrieve Hot Comics For Cool People comics from Tapastic."""
    # Also on http://hotcomicsforcoolpeople.tumblr.com
    # Also on http://hotcomics.biz (links to tumblr)
    # Also on http://hcfcp.com (links to tumblr)
    name = 'hotcomics-tapa'
    long_name = 'Hot Comics For Cool People (from Tapastic)'
    url = 'https://tapastic.com/series/Hot-Comics-For-Cool-People'
    # Categories are declared through `_categories` (leading underscore) like
    # in every other comic class of this module.
    _categories = ('DAMILEE', )
4984
4985
4986
class OneOneOneOneComicTapa(GenericTapasticComic):
    """Retrieve 1111 Comics from Tapastic."""
    # Also on http://www.1111comics.me
    # Also on http://comics1111.tumblr.com
    name = '1111-tapa'
    long_name = '1111 Comics (from Tapastic)'
    url = 'https://tapastic.com/series/1111-Comics'
    _categories = ('ONEONEONEONE', )
4994
4995
4996
class TumbleDryTapa(GenericTapasticComic):
    """Retrieve Tumble Dry comics from Tapastic."""
    # Also on http://tumbledrycomics.com
    name = 'tumbledry-tapa'
    # The comic is called "Tumble Dry" (see class name and urls), not "Tumblr Dry".
    long_name = 'Tumble Dry (from Tapastic)'
    url = 'https://tapastic.com/series/TumbleDryComics'
5002
5003
5004
class DeadlyPanelTapa(GenericTapasticComic):
    """Retrieve Deadly Panel comics from Tapastic."""
    # Also on http://www.deadlypanel.com
    # Also on https://deadlypanel.tumblr.com
    name = 'deadly-tapa'
    long_name = 'Deadly Panel (from Tapastic)'
    url = 'https://tapastic.com/series/deadlypanel'
5011
5012
5013
class ChrisHallbeckMaxiTapa(GenericTapasticComic):
    """Retrieve Chris Hallbeck's Maximumble comics from Tapastic."""
    # Also on https://chrishallbeck.tumblr.com
    # Also on http://maximumble.com
    name = 'hallbeckmaxi-tapa'
    long_name = 'Chris Hallbeck - Maximumble (from Tapastic)'
    url = 'https://tapastic.com/series/Maximumble'
    # NOTE(review): category key is spelled 'HALLBACK'; kept unchanged as it
    # may be shared with other classes - confirm before renaming.
    _categories = ('HALLBACK', )
5021
5022
5023
class ChrisHallbeckMiniTapa(GenericEmptyComic, GenericTapasticComic):
    """Retrieve Chris Hallbeck's Minimumble comics from Tapastic."""
    # Also on https://chrishallbeck.tumblr.com
    # Also on http://minimumble.com
    name = 'hallbeckmini-tapa'
    long_name = 'Chris Hallbeck - Minimumble (from Tapastic)'
    url = 'https://tapastic.com/series/Minimumble'
    # NOTE(review): category key is spelled 'HALLBACK'; kept unchanged as it
    # may be shared with other classes - confirm before renaming.
    _categories = ('HALLBACK', )
5031
5032
5033
class ChrisHallbeckBiffTapa(GenericEmptyComic, GenericTapasticComic):
    """Retrieve Chris Hallbeck's The Book of Biff comics from Tapastic."""
    # Also on https://chrishallbeck.tumblr.com
    # Also on http://thebookofbiff.com
    name = 'hallbeckbiff-tapa'
    long_name = 'Chris Hallbeck - The Book of Biff (from Tapastic)'
    url = 'https://tapastic.com/series/Biff'
    # NOTE(review): category key is spelled 'HALLBACK'; kept unchanged as it
    # may be shared with other classes - confirm before renaming.
    _categories = ('HALLBACK', )
5041
5042
5043
class RandoWisTapa(GenericTapasticComic):
    """Retrieve RandoWis comics from Tapastic."""
    # Also on https://randowis.com
    name = 'randowis-tapa'
    long_name = 'RandoWis (from Tapastic)'
    url = 'https://tapastic.com/series/RandoWis'
5049
5050
5051
class PigeonGazetteTapa(GenericTapasticComic):
    """Retrieve The Pigeon Gazette comics from Tapastic."""
    # Also on http://thepigeongazette.tumblr.com
    name = 'pigeon-tapa'
    long_name = 'The Pigeon Gazette (from Tapastic)'
    url = 'https://tapastic.com/series/The-Pigeon-Gazette'
5057
5058
5059
class TheOdd1sOutTapa(GenericTapasticComic):
    """Retrieve The Odd 1s Out comics from Tapastic."""
    # Also on http://theodd1sout.com
    # Also on http://theodd1sout.tumblr.com
    name = 'theodd-tapa'
    long_name = 'The Odd 1s Out (from Tapastic)'
    url = 'https://tapastic.com/series/Theodd1sout'
5066
5067
5068
class TheWorldIsFlatTapa(GenericTapasticComic):
    """Retrieve The World Is Flat comics from Tapastic."""
    # Also on http://theworldisflatcomics.tumblr.com
    name = 'flatworld-tapa'
    long_name = 'The World Is Flat (from Tapastic)'
    url = 'https://tapastic.com/series/The-World-is-Flat'
5074
5075
5076
class MisterAndMeTapa(GenericTapasticComic):
    """Retrieve Mister & Me comics from Tapastic."""
    # Also on http://www.mister-and-me.com
    # Also on http://www.gocomics.com/mister-and-me
    name = 'mister-tapa'
    long_name = 'Mister & Me (from Tapastic)'
    url = 'https://tapastic.com/series/Mister-and-Me'
5083
5084
5085
class TalesOfAbsurdityTapa(GenericTapasticComic):
    """Retrieve Tales Of Absurdity comics from Tapastic."""
    # Also on http://talesofabsurdity.com
    # Also on http://talesofabsurdity.tumblr.com
    name = 'absurdity-tapa'
    long_name = 'Tales of Absurdity (from Tapastic)'
    url = 'http://tapastic.com/series/Tales-Of-Absurdity'
    _categories = ('ABSURDITY', )
5093
5094
5095
class BFGFSTapa(GenericTapasticComic):
    """Retrieve BFGFS comics from Tapastic."""
    # Also on http://bfgfs.com
    # Also on https://bfgfs.tumblr.com
    name = 'bfgfs-tapa'
    long_name = 'BFGFS (from Tapastic)'
    url = 'https://tapastic.com/series/BFGFS'
5102
5103
5104
class DoodleForFoodTapa(GenericTapasticComic):
    """Retrieve Doodle For Food comics from Tapastic."""
    # Also on http://www.doodleforfood.com
    name = 'doodle-tapa'
    long_name = 'Doodle For Food (from Tapastic)'
    url = 'https://tapastic.com/series/Doodle-for-Food'
5110
5111
5112
class MrLovensteinTapa(GenericTapasticComic):
    """Retrieve Mr. Lovenstein comics from Tapastic."""
    # NOTE(review): the previous "Also on" comment pointed at this very
    # Tapastic url; the comic's own site is presumably
    # http://www.mrlovenstein.com - confirm.
    name = 'mrlovenstein-tapa'
    long_name = 'Mr. Lovenstein (from Tapastic)'
    url = 'https://tapastic.com/series/MrLovenstein'
5118
5119
5120
class CassandraCalinTapa(GenericTapasticComic):
    """Retrieve C. Cassandra (Cassandra Calin) comics from Tapastic."""
    # Also on http://cassandracalin.com
    # Also on http://c-cassandra.tumblr.com
    name = 'cassandra-tapa'
    long_name = 'Cassandra Calin (from Tapastic)'
    url = 'https://tapastic.com/series/C-Cassandra-comics'
5127
5128
5129
class WafflesAndPancakes(GenericTapasticComic):
    """Retrieve Waffles And Pancakes comics from Tapastic."""
    # Also on http://wandpcomic.com
    name = 'waffles'
    long_name = 'Waffles And Pancakes'
    url = 'https://tapastic.com/series/Waffles-and-Pancakes'
5135
5136
5137
class YesterdaysPopcornTapastic(GenericTapasticComic):
    """Retrieve Yesterday's Popcorn comics from Tapastic."""
    # Also on http://www.yesterdayspopcorn.com
    # Also on http://yesterdayspopcorn.tumblr.com
    name = 'popcorn-tapa'
    long_name = "Yesterday's Popcorn (from Tapastic)"
    url = 'https://tapastic.com/series/Yesterdays-Popcorn'
5144
5145
5146
class OurSuperAdventureTapastic(GenericEmptyComic, GenericTapasticComic):
    """Retrieve Our Super Adventure comics from Tapastic."""
    # Also on http://www.oursuperadventure.com
    # Also on http://sarahssketchbook.tumblr.com
    # Also on http://sarahgraley.com
    # NOTE(review): name ends with '-tapastic' whereas sibling classes use
    # '-tapa'; kept unchanged as the name may be used as a stored key.
    name = 'superadventure-tapastic'
    long_name = 'Our Super Adventure (from Tapastic)'
    url = 'https://tapastic.com/series/Our-Super-Adventure'
5154
5155
5156
class NamelessPCs(GenericTapasticComic):
    """Retrieve Nameless PCs comics from Tapastic."""
    # Also on http://namelesspcs.com
    name = 'namelesspcs-tapa'
    long_name = 'NamelessPCs (from Tapastic)'
    url = 'https://tapastic.com/series/NamelessPC'
5162
5163
5164
class DownTheUpwardSpiralTapa(GenericTapasticComic):
    """Retrieve Down The Upward Spiral comics from Tapastic."""
    # Also on http://www.downtheupwardspiral.com
    # Also on http://downtheupwardspiral.tumblr.com
    name = 'spiral-tapa'
    long_name = 'Down the Upward Spiral (from Tapastic)'
    url = 'https://tapastic.com/series/Down-the-Upward-Spiral'
5171
5172
5173
class UbertoolTapa(GenericTapasticComic):
    """Retrieve Ubertool comics from Tapastic."""
    # Also on http://ubertoolcomic.com
    # Also on https://ubertool.tumblr.com
    name = 'ubertool-tapa'
    long_name = 'Ubertool (from Tapastic)'
    url = 'https://tapastic.com/series/ubertool'
    _categories = ('UBERTOOL', )
5181
5182
5183
class BarteNerdsTapa(GenericTapasticComic):
    """Retrieve BarteNerds comics from Tapastic."""
    # Also on http://www.bartenerds.com
    name = 'bartenerds-tapa'
    long_name = 'BarteNerds (from Tapastic)'
    url = 'https://tapastic.com/series/BarteNERDS'
5189
5190
5191
class SmallBlueYonderTapa(GenericTapasticComic):
    """Retrieve Small Blue Yonder comics from Tapastic."""
    # Also on http://www.smallblueyonder.com
    name = 'smallblue-tapa'
    long_name = 'Small Blue Yonder (from Tapastic)'
    url = 'https://tapastic.com/series/Small-Blue-Yonder'
5197
5198
5199
class TizzyStitchBirdTapa(GenericTapasticComic):
    """Retrieve Tizzy Stitch Bird comics from Tapastic."""
    # Also on http://tizzystitchbird.com
    # Also on http://tizzystitchbird.tumblr.com
    # Also on http://www.webtoons.com/en/challenge/tizzy-stitchbird/list?title_no=50082
    name = 'tizzy-tapa'
    long_name = 'Tizzy Stitch Bird (from Tapastic)'
    url = 'https://tapastic.com/series/TizzyStitchbird'
5207
5208
5209
class RockPaperCynicTapa(GenericTapasticComic):
    """Retrieve Rock Paper Cynic comics from Tapastic."""
    # Also on http://www.rockpapercynic.com
    # Also on http://rockpapercynic.tumblr.com
    name = 'rpc-tapa'
    long_name = 'Rock Paper Cynic (from Tapastic)'
    url = 'https://tapastic.com/series/rockpapercynic'
5216
5217
5218
class ItsTheTieTapa(GenericTapasticComic):
    """Retrieve It's The Tie comics from Tapastic."""
    # Also on http://itsthetie.com
    # Also on http://itsthetie.tumblr.com
    name = 'tie-tapa'
    long_name = "It's the tie (from Tapastic)"
    url = 'https://tapastic.com/series/itsthetie'
    _categories = ('TIE', )
5226
5227
5228
def get_subclasses(klass):
    """Return the list of all classes deriving (directly or not) from klass.

    Direct subclasses come first, followed by the descendants of each of
    them in turn. A class reachable through several paths appears once
    per path.
    """
    direct = klass.__subclasses__()
    found = list(direct)
    for child in direct:
        found += get_subclasses(child)
    return found
5234
5235
5236
def remove_st_nd_rd_th_from_date(string):
    """Function to transform 1st/2nd/3rd/4th in a parsable date format.

    The ordinal suffix ('st', 'nd', 'rd' or 'th') is removed only when it
    directly follows a digit and ends a word, so that names such as
    'August', 'Monday' or 'Saturday' are left untouched.

    Returns the string without the ordinal suffixes.
    """
    # Look-behind keeps the digit, \b avoids cutting into a longer word.
    return re.sub(r'(?<=\d)(?:st|nd|rd|th)\b', '', string)
5244
5245
5246
def string_to_date(string, date_format, local=DEFAULT_LOCAL):
    """Function to convert string to date object.

    Wrapper around datetime.datetime.strptime: the locale `local` is set
    (for LC_ALL) while parsing, and the previous locale is restored
    afterwards, even when the parsing fails.

    Returns a datetime.date. Raises ValueError (from strptime) when the
    string does not match date_format.
    """
    # format described in https://docs.python.org/2/library/datetime.html#strftime-and-strptime-behavior
    prev_locale = locale.setlocale(locale.LC_ALL)
    if local == prev_locale:
        # Already in the requested locale: nothing to switch nor to restore.
        return datetime.datetime.strptime(string, date_format).date()
    locale.setlocale(locale.LC_ALL, local)
    try:
        return datetime.datetime.strptime(string, date_format).date()
    finally:
        # Do not leave the process in a foreign locale if strptime raises.
        locale.setlocale(locale.LC_ALL, prev_locale)
5257
5258
5259
# Registry of the comic classes: every direct/indirect subclass of
# GenericComic. Classes whose `name` is None are left out of VALID_COMICS
# (presumably the generic/abstract classes - confirm).
COMICS = set(get_subclasses(GenericComic))
VALID_COMICS = [c for c in COMICS if c.name is not None]
# Mapping from comic name to class; names must be unique.
COMIC_NAMES = {c.name: c for c in VALID_COMICS}
assert len(VALID_COMICS) == len(COMIC_NAMES), \
    "Comic names must be unique, duplicated: %s" % sorted(
        {c.name for c in VALID_COMICS if COMIC_NAMES[c.name] is not c})
# Class names must be unique too.
CLASS_NAMES = {c.__name__ for c in VALID_COMICS}
assert len(VALID_COMICS) == len(CLASS_NAMES), \
    "Comic class names must be unique, duplicated: %s" % sorted(
        n for n in CLASS_NAMES
        if sum(c.__name__ == n for c in VALID_COMICS) > 1)
5265