Completed
Push — master ( c1fbf1...7dbaee )
by De
27s
created

comics.py (2 issues)

1
#! /usr/bin/python3
2
# vim: set expandtab tabstop=4 shiftwidth=4 :
3
"""Module to retrieve webcomics"""
4
5
from comic_abstract import GenericComic, get_date_for_comic
6
import re
7
from datetime import date, timedelta
8
import datetime
9
from urlfunctions import get_soup_at_url, urljoin_wrapper,\
10
    convert_iri_to_plain_ascii_uri, load_json_at_url, urlopen_wrapper
11
import json
12
import locale
13
import urllib
14
15
DEFAULT_LOCAL = 'en_GB.UTF-8'
16
17
18
class Xkcd(GenericComic):
    """Retriever for xkcd comics, driven by the site's JSON endpoints."""
    name = 'xkcd'
    long_name = 'xkcd'
    url = 'http://xkcd.com'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Yield the comics following `last_comic`.

        Implementation of GenericComic's abstract method. `last_comic` is
        the last stored comic (its 'num' entry is read) or a falsy value
        when nothing has been retrieved yet.
        """
        start = last_comic['num'] + 1 if last_comic else 1
        latest = load_json_at_url(urljoin_wrapper(cls.url, 'info.0.json'))
        for num in range(start, latest['num'] + 1):
            if num == 404:
                # Number 404 is skipped on purpose.
                continue
            json_url = urljoin_wrapper(cls.url, '%d/info.0.json' % num)
            comic = load_json_at_url(json_url)
            comic.update({
                'img': [comic['img']],
                'prefix': '%d-' % num,
                'json_url': json_url,
                'url': urljoin_wrapper(cls.url, str(num)),
            })
            # The date fields are converted to integers.
            for field in ('day', 'month', 'year'):
                comic[field] = int(comic[field])
            assert comic['num'] == num
            yield comic
44
45
46
# Helper functions corresponding to get_url_from_link/get_url_from_archive_element
47
48
49
@classmethod
def get_href(cls, link):
    """Return the 'href' entry of `link`.

    Usable as get_url_from_link / get_url_from_archive_element.
    """
    href = link['href']
    return href
53
54
55
@classmethod
def join_cls_url_to_href(cls, link):
    """Return the 'href' entry of `link` joined to the class url.

    Usable as get_url_from_link / get_url_from_archive_element.
    """
    href = link['href']
    return urljoin_wrapper(cls.url, href)
59
60
61
class GenericNavigableComic(GenericComic):
    """Generic class for "navigable" comics : with first/next arrows.

    This class applies to comics where the previous and next comics can
    be accessed from a given comic. Once given a starting point (either
    the first comic or the last comic retrieved), it handles the
    navigation, the retrieval of the soup object and the setting of
    the 'url' attribute on retrieved comics. This limits a lot the
    amount of boilerplate code in the different implementation classes.

    The method `get_next_comic` is implemented in terms of new, more
    specialized methods to be implemented/overridden:
        - get_first_comic_link
        - get_navi_link
        - get_comic_info
        - get_url_from_link
    """
    _categories = ('NAVIGABLE', )

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        Sometimes this can be retrieved from any comic page, sometimes on
        the archive page, sometimes it doesn't exist at all and one has
        to iterate backward to find it before hardcoding the result found.
        """
        raise NotImplementedError

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next (or previous - for dev purposes) comic.

        `next_` is truthy to ask for the next comic, falsy for the previous.
        """
        raise NotImplementedError

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Expected to return a dict without a 'url' key (it is added by
        `get_next_comic`) or None when the page should be skipped.
        """
        raise NotImplementedError

    @classmethod
    def get_url_from_link(cls, link):
        """Get url corresponding to a link. Default implementation is similar to get_href."""
        return link['href']

    @classmethod
    def get_next_link(cls, last_soup):
        """Get link to next comic."""
        link = cls.get_navi_link(last_soup, True)
        cls.log("Next link is %s" % link)
        return link

    @classmethod
    def get_prev_link(cls, last_soup):
        """Get link to previous comic."""
        link = cls.get_navi_link(last_soup, False)
        cls.log("Prev link is %s" % link)
        return link

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generic implementation of get_next_comic for navigable comics.

        Starts from the url of `last_comic` when provided, from the first
        comic link otherwise, then follows the "next" links and yields the
        comics found until no link is left or a link points to the page
        it was found on.
        """
        url = last_comic['url'] if last_comic else None
        cls.log("starting 'get_next_comic' from %s" % url)
        if url:
            next_comic = cls.get_next_link(get_soup_at_url(url))
        else:
            next_comic = cls.get_first_comic_link()
        cls.log("next/first comic will be %s (url is %s)" % (str(next_comic), url))
        # For development purposes, cls.check_navigation(url) can be called here.
        while next_comic:
            prev_url, url = url, cls.get_url_from_link(next_comic)
            if prev_url == url:
                # A page linking to itself would make the loop endless.
                cls.log("got same url %s" % url)
                break
            cls.log("about to get %s (%s)" % (url, str(next_comic)))
            soup = get_soup_at_url(url)
            comic = cls.get_comic_info(soup, next_comic)
            if comic is not None:
                assert 'url' not in comic
                comic['url'] = url
                yield comic
            next_comic = cls.get_next_link(soup)
            cls.log("next comic will be %s" % str(next_comic))

    @classmethod
    def check_first_link(cls):
        """Check that navigation to first comic seems to be working - for dev purposes.

        Returns True when a first link exists and its url can be fetched
        without an HTTP error, False otherwise.
        """
        # `import urllib` alone does not guarantee that the `urllib.error`
        # submodule is loaded: import it explicitly before referencing it.
        import urllib.error
        cls.log("about to check first link")
        ok = True
        firstlink = cls.get_first_comic_link()
        if firstlink is None:
            print("From %s : no first link" % cls.url)
            ok = False
        else:
            firsturl = cls.get_url_from_link(firstlink)
            try:
                get_soup_at_url(firsturl)
            except urllib.error.HTTPError:
                print("From %s : invalid first url" % cls.url)
                ok = False
        cls.log("checked first link -> returned %d" % ok)
        return ok

    @classmethod
    def check_prev_next_links(cls, url):
        """Check that navigation to prev/next from a given URL seems to be working - for dev purposes.

        Going backward then forward (and forward then backward) is expected
        to lead back to `url`. Returns True when no problem was detected.
        """
        cls.log("about to check prev/next from %s" % url)
        ok = True
        if url is None:
            prevlink, nextlink = None, None
        else:
            soup = get_soup_at_url(url)
            prevlink, nextlink = cls.get_prev_link(soup), cls.get_next_link(soup)
        if prevlink is None and nextlink is None:
            print("From %s : no previous nor next" % url)
            ok = False
        else:
            if prevlink:
                prevurl = cls.get_url_from_link(prevlink)
                prevsoup = get_soup_at_url(prevurl)
                prevnextlink = cls.get_next_link(prevsoup)
                prevnext = cls.get_url_from_link(prevnextlink) if prevnextlink is not None else "NO URL"
                if prevnext != url:
                    print("From %s, going backward then forward leads to %s" % (url, prevnext))
                    ok = False
            if nextlink:
                nexturl = cls.get_url_from_link(nextlink)
                # A "next" link pointing to the current page is not followed.
                if nexturl != url:
                    nextsoup = get_soup_at_url(nexturl)
                    nextprevlink = cls.get_prev_link(nextsoup)
                    nextprev = cls.get_url_from_link(nextprevlink) if nextprevlink is not None else "NO URL"
                    if nextprev != url:
                        print("From %s, going forward then backward leads to %s" % (url, nextprev))
                        ok = False
        cls.log("checked prev/next from %s -> returned %d" % (url, ok))
        return ok

    @classmethod
    def check_navigation(cls, url):
        """Check that navigation functions seem to be working - for dev purposes.

        Both checks are always performed; returns True when both succeed.
        """
        cls.log("about to check navigation from %s" % url)
        first = cls.check_first_link()
        prevnext = cls.check_prev_next_links(url)
        ok = first and prevnext
        cls.log("checked navigation from %s -> returned %d" % (url, ok))
        return ok
207
208
209
class GenericListableComic(GenericComic):
    """Generic class for "listable" comics : comics exposing a list (aka 'archive').

    `get_next_comic` is written in terms of more specialized methods
    to be implemented/overridden:
        - get_archive_elements
        - get_url_from_archive_element
        - get_comic_info
    """
    _categories = ('LISTABLE', )

    @classmethod
    def get_archive_elements(cls):
        """Return an iterable of the archive elements."""
        raise NotImplementedError

    @classmethod
    def get_url_from_archive_element(cls, archive_elt):
        """Return the url matching an archive element."""
        raise NotImplementedError

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Return the information about a given comic."""
        raise NotImplementedError

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generic implementation of get_next_comic for listable comics.

        Archive elements are skipped until the url of `last_comic` has
        been seen (nothing is skipped when `last_comic` is falsy); the
        elements after it are fetched and yielded.
        """
        pending_url = last_comic['url'] if last_comic else None
        elements = list(cls.get_archive_elements())
        for element in elements:
            url = cls.get_url_from_archive_element(element)
            cls.log("considering %s" % url)
            if pending_url is not None:
                # Still looking for the last retrieved comic.
                if pending_url == url:
                    pending_url = None
                continue
            cls.log("about to get %s (%s)" % (url, str(element)))
            info = cls.get_comic_info(get_soup_at_url(url), element)
            if info is None:
                continue
            assert 'url' not in info
            info['url'] = url
            yield info
        if pending_url is not None:
            print("Did not find %s in the %d comics: there might be a problem" %
                  (pending_url, len(elements)))
256
257
# Helper functions corresponding to get_first_comic_link/get_navi_link
258
259
260
@classmethod
def get_link_rel_next(cls, last_soup, next_):
    """Implementation of get_navi_link based on <link rel="next"/"prev">."""
    rel = 'next' if next_ else 'prev'
    return last_soup.find('link', rel=rel)
264
265
266
@classmethod
def get_a_rel_next(cls, last_soup, next_):
    """Implementation of get_navi_link based on <a rel="next"/"prev">."""
    rel = 'next' if next_ else 'prev'
    return last_soup.find('a', rel=rel)
270
271
272
@classmethod
def get_a_navi_navinext(cls, last_soup, next_):
    """Implementation of get_navi_link based on the 'navi navi-next/prev' classes."""
    if next_:
        css_class = 'navi navi-next'
    else:
        css_class = 'navi navi-prev'
    return last_soup.find('a', class_=css_class)
276
277
278
@classmethod
def get_a_navi_comicnavnext_navinext(cls, last_soup, next_):
    """Implementation of get_navi_link based on the 'navi comic-nav-* navi-*' classes."""
    if next_:
        css_class = 'navi comic-nav-next navi-next'
    else:
        css_class = 'navi comic-nav-previous navi-prev'
    return last_soup.find('a', class_=css_class)
282
283
284
@classmethod
def get_a_comicnavbase_comicnavnext(cls, last_soup, next_):
    """Implementation of get_navi_link based on the 'comic-nav-base comic-nav-*' classes."""
    if next_:
        css_class = 'comic-nav-base comic-nav-next'
    else:
        css_class = 'comic-nav-base comic-nav-previous'
    return last_soup.find('a', class_=css_class)
288
289
290
@classmethod
def get_a_navi_navifirst(cls):
    """Implementation of get_first_comic_link based on the 'navi navi-first' anchor."""
    home_soup = get_soup_at_url(cls.url)
    return home_soup.find('a', class_='navi navi-first')
294
295
296
@classmethod
def get_div_navfirst_a(cls):
    """Implementation of get_first_comic_link.

    Returns the first <a> found in the <div class="nav-first"> of the
    page at `cls.url`, or None when that div is missing (like the other
    helpers, which return None when nothing is found) instead of failing
    with an AttributeError.
    """
    div = get_soup_at_url(cls.url).find('div', class_="nav-first")
    return div.find('a') if div is not None else None
300
301
302
@classmethod
def get_a_comicnavbase_comicnavfirst(cls):
    """Implementation of get_first_comic_link based on the 'comic-nav-base comic-nav-first' anchor."""
    home_soup = get_soup_at_url(cls.url)
    return home_soup.find('a', class_='comic-nav-base comic-nav-first')
306
307
308
@classmethod
def simulate_first_link(cls):
    """Implementation of get_first_comic_link building a link-like object
    out of the `first_url` attribute of the class.

    Note: the first URL can easily be found using :
    `get_first_comic_link = navigate_to_first_comic`.
    """
    return dict(href=cls.first_url)
317
318
319
@classmethod
def navigate_to_first_comic(cls):
    """Implementation of get_first_comic_link walking backward from a
    starting URL until no previous comic is found.

    The starting URL is `cls.first_url` when the class defines it;
    otherwise it is asked to the user. Every URL visited is printed.

    This is convenient during development to discover the first comic
    when it cannot be reached directly. Once found, it is better to
    hardcode the URL and to use it via `simulate_first_link`.
    """
    try:
        url = cls.first_url
    except AttributeError:
        url = input("Get starting URL: ")
    print(url)
    while True:
        prev_link = cls.get_prev_link(get_soup_at_url(url))
        if not prev_link:
            # No previous comic: `url` is the first one.
            return {'href': url}
        url = cls.get_url_from_link(prev_link)
        print(url)
343
344
345
class GenericEmptyComic(GenericComic):
    """Generic class for comics where nothing is to be done.

    Handy to temporarily deactivate a comic that does not work
    properly: replace `class MyComic(GenericWhateverComic)` with
    `class MyComic(GenericEmptyComic, GenericWhateverComic)`.
    """
    _categories = ('EMPTY', )

    @classmethod
    def get_next_comic(cls, last_comic):
        """Implementation of get_next_comic returning no comics."""
        cls.log("comic is considered as empty - returning no comic")
        no_comics = []
        return no_comics
358
359
360 View Code Duplication
class ExtraFabulousComics(GenericNavigableComic):
    """Class to retrieve Extra Fabulous Comics."""
    # Also on https://extrafabulouscomics.tumblr.com
    name = 'efc'
    long_name = 'Extra Fabulous Comics'
    url = 'http://extrafabulouscomics.com'
    _categories = ('EFC', )
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with the title, the image urls (images whose 'src'
        is located in the wp-content/uploads directory of the site), the
        publication date and the file prefix.
        """
        # The url is escaped so that its dots only match literal dots.
        img_src_re = re.compile('^%s/wp-content/uploads/' % re.escape(cls.url))
        imgs = soup.find_all('img', src=img_src_re)
        title = soup.find('meta', property='og:title')['content']
        # Only the first 10 characters (YYYY-MM-DD) of the timestamp are kept.
        date_str = soup.find('meta', property='article:published_time')['content'][:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        return {
            'title': title,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'prefix': title + '-'
        }
386
387
388 View Code Duplication
class GenericLeMondeBlog(GenericNavigableComic):
    """Generic class to retrieve comics from the blogs hosted by Le Monde.

    Subclasses are expected to provide `first_url`.
    """
    _categories = ('LEMONDE', 'FRANCAIS')
    first_url = NotImplemented
    get_first_comic_link = simulate_first_link
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the information about a given comic."""
        shortlink = soup.find('link', rel='shortlink')
        url2 = shortlink['href']
        title = soup.find('meta', property='og:title')['content']
        date_str = soup.find("span", class_="entry-date").string
        # Dates are parsed with the French locale.
        day = string_to_date(date_str, "%d %B %Y", "fr_FR.utf8")
        og_images = soup.find_all('meta', property='og:image')
        img_urls = [convert_iri_to_plain_ascii_uri(meta['content'])
                    for meta in og_images]
        return {
            'title': title,
            'url2': url2,
            'img': img_urls,
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
411
412
413
class ZepWorld(GenericLeMondeBlog):
    """Retriever for the Zep World blog."""
    name = 'zep'
    long_name = 'Zep World'
    url = 'http://zepworld.blog.lemonde.fr'
    first_url = 'http://zepworld.blog.lemonde.fr/2014/10/31/bientot-le-blog-de-zep/'
419
420
421
class Vidberg(GenericLeMondeBlog):
    """Retriever for the Vidberg blog."""
    name = 'vidberg'
    long_name = "Vidberg - l'actu en patates"
    url = 'http://vidberg.blog.lemonde.fr'
    # This is not the actual first comic: no efficient way to reach it was found.
    first_url = 'http://vidberg.blog.lemonde.fr/2012/02/09/revue-de-campagne-la-campagne-du-modem-semballe/'
428
429
430
class Plantu(GenericLeMondeBlog):
    """Retriever for the Plantu blog."""
    name = 'plantu'
    long_name = 'Plantu'
    url = 'http://plantu.blog.lemonde.fr'
    first_url = 'http://plantu.blog.lemonde.fr/2014/10/28/stress-test-a-bruxelles/'
436
437
438
class XavierGorce(GenericLeMondeBlog):
    """Retriever for the Xavier Gorce blog."""
    name = 'gorce'
    long_name = 'Xavier Gorce'
    url = 'http://xaviergorce.blog.lemonde.fr'
    first_url = 'http://xaviergorce.blog.lemonde.fr/2015/01/09/distinction/'
444
445
446
class CartooningForPeace(GenericLeMondeBlog):
    """Retriever for the Cartooning For Peace blog."""
    name = 'forpeace'
    long_name = 'Cartooning For Peace'
    url = 'http://cartooningforpeace.blog.lemonde.fr'
    first_url = 'http://cartooningforpeace.blog.lemonde.fr/2014/12/15/bado/'
452
453
454
class Aurel(GenericLeMondeBlog):
    """Retriever for the Aurel blog."""
    name = 'aurel'
    long_name = 'Aurel'
    url = 'http://aurel.blog.lemonde.fr'
    first_url = 'http://aurel.blog.lemonde.fr/2014/09/29/le-senat-repasse-a-droite/'
460
461
462
class LesCulottees(GenericLeMondeBlog):
    """Retriever for the Les Culottees blog."""
    name = 'culottees'
    long_name = 'Les Culottees'
    url = 'http://lesculottees.blog.lemonde.fr'
    first_url = 'http://lesculottees.blog.lemonde.fr/2016/01/11/clementine-delait-femme-a-barbe/'
468
469
470
class UneAnneeAuLycee(GenericLeMondeBlog):
    """Retriever for the Une Annee Au Lycee blog."""
    name = 'lycee'
    long_name = 'Une Annee au Lycee'
    url = 'http://uneanneeaulycee.blog.lemonde.fr'
    first_url = 'http://uneanneeaulycee.blog.lemonde.fr/2016/06/13/la-semaine-du-bac-est-arrivee/'
476
477
478 View Code Duplication
class Rall(GenericNavigableComic):
    """Retriever for Ted Rall comics."""
    # Also on http://www.gocomics.com/tedrall
    name = 'rall'
    long_name = 'Ted Rall'
    url = 'http://rall.com/comic'
    _categories = ('RALL', )
    # This is not the actual first comic: no efficient way to reach it was found.
    first_url = 'http://rall.com/2014/01/30/los-angeles-times-cartoon-well-miss-those-california-flowers'
    get_first_comic_link = simulate_first_link
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the information about a given comic."""
        title = soup.find('meta', property='og:title')['content']
        author_span = soup.find("span", class_="author vcard")
        author = author_span.find("a").string
        date_str = soup.find("span", class_="entry-date").string
        day = string_to_date(date_str, "%B %d, %Y")
        desc = soup.find('meta', property='og:description')['content']
        content_imgs = soup.find('div', class_='entry-content').find_all('img')
        # The last 7 images are social media buttons: they are dropped.
        comic_imgs = content_imgs[:-7]
        return {
            'title': title,
            'author': author,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'description': desc,
            'img': [img['src'] for img in comic_imgs],
        }
509
510
511
class Dilem(GenericNavigableComic):
    """Retriever for Ali Dilem comics."""
    name = 'dilem'
    long_name = 'Ali Dilem'
    url = 'http://information.tv5monde.com/dilem'
    _categories = ('FRANCAIS', )
    first_url = 'http://information.tv5monde.com/dilem/2004-06-26'
    get_first_comic_link = simulate_first_link
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the link to the next or previous comic (None if missing)."""
        # On this site, 'prev' is next and 'next' is prev.
        li_class = 'prev' if next_ else 'next'
        item = last_soup.find('li', class_=li_class)
        if not item:
            return None
        return item.find('a')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the information about a given comic."""
        short_url = soup.find('link', rel='shortlink')['href']
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        og_images = soup.find_all('meta', property='og:image')
        # Only the first 10 characters (YYYY-MM-DD) of the date are kept.
        raw_date = soup.find('span', property='dc:date')['content']
        day = string_to_date(raw_date[:10], "%Y-%m-%d")
        return {
            'short_url': short_url,
            'title': title,
            'img': [meta['content'] for meta in og_images],
            'day': day.day,
            'month': day.month,
            'year': day.year,
        }
545
546
547
class SpaceAvalanche(GenericNavigableComic):
    """Retriever for Space Avalanche comics."""
    name = 'avalanche'
    long_name = 'Space Avalanche'
    url = 'http://www.spaceavalanche.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return a hardcoded link-like object for the first comic."""
        first_link = {
            'href': "http://www.spaceavalanche.com/2009/02/02/irish-sea/",
            'title': "Irish Sea",
        }
        return first_link

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the information about a given comic.

        The title comes from the link; the date is read from the url.
        """
        url_date_re = re.compile('.*/([0-9]*)/([0-9]*)/([0-9]*)/.*$')
        title = link['title']
        date_match = url_date_re.match(cls.get_url_from_link(link))
        year, month, day = map(int, date_match.groups())
        entry = soup.find("div", class_="entry")
        return {
            'title': title,
            'day': day,
            'month': month,
            'year': year,
            'img': [img['src'] for img in entry.find_all("img")],
        }
575
576
577
class ZenPencils(GenericNavigableComic):
    """Retriever for Zen Pencils comics."""
    # Also on http://zenpencils.tumblr.com
    # Also on http://www.gocomics.com/zen-pencils
    name = 'zenpencils'
    long_name = 'Zen Pencils'
    url = 'http://zenpencils.com'
    _categories = ('ZENPENCILS', )
    first_url = 'http://zenpencils.com/comic/1-ralph-waldo-emerson-make-them-cry/'
    get_first_comic_link = simulate_first_link
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the information about a given comic."""
        imgs = soup.find('div', id='comic').find_all('img')
        # The <meta property="og:image"> tags could be an alternative source.
        post = soup.find('div', class_='post-content')
        author_span = post.find("span", class_="post-author")
        author = author_span.find("a").string
        title = soup.find('h2', class_='post-title').string
        date_str = post.find('span', class_='post-date').string
        day = string_to_date(date_str, "%B %d, %Y")
        # Sanity checks on the images found.
        assert imgs
        assert all(i['alt'] == i['title'] for i in imgs)
        assert all(i['alt'] in (title, "") for i in imgs)
        img_urls = [urljoin_wrapper(cls.url, i['src']) for i in imgs]
        return {
            'title': title,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': img_urls,
        }
610
611
612
class ItsTheTie(GenericEmptyComic, GenericNavigableComic):
    """Retriever for It's the tie comics."""
    # Also on http://itsthetie.tumblr.com
    # Also on https://tapastic.com/series/itsthetie
    name = 'tie'
    long_name = "It's the tie"
    url = 'http://itsthetie.com'
    _categories = ('TIE', )
    get_navi_link = get_a_rel_next
    get_first_comic_link = get_div_navfirst_a

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the information about a given comic."""
        title = soup.find('h1', class_='comic-title').find('a').string
        meta_header = soup.find('header', class_='comic-meta entry-meta')
        date_str = meta_header.find('a').string
        day = string_to_date(date_str, "%B %d, %Y")
        # Bonus images may or may not be in meta og:image.
        main_srcs = [meta['content']
                     for meta in soup.find_all('meta', property='og:image')]
        bonus_srcs = [img['data-oversrc']
                      for img in soup.find_all('img', attrs={'data-oversrc': True})]
        # Bonus images already listed are not added a second time.
        extra_srcs = [src for src in bonus_srcs if src not in main_srcs]
        kept_srcs = [src for src in main_srcs + extra_srcs
                     if not src.endswith("/2016/01/bonus-panel.png")]
        tag_meta = soup.find('meta', property='article:tag')
        if tag_meta:
            tags = tag_meta['content']
        else:
            tags = ""
        return {
            'title': title,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': kept_srcs,
            'tags': tags,
        }
646
647
648 View Code Duplication
class PenelopeBagieu(GenericNavigableComic):
    """Retriever for the comics of Penelope Bagieu's blog."""
    name = 'bagieu'
    long_name = 'Ma vie est tout a fait fascinante (Bagieu)'
    url = 'http://www.penelope-jolicoeur.com'
    _categories = ('FRANCAIS', )
    first_url = 'http://www.penelope-jolicoeur.com/2007/02/ma-vie-mon-oeuv.html'
    get_first_comic_link = simulate_first_link
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the information about a given comic."""
        date_str = soup.find('h2', class_='date-header').string
        # Dates are parsed with the French locale.
        day = string_to_date(date_str, "%A %d %B %Y", "fr_FR.utf8")
        body_imgs = soup.find('div', class_='entry-body').find_all('img')
        title = soup.find('h3', class_='entry-header').string
        return {
            'title': title,
            'img': [img['src'] for img in body_imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
672
673
674 View Code Duplication
class OneOneOneOneComic(GenericEmptyComic, GenericNavigableComic):
    """Retriever for 1111 Comics."""
    # Also on http://comics1111.tumblr.com
    # Also on https://tapastic.com/series/1111-Comics
    name = '1111'
    long_name = '1111 Comics'
    url = 'http://www.1111comics.me'
    _categories = ('ONEONEONEONE', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = get_div_navfirst_a

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the information about a given comic."""
        title = soup.find('h1', class_='comic-title').find('a').string
        meta_header = soup.find('header', class_='comic-meta entry-meta')
        date_str = meta_header.find('a').string
        day = string_to_date(date_str, "%B %d, %Y")
        og_images = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [meta['content'] for meta in og_images],
        }
699
700
701 View Code Duplication
class AngryAtNothing(GenericEmptyComic, GenericNavigableComic):
    """Retriever for Angry at Nothing comics."""
    # Also on http://tapastic.com/series/Comics-yeah-definitely-comics-
    # Also on http://angryatnothing.tumblr.com
    name = 'angry'
    long_name = 'Angry At Nothing'
    url = 'http://www.angryatnothing.net'
    get_navi_link = get_a_rel_next
    get_first_comic_link = get_div_navfirst_a

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the information about a given comic."""
        title = soup.find('h1', class_='comic-title').find('a').string
        meta_header = soup.find('header', class_='comic-meta entry-meta')
        date_str = meta_header.find('a').string
        day = string_to_date(date_str, "%B %d, %Y")
        og_images = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [meta['content'] for meta in og_images],
        }
725
726
727
class NeDroid(GenericNavigableComic):
    """Class to retrieve NeDroid comics."""
    name = 'nedroid'
    long_name = 'NeDroid'
    url = 'http://nedroid.com'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The comic number is read from the "?p=<num>" part of the short
        link; exactly one image is expected in the 'comic' div and its
        'alt' and 'title' attributes provide the titles.
        """
        # The url is escaped so that its dots only match literal dots.
        short_url_re = re.compile(r'^%s/\?p=([0-9]*)' % re.escape(cls.url))
        short_url = cls.get_url_from_link(soup.find('link', rel='shortlink'))
        num = int(short_url_re.match(short_url).groups()[0])
        imgs = soup.find('div', id='comic').find_all('img')
        assert len(imgs) == 1
        title = imgs[0]['alt']
        title2 = imgs[0]['title']
        return {
            'short_url': short_url,
            'title': title,
            'title2': title2,
            'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
            'num': num,
        }
753
754
755
class Garfield(GenericNavigableComic):
    """Class to retrieve Garfield comics."""
    # Also on http://www.gocomics.com/garfield
    name = 'garfield'
    long_name = 'Garfield'
    url = 'https://garfield.com'
    _categories = ('GARFIELD', )
    get_first_comic_link = simulate_first_link
    first_url = 'https://garfield.com/comic/1978/06/19'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic."""
        return last_soup.find('a', class_='comic-arrow-right' if next_ else 'comic-arrow-left')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The date is read from the "/comic/<year>/<month>/<day>" part of
        the url of the link.
        """
        url = cls.get_url_from_link(link)
        # The url is escaped so that its dots only match literal dots.
        date_re = re.compile('^%s/comic/([0-9]*)/([0-9]*)/([0-9]*)' % re.escape(cls.url))
        year, month, day = [int(s) for s in date_re.match(url).groups()]
        imgs = soup.find('div', class_='comic-display').find_all('img', class_='img-responsive')
        return {
            'month': month,
            'year': year,
            'day': day,
            'img': [i['src'] for i in imgs],
        }
783
784
785
class Dilbert(GenericNavigableComic):
    """Retriever for the Dilbert comic strips."""
    # Also on http://www.gocomics.com/dilbert-classics
    name = 'dilbert'
    long_name = 'Dilbert'
    url = 'http://dilbert.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://dilbert.com/strip/1989-04-16'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor inside the next (or previous) navigation div, if any."""
        nav_class = 'nav-comic nav-right' if next_ else 'nav-comic nav-left'
        nav_div = last_soup.find('div', class_=nav_class)
        if not nav_div:
            return None
        return nav_div.find('a')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic description from the page's <meta> properties."""
        def meta_content(prop):
            """Return the 'content' of the first <meta> with that property."""
            return soup.find('meta', property=prop)['content']

        title = meta_content('og:title')
        image_metas = soup.find_all('meta', property='og:image')
        desc = meta_content('og:description')
        published = string_to_date(
            meta_content('article:publish_date'), "%B %d, %Y")
        author = meta_content('article:author')
        tags = meta_content('article:tag')
        return {
            'title': title,
            'description': desc,
            'img': [meta['content'] for meta in image_metas],
            'author': author,
            'tags': tags,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
821
822
823
class VictimsOfCircumsolar(GenericEmptyComic, GenericNavigableComic):
    """Retriever for the Victims Of Circumsolar comics."""
    # Also on https://victimsofcomics.tumblr.com
    name = 'circumsolar'
    long_name = 'Victims Of Circumsolar'
    url = 'http://www.victimsofcircumsolar.com'
    get_navi_link = get_a_navi_comicnavnext_navinext
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.victimsofcircumsolar.com/comic/modern-addiction'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic description from the last og: metas and the comic div."""
        # Date is on the archive page
        def last_meta_content(prop):
            """Return the 'content' of the last <meta> with that property."""
            return soup.find_all('meta', property=prop)[-1]['content']

        title = last_meta_content('og:title')
        desc = last_meta_content('og:description')
        pics = soup.find('div', id='comic').find_all('img')
        # Every image is expected to carry the comic title as title and alt.
        assert all(pic['title'] == pic['alt'] == title for pic in pics)
        return {
            'title': title,
            'description': desc,
            'img': [pic['src'] for pic in pics],
        }
846
847
848
class ThreeWordPhrase(GenericNavigableComic):
    """Retriever for the Three Word Phrase comics."""
    # Also on http://www.threewordphrase.tumblr.com
    name = 'threeword'
    long_name = 'Three Word Phrase'
    url = 'http://threewordphrase.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the element wrapping the 'first' button on the home page."""
        home = get_soup_at_url(cls.url)
        first_button = home.find('img', src='/firstlink.gif')
        return first_button.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the element wrapping the next/prev button, or None without href."""
        button_src = '/nextlink.gif' if next_ else '/prevlink.gif'
        wrapper = last_soup.find('img', src=button_src).parent
        if wrapper.get('href') is None:
            return None
        return wrapper

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic description from the page title and its comic images."""
        # Images whose source ends with one of these are site furniture.
        ignored_suffixes = ('link.gif', '32.png', 'twpbookad.jpg',
                            'merchad.jpg', 'header.gif', 'tipjar.jpg')
        title_tag = soup.find('title')
        pics = []
        for pic in soup.find_all('img'):
            if not pic['src'].endswith(ignored_suffixes):
                pics.append(pic)
        alt_texts = (pic.get('alt') for pic in pics)
        return {
            'title': title_tag.string if title_tag else None,
            'title2': '  '.join(alt for alt in alt_texts if alt),
            'img': [urljoin_wrapper(cls.url, pic['src']) for pic in pics],
        }
880
881
882
class DeadlyPanel(GenericEmptyComic, GenericNavigableComic):
    """Retriever for the Deadly Panel comics."""
    # Also on https://tapastic.com/series/deadlypanel
    # Also on https://deadlypanel.tumblr.com
    name = 'deadly'
    long_name = 'Deadly Panel'
    url = 'http://www.deadlypanel.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the image urls found in the page's 'comic' div."""
        pics = soup.find('div', id='comic').find_all('img')
        # alt and title are expected to be the same text on every image.
        assert all(pic['alt'] == pic['title'] for pic in pics)
        sources = [pic['src'] for pic in pics]
        return {
            'img': sources,
        }
900
901
902 View Code Duplication
class TheGentlemanArmchair(GenericNavigableComic):
    """Retriever for The Gentleman Armchair comics."""
    name = 'gentlemanarmchair'
    long_name = 'The Gentleman Armchair'
    url = 'http://thegentlemansarmchair.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic description from the post header and the comic div."""
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find('span', class_='post-author')
        author = author_span.find('a').string
        posted = string_to_date(
            soup.find('span', class_='post-date').string, '%B %d, %Y')
        pics = soup.find('div', id='comic').find_all('img')
        return {
            'img': [pic['src'] for pic in pics],
            'title': title,
            'author': author,
            'month': posted.month,
            'year': posted.year,
            'day': posted.day,
        }
926
927
928
class ImogenQuest(GenericNavigableComic):
    """Retriever for the Imogen Quest comics."""
    # Also on http://imoquest.tumblr.com
    name = 'imogen'
    long_name = 'Imogen Quest'
    url = 'http://imogenquest.net'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic description from the post header and the comic pane."""
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find('span', class_='post-author')
        author = author_span.find('a').string
        posted = string_to_date(
            soup.find('span', class_='post-date').string, "%B %d, %Y")
        pics = soup.find('div', class_='comicpane').find_all('img')
        # alt and title are expected to be the same text on every image.
        assert all(pic['alt'] == pic['title'] for pic in pics)
        # The hover text of the first image is kept as secondary title.
        hover_text = pics[0]['title']
        return {
            'day': posted.day,
            'month': posted.month,
            'year': posted.year,
            'img': [pic['src'] for pic in pics],
            'title': title,
            'title2': hover_text,
            'author': author,
        }
956
957
958 View Code Duplication
class MyExtraLife(GenericNavigableComic):
    """Retriever for the My Extra Life comics."""
    name = 'extralife'
    long_name = 'My Extra Life'
    url = 'http://www.myextralife.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'first comic' navigation anchor from the home page."""
        home = get_soup_at_url(cls.url)
        return home.find('a', class_='comic_nav_link first_comic_link')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic description from the title, date and comic images."""
        title = soup.find('h1', class_='comic_title').string
        posted = string_to_date(
            soup.find('span', class_='comic_date').string, '%B %d, %Y')
        pics = soup.find_all('img', class_='comic')
        # Every image is expected to carry the comic title as alt and title.
        assert all(pic['alt'] == pic['title'] == title for pic in pics)
        # Images with an empty source are left out.
        sources = [pic['src'] for pic in pics if pic['src']]
        return {
            'title': title,
            'img': sources,
            'day': posted.day,
            'month': posted.month,
            'year': posted.year
        }
985
986
987
class SaturdayMorningBreakfastCereal(GenericNavigableComic):
    """Retriever for the Saturday Morning Breakfast Cereal comics."""
    # Also on http://www.gocomics.com/saturday-morning-breakfast-cereal
    # Also on http://smbc-comics.tumblr.com
    name = 'smbc'
    long_name = 'Saturday Morning Breakfast Cereal'
    url = 'http://www.smbc-comics.com'
    _categories = ('SMBC', )
    get_navi_link = get_a_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the rel='start' anchor from the home page."""
        home = get_soup_at_url(cls.url)
        return home.find('a', rel='start')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic description: main image plus optional 'aftercomic' one."""
        main_img = soup.find('img', id='cc-comic')
        img_urls = [main_img['src']]
        after_div = soup.find('div', id='aftercomic')
        if after_div:
            after_url = after_div.find('img')['src']
            # An empty source is not worth keeping.
            if after_url:
                img_urls.append(after_url)
        publish_str = soup.find('div', class_='cc-publishtime').contents[0]
        published = string_to_date(publish_str, '%B %d, %Y')
        return {
            'title': main_img['title'],
            'img': [convert_iri_to_plain_ascii_uri(urljoin_wrapper(cls.url, img_url))
                    for img_url in img_urls],
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
1019
1020
1021
class PerryBibleFellowship(GenericListableComic):  # Is now navigable too
    """Retriever for the Perry Bible Fellowship comics."""
    name = 'pbf'
    long_name = 'Perry Bible Fellowship'
    url = 'http://pbfcomics.com'
    get_url_from_archive_element = join_cls_url_to_href

    @classmethod
    def get_archive_elements(cls):
        """Return the anchors of the 'all_thumbnails' div, in reverse page order."""
        home = get_soup_at_url(cls.url)
        anchors = home.find('div', id='all_thumbnails').find_all('a')
        return reversed(anchors)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic description from the og:title and og:image metas."""
        comic_name = soup.find('meta', property='og:title')['content']
        image_metas = soup.find_all('meta', property='og:image')
        # Exactly one image is expected per comic page.
        assert len(image_metas) == 1
        return {
            'name': comic_name,
            'img': [meta['content'] for meta in image_metas],
        }
1044
1045
1046 View Code Duplication
class Mercworks(GenericNavigableComic):
    """Retriever for the Mercworks comics."""
    # Also on http://mercworks.tumblr.com
    name = 'mercworks'
    long_name = 'Mercworks'
    url = 'http://mercworks.net'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic description from the page's <meta> properties."""
        title = soup.find('meta', property='og:title')['content']
        # The description meta is optional.
        desc_meta = soup.find('meta', property='og:description')
        if desc_meta:
            desc = desc_meta['content']
        else:
            desc = ""
        published_meta = soup.find('meta', property='article:published_time')
        # Only the leading 'YYYY-MM-DD' part of the timestamp is used.
        published = string_to_date(published_meta['content'][:10], '%Y-%m-%d')
        image_metas = soup.find_all('meta', property='og:image')
        return {
            'img': [meta['content'] for meta in image_metas],
            'title': title,
            'desc': desc,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
1072
1073
1074
class BerkeleyMews(GenericListableComic):
    """Retriever for the Berkeley Mews comics."""
    # Also on http://mews.tumblr.com
    # Also on http://www.gocomics.com/berkeley-mews
    name = 'berkeley'
    long_name = 'Berkeley Mews'
    url = 'http://www.berkeleymews.com'
    _categories = ('BERKELEY', )
    get_url_from_archive_element = get_href
    # Matches the url of a comic and captures its number.
    comic_num_re = re.compile('%s/\\?p=([0-9]*)$' % url)

    @classmethod
    def get_archive_elements(cls):
        """Return the comic anchors of the archive page, in reverse page order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, "?page_id=2"))
        anchors = archive_soup.find_all('a', href=cls.comic_num_re)
        return reversed(anchors)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic description; the date is parsed from the image url."""
        date_pattern = re.compile('.*/([0-9]*)-([0-9]*)-([0-9]*)-.*')
        comic_url = cls.get_url_from_archive_element(link)
        comic_num = int(cls.comic_num_re.match(comic_url).group(1))
        pic = soup.find('div', id='comic').find('img')
        assert pic['alt'] == pic['title']
        hover_text = pic['title']
        pic_url = pic['src']
        year, month, day = (int(part) for part in date_pattern.match(pic_url).groups())
        return {
            'num': comic_num,
            'title': link.string,
            'title2': hover_text,
            'img': [pic_url],
            'year': year,
            'month': month,
            'day': day,
        }
1110
1111
1112
class GenericBouletCorp(GenericNavigableComic):
    """Common base to retrieve BouletCorp comics, whatever the language."""
    # Also on https://bouletcorp.tumblr.com
    _categories = ('BOULET', )
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the first anchor of the home page's 'centered_nav' div."""
        nav_div = get_soup_at_url(cls.url).find('div', id='centered_nav')
        return nav_div.find_all('a')[0]

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic description; the date is parsed from the comic url."""
        comic_url = cls.get_url_from_link(link)
        date_match = re.match('^%s/([0-9]*)/([0-9]*)/([0-9]*)/' % cls.url, comic_url)
        year, month, day = (int(part) for part in date_match.groups())
        story = soup.find('div', id='notes').find('div', class_='storycontent')
        pics = story.find_all('img')
        hover_texts = [pic.get('title') for pic in pics]
        texts = '  '.join(text for text in hover_texts if text)
        title = soup.find('title').string
        # Images without a source are left out.
        sources = [convert_iri_to_plain_ascii_uri(pic['src'])
                   for pic in pics if pic.get('src') is not None]
        return {
            'img': sources,
            'title': title,
            'texts': texts,
            'year': year,
            'month': month,
            'day': day,
        }
1140
1141
1142
class BouletCorp(GenericBouletCorp):
    """Retriever for the BouletCorp comics (site at www.bouletcorp.com)."""
    # Only site-specific settings live here; scraping is inherited.
    name = 'boulet'
    long_name = 'Boulet Corp'
    url = 'http://www.bouletcorp.com'
    _categories = ('FRANCAIS', )
1148
1149
1150
class BouletCorpEn(GenericBouletCorp):
    """Retriever for the English BouletCorp comics (english.bouletcorp.com)."""
    # Only site-specific settings live here; scraping is inherited.
    name = 'boulet_en'
    long_name = 'Boulet Corp English'
    url = 'http://english.bouletcorp.com'
1155
1156
1157 View Code Duplication
class AmazingSuperPowers(GenericNavigableComic):
    """Retriever for the Amazing Super Powers comics."""
    name = 'asp'
    long_name = 'Amazing Super Powers'
    url = 'http://www.amazingsuperpowers.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic description; the title is made of the image titles."""
        author_span = soup.find('span', class_='post-author')
        author = author_span.find('a').string
        posted = string_to_date(
            soup.find('span', class_='post-date').string, '%B %d, %Y')
        pics = soup.find('div', id='comic').find_all('img')
        title = ' '.join([pic['title'] for pic in pics])
        # alt and title are expected to be the same text on every image.
        assert all(pic['alt'] == pic['title'] for pic in pics)
        return {
            'title': title,
            'author': author,
            'img': [pic['src'] for pic in pics],
            'day': posted.day,
            'month': posted.month,
            'year': posted.year
        }
1182
1183
1184
class ToonHole(GenericNavigableComic):
    """Retriever for the Toon Hole comics."""
    # Also on http://tapastic.com/series/TOONHOLE
    name = 'toonhole'
    long_name = 'Toon Hole'
    url = 'http://www.toonhole.com'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic description; the title is the first image's alt text."""
        short_url = soup.find('link', rel='shortlink')['href']
        meta_div = soup.find('div', class_='entry-meta')
        posted = string_to_date(meta_div.contents[0].strip(), '%B %d, %Y')
        pics = soup.find('div', id='comic').find_all('img')
        # Pages without any image get an empty title.
        title = ""
        if pics:
            first_pic = pics[0]
            title = first_pic['alt']
            assert first_pic['title'] == title
        return {
            'short_url': short_url,
            'title': title,
            'month': posted.month,
            'year': posted.year,
            'day': posted.day,
            'img': [convert_iri_to_plain_ascii_uri(pic['src']) for pic in pics],
        }
1214
1215
1216
class Channelate(GenericNavigableComic):
    """Retriever for the Channelate comics."""
    name = 'channelate'
    long_name = 'Channelate'
    url = 'http://www.channelate.com'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic description, fetching the extra panel page if linked."""
        author_span = soup.find('span', class_='post-author')
        author = author_span.find('a').string
        posted = string_to_date(
            soup.find('span', class_='post-date').string, "%Y/%m/%d")
        title = soup.find('meta', property='og:title')['content']
        pics = []
        comic_div = soup.find('div', id='comic')
        if comic_div:
            pics.extend(comic_div.find_all('img'))
        # The extra panel, when there is one, lives on a separate page.
        bonus_url = None
        bonus_div = soup.find('div', id='extrapanelbutton')
        if bonus_div:
            bonus_url = bonus_div.find('a')['href']
            bonus_soup = get_soup_at_url(bonus_url)
            pics.extend(bonus_soup.find_all('img', class_='extrapanelimage'))
        return {
            'url_extra': bonus_url,
            'title': title,
            'author': author,
            'month': posted.month,
            'year': posted.year,
            'day': posted.day,
            'img': [urljoin_wrapper(cls.url, pic['src']) for pic in pics],
        }
1250
1251
1252
class CyanideAndHappiness(GenericNavigableComic):
    """Class to retrieve Cyanide And Happiness comics."""
    name = 'cyanide'
    long_name = 'Cyanide and Happiness'
    url = 'http://explosm.net'
    _categories = ('NSFW', )
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics (the anchor titled 'Oldest comic')."""
        return get_soup_at_url(cls.url).find('a', title='Oldest comic')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns None when the navigation anchor is missing from the page or
        when it has no 'href' attribute.
        """
        # The class name must not carry surrounding whitespace: class
        # attributes are split on whitespace before being compared, so
        # 'previous-comic ' (trailing space) could never match.
        link = last_soup.find('a', class_='next-comic' if next_ else 'previous-comic')
        if link is None or link.get('href') is None:
            return None
        return link

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The comic number is the second to last '/'-separated part of the
        og:url meta; the author is read from the credit line, which is
        expected to start with 'by '.
        """
        url2 = soup.find('meta', property='og:url')['content']
        num = int(url2.split('/')[-2])
        date_str = soup.find('h3').find('a').string
        day = string_to_date(date_str, '%Y.%m.%d')
        author = soup.find('small', class_="author-credit-name").string
        assert author.startswith('by ')
        author = author[3:]  # drop the leading 'by '
        imgs = soup.find_all('img', id='main-comic')
        return {
            'num': num,
            'author': author,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'prefix': '%d-' % num,
            'img': [convert_iri_to_plain_ascii_uri(urljoin_wrapper(cls.url, i['src'])) for i in imgs]
        }
1291
1292
1293
class MrLovenstein(GenericComic):
    """Class to retrieve Mr Lovenstein comics."""
    # Also on https://tapastic.com/series/MrLovenstein
    name = 'mrlovenstein'
    long_name = 'Mr. Lovenstein'
    url = 'http://www.mrlovenstein.com'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generator to get the next comic. Implementation of GenericComic's abstract method.

        The range of comic numbers is taken from the '/comic/<num>' links of
        the front page; each comic page in that range (starting after
        last_comic's number if there is one) is then fetched and yielded.
        Nothing is yielded when the front page has no such link.
        """
        # TODO: more info from http://www.mrlovenstein.com/archive
        # '+' rather than '*': a bare '/comic/' link has no number to convert.
        comic_num_re = re.compile('^/comic/([0-9]+)$')
        nums = [int(comic_num_re.match(link['href']).group(1))
                for link in get_soup_at_url(cls.url).find_all('a', href=comic_num_re)]
        if not nums:
            # min()/max() would raise ValueError on an empty list.
            return
        first, last = min(nums), max(nums)
        if last_comic:
            first = last_comic['num'] + 1
        for num in range(first, last + 1):
            url = urljoin_wrapper(cls.url, '/comic/%d' % num)
            soup = get_soup_at_url(url)
            # Images are kept in the reverse of their order on the page.
            imgs = list(
                reversed(soup.find_all('img', src=re.compile('^/images/comics/'))))
            description = soup.find('meta', attrs={'name': 'description'})['content']
            yield {
                'url': url,
                'num': num,
                'texts': '  '.join(t for t in (i.get('title') for i in imgs) if t),
                'img': [urljoin_wrapper(url, i['src']) for i in imgs],
                'description': description,
            }
1323
1324
1325
class DinosaurComics(GenericListableComic):
    """Retriever for the Dinosaur Comics comics."""
    name = 'dinosaur'
    long_name = 'Dinosaur Comics'
    url = 'http://www.qwantz.com'
    get_url_from_archive_element = get_href
    # Matches the url of a comic and captures its number.
    comic_link_re = re.compile('^%s/index.php\\?comic=([0-9]*)$' % url)

    @classmethod
    def get_archive_elements(cls):
        """Return the comic anchors of the archive page, in reverse page order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive.php'))
        anchors = archive_soup.find_all('a', href=cls.comic_link_re)
        # first link is random -> skip it
        return reversed(anchors[1:])

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic description from the archive link and the comic page."""
        comic_url = cls.get_url_from_archive_element(link)
        comic_num = int(cls.comic_link_re.match(comic_url).group(1))
        # The link text holds the date; the node right after it holds the text.
        date_str = link.string
        text = link.next_sibling.string
        posted = string_to_date(remove_st_nd_rd_th_from_date(date_str), '%B %d, %Y')
        pic = soup.find('img', src=re.compile('^%s/comics/' % cls.url))
        return {
            'month': posted.month,
            'year': posted.year,
            'day': posted.day,
            'img': [pic.get('src')],
            'title': pic.get('title'),
            'text': text,
            'num': comic_num,
        }
1358
1359
1360
class ButterSafe(GenericListableComic):
    """Retriever for the Butter Safe comics."""
    name = 'butter'
    long_name = 'ButterSafe'
    url = 'http://buttersafe.com'
    get_url_from_archive_element = get_href
    # Matches the url of a comic and captures year, month and day.
    comic_link_re = re.compile('^%s/([0-9]*)/([0-9]*)/([0-9]*)/.*' % url)

    @classmethod
    def get_archive_elements(cls):
        """Return the comic anchors of the archive page, in reverse page order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive/'))
        anchors = archive_soup.find_all('a', href=cls.comic_link_re)
        return reversed(anchors)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic description; the date is parsed from the comic url."""
        comic_url = cls.get_url_from_archive_element(link)
        title = link.string
        date_parts = cls.comic_link_re.match(comic_url).groups()
        year, month, day = (int(part) for part in date_parts)
        pic = soup.find('div', id='comic').find('img')
        # The image alt text is expected to be the archive link text.
        assert pic['alt'] == title
        return {
            'title': title,
            'day': day,
            'month': month,
            'year': year,
            'img': [pic['src']],
        }
1388
1389
1390
class CalvinAndHobbes(GenericComic):
    """Class to retrieve Calvin and Hobbes comics."""
    # Also on http://www.gocomics.com/calvinandhobbes/
    name = 'calvin'
    long_name = 'Calvin and Hobbes'
    # This is not through any official webpage but eh...
    url = 'http://marcel-oehler.marcellosendos.ch/comics/ch/'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generator to get the next comic. Implementation of GenericComic's abstract method.

        The index page at cls.url is scanned for '<year>/<month>/' links.
        Every month page that may hold strips more recent than last_comic
        (or than 1985-11-01 when there is no last comic) is fetched, and
        each image named '<year><month><day>...' whose date is strictly
        after the last yielded date is yielded as a comic.
        """
        last_date = get_date_for_comic(
            last_comic) if last_comic else date(1985, 11, 1)
        # '+' rather than '*': hrefs such as '//host/path' must not match
        # with empty groups, as int('') would raise ValueError.
        link_re = re.compile('^([0-9]+)/([0-9]+)/')
        for link in get_soup_at_url(cls.url).find_all('a', href=link_re):
            url = link['href']
            # year/month stay strings: they are reused verbatim in the urls.
            year, month = link_re.match(url).groups()
            year_num, month_num = int(year), int(month)
            # 31 days after the 1st is past the end of any month: skip only
            # the months that cannot contain anything newer than last_date.
            if date(year_num, month_num, 1) + timedelta(days=31) >= last_date:
                img_re = re.compile('^%s%s([0-9]+)' % (year, month))
                month_url = urljoin_wrapper(cls.url, url)
                for img in get_soup_at_url(month_url).find_all('img', src=img_re):
                    img_src = img['src']
                    day = int(img_re.match(img_src).group(1))
                    comic_date = date(year_num, month_num, day)
                    if comic_date > last_date:
                        yield {
                            'url': month_url,
                            'year': year_num,
                            'month': month_num,
                            'day': day,
                            'img': ['%s%s/%s/%s' % (cls.url, year, month, img_src)],
                        }
                        last_date = comic_date
1424
1425
1426
class AbstruseGoose(GenericListableComic):
    """Retriever for the Abstruse Goose comics."""
    name = 'abstruse'
    long_name = 'Abstruse Goose'
    url = 'http://abstrusegoose.com'
    get_url_from_archive_element = get_href
    # Matches the url of a comic and captures its number.
    comic_url_re = re.compile('^%s/([0-9]*)$' % url)
    # Matches the url of a comic image.
    comic_img_re = re.compile('^%s/strips/.*' % url)

    @classmethod
    def get_archive_elements(cls):
        """Return the comic anchors of the archive page, in page order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive'))
        return archive_soup.find_all('a', href=cls.comic_url_re)

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Build the comic description from the archive link and the comic page."""
        comic_url = cls.get_url_from_archive_element(archive_elt)
        comic_num = int(cls.comic_url_re.match(comic_url).group(1))
        title = archive_elt.string
        strip_img = soup.find('img', src=cls.comic_img_re)
        return {
            'num': comic_num,
            'title': title,
            'img': [strip_img['src']]
        }
1449
1450
1451
class PhDComics(GenericNavigableComic):
    """Retriever for the PHD Comics."""
    name = 'phd'
    long_name = 'PhD Comics'
    url = 'http://phdcomics.com/comics/archive.php'

    @classmethod
    def get_first_comic_link(cls):
        """Return the element wrapping the 'first' button, or None if absent."""
        button = get_soup_at_url(cls.url).find(
            'img', src='http://phdcomics.com/comics/images/first_button.gif')
        if button is None:
            return None
        return button.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the element wrapping the next/prev button, or None if absent."""
        direction = 'next' if next_ else 'prev'
        button_src = 'http://phdcomics.com/comics/images/%s_button.gif' % direction
        button = last_soup.find('img', src=button_src)
        if button is None:
            return None
        return button.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic description from the twitter:title and og:image metas."""
        title_meta = soup.find('meta', attrs={'name': 'twitter:title'})
        title = title_meta['content']
        image_metas = soup.find_all('meta', property='og:image')
        return {
            'img': [meta['content'] for meta in image_metas],
            'title': title,
        }
1480
1481
1482 View Code Duplication
class Octopuns(GenericEmptyComic, GenericNavigableComic):
    """Retriever for the Octopuns comics."""
    # Also on http://octopuns.tumblr.com
    name = 'octopuns'
    long_name = 'Octopuns'
    url = 'http://www.octopuns.net'

    @classmethod
    def get_first_comic_link(cls):
        """Return the element wrapping the 'First.png' button on the home page."""
        home = get_soup_at_url(cls.url)
        first_button = home.find('img', src=re.compile('.*/First.png'))
        return first_button.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the element wrapping the Next/Back button, or None without href."""
        button_pattern = '.*/Next.png' if next_ else '.*/Back.png'
        wrapper = last_soup.find('img', src=re.compile(button_pattern)).parent
        if wrapper.get('href') is None:
            return None
        return wrapper

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic description from the post title, date header and images."""
        title = soup.find('h3', class_='post-title entry-title').string
        header = soup.find('h2', class_='date-header')
        posted = string_to_date(header.string, '%A, %B %d, %Y')
        image_links = soup.find_all('link', rel='image_src')
        return {
            'img': [image_link['href'] for image_link in image_links],
            'title': title,
            'day': posted.day,
            'month': posted.month,
            'year': posted.year,
        }
1514
1515
1516
class Quarktees(GenericNavigableComic):
    """Retriever for the Quarktees comics."""
    name = 'quarktees'
    long_name = 'Quarktees'
    url = 'http://www.quarktees.com/blogs/news'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.quarktees.com/blogs/news/12486621-coming-soon'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the 'article-next' (or 'article-prev') anchor of the page."""
        anchor_id = 'article-next' if next_ else 'article-prev'
        return last_soup.find('a', id=anchor_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic description from og:title and the article images."""
        title = soup.find('meta', property='og:title')['content']
        pics = soup.find('div', class_='single-article').find_all('img')
        sources = [urljoin_wrapper(cls.url, pic['src']) for pic in pics]
        return {
            'title': title,
            'img': sources,
        }
1540
1541
1542
class OverCompensating(GenericNavigableComic):
    """Retrieve the Over Compensating comics."""
    name = 'compensating'
    long_name = 'Over Compensating'
    url = 'http://www.overcompensating.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the home page anchor whose href ends with 'comic=1'."""
        home = get_soup_at_url(cls.url)
        first_re = re.compile('comic=1$')
        return home.find('a', href=first_re)

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor titled 'next comic' or 'go back already'."""
        wanted_title = 'next comic' if next_ else 'go back already'
        return last_soup.find('a', title=wanted_title)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the number, image url and title of a comic.

        The number is read from the trailing 'comic=<digits>' part of the
        comic url.
        """
        img_src_re = re.compile('^/oc/comics/.*')
        comic_num_re = re.compile('.*comic=([0-9]*)$')
        comic_url = cls.get_url_from_link(link)
        matched = comic_num_re.match(comic_url)
        num = int(matched.group(1))
        img = soup.find('img', src=img_src_re)
        info = {
            'num': num,
            'img': [urljoin_wrapper(comic_url, img['src'])],
            'title': img.get('title')
        }
        return info
1572
1573
1574
class Oglaf(GenericNavigableComic):
    """Retrieve the Oglaf comics."""
    name = 'oglaf'
    long_name = 'Oglaf [NSFW]'
    url = 'http://oglaf.com'
    _categories = ('NSFW', )
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the parent of the home page's 'st' div as the first link."""
        home = get_soup_at_url(cls.url)
        return home.find("div", id="st").parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the parent of the 'nx'/'pvs' div, or None when it is absent."""
        div_id = "nx" if next_ else "pvs"
        div = last_soup.find("div", id=div_id)
        if div:
            return div.parent
        return None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the title, image urls and description of a comic.

        Exactly one title image (in the 'tt' div) and one strip image are
        expected; the description joins their 'title' attributes.
        """
        title = soup.find('title').string
        title_imgs = soup.find('div', id='tt').find_all('img')
        assert len(title_imgs) == 1
        strip_imgs = soup.find_all('img', id='strip')
        assert len(strip_imgs) == 1
        all_imgs = title_imgs + strip_imgs
        description = ' '.join(tag['title'] for tag in all_imgs)
        sources = [tag['src'] for tag in all_imgs]
        return {
            'title': title,
            'img': sources,
            'description': description,
        }
1608
1609
1610
class ScandinaviaAndTheWorld(GenericNavigableComic):
    """Retrieve the Scandinavia And The World comics."""
    name = 'satw'
    long_name = 'Scandinavia And The World'
    url = 'http://satwcomic.com'
    get_first_comic_link = simulate_first_link
    first_url = 'http://satwcomic.com/sweden-denmark-and-norway'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor whose accesskey is 'n' (next) or 'p' (previous)."""
        key = 'n' if next_ else 'p'
        return last_soup.find('a', accesskey=key)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the title, description and image urls read from the page."""
        title_meta = soup.find('meta', attrs={'name': 'twitter:label1'})
        title = title_meta['content']
        desc_meta = soup.find('meta', property='og:description')
        desc = desc_meta['content']
        img_urls = [tag['src'] for tag in soup.find_all('img', itemprop="image")]
        return {
            'title': title,
            'description': desc,
            'img': img_urls,
        }
1634
1635
1636
class SomethingOfThatIlk(GenericEmptyComic):  # Does not exist anymore
    """Placeholder for the Something Of That Ilk comics.

    Only the identifying attributes are declared here; everything else comes
    from GenericEmptyComic.
    """
    name = 'somethingofthatilk'
    long_name = 'Something Of That Ilk'
    url = 'http://www.somethingofthatilk.com'
1641
1642
1643
class InfiniteMonkeyBusiness(GenericNavigableComic):
    """Retrieve the Infinite Monkey Business comics."""
    name = 'monkey'
    long_name = 'Infinite Monkey Business'
    url = 'http://infinitemonkeybusiness.net'
    get_navi_link = get_a_navi_comicnavnext_navinext
    get_first_comic_link = simulate_first_link
    first_url = 'http://infinitemonkeybusiness.net/comic/pillory/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the og:title and the urls of the images in the 'comic' div."""
        title = soup.find('meta', property='og:title')['content']
        comic_div = soup.find('div', id='comic')
        img_urls = []
        for tag in comic_div.find_all('img'):
            img_urls.append(tag['src'])
        return {
            'title': title,
            'img': img_urls,
        }
1661
1662
1663
class Wondermark(GenericListableComic):
    """Retrieve the Wondermark comics from the site archive."""
    name = 'wondermark'
    long_name = 'Wondermark'
    url = 'http://wondermark.com'
    get_url_from_archive_element = get_href

    @classmethod
    def get_archive_elements(cls):
        """Return the archive page's 'bookmark' anchors in reversed order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive/'))
        bookmarks = archive_soup.find_all('a', rel='bookmark')
        return reversed(bookmarks)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the date, image, title, alt text and tags of a post.

        Posts without a 'comic' div yield an empty image list and empty
        title/alt strings.
        """
        post_date = soup.find('div', class_='postdate').find('em').string
        day = string_to_date(remove_st_nd_rd_th_from_date(post_date), "%B %d, %Y")
        # Defaults used for posts that carry no comic.
        img_src, alt, title = [], '', ''
        comic_div = soup.find('div', id='comic')
        if comic_div:
            img = comic_div.find('img')
            img_src = [img['src']]
            alt = img['alt']
            assert alt == img['title']
            title = soup.find('meta', property='og:title')['content']
        tag_links = soup.find('div', class_='postmeta').find_all('a', rel='tag')
        tags = ' '.join(t.string for t in tag_links)
        return {
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': img_src,
            'title': title,
            'alt': alt,
            'tags': tags,
        }
1700
1701
1702 View Code Duplication
class WarehouseComic(GenericNavigableComic):
    """Retrieve the Warehouse Comic comics."""
    name = 'warehouse'
    long_name = 'Warehouse Comic'
    url = 'http://warehousecomic.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the title, publication date and image urls of a comic."""
        title = soup.find('h2', class_='post-title').string
        post_date = soup.find('span', class_='post-date').string
        day = string_to_date(post_date, "%B %d, %Y")
        comic_div = soup.find('div', id='comic')
        img_urls = [tag['src'] for tag in comic_div.find_all('img')]
        return {
            'img': img_urls,
            'title': title,
            'day': day.day,
            'month': day.month,
            'year': day.year,
        }
1724
1725
1726
class JustSayEh(GenericNavigableComic):
    """Retrieve the Just Say Eh comics."""
    # Also on http://tapastic.com/series/Just-Say-Eh
    name = 'justsayeh'
    long_name = 'Just Say Eh'
    url = 'http://www.justsayeh.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the image urls, title and alt text of a comic.

        Every image is expected to carry identical 'alt' and 'title'
        attributes; the alt text of the first image is reported.
        """
        title = soup.find('h2', class_='post-title').string
        comic_div = soup.find("div", id="comic")
        imgs = comic_div.find_all("img")
        assert all(tag['alt'] == tag['title'] for tag in imgs)
        first_img = imgs[0]
        alt = first_img['alt']
        sources = [tag['src'] for tag in imgs]
        return {
            'img': sources,
            'title': title,
            'alt': alt,
        }
1747
1748
1749 View Code Duplication
class MouseBearComedy(GenericNavigableComic):
    """Retrieve the Mouse Bear Comedy comics."""
    # Also on http://mousebearcomedy.tumblr.com
    name = 'mousebear'
    long_name = 'Mouse Bear Comedy'
    url = 'http://www.mousebearcomedy.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the date, image urls, title and author of a comic.

        Each image's 'alt' and 'title' attributes are expected to equal the
        post title.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        post_date = soup.find("span", class_="post-date").string
        day = string_to_date(post_date, '%B %d, %Y')
        imgs = soup.find("div", id="comic").find_all("img")
        assert all(tag['alt'] == tag['title'] == title for tag in imgs)
        sources = [tag['src'] for tag in imgs]
        return {
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': sources,
            'title': title,
            'author': author,
        }
1775
1776
1777 View Code Duplication
class BigFootJustice(GenericNavigableComic):
    """Retrieve the Big Foot Justice comics."""
    # Also on http://tapastic.com/series/bigfoot-justice
    name = 'bigfoot'
    long_name = 'Big Foot Justice'
    url = 'http://bigfootjustice.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the image urls and a title built from the image titles.

        Every image is expected to have matching 'title' and 'alt'
        attributes; the titles are joined with spaces.
        """
        comic_div = soup.find('div', id='comic')
        imgs = comic_div.find_all('img')
        assert all(tag['title'] == tag['alt'] for tag in imgs)
        title = ' '.join(tag['title'] for tag in imgs)
        sources = [tag['src'] for tag in imgs]
        return {
            'img': sources,
            'title': title,
        }
1796
1797
1798
class RespawnComic(GenericNavigableComic):
    """Class to retrieve Respawn Comic."""
    # Also on https://respawncomic.tumblr.com
    name = 'respawn'
    long_name = 'Respawn Comic'
    # The url previously ended with a stray space, which is not part of it.
    url = 'http://respawncomic.com'
    _categories = ('RESPAWN', )
    get_navi_link = get_a_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://respawncomic.com/comic/c0001/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Title, author and publication time are read from the page's <meta>
        tags; only the first 10 characters of the publication time
        (YYYY-MM-DD) are parsed. Images come from the og:image tags, minus a
        fixed set of urls listed in `skip_imgs`.
        """
        title = soup.find('meta', property='og:title')['content']
        author = soup.find('meta', attrs={'name': 'shareaholic:article_author_name'})['content']
        date_str = soup.find('meta', attrs={'name': 'shareaholic:article_published_time'})['content']
        # Keep only the leading date part of the timestamp.
        date_str = date_str[:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        imgs = soup.find_all('meta', property='og:image')
        # og:image urls that must not be reported as comic images.
        skip_imgs = {
            'http://respawncomic.com/wp-content/uploads/2016/03/site/HAROLD2.png',
            'http://respawncomic.com/wp-content/uploads/2016/03/site/DEVA.png'
        }
        return {
            'title': title,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [i['content'] for i in imgs if i['content'] not in skip_imgs],
        }
1830
1831
1832 View Code Duplication
class SafelyEndangered(GenericNavigableComic):
    """Retrieve the Safely Endangered comics."""
    # Also on http://tumblr.safelyendangered.com
    name = 'endangered'
    long_name = 'Safely Endangered'
    url = 'http://www.safelyendangered.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.safelyendangered.com/comic/ignored/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the date, image urls, title and alt text of a comic.

        The alt text is taken from the first image; all images are expected
        to have matching 'alt' and 'title' attributes.
        """
        title = soup.find('h2', class_='post-title').string
        post_date = soup.find('span', class_='post-date').string
        day = string_to_date(post_date, '%B %d, %Y')
        comic_div = soup.find('div', id='comic')
        imgs = comic_div.find_all('img')
        first_img = imgs[0]
        alt = first_img['alt']
        assert all(tag['alt'] == tag['title'] for tag in imgs)
        sources = [tag['src'] for tag in imgs]
        return {
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': sources,
            'title': title,
            'alt': alt,
        }
1859
1860
1861 View Code Duplication
class PicturesInBoxes(GenericNavigableComic):
    """Retrieve the Pictures In Boxes comics."""
    # Also on https://picturesinboxescomic.tumblr.com
    name = 'picturesinboxes'
    long_name = 'Pictures in Boxes'
    url = 'http://www.picturesinboxes.com'
    get_navi_link = get_a_navi_navinext
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.picturesinboxes.com/2013/10/26/tetris/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the date, image urls, title and author of a comic.

        At least one image is expected in the 'comicpane' div, and each
        image's 'title' and 'alt' attributes are expected to equal the post
        title.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        post_date = soup.find('span', class_='post-date').string
        day = string_to_date(post_date, '%B %d, %Y')
        pane = soup.find('div', class_='comicpane')
        imgs = pane.find_all('img')
        assert imgs
        assert all(tag['title'] == tag['alt'] == title for tag in imgs)
        sources = [tag['src'] for tag in imgs]
        return {
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': sources,
            'title': title,
            'author': author,
        }
1889
1890
1891 View Code Duplication
class Penmen(GenericNavigableComic):
    """Retrieve the Penmen comics."""
    name = 'penmen'
    long_name = 'Penmen'
    url = 'http://penmen.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://penmen.com/index.php/2016/09/12/penmen-announces-grin-big-brand-clothing/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the title, short url, image urls, tags and date of a post.

        The date comes from the first 10 characters (YYYY-MM-DD) of the
        <time> element's 'datetime' attribute.
        """
        title = soup.find('title').string
        content = soup.find('div', class_='entry-content')
        imgs = content.find_all('img')
        short_url = soup.find('link', rel='shortlink')['href']
        tag_links = soup.find_all('a', rel='tag')
        tags = ' '.join(t.string for t in tag_links)
        iso_date = soup.find('time')['datetime'][:10]
        day = string_to_date(iso_date, "%Y-%m-%d")
        sources = [tag['src'] for tag in imgs]
        return {
            'title': title,
            'short_url': short_url,
            'img': sources,
            'tags': tags,
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
1918
1919
1920
class TheDoghouseDiaries(GenericNavigableComic):
    """Retrieve The Dog House Diaries comics."""
    name = 'doghouse'
    long_name = 'The Dog House Diaries'
    url = 'http://thedoghousediaries.com'

    @classmethod
    def get_first_comic_link(cls):
        """Return the home page anchor whose id is 'firstlink'."""
        home = get_soup_at_url(cls.url)
        return home.find('a', id='firstlink')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor whose id is 'nextlink' or 'previouslink'."""
        anchor_id = 'nextlink' if next_ else 'previouslink'
        return last_soup.find('a', id=anchor_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the titles, alt text, image url and number of a comic.

        The number is the last '/'-separated component of the comic url.
        """
        comic_img_re = re.compile('^dhdcomics/.*')
        img = soup.find('img', src=comic_img_re)
        comic_url = cls.get_url_from_link(link)
        title = soup.find('h2', id='titleheader').string
        subtitle = soup.find('div', id='subtext').string
        alt = img.get('title')
        # The src attribute is stripped of surrounding whitespace before joining.
        img_url = urljoin_wrapper(comic_url, img['src'].strip())
        last_component = comic_url.split('/')[-1]
        return {
            'title': title,
            'title2': subtitle,
            'alt': alt,
            'img': [img_url],
            'num': int(last_component),
        }
1949
1950
1951
class InvisibleBread(GenericListableComic):
    """Class to retrieve Invisible Bread comics."""
    # Also on http://www.gocomics.com/invisible-bread
    name = 'invisiblebread'
    long_name = 'Invisible Bread'
    url = 'http://invisiblebread.com'

    @classmethod
    def get_archive_elements(cls):
        """Get the archive's 'archive-title' cells, in reversed page order."""
        archive_url = urljoin_wrapper(cls.url, 'archives/')
        return reversed(get_soup_at_url(archive_url).find_all('td', class_='archive-title'))

    @classmethod
    def get_url_from_archive_element(cls, td):
        """Get url corresponding to an archive element."""
        return td.find('a')['href']

    @classmethod
    def get_comic_info(cls, soup, td):
        """Get information about a particular comics.

        The title comes from the archive cell's link and the month/day from
        the cell's previous sibling. The year is the first numeric path
        component of the comic url.
        """
        url = cls.get_url_from_archive_element(td)
        title = td.find('a').string
        month_and_day = td.previous_sibling.string
        # Escape the site url so that its dots only match literal dots.
        link_re = re.compile('^%s/([0-9]+)/' % re.escape(cls.url))
        year = link_re.match(url).groups()[0]
        date_str = month_and_day + ' ' + year
        day = string_to_date(date_str, '%b %d %Y')
        imgs = [soup.find('div', id='comic').find('img')]
        assert len(imgs) == 1
        assert all(i['title'] == i['alt'] == title for i in imgs)
        return {
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
            'title': title,
        }
1987
1988
1989
class DiscoBleach(GenericEmptyComic):  # Does not work anymore
    """Placeholder for the Disco Bleach comics.

    Only the identifying attributes are declared here; everything else comes
    from GenericEmptyComic.
    """
    name = 'discobleach'
    long_name = 'Disco Bleach'
    url = 'http://discobleach.com'
1994
1995
1996
class TubeyToons(GenericEmptyComic):  # Does not work anymore
    """Placeholder for the TubeyToons comics.

    Only the identifying attributes are declared here; everything else comes
    from GenericEmptyComic.
    """
    # Also on http://tapastic.com/series/Tubey-Toons
    # Also on https://tubeytoons.tumblr.com
    name = 'tubeytoons'
    long_name = 'Tubey Toons'
    url = 'http://tubeytoons.com'
    # NOTE(review): category is spelled 'TUNEYTOONS' (not 'TUBEYTOONS'); kept
    # as-is since other code may rely on this exact value - confirm.
    _categories = ('TUNEYTOONS', )
2004
2005
2006 View Code Duplication
class CompletelySeriousComics(GenericNavigableComic):
    """Retrieve the Completely Serious comics."""
    name = 'completelyserious'
    long_name = 'Completely Serious Comics'
    url = 'http://completelyseriouscomics.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the date, image urls, title, alt text and author of a comic.

        The author is the second child of the 'post-author' span. At least
        one image is expected, and every image's 'title' and 'alt' are
        expected to equal the first image's title.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find('span', class_='post-author')
        author = author_span.contents[1].string
        post_date = soup.find('span', class_='post-date').string
        day = string_to_date(post_date, '%B %d, %Y')
        pane = soup.find('div', class_='comicpane')
        imgs = pane.find_all('img')
        assert imgs
        alt = imgs[0]['title']
        assert all(tag['title'] == tag['alt'] == alt for tag in imgs)
        sources = [tag['src'] for tag in imgs]
        return {
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': sources,
            'title': title,
            'alt': alt,
            'author': author,
        }
2034
2035
2036
class PoorlyDrawnLines(GenericListableComic):
    """Class to retrieve Poorly Drawn Lines comics."""
    # Also on http://pdlcomics.tumblr.com
    name = 'poorlydrawn'
    long_name = 'Poorly Drawn Lines'
    url = 'https://www.poorlydrawnlines.com'
    _categories = ('POORLYDRAWN', )
    get_url_from_archive_element = get_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        At most one image is expected in the 'post' div. The title is that
        image's 'title' attribute, or an empty string when the attribute or
        the image is missing.
        """
        imgs = soup.find('div', class_='post').find_all('img')
        assert len(imgs) <= 1
        return {
            'img': [i['src'] for i in imgs],
            'title': imgs[0].get('title', "") if imgs else "",
        }

    @classmethod
    def get_archive_elements(cls):
        """Get the archive links pointing to comic pages, in reversed page order."""
        archive_url = urljoin_wrapper(cls.url, 'archive')
        # Escape the site url so that its dots only match literal dots.
        url_re = re.compile('^%s/comic/.' % re.escape(cls.url))
        return reversed(get_soup_at_url(archive_url).find_all('a', href=url_re))
2060
2061
2062 View Code Duplication
class LoadingComics(GenericNavigableComic):
    """Retrieve the Loading Artist comics."""
    name = 'loadingartist'
    long_name = 'Loading Artist'
    url = 'http://www.loadingartist.com/latest'

    @classmethod
    def get_first_comic_link(cls):
        """Return the anchor titled 'First' on the page at the class url."""
        latest = get_soup_at_url(cls.url)
        return latest.find('a', title="First")

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor titled 'Next' or 'Previous'."""
        wanted_title = 'Next' if next_ else 'Previous'
        return last_soup.find('a', title=wanted_title)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the title, image urls and publication date of a comic.

        Only images with empty 'alt' and 'title' attributes inside the
        'comic' div are kept.
        """
        title = soup.find('h1').string
        raw_date = soup.find('span', class_='date').string
        day = string_to_date(raw_date.strip(), "%B %d, %Y")
        comic_div = soup.find('div', class_='comic')
        imgs = comic_div.find_all('img', alt='', title='')
        sources = [tag['src'] for tag in imgs]
        return {
            'title': title,
            'img': sources,
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
2092
2093
2094 View Code Duplication
class ChuckleADuck(GenericNavigableComic):
    """Retrieve the Chuckle-A-Duck comics."""
    name = 'chuckleaduck'
    long_name = 'Chuckle-A-duck'
    url = 'http://chuckleaduck.com'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the date, image urls, title and author of a comic.

        Pages without a 'comic' div or without images yield an empty image
        list and an empty title.
        """
        post_date = soup.find('span', class_='post-date').string
        day = string_to_date(remove_st_nd_rd_th_from_date(post_date), "%B %d, %Y")
        author = soup.find('span', class_='post-author').string
        comic_div = soup.find('div', id='comic')
        if comic_div:
            imgs = comic_div.find_all('img')
        else:
            imgs = []
        if imgs:
            title = imgs[0]['title']
        else:
            title = ""
        assert all(tag['title'] == tag['alt'] == title for tag in imgs)
        sources = [tag['src'] for tag in imgs]
        return {
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': sources,
            'title': title,
            'author': author,
        }
2120
2121
2122
class DepressedAlien(GenericNavigableComic):
    """Retrieve the Depressed Alien comics."""
    name = 'depressedalien'
    long_name = 'Depressed Alien'
    url = 'http://depressedalien.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the parent of the home page image named 'beginArrow'."""
        home = get_soup_at_url(cls.url)
        arrow = home.find('img', attrs={'name': 'beginArrow'})
        return arrow.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the parent of the image named 'rightArrow' or 'leftArrow'."""
        arrow_name = 'rightArrow' if next_ else 'leftArrow'
        arrow = last_soup.find('img', attrs={'name': arrow_name})
        return arrow.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the twitter:title and the og:image urls of a comic."""
        title_meta = soup.find('meta', attrs={'name': 'twitter:title'})
        title = title_meta['content']
        img_metas = soup.find_all('meta', property='og:image')
        img_urls = [meta['content'] for meta in img_metas]
        return {
            'title': title,
            'img': img_urls,
        }
2148
2149
2150 View Code Duplication
class TurnOffUs(GenericListableComic):
    """Retrieve the TurnOffUs comics."""
    name = 'turnoffus'
    long_name = 'Turn Off Us'
    url = 'http://turnoff.us'
    get_url_from_archive_element = join_cls_url_to_href

    @classmethod
    def get_archive_elements(cls):
        """Return the 'post-link' anchors of the 'all' page in reversed order."""
        all_posts_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'all'))
        post_list = all_posts_soup.find('ul', class_='post-list')
        post_links = post_list.find_all('a', class_='post-link')
        return reversed(post_links)

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Return the og:title and the og:image urls of a comic."""
        title = soup.find('meta', property='og:title')['content']
        img_urls = []
        for meta in soup.find_all('meta', property='og:image'):
            img_urls.append(meta['content'])
        return {
            'title': title,
            'img': img_urls,
        }
2172
2173
2174
class ThingsInSquares(GenericListableComic):
    """Retrieve the Things In Squares comics from the archive table."""
    # This can be retrieved in other languages
    # Also on https://tapastic.com/series/Things-in-Squares
    name = 'squares'
    long_name = 'Things in squares'
    url = 'http://www.thingsinsquares.com'

    @classmethod
    def get_archive_elements(cls):
        """Return the rows of the archive table body in reversed order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive-2'))
        rows = archive_soup.find('tbody').find_all('tr')
        return reversed(rows)

    @classmethod
    def get_url_from_archive_element(cls, tr):
        """Return the href of the link in the second of the row's three cells."""
        _, link_cell, __ = tr.find_all('td')
        return link_cell.find('a')['href']

    @classmethod
    def get_comic_info(cls, soup, tr):
        """Return date, titles, description, tags, images and alt text.

        The row must have exactly three cells: the second holds the link
        (whose text is the title) and the third the date in "%m.%d.%y"
        format. A missing og:description yields an empty description.
        """
        _, link_cell, date_cell = tr.find_all('td')
        anchor = link_cell.find('a')
        day = string_to_date(date_cell.string, "%m.%d.%y")
        title = anchor.string
        title2 = soup.find('meta', property='og:title')['content']
        desc_meta = soup.find('meta', property='og:description')
        if desc_meta:
            description = desc_meta['content']
        else:
            description = ''
        tag_metas = soup.find_all('meta', property='article:tag')
        tags = ' '.join(meta['content'] for meta in tag_metas)
        imgs = soup.find('div', class_='entry-content').find_all('img')
        sources = [tag['src'] for tag in imgs]
        alt = ' '.join(tag['alt'] for tag in imgs)
        return {
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'title': title,
            'title2': title2,
            'description': description,
            'tags': tags,
            'img': sources,
            'alt': alt,
        }
2216
2217
2218 View Code Duplication
class HappleTea(GenericNavigableComic):
    """Retrieve the Happle Tea comics."""
    name = 'happletea'
    long_name = 'Happle Tea'
    url = 'http://www.happletea.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the title, image urls, alt text, date and author of a comic.

        Title, author and date are read from the 'post-content' div. Every
        image is expected to have matching 'alt' and 'title' attributes; the
        alt texts are concatenated without a separator.
        """
        comic_div = soup.find('div', id='comic')
        imgs = comic_div.find_all('img')
        post = soup.find('div', class_='post-content')
        title = post.find('h2', class_='post-title').string
        author = post.find('a', rel='author').string
        post_date = post.find('span', class_='post-date').string
        day = string_to_date(post_date, "%B %d, %Y")
        assert all(tag['alt'] == tag['title'] for tag in imgs)
        sources = [tag['src'] for tag in imgs]
        alt = ''.join(tag['alt'] for tag in imgs)
        return {
            'title': title,
            'img': sources,
            'alt': alt,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'author': author,
        }
2245
2246
2247
class RockPaperScissors(GenericNavigableComic):
    """Retrieve the Rock Paper Scissors comics."""
    name = 'rps'
    long_name = 'Rock Paper Scissors'
    url = 'http://rps-comics.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the title, transcript, short url and og:image urls of a comic."""
        title = soup.find('title').string
        img_metas = soup.find_all('meta', property='og:image')
        shortlink = soup.find('link', rel='shortlink')
        short_url = shortlink['href']
        transcript_div = soup.find('div', id='transcript-content')
        transcript = transcript_div.string
        img_urls = [meta['content'] for meta in img_metas]
        return {
            'title': title,
            'transcript': transcript,
            'short_url': short_url,
            'img': img_urls,
        }
2268
2269
2270
class FatAwesomeComics(GenericNavigableComic):
    """Retrieve the Fat Awesome comics."""
    # Also on http://fatawesomecomedy.tumblr.com
    name = 'fatawesome'
    long_name = 'Fat Awesome'
    url = 'http://fatawesome.com/comics'
    get_navi_link = get_a_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://fatawesome.com/shortbus/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, description, tags, alt text, image urls and date.

        A missing article:tag meta yields empty tags. Exactly one image with
        data-recalc-dims="1" is expected; the part of its url after the last
        '?' is dropped.
        """
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        description = soup.find('meta', attrs={'name': 'description'})['content']
        tag_meta = soup.find('meta', property='article:tag')
        if tag_meta:
            tags = tag_meta['content']
        else:
            tags = ""
        published = soup.find('meta', property='article:published_time')['content']
        day = string_to_date(published[:10], "%Y-%m-%d")
        imgs = soup.find_all('img', attrs={'data-recalc-dims': "1"})
        assert len(imgs) == 1
        alt = "".join(tag['alt'] for tag in imgs)
        sources = [tag['src'].rsplit('?', 1)[0] for tag in imgs]
        return {
            'title': title,
            'description': description,
            'tags': tags,
            'alt': alt,
            'img': sources,
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
2301
2302
2303
class JuliasDrawings(GenericListableComic):
    """Scraper for Julia's Drawings."""
    name = 'julia'
    long_name = "Julia's Drawings"
    url = 'https://drawings.jvns.ca'
    get_url_from_archive_element = get_href

    @classmethod
    def get_archive_elements(cls):
        """Return the first <a> of every post listed on the home page.

        The articles are walked in reverse page order.
        """
        home = get_soup_at_url(cls.url)
        anchors = []
        for article in reversed(home.find_all('article', class_='li post')):
            anchors.append(article.find('a'))
        return anchors

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Build the info dict for the drawing found in the parsed page `soup`.

        The date comes from the og:article:published_time <meta> tag and the
        image addresses are resolved against the site url. `archive_elt` is
        not used.
        """
        published = soup.find('meta', property='og:article:published_time')['content']
        # Only the leading YYYY-MM-DD part of the timestamp is parsed.
        pub_date = string_to_date(published[:10], "%Y-%m-%d")
        title = soup.find('h3', class_='p-post-title').string
        content = soup.find('section', class_='post-content')
        img_urls = [urljoin_wrapper(cls.url, tag['src']) for tag in content.find_all('img')]
        return {
            'title': title,
            'img': img_urls,
            'month': pub_date.month,
            'year': pub_date.year,
            'day': pub_date.day,
        }
2329
2330
2331
class AnythingComic(GenericListableComic):
    """Scraper for Anything Comic, driven by the rows of its archive table."""
    # Also on http://tapastic.com/series/anything
    name = 'anythingcomic'
    long_name = 'Anything Comic'
    url = 'http://www.anythingcomic.com'

    @classmethod
    def get_archive_elements(cls):
        """Return the rows of the archive table that describe comics."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive/'))
        rows = archive_soup.find('table', id='chapter_table').find_all('tr')
        # The first 2 <tr>'s do not correspond to comics
        return rows[2:]

    @classmethod
    def get_url_from_archive_element(cls, tr):
        """Return the absolute url of the comic described by the row `tr`."""
        # A row must hold exactly four cells; the link sits in the second one.
        _, comic_cell, _, _ = tr.find_all('td')
        href = comic_cell.find('a')['href']
        return urljoin_wrapper(cls.url, href)

    @classmethod
    def get_comic_info(cls, soup, tr):
        """Build the info dict from the comic page `soup` and archive row `tr`.

        Number, title and date are read from the row; the image and its alt
        text are read from the page, which must hold exactly one
        <img id="comic_image">.
        """
        num_cell, comic_cell, date_cell, _ = tr.find_all('td')
        num = int(num_cell.string)
        title = comic_cell.find('a').string
        pictures = soup.find_all('img', id='comic_image')
        cleaned_date = remove_st_nd_rd_th_from_date(date_cell.string)
        pub_date = string_to_date(cleaned_date, "%B %d, %Y, %I:%M %p")
        assert len(pictures) == 1
        assert all(pic.get('alt') == pic.get('title') for pic in pictures)
        alt_text = pictures[0].get('alt', '')
        img_urls = [pic['src'] for pic in pictures]
        return {
            'num': num,
            'title': title,
            'alt': alt_text,
            'img': img_urls,
            'month': pub_date.month,
            'year': pub_date.year,
            'day': pub_date.day,
        }
2372
2373
2374 View Code Duplication
class LonnieMillsap(GenericNavigableComic):
    """Scraper for Lonnie Millsap's comics."""
    name = 'millsap'
    long_name = 'Lonnie Millsap'
    url = 'http://www.lonniemillsap.com'
    first_url = 'http://www.lonniemillsap.com/?p=42'
    get_first_comic_link = simulate_first_link
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for the comic found in the parsed page `soup`.

        Author, date and images are all looked up inside the
        <div class="post-content"> element. `link` is not used.
        """
        title = soup.find('h2', class_='post-title').string
        post_div = soup.find('div', class_='post-content')
        author = post_div.find("span", class_="post-author").find("a").string
        pub_date = string_to_date(
            post_div.find("span", class_="post-date").string, "%B %d, %Y")
        entry_div = post_div.find("div", class_="entry")
        img_urls = [tag['src'] for tag in entry_div.find_all("img")]
        return {
            'title': title,
            'author': author,
            'img': img_urls,
            'month': pub_date.month,
            'year': pub_date.year,
            'day': pub_date.day,
        }
2400
2401
2402 View Code Duplication
class LinsEditions(GenericNavigableComic):
    """Scraper for the L.I.N.S. Editions comics."""
    # Also on https://linscomics.tumblr.com
    # Now on https://warandpeas.com
    name = 'lins'
    long_name = 'L.I.N.S. Editions'
    url = 'https://linsedition.com'
    _categories = ('LINS', )
    first_url = 'https://linsedition.com/2011/09/07/l-i-n-s/'
    get_first_comic_link = simulate_first_link
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for the comic found in the parsed page `soup`.

        Everything is read from <meta> tags: og:title, og:image and
        article:published_time. `link` is not used.
        """
        title = soup.find('meta', property='og:title')['content']
        image_metas = soup.find_all('meta', property='og:image')
        published = soup.find('meta', property='article:published_time')['content']
        # Only the leading YYYY-MM-DD part of the timestamp is parsed.
        pub_date = string_to_date(published[:10], "%Y-%m-%d")
        img_urls = [meta['content'] for meta in image_metas]
        return {
            'title': title,
            'img': img_urls,
            'month': pub_date.month,
            'year': pub_date.year,
            'day': pub_date.day,
        }
2428
2429
2430
class ThorsThundershack(GenericNavigableComic):
    """Scraper for the Thor's Thundershack comics."""
    # Also on http://tapastic.com/series/Thors-Thundershac
    name = 'thor'
    long_name = 'Thor\'s Thundershack'
    url = 'http://www.thorsthundershack.com'
    _categories = ('THOR', )
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the <a class="first navlink"> element of the home page."""
        home = get_soup_at_url(cls.url)
        return home.find('a', class_='first navlink')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the next (or previous) navigation link, or None.

        Links whose href is exactly '/comic' are skipped.
        """
        wanted_rel = 'next' if next_ else 'prev'
        candidates = last_soup.find_all('a', rel=wanted_rel)
        return next((a for a in candidates if a['href'] != '/comic'), None)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for the comic found in the parsed page `soup`.

        Most fields are located through their itemprop attribute; image
        addresses are resolved against the site url. `link` is not used.
        """
        title = soup.find('meta', attrs={'name': 'description'})["content"]
        description = soup.find('div', itemprop='articleBody').text
        author = soup.find('span', itemprop='author copyrightHolder').string
        pictures = soup.find_all('img', itemprop='image')
        assert all(pic['title'] == pic['alt'] for pic in pictures)
        if pictures:
            alt = pictures[0]['alt']
        else:
            alt = ""
        published = soup.find('time', itemprop='datePublished')["datetime"]
        pub_date = string_to_date(published, "%Y-%m-%d %H:%M:%S")
        img_urls = [urljoin_wrapper(cls.url, pic['src']) for pic in pictures]
        return {
            'img': img_urls,
            'month': pub_date.month,
            'year': pub_date.year,
            'day': pub_date.day,
            'author': author,
            'title': title,
            'alt': alt,
            'description': description,
        }
2473
2474
2475 View Code Duplication
class GerbilWithAJetpack(GenericNavigableComic):
    """Class to retrieve GerbilWithAJetpack comics."""
    name = 'gerbil'
    long_name = 'Gerbil With A Jetpack'
    url = 'http://gerbilwithajetpack.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        `soup` is the parsed comic page; `link` is not used.
        Returns a dict with the image urls, title, alt text, author and
        publication date. The alt text is "" when the page holds no image.
        """
        title = soup.find('h2', class_='post-title').string
        author = soup.find("span", class_="post-author").find("a").string
        date_str = soup.find("span", class_="post-date").string
        day = string_to_date(date_str, "%B %d, %Y")
        imgs = soup.find("div", id="comic").find_all("img")
        # A page without any comic image must not raise IndexError; fall back
        # to an empty alt text as the other scrapers of this file do.
        alt = imgs[0]['alt'] if imgs else ""
        # Every image is expected to share the same alt and title text.
        assert all(i['alt'] == i['title'] == alt for i in imgs)
        return {
            'img': [i['src'] for i in imgs],
            'title': title,
            'alt': alt,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
2502
2503
2504 View Code Duplication
class EveryDayBlues(GenericEmptyComic, GenericNavigableComic):
    """Scraper for the Every Day Blues comics."""
    name = "blues"
    long_name = "Every Day Blues"
    url = "http://everydayblues.net"
    get_navi_link = get_link_rel_next
    get_first_comic_link = get_a_navi_navifirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for the comic found in the parsed page `soup`.

        The date is parsed with the "de_DE.utf8" locale. At most one image
        is expected, with alt and title both equal to the post title.
        `link` is not used.
        """
        title = soup.find("h2", class_="post-title").string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        raw_date = soup.find("span", class_="post-date").string
        pub_date = string_to_date(raw_date, "%d. %B %Y", "de_DE.utf8")
        pictures = soup.find("div", id="comic").find_all("img")
        assert all(pic['alt'] == pic['title'] == title for pic in pictures)
        assert len(pictures) <= 1
        img_urls = [pic['src'] for pic in pictures]
        return {
            'img': img_urls,
            'title': title,
            'author': author,
            'day': pub_date.day,
            'month': pub_date.month,
            'year': pub_date.year
        }
2530
2531
2532 View Code Duplication
class BiterComics(GenericNavigableComic):
    """Scraper for the Biter Comics."""
    name = "biter"
    long_name = "Biter Comics"
    url = "http://www.bitercomics.com"
    get_navi_link = get_link_rel_next
    get_first_comic_link = get_a_navi_navifirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for the comic found in the parsed page `soup`.

        Exactly one image is expected inside <div id="comic">, with identical
        alt and title attributes. `link` is not used.
        """
        title = soup.find("h1", class_="entry-title").string
        author_span = soup.find("span", class_="author vcard")
        author = author_span.find("a").string
        raw_date = soup.find("span", class_="entry-date").string
        pub_date = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find("div", id="comic").find_all("img")
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        assert len(pictures) == 1
        alt = pictures[0]['alt']
        img_urls = [pic['src'] for pic in pictures]
        return {
            'img': img_urls,
            'title': title,
            'alt': alt,
            'author': author,
            'day': pub_date.day,
            'month': pub_date.month,
            'year': pub_date.year
        }
2560
2561
2562 View Code Duplication
class TheAwkwardYeti(GenericNavigableComic):
    """Scraper for The Awkward Yeti comics."""
    # Also on http://www.gocomics.com/the-awkward-yeti
    # Also on http://larstheyeti.tumblr.com
    # Also on https://tapastic.com/series/TheAwkwardYeti
    name = 'yeti'
    long_name = 'The Awkward Yeti'
    url = 'http://theawkwardyeti.com'
    _categories = ('YETI', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = get_a_navi_navifirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for the comic found in the parsed page `soup`.

        Only the first image of <div id="comic"> is checked for matching
        alt and title attributes. `link` is not used.
        """
        title = soup.find('h2', class_='post-title').string
        raw_date = soup.find("span", class_="post-date").string
        pub_date = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find("div", id="comic").find_all("img")
        # Images after the first one are exempt from the alt/title check.
        assert not pictures or pictures[0]['alt'] == pictures[0]['title']
        img_urls = [pic['src'] for pic in pictures]
        return {
            'img': img_urls,
            'title': title,
            'day': pub_date.day,
            'month': pub_date.month,
            'year': pub_date.year
        }
2589
2590
2591
class PleasantThoughts(GenericNavigableComic):
    """Scraper for the Pleasant Thoughts comics."""
    name = 'pleasant'
    long_name = 'Pleasant Thoughts'
    url = 'http://pleasant-thoughts.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = get_a_navi_navifirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title and image urls) from the page `soup`.

        Both values are looked up inside <div class="post-content">.
        `link` is not used.
        """
        post_div = soup.find('div', class_='post-content')
        title = post_div.find('h2', class_='post-title').string
        entry_div = post_div.find("div", class_="entry")
        img_urls = [tag['src'] for tag in entry_div.find_all("img")]
        return {
            'title': title,
            'img': img_urls,
        }
2609
2610
2611 View Code Duplication
class MisterAndMe(GenericNavigableComic):
    """Scraper for the Mister & Me comics."""
    # Also on http://www.gocomics.com/mister-and-me
    # Also on https://tapastic.com/series/Mister-and-Me
    name = 'mister'
    long_name = 'Mister & Me'
    url = 'http://www.mister-and-me.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = get_a_comicnavbase_comicnavfirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for the comic found in the parsed page `soup`.

        At most one image is expected inside <div id="comic">; the alt text
        is "" when there is none. `link` is not used.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        raw_date = soup.find("span", class_="post-date").string
        pub_date = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find("div", id="comic").find_all("img")
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        assert len(pictures) <= 1
        if pictures:
            alt = pictures[0]['alt']
        else:
            alt = ""
        img_urls = [pic['src'] for pic in pictures]
        return {
            'img': img_urls,
            'title': title,
            'alt': alt,
            'author': author,
            'day': pub_date.day,
            'month': pub_date.month,
            'year': pub_date.year
        }
2641
2642
2643
class LastPlaceComics(GenericNavigableComic):
    """Scraper for the Last Place Comics."""
    name = 'lastplace'
    long_name = 'Last Place Comics'
    url = "http://lastplacecomics.com"
    get_navi_link = get_link_rel_next
    get_first_comic_link = get_a_comicnavbase_comicnavfirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for the comic found in the parsed page `soup`.

        At most one image is expected inside <div id="comic">; the alt text
        is "" when there is none. `link` is not used.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        raw_date = soup.find("span", class_="post-date").string
        pub_date = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find("div", id="comic").find_all("img")
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        assert len(pictures) <= 1
        if pictures:
            alt = pictures[0]['alt']
        else:
            alt = ""
        img_urls = [pic['src'] for pic in pictures]
        return {
            'img': img_urls,
            'title': title,
            'alt': alt,
            'author': author,
            'day': pub_date.day,
            'month': pub_date.month,
            'year': pub_date.year
        }
2671
2672
2673 View Code Duplication
class TalesOfAbsurdity(GenericNavigableComic):
    """Scraper for the Tales Of Absurdity comics."""
    # Also on http://tapastic.com/series/Tales-Of-Absurdity
    # Also on http://talesofabsurdity.tumblr.com
    name = 'absurdity'
    long_name = 'Tales of Absurdity'
    url = 'http://talesofabsurdity.com'
    _categories = ('ABSURDITY', )
    get_navi_link = get_a_navi_comicnavnext_navinext
    get_first_comic_link = get_a_navi_navifirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for the comic found in the parsed page `soup`.

        The alt text is taken from the first image of <div id="comic">, or is
        "" when there is none. `link` is not used.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        raw_date = soup.find("span", class_="post-date").string
        pub_date = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find("div", id="comic").find_all("img")
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        if pictures:
            alt = pictures[0]['alt']
        else:
            alt = ""
        img_urls = [pic['src'] for pic in pictures]
        return {
            'img': img_urls,
            'title': title,
            'alt': alt,
            'author': author,
            'day': pub_date.day,
            'month': pub_date.month,
            'year': pub_date.year
        }
2703
2704
2705 View Code Duplication
class EndlessOrigami(GenericEmptyComic, GenericNavigableComic):
    """Scraper for the Endless Origami comics."""
    name = "origami"
    long_name = "Endless Origami"
    url = "http://endlessorigami.com"
    get_navi_link = get_link_rel_next
    get_first_comic_link = get_a_navi_navifirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for the comic found in the parsed page `soup`.

        The alt text is taken from the first image of <div id="comic">, or is
        "" when there is none. `link` is not used.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        raw_date = soup.find("span", class_="post-date").string
        pub_date = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find("div", id="comic").find_all("img")
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        if pictures:
            alt = pictures[0]['alt']
        else:
            alt = ""
        img_urls = [pic['src'] for pic in pictures]
        return {
            'img': img_urls,
            'title': title,
            'alt': alt,
            'author': author,
            'day': pub_date.day,
            'month': pub_date.month,
            'year': pub_date.year
        }
2732
2733
2734
class PlanC(GenericNavigableComic):
    """Scraper for the Plan C comics."""
    name = 'planc'
    long_name = 'Plan C'
    url = 'http://www.plancomic.com'
    get_navi_link = get_a_navi_comicnavnext_navinext
    get_first_comic_link = get_a_navi_navifirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title, images, date) from the page `soup`.

        `link` is not used.
        """
        title = soup.find('h2', class_='post-title').string
        raw_date = soup.find("span", class_="post-date").string
        pub_date = string_to_date(raw_date, "%B %d, %Y")
        comic_div = soup.find('div', id='comic')
        img_urls = [tag['src'] for tag in comic_div.find_all('img')]
        return {
            'title': title,
            'img': img_urls,
            'month': pub_date.month,
            'year': pub_date.year,
            'day': pub_date.day,
        }
2756
2757
2758 View Code Duplication
class BuniComic(GenericNavigableComic):
    """Scraper for the Buni comics."""
    name = 'buni'
    long_name = 'BuniComics'
    url = 'http://www.bunicomic.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = get_a_comicnavbase_comicnavfirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (image urls and title) from the page `soup`.

        Exactly one image is expected inside <div id="comic">; its title
        attribute is used as the comic title. `link` is not used.
        """
        pictures = soup.find('div', id='comic').find_all('img')
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        assert len(pictures) == 1
        img_urls = [pic['src'] for pic in pictures]
        return {
            'img': img_urls,
            'title': pictures[0]['title'],
        }
2776
2777
2778 View Code Duplication
class GenericCommitStrip(GenericNavigableComic):
    """Shared scraper logic for the Commit Strip comics, whatever the language.

    Subclasses are expected to override `first_url`.
    """
    first_url = NotImplemented
    get_first_comic_link = simulate_first_link
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for the strip found in the parsed page `soup`.

        Title and description come from Open Graph <meta> tags; 'title2' is
        the space-joined title attributes of the images (missing ones count
        as ""). `link` is not used.
        """
        description = soup.find('meta', property='og:description')['content']
        title = soup.find('meta', property='og:title')['content']
        pictures = soup.find('div', class_='entry-content').find_all('img')
        picture_titles = ' '.join(pic.get('title', '') for pic in pictures)
        # Image addresses are converted to plain ASCII and made absolute.
        img_urls = [
            urljoin_wrapper(cls.url, convert_iri_to_plain_ascii_uri(pic['src']))
            for pic in pictures
        ]
        return {
            'title': title,
            'title2': picture_titles,
            'description': description,
            'img': img_urls,
        }
2797
2798
2799
class CommitStripFr(GenericCommitStrip):
    """French edition of the Commit Strip comics."""
    name = 'commit_fr'
    long_name = 'Commit Strip (Fr)'
    _categories = ('FRANCAIS', )
    url = 'http://www.commitstrip.com/fr'
    first_url = 'http://www.commitstrip.com/fr/2012/02/22/interview/'
2806
2807
2808
class CommitStripEn(GenericCommitStrip):
    """English edition of the Commit Strip comics."""
    name = 'commit_en'
    long_name = 'Commit Strip (En)'
    first_url = 'http://www.commitstrip.com/en/2012/02/22/interview/'
    url = 'http://www.commitstrip.com/en'
2814
2815
2816 View Code Duplication
class GenericBoumerie(GenericNavigableComic):
    """Shared scraper logic for the Boumeries comics, whatever the language.

    Subclasses are expected to override `date_format` and `lang`, which are
    both handed to string_to_date.
    """
    date_format = NotImplemented
    lang = NotImplemented
    get_navi_link = get_link_rel_next
    get_first_comic_link = get_a_navi_navifirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for the comic found in the parsed page `soup`.

        The date is parsed with the class-level `date_format` and `lang`.
        `link` is not used.
        """
        title = soup.find('h2', class_='post-title').string
        short_url = soup.find('link', rel='shortlink')['href']
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        raw_date = soup.find('span', class_='post-date').string
        pub_date = string_to_date(raw_date, cls.date_format, cls.lang)
        pictures = soup.find('div', id='comic').find_all('img')
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        img_urls = [pic['src'] for pic in pictures]
        return {
            'short_url': short_url,
            'img': img_urls,
            'title': title,
            'author': author,
            'month': pub_date.month,
            'year': pub_date.year,
            'day': pub_date.day,
        }
2842
2843
2844
class BoumerieEn(GenericBoumerie):
    """English edition of the Boumeries comics."""
    name = 'boumeries_en'
    long_name = 'Boumeries (En)'
    url = 'http://comics.boumerie.com'
    lang = 'en_GB.UTF-8'
    date_format = "%B %d, %Y"
2851
2852
2853
class BoumerieFr(GenericBoumerie):
    """French edition of the Boumeries comics."""
    name = 'boumeries_fr'
    long_name = 'Boumeries (Fr)'
    url = 'http://bd.boumerie.com'
    _categories = ('FRANCAIS', )
    lang = "fr_FR.utf8"
    date_format = "%A, %d %B %Y"
2861
2862
2863 View Code Duplication
class UnearthedComics(GenericNavigableComic):
    """Class to retrieve Unearthed comics."""
    # Also on http://tapastic.com/series/UnearthedComics
    # Also on https://unearthedcomics.tumblr.com
    name = 'unearthed'
    long_name = 'Unearthed Comics'
    url = 'http://unearthedcomics.com'
    _categories = ('UNEARTHED', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://unearthedcomics.com/comics/world-with-turn-signals/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        `soup` is the parsed comic page; `link` is not used.
        Returns a dict with the title ("" when no <h1>/<h2> is found), the
        description (text of the og:description <meta>, or None when the
        page has none), the short url, the image urls and the date.
        """
        short_url = soup.find('link', rel='shortlink')['href']
        title_elt = soup.find('h1') or soup.find('h2')
        title = title_elt.string if title_elt else ""
        desc_meta = soup.find('meta', property='og:description')
        # Keep the text of the tag, not the tag object itself: the other
        # scrapers of this file all store plain strings in the info dict.
        desc = desc_meta.get('content') if desc_meta else None
        date_str = soup.find('time', class_='published updated hidden')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        post = soup.find('div', class_="entry content entry-content type-portfolio")
        imgs = post.find_all('img')
        return {
            'title': title,
            'description': desc,
            'url2': short_url,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
2895
2896
2897 View Code Duplication
class Optipess(GenericNavigableComic):
    """Scraper for the Optipess comics."""
    name = 'optipess'
    long_name = 'Optipess'
    url = 'http://www.optipess.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = get_a_navi_navifirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for the comic found in the parsed page `soup`.

        A page without <div id="comic"> yields an empty image list and an
        empty alt text. `link` is not used.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        comic_div = soup.find('div', id='comic')
        if comic_div:
            pictures = comic_div.find_all('img')
        else:
            pictures = []
        if pictures:
            alt = pictures[0]['title']
        else:
            alt = ""
        # Every image is expected to share the same alt and title text.
        assert all(pic['alt'] == pic['title'] == alt for pic in pictures)
        raw_date = soup.find('span', class_='post-date').string
        pub_date = string_to_date(raw_date, "%B %d, %Y")
        img_urls = [pic['src'] for pic in pictures]
        return {
            'title': title,
            'alt': alt,
            'author': author,
            'img': img_urls,
            'month': pub_date.month,
            'year': pub_date.year,
            'day': pub_date.day,
        }
2925
2926
2927
class PainTrainComic(GenericNavigableComic):
    """Class to retrieve Pain Train Comics."""
    name = 'paintrain'
    long_name = 'Pain Train Comics'
    url = 'http://paintraincomic.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        `soup` is the parsed comic page; `link` is not used.
        Returns a dict with the short url, the comic number extracted from
        it, the image urls, the date, the alt text and the title.
        """
        title = soup.find('h2', class_='post-title').string
        short_url = soup.find('link', rel='shortlink')['href']
        # The site url is escaped so that its dots are matched literally
        # instead of acting as regex wildcards.
        short_url_re = re.compile(r'^%s/\?p=([0-9]*)' % re.escape(cls.url))
        num = int(short_url_re.match(short_url).groups()[0])
        imgs = soup.find('div', id='comic').find_all('img')
        alt = imgs[0]['title']
        # Every image is expected to share the same alt and title text.
        assert all(i['alt'] == i['title'] == alt for i in imgs)
        date_str = soup.find('span', class_='post-date').string
        day = string_to_date(date_str, "%d/%m/%Y")
        return {
            'short_url': short_url,
            'num': num,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'alt': alt,
            'title': title,
        }
2957
2958
2959
class MoonBeard(GenericNavigableComic):
    """Class to retrieve MoonBeard comics."""
    # Also on http://squireseses.tumblr.com
    # Also on http://www.webtoons.com/en/comedy/moon-beard/list?title_no=471
    name = 'moonbeard'
    long_name = 'Moon Beard'
    url = 'http://moonbeard.com'
    _categories = ('MOONBEARD', )
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        `soup` is the parsed comic page; `link` is not used.
        Returns a dict with the short url, the comic number extracted from
        it, the image urls, the date, the title, the space-joined
        article:tag values, the alt text and the author.
        """
        title = soup.find('h2', class_='post-title').string
        short_url = soup.find('link', rel='shortlink')['href']
        # The site url is escaped so that its dots are matched literally
        # instead of acting as regex wildcards.
        short_url_re = re.compile(r'^%s/\?p=([0-9]*)' % re.escape(cls.url))
        num = int(short_url_re.match(short_url).groups()[0])
        imgs = soup.find('div', id='comic').find_all('img')
        alt = imgs[0]['title']
        # Every image is expected to share the same alt and title text.
        assert all(i['alt'] == i['title'] == alt for i in imgs)
        date_str = soup.find('span', class_='post-date').string
        day = string_to_date(date_str, "%B %d, %Y")
        tags = ' '.join(t['content'] for t in soup.find_all('meta', property='article:tag'))
        author = soup.find('span', class_='post-author').string
        return {
            'short_url': short_url,
            'num': num,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'title': title,
            'tags': tags,
            'alt': alt,
            'author': author,
        }
2996
2997
2998
class AHammADay(GenericEmptyComic, GenericNavigableComic):
    """Class to retrieve class A Hamm A Day comics."""
    name = 'hamm'
    long_name = 'A Hamm A Day'
    url = 'http://www.ahammaday.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.ahammaday.com/today/3/6/french'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns None when the page has no matching navigation item instead
        of failing on the missing element.
        """
        # prev is next / next is prev
        li = last_soup.find('li', class_='previous' if next_ else 'next')
        return li.find('a') if li else None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        `soup` is the parsed comic page; `link` is not used.
        Returns a dict with the image urls (content of the
        <meta itemprop="image"> tags), the title, the author and the date.
        """
        date_str = soup.find('time', class_='published')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        author = soup.find('span', class_='blog-author').find('a').string
        title = soup.find('meta', property='og:title')['content']
        imgs = soup.find_all('meta', itemprop='image')
        return {
            'img': [i['content'] for i in imgs],
            'title': title,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
        }
3029
3030
3031
class SystemComic(GenericNavigableComic):
    """Scraper for the System Comic."""
    name = 'system'
    long_name = 'System Comic'
    url = 'http://www.systemcomic.com'
    get_navi_link = get_a_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the <a> found inside the <li class="first"> of the home page."""
        home = get_soup_at_url(cls.url)
        first_item = home.find('li', class_='first')
        return first_item.find('a')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for the comic found in the parsed page `soup`.

        Title and description come from Open Graph <meta> tags, the date
        from the first <time> element and the images from the first
        <figure>. `link` is not used.
        """
        title = soup.find('meta', property='og:title')['content']
        description = soup.find('meta', property='og:description')['content']
        published = soup.find('time')["datetime"]
        pub_date = string_to_date(published, "%Y-%m-%d")
        figure = soup.find('figure')
        img_urls = [tag['src'] for tag in figure.find_all('img')]
        return {
            'title': title,
            'description': description,
            'day': pub_date.day,
            'month': pub_date.month,
            'year': pub_date.year,
            'img': img_urls,
        }
3059
3060
3061
class LittleLifeLines(GenericNavigableComic):
    """Class to retrieve Little Life Lines comics."""
    # Also on https://little-life-lines.tumblr.com
    name = 'life'
    long_name = 'Little Life Lines'
    url = 'http://www.littlelifelines.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.littlelifelines.com/comics/well-done'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns the anchor tag, or None when the page has no matching
        list item.
        """
        # prev is next / next is prev: the 'prev' list item is the one this
        # code follows when asked for the next comic.
        li = last_soup.find('li', class_='prev' if next_ else 'next')
        return li.find('a') if li else None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with title, alt text, description, author, date
        components and the list of image urls. `link` is unused.
        """
        title = soup.find('meta', property='og:title')['content']
        desc = soup.find('meta', property='og:description')['content']
        date_str = soup.find('time', class_='published')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        author = soup.find('a', rel='author').string
        div_content = soup.find('div', class_="body entry-content")
        # Only keep images that actually carry a 'src' attribute.
        imgs = [i for i in div_content.find_all('img') if i.get('src') is not None]
        # A post without any usable image (or an image without 'alt') must not
        # crash the retrieval: fall back to an empty alt text.
        alt = imgs[0].get('alt', '') if imgs else ''
        return {
            'title': title,
            'alt': alt,
            'description': desc,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [i['src'] for i in imgs],
        }
3100
3101
3102
class GenericWordPressInkblot(GenericNavigableComic):
    """Base class for comics published with WordPress and Inkblot."""
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'first webcomic' anchor found on the page at cls.url."""
        first_link_classes = 'webcomic-link webcomic1-link first-webcomic-link first-webcomic1-link'
        home_page = get_soup_at_url(cls.url)
        return home_page.find('a', class_=first_link_classes)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Read title, publication date and image urls from a comic page.

        `link` is not used by this implementation.
        """
        og_title = soup.find('meta', property='og:title')['content']
        image_div = soup.find('div', class_='webcomic-image')
        comic_imgs = image_div.find_all('img')
        published_time = soup.find('meta', property='article:published_time')['content']
        # Only the first 10 characters are parsed, using the "%Y-%m-%d" format.
        published = string_to_date(published_time[:10], "%Y-%m-%d")
        return {
            'title': og_title,
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [img['src'] for img in comic_imgs],
        }
3125
3126
3127
class EverythingsStupid(GenericWordPressInkblot):
    """Retriever for the Everything's Stupid webcomic."""
    # Also on http://tapastic.com/series/EverythingsStupid
    # Also on http://www.webtoons.com/en/challenge/everythings-stupid/list?title_no=14591
    # Also on http://everythingsstupidcomics.tumblr.com
    name = 'stupid'
    long_name = "Everything's Stupid"
    url = 'http://everythingsstupid.net'
3135
3136
3137
class TheIsmComics(GenericWordPressInkblot):
    """Retriever for The Ism webcomic."""
    # Also on https://tapastic.com/series/TheIsm (?)
    name = 'theism'
    long_name = 'The Ism'
    url = 'http://www.theism-comics.com'
3143
3144
3145
class WoodenPlankStudios(GenericWordPressInkblot):
    """Retriever for the Wooden Plank Studios webcomic."""
    name = 'woodenplank'
    long_name = 'Wooden Plank Studios'
    url = 'http://woodenplankstudios.com'
3150
3151
3152
class ElectricBunnyComic(GenericNavigableComic):
    """Retriever for the Electric Bunny Comics webcomic."""
    # Also on http://electricbunnycomics.tumblr.com
    name = 'bunny'
    long_name = 'Electric Bunny Comic'
    url = 'http://www.electricbunnycomics.com/View/Comic/153/Welcome+to+Hell'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the parent element of the image whose alt text is 'First'."""
        landing_page = get_soup_at_url(cls.url)
        first_button = landing_page.find('img', alt='First')
        return first_button.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the parent of the 'Next' or 'Back' image, or None if there is none."""
        wanted_alt = 'Next' if next_ else 'Back'
        button = last_soup.find('img', alt=wanted_alt)
        if not button:
            return None
        return button.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Read the title and image urls from the page's Open Graph meta tags.

        `link` is not used by this implementation.
        """
        og_title = soup.find('meta', property='og:title')['content']
        og_images = soup.find_all('meta', property='og:image')
        return {
            'title': og_title,
            'img': [meta['content'] for meta in og_images],
        }
3180
3181
3182
class SheldonComics(GenericNavigableComic):
    """Retriever for the Sheldon webcomic."""
    # Also on http://www.gocomics.com/sheldon
    name = 'sheldon'
    long_name = 'Sheldon Comics'
    url = 'http://www.sheldoncomics.com'

    @classmethod
    def get_first_comic_link(cls):
        """Return the anchor with id 'nav-first' from the page at cls.url."""
        home_page = get_soup_at_url(cls.url)
        return home_page.find("a", id="nav-first")

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the first navigation anchor that does not point at the site root.

        Returns None when every candidate anchor targets
        'http://www.sheldoncomics.com' or when there is no candidate.
        """
        anchor_id = "nav-next" if next_ else "nav-prev"
        candidates = last_soup.find_all("a", id=anchor_id)
        return next(
            (a for a in candidates if a['href'] != 'http://www.sheldoncomics.com'),
            None)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Read the title and image url from the 'comic-foot' div.

        Asserts that exactly one image is present and that its alt and title
        attributes agree. `link` is not used by this implementation.
        """
        foot = soup.find("div", id="comic-foot")
        comic_imgs = foot.find_all("img")
        assert all(img['alt'] == img['title'] for img in comic_imgs)
        assert len(comic_imgs) == 1
        return {
            'title': comic_imgs[0]['title'],
            'img': [img['src'] for img in comic_imgs],
        }
3213
3214
3215 View Code Duplication
class Ubertool(GenericNavigableComic):
    """Retriever for the Ubertool webcomic."""
    # Also on https://ubertool.tumblr.com
    # Also on https://tapastic.com/series/ubertool
    name = 'ubertool'
    long_name = 'Ubertool'
    url = 'http://ubertoolcomic.com'
    _categories = ('UBERTOOL',)
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_a_comicnavbase_comicnavnext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Read title, post date and comic image urls from a comic page.

        `link` is not used by this implementation.
        """
        post_title = soup.find('h2', class_='post-title').string
        post_date = soup.find('span', class_='post-date').string
        published = string_to_date(post_date, "%B %d, %Y")
        comic_div = soup.find('div', id='comic')
        comic_imgs = comic_div.find_all('img')
        return {
            'img': [img['src'] for img in comic_imgs],
            'title': post_title,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
3240
3241
3242 View Code Duplication
class EarthExplodes(GenericNavigableComic):
    """Retriever for The Earth Explodes webcomic."""
    name = 'earthexplodes'
    long_name = 'The Earth Explodes'
    url = 'http://www.earthexplodes.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.earthexplodes.com/comics/000/'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor whose id is 'next' or 'prev', depending on next_."""
        anchor_id = 'next' if next_ else 'prev'
        return last_soup.find('a', id=anchor_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Read page title, image urls and the first image's title attribute.

        Image urls are joined with cls.url. `link` is not used by this
        implementation.
        """
        page_title = soup.find('title').string
        image_div = soup.find('div', id='image')
        images = image_div.find_all('img')
        # The 'alt' entry is taken from the first image's 'title' attribute.
        hover_text = images[0].get('title', '')
        return {
            'img': [urljoin_wrapper(cls.url, img['src']) for img in images],
            'title': page_title,
            'alt': hover_text,
        }
3267
3268
3269
class PomComics(GenericNavigableComic):
    """Retriever for the Pom Comics / Piece of Me webcomic."""
    name = 'pom'
    long_name = 'Pom Comics / Piece of Me'
    url = 'http://www.pomcomic.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'btn-first' anchor from the page at cls.url."""
        home_page = get_soup_at_url(cls.url)
        return home_page.find('a', class_='btn-first')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the 'btn-next' or 'btn-prev' anchor, depending on next_."""
        button_class = 'btn-next' if next_ else 'btn-prev'
        return last_soup.find('a', class_=button_class)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Read title, description, keywords and image urls from a comic page.

        Image urls are joined with cls.url. `link` is not used by this
        implementation.
        """
        heading = soup.find('h1').string
        og_description = soup.find('meta', property='og:description')['content']
        keywords = soup.find('meta', attrs={'name': 'keywords'})['content']
        comic_div = soup.find('div', class_='comic')
        comic_imgs = comic_div.find_all('img')
        return {
            'title': heading,
            'desc': og_description,
            'tags': keywords,
            'img': [urljoin_wrapper(cls.url, img['src']) for img in comic_imgs],
        }
3299
3300
3301
class CubeDrone(GenericEmptyComic, GenericNavigableComic):
    """Class to retrieve Cube Drone comics."""
    name = 'cubedrone'
    long_name = 'Cube Drone'
    url = 'http://cube-drone.com/comics'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        Returns the parent element of the 'glyphicon-backward' span found on
        the page at cls.url.
        """
        return get_soup_at_url(cls.url).find('span', class_='glyphicon glyphicon-backward').parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns the parent element of the chevron span ('right' for next,
        'left' for previous), or None when the page has no such span.
        """
        class_ = 'glyphicon glyphicon-chevron-' + ('right' if next_ else 'left')
        span = last_soup.find('span', class_=class_)
        # find() yields None when nothing matches; report a missing link as
        # None instead of failing with AttributeError on `.parent`.
        return span.parent if span is not None else None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with the twitter url and title, the first image's
        title and alt attributes, and the list of image urls. `link` is
        unused.
        """
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        url2 = soup.find('meta', attrs={'name': 'twitter:url'})['content']
        # Date extraction is currently disabled:
        # date_str = soup.find('h2', class_='comic_title').find('small').string
        # day = string_to_date(date_str, "%B %d, %Y, %I:%M %p")
        imgs = soup.find_all('img', class_='comic img-responsive')
        title2 = imgs[0]['title']
        alt = imgs[0]['alt']
        return {
            'url2': url2,
            'title': title,
            'title2': title2,
            'alt': alt,
            'img': [i['src'] for i in imgs],
        }
3336
3337
3338
class MakeItStoopid(GenericNavigableComic):
    """Retriever for the Make It Stoopid webcomic."""
    name = 'stoopid'
    long_name = 'Make it stoopid'
    url = 'http://makeitstoopid.com/comic.php'

    @classmethod
    def get_nav(cls, soup):
        """Return the first five 'cnav' elements, with None for those lacking an href.

        The elements after the fifth are asserted to be equal to the first
        five. Callers in this class use index 0 for the first comic, 1 for
        the previous one and 3 for the next one.
        """
        nav_elements = soup.find_all(class_='cnav')
        first_bar = nav_elements[:5]
        second_bar = nav_elements[5:]
        assert first_bar == second_bar
        # Expected layout: begin, prev, archive, next_, end
        return [elt if elt.get('href') is not None else None for elt in first_bar]

    @classmethod
    def get_first_comic_link(cls):
        """Return the first navigation element of the page at cls.url."""
        home_page = get_soup_at_url(cls.url)
        return cls.get_nav(home_page)[0]

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the 'next' (index 3) or 'prev' (index 1) navigation element."""
        position = 3 if next_ else 1
        return cls.get_nav(last_soup)[position]

    @classmethod
    def get_comic_info(cls, soup, link):
        """Read the title from the link and the image urls from the page."""
        link_title = link['title']
        comic_imgs = soup.find_all('img', id='comicimg')
        return {
            'title': link_title,
            'img': [img['src'] for img in comic_imgs],
        }
3372
3373
3374
class OffTheLeashDog(GenericNavigableComic):
    """Class to retrieve Off The Leash Dog comics."""
    # Also on http://rupertfawcettsdoggyblog.tumblr.com
    # Also on http://www.rupertfawcettcartoons.com
    name = 'offtheleash'
    long_name = 'Off The Leash Dog'
    url = 'http://offtheleashdogcartoons.com'
    _categories = ('FAWCETT', )
    get_navi_link = get_a_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://offtheleashdogcartoons.com/uncategorized/can-i-help-you/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with the entry title and the urls of the images found
        in the entry content. `link` is unused.
        """
        # The leftover debug print of `link` was removed: it wrote to stdout
        # for every retrieved comic.
        title = soup.find("h1", class_="entry-title").string
        imgs = soup.find('div', class_='entry-content').find_all('img')
        return {
            'title': title,
            'img': [i['src'] for i in imgs],
        }
3396
3397
3398
class MarketoonistComics(GenericNavigableComic):
    """Retriever for the Marketoonist webcomic."""
    name = 'marketoonist'
    long_name = 'Marketoonist'
    url = 'https://marketoonist.com/cartoons'
    get_first_comic_link = simulate_first_link
    get_navi_link = get_link_rel_next
    first_url = 'https://marketoonist.com/2002/10/the-8-types-of-brand-managers-2.html'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Read image urls, publication date and title from the page's meta tags.

        `link` is not used by this implementation.
        """
        og_images = soup.find_all('meta', property='og:image')
        published_time = soup.find('meta', property='article:published_time')['content']
        # Only the first 10 characters are parsed, using the "%Y-%m-%d" format.
        published = string_to_date(published_time[:10], "%Y-%m-%d")
        og_title = soup.find('meta', property='og:title')['content']
        return {
            'img': [meta['content'] for meta in og_images],
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'title': og_title,
        }
3421
3422
3423
class ConsoliaComics(GenericNavigableComic):
    """Retriever for the Consolia webcomic."""
    name = 'consolia'
    long_name = 'consolia'
    url = 'https://consolia-comic.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the anchor with class 'first' from the page at cls.url."""
        home_page = get_soup_at_url(cls.url)
        return home_page.find('a', class_='first')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor with class 'next' or 'prev', depending on next_."""
        anchor_class = 'next' if next_ else 'prev'
        return last_soup.find('a', class_=anchor_class)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Read title, publication date and image urls from a comic page.

        `link` is not used by this implementation.
        """
        og_title = soup.find('meta', property='og:title')['content']
        published = string_to_date(soup.find('time')["datetime"], "%Y-%m-%d")
        og_images = soup.find_all('meta', property='og:image')
        return {
            'title': og_title,
            'img': [meta['content'] for meta in og_images],
            'day': published.day,
            'month': published.month,
            'year': published.year,
        }
3454
3455
3456 View Code Duplication
class TuMourrasMoinsBete(GenericNavigableComic):
    """Retriever for the Tu Mourras Moins Bete webcomic."""
    name = 'mourrasmoinsbete'
    long_name = 'Tu Mourras Moins Bete'
    url = 'http://tumourrasmoinsbete.blogspot.fr'
    _categories = ('FRANCAIS',)
    get_first_comic_link = simulate_first_link
    first_url = 'http://tumourrasmoinsbete.blogspot.fr/2008/06/essai.html'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the blog pager anchor: 'newer' for next, 'older' for previous."""
        pager_id = 'Blog1_blog-pager-newer-link' if next_ else 'Blog1_blog-pager-older-link'
        return last_soup.find('a', id=pager_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Read page title, article image urls and author from a post page.

        `link` is not used by this implementation.
        """
        page_title = soup.find('title').string
        article_body = soup.find('div', itemprop='description articleBody')
        article_imgs = article_body.find_all('img')
        author_name = soup.find('span', itemprop='author').string
        return {
            'img': [img['src'] for img in article_imgs],
            'author': author_name,
            'title': page_title,
        }
3481
3482
3483
class GeekAndPoke(GenericNavigableComic):
    """Class to retrieve Geek And Poke comics."""
    name = 'geek'
    long_name = 'Geek And Poke'
    url = 'http://geek-and-poke.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://geek-and-poke.com/geekandpoke/2006/8/27/a-new-place-for-a-not-so-old-blog.html'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic."""
        # The 'prev-item' anchor is the one followed when the next comic is
        # requested (and 'next-item' for the previous one).
        return last_soup.find('a', class_='prev-item' if next_ else 'next-item')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with title, alt text, description, author, date
        components and the list of image urls (joined with cls.url).
        `link` is unused.
        """
        title = soup.find('meta', property='og:title')['content']
        desc = soup.find('meta', property='og:description')['content']
        date_str = soup.find('time', class_='published')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        author = soup.find('a', rel='author').string
        # Content lives in one of two differently-classed containers.
        div_content = (soup.find('div', class_="body entry-content") or
                       soup.find('div', class_="special-content"))
        imgs = div_content.find_all('img')
        # Only keep images that actually carry a 'src' attribute.
        imgs = [i for i in imgs if i.get('src') is not None]
        # NOTE(review): `'title' not in i` on a bs4 Tag appears to test the
        # tag's children rather than its attributes, which would make this
        # check always pass - confirm before tightening it.
        assert all('title' not in i or i['alt'] == i['title'] for i in imgs)
        # Always a string: the previous fallback for "no image" was an empty
        # list, giving 'alt' an inconsistent type.
        alt = imgs[0].get('alt', "") if imgs else ""
        return {
            'title': title,
            'alt': alt,
            'description': desc,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
        }
3521
3522
3523
class GloryOwlComix(GenericNavigableComic):
    """Retriever for the Glory Owl webcomic."""
    name = 'gloryowl'
    long_name = 'Glory Owl'
    url = 'http://gloryowlcomix.blogspot.fr'
    _categories = ('NSFW', 'FRANCAIS')
    get_first_comic_link = simulate_first_link
    first_url = 'http://gloryowlcomix.blogspot.fr/2013/02/1_7.html'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the blog pager anchor: 'newer' for next, 'older' for previous."""
        pager_id = 'Blog1_blog-pager-newer-link' if next_ else 'Blog1_blog-pager-older-link'
        return last_soup.find('a', id=pager_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Read page title, 'image_src' link urls and author from a post page.

        `link` is not used by this implementation.
        """
        page_title = soup.find('title').string
        image_links = soup.find_all('link', rel='image_src')
        author_name = soup.find('a', rel='author').string
        return {
            'img': [tag['href'] for tag in image_links],
            'author': author_name,
            'title': page_title,
        }
3548
3549
3550
class AtRandomComics(GenericNavigableComic):
    """Retriever for the At Random Comics webcomic."""
    name = 'atrandom'
    long_name = 'At Random Comics'
    url = 'http://www.atrandomcomics.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.atrandomcomics.com/at-random-comics-home/2015/5/5/can-of-worms'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the navigation anchor for the requested direction.

        The 'prevLink' anchor is used when next_ is true and 'nextLink'
        otherwise.
        """
        anchor_id = 'prevLink' if next_ else 'nextLink'
        return last_soup.find('a', id=anchor_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Read title, description, date, author and image urls from a comic page.

        `link` is not used by this implementation.
        """
        og_title = soup.find('meta', property='og:title')['content']
        og_description = soup.find('meta', property='og:description')['content']
        published_attr = soup.find('time', itemprop='datePublished')["datetime"]
        published = string_to_date(published_attr, "%Y-%m-%d")
        author_name = soup.find('a', rel='author').string
        og_images = soup.find_all('meta', property='og:image')
        return {
            'title': og_title,
            'img': [meta['content'] for meta in og_images],
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'author': author_name,
            'description': og_description,
        }
3582
3583
3584
class GenericTumblrV1(GenericComic):
    """Base class for comics hosted on Tumblr, read through the V1 API."""
    _categories = ('TUMBLR',)

    @classmethod
    def get_next_comic(cls, last_comic):
        """Yield comic info for each post from get_posts, skipping posts with no info."""
        for post in cls.get_posts(last_comic):
            info = cls.get_comic_info(post)
            if info is None:
                continue
            yield info

    @classmethod
    def get_url_from_post(cls, post):
        """Return the post's 'url' value, printing a warning if it is not under cls.url."""
        post_url = post['url']
        if not post_url.startswith(cls.url):
            print("url '%s' does not start with '%s'" % (post_url, cls.url))
        return post_url

    @classmethod
    def get_api_url(cls):
        """Return the '/api/read/' url joined onto cls.url."""
        return urljoin_wrapper(cls.url, '/api/read/')

    @classmethod
    def get_api_url_for_id(cls, tumblr_id):
        """Return the API url with an '?id=' query for the given numeric post id."""
        base = cls.get_api_url()
        return base + '?id=%d' % (tumblr_id)

    @classmethod
    def get_comic_info(cls, post):
        """Build the comic dict for a post, or return None if it is not a photo post."""
        if post['type'] != 'photo':
            return None
        tumblr_id = int(post['id'])
        api_url = cls.get_api_url_for_id(tumblr_id)
        timestamp = int(post['unix-timestamp'])
        posted_on = datetime.datetime.fromtimestamp(timestamp).date()
        caption = post.find('photo-caption')
        title = caption.string if caption else ""
        tags = ' '.join(tag.string for tag in post.find_all('tag'))
        # Photos may appear in 'photo' tags and/or straight in the post
        photo_holders = post.find_all('photo') or [post]
        # Images are in multiple resolutions - taking the first one
        photo_urls = [holder.find('photo-url') for holder in photo_holders]
        return {
            'url': cls.get_url_from_post(post),
            'url2': post['url-with-slug'],
            'day': posted_on.day,
            'month': posted_on.month,
            'year': posted_on.year,
            'title': title,
            'tags': tags,
            'img': [url_tag.string for url_tag in photo_urls],
            'tumblr-id': tumblr_id,
            'api_url': api_url,
        }

    @classmethod
    def get_posts(cls, last_comic, nb_post_per_call=10):
        """Fetch posts through the API, oldest first (nb_post_per_call is max 50).

        The API is paged from newer to older posts; collection stops at the
        post whose id matches last_comic's 'tumblr-id', and the collected
        posts are handed back in reverse (chronological) order. When that id
        is never found, a message is printed and an empty list is returned.
        """
        expected_id = last_comic['tumblr-id'] if last_comic else None
        collected = []
        if last_comic is not None:
            # Tumblr posts get deleted sometimes. If the previous post is gone,
            # paging through everything looking for it would be slow and
            # pointless, so check it upfront and bail out clearly.
            last_api_url = cls.get_api_url_for_id(expected_id)
            try:
                get_soup_at_url(last_api_url)
            except urllib.error.HTTPError:
                try:
                    get_soup_at_url(cls.url)
                except urllib.error.HTTPError:
                    print("Did not find previous post nor main url %s" % cls.url)
                else:
                    print("Did not find previous post %s : it might have been deleted" % last_api_url)
                return reversed(collected)
        api_url = cls.get_api_url()
        first_page = get_soup_at_url(api_url).find('posts')
        start, total = int(first_page['start']), int(first_page['total'])
        assert start == 0
        for offset in range(0, total, nb_post_per_call):
            page_url = api_url + '?start=%d&num=%d' % (offset, nb_post_per_call)
            page = get_soup_at_url(page_url).find('posts')
            page_start, page_total = int(page['start']), int(page['total'])
            assert offset == page_start, "%d != %d" % (offset, page_start)
            # This may happen and should be handled in the future
            assert total == page_total, "%d != %d" % (total, page_total)
            for post in page.find_all('post'):
                post_id = int(post['id'])
                if expected_id and expected_id == post_id:
                    return reversed(collected)
                collected.append(post)
        if expected_id is None:
            return reversed(collected)
        print("Did not find %s : there might be a problem" % expected_id)
        return []
3685
3686
3687
class SaturdayMorningBreakfastCerealTumblr(GenericTumblrV1):
    """Saturday Morning Breakfast Cereal, retrieved from its Tumblr blog."""
    # Also on http://www.gocomics.com/saturday-morning-breakfast-cereal
    # Also on http://www.smbc-comics.com
    name = 'smbc-tumblr'
    long_name = 'Saturday Morning Breakfast Cereal (from Tumblr)'
    url = 'http://smbc-comics.tumblr.com'
    _categories = ('SMBC',)
3695
3696
3697
class IrwinCardozo(GenericTumblrV1):
    """Irwin Cardozo comics, retrieved from their Tumblr blog."""
    name = 'irwinc'
    long_name = 'Irwin Cardozo'
    url = 'http://irwincardozocomics.tumblr.com'
3702
3703
3704
class AccordingToDevin(GenericTumblrV1):
    """According To Devin comics, retrieved from their Tumblr blog."""
    name = 'devin'
    long_name = 'According To Devin'
    url = 'http://accordingtodevin.tumblr.com'
3709
3710
3711
class ItsTheTieTumblr(GenericTumblrV1):
    """It's the tie comics, retrieved from their Tumblr blog."""
    # Also on http://itsthetie.com
    # Also on https://tapastic.com/series/itsthetie
    name = 'tie-tumblr'
    long_name = "It's the tie (from Tumblr)"
    url = 'http://itsthetie.tumblr.com'
    _categories = ('TIE',)
3719
3720
3721
class OctopunsTumblr(GenericTumblrV1):
    """Octopuns comics, retrieved from their Tumblr blog."""
    # Also on http://www.octopuns.net
    name = 'octopuns-tumblr'
    long_name = 'Octopuns (from Tumblr)'
    url = 'http://octopuns.tumblr.com'
3727
3728
3729
class PicturesInBoxesTumblr(GenericTumblrV1):
    """Pictures In Boxes comics, retrieved from their Tumblr blog."""
    # Also on http://www.picturesinboxes.com
    name = 'picturesinboxes-tumblr'
    long_name = 'Pictures in Boxes (from Tumblr)'
    url = 'https://picturesinboxescomic.tumblr.com'
3735
3736
3737
class TubeyToonsTumblr(GenericTumblrV1):
    """Tubey Toons comics, retrieved from their Tumblr blog."""
    # Also on http://tapastic.com/series/Tubey-Toons
    # Also on http://tubeytoons.com
    name = 'tubeytoons-tumblr'
    long_name = 'Tubey Toons (from Tumblr)'
    url = 'https://tubeytoons.tumblr.com'
    # NOTE(review): 'TUNEYTOONS' looks like a misspelling of 'TUBEYTOONS';
    # kept unchanged since other classes may share this category - confirm.
    _categories = ('TUNEYTOONS',)
3745
3746
3747
class UnearthedComicsTumblr(GenericTumblrV1):
    """Unearthed Comics, retrieved from their Tumblr blog."""
    # Also on http://tapastic.com/series/UnearthedComics
    # Also on http://unearthedcomics.com
    name = 'unearthed-tumblr'
    long_name = 'Unearthed Comics (from Tumblr)'
    url = 'https://unearthedcomics.tumblr.com'
    _categories = ('UNEARTHED',)
3755
3756
3757
class PieComic(GenericTumblrV1):
    """Pie Comic, retrieved from its Tumblr blog."""
    name = 'pie'
    long_name = 'Pie Comic'
    url = 'http://piecomic.tumblr.com'
3762
3763
3764
class MrEthanDiamond(GenericTumblrV1):
    """Mr Ethan Diamond comics, retrieved from their Tumblr blog."""
    name = 'diamond'
    long_name = 'Mr Ethan Diamond'
    url = 'http://mrethandiamond.tumblr.com'
3769
3770
3771
class Flocci(GenericTumblrV1):
    """floccinaucinihilipilification comics, retrieved from their Tumblr blog."""
    name = 'flocci'
    long_name = 'floccinaucinihilipilification'
    url = 'http://floccinaucinihilipilificationa.tumblr.com'
3776
3777
3778
class UpAndOut(GenericTumblrV1):
    """Up & Out comics, retrieved from their Tumblr blog."""
    # Also on http://tapastic.com/series/UP-and-OUT
    name = 'upandout'
    long_name = 'Up And Out (from Tumblr)'
    url = 'http://upandoutcomic.tumblr.com'
3784
3785
3786
class Pundemonium(GenericTumblrV1):
    """Pundemonium comics, retrieved from their Tumblr blog."""
    name = 'pundemonium'
    long_name = 'Pundemonium'
    url = 'http://monstika.tumblr.com'
3791
3792
3793
class PoorlyDrawnLinesTumblr(GenericTumblrV1):
    """Poorly Drawn Lines comics, retrieved from their Tumblr blog."""
    # Also on http://poorlydrawnlines.com
    name = 'poorlydrawn-tumblr'
    long_name = 'Poorly Drawn Lines (from Tumblr)'
    url = 'http://pdlcomics.tumblr.com'
    _categories = ('POORLYDRAWN',)
3800
3801
3802
class PearShapedComics(GenericTumblrV1):
    """Pear-Shaped Comics, retrieved through the Tumblr V1 API."""
    name = 'pearshaped'
    long_name = 'Pear-Shaped Comics'
    url = 'http://pearshapedcomics.com'
3807
3808
3809
class PondScumComics(GenericTumblrV1):
    """Pond Scum comics, retrieved from their Tumblr blog."""
    name = 'pond'
    long_name = 'Pond Scum'
    url = 'http://pondscumcomic.tumblr.com'
3814
3815
3816
class MercworksTumblr(GenericTumblrV1):
    """Mercworks comics, retrieved from their Tumblr blog."""
    # Also on http://mercworks.net
    name = 'mercworks-tumblr'
    long_name = 'Mercworks (from Tumblr)'
    url = 'http://mercworks.tumblr.com'
3822
3823
3824
class OwlTurdTumblr(GenericTumblrV1):
    """Owl Turd comics, retrieved through the Tumblr V1 API."""
    # Also on http://tapastic.com/series/Owl-Turd-Comix
    name = 'owlturd-tumblr'
    long_name = 'Owl Turd (from Tumblr)'
    url = 'http://owlturd.com'
    _categories = ('OWLTURD',)
3831
3832
3833
class VectorBelly(GenericTumblrV1):
    """Tumblr-based retriever for Vector Belly comics."""
    # Also published at http://vectorbelly.com
    name = 'vector'
    long_name = 'Vector Belly'
    url = 'http://vectorbelly.tumblr.com'
3839
3840
3841
class GoneIntoRapture(GenericTumblrV1):
    """Tumblr-based retriever for Gone Into Rapture comics."""
    # Also published at http://goneintorapture.tumblr.com
    # Also published at http://tapastic.com/series/Goneintorapture
    name = 'rapture'
    long_name = 'Gone Into Rapture'
    url = 'http://goneintorapture.com'
3848
3849
3850
class TheOatmealTumblr(GenericTumblrV1):
    """Tumblr-based retriever for The Oatmeal comics."""
    # Also published at http://theoatmeal.com
    name = 'oatmeal-tumblr'
    long_name = 'The Oatmeal (from Tumblr)'
    url = 'http://oatmeal.tumblr.com'
3856
3857
3858
class HeckIfIKnowComicsTumblr(GenericTumblrV1):
    """Tumblr-based retriever for Heck If I Know Comics."""
    # Also published at http://tapastic.com/series/Regular
    name = 'heck-tumblr'
    long_name = 'Heck if I Know comics (from Tumblr)'
    url = 'http://heckifiknowcomics.com'
3864
3865
3866
class MyJetPack(GenericTumblrV1):
    """Tumblr-based retriever for My Jet Pack comics."""
    name = 'jetpack'
    long_name = 'My Jet Pack'
    url = 'http://myjetpack.tumblr.com'
3871
3872
3873
class CheerUpEmoKidTumblr(GenericTumblrV1):
    """Tumblr-based retriever for Cheer Up Emo Kid comics."""
    # Also published at http://www.cheerupemokid.com
    # Also published at http://tapastic.com/series/CUEK
    name = 'cuek-tumblr'
    long_name = 'Cheer Up Emo Kid (from Tumblr)'
    url = 'https://enzocomics.tumblr.com'
3880
3881
3882
class ForLackOfABetterComic(GenericTumblrV1):
    """Tumblr-based retriever for For Lack Of A Better Comic."""
    # Also published at http://forlackofabettercomic.com
    name = 'lack'
    long_name = 'For Lack Of A Better Comic'
    url = 'http://forlackofabettercomic.tumblr.com'
3888
3889
3890
class ZenPencilsTumblr(GenericTumblrV1):
    """Tumblr-based retriever for Zen Pencils comics."""
    # Also published at http://zenpencils.com
    # Also published at http://www.gocomics.com/zen-pencils
    name = 'zenpencils-tumblr'
    long_name = 'Zen Pencils (from Tumblr)'
    url = 'http://zenpencils.tumblr.com'
    _categories = ('ZENPENCILS',)
3898
3899
3900
class ThreeWordPhraseTumblr(GenericTumblrV1):
    """Tumblr-based retriever for Three Word Phrase comics."""
    # Also published at http://threewordphrase.com
    name = 'threeword-tumblr'
    long_name = 'Three Word Phrase (from Tumblr)'
    url = 'http://threewordphrase.tumblr.com'
3906
3907
3908
class TimeTrabbleTumblr(GenericTumblrV1):
    """Tumblr-based retriever for Time Trabble comics."""
    # Also published at http://timetrabble.com
    name = 'timetrabble-tumblr'
    long_name = 'Time Trabble (from Tumblr)'
    url = 'http://timetrabble.tumblr.com'
3914
3915
3916
class SafelyEndangeredTumblr(GenericTumblrV1):
    """Tumblr-based retriever for Safely Endangered comics."""
    # Also published at http://www.safelyendangered.com
    name = 'endangered-tumblr'
    long_name = 'Safely Endangered (from Tumblr)'
    url = 'http://tumblr.safelyendangered.com'
3922
3923
3924
class MouseBearComedyTumblr(GenericTumblrV1):
    """Tumblr-based retriever for Mouse Bear Comedy comics."""
    # Also published at http://www.mousebearcomedy.com
    name = 'mousebear-tumblr'
    long_name = 'Mouse Bear Comedy (from Tumblr)'
    url = 'http://mousebearcomedy.tumblr.com'
3930
3931
3932
class BouletCorpTumblr(GenericTumblrV1):
    """Tumblr-based retriever for BouletCorp comics."""
    # Also published at http://www.bouletcorp.com
    name = 'boulet-tumblr'
    long_name = 'Boulet Corp (from Tumblr)'
    url = 'https://bouletcorp.tumblr.com'
    _categories = ('BOULET',)
3939
3940
3941
class TheAwkwardYetiTumblr(GenericTumblrV1):
    """Tumblr-based retriever for The Awkward Yeti comics."""
    # Also published at http://www.gocomics.com/the-awkward-yeti
    # Also published at http://theawkwardyeti.com
    # Also published at https://tapastic.com/series/TheAwkwardYeti
    name = 'yeti-tumblr'
    long_name = 'The Awkward Yeti (from Tumblr)'
    url = 'http://larstheyeti.tumblr.com'
    _categories = ('YETI',)
3950
3951
3952
class NellucNhoj(GenericTumblrV1):
    """Tumblr-based retriever for Nelluc Nhoj comics."""
    name = 'nhoj'
    long_name = 'Nelluc Nhoj'
    url = 'http://nellucnhoj.com'
3957
3958
3959
class DownTheUpwardSpiralTumblr(GenericTumblrV1):
    """Tumblr-based retriever for Down The Upward Spiral comics."""
    # Also published at http://www.downtheupwardspiral.com
    # Also published at https://tapastic.com/series/Down-the-Upward-Spiral
    name = 'spiral-tumblr'
    long_name = 'Down the Upward Spiral (from Tumblr)'
    url = 'http://downtheupwardspiral.tumblr.com'
3966
3967
3968
class AsPerUsualTumblr(GenericTumblrV1):
    """Tumblr-based retriever for As Per Usual comics."""
    # Also published at https://tapastic.com/series/AsPerUsual
    name = 'usual-tumblr'
    long_name = 'As Per Usual (from Tumblr)'
    url = 'http://as-per-usual.tumblr.com'
    # Spelled with the leading underscore used by every other comic class in
    # this file; the previous bare `categories` name did not match that
    # convention (presumably it was never picked up - confirm against the
    # base class).
    _categories = ('DAMILEE', )
3975
3976
3977
class HotComicsForCoolPeopleTumblr(GenericTumblrV1):
    """Tumblr-based retriever for Hot Comics For Cool People."""
    # Also published at https://tapastic.com/series/Hot-Comics-For-Cool-People
    # Also published at http://hotcomics.biz (links to tumblr)
    # Also published at http://hcfcp.com (links to tumblr)
    name = 'hotcomics-tumblr'
    long_name = 'Hot Comics For Cool People (from Tumblr)'
    url = 'http://hotcomicsforcoolpeople.tumblr.com'
    # Spelled with the leading underscore used by every other comic class in
    # this file; the previous bare `categories` name did not match that
    # convention (presumably it was never picked up - confirm against the
    # base class).
    _categories = ('DAMILEE', )
3986
3987
3988
class OneOneOneOneComicTumblr(GenericTumblrV1):
    """Tumblr-based retriever for 1111 Comics."""
    # Also published at http://www.1111comics.me
    # Also published at https://tapastic.com/series/1111-Comics
    name = '1111-tumblr'
    long_name = '1111 Comics (from Tumblr)'
    url = 'http://comics1111.tumblr.com'
    _categories = ('ONEONEONEONE',)
3996
3997
3998
class JhallComicsTumblr(GenericTumblrV1):
    """Tumblr-based retriever for Jhall Comics."""
    # Also published at http://jhallcomics.com
    name = 'jhall-tumblr'
    long_name = 'Jhall Comics (from Tumblr)'
    url = 'http://jhallcomics.tumblr.com'
4004
4005
4006
class BerkeleyMewsTumblr(GenericTumblrV1):
    """Tumblr-based retriever for Berkeley Mews comics."""
    # Also published at http://www.gocomics.com/berkeley-mews
    # Also published at http://www.berkeleymews.com
    name = 'berkeley-tumblr'
    long_name = 'Berkeley Mews (from Tumblr)'
    url = 'http://mews.tumblr.com'
    _categories = ('BERKELEY',)
4014
4015
4016
class JoanCornellaTumblr(GenericTumblrV1):
    """Tumblr-based retriever for Joan Cornella comics."""
    # Also published at http://joancornella.net
    name = 'cornella-tumblr'
    long_name = 'Joan Cornella (from Tumblr)'
    url = 'http://cornellajoan.tumblr.com'
4022
4023
4024
class RespawnComicTumblr(GenericTumblrV1):
    """Tumblr-based retriever for Respawn Comic."""
    # Also published at http://respawncomic.com
    name = 'respawn-tumblr'
    long_name = 'Respawn Comic (from Tumblr)'
    url = 'https://respawncomic.tumblr.com'
4030
4031
4032
class ChrisHallbeckTumblr(GenericTumblrV1):
    """Tumblr-based retriever for Chris Hallbeck comics."""
    # Also published at https://tapastic.com/ChrisHallbeck
    # Also published at http://maximumble.com
    # Also published at http://minimumble.com
    # Also published at http://thebookofbiff.com
    name = 'hallbeck-tumblr'
    # Display name spelled like the class name, docstring and url ("Hallbeck").
    long_name = 'Chris Hallbeck (from Tumblr)'
    url = 'https://chrishallbeck.tumblr.com'
    # NOTE(review): the category key keeps its historical "HALLBACK" spelling
    # because other classes outside this block may share it - confirm before
    # renaming it everywhere at once.
    _categories = ('HALLBACK', )
4042
4043
4044
class ComicNuggets(GenericTumblrV1):
    """Tumblr-based retriever for Comic Nuggets."""
    name = 'nuggets'
    long_name = 'Comic Nuggets'
    url = 'http://comicnuggets.com'
4049
4050
4051
class PigeonGazetteTumblr(GenericTumblrV1):
    """Tumblr-based retriever for The Pigeon Gazette comics."""
    # Also published at https://tapastic.com/series/The-Pigeon-Gazette
    name = 'pigeon-tumblr'
    long_name = 'The Pigeon Gazette (from Tumblr)'
    url = 'http://thepigeongazette.tumblr.com'
4057
4058
4059
class CancerOwl(GenericTumblrV1):
    """Tumblr-based retriever for Cancer Owl comics."""
    # Also published at http://cancerowl.com
    name = 'cancerowl-tumblr'
    long_name = 'Cancer Owl (from Tumblr)'
    url = 'http://cancerowl.tumblr.com'
4065
4066
4067
class FowlLanguageTumblr(GenericTumblrV1):
    """Tumblr-based retriever for Fowl Language comics."""
    # Also published at http://www.fowllanguagecomics.com
    # Also published at http://tapastic.com/series/Fowl-Language-Comics
    # Also published at http://www.gocomics.com/fowl-language
    name = 'fowllanguage-tumblr'
    long_name = 'Fowl Language Comics (from Tumblr)'
    url = 'http://fowllanguagecomics.tumblr.com'
    _categories = ('FOWLLANGUAGE',)
4076
4077
4078
class TheOdd1sOutTumblr(GenericTumblrV1):
    """Tumblr-based retriever for The Odd 1s Out comics."""
    # Also published at http://theodd1sout.com
    # Also published at https://tapastic.com/series/Theodd1sout
    name = 'theodd-tumblr'
    long_name = 'The Odd 1s Out (from Tumblr)'
    url = 'http://theodd1sout.tumblr.com'
4085
4086
4087
class TheUnderfoldTumblr(GenericTumblrV1):
    """Tumblr-based retriever for The Underfold comics."""
    # Also published at http://theunderfold.com
    name = 'underfold-tumblr'
    long_name = 'The Underfold (from Tumblr)'
    url = 'http://theunderfold.tumblr.com'
4093
4094
4095
class LolNeinTumblr(GenericTumblrV1):
    """Tumblr-based retriever for Lol Nein comics."""
    # Also published at http://lolnein.com
    name = 'lolnein-tumblr'
    long_name = 'Lol Nein (from Tumblr)'
    url = 'http://lolneincom.tumblr.com'
4101
4102
4103
class FatAwesomeComicsTumblr(GenericTumblrV1):
    """Tumblr-based retriever for Fat Awesome Comics."""
    # Also published at http://fatawesome.com/comics
    name = 'fatawesome-tumblr'
    long_name = 'Fat Awesome (from Tumblr)'
    url = 'http://fatawesomecomedy.tumblr.com'
4109
4110
4111
class TheWorldIsFlatTumblr(GenericTumblrV1):
    """Tumblr-based retriever for The World Is Flat comics."""
    # Also published at https://tapastic.com/series/The-World-is-Flat
    name = 'flatworld-tumblr'
    long_name = 'The World Is Flat (from Tumblr)'
    url = 'http://theworldisflatcomics.com'
4117
4118
4119
class DorrisMc(GenericTumblrV1):
    """Tumblr-based retriever for Dorris Mc comics."""
    # Also published at http://www.gocomics.com/dorris-mccomics
    name = 'dorrismc'
    long_name = 'Dorris Mc'
    url = 'http://dorrismccomics.com'
4125
4126
4127
class LeleozTumblr(GenericEmptyComic, GenericTumblrV1):
    """Tumblr-based retriever for Leleoz comics."""
    # NOTE(review): GenericEmptyComic comes first in the bases, which
    # presumably deactivates retrieval for this comic - confirm.
    # Also published at https://tapastic.com/series/Leleoz
    name = 'leleoz-tumblr'
    long_name = 'Leleoz (from Tumblr)'
    url = 'http://leleozcomics.tumblr.com'
4133
4134
4135
class MoonBeardTumblr(GenericTumblrV1):
    """Tumblr-based retriever for Moon Beard comics."""
    # Also published at http://moonbeard.com
    # Also published at http://www.webtoons.com/en/comedy/moon-beard/list?title_no=471
    name = 'moonbeard-tumblr'
    long_name = 'Moon Beard (from Tumblr)'
    url = 'http://squireseses.tumblr.com'
    _categories = ('MOONBEARD',)
4143
4144
4145
class AComik(GenericTumblrV1):
    """Tumblr-based retriever for A Comik."""
    name = 'comik'
    long_name = 'A Comik'
    url = 'http://acomik.com'
4150
4151
4152
class ClassicRandy(GenericTumblrV1):
    """Tumblr-based retriever for Classic Randy comics."""
    name = 'randy'
    long_name = 'Classic Randy'
    url = 'http://classicrandy.tumblr.com'
4157
4158
4159
class DagssonTumblr(GenericTumblrV1):
    """Tumblr-based retriever for Dagsson comics."""
    # Also published at http://www.dagsson.com
    name = 'dagsson-tumblr'
    long_name = 'Dagsson Hugleikur (from Tumblr)'
    url = 'https://hugleikurdagsson.tumblr.com'
4165
4166
4167
class LinsEditionsTumblr(GenericTumblrV1):
    """Tumblr-based retriever for L.I.N.S. Editions comics."""
    # Also published at https://linsedition.com
    # Has since moved to http://warandpeas.tumblr.com
    name = 'lins-tumblr'
    long_name = 'L.I.N.S. Editions (from Tumblr)'
    url = 'https://linscomics.tumblr.com'
    _categories = ('LINS',)
4175
4176
4177
class WarAndPeasTumblr(GenericTumblrV1):
    """Tumblr-based retriever for War And Peas comics."""
    # Previously hosted at https://linscomics.tumblr.com
    name = 'warandpeas-tumblr'
    long_name = 'War And Peas (from Tumblr)'
    url = 'http://warandpeas.tumblr.com'
    _categories = ('WARANDPEAS',)
4184
4185
4186
class OrigamiHotDish(GenericTumblrV1):
    """Tumblr-based retriever for Origami Hot Dish comics."""
    name = 'origamihotdish'
    long_name = 'Origami Hot Dish'
    url = 'http://origamihotdish.com'
4191
4192
4193
class HitAndMissComicsTumblr(GenericTumblrV1):
    """Tumblr-based retriever for Hit and Miss Comics."""
    name = 'hitandmiss'
    long_name = 'Hit and Miss Comics'
    url = 'https://hitandmisscomics.tumblr.com'
4198
4199
4200
class HMBlanc(GenericTumblrV1):
    """Tumblr-based retriever for HM Blanc comics."""
    name = 'hmblanc'
    long_name = 'HM Blanc'
    url = 'http://hmblanc.tumblr.com'
4205
4206
4207
class TalesOfAbsurdityTumblr(GenericTumblrV1):
    """Tumblr-based retriever for Tales Of Absurdity comics."""
    # Also published at http://talesofabsurdity.com
    # Also published at http://tapastic.com/series/Tales-Of-Absurdity
    name = 'absurdity-tumblr'
    long_name = 'Tales of Absurdity (from Tumblr)'
    url = 'http://talesofabsurdity.tumblr.com'
    _categories = ('ABSURDITY',)
4215
4216
4217
class RobbieAndBobby(GenericTumblrV1):
    """Tumblr-based retriever for Robbie And Bobby comics."""
    # Also published at http://robbieandbobby.com
    name = 'robbie-tumblr'
    long_name = 'Robbie And Bobby (from Tumblr)'
    url = 'http://robbieandbobby.tumblr.com'
4223
4224
4225
class ElectricBunnyComicTumblr(GenericTumblrV1):
    """Tumblr-based retriever for Electric Bunny Comics."""
    # Also published at http://www.electricbunnycomics.com/View/Comic/153/Welcome+to+Hell
    name = 'bunny-tumblr'
    long_name = 'Electric Bunny Comic (from Tumblr)'
    url = 'http://electricbunnycomics.tumblr.com'
4231
4232
4233
class Hoomph(GenericTumblrV1):
    """Tumblr-based retriever for Hoomph comics."""
    name = 'hoomph'
    long_name = 'Hoomph'
    url = 'http://hoom.ph'
4238
4239
4240
class BFGFSTumblr(GenericTumblrV1):
    """Tumblr-based retriever for BFGFS comics."""
    # Also published at https://tapastic.com/series/BFGFS
    # Also published at http://bfgfs.com
    name = 'bfgfs-tumblr'
    long_name = 'BFGFS (from Tumblr)'
    url = 'https://bfgfs.tumblr.com'
4247
4248
4249
class DoodleForFood(GenericTumblrV1):
    """Tumblr-based retriever for Doodle For Food comics."""
    # Also published at https://tapastic.com/series/Doodle-for-Food
    name = 'doodle'
    long_name = 'Doodle For Food'
    url = 'http://www.doodleforfood.com'
4255
4256
4257
class CassandraCalinTumblr(GenericTumblrV1):
    """Tumblr-based retriever for C. Cassandra comics."""
    # Also published at http://cassandracalin.com
    # Also published at https://tapastic.com/series/C-Cassandra-comics
    name = 'cassandra-tumblr'
    long_name = 'Cassandra Calin (from Tumblr)'
    url = 'http://c-cassandra.tumblr.com'
4264
4265
4266
class DougWasTaken(GenericTumblrV1):
    """Tumblr-based retriever for Doug Was Taken comics."""
    name = 'doug'
    long_name = 'Doug Was Taken'
    url = 'https://dougwastaken.tumblr.com'
4271
4272
4273
class MandatoryRollerCoaster(GenericTumblrV1):
    """Tumblr-based retriever for Mandatory Roller Coaster comics."""
    name = 'rollercoaster'
    long_name = 'Mandatory Roller Coaster'
    url = 'http://mandatoryrollercoaster.com'
4278
4279
4280
class CEstPasEnRegardantSesPompes(GenericTumblrV1):
    """Tumblr-based retriever for C'Est Pas En Regardant Ses Pompes (...) comics."""
    name = 'cperspqccltt'
    long_name = 'C Est Pas En Regardant Ses Pompes (...)'
    url = 'http://marcoandco.tumblr.com'
4285
4286
4287
class TheGrohlTroll(GenericTumblrV1):
    """Tumblr-based retriever for The Grohl Troll comics."""
    name = 'grohltroll'
    long_name = 'The Grohl Troll'
    url = 'http://thegrohltroll.com'
4292
4293
4294
class WebcomicName(GenericTumblrV1):
    """Tumblr-based retriever for Webcomic Name comics."""
    name = 'webcomicname'
    long_name = 'Webcomic Name'
    url = 'http://webcomicname.com'
4299
4300
4301
class BooksOfAdam(GenericTumblrV1):
    """Tumblr-based retriever for Books of Adam comics."""
    # Also published at http://www.booksofadam.com
    name = 'booksofadam'
    long_name = 'Books of Adam'
    url = 'http://booksofadam.tumblr.com'
4307
4308
4309
class HarkAVagrant(GenericTumblrV1):
    """Tumblr-based retriever for Hark A Vagrant comics."""
    # Also published at http://www.harkavagrant.com
    name = 'hark-tumblr'
    long_name = 'Hark A Vagrant (from Tumblr)'
    url = 'http://beatonna.tumblr.com'
4315
4316
4317
class OurSuperAdventureTumblr(GenericTumblrV1):
    """Tumblr-based retriever for Our Super Adventure comics."""
    # Also published at https://tapastic.com/series/Our-Super-Adventure
    # Also published at http://www.oursuperadventure.com
    # http://sarahgraley.com
    name = 'superadventure-tumblr'
    long_name = 'Our Super Adventure (from Tumblr)'
    url = 'http://sarahssketchbook.tumblr.com'
4325
4326
4327
class JakeLikesOnions(GenericTumblrV1):
    """Tumblr-based retriever for Jake Likes Onions comics."""
    name = 'jake'
    long_name = 'Jake Likes Onions'
    url = 'http://jakelikesonions.com'
4332
4333
4334
class InYourFaceCakeTumblr(GenericTumblrV1):
    """Tumblr-based retriever for In Your Face Cake comics."""
    # Also published at https://tapas.io/series/In-Your-Face-Cake
    name = 'inyourfacecake-tumblr'
    long_name = 'In Your Face Cake (from Tumblr)'
    url = 'https://in-your-face-cake.tumblr.com'
    _categories = ('INYOURFACECAKE',)
4341
4342
4343
class Robospunk(GenericTumblrV1):
    """Tumblr-based retriever for Robospunk comics."""
    name = 'robospunk'
    long_name = 'Robospunk'
    url = 'http://robospunk.com'
4348
4349
4350
class BananaTwinky(GenericTumblrV1):
    """Tumblr-based retriever for Banana Twinky comics."""
    name = 'banana'
    long_name = 'Banana Twinky'
    url = 'https://bananatwinky.tumblr.com'
4355
4356
4357
class YesterdaysPopcornTumblr(GenericTumblrV1):
    """Tumblr-based retriever for Yesterday's Popcorn comics."""
    # Also published at http://www.yesterdayspopcorn.com
    # Also published at https://tapastic.com/series/Yesterdays-Popcorn
    name = 'popcorn-tumblr'
    long_name = 'Yesterday\'s Popcorn (from Tumblr)'
    url = 'http://yesterdayspopcorn.tumblr.com'
4364
4365
4366
class TwistedDoodles(GenericTumblrV1):
    """Tumblr-based retriever for Twisted Doodles comics."""
    name = 'twisted'
    long_name = 'Twisted Doodles'
    url = 'http://www.twisteddoodles.com'
4371
4372
4373
class UbertoolTumblr(GenericTumblrV1):
    """Tumblr-based retriever for Ubertool comics."""
    # Also published at http://ubertoolcomic.com
    # Also published at https://tapastic.com/series/ubertool
    name = 'ubertool-tumblr'
    long_name = 'Ubertool (from Tumblr)'
    url = 'https://ubertool.tumblr.com'
    _categories = ('UBERTOOL',)
4381
4382
4383
class LittleLifeLinesTumblr(GenericTumblrV1):
    """Tumblr-based retriever for Little Life Lines comics."""
    # Also published at http://www.littlelifelines.com
    name = 'life-tumblr'
    long_name = 'Little Life Lines (from Tumblr)'
    url = 'https://little-life-lines.tumblr.com'
4389
4390
4391
class TheyCanTalk(GenericTumblrV1):
    """Tumblr-based retriever for They Can Talk comics."""
    name = 'theycantalk'
    long_name = 'They Can Talk'
    url = 'http://theycantalk.com'
4396
4397
4398
class Will5NeverCome(GenericTumblrV1):
    """Tumblr-based retriever for Will 5:00 Never Come comics."""
    name = 'will5'
    long_name = 'Will 5:00 Never Come ?'
    url = 'http://will5nevercome.com'
4403
4404
4405
class Sephko(GenericTumblrV1):
    """Tumblr-based retriever for Sephko comics."""
    # Also published at http://www.sephko.com
    name = 'sephko'
    long_name = 'Sephko'
    url = 'https://sephko.tumblr.com'
4411
4412
4413
class BlazersAtDawn(GenericTumblrV1):
    """Tumblr-based retriever for Blazers At Dawn comics."""
    name = 'blazers'
    long_name = 'Blazers At Dawn'
    url = 'http://blazersatdawn.tumblr.com'
4418
4419
4420
class ArtByMoga(GenericEmptyComic, GenericTumblrV1):  # Deactivated because it downloads too many things
    """Tumblr-based retriever for Art By Moga comics (currently deactivated)."""
    name = 'moga'
    long_name = 'Art By Moga'
    url = 'http://artbymoga.tumblr.com'
4425
4426
4427
class VerbalVomitTumblr(GenericTumblrV1):
    """Tumblr-based retriever for Verbal Vomit comics."""
    # Also published at http://www.verbal-vomit.com
    name = 'vomit-tumblr'
    long_name = 'Verbal Vomit (from Tumblr)'
    url = 'http://verbalvomits.tumblr.com'
4433
4434
4435
class LibraryComic(GenericTumblrV1):
    """Tumblr-based retriever for LibraryComic."""
    # Also published at http://librarycomic.com
    name = 'library-tumblr'
    long_name = 'LibraryComic (from Tumblr)'
    url = 'https://librarycomic.tumblr.com'
4441
4442
4443
class TizzyStitchBirdTumblr(GenericTumblrV1):
    """Tumblr-based retriever for Tizzy Stitch Bird comics."""
    # Also published at http://tizzystitchbird.com
    # Also published at https://tapastic.com/series/TizzyStitchbird
    # Also published at http://www.webtoons.com/en/challenge/tizzy-stitchbird/list?title_no=50082
    name = 'tizzy-tumblr'
    long_name = 'Tizzy Stitch Bird (from Tumblr)'
    url = 'http://tizzystitchbird.tumblr.com'
4451
4452
4453
class VictimsOfCircumsolarTumblr(GenericTumblrV1):
    """Tumblr-based retriever for Victims Of Circumsolar comics."""
    # Also published at http://www.victimsofcircumsolar.com
    name = 'circumsolar-tumblr'
    long_name = 'Victims Of Circumsolar (from Tumblr)'
    url = 'https://victimsofcomics.tumblr.com'
4459
4460
4461
class RockPaperCynicTumblr(GenericTumblrV1):
    """Tumblr-based retriever for Rock Paper Cynic comics."""
    # Also published at http://www.rockpapercynic.com
    # Also published at https://tapastic.com/series/rockpapercynic
    name = 'rpc-tumblr'
    long_name = 'Rock Paper Cynic (from Tumblr)'
    url = 'http://rockpapercynic.tumblr.com'
4468
4469
4470
class DeadlyPanelTumblr(GenericTumblrV1):
    """Tumblr-based retriever for Deadly Panel comics."""
    # Also published at http://www.deadlypanel.com
    # Also published at https://tapastic.com/series/deadlypanel
    name = 'deadly-tumblr'
    long_name = 'Deadly Panel (from Tumblr)'
    url = 'https://deadlypanel.tumblr.com'
4477
4478
4479
class CatanaComics(GenericTumblrV1):
    """Tumblr-based retriever for Catana comics."""
    name = 'catana'
    long_name = 'Catana'
    url = 'http://www.catanacomics.com'
4484
4485
4486
class AngryAtNothingTumblr(GenericTumblrV1):
    """Tumblr-based retriever for Angry at Nothing comics."""
    # Also published at http://www.angryatnothing.net
    # Also published at http://tapastic.com/series/Comics-yeah-definitely-comics-
    name = 'angry-tumblr'
    long_name = 'Angry At Nothing (from Tumblr)'
    url = 'http://angryatnothing.tumblr.com'
4493
4494
4495
class ShanghaiTango(GenericTumblrV1):
    """Tumblr-based retriever for the Shanghai Tango comic."""
    name = 'tango'
    long_name = 'Shanghai Tango'
    url = 'http://tango2010weibo.tumblr.com'
4500
4501
4502
class OffTheLeashDogTumblr(GenericTumblrV1):
    """Tumblr-based retriever for Off The Leash Dog comics."""
    # Also published at http://offtheleashdogcartoons.com
    # Also published at http://www.rupertfawcettcartoons.com
    name = 'offtheleash-tumblr'
    long_name = 'Off The Leash Dog (from Tumblr)'
    url = 'http://rupertfawcettsdoggyblog.tumblr.com'
    _categories = ('FAWCETT',)
4510
4511
4512
class ImogenQuestTumblr(GenericTumblrV1):
    """Tumblr-based retriever for Imogen Quest comics."""
    # Also published at http://imogenquest.net
    name = 'imogen-tumblr'
    long_name = 'Imogen Quest (from Tumblr)'
    url = 'http://imoquest.tumblr.com'
4518
4519
4520
class Shitfest(GenericTumblrV1):
    """Tumblr-based retriever for Shitfest comics."""
    name = 'shitfest'
    long_name = 'Shitfest'
    url = 'http://shitfestcomic.com'
4525
4526
4527
class IceCreamSandwichComics(GenericTumblrV1):
    """Tumblr-based retriever for Ice Cream Sandwich Comics."""
    name = 'icecream'
    long_name = 'Ice Cream Sandwich Comics'
    url = 'http://icecreamsandwichcomics.com'
4532
4533
4534
class Dustinteractive(GenericTumblrV1):
    """Tumblr-based retriever for Dustinteractive comics."""
    name = 'dustinteractive'
    long_name = 'Dustinteractive'
    url = 'http://dustinteractive.com'
4539
4540
4541
class StickyCinemaFloor(GenericTumblrV1):
    """Tumblr-based retriever for Sticky Cinema Floor comics."""
    name = 'stickycinema'
    long_name = 'Sticky Cinema Floor'
    url = 'https://stickycinemafloor.tumblr.com'
4546
4547
4548
class IncidentalComicsTumblr(GenericTumblrV1):
    """Tumblr-based retriever for Incidental Comics."""
    # Also published at http://www.incidentalcomics.com
    name = 'incidental-tumblr'
    long_name = 'Incidental Comics (from Tumblr)'
    url = 'http://incidentalcomics.tumblr.com'
4554
4555
4556
class APleasantWasteOfTimeTumblr(GenericTumblrV1):
    """Tumblr-based retriever for A Pleasant Waste Of Time comics."""
    # Also published at https://tapas.io/series/A-Pleasant-
    name = 'pleasant-waste-tumblr'
    long_name = 'A Pleasant Waste Of Time (from Tumblr)'
    url = 'https://artjcf.tumblr.com'
    _categories = ('WASTE',)
4563
4564
4565
class HorovitzComicsTumblr(GenericTumblrV1):
    """Tumblr-based retriever for Horovitz comics."""
    # Also published at http://www.horovitzcomics.com
    name = 'horovitz-tumblr'
    long_name = 'Horovitz (from Tumblr)'
    url = 'https://horovitzcomics.tumblr.com'
    _categories = ('HOROVITZ',)
4572
4573
4574
class DeepDarkFearsTumblr(GenericTumblrV1):
    """Tumblr-based retriever for Deep Dark Fears comics."""
    name = 'deep-dark-fears-tumblr'
    long_name = 'Deep Dark Fears (from Tumblr)'
    url = 'http://deep-dark-fears.tumblr.com'
4579
4580
4581
class ExtraFabulousComicsTumblr(GenericTumblrV1):
    """Tumblr-based retriever for Extra Fabulous Comics."""
    # Also published at http://extrafabulouscomics.com
    name = 'efc-tumblr'
    long_name = 'Extra Fabulous Comics (from Tumblr)'
    url = 'https://extrafabulouscomics.tumblr.com'
    _categories = ('EFC',)
4588
4589
4590
class AlexLevesque(GenericTumblrV1):
    """Tumblr-based retriever for Alex Levesque comics."""
    name = 'alevesque'
    long_name = 'Alex Levesque'
    url = 'http://alexlevesque.com'
    _categories = ('FRANCAIS',)
4596
4597
4598
class JamesOfNoTradesTumblr(GenericTumblrV1):
    """Tumblr-based retriever for James Of No Trades comics."""
    # Also published at http://jamesofnotrades.com
    # Also published at http://www.webtoons.com/en/challenge/james-of-no-trades/list?title_no=43422
    # Also published at https://tapas.io/series/James-of-No-Trades
    name = 'jamesofnotrades-tumblr'
    long_name = 'James Of No Trades (from Tumblr)'
    url = 'http://jamesfregan.tumblr.com'
    _categories = ('JAMESOFNOTRADES',)
4607
4608
4609
class InfiniteGuff(GenericTumblrV1):
    """Tumblr-based retriever for Infinite Guff comics."""
    name = 'infiniteguff'
    long_name = 'Infinite Guff'
    url = 'http://infiniteguff.com'
4614
4615
4616
class HorovitzComics(GenericEmptyComic, GenericListableComic):
    """Base class holding the logic shared by the Horovitz comic series.

    Subclasses set ``link_re`` to a compiled pattern that matches the archive
    links of their series and captures the comic number in its first group.
    """
    # Also published at https://horovitzcomics.tumblr.com
    url = 'http://www.horovitzcomics.com'
    _categories = ('HOROVITZ',)
    # The three groups are read as year, month and day of the comic image.
    img_re = re.compile(r'.*comics/([0-9]*)/([0-9]*)/([0-9]*)/.*$')
    # Placeholder: each concrete series provides its own pattern.
    link_re = NotImplemented
    get_url_from_archive_element = join_cls_url_to_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The number comes from the archive link's href, the title from the
        link text and the date from the path of the single ``img`` element
        whose id is ``comic``.
        """
        comic_num = int(cls.link_re.match(link['href']).group(1))
        comic_title = link.string
        comic_imgs = soup.find_all('img', id='comic')
        assert len(comic_imgs) == 1
        date_match = cls.img_re.match(comic_imgs[0]['src'])
        year, month, day = map(int, date_match.groups())
        return {
            'title': comic_title,
            'day': day,
            'month': month,
            'year': year,
            'img': [img['src'] for img in comic_imgs],
            'num': comic_num,
        }

    @classmethod
    def get_archive_elements(cls):
        """Get the archive links matching ``link_re``, in reversed page order."""
        archive_url = 'http://www.horovitzcomics.com/comics/archive/'
        archive_soup = get_soup_at_url(archive_url)
        series_links = archive_soup.find_all('a', href=cls.link_re)
        return reversed(series_links)
4648
4649
4650
class HorovitzNew(HorovitzComics):
    """Retriever for the "new" series of Horovitz comics."""
    name = 'horovitznew'
    long_name = 'Horovitz New'
    # First group captures the comic number.
    link_re = re.compile(r'^/comics/new/([0-9]+)$')
4655
4656
4657
class HorovitzClassic(HorovitzComics):
    """Retriever for the "classic" series of Horovitz comics."""
    name = 'horovitzclassic'
    long_name = 'Horovitz Classic'
    # First group captures the comic number.
    link_re = re.compile(r'^/comics/classic/([0-9]+)$')
4662
4663
4664
class GenericGoComic(GenericNavigableComic):
    """Base class gathering the logic shared by comics from gocomics.com."""
    _categories = ('GOCOMIC',)

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics."""
        first_btn_class = 'fa btn btn-outline-default btn-circle fa-backward sm '
        landing_soup = get_soup_at_url(cls.url)
        return landing_soup.find('a', class_=first_btn_class)

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        The "next" button is looked up when ``next_`` is truthy, the
        "previous" one otherwise.
        """
        if next_:
            btn_class = 'fa btn btn-outline-default btn-circle fa-caret-right hidden-sm-up sm '
        else:
            btn_class = 'fa btn btn-outline-default btn-circle fa-caret-left sm '
        return last_soup.find('a', class_=btn_class)

    @classmethod
    def get_url_from_link(cls, link):
        """Get the url of a link, joined to the gocomics.com root."""
        return urljoin_wrapper('http://www.gocomics.com', link['href'])

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Date, author and tags are read from the page's ``meta`` elements;
        images come from the comic ``picture`` element.
        """
        def meta_content(prop):
            """Return the content of the meta element with that property."""
            return soup.find('meta', property=prop)['content']

        published = string_to_date(
            meta_content('article:published_time'), "%Y-%m-%d")
        picture = soup.find('picture', class_='img-fluid item-comic-image')
        comic_imgs = picture.find_all('img')
        author = meta_content('article:author')
        tags = meta_content('article:tag')
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [urljoin_wrapper(cls.url, img['src']) for img in comic_imgs],
            'author': author,
            'tags': tags,
        }
4701
4702
4703
class PearlsBeforeSwine(GenericGoComic):
    """Pearls Before Swine, fetched from GoComics."""
    name = 'pearls'
    long_name = 'Pearls Before Swine'
    url = 'http://www.gocomics.com/pearlsbeforeswine'
4708
4709
4710
class Peanuts(GenericGoComic):
    """Peanuts, fetched from GoComics."""
    name = 'peanuts'
    long_name = 'Peanuts'
    url = 'http://www.gocomics.com/peanuts'
4715
4716
4717
class MattWuerker(GenericGoComic):
    """Matt Wuerker cartoons, fetched from GoComics."""
    name = 'wuerker'
    long_name = 'Matt Wuerker'
    url = 'http://www.gocomics.com/mattwuerker'
4722
4723
4724
class TomToles(GenericGoComic):
    """Tom Toles cartoons, fetched from GoComics."""
    name = 'toles'
    long_name = 'Tom Toles'
    url = 'http://www.gocomics.com/tomtoles'
4729
4730
4731
class BreakOfDay(GenericGoComic):
    """Break Of Day, fetched from GoComics."""
    name = 'breakofday'
    long_name = 'Break Of Day'
    url = 'http://www.gocomics.com/break-of-day'
4736
4737
4738
class Brevity(GenericGoComic):
    """Brevity, fetched from GoComics."""
    name = 'brevity'
    long_name = 'Brevity'
    url = 'http://www.gocomics.com/brevity'
4743
4744
4745
class MichaelRamirez(GenericGoComic):
    """Michael Ramirez cartoons, fetched from GoComics."""
    name = 'ramirez'
    long_name = 'Michael Ramirez'
    url = 'http://www.gocomics.com/michaelramirez'
4750
4751
4752
class MikeLuckovich(GenericGoComic):
    """Mike Luckovich cartoons, fetched from GoComics."""
    name = 'luckovich'
    long_name = 'Mike Luckovich'
    url = 'http://www.gocomics.com/mikeluckovich'
4757
4758
4759
class JimBenton(GenericGoComic):
    """Jim Benton cartoons, fetched from GoComics."""
    # Also available at http://jimbenton.tumblr.com
    name = 'benton'
    long_name = 'Jim Benton'
    url = 'http://www.gocomics.com/jim-benton-cartoons'
4765
4766
4767
class TheArgyleSweater(GenericGoComic):
    """The Argyle Sweater, fetched from GoComics."""
    name = 'argyle'
    long_name = 'Argyle Sweater'
    url = 'http://www.gocomics.com/theargylesweater'
4772
4773
4774
class SunnyStreet(GenericGoComic):
    """Sunny Street, fetched from GoComics."""
    # Also available at http://www.sunnystreetcomics.com
    name = 'sunny'
    long_name = 'Sunny Street'
    url = 'http://www.gocomics.com/sunny-street'
4780
4781
4782
class OffTheMark(GenericGoComic):
    """Off The Mark, fetched from GoComics."""
    # Also available at https://www.offthemark.com
    name = 'offthemark'
    long_name = 'Off The Mark'
    url = 'http://www.gocomics.com/offthemark'
4788
4789
4790
class WuMo(GenericGoComic):
    """WuMo, fetched from GoComics."""
    # Also available at http://wumo.com
    name = 'wumo'
    long_name = 'WuMo'
    url = 'http://www.gocomics.com/wumo'
4796
4797
4798
class LunarBaboon(GenericGoComic):
    """Lunar Baboon, fetched from GoComics."""
    # Also available at http://www.lunarbaboon.com
    # Also available at https://tapastic.com/series/Lunarbaboon
    name = 'lunarbaboon'
    long_name = 'Lunar Baboon'
    url = 'http://www.gocomics.com/lunarbaboon'
4805
4806
4807
class SandersenGocomic(GenericGoComic):
    """Sarah Andersen's comics (Sarah's Scribbles), fetched from GoComics."""
    # Also available at http://sarahcandersen.com
    # Also available at http://tapastic.com/series/Doodle-Time
    name = 'sandersen-goc'
    long_name = 'Sarah Andersen (from GoComics)'
    url = 'http://www.gocomics.com/sarahs-scribbles'
4814
4815
4816
class SaturdayMorningBreakfastCerealGoComic(GenericGoComic):
    """Saturday Morning Breakfast Cereal, fetched from GoComics."""
    # Also available at http://smbc-comics.tumblr.com
    # Also available at http://www.smbc-comics.com
    name = 'smbc-goc'
    long_name = 'Saturday Morning Breakfast Cereal (from GoComics)'
    url = 'http://www.gocomics.com/saturday-morning-breakfast-cereal'
    _categories = ('SMBC', )
4824
4825
4826
class CalvinAndHobbesGoComic(GenericGoComic):
    """Calvin and Hobbes, fetched from GoComics."""
    # GoComics edition, as opposed to http://marcel-oehler.marcellosendos.ch/comics/ch/
    name = 'calvin-goc'
    long_name = 'Calvin and Hobbes (from GoComics)'
    url = 'http://www.gocomics.com/calvinandhobbes'
4832
4833
4834
class RallGoComic(GenericGoComic):
    """Ted Rall cartoons, fetched from GoComics."""
    # Also available at http://rall.com/comic
    name = 'rall-goc'
    long_name = 'Ted Rall (from GoComics)'
    url = 'http://www.gocomics.com/ted-rall'
    _categories = ('RALL', )
4841
4842
4843
class TheAwkwardYetiGoComic(GenericGoComic):
    """The Awkward Yeti, fetched from GoComics."""
    # Also available at http://larstheyeti.tumblr.com
    # Also available at http://theawkwardyeti.com
    # Also available at https://tapastic.com/series/TheAwkwardYeti
    name = 'yeti-goc'
    long_name = 'The Awkward Yeti (from GoComics)'
    url = 'http://www.gocomics.com/the-awkward-yeti'
    _categories = ('YETI', )
4852
4853
4854
class BerkeleyMewsGoComics(GenericGoComic):
    """Berkeley Mews, fetched from GoComics."""
    # Also available at http://mews.tumblr.com
    # Also available at http://www.berkeleymews.com
    name = 'berkeley-goc'
    long_name = 'Berkeley Mews (from GoComics)'
    url = 'http://www.gocomics.com/berkeley-mews'
    _categories = ('BERKELEY', )
4862
4863
4864
class SheldonGoComics(GenericGoComic):
    """Sheldon, fetched from GoComics."""
    # Also available at http://www.sheldoncomics.com
    name = 'sheldon-goc'
    long_name = 'Sheldon Comics (from GoComics)'
    url = 'http://www.gocomics.com/sheldon'
4870
4871
4872
class FowlLanguageGoComics(GenericGoComic):
    """Fowl Language, fetched from GoComics."""
    # Also available at http://www.fowllanguagecomics.com
    # Also available at http://tapastic.com/series/Fowl-Language-Comics
    # Also available at http://fowllanguagecomics.tumblr.com
    name = 'fowllanguage-goc'
    long_name = 'Fowl Language Comics (from GoComics)'
    url = 'http://www.gocomics.com/fowl-language'
    _categories = ('FOWLLANGUAGE', )
4881
4882
4883
class NickAnderson(GenericGoComic):
    """Nick Anderson cartoons, fetched from GoComics."""
    name = 'nickanderson'
    long_name = 'Nick Anderson'
    url = 'http://www.gocomics.com/nickanderson'
4888
4889
4890
class GarfieldGoComics(GenericGoComic):
    """Garfield, fetched from GoComics."""
    # Also available at http://garfield.com
    name = 'garfield-goc'
    long_name = 'Garfield (from GoComics)'
    url = 'http://www.gocomics.com/garfield'
    _categories = ('GARFIELD', )
4897
4898
4899
class DorrisMcGoComics(GenericGoComic):
    """Dorris Mc Comics, fetched from GoComics."""
    # Also available at http://dorrismccomics.com
    name = 'dorrismc-goc'
    long_name = 'Dorris Mc (from GoComics)'
    url = 'http://www.gocomics.com/dorris-mccomics'
4905
4906
4907
class FoxTrot(GenericGoComic):
    """FoxTrot, fetched from GoComics."""
    name = 'foxtrot'
    long_name = 'FoxTrot'
    url = 'http://www.gocomics.com/foxtrot'
4912
4913
4914
class FoxTrotClassics(GenericGoComic):
    """FoxTrot Classics, fetched from GoComics."""
    name = 'foxtrot-classics'
    long_name = 'FoxTrot Classics'
    url = 'http://www.gocomics.com/foxtrotclassics'
4919
4920
4921
class MisterAndMeGoComics(GenericEmptyComic, GenericGoComic):  # Removed ?
    """Mister & Me, GoComics edition.

    GenericEmptyComic is listed first among the bases; presumably this
    disables retrieval for a series that disappeared - confirm against
    GenericEmptyComic.
    """
    # Also available at http://www.mister-and-me.com
    # Also available at https://tapastic.com/series/Mister-and-Me
    name = 'mister-goc'
    long_name = 'Mister & Me (from GoComics)'
    url = 'http://www.gocomics.com/mister-and-me'
4928
4929
4930
class NonSequitur(GenericGoComic):
    """Non Sequitur (by Wiley Miller), fetched from GoComics."""
    name = 'nonsequitur'
    long_name = 'Non Sequitur'
    url = 'http://www.gocomics.com/nonsequitur'
4935
4936
4937
class GenericTapasticComic(GenericListableComic):
    """Shared scraping logic for comics hosted on tapastic.com."""
    _categories = ('TAPASTIC', )

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Build the info dict (date, images, title) for one episode.

        Returns None, after printing a notice, when the page contains no
        'art-image' picture.
        """
        # publishDate is divided by 1000 before being used as a timestamp
        # (presumably it is expressed in milliseconds - confirm).
        seconds = int(archive_elt['publishDate']) / 1000.0
        published = datetime.datetime.fromtimestamp(seconds).date()
        imgs = soup.find_all('img', class_='art-image')
        if imgs:
            assert len(imgs) > 0
            return {
                'day': published.day,
                'year': published.year,
                'month': published.month,
                'img': [img['src'] for img in imgs],
                'title': archive_elt['title'],
            }
        print("Comic %s is being uploaded, retry later" % cls.get_url_from_archive_element(archive_elt))
        return None

    @classmethod
    def get_url_from_archive_element(cls, archive_elt):
        """Return the episode URL built from the element's 'id'."""
        return 'http://tapastic.com/episode/' + str(archive_elt['id'])

    @classmethod
    def get_archive_elements(cls):
        """Return the episode list embedded in the javascript of the series page.

        The first line of the page at `cls.url` that starts with
        'episodeList : ' and ends with ',' is stripped of both markers and
        parsed as JSON. An IndexError is raised when no line matches.
        """
        pref, suff = 'episodeList : ', ','
        # Information is stored in the javascript part; there is no cleaner
        # known way to reach it than scanning the raw lines.
        page_lines = [raw.decode('utf-8').strip()
                      for raw in urlopen_wrapper(cls.url).readlines()]
        payloads = [text[len(pref):-len(suff)]
                    for text in page_lines
                    if text.startswith(pref) and text.endswith(suff)]
        return json.loads(payloads[0])
4970
4971
4972
class VegetablesForDessert(GenericTapasticComic):
    """Vegetables For Dessert, fetched from Tapastic."""
    # Also available at http://vegetablesfordessert.tumblr.com
    name = 'vegetables'
    long_name = 'Vegetables For Dessert'
    url = 'http://tapastic.com/series/vegetablesfordessert'
4978
4979
4980
class FowlLanguageTapa(GenericTapasticComic):
    """Fowl Language, fetched from Tapastic."""
    # Also available at http://www.fowllanguagecomics.com
    # Also available at http://fowllanguagecomics.tumblr.com
    # Also available at http://www.gocomics.com/fowl-language
    name = 'fowllanguage-tapa'
    long_name = 'Fowl Language Comics (from Tapastic)'
    url = 'http://tapastic.com/series/Fowl-Language-Comics'
    _categories = ('FOWLLANGUAGE', )
4989
4990
4991
class OscillatingProfundities(GenericTapasticComic):
    """Oscillating Profundities, fetched from Tapastic."""
    name = 'oscillating'
    long_name = 'Oscillating Profundities'
    url = 'http://tapastic.com/series/oscillatingprofundities'
4996
4997
4998
class ZnoflatsComics(GenericTapasticComic):
    """Znoflats Comics, fetched from Tapastic."""
    name = 'znoflats'
    long_name = 'Znoflats Comics'
    url = 'http://tapastic.com/series/Znoflats-Comics'
5003
5004
5005
class SandersenTapastic(GenericTapasticComic):
    """Sarah Andersen's comics (Doodle Time series), fetched from Tapastic."""
    # Also available at http://sarahcandersen.com
    # Also available at http://www.gocomics.com/sarahs-scribbles
    name = 'sandersen-tapa'
    long_name = 'Sarah Andersen (from Tapastic)'
    url = 'http://tapastic.com/series/Doodle-Time'
5012
5013
5014
class TubeyToonsTapastic(GenericTapasticComic):
    """Tubey Toons, fetched from Tapastic."""
    # Also available at http://tubeytoons.com
    # Also available at https://tubeytoons.tumblr.com
    name = 'tubeytoons-tapa'
    long_name = 'Tubey Toons (from Tapastic)'
    url = 'http://tapastic.com/series/Tubey-Toons'
    # NOTE(review): the category reads 'TUNEYTOONS' rather than 'TUBEYTOONS';
    # kept unchanged since other classes may share the same key - confirm.
    _categories = ('TUNEYTOONS', )
5022
5023
5024
class AnythingComicTapastic(GenericTapasticComic):
    """Anything Comic, fetched from Tapastic."""
    # Also available at http://www.anythingcomic.com
    name = 'anythingcomic-tapa'
    long_name = 'Anything Comic (from Tapastic)'
    url = 'http://tapastic.com/series/anything'
5030
5031
5032
class UnearthedComicsTapastic(GenericTapasticComic):
    """Unearthed Comics, fetched from Tapastic."""
    # Also available at http://unearthedcomics.com
    # Also available at https://unearthedcomics.tumblr.com
    name = 'unearthed-tapa'
    long_name = 'Unearthed Comics (from Tapastic)'
    url = 'http://tapastic.com/series/UnearthedComics'
    _categories = ('UNEARTHED', )
5040
5041
5042
class EverythingsStupidTapastic(GenericTapasticComic):
    """Everything's Stupid, fetched from Tapastic."""
    # Also available at http://www.webtoons.com/en/challenge/everythings-stupid/list?title_no=14591
    # Also available at http://everythingsstupid.net
    name = 'stupid-tapa'
    long_name = "Everything's Stupid (from Tapastic)"
    url = 'http://tapastic.com/series/EverythingsStupid'
5049
5050
5051
class JustSayEhTapastic(GenericTapasticComic):
    """Just Say Eh, fetched from Tapastic."""
    # Also available at http://www.justsayeh.com
    name = 'justsayeh-tapa'
    long_name = 'Just Say Eh (from Tapastic)'
    url = 'http://tapastic.com/series/Just-Say-Eh'
5057
5058
5059
class ThorsThundershackTapastic(GenericTapasticComic):
    """Thor's Thundershack, fetched from Tapastic."""
    # Also available at http://www.thorsthundershack.com
    name = 'thor-tapa'
    long_name = "Thor's Thundershack (from Tapastic)"
    url = 'http://tapastic.com/series/Thors-Thundershac'
    _categories = ('THOR', )
5066
5067
5068
class OwlTurdTapastic(GenericTapasticComic):
    """Owl Turd, fetched from Tapastic."""
    # Also available at http://owlturd.com
    name = 'owlturd-tapa'
    long_name = 'Owl Turd (from Tapastic)'
    url = 'http://tapastic.com/series/Owl-Turd-Comix'
    _categories = ('OWLTURD', )
5075
5076
5077
class GoneIntoRaptureTapastic(GenericTapasticComic):
    """Gone Into Rapture, fetched from Tapastic."""
    # Also available at http://goneintorapture.tumblr.com
    # Also available at http://goneintorapture.com
    name = 'rapture-tapa'
    long_name = 'Gone Into Rapture (from Tapastic)'
    url = 'http://tapastic.com/series/Goneintorapture'
5084
5085
5086
class HeckIfIKnowComicsTapa(GenericTapasticComic):
    """Heck If I Know Comics, fetched from Tapastic."""
    # Also available at http://heckifiknowcomics.com
    name = 'heck-tapa'
    long_name = 'Heck if I Know comics (from Tapastic)'
    url = 'http://tapastic.com/series/Regular'
5092
5093
5094
class CheerUpEmoKidTapa(GenericTapasticComic):
    """Cheer Up Emo Kid, fetched from Tapastic."""
    # Also available at http://www.cheerupemokid.com
    # Also available at https://enzocomics.tumblr.com
    name = 'cuek-tapa'
    long_name = 'Cheer Up Emo Kid (from Tapastic)'
    url = 'http://tapastic.com/series/CUEK'
5101
5102
5103
class BigFootJusticeTapa(GenericTapasticComic):
    """Big Foot Justice, fetched from Tapastic."""
    # Also available at http://bigfootjustice.com
    name = 'bigfoot-tapa'
    long_name = 'Big Foot Justice (from Tapastic)'
    url = 'http://tapastic.com/series/bigfoot-justice'
5109
5110
5111
class UpAndOutTapa(GenericTapasticComic):
    """Up & Out, fetched from Tapastic."""
    # Also available at http://upandoutcomic.tumblr.com
    name = 'upandout-tapa'
    long_name = 'Up And Out (from Tapastic)'
    url = 'http://tapastic.com/series/UP-and-OUT'
5117
5118
5119
class ToonHoleTapa(GenericTapasticComic):
    """Toon Hole, fetched from Tapastic."""
    # Also available at http://www.toonhole.com
    name = 'toonhole-tapa'
    long_name = 'Toon Hole (from Tapastic)'
    url = 'http://tapastic.com/series/TOONHOLE'
5125
5126
5127
class AngryAtNothingTapa(GenericTapasticComic):
    """Angry At Nothing, fetched from Tapastic."""
    # Also available at http://www.angryatnothing.net
    # Also available at http://angryatnothing.tumblr.com
    name = 'angry-tapa'
    long_name = 'Angry At Nothing (from Tapastic)'
    url = 'http://tapastic.com/series/Comics-yeah-definitely-comics-'
5134
5135
5136
class LeleozTapa(GenericTapasticComic):
    """Leleoz, fetched from Tapastic."""
    # Also available at http://leleozcomics.tumblr.com
    name = 'leleoz-tapa'
    long_name = 'Leleoz (from Tapastic)'
    url = 'https://tapastic.com/series/Leleoz'
5142
5143
5144
class TheAwkwardYetiTapa(GenericTapasticComic):
    """The Awkward Yeti, fetched from Tapastic."""
    # Also available at http://www.gocomics.com/the-awkward-yeti
    # Also available at http://theawkwardyeti.com
    # Also available at http://larstheyeti.tumblr.com
    name = 'yeti-tapa'
    long_name = 'The Awkward Yeti (from Tapastic)'
    url = 'https://tapastic.com/series/TheAwkwardYeti'
    _categories = ('YETI', )
5153
5154
5155
class AsPerUsualTapa(GenericTapasticComic):
    """As Per Usual, fetched from Tapastic."""
    # Also on http://as-per-usual.tumblr.com
    name = 'usual-tapa'
    long_name = 'As Per Usual (from Tapastic)'
    url = 'https://tapastic.com/series/AsPerUsual'
    # Was spelled `categories`; every other comic class declares its extra
    # categories through `_categories`, so the original spelling presumably
    # left the 'DAMILEE' tag unused (verify against GenericComic).
    _categories = ('DAMILEE', )
5162
5163
5164
class HotComicsForCoolPeopleTapa(GenericTapasticComic):
    """Hot Comics For Cool People, fetched from Tapastic."""
    # Also on http://hotcomicsforcoolpeople.tumblr.com
    # Also on http://hotcomics.biz (links to tumblr)
    # Also on http://hcfcp.com (links to tumblr)
    name = 'hotcomics-tapa'
    long_name = 'Hot Comics For Cool People (from Tapastic)'
    url = 'https://tapastic.com/series/Hot-Comics-For-Cool-People'
    # Was spelled `categories`; every other comic class declares its extra
    # categories through `_categories`, so the original spelling presumably
    # left the 'DAMILEE' tag unused (verify against GenericComic).
    _categories = ('DAMILEE', )
5173
5174
5175
class OneOneOneOneComicTapa(GenericTapasticComic):
    """1111 Comics, fetched from Tapastic."""
    # Also available at http://www.1111comics.me
    # Also available at http://comics1111.tumblr.com
    name = '1111-tapa'
    long_name = '1111 Comics (from Tapastic)'
    url = 'https://tapastic.com/series/1111-Comics'
    _categories = ('ONEONEONEONE', )
5183
5184
5185
class TumbleDryTapa(GenericTapasticComic):
    """Tumble Dry, fetched from Tapastic."""
    # Also available at http://tumbledrycomics.com
    name = 'tumbledry-tapa'
    # NOTE(review): the display name says 'Tumblr Dry' although the series is
    # 'Tumble Dry'; left untouched in case the value is persisted - confirm.
    long_name = 'Tumblr Dry (from Tapastic)'
    url = 'https://tapastic.com/series/TumbleDryComics'
5191
5192
5193
class DeadlyPanelTapa(GenericTapasticComic):
    """Deadly Panel, fetched from Tapastic."""
    # Also available at http://www.deadlypanel.com
    # Also available at https://deadlypanel.tumblr.com
    name = 'deadly-tapa'
    long_name = 'Deadly Panel (from Tapastic)'
    url = 'https://tapastic.com/series/deadlypanel'
5200
5201
5202
class ChrisHallbeckMaxiTapa(GenericTapasticComic):
    """Chris Hallbeck's Maximumble, fetched from Tapastic."""
    # Also available at https://chrishallbeck.tumblr.com
    # Also available at http://maximumble.com
    name = 'hallbeckmaxi-tapa'
    # NOTE(review): 'Hallback' (here and in the category) differs from the
    # 'Hallbeck' spelling of the class name; kept as-is because both values
    # may be shared or persisted - confirm.
    long_name = 'Chris Hallback - Maximumble (from Tapastic)'
    url = 'https://tapastic.com/series/Maximumble'
    _categories = ('HALLBACK', )
5210
5211
5212
class ChrisHallbeckMiniTapa(GenericEmptyComic, GenericTapasticComic):
    """Chris Hallbeck's Minimumble, Tapastic edition.

    GenericEmptyComic is listed first among the bases; presumably this
    disables retrieval - confirm against GenericEmptyComic.
    """
    # Also available at https://chrishallbeck.tumblr.com
    # Also available at http://minimumble.com
    name = 'hallbeckmini-tapa'
    # NOTE(review): 'Hallback' (here and in the category) differs from the
    # 'Hallbeck' spelling of the class name; kept as-is because both values
    # may be shared or persisted - confirm.
    long_name = 'Chris Hallback - Minimumble (from Tapastic)'
    url = 'https://tapastic.com/series/Minimumble'
    _categories = ('HALLBACK', )
5220
5221
5222
class ChrisHallbeckBiffTapa(GenericEmptyComic, GenericTapasticComic):
    """Chris Hallbeck's The Book of Biff, Tapastic edition.

    GenericEmptyComic is listed first among the bases; presumably this
    disables retrieval - confirm against GenericEmptyComic.
    """
    # Also available at https://chrishallbeck.tumblr.com
    # Also available at http://thebookofbiff.com
    name = 'hallbeckbiff-tapa'
    # NOTE(review): 'Hallback' (here and in the category) differs from the
    # 'Hallbeck' spelling of the class name; kept as-is because both values
    # may be shared or persisted - confirm.
    long_name = 'Chris Hallback - The Book of Biff (from Tapastic)'
    url = 'https://tapastic.com/series/Biff'
    _categories = ('HALLBACK', )
5230
5231
5232
class RandoWisTapa(GenericTapasticComic):
    """RandoWis, fetched from Tapastic."""
    # Also available at https://randowis.com
    name = 'randowis-tapa'
    long_name = 'RandoWis (from Tapastic)'
    url = 'https://tapastic.com/series/RandoWis'
5238
5239
5240
class PigeonGazetteTapa(GenericTapasticComic):
    """The Pigeon Gazette, fetched from Tapastic."""
    # Also available at http://thepigeongazette.tumblr.com
    name = 'pigeon-tapa'
    long_name = 'The Pigeon Gazette (from Tapastic)'
    url = 'https://tapastic.com/series/The-Pigeon-Gazette'
5246
5247
5248
class TheOdd1sOutTapa(GenericTapasticComic):
    """The Odd 1s Out, fetched from Tapastic."""
    # Also available at http://theodd1sout.com
    # Also available at http://theodd1sout.tumblr.com
    name = 'theodd-tapa'
    long_name = 'The Odd 1s Out (from Tapastic)'
    url = 'https://tapastic.com/series/Theodd1sout'
5255
5256
5257
class TheWorldIsFlatTapa(GenericTapasticComic):
    """The World Is Flat, fetched from Tapastic."""
    # Also available at http://theworldisflatcomics.tumblr.com
    name = 'flatworld-tapa'
    long_name = 'The World Is Flat (from Tapastic)'
    url = 'https://tapastic.com/series/The-World-is-Flat'
5263
5264
5265
class MisterAndMeTapa(GenericTapasticComic):
    """Mister & Me, fetched from Tapastic."""
    # Also available at http://www.mister-and-me.com
    # Also available at http://www.gocomics.com/mister-and-me
    name = 'mister-tapa'
    long_name = 'Mister & Me (from Tapastic)'
    url = 'https://tapastic.com/series/Mister-and-Me'
5272
5273
5274
class TalesOfAbsurdityTapa(GenericEmptyComic, GenericTapasticComic):
    """Tales Of Absurdity, Tapastic edition.

    GenericEmptyComic is listed first among the bases; presumably this
    disables retrieval - confirm against GenericEmptyComic.
    """
    # Also available at http://talesofabsurdity.com
    # Also available at http://talesofabsurdity.tumblr.com
    name = 'absurdity-tapa'
    long_name = 'Tales of Absurdity (from Tapastic)'
    url = 'http://tapastic.com/series/Tales-Of-Absurdity'
    _categories = ('ABSURDITY', )
5282
5283
5284
class BFGFSTapa(GenericTapasticComic):
    """BFGFS, fetched from Tapastic."""
    # Also available at http://bfgfs.com
    # Also available at https://bfgfs.tumblr.com
    name = 'bfgfs-tapa'
    long_name = 'BFGFS (from Tapastic)'
    url = 'https://tapastic.com/series/BFGFS'
5291
5292
5293
class DoodleForFoodTapa(GenericTapasticComic):
    """Doodle For Food, fetched from Tapastic."""
    # Also available at http://www.doodleforfood.com
    name = 'doodle-tapa'
    long_name = 'Doodle For Food (from Tapastic)'
    url = 'https://tapastic.com/series/Doodle-for-Food'
5299
5300
5301
class MrLovensteinTapa(GenericTapasticComic):
    """Mr. Lovenstein, fetched from Tapastic."""
    # NOTE(review): the original "Also on" note pointed at
    # https://tapastic.com/series/MrLovenstein, i.e. this very URL; another
    # site was presumably meant - confirm.
    name = 'mrlovenstein-tapa'
    long_name = 'Mr. Lovenstein (from Tapastic)'
    url = 'https://tapastic.com/series/MrLovenstein'
5307
5308
5309
class CassandraCalinTapa(GenericTapasticComic):
    """Cassandra Calin's comics (C. Cassandra), fetched from Tapastic."""
    # Also available at http://cassandracalin.com
    # Also available at http://c-cassandra.tumblr.com
    name = 'cassandra-tapa'
    long_name = 'Cassandra Calin (from Tapastic)'
    url = 'https://tapastic.com/series/C-Cassandra-comics'
5316
5317
5318
class WafflesAndPancakes(GenericTapasticComic):
    """Waffles And Pancakes, fetched from Tapastic."""
    # Also available at http://wandpcomic.com
    name = 'waffles'
    long_name = 'Waffles And Pancakes'
    url = 'https://tapastic.com/series/Waffles-and-Pancakes'
5324
5325
5326
class YesterdaysPopcornTapastic(GenericTapasticComic):
    """Yesterday's Popcorn, fetched from Tapastic."""
    # Also available at http://www.yesterdayspopcorn.com
    # Also available at http://yesterdayspopcorn.tumblr.com
    name = 'popcorn-tapa'
    long_name = "Yesterday's Popcorn (from Tapastic)"
    url = 'https://tapastic.com/series/Yesterdays-Popcorn'
5333
5334
5335
class OurSuperAdventureTapastic(GenericEmptyComic, GenericTapasticComic):
    """Our Super Adventure, Tapastic edition.

    GenericEmptyComic is listed first among the bases; presumably this
    disables retrieval - confirm against GenericEmptyComic.
    """
    # Also available at http://www.oursuperadventure.com
    # Related: http://sarahssketchbook.tumblr.com
    # Related: http://sarahgraley.com
    name = 'superadventure-tapastic'
    long_name = 'Our Super Adventure (from Tapastic)'
    url = 'https://tapastic.com/series/Our-Super-Adventure'
5343
5344
5345
class NamelessPCs(GenericTapasticComic):
    """Nameless PCs, fetched from Tapastic."""
    # Also available at http://namelesspcs.com
    name = 'namelesspcs-tapa'
    long_name = 'NamelessPCs (from Tapastic)'
    url = 'https://tapastic.com/series/NamelessPC'
5351
5352
5353
class DownTheUpwardSpiralTapa(GenericTapasticComic):
    """Down The Upward Spiral, fetched from Tapastic."""
    # Also available at http://www.downtheupwardspiral.com
    # Also available at http://downtheupwardspiral.tumblr.com
    name = 'spiral-tapa'
    long_name = 'Down the Upward Spiral (from Tapastic)'
    url = 'https://tapastic.com/series/Down-the-Upward-Spiral'
5360
5361
5362
class UbertoolTapa(GenericTapasticComic):
    """Ubertool, fetched from Tapastic."""
    # Also available at http://ubertoolcomic.com
    # Also available at https://ubertool.tumblr.com
    name = 'ubertool-tapa'
    long_name = 'Ubertool (from Tapastic)'
    url = 'https://tapastic.com/series/ubertool'
    _categories = ('UBERTOOL', )
5370
5371
5372
class BarteNerdsTapa(GenericEmptyComic, GenericTapasticComic):
    """BarteNerds, Tapastic edition.

    GenericEmptyComic is listed first among the bases; presumably this
    disables retrieval - confirm against GenericEmptyComic.
    """
    # Also available at http://www.bartenerds.com
    name = 'bartenerds-tapa'
    long_name = 'BarteNerds (from Tapastic)'
    url = 'https://tapastic.com/series/BarteNERDS'
5378
5379
5380
class SmallBlueYonderTapa(GenericTapasticComic):
    """Small Blue Yonder, fetched from Tapastic."""
    # Also available at http://www.smallblueyonder.com
    name = 'smallblue-tapa'
    long_name = 'Small Blue Yonder (from Tapastic)'
    url = 'https://tapastic.com/series/Small-Blue-Yonder'
5386
5387
5388
class TizzyStitchBirdTapa(GenericTapasticComic):
    """Tizzy Stitch Bird, fetched from Tapastic."""
    # Also available at http://tizzystitchbird.com
    # Also available at http://tizzystitchbird.tumblr.com
    # Also available at http://www.webtoons.com/en/challenge/tizzy-stitchbird/list?title_no=50082
    name = 'tizzy-tapa'
    long_name = 'Tizzy Stitch Bird (from Tapastic)'
    url = 'https://tapastic.com/series/TizzyStitchbird'
5396
5397
5398
class RockPaperCynicTapa(GenericTapasticComic):
    """Rock Paper Cynic, fetched from Tapastic."""
    # Also available at http://www.rockpapercynic.com
    # Also available at http://rockpapercynic.tumblr.com
    name = 'rpc-tapa'
    long_name = 'Rock Paper Cynic (from Tapastic)'
    url = 'https://tapastic.com/series/rockpapercynic'
5405
5406
5407
class IsItCanonTapa(GenericTapasticComic):
    """Is It Canon, fetched from Tapastic."""
    # Also available at http://www.isitcanon.com
    name = 'canon-tapa'
    long_name = 'Is It Canon (from Tapastic)'
    url = 'http://tapastic.com/series/isitcanon'
5413
5414
5415
class ItsTheTieTapa(GenericTapasticComic):
    """It's the tie, fetched from Tapastic."""
    # Also available at http://itsthetie.com
    # Also available at http://itsthetie.tumblr.com
    name = 'tie-tapa'
    long_name = "It's the tie (from Tapastic)"
    url = 'https://tapastic.com/series/itsthetie'
    _categories = ('TIE', )
5423
5424
5425
class JamesOfNoTradesTapa(GenericTapasticComic):
    """James Of No Trades, fetched from Tapastic (tapas.io URL)."""
    # Also available at http://jamesofnotrades.com
    # Also available at http://www.webtoons.com/en/challenge/james-of-no-trades/list?title_no=43422
    # Also available at http://jamesfregan.tumblr.com
    name = 'jamesofnotrades-tapa'
    long_name = 'James Of No Trades (from Tapastic)'
    url = 'https://tapas.io/series/James-of-No-Trades'
    _categories = ('JAMESOFNOTRADES', )
5434
5435
5436
class MomentumTapa(GenericTapasticComic):
    """Momentum, fetched from Tapastic."""
    # Also available at http://www.momentumcomic.com
    name = 'momentum-tapa'
    long_name = 'Momentum (from Tapastic)'
    url = 'https://tapastic.com/series/momentum'
5442
5443
5444
class InYourFaceCakeTapa(GenericTapasticComic):
    """In Your Face Cake, fetched from Tapastic (tapas.io URL)."""
    # Also available at https://in-your-face-cake.tumblr.com
    name = 'inyourfacecake-tapa'
    long_name = 'In Your Face Cake (from Tapastic)'
    url = 'https://tapas.io/series/In-Your-Face-Cake'
    _categories = ('INYOURFACECAKE', )
5451
5452
5453
class APleasantWasteOfTimeTapa(GenericTapasticComic):
    """A Pleasant Waste Of Time, fetched from Tapastic (tapas.io URL)."""
    # Also available at https://artjcf.tumblr.com
    name = 'pleasant-waste-tapa'
    long_name = 'A Pleasant Waste Of Time (from Tapastic)'
    url = 'https://tapas.io/series/A-Pleasant-'
    _categories = ('WASTE', )
5460
5461
5462
def get_subclasses(klass):
    """Return every class deriving from `klass`, directly or transitively.

    Direct subclasses come first, followed by the descendants of each of
    them in turn. A class reachable through several inheritance paths
    appears once per path.
    """
    children = klass.__subclasses__()
    descendants = list(children)
    for child in children:
        descendants += get_subclasses(child)
    return descendants
5468
5469
5470
def remove_st_nd_rd_th_from_date(string):
    """Strip English ordinal suffixes so that the date can be parsed.

    "1st", "2nd", "3rd" and "4th" become "1", "2", "3" and "4". Only a
    suffix that directly follows a digit and ends a word is removed, so
    day and month names ("Monday", "Saturday", "August") are left intact.
    The previous chained `str.replace` implementation removed these letter
    pairs anywhere in the string and only repaired "August".

    Returns the cleaned string; input without ordinals is returned unchanged.
    """
    return re.sub(r'(?<=\d)(?:st|nd|rd|th)\b', '', string)
5478
5479
5480
def string_to_date(string, date_format, local=DEFAULT_LOCAL):
    """Convert a string to a date object.

    Wrapper around datetime.datetime.strptime which temporarily switches
    the process locale (LC_ALL) to `local` so that locale-dependent
    directives are parsed with that locale.

    Arguments:
        string: text to parse.
        date_format: strptime format, as described in
            https://docs.python.org/2/library/datetime.html#strftime-and-strptime-behavior
        local: locale name to use while parsing.

    Returns the parsed datetime.date.
    Raises ValueError when `string` does not match `date_format`, and
    locale.Error when `local` cannot be set.
    """
    prev_locale = locale.setlocale(locale.LC_ALL)
    must_switch = local != prev_locale
    if must_switch:
        locale.setlocale(locale.LC_ALL, local)
    try:
        return datetime.datetime.strptime(string, date_format).date()
    finally:
        # Restore the previous locale even when parsing fails; otherwise the
        # rest of the program would keep running under `local`.
        if must_switch:
            locale.setlocale(locale.LC_ALL, prev_locale)
5491
5492
5493
# Registry of comic classes, built once every class above has been defined.
# All direct and indirect subclasses of GenericComic, without duplicates.
COMICS = set(get_subclasses(GenericComic))
# Classes whose `name` is None are left out of the usable comics.
VALID_COMICS = [comic for comic in COMICS if comic.name is not None]
# Lookup from `name` to class; the assert checks that names are unique.
COMIC_NAMES = {comic.name: comic for comic in VALID_COMICS}
assert len(VALID_COMICS) == len(COMIC_NAMES)
# Python class names must be unique as well.
CLASS_NAMES = {comic.__name__ for comic in VALID_COMICS}
assert len(VALID_COMICS) == len(CLASS_NAMES)
5499