# Review-tool page header (not part of comics.py):
# Completed
# Push — master ( beec46...77a11c )
# by De
# 58s
# created
#
# comics.py (2 issues)

1
#! /usr/bin/python3
2
# vim: set expandtab tabstop=4 shiftwidth=4 :
3
"""Module to retrieve webcomics"""
4
5
from comic_abstract import GenericComic, get_date_for_comic
6
import re
7
from datetime import date, timedelta
8
import datetime
9
from urlfunctions import get_soup_at_url, urljoin_wrapper,\
10
    convert_iri_to_plain_ascii_uri, load_json_at_url, urlopen_wrapper
11
import json
12
import locale
13
import urllib
14
15
DEFAULT_LOCAL = 'en_GB.UTF-8'
16
17
18
class Xkcd(GenericComic):
    """Class to retrieve Xkcd comics."""
    name = 'xkcd'
    long_name = 'xkcd'
    url = 'http://xkcd.com'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generator to get the next comic. Implementation of GenericComic's abstract method.

        Args:
            last_comic: dict describing the last comic already retrieved
                (its 'num' entry is read), or a falsy value to start from
                number 1.

        Yields:
            The JSON description of each following comic as a dict, with
            'img' wrapped in a list, 'day'/'month'/'year' converted to
            int and 'prefix', 'json_url' and 'url' added.
        """
        first_num = last_comic['num'] if last_comic else 0
        # 'num' in the JSON found at the site root is used as the number
        # of the most recent comic.
        last_num = load_json_at_url(
            urljoin_wrapper(cls.url, 'info.0.json'))['num']

        for num in range(first_num + 1, last_num + 1):
            # NOTE(review): number 404 is skipped on purpose - presumably
            # there is no such comic on the site; confirm.
            if num == 404:
                continue
            json_url = urljoin_wrapper(cls.url, '%d/info.0.json' % num)
            comic = load_json_at_url(json_url)
            comic['img'] = [comic['img']]
            comic['prefix'] = '%d-' % num
            comic['json_url'] = json_url
            comic['url'] = urljoin_wrapper(cls.url, str(num))
            comic['day'] = int(comic['day'])
            comic['month'] = int(comic['month'])
            comic['year'] = int(comic['year'])
            assert comic['num'] == num, \
                "unexpected comic number in %s" % json_url
            yield comic
44
45
46
# Helper functions corresponding to get_url_from_link/get_url_from_archive_element
47
48
49
@classmethod
def get_href(cls, link):
    """Implementation of get_url_from_link/get_url_from_archive_element.

    Meant to be assigned as a class attribute of a comic class.
    Returns the 'href' entry of `link` unchanged.
    """
    return link['href']
53
54
55
@classmethod
def join_cls_url_to_href(cls, link):
    """Implementation of get_url_from_link/get_url_from_archive_element.

    Meant to be assigned as a class attribute of a comic class.
    Returns the 'href' entry of `link` joined to `cls.url` with
    `urljoin_wrapper`.
    """
    return urljoin_wrapper(cls.url, link['href'])
59
60
61
class GenericNavigableComic(GenericComic):
    """Generic class for "navigable" comics : with first/next arrows.

    This class applies to comic where previous and next comics can be
    accessed from a given comic. Once given a starting point (either
    the first comic or the last comic retrieved), it will handle the
    navigation, the retrieval of the soup object and the setting of
    the 'url' attribute on retrieved comics. This limits a lot the
    amount of boilerplate code in the different implementation classes.

    The method `get_next_comic` is implemented in terms of new
    more specialized methods to be implemented/overridden:
        - get_first_comic_link
        - get_navi_link
        - get_comic_info
        - get_url_from_link
    """
    _categories = ('NAVIGABLE', )

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        Sometimes this can be retrieved of any comic page, sometimes on
        the archive page, sometimes it doesn't exist at all and one has
        to iterate backward to find it before hardcoding the result found.
        """
        raise NotImplementedError

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next (or previous - for dev purposes) comic.

        `next_` is True for the next comic, False for the previous one.
        """
        raise NotImplementedError

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        `get_next_comic` skips the comic when None is returned; the
        returned dict must not contain a 'url' entry (it is set there).
        """
        raise NotImplementedError

    @classmethod
    def get_url_from_link(cls, link):
        """Get url corresponding to a link. Default implementation is similar to get_href."""
        return link['href']

    @classmethod
    def get_next_link(cls, last_soup):
        """Get link to next comic."""
        return cls.get_navi_link(last_soup, True)

    @classmethod
    def get_prev_link(cls, last_soup):
        """Get link to previous comic."""
        return cls.get_navi_link(last_soup, False)

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generic implementation of get_next_comic for navigable comics.

        Starts after `last_comic` (its 'url' entry is read) when it is
        provided, from the first comic link otherwise. "Next" links are
        then followed until there is none or until a link leads to the
        URL that was just handled.

        Yields the dicts returned by `get_comic_info` (None results are
        skipped) after setting their 'url' entry.
        """
        url = last_comic['url'] if last_comic else None
        cls.log("starting 'get_next_comic' from %s" % url)
        next_comic = \
            cls.get_next_link(get_soup_at_url(url)) \
            if url else \
            cls.get_first_comic_link()
        cls.log("next/first comic will be %s (url is %s)" % (str(next_comic), url))
        # Uncomment for development to validate the navigation helpers:
        # cls.check_navigation(url)
        while next_comic:
            prev_url, url = url, cls.get_url_from_link(next_comic)
            # A "next" link pointing to the current page would loop forever.
            if prev_url == url:
                cls.log("got same url %s" % url)
                break
            cls.log("about to get %s (%s)" % (url, str(next_comic)))
            soup = get_soup_at_url(url)
            comic = cls.get_comic_info(soup, next_comic)
            if comic is not None:
                assert 'url' not in comic
                comic['url'] = url
                yield comic
            next_comic = cls.get_next_link(soup)
            cls.log("next comic will be %s" % str(next_comic))

    @classmethod
    def check_first_link(cls):
        """Check that navigation to first comic seems to be working - for dev purposes.

        Returns True when a first link exists and its page can be
        retrieved without an HTTP error, False otherwise.
        """
        # Imported here: a bare `import urllib` does not guarantee that
        # the `urllib.error` submodule has been loaded.
        from urllib.error import HTTPError
        cls.log("about to check first link")
        ok = True
        firstlink = cls.get_first_comic_link()
        if firstlink is None:
            print("From %s : no first link" % cls.url)
            ok = False
        else:
            firsturl = cls.get_url_from_link(firstlink)
            try:
                get_soup_at_url(firsturl)
            except HTTPError:
                print("From %s : invalid first url" % cls.url)
                ok = False
        cls.log("checked first link -> returned %d" % ok)
        return ok

    @classmethod
    def check_prev_next_links(cls, url):
        """Check that navigation to prev/next from a given URL seems to be working - for dev purposes.

        Going backward then forward (and forward then backward) is
        expected to lead back to `url`. Returns True when it does,
        False otherwise or when no previous/next link is found.
        """
        cls.log("about to check prev/next from %s" % url)
        ok = True
        if url is None:
            prevlink, nextlink = None, None
        else:
            soup = get_soup_at_url(url)
            prevlink, nextlink = cls.get_prev_link(soup), cls.get_next_link(soup)
        if prevlink is None and nextlink is None:
            print("From %s : no previous nor next" % url)
            ok = False
        else:
            if prevlink:
                prevurl = cls.get_url_from_link(prevlink)
                prevsoup = get_soup_at_url(prevurl)
                # The link back may be missing: report it as a failed
                # check instead of failing on a None link.
                link_back = cls.get_next_link(prevsoup)
                prevnext = cls.get_url_from_link(link_back) if link_back else None
                if prevnext != url:
                    print("From %s, going backward then forward leads to %s" % (url, prevnext))
                    ok = False
            if nextlink:
                nexturl = cls.get_url_from_link(nextlink)
                # A "next" link pointing to the page itself is tolerated
                # (get_next_comic stops on it).
                if nexturl != url:
                    nextsoup = get_soup_at_url(nexturl)
                    link_back = cls.get_prev_link(nextsoup)
                    nextprev = cls.get_url_from_link(link_back) if link_back else None
                    if nextprev != url:
                        print("From %s, going forward then backward leads to %s" % (url, nextprev))
                        ok = False
        cls.log("checked prev/next from %s -> returned %d" % (url, ok))
        return ok

    @classmethod
    def check_navigation(cls, url):
        """Check that navigation functions seem to be working - for dev purposes.

        Runs both `check_first_link` and `check_prev_next_links` (each is
        always executed) and returns True only when both succeed.
        """
        cls.log("about to check navigation from %s" % url)
        first = cls.check_first_link()
        prevnext = cls.check_prev_next_links(url)
        ok = first and prevnext
        cls.log("checked navigation from %s -> returned %d" % (url, ok))
        return ok
201
202
203
class GenericListableComic(GenericComic):
    """Generic class for "listable" comics : with a list of comics (aka 'archive')

    The method `get_next_comic` is implemented in terms of new
    more specialized methods to be implemented/overridden:
        - get_archive_elements
        - get_url_from_archive_element
        - get_comic_info
    """
    _categories = ('LISTABLE', )

    @classmethod
    def get_archive_elements(cls):
        """Get the archive elements (iterable)."""
        raise NotImplementedError

    @classmethod
    def get_url_from_archive_element(cls, archive_elt):
        """Get url corresponding to an archive element."""
        raise NotImplementedError

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Get information about a particular comics.

        `get_next_comic` skips the comic when None is returned; the
        returned dict must not contain a 'url' entry (it is set there).
        """
        raise NotImplementedError

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generic implementation of get_next_comic for listable comics.

        Archive elements are walked in the order given by
        `get_archive_elements`. When `last_comic` is provided, elements
        are ignored up to and including the one whose URL is
        `last_comic['url']`; every later element is retrieved.

        Yields the dicts returned by `get_comic_info` (None results are
        skipped) after setting their 'url' entry.
        """
        waiting_for_url = last_comic['url'] if last_comic else None
        for archive_elt in cls.get_archive_elements():
            url = cls.get_url_from_archive_element(archive_elt)
            cls.log("considering %s" % url)
            if waiting_for_url is None:
                cls.log("about to get %s (%s)" % (url, str(archive_elt)))
                soup = get_soup_at_url(url)
                comic = cls.get_comic_info(soup, archive_elt)
                if comic is not None:
                    assert 'url' not in comic
                    comic['url'] = url
                    yield comic
            elif waiting_for_url == url:
                # Last known comic reached: everything after it is new.
                waiting_for_url = None
        # The last known comic was never seen in the archive.
        if waiting_for_url is not None:
            print("Did not find %s : there might be a problem" % waiting_for_url)
248
249
# Helper functions corresponding to get_first_comic_link/get_navi_link
250
251
252
@classmethod
def get_link_rel_next(cls, last_soup, next_):
    """Implementation of get_navi_link.

    Looks up a 'link' tag whose rel is 'next' (when `next_` is true)
    or 'prev' (otherwise) and returns the result of `last_soup.find`.
    """
    return last_soup.find('link', rel='next' if next_ else 'prev')
256
257
258
@classmethod
def get_a_rel_next(cls, last_soup, next_):
    """Implementation of get_navi_link.

    Looks up an 'a' tag whose rel is 'next' (when `next_` is true)
    or 'prev' (otherwise) and returns the result of `last_soup.find`.
    """
    return last_soup.find('a', rel='next' if next_ else 'prev')
262
263
264
@classmethod
def get_a_navi_navinext(cls, last_soup, next_):
    """Implementation of get_navi_link.

    Looks up an 'a' tag with class 'navi navi-next' (when `next_` is
    true) or 'navi navi-prev' (otherwise) and returns the result of
    `last_soup.find`.
    """
    return last_soup.find('a', class_='navi navi-next' if next_ else 'navi navi-prev')
268
269
270
@classmethod
def get_a_navi_comicnavnext_navinext(cls, last_soup, next_):
    """Implementation of get_navi_link.

    Looks up an 'a' tag with class 'navi comic-nav-next navi-next'
    (when `next_` is true) or 'navi comic-nav-previous navi-prev'
    (otherwise) and returns the result of `last_soup.find`.
    """
    return last_soup.find('a', class_='navi comic-nav-next navi-next' if next_ else 'navi comic-nav-previous navi-prev')
274
275
276
@classmethod
def get_a_comicnavbase_comicnavnext(cls, last_soup, next_):
    """Implementation of get_navi_link.

    Looks up an 'a' tag with class 'comic-nav-base comic-nav-next'
    (when `next_` is true) or 'comic-nav-base comic-nav-previous'
    (otherwise) and returns the result of `last_soup.find`.
    """
    return last_soup.find('a', class_='comic-nav-base comic-nav-next' if next_ else 'comic-nav-base comic-nav-previous')
280
281
282
@classmethod
def get_a_navi_navifirst(cls):
    """Implementation of get_first_comic_link.

    Retrieves the page at `cls.url` and looks up an 'a' tag with class
    'navi navi-first' in it.
    """
    return get_soup_at_url(cls.url).find('a', class_='navi navi-first')
286
287
288
@classmethod
def get_div_navfirst_a(cls):
    """Implementation of get_first_comic_link.

    Retrieves the page at `cls.url`, looks up a 'div' tag with class
    "nav-first" in it, then an 'a' tag inside that div.
    """
    # NOTE(review): a page without such a div makes the second lookup
    # fail on the missing result - confirm this loud failure is wanted.
    return get_soup_at_url(cls.url).find('div', class_="nav-first").find('a')
292
293
294
@classmethod
def get_a_comicnavbase_comicnavfirst(cls):
    """Implementation of get_first_comic_link.

    Retrieves the page at `cls.url` and looks up an 'a' tag with class
    'comic-nav-base comic-nav-first' in it.
    """
    return get_soup_at_url(cls.url).find('a', class_='comic-nav-base comic-nav-first')
298
299
300
@classmethod
def simulate_first_link(cls):
    """Implementation of get_first_comic_link creating a link-like object from
    an URL provided by the class.

    The class must define a `first_url` attribute; the returned dict
    exposes it under the 'href' key.

    Note: The first URL can easily be found using :
    `get_first_comic_link = navigate_to_first_comic`.
    """
    return {'href': cls.first_url}
309
310
311
@classmethod
def navigate_to_first_comic(cls):
    """Implementation of get_first_comic_link navigating from a user provided
    URL to the first comic.

    Sometimes, the first comic cannot be reached directly so to start
    from the first comic one has to go to the previous comic until
    there is no previous comics. Once this URL is reached, it
    is better to hardcode it but for development purposes, it
    is convenient to have an automatic way to find it.

    Then, the URL found can easily be used via `simulate_first_link`.

    The starting URL is read from standard input and every URL visited
    is printed. Returns a link-like dict with the last URL reached
    under the 'href' key.
    """
    url = input("Get starting URL: ")
    print(url)
    comic = cls.get_prev_link(get_soup_at_url(url))
    # Walk backward until a page has no "previous" link.
    while comic:
        url = cls.get_url_from_link(comic)
        print(url)
        comic = cls.get_prev_link(get_soup_at_url(url))
    return {'href': url}
332
333
334
class GenericEmptyComic(GenericComic):
    """Generic class for comics where nothing is to be done.

    It can be useful to deactivate temporarily comics that do not work
    properly by replacing `class MyComic(GenericWhateverComic)` with
    `class MyComic(GenericEmptyComic, GenericWhateverComic)`."""
    _categories = ('EMPTY', )

    @classmethod
    def get_next_comic(cls, last_comic):
        """Implementation of get_next_comic returning no comics.

        `last_comic` is ignored; an empty list is always returned.
        """
        cls.log("comic is considered as empty - returning no comic")
        return []
347
348
# 349 View Code Duplication (review-tool annotation, not part of the source)
class ExtraFabulousComics(GenericNavigableComic):
    """Class to retrieve Extra Fabulous Comics."""
    name = 'efc'
    long_name = 'Extra Fabulous Comics'
    url = 'http://extrafabulouscomics.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Args:
            soup: parsed page of the comic.
            link: link that led to the page (not used here).

        Returns:
            dict with 'title', 'img' (list of image sources), 'month',
            'year', 'day' and 'prefix' (the title followed by '-').
        """
        # re.escape: the dots of the URL must only match literal dots.
        img_src_re = re.compile('^%s/wp-content/uploads/' % re.escape(cls.url))
        imgs = soup.find_all('img', src=img_src_re)
        title = soup.find('meta', property='og:title')['content']
        # Only the first 10 characters (the "%Y-%m-%d" part) are parsed.
        date_str = soup.find('meta', property='article:published_time')['content'][:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        return {
            'title': title,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'prefix': title + '-'
        }
373
374
# 375 View Code Duplication (review-tool annotation, not part of the source)
class GenericLeMondeBlog(GenericNavigableComic):
    """Generic class to retrieve comics from Le Monde blogs.

    Subclasses are expected to set `first_url`, which is used as the
    starting point through `simulate_first_link`.
    """
    _categories = ('LEMONDE', 'FRANCAIS')
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    # Placeholder to be overridden by each subclass.
    first_url = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Args:
            soup: parsed page of the comic.
            link: link that led to the page (not used here).

        Returns:
            dict with 'title', 'url2' (the shortlink of the page), 'img'
            (list of image URLs converted with
            `convert_iri_to_plain_ascii_uri`), 'month', 'year' and 'day'.
        """
        url2 = soup.find('link', rel='shortlink')['href']
        title = soup.find('meta', property='og:title')['content']
        date_str = soup.find("span", class_="entry-date").string
        # NOTE(review): third argument is presumably the locale used to
        # parse the French month name - confirm against string_to_date.
        day = string_to_date(date_str, "%d %B %Y", "fr_FR.utf8")
        imgs = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'url2': url2,
            'img': [convert_iri_to_plain_ascii_uri(i['content']) for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
398
399
400
class ZepWorld(GenericLeMondeBlog):
    """Class to retrieve Zep World comics."""
    name = "zep"
    long_name = "Zep World"
    url = "http://zepworld.blog.lemonde.fr"
    # Starting point of the navigation (overrides the base placeholder).
    first_url = "http://zepworld.blog.lemonde.fr/2014/10/31/bientot-le-blog-de-zep/"
406
407
408
class Vidberg(GenericLeMondeBlog):
    """Class to retrieve Vidberg comics."""
    name = 'vidberg'
    long_name = "Vidberg - l'actu en patates"
    url = "http://vidberg.blog.lemonde.fr"
    # Starting point of the navigation (overrides the base placeholder).
    # Not the first but I didn't find an efficient way to retrieve it
    first_url = "http://vidberg.blog.lemonde.fr/2012/02/09/revue-de-campagne-la-campagne-du-modem-semballe/"
415
416
417
class Plantu(GenericLeMondeBlog):
    """Class to retrieve Plantu comics."""
    name = 'plantu'
    long_name = "Plantu"
    url = "http://plantu.blog.lemonde.fr"
    # Starting point of the navigation (overrides the base placeholder).
    first_url = "http://plantu.blog.lemonde.fr/2014/10/28/stress-test-a-bruxelles/"
423
424
425
class XavierGorce(GenericLeMondeBlog):
    """Class to retrieve Xavier Gorce comics."""
    name = 'gorce'
    long_name = "Xavier Gorce"
    url = "http://xaviergorce.blog.lemonde.fr"
    # Starting point of the navigation (overrides the base placeholder).
    first_url = "http://xaviergorce.blog.lemonde.fr/2015/01/09/distinction/"
431
432
433
class CartooningForPeace(GenericLeMondeBlog):
    """Class to retrieve Cartooning For Peace comics."""
    name = 'forpeace'
    long_name = "Cartooning For Peace"
    url = "http://cartooningforpeace.blog.lemonde.fr"
    # Starting point of the navigation (overrides the base placeholder).
    first_url = "http://cartooningforpeace.blog.lemonde.fr/2014/12/15/bado/"
439
440
441
class Aurel(GenericLeMondeBlog):
    """Class to retrieve Aurel comics."""
    name = 'aurel'
    long_name = "Aurel"
    url = "http://aurel.blog.lemonde.fr"
    # Starting point of the navigation (overrides the base placeholder).
    first_url = "http://aurel.blog.lemonde.fr/2014/09/29/le-senat-repasse-a-droite/"
447
448
449
class LesCulottees(GenericLeMondeBlog):
    """Class to retrieve Les Culottees comics."""
    name = 'culottees'
    long_name = 'Les Culottees'
    url = "http://lesculottees.blog.lemonde.fr"
    # Starting point of the navigation (overrides the base placeholder).
    first_url = "http://lesculottees.blog.lemonde.fr/2016/01/11/clementine-delait-femme-a-barbe/"
455
456
457
class UneAnneeAuLycee(GenericLeMondeBlog):
    """Class to retrieve Une Annee Au Lycee comics."""
    name = 'lycee'
    long_name = 'Une Annee au Lycee'
    url = 'http://uneanneeaulycee.blog.lemonde.fr'
    # Starting point of the navigation (overrides the base placeholder).
    first_url = "http://uneanneeaulycee.blog.lemonde.fr/2016/06/13/la-semaine-du-bac-est-arrivee/"
463
464
465
class Rall(GenericNavigableComic):
    """Class to retrieve Ted Rall comics."""
    # Also on http://www.gocomics.com/tedrall
    name = 'rall'
    long_name = "Ted Rall"
    url = "http://rall.com/comic"
    _categories = ('RALL', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    # Not the first but I didn't find an efficient way to retrieve it
    first_url = "http://rall.com/2014/01/30/los-angeles-times-cartoon-well-miss-those-california-flowers"

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Args:
            soup: parsed page of the comic.
            link: link that led to the page (not used here).

        Returns:
            dict with 'title', 'author', 'month', 'year', 'day',
            'description' and 'img' (list of image sources).
        """
        title = soup.find('meta', property='og:title')['content']
        author = soup.find("span", class_="author vcard").find("a").string
        date_str = soup.find("span", class_="entry-date").string
        day = string_to_date(date_str, "%B %d, %Y")
        desc = soup.find('meta', property='og:description')['content']
        imgs = soup.find('div', class_='entry-content').find_all('img')
        # The last 7 images are always dropped, whatever their number.
        imgs = imgs[:-7]  # remove social media buttons
        return {
            'title': title,
            'author': author,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'description': desc,
            'img': [i['src'] for i in imgs],
        }
496
497
498
class Dilem(GenericNavigableComic):
    """Class to retrieve Ali Dilem comics."""
    name = 'dilem'
    long_name = 'Ali Dilem'
    url = 'http://information.tv5monde.com/dilem'
    _categories = ('FRANCAIS', )
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = "http://information.tv5monde.com/dilem/2004-06-26"

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns the 'a' tag found in the relevant 'li' tag, or None when
        there is no such 'li'.
        """
        # prev is next / next is prev: the 'prev' item is looked up when
        # the next comic is wanted, and the other way round.
        li = last_soup.find('li', class_='prev' if next_ else 'next')
        return li.find('a') if li else None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Args:
            soup: parsed page of the comic.
            link: link that led to the page (not used here).

        Returns:
            dict with 'short_url', 'title', 'img' (list of image URLs),
            'day', 'month' and 'year'.
        """
        short_url = soup.find('link', rel='shortlink')['href']
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        imgs = soup.find_all('meta', property='og:image')
        date_str = soup.find('span', property='dc:date')['content']
        # Only the first 10 characters (the "%Y-%m-%d" part) are parsed.
        date_str = date_str[:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        return {
            'short_url': short_url,
            'title': title,
            'img': [i['content'] for i in imgs],
            'day': day.day,
            'month': day.month,
            'year': day.year,
        }
532
533
534
class SpaceAvalanche(GenericNavigableComic):
    """Class to retrieve Space Avalanche comics."""
    name = 'avalanche'
    long_name = 'Space Avalanche'
    url = 'http://www.spaceavalanche.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        The link is hardcoded; it carries a 'title' entry because
        `get_comic_info` reads the title from the link.
        """
        return {'href': "http://www.spaceavalanche.com/2009/02/02/irish-sea/", 'title': "Irish Sea"}

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Args:
            soup: parsed page of the comic.
            link: link that led to the page; its 'title' entry and its
                URL are used.

        Returns:
            dict with 'title', 'day', 'month', 'year' and 'img' (list of
            image sources).
        """
        # The date is read from the /year/month/day/ part of the URL.
        url_date_re = re.compile(r'.*/([0-9]*)/([0-9]*)/([0-9]*)/.*$')
        title = link['title']
        url = cls.get_url_from_link(link)
        year, month, day = [int(s)
                            for s in url_date_re.match(url).groups()]
        imgs = soup.find("div", class_="entry").find_all("img")
        return {
            'title': title,
            'day': day,
            'month': month,
            'year': year,
            'img': [i['src'] for i in imgs],
        }
562
563
564
class ZenPencils(GenericNavigableComic):
    """Class to retrieve ZenPencils comics."""
    # Also on http://zenpencils.tumblr.com
    # Also on http://www.gocomics.com/zen-pencils
    name = 'zenpencils'
    long_name = 'Zen Pencils'
    url = 'http://zenpencils.com'
    _categories = ('ZENPENCILS', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = "http://zenpencils.com/comic/1-ralph-waldo-emerson-make-them-cry/"

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Args:
            soup: parsed page of the comic.
            link: link that led to the page (not used here).

        Returns:
            dict with 'title', 'description', 'author', 'day', 'month',
            'year' and 'img' (list of image sources joined to `cls.url`).
        """
        imgs = soup.find('div', id='comic').find_all('img')
        post = soup.find('div', class_='post-content')
        author = post.find("span", class_="post-author").find("a").string
        title = soup.find('meta', property='og:title')['content']
        date_str = post.find('span', class_='post-date').string
        day = string_to_date(date_str, "%B %d, %Y")
        # Sanity checks on the page layout: at least one image, and each
        # image's alt/title agree and are either empty or the page title.
        assert imgs
        assert all(i['alt'] == i['title'] for i in imgs)
        assert all(i['alt'] in (title, "") for i in imgs)
        desc = soup.find('meta', property='og:description')['content']
        return {
            'title': title,
            'description': desc,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
        }
599
600
601
class ItsTheTie(GenericNavigableComic):
    """Class to retrieve It's the tie comics."""
    # Also on http://itsthetie.tumblr.com
    # Also on https://tapastic.com/series/itsthetie
    name = 'tie'
    long_name = "It's the tie"
    url = "http://itsthetie.com"
    _categories = ('TIE', )
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Args:
            soup: parsed page of the comic.
            link: link that led to the page (not used here).

        Returns:
            dict with 'title', 'month', 'year', 'day', 'img' (list of
            image URLs, bonus images included) and 'tags' (empty string
            when the page has no 'article:tag' meta).
        """
        title = soup.find('h1', class_='comic-title').find('a').string
        date_str = soup.find('header', class_='comic-meta entry-meta').find('a').string
        day = string_to_date(date_str, "%B %d, %Y")
        # Bonus images may or may not be in meta og:image.
        imgs = soup.find_all('meta', property='og:image')
        imgs_src = [i['content'] for i in imgs]
        bonus = soup.find_all('img', attrs={'data-oversrc': True})
        bonus_src = [b['data-oversrc'] for b in bonus]
        # Bonus images already listed in og:image are not added twice.
        all_imgs_src = imgs_src + [s for s in bonus_src if s not in imgs_src]
        # This particular image is filtered out of the result.
        all_imgs_src = [s for s in all_imgs_src if not s.endswith("/2016/01/bonus-panel.png")]
        tag_meta = soup.find('meta', property='article:tag')
        tags = tag_meta['content'] if tag_meta else ""
        return {
            'title': title,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': all_imgs_src,
            'tags': tags,
        }
635
636
637
class PenelopeBagieu(GenericNavigableComic):
    """Class to retrieve comics from Penelope Bagieu's blog."""
    name = 'bagieu'
    long_name = 'Ma vie est tout a fait fascinante (Bagieu)'
    url = 'http://www.penelope-jolicoeur.com'
    _categories = ('FRANCAIS', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.penelope-jolicoeur.com/2007/02/ma-vie-mon-oeuv.html'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Args:
            soup: parsed page of the comic.
            link: link that led to the page (not used here).

        Returns:
            dict with 'title', 'img' (list of image sources), 'month',
            'year' and 'day'.
        """
        date_str = soup.find('h2', class_='date-header').string
        # NOTE(review): third argument is presumably the locale used to
        # parse the French day/month names - confirm against string_to_date.
        day = string_to_date(date_str, "%A %d %B %Y", "fr_FR.utf8")
        imgs = soup.find('div', class_='entry-body').find_all('img')
        title = soup.find('h3', class_='entry-header').string
        return {
            'title': title,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
661
662
663
class OneOneOneOneComic(GenericNavigableComic):
    """Class to retrieve 1111 Comics."""
    # Also on http://comics1111.tumblr.com
    # Also on https://tapastic.com/series/1111-Comics
    name = '1111'
    long_name = '1111 Comics'
    url = 'http://www.1111comics.me'
    _categories = ('ONEONEONEONE', )
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Args:
            soup: parsed page of the comic.
            link: link that led to the page (not used here).

        Returns:
            dict with 'title', 'month', 'year', 'day' and 'img' (list of
            image URLs taken from the og:image metas).
        """
        title = soup.find('h1', class_='comic-title').find('a').string
        date_str = soup.find('header', class_='comic-meta entry-meta').find('a').string
        day = string_to_date(date_str, "%B %d, %Y")
        imgs = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [i['content'] for i in imgs],
        }
688
689
690
class AngryAtNothing(GenericNavigableComic):
    """Class to retrieve Angry at Nothing comics."""
    # Also on http://tapastic.com/series/Comics-yeah-definitely-comics-
    name = 'angry'
    long_name = 'Angry At Nothing'
    url = 'http://www.angryatnothing.net'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Args:
            soup: parsed page of the comic.
            link: link that led to the page (not used here).

        Returns:
            dict with 'title', 'month', 'year', 'day' and 'img' (list of
            image URLs taken from the og:image metas).
        """
        title = soup.find('h1', class_='comic-title').find('a').string
        date_str = soup.find('header', class_='comic-meta entry-meta').find('a').string
        day = string_to_date(date_str, "%B %d, %Y")
        imgs = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [i['content'] for i in imgs],
        }
713
714
715
class NeDroid(GenericNavigableComic):
    """Class to retrieve NeDroid comics."""
    name = 'nedroid'
    long_name = 'NeDroid'
    url = 'http://nedroid.com'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Args:
            soup: parsed page of the comic.
            link: link that led to the page (not used here).

        Returns:
            dict with 'short_url', 'title' (the image 'alt'), 'title2'
            (the image 'title'), 'img' (list of image sources joined to
            `cls.url`), 'day', 'month', 'year' and 'num' (the number
            found in the short URL).
        """
        # re.escape / escaped dots: dots must only match literal dots.
        short_url_re = re.compile(r'^%s/\?p=([0-9]*)' % re.escape(cls.url))
        comic_url_re = re.compile(r'//nedroid\.com/comics/([0-9]*)-([0-9]*)-([0-9]*).*')
        short_url = cls.get_url_from_link(soup.find('link', rel='shortlink'))
        num = int(short_url_re.match(short_url).groups()[0])
        imgs = soup.find('div', id='comic').find_all('img')
        # Checked before the first image is used below.
        assert len(imgs) == 1
        # The date is read from the file name of the image.
        year, month, day = [int(s) for s in comic_url_re.match(imgs[0]['src']).groups()]
        title = imgs[0]['alt']
        title2 = imgs[0]['title']
        return {
            'short_url': short_url,
            'title': title,
            'title2': title2,
            'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
            'day': day,
            'month': month,
            'year': year,
            'num': num,
        }
746
747
748
class Garfield(GenericNavigableComic):
    """Class to retrieve Garfield comics."""
    # Also on http://www.gocomics.com/garfield
    name = 'garfield'
    long_name = 'Garfield'
    url = 'https://garfield.com'
    _categories = ('GARFIELD', )
    get_first_comic_link = simulate_first_link
    first_url = 'https://garfield.com/comic/1978/06/19'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Looks up an 'a' tag with class 'comic-arrow-right' (next) or
        'comic-arrow-left' (previous).
        """
        return last_soup.find('a', class_='comic-arrow-right' if next_ else 'comic-arrow-left')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Args:
            soup: parsed page of the comic.
            link: link that led to the page; the date is read from its
                URL.

        Returns:
            dict with 'month', 'year', 'day' and 'img' (list of image
            sources).
        """
        url = cls.get_url_from_link(link)
        # re.escape: the dot of the URL must only match a literal dot.
        date_re = re.compile('^%s/comic/([0-9]*)/([0-9]*)/([0-9]*)' % re.escape(cls.url))
        year, month, day = [int(s) for s in date_re.match(url).groups()]
        imgs = soup.find('div', class_='comic-display').find_all('img', class_='img-responsive')
        return {
            'month': month,
            'year': year,
            'day': day,
            'img': [i['src'] for i in imgs],
        }
776
777
778
class Dilbert(GenericNavigableComic):
    """Retriever for the Dilbert strips hosted on dilbert.com."""
    # Also on http://www.gocomics.com/dilbert-classics
    name = 'dilbert'
    long_name = 'Dilbert'
    url = 'http://dilbert.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://dilbert.com/strip/1989-04-16'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the <a> inside the next/previous navigation div, or None."""
        wanted_class = 'nav-comic nav-right' if next_ else 'nav-comic nav-left'
        container = last_soup.find('div', class_=wanted_class)
        if not container:
            return None
        return container.find('a')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic description from the page's <meta> tags.

        The 'link' argument is not used here; everything is read from 'soup'.
        """
        def meta_content(prop):
            """Content of the first <meta> tag with the given property."""
            return soup.find('meta', property=prop)['content']

        headline = meta_content('og:title')
        image_metas = soup.find_all('meta', property='og:image')
        summary = meta_content('og:description')
        published = string_to_date(meta_content('article:publish_date'), "%B %d, %Y")
        writer = meta_content('article:author')
        # Only the first 'article:tag' meta is read.
        labels = meta_content('article:tag')
        return {
            'title': headline,
            'description': summary,
            'img': [meta['content'] for meta in image_metas],
            'author': writer,
            'tags': labels,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
814
815
816
class VictimsOfCircumsolar(GenericNavigableComic):
    """Retriever for the Victims Of Circumsolar comics."""
    name = 'circumsolar'
    long_name = 'Victims Of Circumsolar'
    url = 'http://www.victimsofcircumsolar.com'
    get_navi_link = get_a_navi_comicnavnext_navinext
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.victimsofcircumsolar.com/comic/modern-addiction'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect title, description and images of the comic in 'soup'.

        No date is returned: it is only available on the archive page.
        """
        def last_meta_content(prop):
            """Content of the last <meta> tag with the given property."""
            return soup.find_all('meta', property=prop)[-1]['content']

        heading = last_meta_content('og:title')
        summary = last_meta_content('og:description')
        pictures = soup.find('div', id='comic').find_all('img')
        # Sanity check: every image is labelled with the comic title.
        assert all(pic['title'] == pic['alt'] == heading for pic in pictures)
        return {
            'title': heading,
            'description': summary,
            'img': [pic['src'] for pic in pictures],
        }
838
839
840
class ThreeWordPhrase(GenericNavigableComic):
    """Retriever for the Three Word Phrase comics."""
    # Also on http://www.threewordphrase.tumblr.com
    name = 'threeword'
    long_name = 'Three Word Phrase'
    url = 'http://threewordphrase.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the parent element of the 'first' button on the home page."""
        home = get_soup_at_url(cls.url)
        first_button = home.find('img', src='/firstlink.gif')
        return first_button.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the element wrapping the next/previous button.

        None is returned when that wrapping element carries no 'href'.
        """
        button_src = '/nextlink.gif' if next_ else '/prevlink.gif'
        wrapper = last_soup.find('img', src=button_src).parent
        if wrapper.get('href') is None:
            return None
        return wrapper

    @classmethod
    def get_comic_info(cls, soup, link):
        """Describe the comic in 'soup': page title, alt texts and images."""
        # Images whose source ends with one of these are site decoration.
        decoration_suffixes = (
            'link.gif', '32.png', 'twpbookad.jpg',
            'merchad.jpg', 'header.gif', 'tipjar.jpg')
        title_tag = soup.find('title')
        strips = []
        for candidate in soup.find_all('img'):
            if candidate['src'].endswith(decoration_suffixes):
                continue
            strips.append(candidate)
        alt_texts = [pic.get('alt') for pic in strips if pic.get('alt')]
        return {
            'title': title_tag.string if title_tag else None,
            'title2': '  '.join(alt_texts),
            'img': [urljoin_wrapper(cls.url, pic['src']) for pic in strips],
        }
872
873
874
class DeadlyPanel(GenericEmptyComic, GenericNavigableComic):
    """Retriever for the Deadly Panel comics."""
    # Also on https://tapastic.com/series/deadlypanel
    name = 'deadly'
    long_name = 'Deadly Panel'
    url = 'http://www.deadlypanel.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the image sources found in the 'comic' div of 'soup'."""
        comic_div = soup.find('div', id='comic')
        pictures = comic_div.find_all('img')
        # Sanity check: alt and title attributes agree on every image.
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        return {
            'img': [pic['src'] for pic in pictures],
        }
891
892
893
class TheGentlemanArmchair(GenericNavigableComic):
    """Retriever for The Gentleman Armchair comics."""
    name = 'gentlemanarmchair'
    long_name = 'The Gentleman Armchair'
    url = 'http://thegentlemansarmchair.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Read title, author, date and images of the comic from 'soup'."""
        heading = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        writer = author_span.find("a").string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find('div', id='comic').find_all('img')
        return {
            'img': [pic['src'] for pic in pictures],
            'title': heading,
            'author': writer,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
917
918
919
class MyExtraLife(GenericNavigableComic):
    """Retriever for the My Extra Life comics."""
    name = 'extralife'
    long_name = 'My Extra Life'
    url = 'http://www.myextralife.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'first comic' navigation link from the home page."""
        home = get_soup_at_url(cls.url)
        return home.find('a', class_='comic_nav_link first_comic_link')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Read title, date and images of the comic from 'soup'."""
        heading = soup.find("h1", class_="comic_title").string
        raw_date = soup.find("span", class_="comic_date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find_all("img", class_="comic")
        # Sanity check: every comic image is labelled with the title.
        assert all(pic['alt'] == pic['title'] == heading for pic in pictures)
        return {
            'title': heading,
            # Images with an empty source are left out.
            'img': [pic['src'] for pic in pictures if pic["src"]],
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
946
947
948
class SaturdayMorningBreakfastCereal(GenericNavigableComic):
    """Class to retrieve Saturday Morning Breakfast Cereal comics."""
    # Also on http://www.gocomics.com/saturday-morning-breakfast-cereal
    # Also on http://smbc-comics.tumblr.com
    name = 'smbc'
    long_name = 'Saturday Morning Breakfast Cereal'
    url = 'http://www.smbc-comics.com'
    _categories = ('SMBC', )
    get_navi_link = get_a_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics (the <a rel="start"> on the home page)."""
        return get_soup_at_url(cls.url).find('a', rel='start')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The main image is the <img id="cc-comic">. An optional second image
        is taken from the 'aftercomic' div when that div exists and actually
        contains an image with a non-empty source.
        """
        image1 = soup.find('img', id='cc-comic')
        imgs = [image1['src']]
        aftercomic = soup.find('div', id='aftercomic')
        # The div may be present without any <img> inside: do not crash then.
        image2 = aftercomic.find('img') if aftercomic else None
        image_url2 = image2.get('src') if image2 is not None else ''
        if image_url2:
            imgs.append(image_url2)
        date_str = soup.find('div', class_='cc-publishtime').contents[0]
        day = string_to_date(date_str, "%B %d, %Y")
        return {
            'title': image1['title'],
            'img': [urljoin_wrapper(cls.url, i) for i in imgs],
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
980
981
982
class PerryBibleFellowship(GenericListableComic):
    """Retriever for the Perry Bible Fellowship comics."""
    name = 'pbf'
    long_name = 'Perry Bible Fellowship'
    url = 'http://pbfcomics.com'
    get_url_from_archive_element = join_cls_url_to_href

    @classmethod
    def get_archive_elements(cls):
        """Return the home page links shaped like '/<digits>/', reversed."""
        numbered_link_re = re.compile('^/[0-9]*/$')
        home = get_soup_at_url(cls.url)
        anchors = home.find_all('a', href=numbered_link_re)
        return reversed(anchors)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Describe the comic in 'soup' using its archive 'link'.

        The comic number comes from the link's 'name' attribute and is
        checked against its href.
        """
        page_url = cls.get_url_from_archive_element(link)
        strip_src_re = re.compile('^/archive_b/PBF.*')
        label = link.string
        number = int(link['name'])
        target = link['href']
        assert target == '/%d/' % number
        pictures = soup.find_all('img', src=strip_src_re)
        # Exactly one strip image is expected, labelled like the link.
        assert len(pictures) == 1
        assert pictures[0]['alt'] == label
        return {
            'num': number,
            'name': label,
            'img': [urljoin_wrapper(page_url, pic['src']) for pic in pictures],
            'prefix': '%d-' % number,
        }
1012
1013
1014
class Mercworks(GenericNavigableComic):
    """Retriever for the Mercworks comics."""
    # Also on http://mercworks.tumblr.com
    name = 'mercworks'
    long_name = 'Mercworks'
    url = 'http://mercworks.net'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic description from the page's <meta> tags."""
        def named_meta_content(meta_name):
            """Content of the first <meta> tag with the given name."""
            return soup.find('meta', attrs={'name': meta_name})['content']

        heading = soup.find('meta', property='og:title')['content']
        description_meta = soup.find('meta', property='og:description')
        # The description meta is optional; fall back on an empty string.
        summary = description_meta['content'] if description_meta else ""
        writer = named_meta_content('shareaholic:article_author_name')
        timestamp = named_meta_content('shareaholic:article_published_time')
        # Only the first 10 characters are parsed as the date.
        published = string_to_date(timestamp[:10], "%Y-%m-%d")
        image_metas = soup.find_all('meta', property='og:image')
        return {
            'img': [meta['content'] for meta in image_metas],
            'title': heading,
            'author': writer,
            'desc': summary,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
1043
1044
1045
class BerkeleyMews(GenericListableComic):
    """Class to retrieve Berkeley Mews comics."""
    # Also on http://mews.tumblr.com
    # Also on http://www.gocomics.com/berkeley-mews
    name = 'berkeley'
    long_name = 'Berkeley Mews'
    url = 'http://www.berkeleymews.com'
    _categories = ('BERKELEY', )
    get_url_from_archive_element = get_href
    # Matches '<url>/?p=<num>'; the site URL is escaped so that its dots
    # only match literal dots.
    comic_num_re = re.compile('%s/\\?p=([0-9]*)$' % re.escape(url))

    @classmethod
    def get_archive_elements(cls):
        """Get the comic links listed on the archive page, in reversed order."""
        archive_url = urljoin_wrapper(cls.url, "?page_id=2")
        return reversed(get_soup_at_url(archive_url).find_all('a', href=cls.comic_num_re))

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The comic number is parsed from the archive link URL and the date
        from the '<year>-<month>-<day>-' part of the image source.
        """
        comic_date_re = re.compile('.*/([0-9]*)-([0-9]*)-([0-9]*)-.*')
        url = cls.get_url_from_archive_element(link)
        num = int(cls.comic_num_re.match(url).groups()[0])
        img = soup.find('div', id='comic').find('img')
        # Sanity check: alt and title attributes agree.
        assert img['alt'] == img['title']
        title2 = img['title']
        img_url = img['src']
        year, month, day = [int(s) for s in comic_date_re.match(img_url).groups()]
        return {
            'num': num,
            'title': link.string,
            'title2': title2,
            'img': [img_url],
            'year': year,
            'month': month,
            'day': day,
        }
1081
1082
1083
class GenericBouletCorp(GenericNavigableComic):
    """Generic class to retrieve BouletCorp comics in different languages.

    This class reads cls.url but does not define it here.
    """
    # Also on http://bouletcorp.tumblr.com
    _categories = ('BOULET', )
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        This is the first <a> of the 'centered_nav' div of the home page.
        """
        return get_soup_at_url(cls.url).find('div', id='centered_nav').find_all('a')[0]

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The date is parsed from the comic URL (<url>/<year>/<month>/<day>/)
        and the images are read from the 'storycontent' div of the 'notes'
        div. Images without a 'src' attribute are ignored.
        """
        url = cls.get_url_from_link(link)
        # The site URL is escaped so that its dots only match literal dots.
        date_re = re.compile('^%s/([0-9]*)/([0-9]*)/([0-9]*)/' % re.escape(cls.url))
        year, month, day = [int(s) for s in date_re.match(url).groups()]
        imgs = soup.find('div', id='notes').find('div', class_='storycontent').find_all('img')
        # Non-empty image titles, joined with a double space.
        texts = '  '.join(t for t in (i.get('title') for i in imgs) if t)
        title = soup.find('title').string
        return {
            'img': [convert_iri_to_plain_ascii_uri(i['src']) for i in imgs if i.get('src') is not None],
            'title': title,
            'texts': texts,
            'year': year,
            'month': month,
            'day': day,
        }
1111
1112
1113
class BouletCorp(GenericBouletCorp):
    """BouletCorp comics as published on www.bouletcorp.com."""
    # Site-specific settings; the retrieval logic is inherited.
    url = 'http://www.bouletcorp.com'
    name = 'boulet'
    long_name = 'Boulet Corp'
    _categories = ('FRANCAIS',)
1119
1120
1121
class BouletCorpEn(GenericBouletCorp):
    """BouletCorp comics as published on english.bouletcorp.com."""
    # Site-specific settings; the retrieval logic is inherited.
    url = 'http://english.bouletcorp.com'
    name = 'boulet_en'
    long_name = 'Boulet Corp English'
1126
1127
1128
class AmazingSuperPowers(GenericNavigableComic):
    """Retriever for the Amazing Super Powers comics."""
    name = 'asp'
    long_name = 'Amazing Super Powers'
    url = 'http://www.amazingsuperpowers.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Read author, date, images and title of the comic from 'soup'.

        The title is the space-joined 'title' attribute of the images.
        """
        author_span = soup.find("span", class_="post-author")
        writer = author_span.find("a").string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find('div', id='comic').find_all('img')
        heading = ' '.join(pic['title'] for pic in pictures)
        # Sanity check: alt and title attributes agree on every image.
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        return {
            'title': heading,
            'author': writer,
            'img': [pic['src'] for pic in pictures],
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
1153
1154
1155
class ToonHole(GenericNavigableComic):
    """Retriever for the Toon Hole comics."""
    # Also on http://tapastic.com/series/TOONHOLE
    name = 'toonhole'
    long_name = 'Toon Hole'
    url = 'http://www.toonhole.com'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Read short URL, date, title and images of the comic from 'soup'.

        The title is the alt text of the first image, or an empty string
        when the 'comic' div holds no image.
        """
        shortlink = soup.find('link', rel='shortlink')['href']
        raw_date = soup.find('div', class_='entry-meta').contents[0].strip()
        published = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find('div', id='comic').find_all('img')
        heading = ""
        if pictures:
            first_picture = pictures[0]
            heading = first_picture['alt']
            # Sanity check: alt and title attributes agree.
            assert first_picture['title'] == heading
        return {
            'short_url': shortlink,
            'title': heading,
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [convert_iri_to_plain_ascii_uri(pic['src']) for pic in pictures],
        }
1185
1186
1187
class Channelate(GenericNavigableComic):
    """Retriever for the Channelate comics."""
    name = 'channelate'
    long_name = 'Channelate'
    url = 'http://www.channelate.com'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Read author, date, title and images of the comic from 'soup'.

        When the page has an 'extrapanelbutton' div, the page it links to
        is fetched too and its 'extrapanelimage' images are appended.
        """
        author_span = soup.find("span", class_="post-author")
        writer = author_span.find("a").string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, '%Y/%m/%d')
        heading = soup.find('meta', property='og:title')['content']
        comic_div = soup.find('div', id='comic')
        pictures = list(comic_div.find_all('img')) if comic_div else []
        bonus_url = None
        bonus_button = soup.find('div', id='extrapanelbutton')
        if bonus_button:
            bonus_url = bonus_button.find('a')['href']
            bonus_page = get_soup_at_url(bonus_url)
            pictures += bonus_page.find_all('img', class_='extrapanelimage')
        return {
            'url_extra': bonus_url,
            'title': heading,
            'author': writer,
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [urljoin_wrapper(cls.url, pic['src']) for pic in pictures],
        }
1221
1222
1223
class CyanideAndHappiness(GenericNavigableComic):
    """Class to retrieve Cyanide And Happiness comics."""
    name = 'cyanide'
    long_name = 'Cyanide and Happiness'
    url = 'http://explosm.net'
    _categories = ('NSFW', )
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics (the 'Oldest comic' link of the home page)."""
        return get_soup_at_url(cls.url).find('a', title='Oldest comic')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns None when the navigation link is missing from the page or
        carries no 'href' attribute.
        """
        # No trailing space in the class name: class values are split on
        # whitespace, so 'previous-comic ' could never match any element.
        link = last_soup.find('a', class_='next-comic' if next_ else 'previous-comic')
        if link is None or link.get('href') is None:
            return None
        return link

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The comic number is the second-to-last '/'-separated component of
        the 'og:url' meta content.
        """
        url2 = soup.find('meta', property='og:url')['content']
        num = int(url2.split('/')[-2])
        date_str = soup.find('h3').find('a').string
        day = string_to_date(date_str, '%Y.%m.%d')
        author = soup.find('small', class_="author-credit-name").string
        # The credit is expected to read 'by <author>'; drop the prefix.
        assert author.startswith('by ')
        author = author[3:]
        imgs = soup.find_all('img', id='main-comic')
        return {
            'num': num,
            'author': author,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'prefix': '%d-' % num,
            'img': [convert_iri_to_plain_ascii_uri(urljoin_wrapper(cls.url, i['src'])) for i in imgs]
        }
1262
1263
1264
class MrLovenstein(GenericComic):
    """Retriever for the Mr. Lovenstein comics."""
    # Also on https://tapastic.com/series/MrLovenstein
    name = 'mrlovenstein'
    long_name = 'Mr. Lovenstein'
    url = 'http://www.mrlovenstein.com'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generator yielding the comics that follow 'last_comic'.

        The range of comic numbers is derived from the '/comic/<num>' links
        of the home page; when 'last_comic' is given, iteration starts right
        after its number. Implementation of GenericComic's abstract method.
        """
        # TODO: more info from http://www.mrlovenstein.com/archive
        number_re = re.compile('^/comic/([0-9]*)$')
        strip_src_re = re.compile('^/images/comics/')
        home = get_soup_at_url(cls.url)
        known_numbers = []
        for anchor in home.find_all('a', href=number_re):
            known_numbers.append(int(number_re.match(anchor['href']).groups()[0]))
        start, stop = min(known_numbers), max(known_numbers)
        if last_comic:
            start = last_comic['num'] + 1
        for number in range(start, stop + 1):
            page_url = urljoin_wrapper(cls.url, '/comic/%d' % number)
            page = get_soup_at_url(page_url)
            # Matching images are kept in reverse document order.
            pictures = page.find_all('img', src=strip_src_re)[::-1]
            summary = page.find('meta', attrs={'name': 'description'})['content']
            hover_texts = [pic.get('title') for pic in pictures]
            yield {
                'url': page_url,
                'num': number,
                'texts': '  '.join(text for text in hover_texts if text),
                'img': [urljoin_wrapper(page_url, pic['src']) for pic in pictures],
                'description': summary,
            }
1294
1295
1296
class DinosaurComics(GenericListableComic):
    """Class to retrieve Dinosaur Comics comics."""
    name = 'dinosaur'
    long_name = 'Dinosaur Comics'
    url = 'http://www.qwantz.com'
    get_url_from_archive_element = get_href
    # Matches '<url>/index.php?comic=<num>'; the site URL and the dot of
    # 'index.php' are escaped so that dots only match literal dots.
    comic_link_re = re.compile('^%s/index\\.php\\?comic=([0-9]*)$' % re.escape(url))

    @classmethod
    def get_archive_elements(cls):
        """Get the comic links of the archive page, in reversed order."""
        archive_url = urljoin_wrapper(cls.url, 'archive.php')
        # first link is random -> skip it
        return reversed(get_soup_at_url(archive_url).find_all('a', href=cls.comic_link_re)[1:])

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Number, date and text come from the archive link (its URL, its
        string and its next sibling); image and title come from the first
        image of 'soup' whose source starts with '<url>/comics/'.
        """
        url = cls.get_url_from_archive_element(link)
        num = int(cls.comic_link_re.match(url).groups()[0])
        date_str = link.string
        text = link.next_sibling.string
        day = string_to_date(remove_st_nd_rd_th_from_date(date_str), "%B %d, %Y")
        comic_img_re = re.compile('^%s/comics/' % re.escape(cls.url))
        img = soup.find('img', src=comic_img_re)
        return {
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [img.get('src')],
            'title': img.get('title'),
            'text': text,
            'num': num,
        }
1329
1330
1331
class ButterSafe(GenericListableComic):
    """Class to retrieve Butter Safe comics."""
    name = 'butter'
    long_name = 'ButterSafe'
    url = 'http://buttersafe.com'
    get_url_from_archive_element = get_href
    # Matches '<url>/<year>/<month>/<day>/...'; the site URL is escaped so
    # that its dots only match literal dots.
    comic_link_re = re.compile('^%s/([0-9]*)/([0-9]*)/([0-9]*)/.*' % re.escape(url))

    @classmethod
    def get_archive_elements(cls):
        """Get the comic links of the archive page, in reversed order."""
        archive_url = urljoin_wrapper(cls.url, 'archive/')
        return reversed(get_soup_at_url(archive_url).find_all('a', href=cls.comic_link_re))

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Title and date come from the archive link (its string and its URL);
        the image is the first one of the 'comic' div of 'soup'.
        """
        url = cls.get_url_from_archive_element(link)
        title = link.string
        year, month, day = [int(s) for s in cls.comic_link_re.match(url).groups()]
        img = soup.find('div', id='comic').find('img')
        # Sanity check: the image is labelled with the comic title.
        assert img['alt'] == title
        return {
            'title': title,
            'day': day,
            'month': month,
            'year': year,
            'img': [img['src']],
        }
1359
1360
1361
class CalvinAndHobbes(GenericComic):
    """Class to retrieve Calvin and Hobbes comics."""
    # Also on http://www.gocomics.com/calvinandhobbes/
    name = 'calvin'
    long_name = 'Calvin and Hobbes'
    # This is not through any official webpage but eh...
    url = 'http://marcel-oehler.marcellosendos.ch/comics/ch/'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generator to get the next comic. Implementation of GenericComic's abstract method.

        The index page links to one page per month ('<year>/<month>/').
        Each month page holds images whose source starts with
        '<year><month><day>'. Only comics dated strictly after the date of
        'last_comic' (or after 1985-11-01 when there is none) are yielded.
        """
        last_date = get_date_for_comic(last_comic) if last_comic else date(1985, 11, 1)
        # '+' rather than '*': an empty group (e.g. for an href starting
        # with '//') would make int('') fail below.
        link_re = re.compile('^([0-9]+)/([0-9]+)/')
        for link in get_soup_at_url(cls.url).find_all('a', href=link_re):
            url = link['href']
            # The strings are kept as they are used verbatim to build the
            # image regexp and URL; the ints are used for dates.
            year_str, month_str = link_re.match(url).groups()
            year, month = int(year_str), int(month_str)
            # Skip months that end before the last retrieved comic.
            if date(year, month, 1) + timedelta(days=31) < last_date:
                continue
            img_re = re.compile('^%s%s([0-9]+)' % (year_str, month_str))
            month_url = urljoin_wrapper(cls.url, url)
            for img in get_soup_at_url(month_url).find_all('img', src=img_re):
                img_src = img['src']
                day = int(img_re.match(img_src).groups()[0])
                comic_date = date(year, month, day)
                if comic_date > last_date:
                    yield {
                        'url': month_url,
                        'year': year,
                        'month': month,
                        'day': day,
                        'img': ['%s%s/%s/%s' % (cls.url, year_str, month_str, img_src)],
                    }
                    last_date = comic_date
1395
1396
1397
class AbstruseGoose(GenericListableComic):
    """Class to retrieve AbstruseGoose Comics."""
    name = 'abstruse'
    long_name = 'Abstruse Goose'
    url = 'http://abstrusegoose.com'
    get_url_from_archive_element = get_href
    # The site URL is escaped in both regexps so that its dots only match
    # literal dots.
    comic_url_re = re.compile('^%s/([0-9]*)$' % re.escape(url))
    comic_img_re = re.compile('^%s/strips/.*' % re.escape(url))

    @classmethod
    def get_archive_elements(cls):
        """Get the comic links ('<url>/<num>') listed on the archive page."""
        archive_url = urljoin_wrapper(cls.url, 'archive')
        return get_soup_at_url(archive_url).find_all('a', href=cls.comic_url_re)

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Get information about a particular comics.

        Number and title come from the archive element (its URL and its
        string); the image is the first one of 'soup' whose source starts
        with '<url>/strips/'.
        """
        comic_url = cls.get_url_from_archive_element(archive_elt)
        num = int(cls.comic_url_re.match(comic_url).groups()[0])
        return {
            'num': num,
            'title': archive_elt.string,
            'img': [soup.find('img', src=cls.comic_img_re)['src']]
        }
1420
1421
1422
class PhDComics(GenericNavigableComic):
    """Retriever for the PhD Comics."""
    name = 'phd'
    long_name = 'PhD Comics'
    url = 'http://phdcomics.com/comics/archive.php'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the parent element of the 'first' button."""
        page = get_soup_at_url(cls.url)
        first_button = page.find('img', src='images/first_button.gif')
        return first_button.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the parent of the next/previous button, or None if absent."""
        button_src = 'images/next_button.gif' if next_ else 'images/prev_button.gif'
        button = last_soup.find('img', src=button_src)
        if button is None:
            return None
        return button.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Read date, image and title of the comic from 'soup'.

        When the date text cannot be parsed, a message is printed and
        today's date is used instead.
        """
        date_font = soup.find(
            'font',
            face='Arial,Helvetica,Geneva,Swiss,SunSans-Regular',
            color='white')
        raw_date = date_font.string.strip()
        try:
            published = string_to_date(raw_date, '%m/%d/%Y')
        except ValueError:
            print("Invalid date %s" % raw_date)
            published = date.today()
        heading = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        return {
            'year': published.year,
            'month': published.month,
            'day': published.day,
            'img': [soup.find('img', id='comic')['src']],
            'title': heading,
        }
1457
1458
1459
class Octopuns(GenericEmptyComic, GenericNavigableComic):
    """Retriever for the Octopuns comics."""
    # Also on http://octopuns.tumblr.com
    name = 'octopuns'
    long_name = 'Octopuns'
    url = 'http://www.octopuns.net'

    @classmethod
    def get_first_comic_link(cls):
        """Return the parent element of the 'First' button of the home page."""
        home = get_soup_at_url(cls.url)
        first_button = home.find('img', src=re.compile('.*/First.png'))
        return first_button.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the element wrapping the Next/Back button.

        None is returned when that wrapping element carries no 'href'.
        """
        button_pattern = '.*/Next.png' if next_ else '.*/Back.png'
        wrapper = last_soup.find('img', src=re.compile(button_pattern)).parent
        if wrapper.get('href') is None:
            return None
        return wrapper

    @classmethod
    def get_comic_info(cls, soup, link):
        """Read title, date and images of the comic from 'soup'."""
        heading = soup.find('h3', class_='post-title entry-title').string
        raw_date = soup.find('h2', class_='date-header').string
        published = string_to_date(raw_date, "%A, %B %d, %Y")
        image_links = soup.find_all('link', rel='image_src')
        return {
            'img': [image_link['href'] for image_link in image_links],
            'title': heading,
            'day': published.day,
            'month': published.month,
            'year': published.year,
        }
1491
1492
1493
class Quarktees(GenericNavigableComic):
    """Retriever for the Quarktees comics."""
    name = 'quarktees'
    long_name = 'Quarktees'
    url = 'http://www.quarktees.com/blogs/news'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.quarktees.com/blogs/news/12486621-coming-soon'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the <a> with the next/previous article id, if any."""
        anchor_id = 'article-next' if next_ else 'article-prev'
        return last_soup.find('a', id=anchor_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Read the title and the article images of the comic from 'soup'."""
        heading = soup.find('meta', property='og:title')['content']
        article_div = soup.find('div', class_='single-article')
        pictures = article_div.find_all('img')
        return {
            'title': heading,
            'img': [urljoin_wrapper(cls.url, pic['src']) for pic in pictures],
        }
1517
1518
1519
class OverCompensating(GenericNavigableComic):
    """Retriever for the Over Compensating comics."""
    name = 'compensating'
    long_name = 'Over Compensating'
    url = 'http://www.overcompensating.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Find on the home page the anchor whose href ends with 'comic=1'."""
        home = get_soup_at_url(cls.url)
        return home.find('a', href=re.compile('comic=1$'))

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Find the navigation anchor, identified by its title attribute."""
        wanted_title = 'next comic' if next_ else 'go back already'
        return last_soup.find('a', title=wanted_title)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return number, image url and title for a comic page.

        The number is the trailing 'comic=<digits>' part of the comic url;
        the image is the first <img> whose src starts with '/oc/comics/'.
        """
        img_src_re = re.compile('^/oc/comics/.*')
        comic_num_re = re.compile('.*comic=([0-9]*)$')
        comic_url = cls.get_url_from_link(link)
        num_match = comic_num_re.match(comic_url)
        num = int(num_match.group(1))
        picture = soup.find('img', src=img_src_re)
        return {
            'num': num,
            'img': [urljoin_wrapper(comic_url, picture['src'])],
            'title': picture.get('title')
        }
1549
1550
1551
class Oglaf(GenericNavigableComic):
    """Retriever for the Oglaf comics (categorised as NSFW)."""
    name = 'oglaf'
    long_name = 'Oglaf [NSFW]'
    url = 'http://oglaf.com'
    _categories = ('NSFW', )
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the parent of the div with id 'st' found on the home page."""
        home = get_soup_at_url(cls.url)
        return home.find("div", id="st").parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the parent of the 'nx' (next) or 'pv' (previous) div.

        None is returned when the div is not found.
        """
        marker = last_soup.find("div", id="nx" if next_ else "pv")
        if not marker:
            return None
        return marker.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, image urls and description of a comic page.

        Exactly one image is expected inside the 'tt' div and exactly one
        image with id 'strip'; the description joins their title attributes.
        """
        page_title = soup.find('title').string
        header_imgs = soup.find('div', id='tt').find_all('img')
        assert len(header_imgs) == 1
        strip_imgs = soup.find_all('img', id='strip')
        assert len(strip_imgs) == 1
        pictures = header_imgs + strip_imgs
        description = ' '.join(pic['title'] for pic in pictures)
        return {
            'title': page_title,
            'img': [pic['src'] for pic in pictures],
            'description': description,
        }
1585
1586
1587
class ScandinaviaAndTheWorld(GenericNavigableComic):
    """Retriever for the Scandinavia And The World comics."""
    name = 'satw'
    long_name = 'Scandinavia And The World'
    url = 'http://satwcomic.com'
    get_first_comic_link = simulate_first_link
    first_url = 'http://satwcomic.com/sweden-denmark-and-norway'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Find the anchor whose accesskey is 'n' (next) or 'p' (previous)."""
        key = 'n' if next_ else 'p'
        return last_soup.find('a', accesskey=key)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, description and image urls read from the page.

        Title and description come from <meta> tags; images are the <img>
        elements with itemprop="image".
        """
        label_meta = soup.find('meta', attrs={'name': 'twitter:label1'})
        title = label_meta['content']
        desc_meta = soup.find('meta', property='og:description')
        description = desc_meta['content']
        pictures = soup.find_all('img', itemprop="image")
        return {
            'title': title,
            'description': description,
            'img': [pic['src'] for pic in pictures],
        }
1611
1612
1613
# The Something Of That Ilk site does not exist anymore.
class SomethingOfThatIlk(GenericEmptyComic):
    """Entry for the Something Of That Ilk comics (site is gone)."""
    name = 'somethingofthatilk'
    long_name = 'Something Of That Ilk'
    url = 'http://www.somethingofthatilk.com'
1618
1619
1620
class InfiniteMonkeyBusiness(GenericNavigableComic):
    """Retriever for the Infinite Monkey Business comics."""
    name = 'monkey'
    long_name = 'Infinite Monkey Business'
    url = 'http://infinitemonkeybusiness.net'
    get_navi_link = get_a_navi_comicnavnext_navinext
    get_first_comic_link = simulate_first_link
    first_url = 'http://infinitemonkeybusiness.net/comic/pillory/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the og:title and the images found in the 'comic' div."""
        og_title = soup.find('meta', property='og:title')
        title = og_title['content']
        comic_div = soup.find('div', id='comic')
        pictures = comic_div.find_all('img')
        return {
            'title': title,
            'img': [pic['src'] for pic in pictures],
        }
1638
1639
1640
class Wondermark(GenericListableComic):
    """Retriever for the Wondermark comics."""
    name = 'wondermark'
    long_name = 'Wondermark'
    url = 'http://wondermark.com'
    get_url_from_archive_element = get_href

    @classmethod
    def get_archive_elements(cls):
        """Return the rel="bookmark" anchors of the archive page, reversed."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive/'))
        bookmarks = archive_soup.find_all('a', rel='bookmark')
        return reversed(bookmarks)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return date, image, title, alt text and tags for a comic page.

        When the page has no 'comic' div, the image list is empty and title
        and alt are empty strings.
        """
        raw_date = soup.find('div', class_='postdate').find('em').string
        published = string_to_date(remove_st_nd_rd_th_from_date(raw_date), "%B %d, %Y")
        comic_div = soup.find('div', id='comic')
        # Defaults used for pages without a comic div.
        img_src, alt, title = [], '', ''
        if comic_div:
            picture = comic_div.find('img')
            img_src = [picture['src']]
            alt = picture['alt']
            assert alt == picture['title']
            title = soup.find('meta', property='og:title')['content']
        tag_links = soup.find('div', class_='postmeta').find_all('a', rel='tag')
        return {
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': img_src,
            'title': title,
            'alt': alt,
            'tags': ' '.join(tag.string for tag in tag_links),
        }
1677
1678
1679
class WarehouseComic(GenericNavigableComic):
    """Retriever for the Warehouse Comic comics."""
    name = 'warehouse'
    long_name = 'Warehouse Comic'
    url = 'http://warehousecomic.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return images, title and publication date of a comic page."""
        title_tag = soup.find('h2', class_='post-title')
        title = title_tag.string
        date_tag = soup.find('span', class_='post-date')
        published = string_to_date(date_tag.string, "%B %d, %Y")
        pictures = soup.find('div', id='comic').find_all('img')
        return {
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'day': published.day,
            'month': published.month,
            'year': published.year,
        }
1701
1702
1703
class JustSayEh(GenericNavigableComic):
    """Retriever for the Just Say Eh comics."""
    # Also on http://tapastic.com/series/Just-Say-Eh
    name = 'justsayeh'
    long_name = 'Just Say Eh'
    url = 'http://www.justsayeh.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return images, title and alt text of a comic page.

        Every image of the 'comic' div must have identical alt and title
        attributes; the alt text reported is the one of the first image.
        """
        title_tag = soup.find('h2', class_='post-title')
        title = title_tag.string
        comic_div = soup.find("div", id="comic")
        pictures = comic_div.find_all("img")
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        first_alt = pictures[0]['alt']
        return {
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'alt': first_alt,
        }
1724
1725
1726
class MouseBearComedy(GenericNavigableComic):
    """Retriever for the Mouse Bear Comedy comics."""
    # Also on http://mousebearcomedy.tumblr.com
    name = 'mousebear'
    long_name = 'Mouse Bear Comedy'
    url = 'http://www.mousebearcomedy.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return date, images, title and author of a comic page.

        Every image of the 'comic' div must use the post title as both its
        alt and its title attribute.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, '%B %d, %Y')
        pictures = soup.find("div", id="comic").find_all("img")
        assert all(pic['alt'] == pic['title'] == title for pic in pictures)
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'author': author,
        }
1752
1753
1754
class BigFootJustice(GenericNavigableComic):
    """Retriever for the Big Foot Justice comics."""
    # Also on http://tapastic.com/series/bigfoot-justice
    name = 'bigfoot'
    long_name = 'Big Foot Justice'
    url = 'http://bigfootjustice.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return images and title of a comic page.

        The title is built by joining the title attributes of the images of
        the 'comic' div, each of which must equal the image's alt attribute.
        """
        comic_div = soup.find('div', id='comic')
        pictures = comic_div.find_all('img')
        assert all(pic['title'] == pic['alt'] for pic in pictures)
        joined_title = ' '.join(pic['title'] for pic in pictures)
        return {
            'img': [pic['src'] for pic in pictures],
            'title': joined_title,
        }
1773
1774
1775
class RespawnComic(GenericNavigableComic):
    """Retriever for the Respawn Comic comics."""
    # Also on http://respawncomic.tumblr.com
    name = 'respawn'
    long_name = 'Respawn Comic'
    # The url previously carried a trailing space, which made it invalid.
    url = 'http://respawncomic.com'
    _categories = ('RESPAWN', )
    get_navi_link = get_a_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://respawncomic.com/comic/c0001/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, author, publication date and images of a comic page.

        All values are read from <meta> tags. Only the first 10 characters
        of the published time (the YYYY-MM-DD part) are parsed. og:image
        entries listed in skip_imgs are left out of the result.
        """
        title = soup.find('meta', property='og:title')['content']
        author = soup.find('meta', attrs={'name': 'shareaholic:article_author_name'})['content']
        date_str = soup.find('meta', attrs={'name': 'shareaholic:article_published_time'})['content']
        date_str = date_str[:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        imgs = soup.find_all('meta', property='og:image')
        # og:image values that are excluded from the comic images.
        skip_imgs = {
            'http://respawncomic.com/wp-content/uploads/2016/03/site/HAROLD2.png',
            'http://respawncomic.com/wp-content/uploads/2016/03/site/DEVA.png'
        }
        return {
            'title': title,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [i['content'] for i in imgs if i['content'] not in skip_imgs],
        }
1807
1808
1809
class SafelyEndangered(GenericNavigableComic):
    """Retriever for the Safely Endangered comics."""
    # Also on http://tumblr.safelyendangered.com
    name = 'endangered'
    long_name = 'Safely Endangered'
    url = 'http://www.safelyendangered.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.safelyendangered.com/comic/ignored/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return date, images, title and alt text of a comic page.

        The alt text is the one of the first image of the 'comic' div; all
        images must have identical alt and title attributes.
        """
        title_tag = soup.find('h2', class_='post-title')
        title = title_tag.string
        date_tag = soup.find('span', class_='post-date')
        published = string_to_date(date_tag.string, '%B %d, %Y')
        pictures = soup.find('div', id='comic').find_all('img')
        first_alt = pictures[0]['alt']
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'alt': first_alt,
        }
1836
1837
1838
class PicturesInBoxes(GenericNavigableComic):
    """Retriever for the Pictures In Boxes comics."""
    # Also on http://picturesinboxescomic.tumblr.com
    name = 'picturesinboxes'
    long_name = 'Pictures in Boxes'
    url = 'http://www.picturesinboxes.com'
    get_navi_link = get_a_navi_navinext
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.picturesinboxes.com/2013/10/26/tetris/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return date, images, title and author of a comic page.

        At least one image is required in the 'comicpane' div, and each one
        must use the post title as both its title and alt attribute.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, '%B %d, %Y')
        pictures = soup.find('div', class_='comicpane').find_all('img')
        assert pictures
        assert all(pic['title'] == pic['alt'] == title for pic in pictures)
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'author': author,
        }
1866
1867
1868
class Penmen(GenericEmptyComic):
    """Entry for the Penmen comics.

    NOTE(review): derives from GenericEmptyComic, so presumably no comic is
    actually retrieved - confirm against the base class.
    """
    name = 'penmen'
    long_name = 'Penmen'
    url = 'http://penmen.com'
1873
1874
1875
class TheDoghouseDiaries(GenericNavigableComic):
    """Retriever for The Dog House Diaries comics."""
    name = 'doghouse'
    long_name = 'The Dog House Diaries'
    url = 'http://thedoghousediaries.com'

    @classmethod
    def get_first_comic_link(cls):
        """Find the anchor with id 'firstlink' on the home page."""
        home = get_soup_at_url(cls.url)
        return home.find('a', id='firstlink')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Find the anchor with id 'nextlink' or 'previouslink'."""
        anchor_id = 'nextlink' if next_ else 'previouslink'
        return last_soup.find('a', id=anchor_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return titles, alt text, image url and number of a comic page.

        The number is the last '/'-separated component of the comic url; the
        image is the first <img> whose src starts with 'dhdcomics/'.
        """
        comic_img_re = re.compile('^dhdcomics/.*')
        picture = soup.find('img', src=comic_img_re)
        comic_url = cls.get_url_from_link(link)
        last_component = comic_url.split('/')[-1]
        return {
            'title': soup.find('h2', id='titleheader').string,
            'title2': soup.find('div', id='subtext').string,
            'alt': picture.get('title'),
            # The src attribute is stripped of surrounding whitespace before joining.
            'img': [urljoin_wrapper(comic_url, picture['src'].strip())],
            'num': int(last_component),
        }
1904
1905
1906
class InvisibleBread(GenericListableComic):
    """Retriever for the Invisible Bread comics."""
    # Also on http://www.gocomics.com/invisible-bread
    name = 'invisiblebread'
    long_name = 'Invisible Bread'
    url = 'http://invisiblebread.com'

    @classmethod
    def get_archive_elements(cls):
        """Return the 'archive-title' cells of the archive page, reversed."""
        archive_url = urljoin_wrapper(cls.url, 'archives/')
        return reversed(get_soup_at_url(archive_url).find_all('td', class_='archive-title'))

    @classmethod
    def get_url_from_archive_element(cls, td):
        """Return the href of the first anchor of an archive cell."""
        return td.find('a')['href']

    @classmethod
    def get_comic_info(cls, soup, td):
        """Return date, image url and title of a comic.

        The title and the month/day come from the archive row (the cell and
        its previous sibling); the year is the first numeric path component
        of the comic url. The image of the 'comic' div must use the title as
        both its title and alt attribute.
        """
        url = cls.get_url_from_archive_element(td)
        title = td.find('a').string
        month_and_day = td.previous_sibling.string
        # Escape the site url so that its dots only match literal dots.
        link_re = re.compile('^%s/([0-9]+)/' % re.escape(cls.url))
        year = link_re.match(url).groups()[0]
        date_str = month_and_day + ' ' + year
        day = string_to_date(date_str, '%b %d %Y')
        # Only the first image of the comic div is considered.
        imgs = [soup.find('div', id='comic').find('img')]
        assert all(i['title'] == i['alt'] == title for i in imgs)
        return {
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
            'title': title,
        }
1942
1943
1944
# Retrieval of Disco Bleach does not work anymore.
class DiscoBleach(GenericEmptyComic):
    """Entry for the Disco Bleach comics (retrieval no longer works)."""
    name = 'discobleach'
    long_name = 'Disco Bleach'
    url = 'http://discobleach.com'
1949
1950
1951
# Retrieval of Tubey Toons does not work anymore.
class TubeyToons(GenericEmptyComic):
    """Entry for the Tubey Toons comics (retrieval no longer works)."""
    # Also on http://tapastic.com/series/Tubey-Toons
    # Also on http://tubeytoons.tumblr.com
    name = 'tubeytoons'
    long_name = 'Tubey Toons'
    url = 'http://tubeytoons.com'
    # NOTE(review): 'TUNEYTOONS' looks like a typo for 'TUBEYTOONS'; kept
    # as-is because the value may be relied upon at runtime - confirm.
    _categories = ('TUNEYTOONS', )
1959
1960
1961
class CompletelySeriousComics(GenericNavigableComic):
    """Retriever for the Completely Serious comics."""
    name = 'completelyserious'
    long_name = 'Completely Serious Comics'
    url = 'http://completelyseriouscomics.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return date, images, title, alt text and author of a comic page.

        The author is the second child of the 'post-author' span. At least
        one image is required in the 'comicpane' div and all images must
        share the same value for their title and alt attributes.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find('span', class_='post-author')
        author = author_span.contents[1].string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, '%B %d, %Y')
        pictures = soup.find('div', class_='comicpane').find_all('img')
        assert pictures
        shared_alt = pictures[0]['title']
        assert all(pic['title'] == pic['alt'] == shared_alt for pic in pictures)
        return {
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'alt': shared_alt,
            'author': author,
        }
1989
1990
1991
class PoorlyDrawnLines(GenericListableComic):
    """Retriever for the Poorly Drawn Lines comics."""
    # Also on http://pdlcomics.tumblr.com
    name = 'poorlydrawn'
    long_name = 'Poorly Drawn Lines'
    url = 'http://poorlydrawnlines.com'
    _categories = ('POORLYDRAWN', )
    get_url_from_archive_element = get_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the image (if any) and title of a comic page.

        At most one image is expected in the 'post' div. The title is the
        image's title attribute, or an empty string when there is no image
        or no such attribute.
        """
        imgs = soup.find('div', class_='post').find_all('img')
        assert len(imgs) <= 1
        return {
            'img': [i['src'] for i in imgs],
            'title': imgs[0].get('title', "") if imgs else "",
        }

    @classmethod
    def get_archive_elements(cls):
        """Return the archive page anchors pointing to comics, reversed.

        An anchor is kept when its href starts with '<url>/comic/' followed
        by at least one character.
        """
        archive_url = urljoin_wrapper(cls.url, 'archive')
        # Escape the site url so that its dots only match literal dots.
        url_re = re.compile('^%s/comic/.' % re.escape(cls.url))
        return reversed(get_soup_at_url(archive_url).find_all('a', href=url_re))
2015
2016
2017
class LoadingComics(GenericNavigableComic):
    """Retriever for the Loading Artist comics."""
    name = 'loadingartist'
    long_name = 'Loading Artist'
    url = 'http://www.loadingartist.com/latest'

    @classmethod
    def get_first_comic_link(cls):
        """Find the anchor titled "First" on the page at the class url."""
        latest = get_soup_at_url(cls.url)
        return latest.find('a', title="First")

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Find the anchor titled 'Next' or 'Previous'."""
        wanted_title = 'Next' if next_ else 'Previous'
        return last_soup.find('a', title=wanted_title)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, images and publication date of a comic page.

        Only images of the 'comic' div with empty alt and title attributes
        are kept.
        """
        title = soup.find('h1').string
        raw_date = soup.find('span', class_='date').string.strip()
        published = string_to_date(raw_date, "%B %d, %Y")
        comic_div = soup.find('div', class_='comic')
        pictures = comic_div.find_all('img', alt='', title='')
        return {
            'title': title,
            'img': [pic['src'] for pic in pictures],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2047
2048
2049
class ChuckleADuck(GenericNavigableComic):
    """Retriever for the Chuckle-A-Duck comics."""
    name = 'chuckleaduck'
    long_name = 'Chuckle-A-duck'
    url = 'http://chuckleaduck.com'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return date, images, title and author of a comic page.

        A page without a 'comic' div yields no image and an empty title.
        Otherwise the title is the title attribute of the first image, and
        every image must use it as both title and alt.
        """
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(remove_st_nd_rd_th_from_date(raw_date), "%B %d, %Y")
        author = soup.find('span', class_='post-author').string
        comic_div = soup.find('div', id='comic')
        if comic_div:
            pictures = comic_div.find_all('img')
        else:
            pictures = []
        if pictures:
            title = pictures[0]['title']
        else:
            title = ""
        assert all(pic['title'] == pic['alt'] == title for pic in pictures)
        return {
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'author': author,
        }
2075
2076
2077
class DepressedAlien(GenericNavigableComic):
    """Retriever for the Depressed Alien comics."""
    name = 'depressedalien'
    long_name = 'Depressed Alien'
    url = 'http://depressedalien.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the parent of the image named 'beginArrow' on the home page."""
        home = get_soup_at_url(cls.url)
        begin_arrow = home.find('img', attrs={'name': 'beginArrow'})
        return begin_arrow.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the parent of the image named 'rightArrow' or 'leftArrow'."""
        arrow_name = 'rightArrow' if next_ else 'leftArrow'
        arrow = last_soup.find('img', attrs={'name': arrow_name})
        return arrow.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the twitter:title and the og:image urls of a comic page."""
        title_meta = soup.find('meta', attrs={'name': 'twitter:title'})
        title = title_meta['content']
        image_metas = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'img': [meta['content'] for meta in image_metas],
        }
2103
2104
2105
class ThingsInSquares(GenericListableComic):
    """Retriever for the Things In Squares comics."""
    # This can be retrieved in other languages
    # Also on https://tapastic.com/series/Things-in-Squares
    name = 'squares'
    long_name = 'Things in squares'
    url = 'http://www.thingsinsquares.com'

    @classmethod
    def get_comic_info(cls, soup, tr):
        """Return date, titles, description, tags, images and alt text.

        The archive row must have exactly three cells: the second one holds
        the link (whose text is the title) and the third one the date. The
        remaining values are read from the comic page.
        """
        _, link_cell, date_cell = tr.find_all('td')
        anchor = link_cell.find('a')
        raw_date = date_cell.string
        published = string_to_date(raw_date, "%m.%d.%y")
        title = anchor.string
        title2 = soup.find('meta', property='og:title')['content']
        desc_meta = soup.find('meta', property='og:description')
        if desc_meta:
            description = desc_meta['content']
        else:
            description = ''
        tag_metas = soup.find_all('meta', property='article:tag')
        tags = ' '.join(meta['content'] for meta in tag_metas)
        pictures = soup.find('div', class_='entry-content').find_all('img')
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'title': title,
            'title2': title2,
            'description': description,
            'tags': tags,
            'img': [pic['src'] for pic in pictures],
            'alt': ' '.join(pic['alt'] for pic in pictures),
        }

    @classmethod
    def get_url_from_archive_element(cls, tr):
        """Return the href of the link held in the second cell of the row."""
        # Unpacking enforces that the row has exactly three cells.
        _, link_cell, _ = tr.find_all('td')
        anchor = link_cell.find('a')
        return anchor['href']

    @classmethod
    def get_archive_elements(cls):
        """Return the rows of the archive page's <tbody>, reversed."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive-2'))
        rows = archive_soup.find('tbody').find_all('tr')
        return reversed(rows)
2147
2148
2149
class HappleTea(GenericNavigableComic):
    """Retriever for the Happle Tea comics."""
    name = 'happletea'
    long_name = 'Happle Tea'
    url = 'http://www.happletea.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, images, alt text, date and author of a comic page.

        Title, author and date are read inside the 'post-content' div. The
        alt text concatenates (without separator) the alt attributes of the
        images of the 'comic' div, each of which must equal its title.
        """
        pictures = soup.find('div', id='comic').find_all('img')
        post = soup.find('div', class_='post-content')
        title = post.find('h2', class_='post-title').string
        author = post.find('a', rel='author').string
        raw_date = post.find('span', class_='post-date').string
        published = string_to_date(raw_date, "%B %d, %Y")
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        return {
            'title': title,
            'img': [pic['src'] for pic in pictures],
            'alt': ''.join(pic['alt'] for pic in pictures),
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'author': author,
        }
2176
2177
2178
class FatAwesomeComics(GenericNavigableComic):
    """Retriever for the Fat Awesome comics."""
    # Also on http://fatawesomecomedy.tumblr.com
    name = 'fatawesome'
    long_name = 'Fat Awesome'
    url = 'http://fatawesome.com/comics'
    get_navi_link = get_a_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://fatawesome.com/shortbus/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, description, tags, alt text, image and date.

        Exactly one image with data-recalc-dims="1" is expected; anything
        after the last '?' of its src is dropped. Only the first 10
        characters of the published time (YYYY-MM-DD) are parsed.
        """
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        description = soup.find('meta', attrs={'name': 'description'})['content']
        tag_meta = soup.find('meta', property='article:tag')
        if tag_meta:
            tags = tag_meta['content']
        else:
            tags = ""
        published_meta = soup.find('meta', property='article:published_time')
        raw_date = published_meta['content'][:10]
        published = string_to_date(raw_date, "%Y-%m-%d")
        pictures = soup.find_all('img', attrs={'data-recalc-dims': "1"})
        assert len(pictures) == 1
        return {
            'title': title,
            'description': description,
            'tags': tags,
            'alt': "".join(pic['alt'] for pic in pictures),
            'img': [pic['src'].rsplit('?', 1)[0] for pic in pictures],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2209
2210
2211
class AnythingComic(GenericListableComic):
    """Retriever for the Anything Comic comics."""
    # Also on http://tapastic.com/series/anything
    name = 'anythingcomic'
    long_name = 'Anything Comic'
    url = 'http://www.anythingcomic.com'

    @classmethod
    def get_archive_elements(cls):
        """Return the comic rows of the 'chapter_table' of the archive page."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive/'))
        rows = archive_soup.find('table', id='chapter_table').find_all('tr')
        # The first 2 <tr>'s do not correspond to comics
        return rows[2:]

    @classmethod
    def get_url_from_archive_element(cls, tr):
        """Return the comic url held in the second cell of an archive row."""
        # Unpacking enforces that the row has exactly four cells.
        _, comic_cell, _, _ = tr.find_all('td')
        anchor = comic_cell.find('a')
        return urljoin_wrapper(cls.url, anchor['href'])

    @classmethod
    def get_comic_info(cls, soup, tr):
        """Return number, title, alt text, image and date of a comic.

        Number, title and date are read from the four-cell archive row; the
        single image with id 'comic_image' is read from the comic page and
        must have matching alt and title attributes.
        """
        num_cell, comic_cell, date_cell, _ = tr.find_all('td')
        num = int(num_cell.string)
        anchor = comic_cell.find('a')
        title = anchor.string
        pictures = soup.find_all('img', id='comic_image')
        raw_date = date_cell.string
        published = string_to_date(remove_st_nd_rd_th_from_date(raw_date), "%B %d, %Y, %I:%M %p")
        assert len(pictures) == 1
        assert all(pic.get('alt') == pic.get('title') for pic in pictures)
        return {
            'num': num,
            'title': title,
            'alt': pictures[0].get('alt', ''),
            'img': [pic['src'] for pic in pictures],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2252
2253
2254
class LonnieMillsap(GenericNavigableComic):
    """Retriever for Lonnie Millsap's comics."""
    name = 'millsap'
    long_name = 'Lonnie Millsap'
    url = 'http://www.lonniemillsap.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.lonniemillsap.com/?p=42'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, author, images and date of a comic page.

        Author, date and images are looked up inside the 'post-content'
        div; images are those of its 'entry' div.
        """
        title = soup.find('h2', class_='post-title').string
        post = soup.find('div', class_='post-content')
        author_span = post.find("span", class_="post-author")
        author = author_span.find("a").string
        raw_date = post.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        entry = post.find("div", class_="entry")
        pictures = entry.find_all("img")
        return {
            'title': title,
            'author': author,
            'img': [pic['src'] for pic in pictures],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2280
2281
2282 View Code Duplication
class LinsEditions(GenericNavigableComic):
    """Retriever for the L.I.N.S. Editions comics."""
    # Also on http://linscomics.tumblr.com
    # Now on https://warandpeas.com
    name = 'lins'
    long_name = 'L.I.N.S. Editions'
    url = 'https://linsedition.com'
    _categories = ('LINS', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'https://linsedition.com/2011/09/07/l-i-n-s/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, images and publication date of a comic page.

        All values come from <meta> tags; only the first 10 characters of
        the published time (YYYY-MM-DD) are parsed.
        """
        title = soup.find('meta', property='og:title')['content']
        image_metas = soup.find_all('meta', property='og:image')
        published_meta = soup.find('meta', property='article:published_time')
        raw_date = published_meta['content'][:10]
        published = string_to_date(raw_date, "%Y-%m-%d")
        return {
            'title': title,
            'img': [meta['content'] for meta in image_metas],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2308
2309
2310
class ThorsThundershack(GenericNavigableComic):
    """Retriever for the Thor's Thundershack webcomic."""
    # Also on http://tapastic.com/series/Thors-Thundershac
    name = 'thor'
    long_name = "Thor's Thundershack"
    url = 'http://www.thorsthundershack.com'
    _categories = ('THOR', )
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'first navlink' anchor of the home page (None if absent)."""
        home_page = get_soup_at_url(cls.url)
        return home_page.find('a', class_='first navlink')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the link to the next (or previous) comic, or None.

        Anchors whose href is '/comic' are ignored.
        """
        rel = 'next' if next_ else 'prev'
        candidates = (
            anchor for anchor in last_soup.find_all('a', rel=rel)
            if anchor['href'] != '/comic')
        return next(candidates, None)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for one comic page.

        `soup` is the parsed page; `link` is not used by this implementation.
        """
        title = soup.find('meta', attrs={'name': 'description'})["content"]
        description = soup.find('div', itemprop='articleBody').text
        author = soup.find('span', itemprop='author copyrightHolder').string
        images = soup.find_all('img', itemprop='image')
        # Sanity check on the scraped markup: title and alt text must agree.
        for image in images:
            assert image['title'] == image['alt']
        if images:
            alt = images[0]['alt']
        else:
            alt = ""
        timestamp = soup.find('time', itemprop='datePublished')["datetime"]
        published = string_to_date(timestamp, "%Y-%m-%d %H:%M:%S")
        # Image sources are resolved against the site URL.
        image_urls = [urljoin_wrapper(cls.url, image['src']) for image in images]
        return {
            'img': image_urls,
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'author': author,
            'title': title,
            'alt': alt,
            'description': description,
        }
2353
2354
2355
class GerbilWithAJetpack(GenericNavigableComic):
    """Class to retrieve GerbilWithAJetpack comics."""
    name = 'gerbil'
    long_name = 'Gerbil With A Jetpack'
    url = 'http://gerbilwithajetpack.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        `soup` is the parsed page; `link` is not used by this implementation.
        Returns a dict with the image URLs, title, alt text, author and
        publication date. A page whose comic div has no image yields an
        empty image list and an empty alt text instead of an IndexError.
        """
        title = soup.find('h2', class_='post-title').string
        author = soup.find("span", class_="post-author").find("a").string
        date_str = soup.find("span", class_="post-date").string
        day = string_to_date(date_str, "%B %d, %Y")
        imgs = soup.find("div", id="comic").find_all("img")
        # Guard the empty case, as the sibling comic classes in this module do.
        alt = imgs[0]['alt'] if imgs else ""
        # Sanity check on the scraped markup: all images share one alt/title.
        assert all(i['alt'] == i['title'] == alt for i in imgs)
        return {
            'img': [i['src'] for i in imgs],
            'title': title,
            'alt': alt,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
2382
2383
2384
class EveryDayBlues(GenericNavigableComic):
    """Retriever for the Every Day Blues webcomic."""
    name = "blues"
    long_name = "Every Day Blues"
    url = "http://everydayblues.net"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for one comic page.

        `soup` is the parsed page; `link` is not used by this implementation.
        """
        title = soup.find("h2", class_="post-title").string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        date_text = soup.find("span", class_="post-date").string
        # The date is parsed with the German locale passed as third argument.
        published = string_to_date(date_text, "%d. %B %Y", "de_DE.utf8")
        images = soup.find("div", id="comic").find_all("img")
        # Sanity checks on the scraped markup.
        for image in images:
            assert image['alt'] == image['title'] == title
        assert len(images) <= 1
        return {
            'img': [image['src'] for image in images],
            'title': title,
            'author': author,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2410
2411
2412
class BiterComics(GenericNavigableComic):
    """Retriever for the Biter Comics webcomic."""
    name = "biter"
    long_name = "Biter Comics"
    url = "http://www.bitercomics.com"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for one comic page.

        `soup` is the parsed page; `link` is not used by this implementation.
        """
        title = soup.find("h1", class_="entry-title").string
        vcard = soup.find("span", class_="author vcard")
        author = vcard.find("a").string
        date_text = soup.find("span", class_="entry-date").string
        published = string_to_date(date_text, "%B %d, %Y")
        images = soup.find("div", id="comic").find_all("img")
        # Sanity checks on the scraped markup: exactly one image expected.
        for image in images:
            assert image['alt'] == image['title']
        assert len(images) == 1
        alt = images[0]['alt']
        return {
            'img': [image['src'] for image in images],
            'title': title,
            'alt': alt,
            'author': author,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2440
2441
2442
class TheAwkwardYeti(GenericNavigableComic):
    """Retriever for The Awkward Yeti webcomic."""
    # Also on http://www.gocomics.com/the-awkward-yeti
    # Also on http://larstheyeti.tumblr.com
    # Also on https://tapastic.com/series/TheAwkwardYeti
    name = 'yeti'
    long_name = 'The Awkward Yeti'
    url = 'http://theawkwardyeti.com'
    _categories = ('YETI', )
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for one comic page.

        `soup` is the parsed page; `link` is not used by this implementation.
        """
        title = soup.find('h2', class_='post-title').string
        date_text = soup.find("span", class_="post-date").string
        published = string_to_date(date_text, "%B %d, %Y")
        images = soup.find("div", id="comic").find_all("img")
        # Only the first image (if any) is required to have alt == title.
        assert all(image['alt'] == image['title'] for image in images[:1])
        return {
            'img': [image['src'] for image in images],
            'title': title,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2469
2470
2471
class PleasantThoughts(GenericNavigableComic):
    """Retriever for the Pleasant Thoughts webcomic."""
    name = 'pleasant'
    long_name = 'Pleasant Thoughts'
    url = 'http://pleasant-thoughts.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title and images) for one comic page.

        `soup` is the parsed page; `link` is not used by this implementation.
        """
        # Both the title and the images are looked up inside the post container.
        container = soup.find('div', class_='post-content')
        title = container.find('h2', class_='post-title').string
        entry = container.find("div", class_="entry")
        images = entry.find_all("img")
        return {
            'title': title,
            'img': [image['src'] for image in images],
        }
2489
2490
2491
class MisterAndMe(GenericNavigableComic):
    """Retriever for the Mister & Me webcomic."""
    # Also on http://www.gocomics.com/mister-and-me
    # Also on https://tapastic.com/series/Mister-and-Me
    name = 'mister'
    long_name = 'Mister & Me'
    url = 'http://www.mister-and-me.com'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for one comic page.

        `soup` is the parsed page; `link` is not used by this implementation.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        date_text = soup.find("span", class_="post-date").string
        published = string_to_date(date_text, "%B %d, %Y")
        images = soup.find("div", id="comic").find_all("img")
        # Sanity checks on the scraped markup: at most one image expected.
        for image in images:
            assert image['alt'] == image['title']
        assert len(images) <= 1
        if images:
            alt = images[0]['alt']
        else:
            alt = ""
        return {
            'img': [image['src'] for image in images],
            'title': title,
            'alt': alt,
            'author': author,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2521
2522
2523
class LastPlaceComics(GenericNavigableComic):
    """Retriever for the Last Place Comics webcomic."""
    name = 'lastplace'
    long_name = 'Last Place Comics'
    url = "http://lastplacecomics.com"
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for one comic page.

        `soup` is the parsed page; `link` is not used by this implementation.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        date_text = soup.find("span", class_="post-date").string
        published = string_to_date(date_text, "%B %d, %Y")
        images = soup.find("div", id="comic").find_all("img")
        # Sanity checks on the scraped markup: at most one image expected.
        for image in images:
            assert image['alt'] == image['title']
        assert len(images) <= 1
        if images:
            alt = images[0]['alt']
        else:
            alt = ""
        return {
            'img': [image['src'] for image in images],
            'title': title,
            'alt': alt,
            'author': author,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2551
2552
2553
class TalesOfAbsurdity(GenericNavigableComic):
    """Retriever for the Tales of Absurdity webcomic."""
    # Also on http://tapastic.com/series/Tales-Of-Absurdity
    # Also on http://talesofabsurdity.tumblr.com
    name = 'absurdity'
    long_name = 'Tales of Absurdity'
    url = 'http://talesofabsurdity.com'
    _categories = ('ABSURDITY', )
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for one comic page.

        `soup` is the parsed page; `link` is not used by this implementation.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        date_text = soup.find("span", class_="post-date").string
        published = string_to_date(date_text, "%B %d, %Y")
        images = soup.find("div", id="comic").find_all("img")
        # Sanity check on the scraped markup: alt and title must agree.
        for image in images:
            assert image['alt'] == image['title']
        if images:
            alt = images[0]['alt']
        else:
            alt = ""
        return {
            'img': [image['src'] for image in images],
            'title': title,
            'alt': alt,
            'author': author,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2583
2584
2585
class EndlessOrigami(GenericEmptyComic, GenericNavigableComic):
    """Retriever for the Endless Origami webcomic."""
    name = "origami"
    long_name = "Endless Origami"
    url = "http://endlessorigami.com"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for one comic page.

        `soup` is the parsed page; `link` is not used by this implementation.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        date_text = soup.find("span", class_="post-date").string
        published = string_to_date(date_text, "%B %d, %Y")
        images = soup.find("div", id="comic").find_all("img")
        # Sanity check on the scraped markup: alt and title must agree.
        for image in images:
            assert image['alt'] == image['title']
        if images:
            alt = images[0]['alt']
        else:
            alt = ""
        return {
            'img': [image['src'] for image in images],
            'title': title,
            'alt': alt,
            'author': author,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2612
2613
2614
class PlanC(GenericNavigableComic):
    """Retriever for the Plan C webcomic."""
    name = 'planc'
    long_name = 'Plan C'
    url = 'http://www.plancomic.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title, images, date) for one comic page.

        `soup` is the parsed page; `link` is not used by this implementation.
        """
        title = soup.find('h2', class_='post-title').string
        date_text = soup.find("span", class_="post-date").string
        published = string_to_date(date_text, "%B %d, %Y")
        comic_div = soup.find('div', id='comic')
        images = comic_div.find_all('img')
        return {
            'title': title,
            'img': [image['src'] for image in images],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2636
2637
2638
class BuniComic(GenericNavigableComic):
    """Retriever for the Buni webcomic."""
    name = 'buni'
    long_name = 'BuniComics'
    url = 'http://www.bunicomic.com'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (image and title) for one comic page.

        The title is taken from the title attribute of the single comic image.
        `soup` is the parsed page; `link` is not used by this implementation.
        """
        comic_div = soup.find('div', id='comic')
        images = comic_div.find_all('img')
        # Sanity checks on the scraped markup: exactly one image expected.
        for image in images:
            assert image['alt'] == image['title']
        assert len(images) == 1
        return {
            'img': [image['src'] for image in images],
            'title': images[0]['title'],
        }
2656 View Code Duplication
2657
2658
class GenericCommitStrip(GenericNavigableComic):
    """Shared base for the Commit Strip retrievers (one subclass per language)."""
    get_navi_link = get_a_rel_next
    get_first_comic_link = simulate_first_link
    # Placeholder: each concrete subclass sets its own first_url.
    first_url = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for one comic page.

        `soup` is the parsed page; `link` is not used by this implementation.
        """
        description = soup.find('meta', property='og:description')['content']
        title = soup.find('meta', property='og:title')['content']
        content_div = soup.find('div', class_='entry-content')
        images = content_div.find_all('img')
        # Space-joined title attributes; images without one contribute ''.
        image_titles = [image.get('title', '') for image in images]
        title2 = ' '.join(image_titles)
        # Each src is converted to a plain ASCII URI, then joined to the site URL.
        image_urls = [
            urljoin_wrapper(cls.url, convert_iri_to_plain_ascii_uri(image['src']))
            for image in images]
        return {
            'title': title,
            'title2': title2,
            'description': description,
            'img': image_urls,
        }
2677
2678
2679
class CommitStripFr(GenericCommitStrip):
    """Retriever for the French edition of Commit Strip."""
    name = "commit_fr"
    long_name = "Commit Strip (Fr)"
    url = "http://www.commitstrip.com/fr"
    _categories = ("FRANCAIS", )
    first_url = "http://www.commitstrip.com/fr/2012/02/22/interview/"
2686
2687
2688
class CommitStripEn(GenericCommitStrip):
    """Retriever for the English edition of Commit Strip."""
    name = "commit_en"
    long_name = "Commit Strip (En)"
    url = "http://www.commitstrip.com/en"
    first_url = "http://www.commitstrip.com/en/2012/02/22/interview/"
2694
2695
2696
class GenericBoumerie(GenericNavigableComic):
    """Shared base for the Boumeries retrievers (one subclass per language)."""
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next
    # Placeholders: each concrete subclass sets the date format and locale
    # that get_comic_info passes to string_to_date.
    date_format = NotImplemented
    lang = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for one comic page.

        `soup` is the parsed page; `link` is not used by this implementation.
        """
        title = soup.find('h2', class_='post-title').string
        short_url = soup.find('link', rel='shortlink')['href']
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        date_text = soup.find('span', class_='post-date').string
        published = string_to_date(date_text, cls.date_format, cls.lang)
        images = soup.find('div', id='comic').find_all('img')
        # Sanity check on the scraped markup: alt and title must agree.
        for image in images:
            assert image['alt'] == image['title']
        return {
            'short_url': short_url,
            'img': [image['src'] for image in images],
            'title': title,
            'author': author,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2722
2723
2724
class BoumerieEn(GenericBoumerie):
    """Retriever for the English edition of Boumeries."""
    name = "boumeries_en"
    long_name = "Boumeries (En)"
    url = "http://comics.boumerie.com"
    # Date format and locale used by the inherited get_comic_info.
    date_format = "%B %d, %Y"
    lang = "en_GB.UTF-8"
2731
2732
2733
class BoumerieFr(GenericBoumerie):
    """Retriever for the French edition of Boumeries."""
    name = "boumeries_fr"
    long_name = "Boumeries (Fr)"
    url = "http://bd.boumerie.com"
    _categories = ("FRANCAIS", )
    # Date format and locale used by the inherited get_comic_info.
    date_format = "%A, %d %B %Y"
    lang = "fr_FR.utf8"
2741
2742
2743
class UnearthedComics(GenericNavigableComic):
    """Class to retrieve Unearthed comics."""
    # Also on http://tapastic.com/series/UnearthedComics
    # Also on http://unearthedcomics.tumblr.com
    name = 'unearthed'
    long_name = 'Unearthed Comics'
    url = 'http://unearthedcomics.com'
    _categories = ('UNEARTHED', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://unearthedcomics.com/comics/world-with-turn-signals/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        `soup` is the parsed page; `link` is not used by this implementation.
        Returns a dict with the title, description, short URL, image URLs
        and publication date. Title and description default to "" when the
        corresponding element is missing from the page.
        """
        short_url = soup.find('link', rel='shortlink')['href']
        title_elt = soup.find('h1') or soup.find('h2')
        title = title_elt.string if title_elt else ""
        # Store the text of the description, not the <meta> element itself,
        # as the other comic classes in this module do.
        desc_elt = soup.find('meta', property='og:description')
        desc = desc_elt['content'] if desc_elt else ""
        date_str = soup.find('time', class_='published updated hidden')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        post = soup.find('div', class_="entry content entry-content type-portfolio")
        imgs = post.find_all('img')
        return {
            'title': title,
            'description': desc,
            'url2': short_url,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
2775
2776
2777
class Optipess(GenericNavigableComic):
    """Retriever for the Optipess webcomic."""
    name = 'optipess'
    long_name = 'Optipess'
    url = 'http://www.optipess.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for one comic page.

        A page without a comic div, or without images in it, yields an empty
        image list and an empty alt text.
        `soup` is the parsed page; `link` is not used by this implementation.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        comic_div = soup.find('div', id='comic')
        if comic_div:
            images = comic_div.find_all('img')
        else:
            images = []
        if images:
            alt = images[0]['title']
        else:
            alt = ""
        # Sanity check on the scraped markup: all images share one alt/title.
        for image in images:
            assert image['alt'] == image['title'] == alt
        date_text = soup.find('span', class_='post-date').string
        published = string_to_date(date_text, "%B %d, %Y")
        return {
            'title': title,
            'alt': alt,
            'author': author,
            'img': [image['src'] for image in images],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2805
2806
2807
class PainTrainComic(GenericNavigableComic):
    """Class to retrieve Pain Train Comics."""
    name = 'paintrain'
    long_name = 'Pain Train Comics'
    url = 'http://paintraincomic.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        `soup` is the parsed page; `link` is not used by this implementation.
        Returns a dict with the short URL, the comic number extracted from
        it, the image URLs, the publication date, the alt text and the title.
        A page whose comic div has no image yields an empty image list and
        an empty alt text instead of an IndexError.
        """
        title = soup.find('h2', class_='post-title').string
        short_url = soup.find('link', rel='shortlink')['href']
        # Escape the site URL so that its dots only match literal dots.
        short_url_re = re.compile('^%s/\\?p=([0-9]*)' % re.escape(cls.url))
        num = int(short_url_re.match(short_url).groups()[0])
        imgs = soup.find('div', id='comic').find_all('img')
        # Guard the empty case, as the sibling comic classes in this module do.
        alt = imgs[0]['title'] if imgs else ""
        # Sanity check on the scraped markup: all images share one alt/title.
        assert all(i['alt'] == i['title'] == alt for i in imgs)
        date_str = soup.find('span', class_='post-date').string
        day = string_to_date(date_str, "%d/%m/%Y")
        return {
            'short_url': short_url,
            'num': num,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'alt': alt,
            'title': title,
        }
2837
2838
2839
class MoonBeard(GenericNavigableComic):
    """Class to retrieve MoonBeard comics."""
    # Also on http://blog.squiresjam.es/moonbeard
    # Also on http://www.webtoons.com/en/comedy/moon-beard/list?title_no=471
    name = 'moonbeard'
    long_name = 'Moon Beard'
    url = 'http://moonbeard.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        `soup` is the parsed page; `link` is not used by this implementation.
        Returns a dict with the short URL, the comic number extracted from
        it, the image URLs, the publication date, the title, the
        space-joined article tags, the alt text and the author.
        A page whose comic div has no image yields an empty image list and
        an empty alt text instead of an IndexError.
        """
        title = soup.find('h2', class_='post-title').string
        short_url = soup.find('link', rel='shortlink')['href']
        # Escape the site URL so that its dots only match literal dots.
        short_url_re = re.compile('^%s/\\?p=([0-9]*)' % re.escape(cls.url))
        num = int(short_url_re.match(short_url).groups()[0])
        imgs = soup.find('div', id='comic').find_all('img')
        # Guard the empty case, as the sibling comic classes in this module do.
        alt = imgs[0]['title'] if imgs else ""
        # Sanity check on the scraped markup: all images share one alt/title.
        assert all(i['alt'] == i['title'] == alt for i in imgs)
        date_str = soup.find('span', class_='post-date').string
        day = string_to_date(date_str, "%B %d, %Y")
        tags = ' '.join(t['content'] for t in soup.find_all('meta', property='article:tag'))
        author = soup.find('span', class_='post-author').string
        return {
            'short_url': short_url,
            'num': num,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'title': title,
            'tags': tags,
            'alt': alt,
            'author': author,
        }
2875
2876
2877
class AHamADay(GenericNavigableComic):
    """Class to retrieve class A Ham A Day comics."""
    name = 'ham'
    long_name = 'A Ham A Day'
    url = 'http://www.ahammaday.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.ahammaday.com/today/3/6/french'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns None when the navigation list item (or the anchor inside
        it) is missing, instead of raising AttributeError.
        """
        # prev is next / next is prev
        li = last_soup.find('li', class_='previous' if next_ else 'next')
        return li.find('a') if li else None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        `soup` is the parsed page; `link` is not used by this implementation.
        Returns a dict with the image URLs, title, author and publication
        date.
        """
        date_str = soup.find('time', class_='published')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        author = soup.find('span', class_='blog-author').find('a').string
        title = soup.find('meta', property='og:title')['content']
        imgs = soup.find_all('meta', itemprop='image')
        return {
            'img': [i['content'] for i in imgs],
            'title': title,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
        }
2908
2909
2910
class LittleLifeLines(GenericNavigableComic):
    """Retriever for the Little Life Lines webcomic."""
    # Also on https://little-life-lines.tumblr.com
    name = 'life'
    long_name = 'Little Life Lines'
    url = 'http://www.littlelifelines.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.littlelifelines.com/comics/well-done'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the link to the next (or previous) comic, or None."""
        # prev is next / next is prev
        css_class = 'prev' if next_ else 'next'
        item = last_soup.find('li', class_=css_class)
        if not item:
            return None
        return item.find('a')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for one comic page.

        `soup` is the parsed page; `link` is not used by this implementation.
        """
        title = soup.find('meta', property='og:title')['content']
        description = soup.find('meta', property='og:description')['content']
        timestamp = soup.find('time', class_='published')['datetime']
        published = string_to_date(timestamp, "%Y-%m-%d")
        author = soup.find('a', rel='author').string
        body = soup.find('div', class_="body entry-content")
        # Keep only the images that actually carry a src attribute.
        images = [image for image in body.find_all('img')
                  if image.get('src') is not None]
        alt = images[0]['alt']
        return {
            'title': title,
            'alt': alt,
            'description': description,
            'author': author,
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [image['src'] for image in images],
        }
2949
2950
2951
class GenericWordPressInkblot(GenericNavigableComic):
    """Shared base for comics published with WordPress and Inkblot."""
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'first webcomic' anchor of the home page (None if absent)."""
        first_link_class = 'webcomic-link webcomic1-link first-webcomic-link first-webcomic1-link'
        home_page = get_soup_at_url(cls.url)
        return home_page.find('a', class_=first_link_class)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title, date, images) for one comic page.

        `soup` is the parsed page; `link` is not used by this implementation.
        """
        title = soup.find('meta', property='og:title')['content']
        image_div = soup.find('div', class_='webcomic-image')
        images = image_div.find_all('img')
        timestamp = soup.find('meta', property='article:published_time')['content']
        # Only the first 10 characters (the "YYYY-MM-DD" part) are parsed.
        published = string_to_date(timestamp[:10], "%Y-%m-%d")
        return {
            'title': title,
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [image['src'] for image in images],
        }
2974
2975
2976
class EverythingsStupid(GenericWordPressInkblot):
    """Retriever for the Everything's Stupid webcomic."""
    # Also on http://tapastic.com/series/EverythingsStupid
    # Also on http://www.webtoons.com/en/challenge/everythings-stupid/list?title_no=14591
    # Also on http://everythingsstupidcomics.tumblr.com
    name = "stupid"
    long_name = "Everything's Stupid"
    url = "http://everythingsstupid.net"
2984
2985
2986
class TheIsmComics(GenericWordPressInkblot):
    """Retriever for The Ism webcomic."""
    # Also on https://tapastic.com/series/TheIsm (?)
    name = "theism"
    long_name = "The Ism"
    url = "http://www.theism-comics.com"
2992
2993
2994
class WoodenPlankStudios(GenericWordPressInkblot):
    """Retriever for the Wooden Plank Studios webcomic."""
    name = "woodenplank"
    long_name = "Wooden Plank Studios"
    url = "http://woodenplankstudios.com"
2999
3000
3001
class ElectricBunnyComic(GenericNavigableComic):
    """Retriever for the Electric Bunny Comics webcomic."""
    # Also on http://electricbunnycomics.tumblr.com
    name = 'bunny'
    long_name = 'Electric Bunny Comic'
    url = 'http://www.electricbunnycomics.com/View/Comic/153/Welcome+to+Hell'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the parent element of the image whose alt text is 'First'."""
        page = get_soup_at_url(cls.url)
        first_button = page.find('img', alt='First')
        return first_button.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the parent of the 'Next' (or 'Back') image, or None."""
        label = 'Next' if next_ else 'Back'
        button = last_soup.find('img', alt=label)
        if not button:
            return None
        return button.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title and images) from the page's <meta> tags.

        `soup` is the parsed page; `link` is not used by this implementation.
        """
        title = soup.find('meta', property='og:title')['content']
        image_metas = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'img': [meta['content'] for meta in image_metas],
        }
3029
3030
3031
class SheldonComics(GenericNavigableComic):
    """Retriever for the Sheldon webcomic."""
    # Also on http://www.gocomics.com/sheldon
    name = 'sheldon'
    long_name = 'Sheldon Comics'
    url = 'http://www.sheldoncomics.com'

    @classmethod
    def get_first_comic_link(cls):
        """Return the anchor with id 'nav-first' from the home page (or None)."""
        home_page = get_soup_at_url(cls.url)
        return home_page.find("a", id="nav-first")

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the link to the next (or previous) comic, or None.

        Anchors whose href is exactly the site root are ignored.
        """
        anchor_id = "nav-next" if next_ else "nav-prev"
        candidates = (
            anchor for anchor in last_soup.find_all("a", id=anchor_id)
            if anchor['href'] != 'http://www.sheldoncomics.com')
        return next(candidates, None)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title and image) for one comic page.

        The title is taken from the title attribute of the single comic image.
        `soup` is the parsed page; `link` is not used by this implementation.
        """
        foot = soup.find("div", id="comic-foot")
        images = foot.find_all("img")
        # Sanity checks on the scraped markup: exactly one image expected.
        for image in images:
            assert image['alt'] == image['title']
        assert len(images) == 1
        title = images[0]['title']
        return {
            'title': title,
            'img': [image['src'] for image in images],
        }
3062
3063
3064
class Ubertool(GenericNavigableComic):
    """Retriever for the Ubertool webcomic."""
    # Also on http://ubertool.tumblr.com
    # Also on https://tapastic.com/series/ubertool
    name = 'ubertool'
    long_name = 'Ubertool'
    url = 'http://ubertoolcomic.com'
    _categories = ('UBERTOOL', )
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_a_comicnavbase_comicnavnext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (images, title, date) for one comic page.

        `soup` is the parsed page; `link` is not used by this implementation.
        """
        title = soup.find('h2', class_='post-title').string
        date_text = soup.find('span', class_='post-date').string
        published = string_to_date(date_text, "%B %d, %Y")
        comic_div = soup.find('div', id='comic')
        images = comic_div.find_all('img')
        return {
            'img': [image['src'] for image in images],
            'title': title,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
3089
3090
3091 View Code Duplication
class EarthExplodes(GenericNavigableComic):
    """Retriever for The Earth Explodes webcomic."""
    name = "earthexplodes"
    long_name = "The Earth Explodes"
    url = "http://www.earthexplodes.com"
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = "http://www.earthexplodes.com/comics/000/"

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor with id 'next' (or 'prev') found on the page."""
        anchor_id = "next" if next_ else "prev"
        return last_soup.find("a", id=anchor_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect the page title, image URLs and hover text of a comic.

        Image sources are joined onto cls.url. The hover text is the 'title'
        attribute of the first image, or an empty string if it has none.
        """
        page_title = soup.find("title").string
        pictures = soup.find("div", id="image").find_all("img")
        hover_text = pictures[0].get("title", "")
        return {
            "img": [urljoin_wrapper(cls.url, picture["src"]) for picture in pictures],
            "title": page_title,
            "alt": hover_text,
        }
3116
3117
3118
class CubeDrone(GenericNavigableComic):
    """Class to retrieve Cube Drone comics."""
    name = 'cubedrone'
    long_name = 'Cube Drone'
    url = 'http://cube-drone.com/comics'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        The link is the parent element of the 'glyphicon-backward' span found
        on the page at cls.url.
        """
        return get_soup_at_url(cls.url).find('span', class_='glyphicon glyphicon-backward').parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns the parent element of the chevron span ('right' for next,
        'left' for previous), or None when the page has no such span.
        """
        class_ = 'glyphicon glyphicon-chevron-' + ('right' if next_ else 'left')
        chevron = last_soup.find('span', class_=class_)
        # find() gives None when nothing matches: report "no link" rather than
        # failing with AttributeError on `.parent`.
        return None if chevron is None else chevron.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Title and secondary URL come from the twitter meta tags; 'title2' and
        'alt' come from the first image with class 'comic img-responsive'.
        """
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        url2 = soup.find('meta', attrs={'name': 'twitter:url'})['content']
        # date_str = soup.find('h2', class_='comic_title').find('small').string
        # day = string_to_date(date_str, "%B %d, %Y, %I:%M %p")
        imgs = soup.find_all('img', class_='comic img-responsive')
        title2 = imgs[0]['title']
        alt = imgs[0]['alt']
        return {
            'url2': url2,
            'title': title,
            'title2': title2,
            'alt': alt,
            'img': [i['src'] for i in imgs],
        }
3153
3154
3155
class MakeItStoopid(GenericNavigableComic):
    """Retriever for the Make It Stoopid webcomic."""
    name = "stoopid"
    long_name = "Make it stoopid"
    url = "http://makeitstoopid.com/comic.php"

    @classmethod
    def get_nav(cls, soup):
        """Return the navigation elements of a page as a list.

        Elements with class 'cnav' are split into the first five and the
        rest; both groups are asserted to be equal. Entries without an 'href'
        attribute are replaced by None. The positions are, in order:
        begin, prev, archive, next, end.
        """
        elements = soup.find_all(class_="cnav")
        first_group, second_group = elements[:5], elements[5:]
        assert first_group == second_group
        return [element if element.get("href") is not None else None
                for element in first_group]

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'begin' navigation element of the page at cls.url."""
        front_page = get_soup_at_url(cls.url)
        return cls.get_nav(front_page)[0]

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the 'next' (index 3) or 'prev' (index 1) navigation element."""
        position = 3 if next_ else 1
        return cls.get_nav(last_soup)[position]

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the comic title (taken from the link) and its image URLs."""
        link_title = link["title"]
        pictures = soup.find_all("img", id="comicimg")
        return {
            "title": link_title,
            "img": [picture["src"] for picture in pictures],
        }
3189
3190
3191 View Code Duplication
class MarketoonistComics(GenericNavigableComic):
    """Retriever for the Marketoonist cartoons."""
    name = "marketoonist"
    long_name = "Marketoonist"
    url = "https://marketoonist.com/cartoons"
    get_first_comic_link = simulate_first_link
    get_navi_link = get_link_rel_next
    first_url = "https://marketoonist.com/2002/10/the-8-types-of-brand-managers-2.html"

    @classmethod
    def get_comic_info(cls, soup, link):
        """Read images, publication date and title from the page meta tags."""
        image_metas = soup.find_all("meta", property="og:image")
        # Only the first ten characters (YYYY-MM-DD) of the timestamp are parsed.
        published_text = soup.find("meta", property="article:published_time")["content"][:10]
        published = string_to_date(published_text, "%Y-%m-%d")
        og_title = soup.find("meta", property="og:title")["content"]
        info = {"img": [meta["content"] for meta in image_metas]}
        info["day"] = published.day
        info["month"] = published.month
        info["year"] = published.year
        info["title"] = og_title
        return info
3213
3214
3215 View Code Duplication
class ConsoliaComics(GenericNavigableComic):
    """Class to retrieve Consolia comics."""
    name = 'consolia'
    long_name = 'consolia'
    url = 'https://consolia-comic.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        The link is the anchor inside the span with class 'first' on the page
        at cls.url.
        """
        return get_soup_at_url(cls.url).find('span', class_='first').find('a')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns the anchor inside the span with class 'next' (or 'prev'), or
        None when the page has no such span or the span holds no anchor.
        """
        holder = last_soup.find('span', class_='next' if next_ else 'prev')
        # find() gives None when nothing matches: report "no link" rather than
        # failing with AttributeError on the chained find().
        return None if holder is None else holder.find('a')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The date is read from the 'datetime' attribute of the first <time>
        element and parsed as "%Y-%m-%d"; 'alt' is the 'title' attribute of
        the first image in the 'comic' div.
        """
        title = soup.find('meta', property='og:title')['content']
        date_str = soup.find('time')["datetime"]
        day = string_to_date(date_str, "%Y-%m-%d")
        imgs = soup.find('div', id='comic').find_all('img')
        alt = imgs[0]['title']
        # article = soup.find('div', id='blag')
        # text = article.encode_contents()
        return {
            'title': title,
            'alt': alt,
            'img': [i['src'] for i in imgs],
            # 'text': text,
            'day': day.day,
            'month': day.month,
            'year': day.year,
        }
3251
3252
3253 View Code Duplication
class TuMourrasMoinsBete(GenericNavigableComic):
    """Retriever for the Tu Mourras Moins Bete blog."""
    name = "mourrasmoinsbete"
    long_name = "Tu Mourras Moins Bete"
    url = "http://tumourrasmoinsbete.blogspot.fr"
    _categories = ("FRANCAIS", )
    get_first_comic_link = simulate_first_link
    first_url = "http://tumourrasmoinsbete.blogspot.fr/2008/06/essai.html"

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the blog pager anchor: 'newer' for next, 'older' for previous."""
        if next_:
            pager_id = "Blog1_blog-pager-newer-link"
        else:
            pager_id = "Blog1_blog-pager-older-link"
        return last_soup.find("a", id=pager_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect the image URLs, author and page title of a post."""
        page_title = soup.find("title").string
        body = soup.find("div", itemprop="description articleBody")
        pictures = body.find_all("img")
        author_name = soup.find("span", itemprop="author").string
        return {
            "img": [picture["src"] for picture in pictures],
            "author": author_name,
            "title": page_title,
        }
3278
3279
3280
class GeekAndPoke(GenericNavigableComic):
    """Class to retrieve Geek And Poke comics."""
    name = 'geek'
    long_name = 'Geek And Poke'
    url = 'http://geek-and-poke.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://geek-and-poke.com/geekandpoke/2006/8/27/a-new-place-for-a-not-so-old-blog.html'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        NOTE(review): the 'prev-item' class is used when moving forward and
        'next-item' when moving backward - presumably the site labels its
        links in reverse chronological order; confirm against the live markup.
        """
        return last_soup.find('a', class_='prev-item' if next_ else 'next-item')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Images are looked up in the "body entry-content" div, falling back to
        the "special-content" div, and images without a 'src' are ignored.
        'alt' is the alt text of the first image, or an empty string when the
        post has no image (or the image has no alt text).
        """
        title = soup.find('meta', property='og:title')['content']
        desc = soup.find('meta', property='og:description')['content']
        date_str = soup.find('time', class_='published')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        author = soup.find('a', rel='author').string
        div_content = (soup.find('div', class_="body entry-content") or
                       soup.find('div', class_="special-content"))
        imgs = div_content.find_all('img')
        imgs = [i for i in imgs if i.get('src') is not None]
        # NOTE(review): `in` on a parsed tag may test its children rather than
        # its attributes, which would make this check always pass - confirm.
        assert all('title' not in i or i['alt'] == i['title'] for i in imgs)
        # Keep 'alt' a string in every case (it used to fall back to a list).
        alt = imgs[0].get('alt', "") if imgs else ""
        return {
            'title': title,
            'alt': alt,
            'description': desc,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
        }
3318
3319
3320
class GloryOwlComix(GenericNavigableComic):
    """Retriever for the Glory Owl blog."""
    name = "gloryowl"
    long_name = "Glory Owl"
    url = "http://gloryowlcomix.blogspot.fr"
    _categories = ("NSFW", "FRANCAIS")
    get_first_comic_link = simulate_first_link
    first_url = "http://gloryowlcomix.blogspot.fr/2013/02/1_7.html"

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the blog pager anchor: 'newer' for next, 'older' for previous."""
        if next_:
            pager_id = "Blog1_blog-pager-newer-link"
        else:
            pager_id = "Blog1_blog-pager-older-link"
        return last_soup.find("a", id=pager_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect the image URLs, author and page title of a post.

        Images are taken from the 'href' of <link rel="image_src"> elements.
        """
        page_title = soup.find("title").string
        image_links = soup.find_all("link", rel="image_src")
        author_name = soup.find("a", rel="author").string
        return {
            "img": [image_link["href"] for image_link in image_links],
            "author": author_name,
            "title": page_title,
        }
3345
3346
3347
class GenericTumblrV1(GenericComic):
    """Base class for comics read through the Tumblr V1 API.

    Subclasses in this file only declare `name`, `long_name` and `url`.
    """
    _categories = ("TUMBLR", )

    @classmethod
    def get_next_comic(cls, last_comic):
        """Yield a comic dict for each new post that describes a comic.

        Posts for which get_comic_info returns None are skipped.
        """
        for post in cls.get_posts(last_comic):
            info = cls.get_comic_info(post)
            if info is None:
                continue
            yield info

    @classmethod
    def get_url_from_post(cls, post):
        """Return the 'url' attribute of an API post element."""
        return post["url"]

    @classmethod
    def get_api_url(cls):
        """Return the API endpoint, i.e. '/api/read/' joined onto cls.url."""
        return urljoin_wrapper(cls.url, "/api/read/")

    @classmethod
    def get_comic_info(cls, post):
        """Convert an API post element into a comic dict.

        Returns None when the post type is not 'photo'. The date is derived
        from the post's 'unix-timestamp' via datetime.fromtimestamp.
        """
        if post["type"] != "photo":
            return None
        post_id = int(post["id"])
        post_api_url = cls.get_api_url() + "?id=%d" % (post_id)
        published = datetime.datetime.fromtimestamp(int(post["unix-timestamp"])).date()
        caption_tag = post.find("photo-caption")
        caption_text = caption_tag.string if caption_tag else ""
        tag_names = " ".join(tag.string for tag in post.find_all("tag"))
        # Photos may sit in nested 'photo' elements and/or directly in the post.
        photo_holders = post.find_all("photo") or [post]
        # Several resolutions are listed: find() keeps the first one of each.
        photo_urls = [holder.find("photo-url") for holder in photo_holders]
        return {
            "url": cls.get_url_from_post(post),
            "url2": post["url-with-slug"],
            "day": published.day,
            "month": published.month,
            "year": published.year,
            "title": caption_text,
            "tags": tag_names,
            "img": [photo_url.string for photo_url in photo_urls],
            "tumblr-id": post_id,
            "api_url": post_api_url,
        }

    @classmethod
    def get_posts(cls, last_comic, nb_post_per_call=10):
        """Fetch the posts published after last_comic, oldest first.

        The API is paged nb_post_per_call posts at a time (50 at most, as
        noted by the original author) and lists posts from newest to oldest;
        they are accumulated until the URL of last_comic is met and handed
        back in reverse, i.e. chronological, order.

        When last_comic is None every post is returned. When the previous
        post cannot be fetched, or is never met while paging, a message is
        printed and nothing is returned.
        """
        collected = []
        target_url = last_comic["url"] if last_comic else None
        if last_comic is not None:
            # Tumblr posts get deleted now and then. If the previous post is
            # gone, paging through the whole blog looking for it would take
            # long and fail anyway, so check it first and give up early.
            previous_api_url = last_comic["api_url"]
            try:
                get_soup_at_url(previous_api_url)
            except urllib.error.HTTPError:
                try:
                    get_soup_at_url(cls.url)
                except urllib.error.HTTPError:
                    print("Did not find previous post nor main url %s" % cls.url)
                else:
                    print("Did not find previous post %s : it might have been deleted" % previous_api_url)
                return reversed(collected)
        base_api_url = cls.get_api_url()
        summary = get_soup_at_url(base_api_url).find("posts")
        first_index, expected_total = int(summary["start"]), int(summary["total"])
        assert first_index == 0
        for offset in range(0, expected_total, nb_post_per_call):
            page_url = base_api_url + "?start=%d&num=%d" % (offset, nb_post_per_call)
            page = get_soup_at_url(page_url).find("posts")
            page_start, page_total = int(page["start"]), int(page["total"])
            assert offset == page_start, "%d != %d" % (offset, page_start)
            # The total may change while paging; this is not handled yet.
            assert expected_total == page_total, "%d != %d" % (expected_total, page_total)
            for post in page.find_all("post"):
                if target_url and target_url == cls.get_url_from_post(post):
                    return reversed(collected)
                collected.append(post)
        if target_url is None:
            return reversed(collected)
        print("Did not find %s : there might be a problem" % target_url)
        return []
3440
3441
3442
class SaturdayMorningBreakfastCerealTumblr(GenericTumblrV1):
    """Retriever for Saturday Morning Breakfast Cereal, read from its Tumblr blog."""
    # Other sources: http://www.gocomics.com/saturday-morning-breakfast-cereal
    # Other sources: http://www.smbc-comics.com
    name = "smbc-tumblr"
    long_name = "Saturday Morning Breakfast Cereal (from Tumblr)"
    url = "http://smbc-comics.tumblr.com"
    _categories = ("SMBC", )
3450
3451
3452
class IrwinCardozo(GenericTumblrV1):
    """Retriever for Irwin Cardozo's comics, read from their Tumblr blog."""
    name = "irwinc"
    long_name = "Irwin Cardozo"
    url = "http://irwincardozocomics.tumblr.com"
3457
3458
3459
class AccordingToDevin(GenericTumblrV1):
    """Retriever for According To Devin, read from its Tumblr blog."""
    name = "devin"
    long_name = "According To Devin"
    url = "http://accordingtodevin.tumblr.com"
3464
3465
3466
class ItsTheTieTumblr(GenericTumblrV1):
    """Retriever for It's the tie, read from its Tumblr blog."""
    # Other sources: http://itsthetie.com
    # Other sources: https://tapastic.com/series/itsthetie
    name = "tie-tumblr"
    long_name = "It's the tie (from Tumblr)"
    url = 'http://itsthetie.tumblr.com'
    _categories = ("TIE", )
3474
3475
3476
class OctopunsTumblr(GenericTumblrV1):
    """Retriever for Octopuns, read from its Tumblr blog."""
    # Other sources: http://www.octopuns.net
    name = "octopuns-tumblr"
    long_name = "Octopuns (from Tumblr)"
    url = "http://octopuns.tumblr.com"
3482
3483
3484
class PicturesInBoxesTumblr(GenericTumblrV1):
    """Retriever for Pictures In Boxes, read from its Tumblr blog."""
    # Other sources: http://www.picturesinboxes.com
    name = "picturesinboxes-tumblr"
    long_name = "Pictures in Boxes (from Tumblr)"
    url = "http://picturesinboxescomic.tumblr.com"
3490
3491
3492
class TubeyToonsTumblr(GenericTumblrV1):
    """Retriever for Tubey Toons, read from its Tumblr blog."""
    # Other sources: http://tapastic.com/series/Tubey-Toons
    # Other sources: http://tubeytoons.com
    name = "tubeytoons-tumblr"
    long_name = "Tubey Toons (from Tumblr)"
    url = "http://tubeytoons.tumblr.com"
    # NOTE(review): the category is spelled 'TUNEYTOONS'; presumably it has to
    # match other classes sharing it - confirm before renaming.
    _categories = ("TUNEYTOONS", )
3500
3501
3502
class UnearthedComicsTumblr(GenericTumblrV1):
    """Retriever for Unearthed Comics, read from its Tumblr blog."""
    # Other sources: http://tapastic.com/series/UnearthedComics
    # Other sources: http://unearthedcomics.com
    name = "unearthed-tumblr"
    long_name = "Unearthed Comics (from Tumblr)"
    url = "http://unearthedcomics.tumblr.com"
    _categories = ("UNEARTHED", )
3510
3511
3512
class PieComic(GenericTumblrV1):
    """Retriever for Pie Comic, read from its Tumblr blog."""
    name = "pie"
    long_name = "Pie Comic"
    url = 'http://piecomic.tumblr.com'
3517
3518
3519
class MrEthanDiamond(GenericTumblrV1):
    """Retriever for Mr Ethan Diamond's comics, read from their Tumblr blog."""
    name = "diamond"
    long_name = "Mr Ethan Diamond"
    url = "http://mrethandiamond.tumblr.com"
3524
3525
3526
class Flocci(GenericTumblrV1):
    """Retriever for floccinaucinihilipilification, read from its Tumblr blog."""
    name = "flocci"
    long_name = "floccinaucinihilipilification"
    url = 'http://floccinaucinihilipilificationa.tumblr.com'
3531
3532
3533
class UpAndOut(GenericTumblrV1):
    """Retriever for Up & Out, read from its Tumblr blog."""
    # Other sources: http://tapastic.com/series/UP-and-OUT
    name = "upandout"
    long_name = "Up And Out (from Tumblr)"
    url = "http://upandoutcomic.tumblr.com"
3539
3540
3541
class Pundemonium(GenericTumblrV1):
    """Retriever for Pundemonium, read from its Tumblr blog."""
    name = "pundemonium"
    long_name = "Pundemonium"
    url = "http://monstika.tumblr.com"
3546
3547
3548
class PoorlyDrawnLinesTumblr(GenericTumblrV1):
    """Retriever for Poorly Drawn Lines, read from its Tumblr blog."""
    # Other sources: http://poorlydrawnlines.com
    name = "poorlydrawn-tumblr"
    long_name = "Poorly Drawn Lines (from Tumblr)"
    url = "http://pdlcomics.tumblr.com"
    _categories = ("POORLYDRAWN", )
3555
3556
3557
class PearShapedComics(GenericTumblrV1):
    """Retriever for Pear-Shaped Comics, read through the Tumblr V1 API."""
    name = "pearshaped"
    long_name = "Pear-Shaped Comics"
    url = "http://pearshapedcomics.com"
3562
3563
3564
class PondScumComics(GenericTumblrV1):
    """Retriever for Pond Scum, read from its Tumblr blog."""
    name = "pond"
    long_name = "Pond Scum"
    url = "http://pondscumcomic.tumblr.com"
3569
3570
3571
class MercworksTumblr(GenericTumblrV1):
    """Retriever for Mercworks, read from its Tumblr blog."""
    # Other sources: http://mercworks.net
    name = "mercworks-tumblr"
    long_name = "Mercworks (from Tumblr)"
    url = "http://mercworks.tumblr.com"
3577
3578
3579
class OwlTurdTumblr(GenericTumblrV1):
    """Retriever for Owl Turd, read through the Tumblr V1 API."""
    # Other sources: http://tapastic.com/series/Owl-Turd-Comix
    name = "owlturd-tumblr"
    long_name = "Owl Turd (from Tumblr)"
    url = "http://owlturd.com"
    _categories = ("OWLTURD", )
3586
3587
3588
class VectorBelly(GenericTumblrV1):
    """Retriever for Vector Belly, read from its Tumblr blog."""
    # Other sources: http://vectorbelly.com
    name = "vector"
    long_name = "Vector Belly"
    url = "http://vectorbelly.tumblr.com"
3594
3595
3596
class GoneIntoRapture(GenericTumblrV1):
    """Retriever for Gone Into Rapture, read through the Tumblr V1 API."""
    # Other sources: http://goneintorapture.tumblr.com
    # Other sources: http://tapastic.com/series/Goneintorapture
    name = "rapture"
    long_name = "Gone Into Rapture"
    url = "http://www.goneintorapture.com"
3603
3604
3605
class TheOatmealTumblr(GenericTumblrV1):
    """Retriever for The Oatmeal, read from its Tumblr blog."""
    # Other sources: http://theoatmeal.com
    name = "oatmeal-tumblr"
    long_name = "The Oatmeal (from Tumblr)"
    url = "http://oatmeal.tumblr.com"
3611
3612
3613
class HeckIfIKnowComicsTumblr(GenericTumblrV1):
    """Retriever for Heck If I Know Comics, read through the Tumblr V1 API."""
    # Other sources: http://tapastic.com/series/Regular
    name = "heck-tumblr"
    long_name = "Heck if I Know comics (from Tumblr)"
    url = "http://heckifiknowcomics.com"
3619
3620
3621
class MyJetPack(GenericTumblrV1):
    """Retriever for My Jet Pack, read from its Tumblr blog."""
    name = "jetpack"
    long_name = "My Jet Pack"
    url = "http://myjetpack.tumblr.com"
3626
3627
3628
class CheerUpEmoKidTumblr(GenericTumblrV1):
    """Retriever for Cheer Up Emo Kid, read from its Tumblr blog."""
    # Other sources: http://www.cheerupemokid.com
    # Other sources: http://tapastic.com/series/CUEK
    name = "cuek-tumblr"
    long_name = "Cheer Up Emo Kid (from Tumblr)"
    url = "http://enzocomics.tumblr.com"
3635
3636
3637
class ForLackOfABetterComic(GenericTumblrV1):
    """Retriever for For Lack Of A Better Comic, read from its Tumblr blog."""
    # Other sources: http://forlackofabettercomic.com
    name = "lack"
    long_name = "For Lack Of A Better Comic"
    url = "http://forlackofabettercomic.tumblr.com"
3643
3644
3645
class ZenPencilsTumblr(GenericTumblrV1):
    """Retriever for Zen Pencils, read from its Tumblr blog."""
    # Other sources: http://zenpencils.com
    # Other sources: http://www.gocomics.com/zen-pencils
    name = "zenpencils-tumblr"
    long_name = "Zen Pencils (from Tumblr)"
    url = "http://zenpencils.tumblr.com"
    _categories = ("ZENPENCILS", )
3653
3654
3655
class ThreeWordPhraseTumblr(GenericTumblrV1):
    """Retriever for Three Word Phrase, read from its Tumblr blog."""
    # Other sources: http://threewordphrase.com
    name = "threeword-tumblr"
    long_name = "Three Word Phrase (from Tumblr)"
    url = "http://www.threewordphrase.tumblr.com"
3661
3662
3663
class TimeTrabbleTumblr(GenericTumblrV1):
    """Retriever for Time Trabble, read from its Tumblr blog."""
    # Other sources: http://timetrabble.com
    name = "timetrabble-tumblr"
    long_name = "Time Trabble (from Tumblr)"
    url = "http://timetrabble.tumblr.com"
3669
3670
3671
class SafelyEndangeredTumblr(GenericTumblrV1):
    """Retriever for Safely Endangered, read through the Tumblr V1 API."""
    # Other sources: http://www.safelyendangered.com
    name = "endangered-tumblr"
    long_name = "Safely Endangered (from Tumblr)"
    url = "http://tumblr.safelyendangered.com"
3677
3678
3679
class MouseBearComedyTumblr(GenericTumblrV1):
    """Retriever for Mouse Bear Comedy, read from its Tumblr blog."""
    # Other sources: http://www.mousebearcomedy.com
    name = "mousebear-tumblr"
    long_name = "Mouse Bear Comedy (from Tumblr)"
    url = "http://mousebearcomedy.tumblr.com"
3685
3686
3687
class BouletCorpTumblr(GenericTumblrV1):
    """Retriever for Boulet Corp, read from its Tumblr blog."""
    # Other sources: http://www.bouletcorp.com
    name = "boulet-tumblr"
    long_name = "Boulet Corp (from Tumblr)"
    url = "http://bouletcorp.tumblr.com"
    _categories = ("BOULET", )
3694
3695
3696
class TheAwkwardYetiTumblr(GenericTumblrV1):
    """Retriever for The Awkward Yeti, read from its Tumblr blog."""
    # Other sources: http://www.gocomics.com/the-awkward-yeti
    # Other sources: http://theawkwardyeti.com
    # Other sources: https://tapastic.com/series/TheAwkwardYeti
    name = "yeti-tumblr"
    long_name = "The Awkward Yeti (from Tumblr)"
    url = "http://larstheyeti.tumblr.com"
    _categories = ("YETI", )
3705
3706
3707
class NellucNhoj(GenericTumblrV1):
    """Retriever for Nelluc Nhoj, read through the Tumblr V1 API."""
    name = "nhoj"
    long_name = "Nelluc Nhoj"
    url = "http://nellucnhoj.com"
3712
3713
3714
class DownTheUpwardSpiralTumblr(GenericTumblrV1):
    """Retriever for Down The Upward Spiral, read from its Tumblr blog."""
    # Other sources: http://www.downtheupwardspiral.com
    name = "spiral-tumblr"
    long_name = "Down the Upward Spiral (from Tumblr)"
    url = "http://downtheupwardspiral.tumblr.com"
3720
3721
3722
class AsPerUsualTumblr(GenericTumblrV1):
    """Class to retrieve As Per Usual comics."""
    # Also on https://tapastic.com/series/AsPerUsual
    name = 'usual-tumblr'
    long_name = 'As Per Usual (from Tumblr)'
    url = 'http://as-per-usual.tumblr.com'
    # Declared under the underscore-prefixed name that every other comic
    # class in this file uses for its categories.
    _categories = ('DAMILEE', )
    # Former spelling, kept as an alias so anything reading it still works.
    categories = _categories
3729
3730
3731
class HotComicsForCoolPeopleTumblr(GenericTumblrV1):
    """Class to retrieve Hot Comics For Cool People."""
    # Also on https://tapastic.com/series/Hot-Comics-For-Cool-People
    # Also on http://hotcomics.biz (links to tumblr)
    # Also on http://hcfcp.com (links to tumblr)
    name = 'hotcomics-tumblr'
    long_name = 'Hot Comics For Cool People (from Tumblr)'
    url = 'http://hotcomicsforcoolpeople.tumblr.com'
    # Declared under the underscore-prefixed name that every other comic
    # class in this file uses for its categories.
    _categories = ('DAMILEE', )
    # Former spelling, kept as an alias so anything reading it still works.
    categories = _categories
3740
3741
3742
class OneOneOneOneComicTumblr(GenericTumblrV1):
    """Retriever for 1111 Comics, read from its Tumblr blog."""
    # Other sources: http://www.1111comics.me
    # Other sources: https://tapastic.com/series/1111-Comics
    name = "1111-tumblr"
    long_name = "1111 Comics (from Tumblr)"
    url = "http://comics1111.tumblr.com"
    _categories = ("ONEONEONEONE", )
3750
3751
3752
class JhallComicsTumblr(GenericTumblrV1):
    """Retriever for Jhall Comics, read from its Tumblr blog."""
    # Other sources: http://jhallcomics.com
    name = "jhall-tumblr"
    long_name = "Jhall Comics (from Tumblr)"
    url = "http://jhallcomics.tumblr.com"
3758
3759
3760
class BerkeleyMewsTumblr(GenericTumblrV1):
    """Retriever for Berkeley Mews, read from its Tumblr blog."""
    # Other sources: http://www.gocomics.com/berkeley-mews
    # Other sources: http://www.berkeleymews.com
    name = "berkeley-tumblr"
    long_name = "Berkeley Mews (from Tumblr)"
    url = "http://mews.tumblr.com"
    _categories = ("BERKELEY", )
3768
3769
3770
class JoanCornellaTumblr(GenericTumblrV1):
    """Retriever for Joan Cornella's comics, read from their Tumblr blog."""
    # Other sources: http://joancornella.net
    name = "cornella-tumblr"
    long_name = "Joan Cornella (from Tumblr)"
    url = "http://cornellajoan.tumblr.com"
3776
3777
3778
class RespawnComicTumblr(GenericTumblrV1):
    """Retriever for Respawn Comic, read from its Tumblr blog."""
    # Other sources: http://respawncomic.com
    name = "respawn-tumblr"
    long_name = "Respawn Comic (from Tumblr)"
    url = "http://respawncomic.tumblr.com"
3784
3785
3786
class ChrisHallbeckTumblr(GenericTumblrV1):
    """Retriever for Chris Hallbeck's comics, read from their Tumblr blog."""
    # Other sources: https://tapastic.com/ChrisHallbeck
    # Other sources: http://maximumble.com
    # Other sources: http://minimumble.com
    # Other sources: http://thebookofbiff.com
    name = "hallbeck-tumblr"
    # NOTE(review): 'Hallback' below differs from the 'Hallbeck' spelling used
    # in the URL and class name; left as is, since other code may rely on the
    # exact strings - confirm before changing.
    long_name = "Chris Hallback (from Tumblr)"
    url = "http://chrishallbeck.tumblr.com"
    _categories = ("HALLBACK", )
3796
3797
3798
class ComicNuggets(GenericTumblrV1):
    """Retriever for Comic Nuggets, read through the Tumblr V1 API."""
    name = "nuggets"
    long_name = "Comic Nuggets"
    url = "http://comicnuggets.com"
3803
3804
3805
class PigeonGazetteTumblr(GenericTumblrV1):
    """Retriever for The Pigeon Gazette, read from its Tumblr blog."""
    # Other sources: https://tapastic.com/series/The-Pigeon-Gazette
    name = "pigeon-tumblr"
    long_name = "The Pigeon Gazette (from Tumblr)"
    url = "http://thepigeongazette.tumblr.com"
3811
3812
3813
class CancerOwl(GenericTumblrV1):
    """Retriever for Cancer Owl, read from its Tumblr blog."""
    # Other sources: http://cancerowl.com
    name = "cancerowl-tumblr"
    long_name = "Cancer Owl (from Tumblr)"
    url = "http://cancerowl.tumblr.com"
3819
3820
3821
class FowlLanguageTumblr(GenericTumblrV1):
    """Retriever for Fowl Language Comics, read from its Tumblr blog."""
    # Other sources: http://www.fowllanguagecomics.com
    # Other sources: http://tapastic.com/series/Fowl-Language-Comics
    # Other sources: http://www.gocomics.com/fowl-language
    name = "fowllanguage-tumblr"
    long_name = "Fowl Language Comics (from Tumblr)"
    url = "http://fowllanguagecomics.tumblr.com"
    _categories = ("FOWLLANGUAGE", )
3830
3831
3832
class TheOdd1sOutTumblr(GenericTumblrV1):
    """Retriever for The Odd 1s Out, read from its Tumblr blog."""
    # Other sources: http://theodd1sout.com
    # Other sources: https://tapastic.com/series/Theodd1sout
    name = "theodd-tumblr"
    long_name = "The Odd 1s Out (from Tumblr)"
    url = "http://theodd1sout.tumblr.com"
3839
3840
3841
class TheUnderfoldTumblr(GenericTumblrV1):
    """Retriever for The Underfold, read from its Tumblr blog."""
    # Other sources: http://theunderfold.com
    name = "underfold-tumblr"
    long_name = "The Underfold (from Tumblr)"
    url = "http://theunderfold.tumblr.com"
3847
3848
3849
class LolNeinTumblr(GenericTumblrV1):
    """Retriever for Lol Nein, read from its Tumblr blog."""
    # Other sources: http://lolnein.com
    name = "lolnein-tumblr"
    long_name = "Lol Nein (from Tumblr)"
    url = "http://lolneincom.tumblr.com"
3855
3856
3857
class FatAwesomeComicsTumblr(GenericTumblrV1):
    """Retriever for Fat Awesome Comics, read from its Tumblr blog."""
    # Other sources: http://fatawesome.com/comics
    name = "fatawesome-tumblr"
    long_name = "Fat Awesome (from Tumblr)"
    url = "http://fatawesomecomedy.tumblr.com"
3863
3864
3865
class TheWorldIsFlatTumblr(GenericTumblrV1):
    """Retriever for The World Is Flat, read from its Tumblr blog."""
    # Other sources: https://tapastic.com/series/The-World-is-Flat
    name = "flatworld-tumblr"
    long_name = "The World Is Flat (from Tumblr)"
    url = "http://theworldisflatcomics.tumblr.com"
3871
3872
3873
class DorrisMc(GenericTumblrV1):
    """Retriever for Dorris Mc Comics, read through the Tumblr V1 API."""
    # Other sources: http://www.gocomics.com/dorris-mccomics
    name = "dorrismc"
    long_name = "Dorris Mc"
    url = "http://dorrismccomics.com"
3879
3880
3881
class LeleozTumblr(GenericEmptyComic, GenericTumblrV1):
    """Retriever for Leleoz; GenericEmptyComic comes first in its bases."""
    # Other sources: https://tapastic.com/series/Leleoz
    name = "leleoz-tumblr"
    long_name = "Leleoz (from Tumblr)"
    url = "http://leleozcomics.tumblr.com"
3887
3888
3889
class MoonBeardTumblr(GenericTumblrV1):
    """Retriever for Moon Beard, read through the Tumblr V1 API."""
    # Other sources: http://moonbeard.com
    # Other sources: http://www.webtoons.com/en/comedy/moon-beard/list?title_no=471
    name = "moonbeard-tumblr"
    long_name = "Moon Beard (from Tumblr)"
    url = "http://blog.squiresjam.es/moonbeard"
3896
3897
3898
class AComik(GenericTumblrV1):
    """Retriever for A Comik, built on GenericTumblrV1."""
    url = 'http://acomik.com'
    name = 'comik'
    long_name = 'A Comik'
3903
3904
3905
class ClassicRandy(GenericTumblrV1):
    """Retriever for Classic Randy comics, built on GenericTumblrV1."""
    url = 'http://classicrandy.tumblr.com'
    name = 'randy'
    long_name = 'Classic Randy'
3910
3911
3912
class DagssonTumblr(GenericTumblrV1):
    """Retriever for the Tumblr edition of Dagsson comics."""
    # Also published at http://www.dagsson.com
    url = 'http://hugleikurdagsson.tumblr.com'
    name = 'dagsson-tumblr'
    long_name = 'Dagsson Hugleikur (from Tumblr)'
3918
3919
3920
class LinsEditionsTumblr(GenericTumblrV1):
    """Retriever for the Tumblr edition of L.I.N.S. Editions."""
    # Also published at https://linsedition.com
    # Has since moved to http://warandpeas.tumblr.com
    url = 'http://linscomics.tumblr.com'
    name = 'lins-tumblr'
    long_name = 'L.I.N.S. Editions (from Tumblr)'
    _categories = ('LINS', )
3928
3929
3930
class WarAndPeasTumblr(GenericTumblrV1):
    """Retriever for the Tumblr edition of War And Peas."""
    # Formerly published at http://linscomics.tumblr.com
    url = 'http://warandpeas.tumblr.com'
    name = 'warandpeas-tumblr'
    long_name = 'War And Peas (from Tumblr)'
    _categories = ('WARANDPEAS', )
3937
3938
3939
class OrigamiHotDish(GenericTumblrV1):
    """Retriever for Origami Hot Dish comics, built on GenericTumblrV1."""
    url = 'http://origamihotdish.com'
    name = 'origamihotdish'
    long_name = 'Origami Hot Dish'
3944
3945
3946
class HitAndMissComicsTumblr(GenericTumblrV1):
    """Retriever for Hit and Miss Comics, built on GenericTumblrV1."""
    url = 'http://hitandmisscomics.tumblr.com'
    name = 'hitandmiss'
    long_name = 'Hit and Miss Comics'
3951
3952
3953
class HMBlanc(GenericTumblrV1):
    """Retriever for HM Blanc comics, built on GenericTumblrV1."""
    url = 'http://hmblanc.tumblr.com'
    name = 'hmblanc'
    long_name = 'HM Blanc'
3958
3959
3960
class TalesOfAbsurdityTumblr(GenericTumblrV1):
    """Retriever for the Tumblr edition of Tales Of Absurdity."""
    # Also published at:
    #  - http://talesofabsurdity.com
    #  - http://tapastic.com/series/Tales-Of-Absurdity
    url = 'http://talesofabsurdity.tumblr.com'
    name = 'absurdity-tumblr'
    long_name = 'Tales of Absurdity (from Tumblr)'
    _categories = ('ABSURDITY', )
3968
3969
3970
class RobbieAndBobby(GenericTumblrV1):
    """Retriever for the Tumblr edition of Robbie And Bobby."""
    # Also published at http://robbieandbobby.com
    url = 'http://robbieandbobby.tumblr.com'
    name = 'robbie-tumblr'
    long_name = 'Robbie And Bobby (from Tumblr)'
3976
3977
3978
class ElectricBunnyComicTumblr(GenericTumblrV1):
    """Retriever for the Tumblr edition of Electric Bunny Comics."""
    # Also published at http://www.electricbunnycomics.com/View/Comic/153/Welcome+to+Hell
    url = 'http://electricbunnycomics.tumblr.com'
    name = 'bunny-tumblr'
    long_name = 'Electric Bunny Comic (from Tumblr)'
3984
3985
3986
class Hoomph(GenericTumblrV1):
    """Retriever for Hoomph comics, built on GenericTumblrV1."""
    url = 'http://hoom.ph'
    name = 'hoomph'
    long_name = 'Hoomph'
3991
3992
3993
class BFGFSTumblr(GenericTumblrV1):
    """Retriever for the Tumblr edition of BFGFS."""
    # Also published at:
    #  - https://tapastic.com/series/BFGFS
    #  - http://bfgfs.com
    url = 'http://bfgfs.tumblr.com'
    name = 'bfgfs-tumblr'
    long_name = 'BFGFS (from Tumblr)'
4000
4001
4002
class DoodleForFood(GenericTumblrV1):
    """Retriever for Doodle For Food comics, built on GenericTumblrV1."""
    # Also published at http://doodleforfood.com
    url = 'http://doodleforfood.com'
    name = 'doodle'
    long_name = 'Doodle For Food'
4008
4009
4010
class CassandraCalinTumblr(GenericTumblrV1):
    """Retriever for the Tumblr edition of C. Cassandra comics."""
    # Also published at:
    #  - http://cassandracalin.com
    #  - https://tapastic.com/series/C-Cassandra-comics
    url = 'http://c-cassandra.tumblr.com'
    name = 'cassandra-tumblr'
    long_name = 'Cassandra Calin (from Tumblr)'
4017
4018
4019
class DougWasTaken(GenericTumblrV1):
    """Retriever for Doug Was Taken comics, built on GenericTumblrV1."""
    url = 'http://dougwastaken.tumblr.com'
    name = 'doug'
    long_name = 'Doug Was Taken'
4024
4025
4026
class MandatoryRollerCoaster(GenericTumblrV1):
    """Retriever for Mandatory Roller Coaster comics, built on GenericTumblrV1."""
    url = 'http://mandatoryrollercoaster.com'
    name = 'rollercoaster'
    long_name = 'Mandatory Roller Coaster'
4031
4032
4033
class CEstPasEnRegardantSesPompes(GenericTumblrV1):
    """Retriever for C'Est Pas En Regardant Ses Pompes (...) comics, built on GenericTumblrV1."""
    url = 'http://cperspqccltt.tumblr.com'
    name = 'cperspqccltt'
    long_name = 'C Est Pas En Regardant Ses Pompes (...)'
4038
4039
4040
class TheGrohlTroll(GenericTumblrV1):
    """Retriever for The Grohl Troll comics, built on GenericTumblrV1."""
    url = 'http://thegrohltroll.com'
    name = 'grohltroll'
    long_name = 'The Grohl Troll'
4045
4046
4047
class WebcomicName(GenericTumblrV1):
    """Retriever for Webcomic Name comics, built on GenericTumblrV1."""
    url = 'http://webcomicname.com'
    name = 'webcomicname'
    long_name = 'Webcomic Name'
4052
4053
4054
class BooksOfAdam(GenericTumblrV1):
    """Retriever for Books of Adam comics, built on GenericTumblrV1."""
    # Also published at http://www.booksofadam.com
    url = 'http://booksofadam.tumblr.com'
    name = 'booksofadam'
    long_name = 'Books of Adam'
4060
4061
4062
class HarkAVagrant(GenericTumblrV1):
    """Retriever for the Tumblr edition of Hark A Vagrant."""
    # Also published at http://www.harkavagrant.com
    url = 'http://beatonna.tumblr.com'
    name = 'hark-tumblr'
    long_name = 'Hark A Vagrant (from Tumblr)'
4068
4069
4070
class OurSuperAdventureTumblr(GenericTumblrV1):
    """Retriever for the Tumblr edition of Our Super Adventure."""
    # Also published at:
    #  - https://tapastic.com/series/Our-Super-Adventure
    #  - http://www.oursuperadventure.com
    # http://sarahgraley.com
    url = 'http://sarahssketchbook.tumblr.com'
    name = 'superadventure-tumblr'
    long_name = 'Our Super Adventure (from Tumblr)'
4078
4079
4080
class JakeLikesOnions(GenericTumblrV1):
    """Retriever for Jake Likes Onions comics, built on GenericTumblrV1."""
    url = 'http://jakelikesonions.com'
    name = 'jake'
    long_name = 'Jake Likes Onions'
4085
4086
4087
class InYourFaceCake(GenericTumblrV1):
    """Retriever for the Tumblr edition of In Your Face Cake."""
    url = 'http://in-your-face-cake.tumblr.com'
    name = 'inyourfacecake-tumblr'
    long_name = 'In Your Face Cake (from Tumblr)'
4092
4093
4094
class Robospunk(GenericTumblrV1):
    """Retriever for Robospunk comics, built on GenericTumblrV1."""
    url = 'http://robospunk.com'
    name = 'robospunk'
    long_name = 'Robospunk'
4099
4100
4101
class BananaTwinky(GenericTumblrV1):
    """Retriever for Banana Twinky comics, built on GenericTumblrV1."""
    url = 'http://bananatwinky.tumblr.com'
    name = 'banana'
    long_name = 'Banana Twinky'
4106
4107
4108
class YesterdaysPopcornTumblr(GenericTumblrV1):
    """Retriever for the Tumblr edition of Yesterday's Popcorn."""
    # Also published at:
    #  - http://www.yesterdayspopcorn.com
    #  - https://tapastic.com/series/Yesterdays-Popcorn
    url = 'http://yesterdayspopcorn.tumblr.com'
    name = 'popcorn-tumblr'
    long_name = 'Yesterday\'s Popcorn (from Tumblr)'
4115
4116
4117
class TwistedDoodles(GenericTumblrV1):
    """Retriever for Twisted Doodles comics, built on GenericTumblrV1."""
    url = 'http://www.twisteddoodles.com'
    name = 'twisted'
    long_name = 'Twisted Doodles'
4122
4123
4124
class UbertoolTumblr(GenericTumblrV1):
    """Retriever for the Tumblr edition of Ubertool."""
    # Also published at:
    #  - http://ubertoolcomic.com
    #  - https://tapastic.com/series/ubertool
    url = 'http://ubertool.tumblr.com'
    name = 'ubertool-tumblr'
    long_name = 'Ubertool (from Tumblr)'
    _categories = ('UBERTOOL', )
4132
4133
4134
class LittleLifeLinesTumblr(GenericTumblrV1):
    """Retriever for the Tumblr edition of Little Life Lines."""
    # Also published at http://www.littlelifelines.com
    url = 'https://little-life-lines.tumblr.com'
    name = 'life-tumblr'
    long_name = 'Little Life Lines (from Tumblr)'
4140
4141
4142
class TheyCanTalk(GenericTumblrV1):
    """Retriever for They Can Talk comics, built on GenericTumblrV1."""
    url = 'http://theycantalk.com'
    name = 'theycantalk'
    long_name = 'They Can Talk'
4147
4148
4149
class Will5NeverCome(GenericTumblrV1):
    """Retriever for Will 5:00 Never Come comics, built on GenericTumblrV1."""
    url = 'http://will5nevercome.com'
    name = 'will5'
    long_name = 'Will 5:00 Never Come ?'
4154
4155
4156
class Sephko(GenericTumblrV1):
    """Retriever for Sephko comics, built on GenericTumblrV1."""
    # Also published at http://www.sephko.com
    url = 'http://sephko.tumblr.com'
    name = 'sephko'
    long_name = 'Sephko'
4162
4163
4164
class BlazersAtDawn(GenericTumblrV1):
    """Retriever for Blazers At Dawn comics, built on GenericTumblrV1."""
    url = 'http://blazersatdawn.tumblr.com'
    name = 'blazers'
    long_name = 'Blazers At Dawn'
4169
4170
4171
class ArtByMoga(GenericEmptyComic, GenericTumblrV1):
    """Retriever for Art By Moga comics (currently deactivated)."""
    # Deactivated because it downloads too many things
    url = 'http://artbymoga.tumblr.com'
    name = 'moga'
    long_name = 'Art By Moga'
4176
4177
4178
class VerbalVomitTumblr(GenericTumblrV1):
    """Retriever for the Tumblr edition of Verbal Vomit."""
    # Also published at http://www.verbal-vomit.com
    url = 'http://verbalvomits.tumblr.com'
    name = 'vomit-tumblr'
    long_name = 'Verbal Vomit (from Tumblr)'
4184 View Code Duplication
0 ignored issues
show
This code seems to be duplicated in your project.
Loading history...
4185
4186
class LibraryComic(GenericTumblrV1):
    """Retriever for the Tumblr edition of LibraryComic."""
    # Also published at http://librarycomic.com
    url = 'http://librarycomic.tumblr.com'
    name = 'library-tumblr'
    long_name = 'LibraryComic (from Tumblr)'
4192
4193
4194
class HorovitzComics(GenericListableComic):
    """Base retriever shared by the comic series hosted on horovitzcomics.com.

    Concrete subclasses set ``link_re`` to a compiled pattern whose first
    group is the comic number found in an archive link's href.
    """
    url = 'http://www.horovitzcomics.com'
    _categories = ('HOROVITZ', )
    # Three numeric path segments of the image URL, unpacked as year, month, day.
    img_re = re.compile('.*comics/([0-9]*)/([0-9]*)/([0-9]*)/.*$')
    # Placeholder value; HorovitzNew and HorovitzClassic each provide a real pattern.
    link_re = NotImplemented
    get_url_from_archive_element = join_cls_url_to_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the info dict for the comic page ``soup`` reached through ``link``."""
        num = int(cls.link_re.match(link['href']).group(1))
        title = link.string
        comic_imgs = soup.find_all('img', id='comic')
        # Exactly one <img id="comic"> is expected on a comic page.
        assert len(comic_imgs) == 1
        date_parts = cls.img_re.match(comic_imgs[0]['src']).groups()
        year, month, day = map(int, date_parts)
        return {
            'title': title,
            'day': day,
            'month': month,
            'year': year,
            'img': [img['src'] for img in comic_imgs],
            'num': num,
        }

    @classmethod
    def get_archive_elements(cls):
        """Return the archive-page links matching ``link_re``, in reversed page order."""
        archive_soup = get_soup_at_url('http://www.horovitzcomics.com/comics/archive/')
        matching_links = archive_soup.find_all('a', href=cls.link_re)
        return reversed(matching_links)
4225
4226
4227
class HorovitzNew(HorovitzComics):
    """Retriever for the "new" Horovitz comics series."""
    # The single group captures the comic number from the archive href.
    link_re = re.compile('^/comics/new/([0-9]+)$')
    name = 'horovitznew'
    long_name = 'Horovitz New'
4232
4233
4234
class HorovitzClassic(HorovitzComics):
    """Retriever for the "classic" Horovitz comics series."""
    # The single group captures the comic number from the archive href.
    link_re = re.compile('^/comics/classic/([0-9]+)$')
    name = 'horovitzclassic'
    long_name = 'Horovitz Classic'
4239
4240
4241 View Code Duplication
class GenericGoComic(GenericNavigableComic):
    """Base retriever holding the logic shared by comics hosted on gocomics.com."""
    _categories = ('GOCOMIC', )

    @classmethod
    def get_first_comic_link(cls):
        """Return the anchor on the series page that points to the first comic."""
        first_css = 'fa btn btn-outline-default btn-circle fa-backward sm '
        series_soup = get_soup_at_url(cls.url)
        return series_soup.find('a', class_=first_css)

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor to the next comic if ``next_`` is truthy, else the previous one."""
        if next_:
            css = 'fa btn btn-outline-default btn-circle fa-caret-right sm '
        else:
            css = 'fa btn btn-outline-default btn-circle fa-caret-left sm '
        return last_soup.find('a', class_=css)

    @classmethod
    def get_url_from_link(cls, link):
        """Join the link's href onto the gocomics.com root URL."""
        return urljoin_wrapper('http://www.gocomics.com', link['href'])

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the info dict (date, images, author, tags) read from a comic page."""
        def meta_content(prop):
            """Return the ``content`` of the <meta> tag carrying this property."""
            return soup.find('meta', property=prop)['content']

        published = string_to_date(meta_content('article:published_time'), "%Y-%m-%d")
        picture = soup.find('picture', class_='img-fluid item-comic-image')
        comic_imgs = picture.find_all('img')
        author = meta_content('article:author')
        tags = meta_content('article:tag')
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [img['src'] for img in comic_imgs],
            'author': author,
            'tags': tags,
        }
4278
4279
4280
class PearlsBeforeSwine(GenericGoComic):
    """Retriever for Pearls Before Swine, hosted on GoComics."""
    url = 'http://www.gocomics.com/pearlsbeforeswine'
    name = 'pearls'
    long_name = 'Pearls Before Swine'
4285
4286
4287
class Peanuts(GenericGoComic):
    """Retriever for Peanuts, hosted on GoComics."""
    url = 'http://www.gocomics.com/peanuts'
    name = 'peanuts'
    long_name = 'Peanuts'
4292
4293
4294
class MattWuerker(GenericGoComic):
    """Retriever for Matt Wuerker comics, hosted on GoComics."""
    url = 'http://www.gocomics.com/mattwuerker'
    name = 'wuerker'
    long_name = 'Matt Wuerker'
4299
4300
4301
class TomToles(GenericGoComic):
    """Retriever for Tom Toles comics, hosted on GoComics."""
    url = 'http://www.gocomics.com/tomtoles'
    name = 'toles'
    long_name = 'Tom Toles'
4306
4307
4308
class BreakOfDay(GenericGoComic):
    """Retriever for Break Of Day comics, hosted on GoComics."""
    url = 'http://www.gocomics.com/break-of-day'
    name = 'breakofday'
    long_name = 'Break Of Day'
4313
4314
4315
class Brevity(GenericGoComic):
    """Retriever for Brevity comics, hosted on GoComics."""
    url = 'http://www.gocomics.com/brevitypanel'
    name = 'brevity'
    long_name = 'Brevity'
4320
4321
4322
class MichaelRamirez(GenericGoComic):
    """Retriever for Michael Ramirez comics, hosted on GoComics."""
    url = 'http://www.gocomics.com/michaelramirez'
    name = 'ramirez'
    long_name = 'Michael Ramirez'
4327
4328
4329
class MikeLuckovich(GenericGoComic):
    """Retriever for Mike Luckovich comics, hosted on GoComics."""
    url = 'http://www.gocomics.com/mikeluckovich'
    name = 'luckovich'
    long_name = 'Mike Luckovich'
4334
4335
4336
class JimBenton(GenericGoComic):
    """Retriever for Jim Benton comics, hosted on GoComics."""
    # Also published at http://jimbenton.tumblr.com
    url = 'http://www.gocomics.com/jim-benton-cartoons'
    name = 'benton'
    long_name = 'Jim Benton'
4342
4343
4344
class TheArgyleSweater(GenericGoComic):
    """Retriever for The Argyle Sweater comics, hosted on GoComics."""
    url = 'http://www.gocomics.com/theargylesweater'
    name = 'argyle'
    long_name = 'Argyle Sweater'
4349
4350
4351
class SunnyStreet(GenericGoComic):
    """Retriever for Sunny Street comics, hosted on GoComics."""
    # Also published at http://www.sunnystreetcomics.com
    url = 'http://www.gocomics.com/sunny-street'
    name = 'sunny'
    long_name = 'Sunny Street'
4357
4358
4359
class OffTheMark(GenericGoComic):
    """Retriever for Off The Mark comics, hosted on GoComics."""
    # Also published at https://www.offthemark.com
    url = 'http://www.gocomics.com/offthemark'
    name = 'offthemark'
    long_name = 'Off The Mark'
4365
4366
4367
class WuMo(GenericGoComic):
    """Retriever for WuMo comics, hosted on GoComics."""
    # Also published at http://wumo.com
    url = 'http://www.gocomics.com/wumo'
    name = 'wumo'
    long_name = 'WuMo'
4373
4374
4375
class LunarBaboon(GenericGoComic):
    """Retriever for Lunar Baboon comics, hosted on GoComics."""
    # Also published at:
    #  - http://www.lunarbaboon.com
    #  - https://tapastic.com/series/Lunarbaboon
    url = 'http://www.gocomics.com/lunarbaboon'
    name = 'lunarbaboon'
    long_name = 'Lunar Baboon'
4382
4383
4384
class SandersenGocomic(GenericGoComic):
    """Retriever for the GoComics edition of Sarah Andersen's comics."""
    # Also published at:
    #  - http://sarahcandersen.com
    #  - http://tapastic.com/series/Doodle-Time
    url = 'http://www.gocomics.com/sarahs-scribbles'
    name = 'sandersen-goc'
    long_name = 'Sarah Andersen (from GoComics)'
4391
4392
4393
class SaturdayMorningBreakfastCerealGoComic(GenericGoComic):
    """Retriever for the GoComics edition of Saturday Morning Breakfast Cereal."""
    # Also published at:
    #  - http://smbc-comics.tumblr.com
    #  - http://www.smbc-comics.com
    url = 'http://www.gocomics.com/saturday-morning-breakfast-cereal'
    name = 'smbc-goc'
    long_name = 'Saturday Morning Breakfast Cereal (from GoComics)'
    _categories = ('SMBC', )
4401
4402
4403
class CalvinAndHobbesGoComic(GenericGoComic):
    """Retriever for the GoComics edition of Calvin and Hobbes."""
    # This is the gocomics.com source, as opposed to
    # http://marcel-oehler.marcellosendos.ch/comics/ch/
    url = 'http://www.gocomics.com/calvinandhobbes'
    name = 'calvin-goc'
    long_name = 'Calvin and Hobbes (from GoComics)'
4409
4410
4411
class RallGoComic(GenericGoComic):
    """Retriever for the GoComics edition of Ted Rall's comics."""
    # Also published at http://rall.com/comic
    url = "http://www.gocomics.com/ted-rall"
    name = 'rall-goc'
    long_name = "Ted Rall (from GoComics)"
    _categories = ('RALL', )
4418
4419
4420
class TheAwkwardYetiGoComic(GenericGoComic):
    """Retriever for the GoComics edition of The Awkward Yeti."""
    # Also published at:
    #  - http://larstheyeti.tumblr.com
    #  - http://theawkwardyeti.com
    #  - https://tapastic.com/series/TheAwkwardYeti
    url = 'http://www.gocomics.com/the-awkward-yeti'
    name = 'yeti-goc'
    long_name = 'The Awkward Yeti (from GoComics)'
    _categories = ('YETI', )
4429
4430
4431
class BerkeleyMewsGoComics(GenericGoComic):
    """Retriever for the GoComics edition of Berkeley Mews."""
    # Also published at:
    #  - http://mews.tumblr.com
    #  - http://www.berkeleymews.com
    url = 'http://www.gocomics.com/berkeley-mews'
    name = 'berkeley-goc'
    long_name = 'Berkeley Mews (from GoComics)'
    _categories = ('BERKELEY', )
4439
4440
4441
class SheldonGoComics(GenericGoComic):
    """Retriever for the GoComics edition of Sheldon."""
    # Also published at http://www.sheldoncomics.com
    url = 'http://www.gocomics.com/sheldon'
    name = 'sheldon-goc'
    long_name = 'Sheldon Comics (from GoComics)'
4447
4448
4449
class FowlLanguageGoComics(GenericGoComic):
    """Retriever for the GoComics edition of Fowl Language."""
    # Also published at:
    #  - http://www.fowllanguagecomics.com
    #  - http://tapastic.com/series/Fowl-Language-Comics
    #  - http://fowllanguagecomics.tumblr.com
    url = 'http://www.gocomics.com/fowl-language'
    name = 'fowllanguage-goc'
    long_name = 'Fowl Language Comics (from GoComics)'
    _categories = ('FOWLLANGUAGE', )
4458
4459
4460
class NickAnderson(GenericGoComic):
    """Retriever for Nick Anderson comics, hosted on GoComics."""
    url = 'http://www.gocomics.com/nickanderson'
    name = 'nickanderson'
    long_name = 'Nick Anderson'
4465
4466
4467
class GarfieldGoComics(GenericGoComic):
    """Retriever for the GoComics edition of Garfield."""
    # Also published at http://garfield.com
    url = 'http://www.gocomics.com/garfield'
    name = 'garfield-goc'
    long_name = 'Garfield (from GoComics)'
    _categories = ('GARFIELD', )
4474
4475
4476
class DorrisMcGoComics(GenericGoComic):
    """Retriever for the GoComics edition of Dorris Mc."""
    # Also published at http://dorrismccomics.com
    url = 'http://www.gocomics.com/dorris-mccomics'
    name = 'dorrismc-goc'
    long_name = 'Dorris Mc (from GoComics)'
4482
4483
4484
class FoxTrot(GenericGoComic):
    """Retriever for FoxTrot comics, hosted on GoComics."""
    url = 'http://www.gocomics.com/foxtrot'
    name = 'foxtrot'
    long_name = 'FoxTrot'
4489
4490
4491
class FoxTrotClassics(GenericGoComic):
    """Retriever for FoxTrot Classics comics, hosted on GoComics."""
    url = 'http://www.gocomics.com/foxtrotclassics'
    name = 'foxtrot-classics'
    long_name = 'FoxTrot Classics'
4496
4497
4498
class MisterAndMeGoComics(GenericGoComic):
    """Retriever for the GoComics edition of Mister & Me."""
    # Also published at:
    #  - http://www.mister-and-me.com
    #  - https://tapastic.com/series/Mister-and-Me
    url = 'http://www.gocomics.com/mister-and-me'
    name = 'mister-goc'
    long_name = 'Mister & Me (from GoComics)'
4505
4506
4507
class NonSequitur(GenericGoComic):
    """Retriever for Non Sequitur (Wiley Miller) comics, hosted on GoComics."""
    url = 'http://www.gocomics.com/nonsequitur'
    name = 'nonsequitur'
    long_name = 'Non Sequitur'
4512
4513
4514
class GenericTapasticComic(GenericListableComic):
    """Base retriever holding the logic shared by comics hosted on tapastic.com."""
    _categories = ('TAPASTIC', )

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Return the info dict for one episode, or None when the page has no image yet."""
        # 'publishDate' is divided by 1000 before being used as a timestamp.
        seconds = int(archive_elt['publishDate']) / 1000.0
        published = datetime.datetime.fromtimestamp(seconds).date()
        art_imgs = soup.find_all('img', class_='art-image')
        if not art_imgs:
            episode_url = cls.get_url_from_archive_element(archive_elt)
            print("Comic %s is being uploaded, retry later" % episode_url)
            return None
        assert len(art_imgs) > 0  # already guaranteed by the guard above
        return {
            'day': published.day,
            'year': published.year,
            'month': published.month,
            'img': [img['src'] for img in art_imgs],
            'title': archive_elt['title'],
        }

    @classmethod
    def get_url_from_archive_element(cls, archive_elt):
        """Return the episode URL built from the archive element's id."""
        episode_id = str(archive_elt['id'])
        return 'http://tapastic.com/episode/' + episode_id

    @classmethod
    def get_archive_elements(cls):
        """Return the episode list parsed from the JavaScript embedded in the series page.

        Looks for the stripped line starting with 'episodeList : ' and ending
        with ',' and decodes what lies in between as JSON. Raises IndexError
        when no line matches.
        """
        prefix, suffix = 'episodeList : ', ','
        # Information is stored in the javascript part of the page; there is
        # no cleaner access known, so the matching line is picked out by hand.
        payloads = []
        for raw_line in urlopen_wrapper(cls.url).readlines():
            line = raw_line.decode('utf-8').strip()
            if line.startswith(prefix) and line.endswith(suffix):
                payloads.append(line[len(prefix):-len(suffix)])
        return json.loads(payloads[0])
4547
4548
4549
class VegetablesForDessert(GenericTapasticComic):
    """Retriever for Vegetables For Dessert, hosted on Tapastic."""
    # Also published at http://vegetablesfordessert.tumblr.com
    url = 'http://tapastic.com/series/vegetablesfordessert'
    name = 'vegetables'
    long_name = 'Vegetables For Dessert'
4555
4556
4557
class FowlLanguageTapa(GenericTapasticComic):
    """Retriever for the Tapastic edition of Fowl Language."""
    # Also published at:
    #  - http://www.fowllanguagecomics.com
    #  - http://fowllanguagecomics.tumblr.com
    #  - http://www.gocomics.com/fowl-language
    url = 'http://tapastic.com/series/Fowl-Language-Comics'
    name = 'fowllanguage-tapa'
    long_name = 'Fowl Language Comics (from Tapastic)'
    _categories = ('FOWLLANGUAGE', )
4566
4567
4568
class OscillatingProfundities(GenericTapasticComic):
    """Retriever for Oscillating Profundities, hosted on Tapastic."""
    url = 'http://tapastic.com/series/oscillatingprofundities'
    name = 'oscillating'
    long_name = 'Oscillating Profundities'
4573
4574
4575
class ZnoflatsComics(GenericTapasticComic):
    """Retriever for Znoflats Comics, hosted on Tapastic."""
    url = 'http://tapastic.com/series/Znoflats-Comics'
    name = 'znoflats'
    long_name = 'Znoflats Comics'
4580
4581
4582
class SandersenTapastic(GenericTapasticComic):
    """Retriever for the Tapastic edition of Sarah Andersen's comics."""
    # Also published at:
    #  - http://sarahcandersen.com
    #  - http://www.gocomics.com/sarahs-scribbles
    url = 'http://tapastic.com/series/Doodle-Time'
    name = 'sandersen-tapa'
    long_name = 'Sarah Andersen (from Tapastic)'
4589
4590
4591
class TubeyToonsTapastic(GenericTapasticComic):
    """Retriever for the Tapastic edition of TubeyToons."""
    # Also published at:
    #  - http://tubeytoons.com
    #  - http://tubeytoons.tumblr.com
    url = 'http://tapastic.com/series/Tubey-Toons'
    name = 'tubeytoons-tapa'
    long_name = 'Tubey Toons (from Tapastic)'
    # NOTE(review): the category reads 'TUNEYTOONS' although the comic is
    # Tubey Toons; confirm whether other classes rely on this spelling.
    _categories = ('TUNEYTOONS', )
4599
4600
4601
class AnythingComicTapastic(GenericTapasticComic):
    """Retriever for the Tapastic edition of Anything Comic."""
    # Also published at http://www.anythingcomic.com
    url = 'http://tapastic.com/series/anything'
    name = 'anythingcomic-tapa'
    long_name = 'Anything Comic (from Tapastic)'
4607
4608
4609
class UnearthedComicsTapastic(GenericTapasticComic):
    """Retriever for the Tapastic edition of Unearthed Comics."""
    # Also published at:
    #  - http://unearthedcomics.com
    #  - http://unearthedcomics.tumblr.com
    url = 'http://tapastic.com/series/UnearthedComics'
    name = 'unearthed-tapa'
    long_name = 'Unearthed Comics (from Tapastic)'
    _categories = ('UNEARTHED', )
4617
4618
4619
class EverythingsStupidTapastic(GenericTapasticComic):
    """Retriever for the Tapastic edition of Everything's Stupid."""
    # Also published at:
    #  - http://www.webtoons.com/en/challenge/everythings-stupid/list?title_no=14591
    #  - http://everythingsstupid.net
    url = 'http://tapastic.com/series/EverythingsStupid'
    name = 'stupid-tapa'
    long_name = "Everything's Stupid (from Tapastic)"
4626
4627
4628
class JustSayEhTapastic(GenericTapasticComic):
    """Retriever for the Tapastic edition of Just Say Eh."""
    # Also published at http://www.justsayeh.com
    url = 'http://tapastic.com/series/Just-Say-Eh'
    name = 'justsayeh-tapa'
    long_name = 'Just Say Eh (from Tapastic)'
4634
4635
4636
class ThorsThundershackTapastic(GenericTapasticComic):
    """Retriever for the Tapastic edition of Thor's Thundershack."""
    # Also published at http://www.thorsthundershack.com
    url = 'http://tapastic.com/series/Thors-Thundershac'
    name = 'thor-tapa'
    long_name = 'Thor\'s Thundershack (from Tapastic)'
    _categories = ('THOR', )
4643
4644
4645
class OwlTurdTapastic(GenericTapasticComic):
    """Retriever for the Tapastic edition of Owl Turd."""
    # Also published at http://owlturd.com
    url = 'http://tapastic.com/series/Owl-Turd-Comix'
    name = 'owlturd-tapa'
    long_name = 'Owl Turd (from Tapastic)'
    _categories = ('OWLTURD', )
4652
4653
4654
class GoneIntoRaptureTapastic(GenericTapasticComic):
    """Retriever for the Tapastic edition of Gone Into Rapture."""
    # Also published at:
    #  - http://goneintorapture.tumblr.com
    #  - http://www.goneintorapture.com
    url = 'http://tapastic.com/series/Goneintorapture'
    name = 'rapture-tapa'
    long_name = 'Gone Into Rapture (from Tapastic)'
4661
4662
4663
class HeckIfIKnowComicsTapa(GenericTapasticComic):
    """Retriever for the Tapastic edition of Heck If I Know Comics."""
    # Also published at http://heckifiknowcomics.com
    url = 'http://tapastic.com/series/Regular'
    name = 'heck-tapa'
    long_name = 'Heck if I Know comics (from Tapastic)'
4669
4670
4671
class CheerUpEmoKidTapa(GenericTapasticComic):
    """Retriever for the Tapastic edition of Cheer Up Emo Kid."""
    # Also published at:
    #  - http://www.cheerupemokid.com
    #  - http://enzocomics.tumblr.com
    url = 'http://tapastic.com/series/CUEK'
    name = 'cuek-tapa'
    long_name = 'Cheer Up Emo Kid (from Tapastic)'
4678
4679
4680
class BigFootJusticeTapa(GenericTapasticComic):
    """Retriever for the Tapastic edition of Big Foot Justice."""
    # Also published at http://bigfootjustice.com
    url = 'http://tapastic.com/series/bigfoot-justice'
    name = 'bigfoot-tapa'
    long_name = 'Big Foot Justice (from Tapastic)'
4686
4687
4688
class UpAndOutTapa(GenericTapasticComic):
    """Retriever for the Tapastic edition of Up & Out."""
    # Also published at http://upandoutcomic.tumblr.com
    url = 'http://tapastic.com/series/UP-and-OUT'
    name = 'upandout-tapa'
    long_name = 'Up And Out (from Tapastic)'
4694
4695
4696
class ToonHoleTapa(GenericTapasticComic):
    """Retriever for the Tapastic edition of Toon Hole."""
    # Also published at http://www.toonhole.com
    url = 'http://tapastic.com/series/TOONHOLE'
    name = 'toonhole-tapa'
    long_name = 'Toon Hole (from Tapastic)'
4702
4703
4704
class AngryAtNothingTapa(GenericTapasticComic):
    """Retriever for the Tapastic edition of Angry At Nothing."""
    # Also published at http://www.angryatnothing.net
    url = 'http://tapastic.com/series/Comics-yeah-definitely-comics-'
    name = 'angry-tapa'
    long_name = 'Angry At Nothing (from Tapastic)'
4710
4711
4712
class LeleozTapa(GenericTapasticComic):
    """Retriever for the Tapastic edition of Leleoz."""
    # Also published at http://leleozcomics.tumblr.com
    url = 'https://tapastic.com/series/Leleoz'
    name = 'leleoz-tapa'
    long_name = 'Leleoz (from Tapastic)'
4718
4719
4720
class TheAwkwardYetiTapa(GenericTapasticComic):
    """Retriever for the Tapastic edition of The Awkward Yeti."""
    # Also published at:
    #  - http://www.gocomics.com/the-awkward-yeti
    #  - http://theawkwardyeti.com
    #  - http://larstheyeti.tumblr.com
    url = 'https://tapastic.com/series/TheAwkwardYeti'
    name = 'yeti-tapa'
    long_name = 'The Awkward Yeti (from Tapastic)'
    _categories = ('YETI', )
4729
4730
4731
class AsPerUsualTapa(GenericTapasticComic):
    """Class to retrieve As Per Usual comics."""
    # Also on http://as-per-usual.tumblr.com
    name = 'usual-tapa'
    long_name = 'As Per Usual (from Tapastic)'
    url = 'https://tapastic.com/series/AsPerUsual'
    # Declared as `_categories` (leading underscore), the attribute name used
    # by the other comic classes, rather than the stray `categories` spelling.
    _categories = ('DAMILEE', )
4738
4739
4740
class HotComicsForCoolPeopleTapa(GenericTapasticComic):
    """Retriever for the Tapastic edition of Hot Comics For Cool People."""
    # Also published at:
    #  - http://hotcomicsforcoolpeople.tumblr.com
    #  - http://hotcomics.biz (links to tumblr)
    #  - http://hcfcp.com (links to tumblr)
    url = 'https://tapastic.com/series/Hot-Comics-For-Cool-People'
    name = 'hotcomics-tapa'
    long_name = 'Hot Comics For Cool People (from Tapastic)'
    # NOTE(review): sibling classes declare `_categories`, not `categories`,
    # and 'DAMILEE' looks copied from AsPerUsualTapa; confirm the intended
    # category before renaming this attribute.
    categories = ('DAMILEE', )
4749
4750
4751
class OneOneOneOneComicTapa(GenericTapasticComic):
    """Retriever for the 1111 Comics hosted on Tapastic."""
    # Other sources for the same comic:
    #   http://www.1111comics.me
    #   http://comics1111.tumblr.com
    name = '1111-tapa'
    long_name = '1111 Comics (from Tapastic)'
    url = 'https://tapastic.com/series/1111-Comics'
    _categories = ('ONEONEONEONE',)
4759
4760
4761
class TumbleDryTapa(GenericTapasticComic):
    """Retriever for the Tumble Dry comics hosted on Tapastic."""
    # Other source for the same comic: http://tumbledrycomics.com
    name = 'tumbledry-tapa'
    # Display name fixed: it used to read 'Tumblr Dry', which matched neither
    # the comic's title nor the series URL below.
    long_name = 'Tumble Dry (from Tapastic)'
    url = 'https://tapastic.com/series/TumbleDryComics'
4767
4768
4769
class DeadlyPanelTapa(GenericTapasticComic):
    """Retriever for the Deadly Panel comics hosted on Tapastic."""
    # Other source for the same comic: http://www.deadlypanel.com
    name = 'deadly-tapa'
    long_name = 'Deadly Panel (from Tapastic)'
    url = 'https://tapastic.com/series/deadlypanel'
4775
4776
4777
class ChrisHallbeckMaxiTapa(GenericTapasticComic):
    """Retriever for Chris Hallbeck's Maximumble comics hosted on Tapastic."""
    # Other sources for the same comic:
    #   http://chrishallbeck.tumblr.com
    #   http://maximumble.com
    name = 'hallbeckmaxi-tapa'
    # Author name fixed in the display string ('Hallback' -> 'Hallbeck') to
    # match the class name and the URLs above.
    long_name = 'Chris Hallbeck - Maximumble (from Tapastic)'
    url = 'https://tapastic.com/series/Maximumble'
    # The category key keeps its existing spelling on purpose: it is an
    # identifier shared with the other Chris Hallbeck classes in this file.
    _categories = ('HALLBACK', )
4785
4786
4787
class ChrisHallbeckMiniTapa(GenericTapasticComic):
    """Retriever for Chris Hallbeck's Minimumble comics hosted on Tapastic."""
    # Other sources for the same comic:
    #   http://chrishallbeck.tumblr.com
    #   http://minimumble.com
    name = 'hallbeckmini-tapa'
    # Author name fixed in the display string ('Hallback' -> 'Hallbeck') to
    # match the class name and the URLs above.
    long_name = 'Chris Hallbeck - Minimumble (from Tapastic)'
    url = 'https://tapastic.com/series/Minimumble'
    # The category key keeps its existing spelling on purpose: it is an
    # identifier shared with the other Chris Hallbeck classes in this file.
    _categories = ('HALLBACK', )
4795
4796
4797
class ChrisHallbeckBiffTapa(GenericTapasticComic):
    """Retriever for Chris Hallbeck's The Book of Biff comics hosted on Tapastic."""
    # Other sources for the same comic:
    #   http://chrishallbeck.tumblr.com
    #   http://thebookofbiff.com
    name = 'hallbeckbiff-tapa'
    # Author name fixed in the display string ('Hallback' -> 'Hallbeck') to
    # match the class name and the URLs above.
    long_name = 'Chris Hallbeck - The Book of Biff (from Tapastic)'
    url = 'https://tapastic.com/series/Biff'
    # The category key keeps its existing spelling on purpose: it is an
    # identifier shared with the other Chris Hallbeck classes in this file.
    _categories = ('HALLBACK', )
4805
4806
4807
class RandoWisTapa(GenericTapasticComic):
    """Retriever for the RandoWis comics hosted on Tapastic."""
    # Other source for the same comic: https://randowis.com
    name = 'randowis-tapa'
    long_name = 'RandoWis (from Tapastic)'
    url = 'https://tapastic.com/series/RandoWis'
4813
4814
4815
class PigeonGazetteTapa(GenericTapasticComic):
    """Retriever for The Pigeon Gazette comics hosted on Tapastic."""
    # Other source for the same comic: http://thepigeongazette.tumblr.com
    name = 'pigeon-tapa'
    long_name = 'The Pigeon Gazette (from Tapastic)'
    url = 'https://tapastic.com/series/The-Pigeon-Gazette'
4821
4822
4823
class TheOdd1sOutTapa(GenericTapasticComic):
    """Retriever for The Odd 1s Out comics hosted on Tapastic."""
    # Other sources for the same comic:
    #   http://theodd1sout.com
    #   http://theodd1sout.tumblr.com
    name = 'theodd-tapa'
    long_name = 'The Odd 1s Out (from Tapastic)'
    url = 'https://tapastic.com/series/Theodd1sout'
4830
4831
4832
class TheWorldIsFlatTapa(GenericTapasticComic):
    """Retriever for The World Is Flat comics hosted on Tapastic."""
    # Other source for the same comic: http://theworldisflatcomics.tumblr.com
    name = 'flatworld-tapa'
    long_name = 'The World Is Flat (from Tapastic)'
    url = 'https://tapastic.com/series/The-World-is-Flat'
4838
4839
4840
class MisterAndMeTapa(GenericTapasticComic):
    """Retriever for the Mister & Me comics hosted on Tapastic."""
    # Other sources for the same comic:
    #   http://www.mister-and-me.com
    #   http://www.gocomics.com/mister-and-me
    name = 'mister-tapa'
    long_name = 'Mister & Me (from Tapastic)'
    url = 'https://tapastic.com/series/Mister-and-Me'
4847
4848
4849
class TalesOfAbsurdityTapa(GenericTapasticComic):
    """Retriever for the Tales Of Absurdity comics hosted on Tapastic."""
    # Other sources for the same comic:
    #   http://talesofabsurdity.com
    #   http://talesofabsurdity.tumblr.com
    name = 'absurdity-tapa'
    long_name = 'Tales of Absurdity (from Tapastic)'
    url = 'http://tapastic.com/series/Tales-Of-Absurdity'
    _categories = ('ABSURDITY',)
4857
4858
4859
class BFGFSTapa(GenericTapasticComic):
    """Retriever for the BFGFS comics hosted on Tapastic."""
    # Other sources for the same comic:
    #   http://bfgfs.com
    #   http://bfgfs.tumblr.com
    name = 'bfgfs-tapa'
    long_name = 'BFGFS (from Tapastic)'
    url = 'https://tapastic.com/series/BFGFS'
4866
4867
4868
class DoodleForFoodTapa(GenericTapasticComic):
    """Retriever for the Doodle For Food comics hosted on Tapastic."""
    # Other source for the same comic: http://doodleforfood.com
    name = 'doodle-tapa'
    long_name = 'Doodle For Food (from Tapastic)'
    url = 'https://tapastic.com/series/Doodle-for-Food'
4874
4875
4876
class MrLovensteinTapa(GenericTapasticComic):
    """Retriever for the Mr Lovenstein comics hosted on Tapastic."""
    # NOTE(review): the previous "Also on" comment pointed at
    # https://tapastic.com/series/MrLovenstein, i.e. the very `url` below.
    # The alternative source is presumably the comic's own website - confirm.
    name = 'mrlovenstein-tapa'
    long_name = 'Mr. Lovenstein (from Tapastic)'
    url = 'https://tapastic.com/series/MrLovenstein'
4882
4883
4884
class CassandraCalinTapa(GenericTapasticComic):
    """Retriever for the C. Cassandra comics hosted on Tapastic."""
    # Other sources for the same comic:
    #   http://cassandracalin.com
    #   http://c-cassandra.tumblr.com
    name = 'cassandra-tapa'
    long_name = 'Cassandra Calin (from Tapastic)'
    url = 'https://tapastic.com/series/C-Cassandra-comics'
4891
4892
4893
class WafflesAndPancakes(GenericTapasticComic):
    """Retriever for the Waffles And Pancakes comics hosted on Tapastic."""
    # Other source for the same comic: http://wandpcomic.com
    name = 'waffles'
    long_name = 'Waffles And Pancakes'
    url = 'https://tapastic.com/series/Waffles-and-Pancakes'
4899
4900
4901
class YesterdaysPopcornTapastic(GenericTapasticComic):
    """Retriever for the Yesterday's Popcorn comics hosted on Tapastic."""
    # Other sources for the same comic:
    #   http://www.yesterdayspopcorn.com
    #   http://yesterdayspopcorn.tumblr.com
    name = 'popcorn-tapa'
    long_name = "Yesterday's Popcorn (from Tapastic)"
    url = 'https://tapastic.com/series/Yesterdays-Popcorn'
4908
4909
4910
class OurSuperAdventureTapastic(GenericTapasticComic):
    """Retriever for the Our Super Adventure comics hosted on Tapastic."""
    # Other sources for the same comic:
    #   http://www.oursuperadventure.com
    #   http://sarahssketchbook.tumblr.com
    #   http://sarahgraley.com
    name = 'superadventure-tapastic'
    long_name = 'Our Super Adventure (from Tapastic)'
    url = 'https://tapastic.com/series/Our-Super-Adventure'
4918
4919
4920
class NamelessPCs(GenericTapasticComic):
    """Retriever for the Nameless PCs comics hosted on Tapastic."""
    # Other source for the same comic: http://namelesspcs.com
    name = 'namelesspcs-tapa'
    long_name = 'NamelessPCs (from Tapastic)'
    url = 'https://tapastic.com/series/NamelessPC'
4926
4927
4928
class UbertoolTapa(GenericTapasticComic):
    """Retriever for the Ubertool comics hosted on Tapastic."""
    # Other sources for the same comic:
    #   http://ubertoolcomic.com
    #   http://ubertool.tumblr.com
    name = 'ubertool-tapa'
    long_name = 'Ubertool (from Tapastic)'
    url = 'https://tapastic.com/series/ubertool'
    _categories = ('UBERTOOL',)
4936
4937
4938
class SmallBlueYonderTapa(GenericTapasticComic):
    """Retriever for the Small Blue Yonder comics hosted on Tapastic."""
    # Other source for the same comic: http://www.smallblueyonder.com
    name = 'smallblue-tapa'
    long_name = 'Small Blue Yonder (from Tapastic)'
    url = 'https://tapastic.com/series/Small-Blue-Yonder'
4944
4945
4946
def get_subclasses(klass):
    """Return the classes deriving from `klass`, directly or transitively.

    The direct subclasses are listed first, followed by the descendants of
    each of them in turn. A class reachable through several inheritance
    paths appears once per path.
    """
    children = klass.__subclasses__()
    descendants = list(children)
    for child in children:
        # Recurse to pick up grand-children and deeper levels.
        descendants += get_subclasses(child)
    return descendants
4952
4953
4954
def remove_st_nd_rd_th_from_date(string):
    """Strip English ordinal suffixes (1st/2nd/3rd/4th) from a date string.

    Only a suffix that immediately follows a digit is removed, so that day
    and month names containing the same letters ('August', 'Monday',
    'Saturday', 'Sunday', ...) are left untouched.

    Returns the string with '1st' turned into '1', '22nd' into '22', etc.,
    ready to be parsed with a numeric day directive such as '%d'.
    """
    # The lookbehind anchors the suffix to a preceding digit without
    # consuming it, hence an empty replacement is enough.
    return re.sub(r'(?<=\d)(?:st|nd|rd|th)', '', string)
4962
4963
4964
def string_to_date(string, date_format, local=DEFAULT_LOCAL):
    """Convert a string to a date object.

    Wrapper around datetime.datetime.strptime which temporarily switches
    the process locale so that locale-dependent directives (month names,
    day names, ...) are parsed according to `local`.

    Arguments:
        string: text to be parsed.
        date_format: strptime format, as described in
            https://docs.python.org/2/library/datetime.html#strftime-and-strptime-behavior
        local: name of the locale to use while parsing.

    Returns the parsed value as a datetime.date.

    Raises ValueError when `string` does not match `date_format` and
    locale.Error when `local` cannot be set.
    """
    # Calling setlocale without a value only queries the current setting.
    prev_locale = locale.setlocale(locale.LC_ALL)
    if local == prev_locale:
        return datetime.datetime.strptime(string, date_format).date()
    locale.setlocale(locale.LC_ALL, local)
    try:
        return datetime.datetime.strptime(string, date_format).date()
    finally:
        # Restore the caller's locale even when the parsing fails, otherwise
        # a single malformed date would leave the whole process in `local`.
        locale.setlocale(locale.LC_ALL, prev_locale)
4975
4976
4977
# Registry of the comic classes defined in this module.
# COMICS holds every direct/indirect subclass of GenericComic (deduplicated).
COMICS = set(get_subclasses(GenericComic))
# Only the classes which define a `name` are kept as usable comics.
VALID_COMICS = [comic for comic in COMICS if comic.name is not None]
# Lookup table from comic name to class; names must be unique.
COMIC_NAMES = {comic.name: comic for comic in VALID_COMICS}
assert len(COMIC_NAMES) == len(VALID_COMICS)
# Python class names must be unique as well.
CLASS_NAMES = {comic.__name__ for comic in VALID_COMICS}
assert len(CLASS_NAMES) == len(VALID_COMICS)
4983