Completed
Push — master ( 4fc97d...2f8162 )
by De
01:03
created

comics.py (24 issues)

Code
1
#! /usr/bin/python3
2
# vim: set expandtab tabstop=4 shiftwidth=4 :
3
"""Module to retrieve webcomics"""
4
5
from comic_abstract import GenericComic, get_date_for_comic
6
import re
7
from datetime import date, timedelta
8
import datetime
9
from urlfunctions import get_soup_at_url, urljoin_wrapper,\
10
    convert_iri_to_plain_ascii_uri, load_json_at_url, urlopen_wrapper
11
import json
12
import locale
13
import urllib
14
15
DEFAULT_LOCAL = 'en_GB.UTF-8'
16
17
18
class Xkcd(GenericComic):
    """Retriever for xkcd comics, driven by the site's `info.0.json` files."""
    name = 'xkcd'
    long_name = 'xkcd'
    url = 'http://xkcd.com'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Yield the comics following `last_comic`, in increasing number.

        Implementation of GenericComic's abstract method.

        `last_comic` is the previously retrieved comic (a dict with a 'num'
        entry) or a falsy value to start from comic number 1. Each yielded
        dict is the JSON document of the comic, with 'img' wrapped in a list,
        the date fields converted to int and 'prefix', 'json_url' and 'url'
        added.
        """
        start = (last_comic['num'] if last_comic else 0) + 1
        latest = load_json_at_url(urljoin_wrapper(cls.url, 'info.0.json'))
        for num in range(start, latest['num'] + 1):
            if num == 404:
                # Number 404 is never requested - presumably there is no
                # comic behind it (TODO confirm).
                continue
            json_url = urljoin_wrapper(cls.url, '%d/info.0.json' % num)
            comic = load_json_at_url(json_url)
            comic['img'] = [comic['img']]
            comic['prefix'] = '%d-' % num
            comic['json_url'] = json_url
            comic['url'] = urljoin_wrapper(cls.url, str(num))
            for date_part in ('day', 'month', 'year'):
                comic[date_part] = int(comic[date_part])
            assert comic['num'] == num
            yield comic
44
45
46
# Helper functions corresponding to get_url_from_link/get_url_from_archive_element
47
48
49
@classmethod
def get_href(cls, link):
    """Return the 'href' entry of `link` as is.

    Meant to be assigned in a class body as its
    get_url_from_link/get_url_from_archive_element implementation.
    """
    href = link['href']
    return href
53
54
55
@classmethod
def join_cls_url_to_href(cls, link):
    """Return the 'href' entry of `link` joined to the class `url`.

    Meant to be assigned in a class body as its
    get_url_from_link/get_url_from_archive_element implementation.
    """
    href = link['href']
    return urljoin_wrapper(cls.url, href)
59
60
61
class GenericNavigableComic(GenericComic):
    """Generic class for "navigable" comics : with first/next arrows.

    This class applies to comics where previous and next comics can be
    accessed from a given comic. Once given a starting point (either
    the first comic or the last comic retrieved), it will handle the
    navigation, the retrieval of the soup object and the setting of
    the 'url' attribute on retrieved comics. This greatly limits the
    amount of boilerplate code in the different implementation classes.

    The method `get_next_comic` is implemented in terms of new
    more specialized methods to be implemented/overridden:
        - get_first_comic_link
        - get_navi_link
        - get_comic_info
        - get_url_from_link
    """
    _categories = ('NAVIGABLE', )

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        Sometimes this can be retrieved from any comic page, sometimes on
        the archive page, sometimes it doesn't exist at all and one has
        to iterate backward to find it before hardcoding the result found.
        """
        raise NotImplementedError

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next (or previous - for dev purposes) comic.

        `next_` is truthy to get the next comic, falsy to get the previous.
        """
        raise NotImplementedError

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Must return a dict without a 'url' key (it is added by
        `get_next_comic`) or None when the page is to be skipped.
        """
        raise NotImplementedError

    @classmethod
    def get_url_from_link(cls, link):
        """Get url corresponding to a link. Default implementation is similar to get_href."""
        return link['href']

    @classmethod
    def get_next_link(cls, last_soup):
        """Get link to next comic."""
        return cls.get_navi_link(last_soup, True)

    @classmethod
    def get_prev_link(cls, last_soup):
        """Get link to previous comic."""
        return cls.get_navi_link(last_soup, False)

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generic implementation of get_next_comic for navigable comics.

        Starts from the link following `last_comic` (or from the first comic
        link when `last_comic` is falsy) and yields comics until no next link
        is found or the next link leads to the page just visited.
        """
        url = last_comic['url'] if last_comic else None
        if url:
            next_comic = cls.get_next_link(get_soup_at_url(url))
        else:
            next_comic = cls.get_first_comic_link()
        cls.log("next/first comic will be %s (url is %s)" % (str(next_comic), url))
        while next_comic:
            prev_url, url = url, cls.get_url_from_link(next_comic)
            if prev_url == url:
                # A "next" link pointing to the current page would loop forever.
                cls.log("got same url %s" % url)
                break
            cls.log("about to get %s (%s)" % (url, str(next_comic)))
            soup = get_soup_at_url(url)
            comic = cls.get_comic_info(soup, next_comic)
            if comic is not None:
                assert 'url' not in comic
                comic['url'] = url
                yield comic
            next_comic = cls.get_next_link(soup)
            cls.log("next comic will be %s" % str(next_comic))

    @classmethod
    def check_first_link(cls):
        """Check that navigation to first comic seems to be working - for dev purposes.

        Returns True when a first link exists and its URL can be retrieved
        without an HTTP error, False otherwise.
        """
        # A bare `import urllib` does not load the `urllib.error` submodule:
        # import it explicitly so that the except clause cannot fail.
        import urllib.error

        cls.log("about to check first link")
        ok = True
        firstlink = cls.get_first_comic_link()
        if firstlink is None:
            print("From %s : no first link" % cls.url)
            ok = False
        else:
            firsturl = cls.get_url_from_link(firstlink)
            try:
                get_soup_at_url(firsturl)
            except urllib.error.HTTPError:
                print("From %s : invalid first url" % cls.url)
                ok = False
        cls.log("checked first link -> returned %d" % ok)
        return ok

    @classmethod
    def check_prev_next_links(cls, url):
        """Check that navigation to prev/next from a given URL seems to be working - for dev purposes.

        Going to the previous comic then forward (and to the next comic then
        backward) is expected to lead back to `url`. Returns True when the
        checks pass, False otherwise (a message is printed for each failure).
        """
        cls.log("about to check prev/next from %s" % url)
        ok = True
        if url is None:
            prevlink, nextlink = None, None
        else:
            soup = get_soup_at_url(url)
            prevlink, nextlink = cls.get_prev_link(soup), cls.get_next_link(soup)
        if prevlink is None and nextlink is None:
            print("From %s : no previous nor next" % url)
            ok = False
        else:
            if prevlink:
                prevurl = cls.get_url_from_link(prevlink)
                prevsoup = get_soup_at_url(prevurl)
                # The previous page may have no "next" link at all: report it
                # as a navigation failure instead of crashing on None.
                backlink = cls.get_next_link(prevsoup)
                prevnext = cls.get_url_from_link(backlink) if backlink else None
                if prevnext != url:
                    print("From %s, going backward then forward leads to %s" % (url, prevnext))
                    ok = False
            if nextlink:
                nexturl = cls.get_url_from_link(nextlink)
                # A next link pointing to the page itself is not followed.
                if nexturl != url:
                    nextsoup = get_soup_at_url(nexturl)
                    # Same as above: the next page may have no "prev" link.
                    backlink = cls.get_prev_link(nextsoup)
                    nextprev = cls.get_url_from_link(backlink) if backlink else None
                    if nextprev != url:
                        print("From %s, going forward then backward leads to %s" % (url, nextprev))
                        ok = False
        cls.log("checked prev/next from %s -> returned %d" % (url, ok))
        return ok

    @classmethod
    def check_navigation(cls, url):
        """Check that navigation functions seem to be working - for dev purposes.

        Runs both `check_first_link` and `check_prev_next_links(url)` and
        returns True only when both succeed.
        """
        cls.log("about to check navigation from %s" % url)
        first = cls.check_first_link()
        prevnext = cls.check_prev_next_links(url)
        ok = first and prevnext
        cls.log("checked navigation from %s -> returned %d" % (url, ok))
        return ok
199
200
201
class GenericListableComic(GenericComic):
    """Generic class for "listable" comics : with a list of comics (aka 'archive')

    `get_next_comic` walks through the archive and relies on more
    specialized methods to be implemented/overridden:
        - get_archive_elements
        - get_url_from_archive_element
        - get_comic_info
    """
    _categories = ('LISTABLE', )

    @classmethod
    def get_archive_elements(cls):
        """Return an iterable over the archive elements."""
        raise NotImplementedError

    @classmethod
    def get_url_from_archive_element(cls, archive_elt):
        """Return the url an archive element corresponds to."""
        raise NotImplementedError

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Return the information (dict) about a particular comic, or None."""
        raise NotImplementedError

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generic implementation of get_next_comic for listable comics.

        Archive elements are skipped up to and including the one whose url
        is the url of `last_comic`; comics are yielded for the elements
        after it. When `last_comic` is falsy, every element is handled.
        """
        waiting_for_url = last_comic['url'] if last_comic else None
        for archive_elt in cls.get_archive_elements():
            url = cls.get_url_from_archive_element(archive_elt)
            cls.log("considering %s" % url)
            if waiting_for_url is not None:
                # Still looking for the last retrieved comic.
                if waiting_for_url == url:
                    waiting_for_url = None
                continue
            cls.log("about to get %s (%s)" % (url, str(archive_elt)))
            comic = cls.get_comic_info(get_soup_at_url(url), archive_elt)
            if comic is None:
                continue
            assert 'url' not in comic
            comic['url'] = url
            yield comic
        if waiting_for_url is not None:
            # The last retrieved comic was never seen in the archive.
            print("Did not find %s : there might be a problem" % waiting_for_url)
246
247
# Helper functions corresponding to get_first_comic_link/get_navi_link
248
249
250
@classmethod
def get_link_rel_next(cls, last_soup, next_):
    """Find the 'link' tag with rel 'next' (or 'prev' when `next_` is falsy).

    Meant to be assigned in a class body as its get_navi_link.
    """
    rel_value = 'next' if next_ else 'prev'
    return last_soup.find('link', rel=rel_value)
254
255
256
@classmethod
def get_a_rel_next(cls, last_soup, next_):
    """Find the 'a' tag with rel 'next' (or 'prev' when `next_` is falsy).

    Meant to be assigned in a class body as its get_navi_link.
    """
    rel_value = 'next' if next_ else 'prev'
    return last_soup.find('a', rel=rel_value)
260
261
262
@classmethod
def get_a_navi_navinext(cls, last_soup, next_):
    """Find the 'a' tag of class 'navi navi-next' ('navi navi-prev' when `next_` is falsy).

    Meant to be assigned in a class body as its get_navi_link.
    """
    css_class = 'navi navi-next' if next_ else 'navi navi-prev'
    return last_soup.find('a', class_=css_class)
266
267
268
@classmethod
def get_a_navi_comicnavnext_navinext(cls, last_soup, next_):
    """Find the 'a' navigation tag using the 'navi comic-nav-* navi-*' classes.

    Meant to be assigned in a class body as its get_navi_link: the "next"
    flavour is searched when `next_` is truthy, the "previous" one otherwise.
    """
    if next_:
        css_class = 'navi comic-nav-next navi-next'
    else:
        css_class = 'navi comic-nav-previous navi-prev'
    return last_soup.find('a', class_=css_class)
272
273
274
@classmethod
def get_a_comicnavbase_comicnavnext(cls, last_soup, next_):
    """Find the 'a' navigation tag using the 'comic-nav-base comic-nav-*' classes.

    Meant to be assigned in a class body as its get_navi_link: the "next"
    flavour is searched when `next_` is truthy, the "previous" one otherwise.
    """
    if next_:
        css_class = 'comic-nav-base comic-nav-next'
    else:
        css_class = 'comic-nav-base comic-nav-previous'
    return last_soup.find('a', class_=css_class)
278
279
280
@classmethod
def get_a_navi_navifirst(cls):
    """Find the 'a' tag of class 'navi navi-first' on the page at `cls.url`.

    Meant to be assigned in a class body as its get_first_comic_link.
    """
    home_soup = get_soup_at_url(cls.url)
    return home_soup.find('a', class_='navi navi-first')
284
285
286
@classmethod
def get_div_navfirst_a(cls):
    """Find the 'a' tag inside the 'nav-first' div of the page at `cls.url`.

    Meant to be assigned in a class body as its get_first_comic_link.
    Returns None when the page has no such div (or no link inside it) so
    that callers get the usual "no first link" result instead of an
    AttributeError raised on None.
    """
    nav_first = get_soup_at_url(cls.url).find('div', class_="nav-first")
    if nav_first is None:
        return None
    return nav_first.find('a')
290
291
292
@classmethod
def get_a_comicnavbase_comicnavfirst(cls):
    """Find the 'a' tag of class 'comic-nav-base comic-nav-first' on the page at `cls.url`.

    Meant to be assigned in a class body as its get_first_comic_link.
    """
    home_soup = get_soup_at_url(cls.url)
    return home_soup.find('a', class_='comic-nav-base comic-nav-first')
296
297
298
@classmethod
def simulate_first_link(cls):
    """Build a link-like dict whose 'href' is the class attribute `first_url`.

    Meant to be assigned in a class body as its get_first_comic_link when
    the URL of the first comic is hardcoded on the class.
    """
    first_link = {}
    first_link['href'] = cls.first_url
    return first_link
303
304
305
@classmethod
def navigate_to_first_comic(cls):
    """Walk backward from a user provided URL and return a link to the first comic.

    Meant to be assigned in a class body as its get_first_comic_link.

    Some sites give no direct access to their first comic: the only way
    to reach it is to follow the "previous" links until there is none
    left. Hardcoding the URL found is the better long-term solution but
    this automatic search is convenient during development.

    The starting URL is read from standard input; every URL visited is
    printed. The result is a link-like dict with an 'href' entry.
    """
    url = input("Get starting URL: ")
    while True:
        print(url)
        prev_link = cls.get_prev_link(get_soup_at_url(url))
        if not prev_link:
            # No previous comic: `url` is where the walk ends.
            return {'href': url}
        url = cls.get_url_from_link(prev_link)
324
325
326
class GenericEmptyComic(GenericComic):
    """Generic class for comics where nothing is to be done.

    Handy to switch off for a while a comic that does not work
    properly: replace `class MyComic(GenericWhateverComic)` with
    `class MyComic(GenericEmptyComic, GenericWhateverComic)`.
    """
    _categories = ('EMPTY', )

    @classmethod
    def get_next_comic(cls, last_comic):
        """Log that the comic is considered empty and return no comic at all."""
        no_comics = []
        cls.log("comic is considered as empty - returning no comic")
        return no_comics
0 ignored issues
show
This code seems to be duplicated in your project.
Loading history...
339
340
341
class ExtraFabulousComics(GenericNavigableComic):
    """Retriever for Extra Fabulous Comics."""
    name = 'efc'
    long_name = 'Extra Fabulous Comics'
    url = 'http://extrafabulouscomics.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic dict (title, images, date, prefix) from a comic page.

        Images are the 'img' tags whose source starts with the site's
        'wp-content/uploads/' location; the date is the first 10 characters
        of the 'article:published_time' meta.
        """
        uploads_re = re.compile('^%s/wp-content/uploads/' % cls.url)
        upload_imgs = soup.find_all('img', src=uploads_re)
        title = soup.find('meta', property='og:title')['content']
        published = soup.find('meta', property='article:published_time')['content']
        day = string_to_date(published[:10], "%Y-%m-%d")
        info = {'title': title}
        info['img'] = [img['src'] for img in upload_imgs]
        info['month'] = day.month
        info['year'] = day.year
        info['day'] = day.day
        info['prefix'] = title + '-'
        return info
365
366
367
class GenericLeMondeBlog(GenericNavigableComic):
    """Common base for the comics published on Le Monde blogs.

    Subclasses provide `name`, `long_name`, `url` and `first_url`.
    """
    _categories = ('LEMONDE', 'FRANCAIS')
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    # Placeholder: to be overridden with the URL navigation starts from.
    first_url = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic dict (title, short link, images, date) from a blog post page."""
        shortlink = soup.find('link', rel='shortlink')
        title_meta = soup.find('meta', property='og:title')
        url2, title = shortlink['href'], title_meta['content']
        entry_date = soup.find("span", class_="entry-date").string
        # Dates are parsed with the French locale.
        day = string_to_date(entry_date, "%d %B %Y", "fr_FR.utf8")
        img_urls = [
            convert_iri_to_plain_ascii_uri(meta['content'])
            for meta in soup.find_all('meta', property='og:image')
        ]
        return {
            'title': title,
            'url2': url2,
            'img': img_urls,
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
390
391
392
class ZepWorld(GenericLeMondeBlog):
    """Retriever for Zep World (Le Monde blog)."""
    name = 'zep'
    long_name = 'Zep World'
    url = 'http://zepworld.blog.lemonde.fr'
    # Post used as the starting point of the navigation.
    first_url = 'http://zepworld.blog.lemonde.fr/2014/10/31/bientot-le-blog-de-zep/'
398
399
400
class Vidberg(GenericLeMondeBlog):
    """Retriever for Vidberg (Le Monde blog)."""
    name = "vidberg"
    long_name = "Vidberg - l'actu en patates"
    url = 'http://vidberg.blog.lemonde.fr'
    # Starting point of the navigation. This is not the actual first post:
    # no efficient way to retrieve it was found.
    first_url = 'http://vidberg.blog.lemonde.fr/2012/02/09/revue-de-campagne-la-campagne-du-modem-semballe/'
407
408
409
class Plantu(GenericLeMondeBlog):
    """Retriever for Plantu (Le Monde blog)."""
    name = "plantu"
    long_name = 'Plantu'
    url = 'http://plantu.blog.lemonde.fr'
    # Post used as the starting point of the navigation.
    first_url = 'http://plantu.blog.lemonde.fr/2014/10/28/stress-test-a-bruxelles/'
415
416
417
class XavierGorce(GenericLeMondeBlog):
    """Retriever for Xavier Gorce (Le Monde blog)."""
    name = "gorce"
    long_name = 'Xavier Gorce'
    url = 'http://xaviergorce.blog.lemonde.fr'
    # Post used as the starting point of the navigation.
    first_url = 'http://xaviergorce.blog.lemonde.fr/2015/01/09/distinction/'
423
424
425
class CartooningForPeace(GenericLeMondeBlog):
    """Retriever for Cartooning For Peace (Le Monde blog)."""
    name = "forpeace"
    long_name = 'Cartooning For Peace'
    url = 'http://cartooningforpeace.blog.lemonde.fr'
    # Post used as the starting point of the navigation.
    first_url = 'http://cartooningforpeace.blog.lemonde.fr/2014/12/15/bado/'
431
432
433
class Aurel(GenericLeMondeBlog):
    """Retriever for Aurel (Le Monde blog)."""
    name = "aurel"
    long_name = 'Aurel'
    url = 'http://aurel.blog.lemonde.fr'
    # Post used as the starting point of the navigation.
    first_url = 'http://aurel.blog.lemonde.fr/2014/09/29/le-senat-repasse-a-droite/'
439
440
441
class LesCulottees(GenericLeMondeBlog):
    """Retriever for Les Culottees (Le Monde blog)."""
    name = "culottees"
    long_name = "Les Culottees"
    url = 'http://lesculottees.blog.lemonde.fr'
    # Post used as the starting point of the navigation.
    first_url = 'http://lesculottees.blog.lemonde.fr/2016/01/11/clementine-delait-femme-a-barbe/'
447
448
449
class UneAnneeAuLycee(GenericLeMondeBlog):
    """Retriever for Une Annee Au Lycee (Le Monde blog)."""
    name = "lycee"
    long_name = "Une Annee au Lycee"
    url = "http://uneanneeaulycee.blog.lemonde.fr"
    # Post used as the starting point of the navigation.
    first_url = 'http://uneanneeaulycee.blog.lemonde.fr/2016/06/13/la-semaine-du-bac-est-arrivee/'
455
456
457
class Rall(GenericNavigableComic):
    """Retriever for Ted Rall comics."""
    # Also on http://www.gocomics.com/tedrall
    name = 'rall'
    long_name = 'Ted Rall'
    url = 'http://rall.com/comic'
    _categories = ('RALL', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    # Starting point of the navigation. This is not the actual first comic:
    # no efficient way to retrieve it was found.
    first_url = 'http://rall.com/2014/01/30/los-angeles-times-cartoon-well-miss-those-california-flowers'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic dict (title, author, date, description, images) from a comic page."""
        title = soup.find('meta', property='og:title')['content']
        author_span = soup.find("span", class_="author vcard")
        author = author_span.find("a").string
        entry_date = soup.find("span", class_="entry-date").string
        day = string_to_date(entry_date, "%B %d, %Y")
        desc = soup.find('meta', property='og:description')['content']
        content_imgs = soup.find('div', class_='entry-content').find_all('img')
        # The last 7 images are dropped: they are the social media buttons.
        comic_imgs = content_imgs[:-7]
        return {
            'title': title,
            'author': author,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'description': desc,
            'img': [img['src'] for img in comic_imgs],
        }
488
489
490
class Dilem(GenericNavigableComic):
    """Retriever for Ali Dilem comics."""
    name = 'dilem'
    long_name = 'Ali Dilem'
    url = 'http://information.tv5monde.com/dilem'
    _categories = ('FRANCAIS', )
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://information.tv5monde.com/dilem/2004-06-26'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the 'a' tag leading to the next (or previous) comic, None if there is none."""
        # Classes are swapped on purpose: 'prev' leads to the next comic
        # and 'next' leads to the previous one.
        css_class = 'prev' if next_ else 'next'
        item = last_soup.find('li', class_=css_class)
        if not item:
            return None
        return item.find('a')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic dict (short url, title, images, date) from a comic page."""
        short_url = soup.find('link', rel='shortlink')['href']
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        image_metas = soup.find_all('meta', property='og:image')
        # Only the first 10 characters of the 'dc:date' content are parsed.
        full_date = soup.find('span', property='dc:date')['content']
        day = string_to_date(full_date[:10], "%Y-%m-%d")
        return {
            'short_url': short_url,
            'title': title,
            'img': [meta['content'] for meta in image_metas],
            'day': day.day,
            'month': day.month,
            'year': day.year,
        }
524
525
526
class SpaceAvalanche(GenericNavigableComic):
    """Retriever for Space Avalanche comics."""
    name = 'avalanche'
    long_name = 'Space Avalanche'
    url = 'http://www.spaceavalanche.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return a hardcoded link-like dict ('href' and 'title') for the first comic."""
        first_link = {
            'href': "http://www.spaceavalanche.com/2009/02/02/irish-sea/",
            'title': "Irish Sea",
        }
        return first_link

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic dict from a comic page and the link leading to it.

        The title comes from the link; the date is read from the three
        numeric path components of the comic url.
        """
        title = link['title']
        url = cls.get_url_from_link(link)
        date_match = re.compile('.*/([0-9]*)/([0-9]*)/([0-9]*)/.*$').match(url)
        year, month, day = map(int, date_match.groups())
        entry_imgs = soup.find("div", class_="entry").find_all("img")
        return {
            'title': title,
            'day': day,
            'month': month,
            'year': year,
            'img': [img['src'] for img in entry_imgs],
        }
554
555
556
class ZenPencils(GenericNavigableComic):
    """Retriever for ZenPencils comics."""
    # Also on http://zenpencils.tumblr.com
    # Also on http://www.gocomics.com/zen-pencils
    name = 'zenpencils'
    long_name = 'Zen Pencils'
    url = 'http://zenpencils.com'
    _categories = ('ZENPENCILS', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://zenpencils.com/comic/1-ralph-waldo-emerson-make-them-cry/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic dict (title, description, author, date, images) from a comic page.

        Sanity checks: at least one image is expected, with identical 'alt'
        and 'title' attributes being either the comic title or empty.
        """
        comic_imgs = soup.find('div', id='comic').find_all('img')
        post = soup.find('div', class_='post-content')
        author = post.find("span", class_="post-author").find("a").string
        title = soup.find('meta', property='og:title')['content']
        post_date = post.find('span', class_='post-date').string
        day = string_to_date(post_date, "%B %d, %Y")
        assert comic_imgs
        for img in comic_imgs:
            assert img['alt'] == img['title']
        for img in comic_imgs:
            assert img['alt'] in (title, "")
        desc = soup.find('meta', property='og:description')['content']
        # Image sources are joined to the site url.
        img_urls = [urljoin_wrapper(cls.url, img['src']) for img in comic_imgs]
        return {
            'title': title,
            'description': desc,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': img_urls,
        }
591
592
593
class ItsTheTie(GenericNavigableComic):
    """Retriever for It's the tie comics."""
    # Also on http://itsthetie.tumblr.com
    # Also on https://tapastic.com/series/itsthetie
    name = 'tie'
    long_name = "It's the tie"
    url = 'http://itsthetie.com'
    _categories = ('TIE', )
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic dict (title, date, images, tags) from a comic page.

        Images are the 'og:image' metas followed by the bonus images
        ('data-oversrc' attributes) not already listed; the generic
        "bonus-panel.png" picture is left out. 'tags' is an empty string
        when the page has no 'article:tag' meta.
        """
        title = soup.find('h1', class_='comic-title').find('a').string
        meta_header = soup.find('header', class_='comic-meta entry-meta')
        day = string_to_date(meta_header.find('a').string, "%B %d, %Y")
        # Bonus images may or may not be in meta og:image.
        main_srcs = [meta['content'] for meta in soup.find_all('meta', property='og:image')]
        bonus_srcs = [img['data-oversrc']
                      for img in soup.find_all('img', attrs={'data-oversrc': True})]
        candidates = list(main_srcs)
        candidates.extend(src for src in bonus_srcs if src not in main_srcs)
        kept_srcs = []
        for src in candidates:
            if src.endswith("/2016/01/bonus-panel.png"):
                continue
            kept_srcs.append(src)
        tag_meta = soup.find('meta', property='article:tag')
        if tag_meta:
            tags = tag_meta['content']
        else:
            tags = ""
        return {
            'title': title,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': kept_srcs,
            'tags': tags,
        }
627
628
629
class PenelopeBagieu(GenericNavigableComic):
    """Retriever for the comics of Penelope Bagieu's blog."""
    name = 'bagieu'
    long_name = 'Ma vie est tout a fait fascinante (Bagieu)'
    url = 'http://www.penelope-jolicoeur.com'
    _categories = ('FRANCAIS', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = "http://www.penelope-jolicoeur.com/2007/02/ma-vie-mon-oeuv.html"

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic dict (title, images, date) from a blog entry page."""
        header_date = soup.find('h2', class_='date-header').string
        # Dates are parsed with the French locale.
        day = string_to_date(header_date, "%A %d %B %Y", "fr_FR.utf8")
        body_imgs = soup.find('div', class_='entry-body').find_all('img')
        entry_title = soup.find('h3', class_='entry-header').string
        info = {'title': entry_title}
        info['img'] = [img['src'] for img in body_imgs]
        info['month'] = day.month
        info['year'] = day.year
        info['day'] = day.day
        return info
653
654
655
class OneOneOneOneComic(GenericNavigableComic):
    """Retriever for 1111 Comics."""
    # Also on http://comics1111.tumblr.com
    # Also on https://tapastic.com/series/1111-Comics
    name = '1111'
    long_name = '1111 Comics'
    url = 'http://www.1111comics.me'
    _categories = ('ONEONEONEONE', )
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic dict (title, date, images) from a comic page."""
        title_link = soup.find('h1', class_='comic-title').find('a')
        title = title_link.string
        date_link = soup.find('header', class_='comic-meta entry-meta').find('a')
        day = string_to_date(date_link.string, "%B %d, %Y")
        image_metas = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [meta['content'] for meta in image_metas],
        }
680
681
682
class AngryAtNothing(GenericNavigableComic):
    """Retriever for Angry at Nothing comics."""
    # Also on http://tapastic.com/series/Comics-yeah-definitely-comics-
    name = 'angry'
    long_name = 'Angry At Nothing'
    url = 'http://www.angryatnothing.net'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic dict (title, date, images) from a comic page."""
        title_link = soup.find('h1', class_='comic-title').find('a')
        title = title_link.string
        date_link = soup.find('header', class_='comic-meta entry-meta').find('a')
        day = string_to_date(date_link.string, "%B %d, %Y")
        image_metas = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [meta['content'] for meta in image_metas],
        }
705
706
707
class NeDroid(GenericNavigableComic):
    """Class to retrieve NeDroid comics."""
    name = 'nedroid'
    long_name = 'NeDroid'
    url = 'http://nedroid.com'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The comic number is read from the short url ('?p=<num>') and the
        date from the file name of the image ('<year>-<month>-<day>...').
        Exactly one image is expected in the 'comic' div; its 'alt' and
        'title' attributes are returned as 'title' and 'title2'.
        """
        # The site url is escaped so that its dots only match literal dots.
        short_url_re = re.compile(r'^%s/\?p=([0-9]*)' % re.escape(cls.url))
        comic_url_re = re.compile(r'//nedroid\.com/comics/([0-9]*)-([0-9]*)-([0-9]*).*')
        short_url = cls.get_url_from_link(soup.find('link', rel='shortlink'))
        num = int(short_url_re.match(short_url).groups()[0])
        imgs = soup.find('div', id='comic').find_all('img')
        # Checked before the first image is used so that an unexpected page
        # fails on this sanity check rather than on an IndexError.
        assert len(imgs) == 1
        img = imgs[0]
        year, month, day = [int(s) for s in comic_url_re.match(img['src']).groups()]
        title = img['alt']
        title2 = img['title']
        return {
            'short_url': short_url,
            'title': title,
            'title2': title2,
            'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
            'day': day,
            'month': month,
            'year': year,
            'num': num,
        }
738
739
740
class Garfield(GenericNavigableComic):
    """Retriever for Garfield comics."""
    # Also on http://www.gocomics.com/garfield
    name = 'garfield'
    long_name = 'Garfield'
    url = 'https://garfield.com'
    _categories = ('GARFIELD', )
    get_first_comic_link = simulate_first_link
    first_url = "https://garfield.com/comic/1978/06/19"

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the 'a' arrow tag leading to the next (or previous) comic."""
        arrow_class = 'comic-arrow-right' if next_ else 'comic-arrow-left'
        return last_soup.find('a', class_=arrow_class)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic dict (date, images) from a comic page and the link leading to it.

        The date is read from the '/comic/<year>/<month>/<day>' part of the
        comic url.
        """
        url = cls.get_url_from_link(link)
        date_match = re.compile('^%s/comic/([0-9]*)/([0-9]*)/([0-9]*)' % cls.url).match(url)
        year, month, day = map(int, date_match.groups())
        display = soup.find('div', class_='comic-display')
        comic_imgs = display.find_all('img', class_='img-responsive')
        return {
            'month': month,
            'year': year,
            'day': day,
            'img': [img['src'] for img in comic_imgs],
        }
768
769
770
class Dilbert(GenericNavigableComic):
    """Retriever for the Dilbert strips published on dilbert.com."""
    # Also on http://www.gocomics.com/dilbert-classics
    name = 'dilbert'
    long_name = 'Dilbert'
    url = 'http://dilbert.com'
    first_url = 'http://dilbert.com/strip/1989-04-16'
    get_first_comic_link = simulate_first_link
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor inside the navigation div for the wanted direction.

        Gives None when the page has no such navigation div.
        """
        nav_class = 'nav-comic nav-right' if next_ else 'nav-comic nav-left'
        container = last_soup.find('div', class_=nav_class)
        if not container:
            return None
        return container.find('a')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect the strip information from the page's <meta> properties."""
        def meta_content(prop):
            """Content of the first <meta> tag carrying the given property."""
            return soup.find('meta', property=prop)['content']

        title = meta_content('og:title')
        image_metas = soup.find_all('meta', property='og:image')
        description = meta_content('og:description')
        published = string_to_date(meta_content('article:publish_date'), "%B %d, %Y")
        author = meta_content('article:author')
        # Only the first 'article:tag' meta is read.
        tags = meta_content('article:tag')
        return {
            'title': title,
            'description': description,
            'img': [meta['content'] for meta in image_metas],
            'author': author,
            'tags': tags,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
806
807
808
class VictimsOfCircumsolar(GenericNavigableComic):
    """Retriever for the Victims Of Circumsolar comics."""
    name = 'circumsolar'
    long_name = 'Victims Of Circumsolar'
    url = 'http://www.victimsofcircumsolar.com'
    first_url = 'http://www.victimsofcircumsolar.com/comic/modern-addiction'
    get_first_comic_link = simulate_first_link
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title, description, image urls) for one comic.

        No date is returned: it is only available on the archive page.
        """
        # The last og:title / og:description meta of the page is the one used.
        title_metas = soup.find_all('meta', property='og:title')
        title = title_metas[-1]['content']
        desc_metas = soup.find_all('meta', property='og:description')
        description = desc_metas[-1]['content']
        comic_div = soup.find('div', id='comic')
        pictures = comic_div.find_all('img')
        # Sanity check: every image is labelled with the page title.
        assert all(pic['title'] == pic['alt'] == title for pic in pictures)
        return {
            'title': title,
            'description': description,
            'img': [pic['src'] for pic in pictures],
        }
830
831
832
class ThreeWordPhrase(GenericNavigableComic):
    """Class to retrieve Three Word Phrase comics."""
    # Also on http://www.threewordphrase.tumblr.com
    name = 'threeword'
    long_name = 'Three Word Phrase'
    url = 'http://threewordphrase.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        The link is the parent element of the '/firstlink.gif' button image
        found on the home page.
        """
        return get_soup_at_url(cls.url).find('img', src='/firstlink.gif').parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns the parent element of the next/previous button image, or
        None when the button is absent or its parent carries no 'href'.
        """
        button = last_soup.find('img', src='/nextlink.gif' if next_ else '/prevlink.gif')
        if button is None:
            # find() yields None when nothing matches; report "no link"
            # instead of failing on the attribute access below.
            return None
        link = button.parent
        return None if link.get('href') is None else link

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with the page title, the joined 'alt' texts of the
        comic images and the absolute image urls.
        """
        title = soup.find('title')
        # Every <img> of the page is a comic image except the navigation
        # buttons and the site decorations listed here.
        imgs = [img for img in soup.find_all('img')
                if not img['src'].endswith(
                    ('link.gif', '32.png', 'twpbookad.jpg',
                     'merchad.jpg', 'header.gif', 'tipjar.jpg'))]
        return {
            'title': title.string if title else None,
            'title2': '  '.join(img.get('alt') for img in imgs if img.get('alt')),
            'img': [urljoin_wrapper(cls.url, img['src']) for img in imgs],
        }
864
865
866
class DeadlyPanel(GenericEmptyComic, GenericNavigableComic):
    """Retriever for the Deadly Panel comics."""
    # Also on https://tapastic.com/series/deadlypanel
    name = 'deadly'
    long_name = 'Deadly Panel'
    url = 'http://www.deadlypanel.com'
    get_navi_link = get_a_navi_comicnavnext_navinext
    get_first_comic_link = get_a_navi_navifirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for one comic: only the image urls."""
        comic_div = soup.find('div', id='comic')
        pictures = comic_div.find_all('img')
        # Sanity check: 'alt' and 'title' carry the same text on each image.
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        return {
            'img': [pic['src'] for pic in pictures],
        }
883
884
885
class TheGentlemanArmchair(GenericNavigableComic):
    """Retriever for The Gentleman Armchair comics."""
    name = 'gentlemanarmchair'
    long_name = 'The Gentleman Armchair'
    url = 'http://thegentlemansarmchair.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = get_a_navi_navifirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (images, title, author, date) for one comic."""
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        date_text = soup.find('span', class_='post-date').string
        published = string_to_date(date_text, "%B %d, %Y")
        comic_div = soup.find('div', id='comic')
        pictures = comic_div.find_all('img')
        return {
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'author': author,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
909
910
911
class MyExtraLife(GenericNavigableComic):
    """Retriever for the My Extra Life comics."""
    name = 'extralife'
    long_name = 'My Extra Life'
    url = 'http://www.myextralife.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'first comic' navigation anchor of the home page."""
        home = get_soup_at_url(cls.url)
        return home.find('a', class_='comic_nav_link first_comic_link')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title, images, date) for one comic."""
        title = soup.find("h1", class_="comic_title").string
        date_text = soup.find("span", class_="comic_date").string
        published = string_to_date(date_text, "%B %d, %Y")
        pictures = soup.find_all("img", class_="comic")
        # Sanity check: each comic image is labelled with the comic title.
        assert all(pic['alt'] == pic['title'] == title for pic in pictures)
        return {
            'title': title,
            # Images with an empty 'src' are left out.
            'img': [pic['src'] for pic in pictures if pic["src"]],
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
938
939
940
class SaturdayMorningBreakfastCereal(GenericNavigableComic):
    """Retriever for the Saturday Morning Breakfast Cereal comics."""
    # Also on http://www.gocomics.com/saturday-morning-breakfast-cereal
    # Also on http://smbc-comics.tumblr.com
    name = 'smbc'
    long_name = 'Saturday Morning Breakfast Cereal'
    url = 'http://www.smbc-comics.com'
    _categories = ('SMBC', )
    get_navi_link = get_a_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the rel='start' anchor of the home page."""
        home = get_soup_at_url(cls.url)
        return home.find('a', rel='start')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title, images, date) for one comic.

        The image list holds the main comic image followed, when the page
        has an 'aftercomic' div, by the image found in that div.
        """
        main_img = soup.find('img', id='cc-comic')
        sources = [main_img['src']]
        bonus_div = soup.find('div', id='aftercomic')
        if bonus_div:
            bonus_src = bonus_div.find('img')['src']
            # An empty 'src' is ignored.
            if bonus_src:
                sources.append(bonus_src)
        date_text = soup.find('div', class_='cc-publishtime').contents[0]
        published = string_to_date(date_text, "%B %d, %Y")
        return {
            'title': main_img['title'],
            'img': [urljoin_wrapper(cls.url, src) for src in sources],
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
972
973
974
class PerryBibleFellowship(GenericListableComic):
    """Retriever for the Perry Bible Fellowship comics."""
    name = 'pbf'
    long_name = 'Perry Bible Fellowship'
    url = 'http://pbfcomics.com'
    get_url_from_archive_element = join_cls_url_to_href

    @classmethod
    def get_archive_elements(cls):
        """Return the home page's comic anchors, in reverse page order.

        Comic anchors are those whose href is a number between slashes.
        """
        home = get_soup_at_url(cls.url)
        anchors = home.find_all('a', href=re.compile('^/[0-9]*/$'))
        return reversed(anchors)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (number, name, image, file prefix) for one comic."""
        comic_url = cls.get_url_from_archive_element(link)
        img_pattern = re.compile('^/archive_b/PBF.*')
        name = link.string
        num = int(link['name'])
        # Sanity check: the anchor's href is built from its 'name' number.
        assert link['href'] == '/%d/' % num
        pictures = soup.find_all('img', src=img_pattern)
        # Sanity checks: exactly one comic image, labelled with the comic name.
        assert len(pictures) == 1
        assert pictures[0]['alt'] == name
        return {
            'num': num,
            'name': name,
            'img': [urljoin_wrapper(comic_url, pic['src']) for pic in pictures],
            'prefix': '%d-' % num,
        }
1004
1005
1006
class Mercworks(GenericNavigableComic):
    """Retriever for the Mercworks comics."""
    # Also on http://mercworks.tumblr.com
    name = 'mercworks'
    long_name = 'Mercworks'
    url = 'http://mercworks.net'
    get_navi_link = get_a_rel_next
    get_first_comic_link = get_a_comicnavbase_comicnavfirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect the comic information from the page's <meta> tags."""
        def named_meta(meta_name):
            """Content of the first <meta> tag with the given 'name'."""
            return soup.find('meta', attrs={'name': meta_name})['content']

        title = soup.find('meta', property='og:title')['content']
        # The description is optional: fall back to an empty string.
        desc_meta = soup.find('meta', property='og:description')
        if desc_meta:
            description = desc_meta['content']
        else:
            description = ""
        author = named_meta('shareaholic:article_author_name')
        published_time = named_meta('shareaholic:article_published_time')
        # Only the first 10 characters (the YYYY-MM-DD part) are parsed.
        published = string_to_date(published_time[:10], "%Y-%m-%d")
        image_metas = soup.find_all('meta', property='og:image')
        return {
            'img': [meta['content'] for meta in image_metas],
            'title': title,
            'author': author,
            'desc': description,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
1035
1036
1037
class BerkeleyMews(GenericListableComic):
    """Retriever for the Berkeley Mews comics."""
    # Also on http://mews.tumblr.com
    # Also on http://www.gocomics.com/berkeley-mews
    name = 'berkeley'
    long_name = 'Berkeley Mews'
    url = 'http://www.berkeleymews.com'
    _categories = ('BERKELEY', )
    # Matches comic urls and captures the comic number.
    comic_num_re = re.compile('%s/\\?p=([0-9]*)$' % url)
    get_url_from_archive_element = get_href

    @classmethod
    def get_archive_elements(cls):
        """Return the archive page's comic anchors, in reverse page order."""
        archive = get_soup_at_url(urljoin_wrapper(cls.url, "?page_id=2"))
        anchors = archive.find_all('a', href=cls.comic_num_re)
        return reversed(anchors)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for one comic.

        The number comes from the comic url, the date from the three numeric
        components found in the image url.
        """
        date_pattern = re.compile('.*/([0-9]*)-([0-9]*)-([0-9]*)-.*')
        comic_url = cls.get_url_from_archive_element(link)
        num = int(cls.comic_num_re.match(comic_url).group(1))
        picture = soup.find('div', id='comic').find('img')
        # Sanity check: 'alt' and 'title' carry the same text.
        assert picture['alt'] == picture['title']
        hover_text = picture['title']
        picture_url = picture['src']
        year, month, day = map(int, date_pattern.match(picture_url).groups())
        return {
            'num': num,
            'title': link.string,
            'title2': hover_text,
            'img': [picture_url],
            'year': year,
            'month': month,
            'day': day,
        }
1073
1074
1075
class GenericBouletCorp(GenericNavigableComic):
    """Shared retrieval logic for the BouletCorp comics in each language."""
    # Also on http://bouletcorp.tumblr.com
    _categories = ('BOULET', )
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the first anchor of the home page's 'centered_nav' div."""
        navigation = get_soup_at_url(cls.url).find('div', id='centered_nav')
        return navigation.find_all('a')[0]

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (images, title, image texts, date) for one comic.

        The date comes from the three numeric components of the comic url.
        """
        comic_url = cls.get_url_from_link(link)
        pattern = '^%s/([0-9]*)/([0-9]*)/([0-9]*)/' % cls.url
        year, month, day = map(int, re.match(pattern, comic_url).groups())
        story = soup.find('div', id='notes').find('div', class_='storycontent')
        pictures = story.find_all('img')
        # Non-empty 'title' attributes of the images, joined together.
        hover_texts = [pic.get('title') for pic in pictures]
        texts = '  '.join(text for text in hover_texts if text)
        title = soup.find('title').string
        return {
            # Images without a 'src' attribute are left out.
            'img': [convert_iri_to_plain_ascii_uri(pic['src'])
                    for pic in pictures if pic.get('src') is not None],
            'title': title,
            'texts': texts,
            'year': year,
            'month': month,
            'day': day,
        }
1103
1104
1105
class BouletCorp(GenericBouletCorp):
    """Retriever for the BouletCorp comics served from www.bouletcorp.com."""
    url = 'http://www.bouletcorp.com'
    name = 'boulet'
    long_name = 'Boulet Corp'
    _categories = ('FRANCAIS', )
1111
1112
1113
class BouletCorpEn(GenericBouletCorp):
    """Retriever for the BouletCorp comics served from english.bouletcorp.com."""
    url = 'http://english.bouletcorp.com'
    name = 'boulet_en'
    long_name = 'Boulet Corp English'
1118
1119
1120
class AmazingSuperPowers(GenericNavigableComic):
    """Retriever for the Amazing Super Powers comics."""
    name = 'asp'
    long_name = 'Amazing Super Powers'
    url = 'http://www.amazingsuperpowers.com'
    get_navi_link = get_a_navi_navinext
    get_first_comic_link = get_a_navi_navifirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title, author, images, date) for one comic.

        The title is made of the images' 'title' attributes joined by spaces.
        """
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        date_text = soup.find('span', class_='post-date').string
        published = string_to_date(date_text, "%B %d, %Y")
        pictures = soup.find('div', id='comic').find_all('img')
        title = ' '.join(pic['title'] for pic in pictures)
        # Sanity check: 'alt' and 'title' carry the same text on each image.
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        return {
            'title': title,
            'author': author,
            'img': [pic['src'] for pic in pictures],
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
1145
1146
1147
class ToonHole(GenericNavigableComic):
    """Retriever for the Toon Hole comics."""
    # Also on http://tapastic.com/series/TOONHOLE
    name = 'toonhole'
    long_name = 'Toon Hole'
    url = 'http://www.toonhole.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = get_a_comicnavbase_comicnavfirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (short url, title, date, images) for one comic.

        The title is the 'alt' text of the first comic image, or an empty
        string when the page has no comic image.
        """
        short_url = soup.find('link', rel='shortlink')['href']
        date_text = soup.find('time', class_='entry-date published').string
        published = string_to_date(date_text, "%B %d, %Y")
        pictures = soup.find('div', id='comic').find_all('img')
        title = ""
        if pictures:
            first_picture = pictures[0]
            title = first_picture['alt']
            # Sanity check: 'title' and 'alt' carry the same text.
            assert first_picture['title'] == title
        return {
            'short_url': short_url,
            'title': title,
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [convert_iri_to_plain_ascii_uri(pic['src']) for pic in pictures],
        }
1177
1178
1179
class Channelate(GenericNavigableComic):
    """Retriever for the Channelate comics."""
    name = 'channelate'
    long_name = 'Channelate'
    url = 'http://www.channelate.com'
    get_url_from_link = join_cls_url_to_href
    get_navi_link = get_link_rel_next
    get_first_comic_link = get_div_navfirst_a

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for one comic, bonus panel included.

        When the page has an 'extrapanelbutton' div, the page it links to is
        fetched and its 'extrapanelimage' images are appended to the comic's.
        """
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        date_text = soup.find('span', class_='post-date').string
        published = string_to_date(date_text, '%Y/%m/%d')
        title = soup.find('meta', property='og:title')['content']
        comic_div = soup.find('div', id='comic')
        if comic_div:
            pictures = comic_div.find_all('img')
        else:
            pictures = []
        extra_url = None
        extra_button = soup.find('div', id='extrapanelbutton')
        if extra_button:
            extra_url = extra_button.find('a')['href']
            extra_page = get_soup_at_url(extra_url)
            pictures.extend(extra_page.find_all('img', class_='extrapanelimage'))
        return {
            'url_extra': extra_url,
            'title': title,
            'author': author,
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [urljoin_wrapper(cls.url, pic['src']) for pic in pictures],
        }
1213
1214
1215
class CyanideAndHappiness(GenericNavigableComic):
    """Class to retrieve Cyanide And Happiness comics."""
    name = 'cyanide'
    long_name = 'Cyanide and Happiness'
    url = 'http://explosm.net'
    _categories = ('NSFW', )
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics (the anchor titled 'Oldest comic')."""
        return get_soup_at_url(cls.url).find('a', title='Oldest comic')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns None when the navigation anchor is missing from the page or
        carries no 'href' attribute.
        """
        # NOTE(review): the trailing space in 'previous-comic ' is kept
        # exactly as it was - confirm it is intentional.
        link = last_soup.find('a', class_='next-comic' if next_ else 'previous-comic ')
        if link is None:
            # find() yields None when nothing matches; report "no link"
            # instead of failing on the attribute access below.
            return None
        return None if link.get('href') is None else link

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with the comic number (taken from the og:url meta),
        the author, the date, a file-name prefix and the image urls.
        """
        url2 = soup.find('meta', property='og:url')['content']
        # The number is the second-to-last '/'-separated component of the url.
        num = int(url2.split('/')[-2])
        date_str = soup.find('h3').find('a').string
        day = string_to_date(date_str, '%Y.%m.%d')
        author = soup.find('small', class_="author-credit-name").string
        # The credit reads 'by <author>': check and drop the prefix.
        assert author.startswith('by ')
        author = author[3:]
        imgs = soup.find_all('img', id='main-comic')
        return {
            'num': num,
            'author': author,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'prefix': '%d-' % num,
            'img': [convert_iri_to_plain_ascii_uri(urljoin_wrapper(cls.url, i['src'])) for i in imgs]
        }
1254
1255
1256
class MrLovenstein(GenericComic):
    """Retriever for the Mr Lovenstein comics."""
    # Also on https://tapastic.com/series/MrLovenstein
    name = 'mrlovenstein'
    long_name = 'Mr. Lovenstein'
    url = 'http://www.mrlovenstein.com'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Yield the comics following last_comic. Implements GenericComic's abstract method.

        The range of comic numbers is read from the '/comic/<num>' anchors of
        the home page; iteration starts after last_comic's number when
        last_comic is given, at the smallest number found otherwise.
        """
        # TODO: more info from http://www.mrlovenstein.com/archive
        number_pattern = re.compile('^/comic/([0-9]*)$')
        home = get_soup_at_url(cls.url)
        numbers = [int(number_pattern.match(anchor['href']).group(1))
                   for anchor in home.find_all('a', href=number_pattern)]
        oldest = min(numbers)
        newest = max(numbers)
        start = last_comic['num'] + 1 if last_comic else oldest
        for num in range(start, newest + 1):
            comic_url = urljoin_wrapper(cls.url, '/comic/%d' % num)
            page = get_soup_at_url(comic_url)
            # Comic images are stored in reverse page order.
            pictures = page.find_all('img', src=re.compile('^/images/comics/'))[::-1]
            description = page.find('meta', attrs={'name': 'description'})['content']
            hover_texts = [pic.get('title') for pic in pictures]
            yield {
                'url': comic_url,
                'num': num,
                'texts': '  '.join(text for text in hover_texts if text),
                'img': [urljoin_wrapper(comic_url, pic['src']) for pic in pictures],
                'description': description,
            }
1286
1287
1288
class DinosaurComics(GenericListableComic):
    """Retriever for the Dinosaur Comics comics."""
    name = 'dinosaur'
    long_name = 'Dinosaur Comics'
    url = 'http://www.qwantz.com'
    # Matches comic urls and captures the comic number.
    comic_link_re = re.compile('^%s/index.php\\?comic=([0-9]*)$' % url)
    get_url_from_archive_element = get_href

    @classmethod
    def get_archive_elements(cls):
        """Return the archive page's comic anchors, in reverse page order."""
        archive = get_soup_at_url(urljoin_wrapper(cls.url, 'archive.php'))
        anchors = archive.find_all('a', href=cls.comic_link_re)
        # first link is random -> skip it
        return reversed(anchors[1:])

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for one comic.

        The number comes from the comic url, the date from the anchor's text
        and the text from the element following the anchor.
        """
        comic_url = cls.get_url_from_archive_element(link)
        num = int(cls.comic_link_re.match(comic_url).group(1))
        date_text = link.string
        text = link.next_sibling.string
        cleaned_date = remove_st_nd_rd_th_from_date(date_text)
        published = string_to_date(cleaned_date, "%B %d, %Y")
        picture = soup.find('img', src=re.compile('^%s/comics/' % cls.url))
        return {
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [picture.get('src')],
            'title': picture.get('title'),
            'text': text,
            'num': num,
        }
1321
1322
1323
class ButterSafe(GenericListableComic):
    """Retriever for the Butter Safe comics."""
    name = 'butter'
    long_name = 'ButterSafe'
    url = 'http://buttersafe.com'
    # Matches comic urls and captures their three numeric date components.
    comic_link_re = re.compile('^%s/([0-9]*)/([0-9]*)/([0-9]*)/.*' % url)
    get_url_from_archive_element = get_href

    @classmethod
    def get_archive_elements(cls):
        """Return the archive page's comic anchors, in reverse page order."""
        archive = get_soup_at_url(urljoin_wrapper(cls.url, 'archive/'))
        anchors = archive.find_all('a', href=cls.comic_link_re)
        return reversed(anchors)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title, date, image) for one comic.

        The title is the anchor's text and the date comes from the comic url.
        """
        comic_url = cls.get_url_from_archive_element(link)
        title = link.string
        year, month, day = map(int, cls.comic_link_re.match(comic_url).groups())
        picture = soup.find('div', id='comic').find('img')
        # Sanity check: the image is labelled with the comic title.
        assert picture['alt'] == title
        return {
            'title': title,
            'day': day,
            'month': month,
            'year': year,
            'img': [picture['src']],
        }
1351
1352
1353
class CalvinAndHobbes(GenericComic):
    """Class to retrieve Calvin and Hobbes comics."""
    # Also on http://www.gocomics.com/calvinandhobbes/
    name = 'calvin'
    long_name = 'Calvin and Hobbes'
    # This is not through any official webpage but eh...
    url = 'http://marcel-oehler.marcellosendos.ch/comics/ch/'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generator to get the next comic. Implementation of GenericComic's abstract method.

        Walks the '<year>/<month>/' links of the index page and, for each
        month that may hold strips newer than the last retrieved one, yields
        a dict per image whose date is strictly after the last yielded date.
        Without last_comic, retrieval starts after 1985-11-01.
        """
        last_date = get_date_for_comic(
            last_comic) if last_comic else date(1985, 11, 1)
        link_re = re.compile('^([0-9]*)/([0-9]*)/')
        for link in get_soup_at_url(cls.url).find_all('a', href=link_re):
            url = link['href']
            # Keep the raw strings: they are reused verbatim in the image
            # pattern and in the image url below.
            year, month = link_re.match(url).groups()
            year_num, month_num = int(year), int(month)
            # Skip months that end before the last retrieved date
            # (first of the month + 31 days is past the end of any month).
            if date(year_num, month_num, 1) + timedelta(days=31) < last_date:
                continue
            # Image names start with year and month; the day follows.
            img_re = re.compile('^%s%s([0-9]*)' % (year, month))
            month_url = urljoin_wrapper(cls.url, url)
            for img in get_soup_at_url(month_url).find_all('img', src=img_re):
                img_src = img['src']
                day = int(img_re.match(img_src).groups()[0])
                comic_date = date(year_num, month_num, day)
                if comic_date > last_date:
                    yield {
                        'url': month_url,
                        'year': year_num,
                        'month': month_num,
                        'day': day,
                        'img': ['%s%s/%s/%s' % (cls.url, year, month, img_src)],
                    }
                    last_date = comic_date
1387
1388
1389
class AbstruseGoose(GenericListableComic):
    """Retriever for the Abstruse Goose comics."""
    name = 'abstruse'
    long_name = 'Abstruse Goose'
    url = 'http://abstrusegoose.com'
    # Matches comic urls (capturing the comic number) and strip image urls.
    comic_url_re = re.compile('^%s/([0-9]*)$' % url)
    comic_img_re = re.compile('^%s/strips/.*' % url)
    get_url_from_archive_element = get_href

    @classmethod
    def get_archive_elements(cls):
        """Return the archive page's comic anchors, in page order."""
        archive = get_soup_at_url(urljoin_wrapper(cls.url, 'archive'))
        return archive.find_all('a', href=cls.comic_url_re)

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Build the info dict (number, title, image) for one comic.

        The number comes from the comic url and the title is the text of the
        archive anchor.
        """
        comic_url = cls.get_url_from_archive_element(archive_elt)
        num = int(cls.comic_url_re.match(comic_url).group(1))
        strip = soup.find('img', src=cls.comic_img_re)
        return {
            'num': num,
            'title': archive_elt.string,
            'img': [strip['src']]
        }
1412
1413
1414
class PhDComics(GenericNavigableComic):
    """Retriever for the PhD Comics."""
    name = 'phd'
    long_name = 'PhD Comics'
    url = 'http://phdcomics.com/comics/archive.php'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the parent element of the 'first' button image."""
        archive = get_soup_at_url(cls.url)
        return archive.find('img', src='images/first_button.gif').parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the parent of the next/previous button image, None if absent."""
        if next_:
            button_src = 'images/next_button.gif'
        else:
            button_src = 'images/prev_button.gif'
        button = last_soup.find('img', src=button_src)
        if button is None:
            return None
        return button.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (date, image, title) for one comic.

        When the date shown on the page cannot be parsed, a message is
        printed and today's date is used instead.
        """
        date_font = soup.find('font', face='Arial,Helvetica,Geneva,Swiss,SunSans-Regular', color='white')
        date_text = date_font.string.strip()
        try:
            published = string_to_date(date_text, '%m/%d/%Y')
        except ValueError:
            print("Invalid date %s" % date_text)
            published = date.today()
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        return {
            'year': published.year,
            'month': published.month,
            'day': published.day,
            'img': [soup.find('img', id='comic')['src']],
            'title': title,
        }
1449
1450
1451
class Octopuns(GenericEmptyComic, GenericNavigableComic):
    """Class to retrieve Octopuns comics."""
    # Also on http://octopuns.tumblr.com
    name = 'octopuns'
    long_name = 'Octopuns'
    url = 'http://www.octopuns.net'

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        The link is the parent element of the 'First.png' button image
        found on the home page.
        """
        return get_soup_at_url(cls.url).find('img', src=re.compile('.*/First.png')).parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns the parent element of the Next/Back button image, or None
        when the button is absent or its parent carries no 'href'.
        """
        button = last_soup.find('img', src=re.compile('.*/Next.png' if next_ else '.*/Back.png'))
        if button is None:
            # find() yields None when nothing matches; report "no link"
            # instead of failing on the attribute access below.
            return None
        link = button.parent
        return None if link.get('href') is None else link

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with the image urls (from the 'image_src' link tags),
        the post title and the date read from the page's date header.
        """
        title = soup.find('h3', class_='post-title entry-title').string
        date_str = soup.find('h2', class_='date-header').string
        day = string_to_date(date_str, "%A, %B %d, %Y")
        imgs = soup.find_all('link', rel='image_src')
        return {
            'img': [i['href'] for i in imgs],
            'title': title,
            'day': day.day,
            'month': day.month,
            'year': day.year,
        }
1483
1484
1485
class Quarktees(GenericNavigableComic):
    """Retriever for the Quarktees comics."""
    name = 'quarktees'
    long_name = 'Quarktees'
    url = 'http://www.quarktees.com/blogs/news'
    first_url = 'http://www.quarktees.com/blogs/news/12486621-coming-soon'
    get_first_comic_link = simulate_first_link
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the 'article-next' (or 'article-prev') anchor of the page."""
        if next_:
            anchor_id = 'article-next'
        else:
            anchor_id = 'article-prev'
        return last_soup.find('a', id=anchor_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title and image urls) for one article."""
        title = soup.find('meta', property='og:title')['content']
        article = soup.find('div', class_='single-article')
        pictures = article.find_all('img')
        return {
            'title': title,
            'img': [urljoin_wrapper(cls.url, pic['src']) for pic in pictures],
        }
1509
1510
1511
class OverCompensating(GenericNavigableComic):
    """Over Compensating comics, numbered through the 'comic=N' part of their URL."""
    name = 'compensating'
    long_name = 'Over Compensating'
    url = 'http://www.overcompensating.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the home page link whose href ends with 'comic=1'."""
        home = get_soup_at_url(cls.url)
        return home.find('a', href=re.compile('comic=1$'))

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor titled 'next comic' or 'go back already'."""
        wanted_title = 'next comic' if next_ else 'go back already'
        return last_soup.find('a', title=wanted_title)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the comic number (taken from the URL), image URL and image title."""
        comic_url = cls.get_url_from_link(link)
        num_match = re.compile('.*comic=([0-9]*)$').match(comic_url)
        num = int(num_match.group(1))
        picture = soup.find('img', src=re.compile('^/oc/comics/.*'))
        return {
            'num': num,
            'img': [urljoin_wrapper(comic_url, picture['src'])],
            'title': picture.get('title')
        }
1541
1542
1543
class Oglaf(GenericNavigableComic):
    """Oglaf comics (flagged NSFW)."""
    name = 'oglaf'
    long_name = 'Oglaf [NSFW]'
    url = 'http://oglaf.com'
    _categories = ('NSFW', )
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the parent element of the home page div with id 'st'."""
        start_div = get_soup_at_url(cls.url).find("div", id="st")
        return start_div.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the parent of the 'nx'/'pvs' div, or None when that div is absent."""
        nav_div = last_soup.find("div", id="nx" if next_ else "pvs")
        if nav_div:
            return nav_div.parent
        return None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the page title, the title and strip image URLs and their joined titles."""
        title = soup.find('title').string
        header_imgs = soup.find('div', id='tt').find_all('img')
        assert len(header_imgs) == 1
        strip_imgs = soup.find_all('img', id='strip')
        assert len(strip_imgs) == 1
        # Title image first, then the strip itself.
        images = list(header_imgs) + list(strip_imgs)
        desc = ' '.join(image['title'] for image in images)
        return {
            'title': title,
            'img': [image['src'] for image in images],
            'description': desc,
        }
1577
1578
1579
class ScandinaviaAndTheWorld(GenericNavigableComic):
    """Scandinavia And The World comics, navigated through accesskey 'n'/'p' links."""
    name = 'satw'
    long_name = 'Scandinavia And The World'
    url = 'http://satwcomic.com'
    get_first_comic_link = simulate_first_link
    first_url = 'http://satwcomic.com/sweden-denmark-and-norway'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor whose accesskey is 'n' (next) or 'p' (previous)."""
        key = 'n' if next_ else 'p'
        return last_soup.find('a', accesskey=key)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the title, description and image URLs read from the page."""
        label = soup.find('meta', attrs={'name': 'twitter:label1'})
        title = label['content']
        og_desc = soup.find('meta', property='og:description')
        desc = og_desc['content']
        images = soup.find_all('img', itemprop="image")
        return {
            'title': title,
            'description': desc,
            'img': [image['src'] for image in images],
        }
1603
1604
1605
class SomethingOfThatIlk(GenericEmptyComic):  # The site does not exist anymore
    """Entry kept for the Something Of That Ilk comics, which are no longer online."""
    name = 'somethingofthatilk'
    long_name = 'Something Of That Ilk'
    url = 'http://www.somethingofthatilk.com'
1610
1611
1612
class InfiniteMonkeyBusiness(GenericNavigableComic):
    """Infinite Monkey Business comics."""
    name = 'monkey'
    long_name = 'Infinite Monkey Business'
    url = 'http://infinitemonkeybusiness.net'
    get_navi_link = get_a_navi_comicnavnext_navinext
    get_first_comic_link = simulate_first_link
    first_url = 'http://infinitemonkeybusiness.net/comic/pillory/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the og:title and the URLs of the images inside the 'comic' div."""
        og_title = soup.find('meta', property='og:title')
        title = og_title['content']
        comic_div = soup.find('div', id='comic')
        images = comic_div.find_all('img')
        return {
            'title': title,
            'img': [image['src'] for image in images],
        }
1630
1631
1632
class Wondermark(GenericListableComic):
    """Wondermark comics, listed from the site's archive page."""
    name = 'wondermark'
    long_name = 'Wondermark'
    url = 'http://wondermark.com'
    get_url_from_archive_element = get_href

    @classmethod
    def get_archive_elements(cls):
        """Return the archive's rel='bookmark' links in reverse document order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive/'))
        bookmarks = archive_soup.find_all('a', rel='bookmark')
        return reversed(bookmarks)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return date, image, title, alt text and tags for a comic page.

        Pages without a 'comic' div yield an empty image list and empty
        title/alt strings.
        """
        postdate = soup.find('div', class_='postdate')
        date_str = postdate.find('em').string
        day = string_to_date(remove_st_nd_rd_th_from_date(date_str), "%B %d, %Y")
        # Defaults used when the page carries no comic.
        img_src, alt, title = [], '', ''
        comic_div = soup.find('div', id='comic')
        if comic_div:
            picture = comic_div.find('img')
            img_src = [picture['src']]
            alt = picture['alt']
            assert alt == picture['title']
            title = soup.find('meta', property='og:title')['content']
        tag_links = soup.find('div', class_='postmeta').find_all('a', rel='tag')
        return {
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': img_src,
            'title': title,
            'alt': alt,
            'tags': ' '.join(tag.string for tag in tag_links),
        }
1669
1670
1671
class WarehouseComic(GenericNavigableComic):
    """Warehouse Comic comics."""
    name = 'warehouse'
    long_name = 'Warehouse Comic'
    url = 'http://warehousecomic.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the image URLs, title and publication date read from the page."""
        heading = soup.find('h2', class_='post-title')
        title = heading.string
        date_span = soup.find('span', class_='post-date')
        published = string_to_date(date_span.string, "%B %d, %Y")
        images = soup.find('div', id='comic').find_all('img')
        return {
            'img': [image['src'] for image in images],
            'title': title,
            'day': published.day,
            'month': published.month,
            'year': published.year,
        }
1693
1694
1695
class JustSayEh(GenericNavigableComic):
    """Just Say Eh comics."""
    # Also on http://tapastic.com/series/Just-Say-Eh
    name = 'justsayeh'
    long_name = 'Just Say Eh'
    url = 'http://www.justsayeh.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the image URLs, the post title and the alt text of the first image."""
        heading = soup.find('h2', class_='post-title')
        title = heading.string
        images = soup.find("div", id="comic").find_all("img")
        # Every image is expected to carry the same text as alt and as title.
        assert all(image['alt'] == image['title'] for image in images)
        first_alt = images[0]['alt']
        return {
            'img': [image['src'] for image in images],
            'title': title,
            'alt': first_alt,
        }
1716
1717
1718
class MouseBearComedy(GenericNavigableComic):
    """Mouse Bear Comedy comics."""
    # Also on http://mousebearcomedy.tumblr.com
    name = 'mousebear'
    long_name = 'Mouse Bear Comedy'
    url = 'http://www.mousebearcomedy.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the publication date, image URLs, title and author of a comic."""
        heading = soup.find('h2', class_='post-title')
        title = heading.string
        author_link = soup.find("span", class_="post-author").find("a")
        author = author_link.string
        date_span = soup.find("span", class_="post-date")
        published = string_to_date(date_span.string, '%B %d, %Y')
        images = soup.find("div", id="comic").find_all("img")
        # Alt and title of each image are expected to repeat the post title.
        assert all(image['alt'] == image['title'] == title for image in images)
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [image['src'] for image in images],
            'title': title,
            'author': author,
        }
1744 View Code Duplication
0 ignored issues
show
This code seems to be duplicated in your project.
Loading history...
1745
1746
class BigFootJustice(GenericNavigableComic):
    """Big Foot Justice comics."""
    # Also on http://tapastic.com/series/bigfoot-justice
    name = 'bigfoot'
    long_name = 'Big Foot Justice'
    url = 'http://bigfootjustice.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the image URLs and a title built by joining the image titles."""
        comic_div = soup.find('div', id='comic')
        images = comic_div.find_all('img')
        assert all(image['title'] == image['alt'] for image in images)
        joined_title = ' '.join(image['title'] for image in images)
        return {
            'img': [image['src'] for image in images],
            'title': joined_title,
        }
1765
1766
1767
class RespawnComic(GenericNavigableComic):
    """Class to retrieve Respawn Comic."""
    # Also on http://respawncomic.tumblr.com
    name = 'respawn'
    long_name = 'Respawn Comic'
    # No trailing whitespace: a space is not a valid URL character.
    url = 'http://respawncomic.com'
    _categories = ('RESPAWN', )
    get_navi_link = get_a_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://respawncomic.com/comic/c0001/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with the title, author, publication date (day, month,
        year) and the og:image URLs of the page, minus the two site images
        listed in skip_imgs.
        """
        title = soup.find('meta', property='og:title')['content']
        author = soup.find('meta', attrs={'name': 'shareaholic:article_author_name'})['content']
        date_str = soup.find('meta', attrs={'name': 'shareaholic:article_published_time'})['content']
        # Only the first 10 characters are kept, matching the "%Y-%m-%d" format below.
        date_str = date_str[:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        imgs = soup.find_all('meta', property='og:image')
        # og:image entries that must not be reported as comic images.
        skip_imgs = {
            'http://respawncomic.com/wp-content/uploads/2016/03/site/HAROLD2.png',
            'http://respawncomic.com/wp-content/uploads/2016/03/site/DEVA.png'
        }
        return {
            'title': title,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [i['content'] for i in imgs if i['content'] not in skip_imgs],
        }
1799
1800
1801
class SafelyEndangered(GenericNavigableComic):
    """Safely Endangered comics."""
    # Also on http://tumblr.safelyendangered.com
    name = 'endangered'
    long_name = 'Safely Endangered'
    url = 'http://www.safelyendangered.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.safelyendangered.com/comic/ignored/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the publication date, image URLs, title and first image's alt text."""
        heading = soup.find('h2', class_='post-title')
        title = heading.string
        date_span = soup.find('span', class_='post-date')
        published = string_to_date(date_span.string, '%B %d, %Y')
        images = soup.find('div', id='comic').find_all('img')
        first_alt = images[0]['alt']
        assert all(image['alt'] == image['title'] for image in images)
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [image['src'] for image in images],
            'title': title,
            'alt': first_alt,
        }
1828
1829
1830
class PicturesInBoxes(GenericNavigableComic):
    """Pictures In Boxes comics."""
    # Also on http://picturesinboxescomic.tumblr.com
    name = 'picturesinboxes'
    long_name = 'Pictures in Boxes'
    url = 'http://www.picturesinboxes.com'
    get_navi_link = get_a_navi_navinext
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.picturesinboxes.com/2013/10/26/tetris/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the publication date, image URLs, title and author of a comic."""
        heading = soup.find('h2', class_='post-title')
        title = heading.string
        author_link = soup.find("span", class_="post-author").find("a")
        author = author_link.string
        date_span = soup.find('span', class_='post-date')
        published = string_to_date(date_span.string, '%B %d, %Y')
        images = soup.find('div', class_='comicpane').find_all('img')
        assert images
        # Title and alt of each image are expected to repeat the post title.
        assert all(image['title'] == image['alt'] == title for image in images)
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [image['src'] for image in images],
            'title': title,
            'author': author,
        }
1858
1859
1860
class Penmen(GenericEmptyComic):
    """Entry for the Penmen comics; it only declares identification attributes."""
    name = 'penmen'
    long_name = 'Penmen'
    url = 'http://penmen.com'
1865
1866
1867
class TheDoghouseDiaries(GenericNavigableComic):
    """The Dog House Diaries comics, navigated through links with fixed ids."""
    name = 'doghouse'
    long_name = 'The Dog House Diaries'
    url = 'http://thedoghousediaries.com'

    @classmethod
    def get_first_comic_link(cls):
        """Return the home page anchor with id 'firstlink'."""
        home = get_soup_at_url(cls.url)
        return home.find('a', id='firstlink')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor with id 'nextlink' or 'previouslink'."""
        wanted_id = 'nextlink' if next_ else 'previouslink'
        return last_soup.find('a', id=wanted_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return titles, alt text, image URL and number of a comic.

        The number is the last '/'-separated component of the comic URL.
        """
        picture = soup.find('img', src=re.compile('^dhdcomics/.*'))
        comic_url = cls.get_url_from_link(link)
        return {
            'title': soup.find('h2', id='titleheader').string,
            'title2': soup.find('div', id='subtext').string,
            'alt': picture.get('title'),
            'img': [urljoin_wrapper(comic_url, picture['src'].strip())],
            'num': int(comic_url.rsplit('/', 1)[-1]),
        }
1896
1897
1898
class InvisibleBread(GenericListableComic):
    """Class to retrieve Invisible Bread comics."""
    # Also on http://www.gocomics.com/invisible-bread
    name = 'invisiblebread'
    long_name = 'Invisible Bread'
    url = 'http://invisiblebread.com'

    @classmethod
    def get_archive_elements(cls):
        """Get the 'archive-title' cells of the archive page, in reverse document order."""
        archive_url = urljoin_wrapper(cls.url, 'archives/')
        return reversed(get_soup_at_url(archive_url).find_all('td', class_='archive-title'))

    @classmethod
    def get_url_from_archive_element(cls, td):
        """Get the href of the link contained in an archive cell."""
        return td.find('a')['href']

    @classmethod
    def get_comic_info(cls, soup, td):
        """Get information about a particular comics.

        The title comes from the archive cell, the month and day from the
        cell's previous sibling and the year from the comic URL.
        """
        url = cls.get_url_from_archive_element(td)
        title = td.find('a').string
        month_and_day = td.previous_sibling.string
        # Escape the base URL so that its dots only match literal dots.
        link_re = re.compile('^%s/([0-9]+)/' % re.escape(cls.url))
        year = link_re.match(url).groups()[0]
        date_str = month_and_day + ' ' + year
        day = string_to_date(date_str, '%b %d %Y')
        imgs = [soup.find('div', id='comic').find('img')]
        assert all(i['title'] == i['alt'] == title for i in imgs)
        return {
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
            'title': title,
        }
1934
1935
1936
class DiscoBleach(GenericEmptyComic):  # Retrieval does not work anymore
    """Entry kept for the Disco Bleach comics, whose retrieval no longer works."""
    name = 'discobleach'
    long_name = 'Disco Bleach'
    url = 'http://discobleach.com'
1941
1942
1943
class TubeyToons(GenericEmptyComic):  # Retrieval does not work anymore
    """Entry kept for the Tubey Toons comics, whose retrieval no longer works."""
    # Other sources: http://tapastic.com/series/Tubey-Toons
    #                http://tubeytoons.tumblr.com
    name = 'tubeytoons'
    long_name = 'Tubey Toons'
    url = 'http://tubeytoons.com'
    _categories = ('TUNEYTOONS', )
1951
1952
1953
class CompletelySeriousComics(GenericNavigableComic):
    """Completely Serious Comics."""
    name = 'completelyserious'
    long_name = 'Completely Serious Comics'
    url = 'http://completelyseriouscomics.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the publication date, image URLs, title, alt text and author."""
        heading = soup.find('h2', class_='post-title')
        title = heading.string
        author_span = soup.find('span', class_='post-author')
        # The author is read from the second child node of the span.
        author = author_span.contents[1].string
        date_span = soup.find('span', class_='post-date')
        published = string_to_date(date_span.string, '%B %d, %Y')
        images = soup.find('div', class_='comicpane').find_all('img')
        assert images
        first_title = images[0]['title']
        assert all(image['title'] == image['alt'] == first_title for image in images)
        return {
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [image['src'] for image in images],
            'title': title,
            'alt': first_title,
            'author': author,
        }
1981
1982
1983
class PoorlyDrawnLines(GenericListableComic):
    """Class to retrieve Poorly Drawn Lines comics."""
    # Also on http://pdlcomics.tumblr.com
    name = 'poorlydrawn'
    long_name = 'Poorly Drawn Lines'
    url = 'http://poorlydrawnlines.com'
    _categories = ('POORLYDRAWN', )
    get_url_from_archive_element = get_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        At most one image is expected in the 'post' div; when there is none,
        the image list and the title are empty.
        """
        imgs = soup.find('div', class_='post').find_all('img')
        assert len(imgs) <= 1
        return {
            'img': [i['src'] for i in imgs],
            'title': imgs[0].get('title', "") if imgs else "",
        }

    @classmethod
    def get_archive_elements(cls):
        """Get the archive links pointing under '<url>/comic/', in reverse document order."""
        archive_url = urljoin_wrapper(cls.url, 'archive')
        # Escape the base URL so that its dots only match literal dots.
        url_re = re.compile('^%s/comic/.' % re.escape(cls.url))
        return reversed(get_soup_at_url(archive_url).find_all('a', href=url_re))
2007
2008
2009
class LoadingComics(GenericNavigableComic):
    """Loading Artist comics."""
    name = 'loadingartist'
    long_name = 'Loading Artist'
    url = 'http://www.loadingartist.com/latest'

    @classmethod
    def get_first_comic_link(cls):
        """Return the anchor titled "First" on the page at cls.url."""
        latest = get_soup_at_url(cls.url)
        return latest.find('a', title="First")

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor titled 'Next' or 'Previous', depending on next_."""
        wanted_title = 'Next' if next_ else 'Previous'
        return last_soup.find('a', title=wanted_title)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the title, image URLs and publication date of a comic.

        Only images of the 'comic' div with empty alt and title are kept.
        """
        title = soup.find('h1').string
        date_span = soup.find('span', class_='date')
        published = string_to_date(date_span.string.strip(), "%B %d, %Y")
        comic_div = soup.find('div', class_='comic')
        images = comic_div.find_all('img', alt='', title='')
        return {
            'title': title,
            'img': [image['src'] for image in images],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2039
2040
2041
class ChuckleADuck(GenericNavigableComic):
    """Chuckle-A-Duck comics."""
    name = 'chuckleaduck'
    long_name = 'Chuckle-A-duck'
    url = 'http://chuckleaduck.com'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the publication date, image URLs, title and author of a comic.

        When the page has no 'comic' div, or the div holds no image, the
        image list and the title are empty.
        """
        date_str = soup.find('span', class_='post-date').string
        published = string_to_date(remove_st_nd_rd_th_from_date(date_str), "%B %d, %Y")
        author = soup.find('span', class_='post-author').string
        comic_div = soup.find('div', id='comic')
        if comic_div:
            images = comic_div.find_all('img')
        else:
            images = []
        if images:
            title = images[0]['title']
        else:
            title = ""
        assert all(image['title'] == image['alt'] == title for image in images)
        return {
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [image['src'] for image in images],
            'title': title,
            'author': author,
        }
2067
2068
2069
class DepressedAlien(GenericNavigableComic):
    """Depressed Alien comics, navigated through named arrow images."""
    name = 'depressedalien'
    long_name = 'Depressed Alien'
    url = 'http://depressedalien.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the parent of the home page image named 'beginArrow'."""
        begin_arrow = get_soup_at_url(cls.url).find('img', attrs={'name': 'beginArrow'})
        return begin_arrow.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the parent of the image named 'rightArrow' or 'leftArrow'."""
        arrow_name = 'rightArrow' if next_ else 'leftArrow'
        arrow = last_soup.find('img', attrs={'name': arrow_name})
        return arrow.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the twitter:title and the og:image URLs of the page."""
        twitter_title = soup.find('meta', attrs={'name': 'twitter:title'})
        title = twitter_title['content']
        og_images = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'img': [og_image['content'] for og_image in og_images],
        }
2095
2096
2097
class ThingsInSquares(GenericListableComic):
    """Things In Squares comics, listed from the rows of the archive table."""
    # This can be retrieved in other languages
    # Also on https://tapastic.com/series/Things-in-Squares
    name = 'squares'
    long_name = 'Things in squares'
    url = 'http://www.thingsinsquares.com'

    @classmethod
    def get_comic_info(cls, soup, tr):
        """Return date, titles, description, tags, image URLs and alt texts.

        The date and first title come from the archive row (three cells are
        expected); everything else is read from the comic page.
        """
        _, link_cell, date_cell = tr.find_all('td')
        anchor = link_cell.find('a')
        published = string_to_date(date_cell.string, "%m.%d.%y")
        title = anchor.string
        title2 = soup.find('meta', property='og:title')['content']
        og_desc = soup.find('meta', property='og:description')
        if og_desc:
            description = og_desc['content']
        else:
            description = ''
        tag_metas = soup.find_all('meta', property='article:tag')
        tags = ' '.join(tag_meta['content'] for tag_meta in tag_metas)
        images = soup.find('div', class_='entry-content').find_all('img')
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'title': title,
            'title2': title2,
            'description': description,
            'tags': tags,
            'img': [image['src'] for image in images],
            'alt': ' '.join(image['alt'] for image in images),
        }

    @classmethod
    def get_url_from_archive_element(cls, tr):
        """Return the href of the link held in the second cell of an archive row."""
        _, link_cell, _ = tr.find_all('td')
        anchor = link_cell.find('a')
        return anchor['href']

    @classmethod
    def get_archive_elements(cls):
        """Return the rows of the archive page's table body in reverse document order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive-2'))
        rows = archive_soup.find('tbody').find_all('tr')
        return reversed(rows)
2139
2140
2141
class HappleTea(GenericNavigableComic):
    """Happle Tea comics."""
    name = 'happletea'
    long_name = 'Happle Tea'
    url = 'http://www.happletea.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, image URLs, concatenated alt texts, date and author."""
        images = soup.find('div', id='comic').find_all('img')
        post = soup.find('div', class_='post-content')
        title = post.find('h2', class_='post-title').string
        author = post.find('a', rel='author').string
        date_span = post.find('span', class_='post-date')
        published = string_to_date(date_span.string, "%B %d, %Y")
        assert all(image['alt'] == image['title'] for image in images)
        return {
            'title': title,
            'img': [image['src'] for image in images],
            'alt': ''.join(image['alt'] for image in images),
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'author': author,
        }
2168
2169
2170
class FatAwesomeComics(GenericNavigableComic):
    """Fat Awesome comics."""
    # Also on http://fatawesomecomedy.tumblr.com
    name = 'fatawesome'
    long_name = 'Fat Awesome'
    url = 'http://fatawesome.com/comics'
    get_navi_link = get_a_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://fatawesome.com/shortbus/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, description, tags, alt text, image URL and date.

        Exactly one image with data-recalc-dims="1" is expected; its URL is
        reported without the part following the last '?'.
        """
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        description = soup.find('meta', attrs={'name': 'description'})['content']
        tag_meta = soup.find('meta', property='article:tag')
        if tag_meta:
            tags = tag_meta['content']
        else:
            tags = ""
        published_meta = soup.find('meta', property='article:published_time')
        # Only the leading "YYYY-MM-DD" part of the timestamp is parsed.
        published = string_to_date(published_meta['content'][:10], "%Y-%m-%d")
        images = soup.find_all('img', attrs={'data-recalc-dims': "1"})
        assert len(images) == 1
        return {
            'title': title,
            'description': description,
            'tags': tags,
            'alt': "".join(image['alt'] for image in images),
            'img': [image['src'].rsplit('?', 1)[0] for image in images],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2201
2202
2203
class AnythingComic(GenericListableComic):
    """Anything Comic comics, listed from the rows of the archive's chapter table."""
    # Also on http://tapastic.com/series/anything
    name = 'anythingcomic'
    long_name = 'Anything Comic'
    url = 'http://www.anythingcomic.com'

    @classmethod
    def get_archive_elements(cls):
        """Return the rows of the 'chapter_table' table, minus the first two."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive/'))
        rows = archive_soup.find('table', id='chapter_table').find_all('tr')
        # The first 2 <tr>'s do not correspond to comics
        return rows[2:]

    @classmethod
    def get_url_from_archive_element(cls, tr):
        """Return the absolute URL of the link held in the second cell of a row."""
        _, comic_cell, _, _ = tr.find_all('td')
        anchor = comic_cell.find('a')
        return urljoin_wrapper(cls.url, anchor['href'])

    @classmethod
    def get_comic_info(cls, soup, tr):
        """Return number, title, alt text, image URL and date of a comic.

        Number, title and date come from the archive row (four cells are
        expected); the single 'comic_image' image comes from the comic page.
        """
        num_cell, comic_cell, date_cell, _ = tr.find_all('td')
        num = int(num_cell.string)
        anchor = comic_cell.find('a')
        title = anchor.string
        images = soup.find_all('img', id='comic_image')
        published = string_to_date(date_cell.string, '%d %b %Y %I:%M %p')
        assert len(images) == 1
        assert all(image.get('alt') == image.get('title') for image in images)
        return {
            'num': num,
            'title': title,
            'alt': images[0].get('alt', ''),
            'img': [image['src'] for image in images],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2243
2244
2245
class LonnieMillsap(GenericNavigableComic):
    """Lonnie Millsap's comics."""
    name = 'millsap'
    long_name = 'Lonnie Millsap'
    url = 'http://www.lonniemillsap.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.lonniemillsap.com/?p=42'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the title, author, image URLs and publication date of a comic."""
        heading = soup.find('h2', class_='post-title')
        title = heading.string
        post = soup.find('div', class_='post-content')
        author_link = post.find("span", class_="post-author").find("a")
        author = author_link.string
        date_span = post.find("span", class_="post-date")
        published = string_to_date(date_span.string, "%B %d, %Y")
        images = post.find("div", class_="entry").find_all("img")
        return {
            'title': title,
            'author': author,
            'img': [image['src'] for image in images],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2271
2272
2273
class LinsEditions(GenericNavigableComic):
    """Retrieve the L.I.N.S. Editions comics."""
    # Also on http://linscomics.tumblr.com
    name = 'lins'
    long_name = 'L.I.N.S. Editions'
    url = 'https://linsedition.com'
    _categories = ('LINS', )
    first_url = 'https://linsedition.com/2011/09/07/l-i-n-s/'
    get_first_comic_link = simulate_first_link
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Read title, images and date from the meta tags of `soup` (`link` is unused)."""
        comic_title = soup.find('meta', property='og:title')['content']
        image_metas = soup.find_all('meta', property='og:image')
        published_meta = soup.find('meta', property='article:published_time')
        # Only the first ten characters (the YYYY-MM-DD part) are parsed.
        iso_day = published_meta['content'][:10]
        published = string_to_date(iso_day, "%Y-%m-%d")
        return {
            'title': comic_title,
            'img': [meta['content'] for meta in image_metas],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2298
2299
2300
class ThorsThundershack(GenericNavigableComic):
    """Retrieve the Thor's Thundershack comics."""
    # Also on http://tapastic.com/series/Thors-Thundershac
    name = 'thor'
    long_name = 'Thor\'s Thundershack'
    url = 'http://www.thorsthundershack.com'
    _categories = ('THOR', )
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'first navlink' anchor found on the page at `cls.url`."""
        home = get_soup_at_url(cls.url)
        return home.find('a', class_='first navlink')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the first next/prev anchor not pointing to '/comic', or None."""
        wanted_rel = 'next' if next_ else 'prev'
        candidates = (
            anchor for anchor in last_soup.find_all('a', rel=wanted_rel)
            if anchor['href'] != '/comic')
        return next(candidates, None)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract the comic details from `soup` (`link` is unused)."""
        description_meta = soup.find('meta', attrs={'name': 'description'})
        comic_title = description_meta["content"]
        body_text = soup.find('div', itemprop='articleBody').text
        writer = soup.find('span', itemprop='author copyrightHolder').string
        pictures = soup.find_all('img', itemprop='image')
        # Title and alternative text are expected to be the same.
        for picture in pictures:
            assert picture['title'] == picture['alt']
        if pictures:
            alt_text = pictures[0]['alt']
        else:
            alt_text = ""
        raw_date = soup.find('time', itemprop='datePublished')["datetime"]
        published = string_to_date(raw_date, "%Y-%m-%d %H:%M:%S")
        return {
            'img': [urljoin_wrapper(cls.url, picture['src']) for picture in pictures],
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'author': writer,
            'title': comic_title,
            'alt': alt_text,
            'description': body_text,
        }
2343
2344
2345
class GerbilWithAJetpack(GenericNavigableComic):
    """Retrieve the Gerbil With A Jetpack comics."""
    name = 'gerbil'
    long_name = 'Gerbil With A Jetpack'
    url = 'http://gerbilwithajetpack.com'
    get_navi_link = get_a_rel_next
    get_first_comic_link = get_a_navi_navifirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, author, date and images from `soup` (`link` is unused)."""
        comic_title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        writer = author_span.find("a").string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find("div", id="comic").find_all("img")
        # At least one image is required: the first one provides the text.
        alt_text = pictures[0]['alt']
        for picture in pictures:
            assert picture['alt'] == picture['title'] == alt_text
        return {
            'img': [picture['src'] for picture in pictures],
            'title': comic_title,
            'alt': alt_text,
            'author': writer,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2372
2373
2374
class EveryDayBlues(GenericNavigableComic):
    """Retrieve the Every Day Blues comics."""
    name = "blues"
    long_name = "Every Day Blues"
    url = "http://everydayblues.net"
    get_navi_link = get_link_rel_next
    get_first_comic_link = get_a_navi_navifirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, author, date and images from `soup` (`link` is unused)."""
        comic_title = soup.find("h2", class_="post-title").string
        author_span = soup.find("span", class_="post-author")
        writer = author_span.find("a").string
        raw_date = soup.find("span", class_="post-date").string
        # The date is parsed with the "de_DE.utf8" locale.
        published = string_to_date(raw_date, "%d. %B %Y", "de_DE.utf8")
        pictures = soup.find("div", id="comic").find_all("img")
        # Alternative text and title of every image must equal the title.
        for picture in pictures:
            assert picture['alt'] == picture['title'] == comic_title
        assert len(pictures) <= 1
        return {
            'img': [picture['src'] for picture in pictures],
            'title': comic_title,
            'author': writer,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2400
2401
2402
class BiterComics(GenericNavigableComic):
    """Retrieve the Biter Comics."""
    name = "biter"
    long_name = "Biter Comics"
    url = "http://www.bitercomics.com"
    get_navi_link = get_link_rel_next
    get_first_comic_link = get_a_navi_navifirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, author, date and the single image from `soup` (`link` is unused)."""
        comic_title = soup.find("h1", class_="entry-title").string
        author_span = soup.find("span", class_="author vcard")
        writer = author_span.find("a").string
        raw_date = soup.find("span", class_="entry-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find("div", id="comic").find_all("img")
        for picture in pictures:
            assert picture['alt'] == picture['title']
        # Exactly one image is expected per comic.
        assert len(pictures) == 1
        alt_text = pictures[0]['alt']
        return {
            'img': [picture['src'] for picture in pictures],
            'title': comic_title,
            'alt': alt_text,
            'author': writer,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2430
2431
2432
class TheAwkwardYeti(GenericNavigableComic):
    """Retrieve The Awkward Yeti comics."""
    # Also on http://www.gocomics.com/the-awkward-yeti
    # Also on http://larstheyeti.tumblr.com
    # Also on https://tapastic.com/series/TheAwkwardYeti
    name = 'yeti'
    long_name = 'The Awkward Yeti'
    url = 'http://theawkwardyeti.com'
    _categories = ('YETI', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = get_a_navi_navifirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, date and images from `soup` (`link` is unused)."""
        comic_title = soup.find('h2', class_='post-title').string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find("div", id="comic").find_all("img")
        # Only the first image is checked for matching alt and title.
        if pictures:
            assert pictures[0]['alt'] == pictures[0]['title']
        return {
            'img': [picture['src'] for picture in pictures],
            'title': comic_title,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2459
2460
2461
class PleasantThoughts(GenericNavigableComic):
    """Retrieve the Pleasant Thoughts comics."""
    name = 'pleasant'
    long_name = 'Pleasant Thoughts'
    url = 'http://pleasant-thoughts.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = get_a_navi_navifirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract the title and images from the post in `soup` (`link` is unused)."""
        content = soup.find('div', class_='post-content')
        comic_title = content.find('h2', class_='post-title').string
        entry = content.find("div", class_="entry")
        pictures = entry.find_all("img")
        return {
            'title': comic_title,
            'img': [picture['src'] for picture in pictures],
        }
2479
2480
2481
class MisterAndMe(GenericNavigableComic):
    """Retrieve the Mister & Me comics."""
    # Also on http://www.gocomics.com/mister-and-me
    # Also on https://tapastic.com/series/Mister-and-Me
    name = 'mister'
    long_name = 'Mister & Me'
    url = 'http://www.mister-and-me.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = get_a_comicnavbase_comicnavfirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, author, date and image from `soup` (`link` is unused)."""
        comic_title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        writer = author_span.find("a").string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find("div", id="comic").find_all("img")
        for picture in pictures:
            assert picture['alt'] == picture['title']
        # A comic page holds at most one image.
        assert len(pictures) <= 1
        if pictures:
            alt_text = pictures[0]['alt']
        else:
            alt_text = ""
        return {
            'img': [picture['src'] for picture in pictures],
            'title': comic_title,
            'alt': alt_text,
            'author': writer,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2511
2512
2513
class LastPlaceComics(GenericNavigableComic):
    """Retrieve the Last Place Comics."""
    name = 'lastplace'
    long_name = 'Last Place Comics'
    url = "http://lastplacecomics.com"
    get_navi_link = get_link_rel_next
    get_first_comic_link = get_a_comicnavbase_comicnavfirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, author, date and image from `soup` (`link` is unused)."""
        comic_title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        writer = author_span.find("a").string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find("div", id="comic").find_all("img")
        for picture in pictures:
            assert picture['alt'] == picture['title']
        # A comic page holds at most one image.
        assert len(pictures) <= 1
        if pictures:
            alt_text = pictures[0]['alt']
        else:
            alt_text = ""
        return {
            'img': [picture['src'] for picture in pictures],
            'title': comic_title,
            'alt': alt_text,
            'author': writer,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2541
2542
2543
class TalesOfAbsurdity(GenericNavigableComic):
    """Retrieve the Tales Of Absurdity comics."""
    # Also on http://tapastic.com/series/Tales-Of-Absurdity
    # Also on http://talesofabsurdity.tumblr.com
    name = 'absurdity'
    long_name = 'Tales of Absurdity'
    url = 'http://talesofabsurdity.com'
    _categories = ('ABSURDITY', )
    get_navi_link = get_a_navi_comicnavnext_navinext
    get_first_comic_link = get_a_navi_navifirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, author, date and images from `soup` (`link` is unused)."""
        comic_title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        writer = author_span.find("a").string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find("div", id="comic").find_all("img")
        for picture in pictures:
            assert picture['alt'] == picture['title']
        # The text of the first image is used, if there is any image.
        if pictures:
            alt_text = pictures[0]['alt']
        else:
            alt_text = ""
        return {
            'img': [picture['src'] for picture in pictures],
            'title': comic_title,
            'alt': alt_text,
            'author': writer,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2573
2574
2575
class EndlessOrigami(GenericNavigableComic):
    """Retrieve the Endless Origami comics."""
    name = "origami"
    long_name = "Endless Origami"
    url = "http://endlessorigami.com"
    get_navi_link = get_link_rel_next
    get_first_comic_link = get_a_navi_navifirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, author, date and images from `soup` (`link` is unused)."""
        comic_title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        writer = author_span.find("a").string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find("div", id="comic").find_all("img")
        for picture in pictures:
            assert picture['alt'] == picture['title']
        # The text of the first image is used, if there is any image.
        if pictures:
            alt_text = pictures[0]['alt']
        else:
            alt_text = ""
        return {
            'img': [picture['src'] for picture in pictures],
            'title': comic_title,
            'alt': alt_text,
            'author': writer,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2602
2603
2604
class PlanC(GenericNavigableComic):
    """Retrieve the Plan C comics."""
    name = 'planc'
    long_name = 'Plan C'
    url = 'http://www.plancomic.com'
    get_navi_link = get_a_navi_comicnavnext_navinext
    get_first_comic_link = get_a_navi_navifirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, date and images from `soup` (`link` is unused)."""
        comic_title = soup.find('h2', class_='post-title').string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        comic_div = soup.find('div', id='comic')
        pictures = comic_div.find_all('img')
        return {
            'title': comic_title,
            'img': [picture['src'] for picture in pictures],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2626 View Code Duplication
0 ignored issues
show
This code seems to be duplicated in your project.
Loading history...
2627
2628
class BuniComic(GenericNavigableComic):
    """Retrieve the Buni comics."""
    name = 'buni'
    long_name = 'BuniComics'
    url = 'http://www.bunicomic.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = get_a_comicnavbase_comicnavfirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract the single image and its title from `soup` (`link` is unused)."""
        comic_div = soup.find('div', id='comic')
        pictures = comic_div.find_all('img')
        for picture in pictures:
            assert picture['alt'] == picture['title']
        # Exactly one image is expected; its title is the comic title.
        assert len(pictures) == 1
        only_picture = pictures[0]
        return {
            'img': [picture['src'] for picture in pictures],
            'title': only_picture['title'],
        }
2646
2647
2648
class GenericCommitStrip(GenericNavigableComic):
    """Common base for the Commit Strip comics in the various languages."""
    # Placeholder: each language-specific subclass sets its own first_url.
    first_url = NotImplemented
    get_first_comic_link = simulate_first_link
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract titles, description and image urls from `soup` (`link` is unused)."""
        description = soup.find('meta', property='og:description')['content']
        comic_title = soup.find('meta', property='og:title')['content']
        pictures = soup.find('div', class_='entry-content').find_all('img')
        # Images without a title contribute an empty string to the join.
        image_titles = [picture.get('title', '') for picture in pictures]
        joined_titles = ' '.join(image_titles)
        image_urls = []
        for picture in pictures:
            ascii_src = convert_iri_to_plain_ascii_uri(picture['src'])
            image_urls.append(urljoin_wrapper(cls.url, ascii_src))
        return {
            'title': comic_title,
            'title2': joined_titles,
            'description': description,
            'img': image_urls,
        }
2667
2668
2669
class CommitStripFr(GenericCommitStrip):
    """Retrieve the French Commit Strip comics."""
    name = 'commit_fr'
    long_name = 'Commit Strip (Fr)'
    _categories = ('FRANCAIS', )
    url = 'http://www.commitstrip.com/fr'
    # Value for the first_url placeholder of the base class.
    first_url = 'http://www.commitstrip.com/fr/2012/02/22/interview/'
2676
2677
2678
class CommitStripEn(GenericCommitStrip):
    """Retrieve the English Commit Strip comics."""
    long_name = 'Commit Strip (En)'
    name = 'commit_en'
    # Value for the first_url placeholder of the base class.
    first_url = 'http://www.commitstrip.com/en/2012/02/22/interview/'
    url = 'http://www.commitstrip.com/en'
2684
2685
2686
class GenericBoumerie(GenericNavigableComic):
    """Common base for the Boumeries comics in the various languages."""
    # Placeholders: subclasses provide the date format and the locale name.
    lang = NotImplemented
    date_format = NotImplemented
    get_navi_link = get_link_rel_next
    get_first_comic_link = get_a_navi_navifirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, short url, author, date and images from `soup` (`link` is unused)."""
        comic_title = soup.find('h2', class_='post-title').string
        shortlink = soup.find('link', rel='shortlink')['href']
        author_span = soup.find("span", class_="post-author")
        writer = author_span.find("a").string
        raw_date = soup.find('span', class_='post-date').string
        # Date format and language come from the concrete subclass.
        published = string_to_date(raw_date, cls.date_format, cls.lang)
        pictures = soup.find('div', id='comic').find_all('img')
        for picture in pictures:
            assert picture['alt'] == picture['title']
        return {
            'short_url': shortlink,
            'img': [picture['src'] for picture in pictures],
            'title': comic_title,
            'author': writer,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2712
2713
2714
class BoumerieEn(GenericBoumerie):
    """Retrieve the English Boumeries comics."""
    name = 'boumeries_en'
    long_name = 'Boumeries (En)'
    url = 'http://comics.boumerie.com'
    # Values used by the base class to parse the post date.
    lang = 'en_GB.UTF-8'
    date_format = "%B %d, %Y"
2721
2722
2723
class BoumerieFr(GenericBoumerie):
    """Retrieve the French Boumeries comics."""
    name = 'boumeries_fr'
    long_name = 'Boumeries (Fr)'
    url = 'http://bd.boumerie.com'
    _categories = ('FRANCAIS', )
    # Values used by the base class to parse the post date.
    lang = "fr_FR.utf8"
    date_format = "%A, %d %B %Y"
2731
2732
2733
class UnearthedComics(GenericNavigableComic):
    """Class to retrieve Unearthed comics."""
    # Also on http://tapastic.com/series/UnearthedComics
    # Also on http://unearthedcomics.tumblr.com
    name = 'unearthed'
    long_name = 'Unearthed Comics'
    url = 'http://unearthedcomics.com'
    _categories = ('UNEARTHED', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://unearthedcomics.com/comics/world-with-turn-signals/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with the title ("" when the page has neither an h1
        nor an h2), the description (content of the 'og:description' meta
        tag, or None when the tag is missing), the short url, the image
        urls and the publication date.  `link` is not used.
        """
        short_url = soup.find('link', rel='shortlink')['href']
        title_elt = soup.find('h1') or soup.find('h2')
        title = title_elt.string if title_elt else ""
        # Store the text of the description, not the meta tag object itself.
        desc_elt = soup.find('meta', property='og:description')
        desc = desc_elt['content'] if desc_elt else None
        date_str = soup.find('time', class_='published updated hidden')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        post = soup.find('div', class_="entry content entry-content type-portfolio")
        imgs = post.find_all('img')
        return {
            'title': title,
            'description': desc,
            'url2': short_url,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
2765
2766
2767
class Optipess(GenericNavigableComic):
    """Retrieve the Optipess comics."""
    name = 'optipess'
    long_name = 'Optipess'
    url = 'http://www.optipess.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = get_a_navi_navifirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, author, date and images from `soup` (`link` is unused)."""
        comic_title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        writer = author_span.find("a").string
        # A page without the 'comic' div is handled as a page without images.
        comic_div = soup.find('div', id='comic')
        if comic_div:
            pictures = comic_div.find_all('img')
        else:
            pictures = []
        if pictures:
            alt_text = pictures[0]['title']
        else:
            alt_text = ""
        for picture in pictures:
            assert picture['alt'] == picture['title'] == alt_text
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, "%B %d, %Y")
        return {
            'title': comic_title,
            'alt': alt_text,
            'author': writer,
            'img': [picture['src'] for picture in pictures],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2795
2796
2797
class PainTrainComic(GenericNavigableComic):
    """Class to retrieve Pain Train Comics."""
    name = 'paintrain'
    long_name = 'Pain Train Comics'
    url = 'http://paintraincomic.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The comic number is read from the '?p=<number>' part of the
        shortlink, which must start with `cls.url`.  At least one image is
        required, and every image must have the same alt and title.
        `link` is not used.
        """
        title = soup.find('h2', class_='post-title').string
        short_url = soup.find('link', rel='shortlink')['href']
        # Escape the url so that its dots only match literal dots.
        short_url_re = re.compile('^%s/\\?p=([0-9]*)' % re.escape(cls.url))
        num = int(short_url_re.match(short_url).group(1))
        imgs = soup.find('div', id='comic').find_all('img')
        alt = imgs[0]['title']
        assert all(i['alt'] == i['title'] == alt for i in imgs)
        date_str = soup.find('span', class_='post-date').string
        day = string_to_date(date_str, "%d/%m/%Y")
        return {
            'short_url': short_url,
            'num': num,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'alt': alt,
            'title': title,
        }
2827
2828
2829
class MoonBeard(GenericNavigableComic):
    """Class to retrieve MoonBeard comics."""
    # Also on http://blog.squiresjam.es/moonbeard
    # Also on http://www.webtoons.com/en/comedy/moon-beard/list?title_no=471
    name = 'moonbeard'
    long_name = 'Moon Beard'
    url = 'http://moonbeard.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The comic number is read from the '?p=<number>' part of the
        shortlink, which must start with `cls.url`.  At least one image is
        required, and every image must have the same alt and title.  The
        tags are the space-joined contents of the 'article:tag' meta tags.
        `link` is not used.
        """
        title = soup.find('h2', class_='post-title').string
        short_url = soup.find('link', rel='shortlink')['href']
        # Escape the url so that its dots only match literal dots.
        short_url_re = re.compile('^%s/\\?p=([0-9]*)' % re.escape(cls.url))
        num = int(short_url_re.match(short_url).group(1))
        imgs = soup.find('div', id='comic').find_all('img')
        alt = imgs[0]['title']
        assert all(i['alt'] == i['title'] == alt for i in imgs)
        date_str = soup.find('span', class_='post-date').string
        day = string_to_date(date_str, "%B %d, %Y")
        tags = ' '.join(t['content'] for t in soup.find_all('meta', property='article:tag'))
        author = soup.find('span', class_='post-author').string
        return {
            'short_url': short_url,
            'num': num,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'title': title,
            'tags': tags,
            'alt': alt,
            'author': author,
        }
2865
2866
2867
class AHamADay(GenericNavigableComic):
    """Class to retrieve class A Ham A Day comics."""
    name = 'ham'
    long_name = 'A Ham A Day'
    url = 'http://www.ahammaday.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.ahammaday.com/today/3/6/french'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns None when the page has no matching list item, instead of
        failing on the missing element.
        """
        # prev is next / next is prev
        li = last_soup.find('li', class_='previous' if next_ else 'next')
        return li.find('a') if li else None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with the image urls (taken from the meta tags with
        itemprop 'image'), the title, the author and the publication date.
        `link` is not used.
        """
        date_str = soup.find('time', class_='published')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        author = soup.find('span', class_='blog-author').find('a').string
        title = soup.find('meta', property='og:title')['content']
        imgs = soup.find_all('meta', itemprop='image')
        return {
            'img': [i['content'] for i in imgs],
            'title': title,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
        }
2898
2899
2900
class LittleLifeLines(GenericNavigableComic):
    """Retrieve the Little Life Lines comics."""
    # Also on https://little-life-lines.tumblr.com
    name = 'life'
    long_name = 'Little Life Lines'
    url = 'http://www.littlelifelines.com'
    first_url = 'http://www.littlelifelines.com/comics/well-done'
    get_first_comic_link = simulate_first_link
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor leading to the next or previous comic, or None."""
        # The classes are swapped on purpose: 'prev' holds the next comic.
        wanted_class = 'prev' if next_ else 'next'
        item = last_soup.find('li', class_=wanted_class)
        if not item:
            return None
        return item.find('a')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract the comic details from `soup` (`link` is unused)."""
        comic_title = soup.find('meta', property='og:title')['content']
        description = soup.find('meta', property='og:description')['content']
        raw_date = soup.find('time', class_='published')['datetime']
        published = string_to_date(raw_date, "%Y-%m-%d")
        writer = soup.find('a', rel='author').string
        body = soup.find('div', class_="body entry-content")
        # Images without a 'src' attribute are dropped.
        pictures = [
            picture for picture in body.find_all('img')
            if picture.get('src') is not None]
        alt_text = pictures[0]['alt']
        return {
            'title': comic_title,
            'alt': alt_text,
            'description': description,
            'author': writer,
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [picture['src'] for picture in pictures],
        }
2939
2940
2941
class GenericWordPressInkblot(GenericNavigableComic):
    """Common base for the comics using WordPress with Inkblot."""
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'first webcomic' anchor found on the page at `cls.url`."""
        first_link_class = 'webcomic-link webcomic1-link first-webcomic-link first-webcomic1-link'
        home = get_soup_at_url(cls.url)
        return home.find('a', class_=first_link_class)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, date and images from `soup` (`link` is unused)."""
        comic_title = soup.find('meta', property='og:title')['content']
        pictures = soup.find('div', class_='webcomic-image').find_all('img')
        published_meta = soup.find('meta', property='article:published_time')
        # Only the first ten characters (the YYYY-MM-DD part) are parsed.
        iso_day = published_meta['content'][:10]
        published = string_to_date(iso_day, "%Y-%m-%d")
        return {
            'title': comic_title,
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [picture['src'] for picture in pictures],
        }
2964
2965
2966
class EverythingsStupid(GenericWordPressInkblot):
    """Retrieve the Everything's Stupid comics."""
    # Also on http://tapastic.com/series/EverythingsStupid
    # Also on http://www.webtoons.com/en/challenge/everythings-stupid/list?title_no=14591
    # Also on http://everythingsstupidcomics.tumblr.com
    url = 'http://everythingsstupid.net'
    name = 'stupid'
    long_name = "Everything's Stupid"
2974
2975
2976
class TheIsmComics(GenericWordPressInkblot):
    """Retrieve The Ism comics."""
    # Also on https://tapastic.com/series/TheIsm (?)
    url = 'http://www.theism-comics.com'
    name = 'theism'
    long_name = "The Ism"
2982
2983
2984
class WoodenPlankStudios(GenericWordPressInkblot):
    """Retrieve the Wooden Plank Studios comics."""
    url = 'http://woodenplankstudios.com'
    name = 'woodenplank'
    long_name = 'Wooden Plank Studios'
2989
2990
2991
class ElectricBunnyComic(GenericNavigableComic):
    """Retrieve the Electric Bunny comics."""
    # Also on http://electricbunnycomics.tumblr.com
    name = 'bunny'
    long_name = 'Electric Bunny Comic'
    url = 'http://www.electricbunnycomics.com/View/Comic/153/Welcome+to+Hell'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the parent of the 'First' image found on the page at `cls.url`."""
        home = get_soup_at_url(cls.url)
        first_button = home.find('img', alt='First')
        return first_button.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the parent of the 'Next' or 'Back' image, or None without image."""
        wanted_alt = 'Next' if next_ else 'Back'
        button = last_soup.find('img', alt=wanted_alt)
        if not button:
            return None
        return button.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Read the title and images from the meta tags of `soup` (`link` is unused)."""
        comic_title = soup.find('meta', property='og:title')['content']
        image_metas = soup.find_all('meta', property='og:image')
        return {
            'title': comic_title,
            'img': [meta['content'] for meta in image_metas],
        }
3019
3020
3021
class SheldonComics(GenericNavigableComic):
    """Comic source for Sheldon, navigated page by page."""
    # Other source: http://www.gocomics.com/sheldon
    name = 'sheldon'
    long_name = 'Sheldon Comics'
    url = 'http://www.sheldoncomics.com'

    @classmethod
    def get_first_comic_link(cls):
        """Return the anchor with id 'nav-first' from the home page."""
        home_page = get_soup_at_url(cls.url)
        return home_page.find("a", id="nav-first")

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the first next/previous anchor not pointing at the bare home URL.

        None is returned when every candidate anchor targets the home URL
        (or when there is no candidate at all).
        """
        anchor_id = "nav-next" if next_ else "nav-prev"
        usable_anchors = (
            anchor
            for anchor in last_soup.find_all("a", id=anchor_id)
            if anchor['href'] != 'http://www.sheldoncomics.com'
        )
        return next(usable_anchors, None)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic description from the images inside div#comic-foot."""
        foot_imgs = soup.find("div", id="comic-foot").find_all("img")
        # Sanity checks on the page layout: alt mirrors title, and exactly one image.
        assert all(img['alt'] == img['title'] for img in foot_imgs)
        assert len(foot_imgs) == 1
        return {
            'title': foot_imgs[0]['title'],
            'img': [img['src'] for img in foot_imgs],
        }
3052
3053
3054
class Ubertool(GenericNavigableComic):
    """Comic source for Ubertool, navigated page by page."""
    # Other source: http://ubertool.tumblr.com
    # Other source: https://tapastic.com/series/ubertool
    name = 'ubertool'
    long_name = 'Ubertool'
    url = 'http://ubertoolcomic.com'
    _categories = ('UBERTOOL', )
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_a_comicnavbase_comicnavnext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic description: title, publication date and image urls."""
        post_title = soup.find('h2', class_='post-title').string
        raw_date = soup.find('span', class_='post-date').string
        # Dates are parsed with the "%B %d, %Y" format (e.g. month name, day, year).
        published = string_to_date(raw_date, "%B %d, %Y")
        comic_imgs = soup.find('div', id='comic').find_all('img')
        info = {
            'img': [img['src'] for img in comic_imgs],
            'title': post_title,
        }
        info['month'] = published.month
        info['year'] = published.year
        info['day'] = published.day
        return info
3079
3080
3081
class EarthExplodes(GenericNavigableComic):
    """Class to retrieve The Earth Explodes comics."""
    name = 'earthexplodes'
    long_name = 'The Earth Explodes'
    url = 'http://www.earthexplodes.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.earthexplodes.com/comics/000/'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns the anchor with id 'next' (or 'prev'), or None when the page
        has no such anchor.
        """
        return last_soup.find('a', id='next' if next_ else 'prev')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with the page title, the absolute image urls and the
        'title' attribute of the first image (exposed as 'alt').
        """
        title = soup.find('title').string
        imgs = soup.find('div', id='image').find_all('img')
        # The hover text used to be computed and thrown away; it is now part
        # of the result. A page without image yields an empty text instead of
        # raising IndexError.
        alt = imgs[0].get('title', '') if imgs else ''
        return {
            'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
            'title': title,
            'alt': alt,
        }
3105
3106
3107
class CubeDrone(GenericNavigableComic):
    """Class to retrieve Cube Drone comics."""
    name = 'cubedrone'
    long_name = 'Cube Drone'
    url = 'http://cube-drone.com/comics'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        The link is the parent of the 'glyphicon-backward' span of the main page.
        """
        return get_soup_at_url(cls.url).find('span', class_='glyphicon glyphicon-backward').parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns the parent of the chevron span, or None when the page has no
        such span.
        """
        class_ = 'glyphicon glyphicon-chevron-' + ('right' if next_ else 'left')
        chevron = last_soup.find('span', class_=class_)
        # find() gives None when nothing matches: report "no link" instead of
        # failing with AttributeError on `.parent`.
        return chevron.parent if chevron is not None else None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with the twitter title and url, the 'title' and 'alt'
        attributes of the first comic image and the list of image sources.
        """
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        url2 = soup.find('meta', attrs={'name': 'twitter:url'})['content']
        # NOTE(review): the publication date is not extracted. Earlier
        # (disabled) code read it from h2.comic_title > small with the
        # "%B %d, %Y, %I:%M %p" format - confirm before re-enabling.
        imgs = soup.find_all('img', class_='comic img-responsive')
        title2 = imgs[0]['title']
        alt = imgs[0]['alt']
        return {
            'url2': url2,
            'title': title,
            'title2': title2,
            'alt': alt,
            'img': [i['src'] for i in imgs],
        }
3142
3143
3144
class MakeItStoopid(GenericNavigableComic):
    """Comic source for Make It Stoopid, navigated page by page."""
    name = 'stoopid'
    long_name = 'Make it stoopid'
    url = 'http://makeitstoopid.com/comic.php'

    @classmethod
    def get_nav(cls, soup):
        """Return the first five 'cnav' elements, with href-less ones replaced by None.

        The elements after the fifth are expected to repeat the first five.
        """
        nav_elements = soup.find_all(class_='cnav')
        first_bar = nav_elements[:5]
        second_bar = nav_elements[5:]
        assert first_bar == second_bar
        # Layout noted by the original author: begin, prev, archive, next_, end
        return [elt if elt.get('href') is not None else None for elt in first_bar]

    @classmethod
    def get_first_comic_link(cls):
        """Return the first navigation entry of the main page."""
        nav = cls.get_nav(get_soup_at_url(cls.url))
        return nav[0]

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return navigation entry 3 (next) or 1 (previous) of the given page."""
        nav = cls.get_nav(last_soup)
        if next_:
            return nav[3]
        return nav[1]

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic description; the title comes from the link's 'title' attribute."""
        comic_title = link['title']
        comic_imgs = soup.find_all('img', id='comicimg')
        return {
            'title': comic_title,
            'img': [img['src'] for img in comic_imgs],
        }
3178
3179
3180
class TuMourrasMoinsBete(GenericNavigableComic):
    """Comic source for Tu Mourras Moins Bete, navigated page by page."""
    name = 'mourrasmoinsbete'
    long_name = 'Tu Mourras Moins Bete'
    url = 'http://tumourrasmoinsbete.blogspot.fr'
    _categories = ('FRANCAIS', )
    get_first_comic_link = simulate_first_link
    first_url = 'http://tumourrasmoinsbete.blogspot.fr/2008/06/essai.html'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the blog pager anchor: 'newer' for next, 'older' for previous."""
        if next_:
            pager_id = 'Blog1_blog-pager-newer-link'
        else:
            pager_id = 'Blog1_blog-pager-older-link'
        return last_soup.find('a', id=pager_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic description: image sources, author and page title."""
        page_title = soup.find('title').string
        article = soup.find('div', itemprop='description articleBody')
        article_imgs = article.find_all('img')
        author_name = soup.find('span', itemprop='author').string
        return {
            'img': [img['src'] for img in article_imgs],
            'author': author_name,
            'title': page_title,
        }
3205
3206
3207
class GeekAndPoke(GenericNavigableComic):
    """Class to retrieve Geek And Poke comics."""
    name = 'geek'
    long_name = 'Geek And Poke'
    url = 'http://geek-and-poke.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://geek-and-poke.com/geekandpoke/2006/8/27/a-new-place-for-a-not-so-old-blog.html'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns the matching anchor, or None when the page has none.
        """
        # NOTE(review): 'prev-item' is used for the next comic and 'next-item'
        # for the previous one; presumably the site orders posts newest first
        # - confirm against the live markup.
        return last_soup.find('a', class_='prev-item' if next_ else 'next-item')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with title, description, author, publication date,
        the alt text of the first image (empty string when the post has no
        image) and the absolute image urls.
        """
        title = soup.find('meta', property='og:title')['content']
        desc = soup.find('meta', property='og:description')['content']
        date_str = soup.find('time', class_='published')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        author = soup.find('a', rel='author').string
        # Posts come in two layouts; fall back to the second container.
        div_content = (soup.find('div', class_="body entry-content") or
                       soup.find('div', class_="special-content"))
        imgs = div_content.find_all('img')
        imgs = [i for i in imgs if i.get('src') is not None]
        # NOTE(review): `in` on a BeautifulSoup tag looks at its children, not
        # its attributes, so this check may never compare anything - confirm
        # and consider `has_attr('title')` (left unchanged to avoid new failures).
        assert all('title' not in i or i['alt'] == i['title'] for i in imgs)
        # Keep 'alt' a string in every case (the fallback used to be a list).
        alt = imgs[0].get('alt', "") if imgs else ""
        return {
            'title': title,
            'alt': alt,
            'description': desc,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
        }
3245
3246
3247
class GloryOwlComix(GenericNavigableComic):
    """Comic source for Glory Owl, navigated page by page."""
    name = 'gloryowl'
    long_name = 'Glory Owl'
    url = 'http://gloryowlcomix.blogspot.fr'
    _categories = ('NSFW', 'FRANCAIS')
    get_first_comic_link = simulate_first_link
    first_url = 'http://gloryowlcomix.blogspot.fr/2013/02/1_7.html'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the blog pager anchor: 'newer' for next, 'older' for previous."""
        if next_:
            pager_id = 'Blog1_blog-pager-newer-link'
        else:
            pager_id = 'Blog1_blog-pager-older-link'
        return last_soup.find('a', id=pager_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic description: image urls, author and page title."""
        page_title = soup.find('title').string
        image_links = soup.find_all('link', rel='image_src')
        author_name = soup.find('a', rel='author').string
        return {
            'img': [tag['href'] for tag in image_links],
            'author': author_name,
            'title': page_title,
        }
3272
3273
3274
class GenericTumblrV1(GenericComic):
    """Base class for comics hosted on Tumblr, read through the V1 API."""
    _categories = ('TUMBLR', )

    @classmethod
    def get_next_comic(cls, last_comic):
        """Yield the description of each post newer than last_comic.

        Posts for which get_comic_info returns None are skipped.
        """
        descriptions = map(cls.get_comic_info, cls.get_posts(last_comic))
        yield from (comic for comic in descriptions if comic is not None)

    @classmethod
    def get_url_from_post(cls, post):
        """Return the 'url' attribute of a post element."""
        return post['url']

    @classmethod
    def get_api_url(cls):
        """Return the '/api/read/' endpoint joined to the comic url."""
        return urljoin_wrapper(cls.url, '/api/read/')

    @classmethod
    def get_comic_info(cls, post):
        """Build the comic description for a post, or None if it is not a photo post."""
        if post['type'] != 'photo':
            return None
        tumblr_id = int(post['id'])
        api_url = cls.get_api_url() + '?id=%d' % (tumblr_id)
        timestamp = int(post['unix-timestamp'])
        published = datetime.datetime.fromtimestamp(timestamp).date()
        caption = post.find('photo-caption')
        title = caption.string if caption else ""
        tags = ' '.join(tag.string for tag in post.find_all('tag'))
        # Photos may live in dedicated 'photo' elements or directly in the post.
        photo_holders = post.find_all('photo') or [post]
        # Several resolutions are listed for each photo: keep the first one.
        photo_urls = [holder.find('photo-url') for holder in photo_holders]
        return {
            'url': cls.get_url_from_post(post),
            'url2': post['url-with-slug'],
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'title': title,
            'tags': tags,
            'img': [photo_url.string for photo_url in photo_urls],
            'tumblr-id': tumblr_id,
            'api_url': api_url,
        }

    @classmethod
    def get_posts(cls, last_comic, nb_post_per_call=10):
        """Return the posts published after last_comic, oldest first.

        The API is read page by page (nb_post_per_call posts per request;
        the original note says the maximum is 50), from newer to older,
        until the post whose url is last_comic['url'] is reached. When
        last_comic is falsy, every post is returned.

        An empty result is returned (after printing a message) when the
        previous post cannot be fetched anymore or is never reached.
        """
        target_url = last_comic['url'] if last_comic else None
        collected = []
        if last_comic is not None:
            # Tumblr posts get deleted sometimes. Looking for a deleted post
            # would mean walking the whole history for nothing, so check
            # upfront that it still exists and give up early otherwise.
            previous_api_url = last_comic['api_url']
            try:
                get_soup_at_url(previous_api_url)
            except urllib.error.HTTPError:
                try:
                    get_soup_at_url(cls.url)
                except urllib.error.HTTPError:
                    print("Did not find previous post nor main url %s" % cls.url)
                else:
                    print("Did not find previous post %s : it might have been deleted" % previous_api_url)
                return reversed(collected)
        base_api_url = cls.get_api_url()
        overview = get_soup_at_url(base_api_url).find('posts')
        first_index = int(overview['start'])
        post_count = int(overview['total'])
        assert first_index == 0
        for offset in range(0, post_count, nb_post_per_call):
            page_url = base_api_url + '?start=%d&num=%d' % (offset, nb_post_per_call)
            page = get_soup_at_url(page_url).find('posts')
            page_start = int(page['start'])
            page_total = int(page['total'])
            assert offset == page_start, "%d != %d" % (offset, page_start)
            # The total may change between two calls; this is not handled yet.
            assert post_count == page_total, "%d != %d" % (post_count, page_total)
            for post in page.find_all('post'):
                if target_url and target_url == cls.get_url_from_post(post):
                    return reversed(collected)
                collected.append(post)
        if target_url is not None:
            print("Did not find %s : there might be a problem" % target_url)
            return []
        return reversed(collected)
3367
3368
3369
class SaturdayMorningBreakfastCerealTumblr(GenericTumblrV1):
    """Saturday Morning Breakfast Cereal comics, retrieved through GenericTumblrV1."""
    # Other source: http://www.gocomics.com/saturday-morning-breakfast-cereal
    # Other source: http://www.smbc-comics.com
    name = 'smbc-tumblr'
    long_name = 'Saturday Morning Breakfast Cereal (from Tumblr)'
    url = 'http://smbc-comics.tumblr.com'
    _categories = ('SMBC', )
3377
3378
3379
class IrwinCardozo(GenericTumblrV1):
    """Irwin Cardozo comics, retrieved through GenericTumblrV1."""
    name = 'irwinc'
    long_name = 'Irwin Cardozo'
    url = 'http://irwincardozocomics.tumblr.com'
3384
3385
3386
class AccordingToDevin(GenericTumblrV1):
    """According To Devin comics, retrieved through GenericTumblrV1."""
    name = 'devin'
    long_name = 'According To Devin'
    url = 'http://accordingtodevin.tumblr.com'
3391
3392
3393
class ItsTheTieTumblr(GenericTumblrV1):
    """It's the tie comics, retrieved through GenericTumblrV1."""
    # Other source: http://itsthetie.com
    # Other source: https://tapastic.com/series/itsthetie
    name = 'tie-tumblr'
    long_name = "It's the tie (from Tumblr)"
    url = "http://itsthetie.tumblr.com"
    _categories = ('TIE', )
3401
3402
3403
class OctopunsTumblr(GenericTumblrV1):
    """Octopuns comics, retrieved through GenericTumblrV1."""
    # Other source: http://www.octopuns.net
    name = 'octopuns-tumblr'
    long_name = 'Octopuns (from Tumblr)'
    url = 'http://octopuns.tumblr.com'
3409
3410
3411
class PicturesInBoxesTumblr(GenericTumblrV1):
    """Pictures In Boxes comics, retrieved through GenericTumblrV1."""
    # Other source: http://www.picturesinboxes.com
    name = 'picturesinboxes-tumblr'
    long_name = 'Pictures in Boxes (from Tumblr)'
    url = 'http://picturesinboxescomic.tumblr.com'
3417
3418
3419
class TubeyToonsTumblr(GenericTumblrV1):
    """TubeyToons comics, retrieved through GenericTumblrV1."""
    # Other source: http://tapastic.com/series/Tubey-Toons
    # Other source: http://tubeytoons.com
    name = 'tubeytoons-tumblr'
    long_name = 'Tubey Toons (from Tumblr)'
    url = 'http://tubeytoons.tumblr.com'
    # NOTE(review): the category is spelled 'TUNEYTOONS' (not 'TUBEYTOONS');
    # check the related classes before renaming it.
    _categories = ('TUNEYTOONS', )
3427
3428
3429
class UnearthedComicsTumblr(GenericTumblrV1):
    """Unearthed comics, retrieved through GenericTumblrV1."""
    # Other source: http://tapastic.com/series/UnearthedComics
    # Other source: http://unearthedcomics.com
    name = 'unearthed-tumblr'
    long_name = 'Unearthed Comics (from Tumblr)'
    url = 'http://unearthedcomics.tumblr.com'
    _categories = ('UNEARTHED', )
3437
3438
3439
class PieComic(GenericTumblrV1):
    """Pie Comic comics, retrieved through GenericTumblrV1."""
    name = 'pie'
    long_name = 'Pie Comic'
    url = "http://piecomic.tumblr.com"
3444
3445
3446
class MrEthanDiamond(GenericTumblrV1):
    """Mr Ethan Diamond comics, retrieved through GenericTumblrV1."""
    name = 'diamond'
    long_name = 'Mr Ethan Diamond'
    url = 'http://mrethandiamond.tumblr.com'
3451
3452
3453
class Flocci(GenericTumblrV1):
    """floccinaucinihilipilification comics, retrieved through GenericTumblrV1."""
    name = 'flocci'
    long_name = 'floccinaucinihilipilification'
    url = "http://floccinaucinihilipilificationa.tumblr.com"
3458
3459
3460
class UpAndOut(GenericTumblrV1):
    """Up & Out comics, retrieved through GenericTumblrV1."""
    # Other source: http://tapastic.com/series/UP-and-OUT
    name = 'upandout'
    long_name = 'Up And Out (from Tumblr)'
    url = 'http://upandoutcomic.tumblr.com'
3466
3467
3468
class Pundemonium(GenericTumblrV1):
    """Pundemonium comics, retrieved through GenericTumblrV1."""
    name = 'pundemonium'
    long_name = 'Pundemonium'
    url = 'http://monstika.tumblr.com'
3473
3474
3475
class PoorlyDrawnLinesTumblr(GenericTumblrV1):
    """Poorly Drawn Lines comics, retrieved through GenericTumblrV1."""
    # Other source: http://poorlydrawnlines.com
    name = 'poorlydrawn-tumblr'
    long_name = 'Poorly Drawn Lines (from Tumblr)'
    url = 'http://pdlcomics.tumblr.com'
    _categories = ('POORLYDRAWN', )
3482
3483
3484
class PearShapedComics(GenericTumblrV1):
    """Pear Shaped Comics, retrieved through GenericTumblrV1."""
    name = 'pearshaped'
    long_name = 'Pear-Shaped Comics'
    url = 'http://pearshapedcomics.com'
3489
3490
3491
class PondScumComics(GenericTumblrV1):
    """Pond Scum Comics, retrieved through GenericTumblrV1."""
    name = 'pond'
    long_name = 'Pond Scum'
    url = 'http://pondscumcomic.tumblr.com'
3496
3497
3498
class MercworksTumblr(GenericTumblrV1):
    """Mercworks comics, retrieved through GenericTumblrV1."""
    # Other source: http://mercworks.net
    name = 'mercworks-tumblr'
    long_name = 'Mercworks (from Tumblr)'
    url = 'http://mercworks.tumblr.com'
3504
3505
3506
class OwlTurdTumblr(GenericTumblrV1):
    """Owl Turd comics, retrieved through GenericTumblrV1."""
    # Other source: http://tapastic.com/series/Owl-Turd-Comix
    name = 'owlturd-tumblr'
    long_name = 'Owl Turd (from Tumblr)'
    url = 'http://owlturd.com'
    _categories = ('OWLTURD', )
3513
3514
3515
class VectorBelly(GenericTumblrV1):
    """Vector Belly comics, retrieved through GenericTumblrV1."""
    # Other source: http://vectorbelly.com
    name = 'vector'
    long_name = 'Vector Belly'
    url = 'http://vectorbelly.tumblr.com'
3521
3522
3523
class GoneIntoRapture(GenericTumblrV1):
    """Gone Into Rapture comics, retrieved through GenericTumblrV1."""
    # Other source: http://goneintorapture.tumblr.com
    # Other source: http://tapastic.com/series/Goneintorapture
    name = 'rapture'
    long_name = 'Gone Into Rapture'
    url = 'http://www.goneintorapture.com'
3530
3531
3532
class TheOatmealTumblr(GenericTumblrV1):
    """The Oatmeal comics, retrieved through GenericTumblrV1."""
    # Other source: http://theoatmeal.com
    name = 'oatmeal-tumblr'
    long_name = 'The Oatmeal (from Tumblr)'
    url = 'http://oatmeal.tumblr.com'
3538
3539
3540
class HeckIfIKnowComicsTumblr(GenericTumblrV1):
    """Heck If I Know Comics, retrieved through GenericTumblrV1."""
    # Other source: http://tapastic.com/series/Regular
    name = 'heck-tumblr'
    long_name = 'Heck if I Know comics (from Tumblr)'
    url = 'http://heckifiknowcomics.com'
3546
3547
3548
class MyJetPack(GenericTumblrV1):
    """My Jet Pack comics, retrieved through GenericTumblrV1."""
    name = 'jetpack'
    long_name = 'My Jet Pack'
    url = 'http://myjetpack.tumblr.com'
3553
3554
3555
class CheerUpEmoKidTumblr(GenericTumblrV1):
    """CheerUpEmoKid comics, retrieved through GenericTumblrV1."""
    # Other source: http://www.cheerupemokid.com
    # Other source: http://tapastic.com/series/CUEK
    name = 'cuek-tumblr'
    long_name = 'Cheer Up Emo Kid (from Tumblr)'
    url = 'http://enzocomics.tumblr.com'
3562
3563
3564
class ForLackOfABetterComic(GenericTumblrV1):
    """For Lack Of A Better Comic, retrieved through GenericTumblrV1."""
    # Other source: http://forlackofabettercomic.com
    name = 'lack'
    long_name = 'For Lack Of A Better Comic'
    url = 'http://forlackofabettercomic.tumblr.com'
3570
3571
3572
class ZenPencilsTumblr(GenericTumblrV1):
    """ZenPencils comics, retrieved through GenericTumblrV1."""
    # Other source: http://zenpencils.com
    # Other source: http://www.gocomics.com/zen-pencils
    name = 'zenpencils-tumblr'
    long_name = 'Zen Pencils (from Tumblr)'
    url = 'http://zenpencils.tumblr.com'
    _categories = ('ZENPENCILS', )
3580
3581
3582
class ThreeWordPhraseTumblr(GenericTumblrV1):
    """Three Word Phrase comics, retrieved through GenericTumblrV1."""
    # Other source: http://threewordphrase.com
    name = 'threeword-tumblr'
    long_name = 'Three Word Phrase (from Tumblr)'
    url = 'http://www.threewordphrase.tumblr.com'
3588
3589
3590
class TimeTrabbleTumblr(GenericTumblrV1):
    """Time Trabble comics, retrieved through GenericTumblrV1."""
    # Other source: http://timetrabble.com
    name = 'timetrabble-tumblr'
    long_name = 'Time Trabble (from Tumblr)'
    url = 'http://timetrabble.tumblr.com'
3596
3597
3598
class SafelyEndangeredTumblr(GenericTumblrV1):
    """Safely Endangered comics, retrieved through GenericTumblrV1."""
    # Other source: http://www.safelyendangered.com
    name = 'endangered-tumblr'
    long_name = 'Safely Endangered (from Tumblr)'
    url = 'http://tumblr.safelyendangered.com'
3604
3605
3606
class MouseBearComedyTumblr(GenericTumblrV1):
    """Mouse Bear Comedy comics, retrieved through GenericTumblrV1."""
    # Other source: http://www.mousebearcomedy.com
    name = 'mousebear-tumblr'
    long_name = 'Mouse Bear Comedy (from Tumblr)'
    url = 'http://mousebearcomedy.tumblr.com'
3612
3613
3614
class BouletCorpTumblr(GenericTumblrV1):
    """BouletCorp comics, retrieved through GenericTumblrV1."""
    # Other source: http://www.bouletcorp.com
    name = 'boulet-tumblr'
    long_name = 'Boulet Corp (from Tumblr)'
    url = 'http://bouletcorp.tumblr.com'
    _categories = ('BOULET', )
3621
3622
3623
class TheAwkwardYetiTumblr(GenericTumblrV1):
    """The Awkward Yeti comics, retrieved through GenericTumblrV1."""
    # Other source: http://www.gocomics.com/the-awkward-yeti
    # Other source: http://theawkwardyeti.com
    # Other source: https://tapastic.com/series/TheAwkwardYeti
    name = 'yeti-tumblr'
    long_name = 'The Awkward Yeti (from Tumblr)'
    url = 'http://larstheyeti.tumblr.com'
    _categories = ('YETI', )
3632
3633
3634
class NellucNhoj(GenericTumblrV1):
    """NellucNhoj comics, retrieved through GenericTumblrV1."""
    name = 'nhoj'
    long_name = 'Nelluc Nhoj'
    url = 'http://nellucnhoj.com'
3639
3640
3641
class DownTheUpwardSpiralTumblr(GenericTumblrV1):
    """Down The Upward Spiral comics, retrieved through GenericTumblrV1."""
    # Other source: http://www.downtheupwardspiral.com
    name = 'spiral-tumblr'
    long_name = 'Down the Upward Spiral (from Tumblr)'
    url = 'http://downtheupwardspiral.tumblr.com'
3647
3648
3649
class AsPerUsualTumblr(GenericTumblrV1):
    """Class to retrieve As Per Usual comics."""
    # Also on https://tapastic.com/series/AsPerUsual
    name = 'usual-tumblr'
    long_name = 'As Per Usual (from Tumblr)'
    url = 'http://as-per-usual.tumblr.com'
    # Stored in `_categories`, the attribute name used by GenericTumblrV1 and
    # the other comic classes (it used to be spelled `categories`).
    _categories = ('DAMILEE', )
3656
3657
3658
class HotComicsForCoolPeopleTumblr(GenericTumblrV1):
    """Class to retrieve Hot Comics For Cool People."""
    # Also on https://tapastic.com/series/Hot-Comics-For-Cool-People
    # Also on http://hotcomics.biz (links to tumblr)
    # Also on http://hcfcp.com (links to tumblr)
    name = 'hotcomics-tumblr'
    long_name = 'Hot Comics For Cool People (from Tumblr)'
    url = 'http://hotcomicsforcoolpeople.tumblr.com'
    # Stored in `_categories`, the attribute name used by GenericTumblrV1 and
    # the other comic classes (it used to be spelled `categories`).
    _categories = ('DAMILEE', )
3667
3668
3669
class OneOneOneOneComicTumblr(GenericTumblrV1):
    """1111 Comics, retrieved through GenericTumblrV1."""
    # Other source: http://www.1111comics.me
    # Other source: https://tapastic.com/series/1111-Comics
    name = '1111-tumblr'
    long_name = '1111 Comics (from Tumblr)'
    url = 'http://comics1111.tumblr.com'
    _categories = ('ONEONEONEONE', )
3677
3678
3679
class JhallComicsTumblr(GenericTumblrV1):
    """Jhall Comics, retrieved through GenericTumblrV1."""
    # Other source: http://jhallcomics.com
    name = 'jhall-tumblr'
    long_name = 'Jhall Comics (from Tumblr)'
    url = 'http://jhallcomics.tumblr.com'
3685
3686
3687
class BerkeleyMewsTumblr(GenericTumblrV1):
    """Berkeley Mews comics, retrieved through GenericTumblrV1."""
    # Other source: http://www.gocomics.com/berkeley-mews
    # Other source: http://www.berkeleymews.com
    name = 'berkeley-tumblr'
    long_name = 'Berkeley Mews (from Tumblr)'
    url = 'http://mews.tumblr.com'
    _categories = ('BERKELEY', )
3695
3696
3697
class JoanCornellaTumblr(GenericTumblrV1):
    """Joan Cornella comics, retrieved through GenericTumblrV1."""
    # Other source: http://joancornella.net
    name = 'cornella-tumblr'
    long_name = 'Joan Cornella (from Tumblr)'
    url = 'http://cornellajoan.tumblr.com'
3703
3704
3705
class RespawnComicTumblr(GenericTumblrV1):
    """Respawn Comic, retrieved through GenericTumblrV1."""
    # Other source: http://respawncomic.com
    name = 'respawn-tumblr'
    long_name = 'Respawn Comic (from Tumblr)'
    url = 'http://respawncomic.tumblr.com'
3711
3712
3713
class ChrisHallbeckTumblr(GenericTumblrV1):
    """Chris Hallbeck comics, retrieved through GenericTumblrV1."""
    # Other source: https://tapastic.com/ChrisHallbeck
    # Other source: http://maximumble.com
    # Other source: http://minimumble.com
    # Other source: http://thebookofbiff.com
    name = 'hallbeck-tumblr'
    # NOTE(review): 'Hallback' below differs from the 'Hallbeck' spelling of
    # the name and url; check how these values are used before renaming.
    long_name = 'Chris Hallback (from Tumblr)'
    url = 'http://chrishallbeck.tumblr.com'
    _categories = ('HALLBACK', )
3723
3724
3725
class ComicNuggets(GenericTumblrV1):
    """Comic Nuggets, retrieved through GenericTumblrV1."""
    name = 'nuggets'
    long_name = 'Comic Nuggets'
    url = 'http://comicnuggets.com'
3730
3731
3732
class PigeonGazetteTumblr(GenericTumblrV1):
    """The Pigeon Gazette comics, retrieved through GenericTumblrV1."""
    # Other source: https://tapastic.com/series/The-Pigeon-Gazette
    name = 'pigeon-tumblr'
    long_name = 'The Pigeon Gazette (from Tumblr)'
    url = 'http://thepigeongazette.tumblr.com'
3738
3739
3740
class CancerOwl(GenericTumblrV1):
    """Cancer Owl comics, retrieved through GenericTumblrV1."""
    # Other source: http://cancerowl.com
    name = 'cancerowl-tumblr'
    long_name = 'Cancer Owl (from Tumblr)'
    url = 'http://cancerowl.tumblr.com'
3746
3747
3748
class FowlLanguageTumblr(GenericTumblrV1):
    """Fowl Language comics, retrieved through GenericTumblrV1."""
    # Other source: http://www.fowllanguagecomics.com
    # Other source: http://tapastic.com/series/Fowl-Language-Comics
    # Other source: http://www.gocomics.com/fowl-language
    name = 'fowllanguage-tumblr'
    long_name = 'Fowl Language Comics (from Tumblr)'
    url = 'http://fowllanguagecomics.tumblr.com'
    _categories = ('FOWLLANGUAGE', )
3757
3758
3759
class TheOdd1sOutTumblr(GenericTumblrV1):
    """The Odd 1s Out comics, retrieved through GenericTumblrV1."""
    # Other source: http://theodd1sout.com
    # Other source: https://tapastic.com/series/Theodd1sout
    name = 'theodd-tumblr'
    long_name = 'The Odd 1s Out (from Tumblr)'
    url = 'http://theodd1sout.tumblr.com'
3766
3767
3768
class TheUnderfoldTumblr(GenericTumblrV1):
    """The Underfold comics, retrieved through GenericTumblrV1."""
    # Other source: http://theunderfold.com
    name = 'underfold-tumblr'
    long_name = 'The Underfold (from Tumblr)'
    url = 'http://theunderfold.tumblr.com'
3774
3775
3776
class LolNeinTumblr(GenericTumblrV1):
    """Lol Nein comics, retrieved through GenericTumblrV1."""
    # Other source: http://lolnein.com
    name = 'lolnein-tumblr'
    long_name = 'Lol Nein (from Tumblr)'
    url = 'http://lolneincom.tumblr.com'
3782
3783
3784
class FatAwesomeComicsTumblr(GenericTumblrV1):
    """Fat Awesome Comics, retrieved through GenericTumblrV1."""
    # Other source: http://fatawesome.com/comics
    name = 'fatawesome-tumblr'
    long_name = 'Fat Awesome (from Tumblr)'
    url = 'http://fatawesomecomedy.tumblr.com'
3790
3791
3792
class TheWorldIsFlatTumblr(GenericTumblrV1):
    """The World Is Flat Comics, retrieved through GenericTumblrV1."""
    # Other source: https://tapastic.com/series/The-World-is-Flat
    name = 'flatworld-tumblr'
    long_name = 'The World Is Flat (from Tumblr)'
    url = 'http://theworldisflatcomics.tumblr.com'
3798
3799
3800
class DorrisMc(GenericTumblrV1):
    """Dorris Mc Comics, retrieved through GenericTumblrV1."""
    # Other source: http://www.gocomics.com/dorris-mccomics
    name = 'dorrismc'
    long_name = 'Dorris Mc'
    url = 'http://dorrismccomics.com'
3806
3807
3808
class LeleozTumblr(GenericEmptyComic, GenericTumblrV1):
    """Leleoz comics; combines GenericEmptyComic with GenericTumblrV1."""
    # Other source: https://tapastic.com/series/Leleoz
    name = 'leleoz-tumblr'
    long_name = 'Leleoz (from Tumblr)'
    url = 'http://leleozcomics.tumblr.com'
3814
3815
3816
class MoonBeardTumblr(GenericTumblrV1):
    """MoonBeard comics, retrieved through GenericTumblrV1."""
    # Other source: http://moonbeard.com
    # Other source: http://www.webtoons.com/en/comedy/moon-beard/list?title_no=471
    name = 'moonbeard-tumblr'
    long_name = 'Moon Beard (from Tumblr)'
    url = 'http://blog.squiresjam.es/moonbeard'
3823
3824
3825
class AComik(GenericTumblrV1):
    """Retriever for the A Comik comics, using the GenericTumblrV1 logic."""
    name = 'comik'
    long_name = 'A Comik'
    url = 'http://acomik.com'
3830
3831
3832
class ClassicRandy(GenericTumblrV1):
    """Retriever for the Classic Randy comics, using the GenericTumblrV1 logic."""
    name = 'randy'
    long_name = 'Classic Randy'
    url = 'http://classicrandy.tumblr.com'
3837
3838
3839
class DagssonTumblr(GenericTumblrV1):
    """Retriever for the Dagsson comics, using the GenericTumblrV1 logic."""
    # Also published at http://www.dagsson.com
    name = 'dagsson-tumblr'
    long_name = 'Dagsson Hugleikur (from Tumblr)'
    url = 'http://hugleikurdagsson.tumblr.com'
3845
3846
3847
class LinsEditionsTumblr(GenericTumblrV1):
    """Retriever for the L.I.N.S. Editions comics, using the GenericTumblrV1 logic."""
    # Also published at https://linsedition.com
    name = 'lins-tumblr'
    long_name = 'L.I.N.S. Editions (from Tumblr)'
    url = 'http://linscomics.tumblr.com'
    _categories = ('LINS',)
3854
3855
3856
class OrigamiHotDish(GenericTumblrV1):
    """Retriever for the Origami Hot Dish comics, using the GenericTumblrV1 logic."""
    name = 'origamihotdish'
    long_name = 'Origami Hot Dish'
    url = 'http://origamihotdish.com'
3861
3862
3863
class HitAndMissComicsTumblr(GenericTumblrV1):
    """Retriever for the Hit and Miss Comics, using the GenericTumblrV1 logic."""
    name = 'hitandmiss'
    long_name = 'Hit and Miss Comics'
    url = 'http://hitandmisscomics.tumblr.com'
3868
3869
3870
class HMBlanc(GenericTumblrV1):
    """Retriever for the HM Blanc comics, using the GenericTumblrV1 logic."""
    name = 'hmblanc'
    long_name = 'HM Blanc'
    url = 'http://hmblanc.tumblr.com'
3875
3876
3877
class TalesOfAbsurdityTumblr(GenericTumblrV1):
    """Retriever for the Tales Of Absurdity comics, using the GenericTumblrV1 logic."""
    # Also published at http://talesofabsurdity.com
    # Also published at http://tapastic.com/series/Tales-Of-Absurdity
    name = 'absurdity-tumblr'
    long_name = 'Tales of Absurdity (from Tumblr)'
    url = 'http://talesofabsurdity.tumblr.com'
    _categories = ('ABSURDITY',)
3885
3886
3887
class RobbieAndBobby(GenericTumblrV1):
    """Retriever for the Robbie And Bobby comics, using the GenericTumblrV1 logic."""
    # Also published at http://robbieandbobby.com
    name = 'robbie-tumblr'
    long_name = 'Robbie And Bobby (from Tumblr)'
    url = 'http://robbieandbobby.tumblr.com'
3893
3894
3895
class ElectricBunnyComicTumblr(GenericTumblrV1):
    """Retriever for the Electric Bunny Comics, using the GenericTumblrV1 logic."""
    # Also published at http://www.electricbunnycomics.com/View/Comic/153/Welcome+to+Hell
    name = 'bunny-tumblr'
    long_name = 'Electric Bunny Comic (from Tumblr)'
    url = 'http://electricbunnycomics.tumblr.com'
3901
3902
3903
class Hoomph(GenericTumblrV1):
    """Retriever for the Hoomph comics, using the GenericTumblrV1 logic."""
    name = 'hoomph'
    long_name = 'Hoomph'
    url = 'http://hoom.ph'
3908
3909
3910
class BFGFSTumblr(GenericTumblrV1):
    """Retriever for the BFGFS comics, using the GenericTumblrV1 logic."""
    # Also published at https://tapastic.com/series/BFGFS
    # Also published at http://bfgfs.com
    name = 'bfgfs-tumblr'
    long_name = 'BFGFS (from Tumblr)'
    url = 'http://bfgfs.tumblr.com'
3917
3918
3919
class DoodleForFood(GenericTumblrV1):
    """Retriever for the Doodle For Food comics, using the GenericTumblrV1 logic."""
    # Also published at http://doodleforfood.com
    name = 'doodle'
    long_name = 'Doodle For Food'
    url = 'http://doodleforfood.com'
3925
3926
3927
class CassandraCalinTumblr(GenericTumblrV1):
    """Retriever for the C. Cassandra comics, using the GenericTumblrV1 logic."""
    # Also published at http://cassandracalin.com
    # Also published at https://tapastic.com/series/C-Cassandra-comics
    name = 'cassandra-tumblr'
    long_name = 'Cassandra Calin (from Tumblr)'
    url = 'http://c-cassandra.tumblr.com'
3934
3935
3936
class DougWasTaken(GenericTumblrV1):
    """Retriever for the Doug Was Taken comics, using the GenericTumblrV1 logic."""
    name = 'doug'
    long_name = 'Doug Was Taken'
    url = 'http://dougwastaken.tumblr.com'
3941
3942
3943
class MandatoryRollerCoaster(GenericTumblrV1):
    """Retriever for the Mandatory Roller Coaster comics, using the GenericTumblrV1 logic."""
    name = 'rollercoaster'
    long_name = 'Mandatory Roller Coaster'
    url = 'http://mandatoryrollercoaster.com'
3948
3949
3950
class CEstPasEnRegardantSesPompes(GenericTumblrV1):
    """Retriever for the C'Est Pas En Regardant Ses Pompes (...) comics, using the GenericTumblrV1 logic."""
    name = 'cperspqccltt'
    long_name = 'C Est Pas En Regardant Ses Pompes (...)'
    url = 'http://cperspqccltt.tumblr.com'
3955
3956
3957
class TheGrohlTroll(GenericTumblrV1):
    """Retriever for The Grohl Troll comics, using the GenericTumblrV1 logic."""
    name = 'grohltroll'
    long_name = 'The Grohl Troll'
    url = 'http://thegrohltroll.com'
3962
3963
3964
class WebcomicName(GenericTumblrV1):
    """Retriever for the Webcomic Name comics, using the GenericTumblrV1 logic."""
    name = 'webcomicname'
    long_name = 'Webcomic Name'
    url = 'http://webcomicname.com'
3969
3970
3971
class BooksOfAdam(GenericTumblrV1):
    """Retriever for the Books of Adam comics, using the GenericTumblrV1 logic."""
    # Also published at http://www.booksofadam.com
    name = 'booksofadam'
    long_name = 'Books of Adam'
    url = 'http://booksofadam.tumblr.com'
3977
3978
3979
class HarkAVagrant(GenericTumblrV1):
    """Retriever for the Hark A Vagrant comics, using the GenericTumblrV1 logic."""
    # Also published at http://www.harkavagrant.com
    name = 'hark-tumblr'
    long_name = 'Hark A Vagrant (from Tumblr)'
    url = 'http://beatonna.tumblr.com'
3985
3986
3987
class OurSuperAdventureTumblr(GenericTumblrV1):
    """Retriever for the Our Super Adventure comics, using the GenericTumblrV1 logic."""
    # Also published at https://tapastic.com/series/Our-Super-Adventure
    # Also published at http://www.oursuperadventure.com
    # http://sarahgraley.com
    name = 'superadventure-tumblr'
    long_name = 'Our Super Adventure (from Tumblr)'
    url = 'http://sarahssketchbook.tumblr.com'
3995
3996
3997
class JakeLikesOnions(GenericTumblrV1):
    """Retriever for the Jake Likes Onions comics, using the GenericTumblrV1 logic."""
    name = 'jake'
    long_name = 'Jake Likes Onions'
    url = 'http://jakelikesonions.com'
4002
4003
4004
class InYourFaceCake(GenericTumblrV1):
    """Retriever for the In Your Face Cake comics, using the GenericTumblrV1 logic."""
    name = 'inyourfacecake-tumblr'
    long_name = 'In Your Face Cake (from Tumblr)'
    url = 'http://in-your-face-cake.tumblr.com'
4009
4010
4011
class Robospunk(GenericTumblrV1):
    """Retriever for the Robospunk comics, using the GenericTumblrV1 logic."""
    name = 'robospunk'
    long_name = 'Robospunk'
    url = 'http://robospunk.com'
4016
4017
4018
class BananaTwinky(GenericTumblrV1):
    """Retriever for the Banana Twinky comics, using the GenericTumblrV1 logic."""
    name = 'banana'
    long_name = 'Banana Twinky'
    url = 'http://bananatwinky.tumblr.com'
4023
4024
4025
class YesterdaysPopcornTumblr(GenericTumblrV1):
    """Retriever for the Yesterday's Popcorn comics, using the GenericTumblrV1 logic."""
    # Also published at http://www.yesterdayspopcorn.com
    # Also published at https://tapastic.com/series/Yesterdays-Popcorn
    name = 'popcorn-tumblr'
    long_name = "Yesterday's Popcorn (from Tumblr)"
    url = 'http://yesterdayspopcorn.tumblr.com'
4032
4033
4034
class TwistedDoodles(GenericTumblrV1):
    """Retriever for the Twisted Doodles comics, using the GenericTumblrV1 logic."""
    name = 'twisted'
    long_name = 'Twisted Doodles'
    url = 'http://www.twisteddoodles.com'
4039
4040
4041
class UbertoolTumblr(GenericTumblrV1):
    """Retriever for the Ubertool comics, using the GenericTumblrV1 logic."""
    # Also published at http://ubertoolcomic.com
    # Also published at https://tapastic.com/series/ubertool
    name = 'ubertool-tumblr'
    long_name = 'Ubertool (from Tumblr)'
    url = 'http://ubertool.tumblr.com'
    _categories = ('UBERTOOL',)
4049
4050
4051
class LittleLifeLinesTumblr(GenericTumblrV1):
    """Retriever for the Little Life Lines comics, using the GenericTumblrV1 logic."""
    # Also published at http://www.littlelifelines.com
    name = 'life-tumblr'
    long_name = 'Little Life Lines (from Tumblr)'
    url = 'https://little-life-lines.tumblr.com'
4057
4058
4059
class TheyCanTalk(GenericTumblrV1):
    """Retriever for the They Can Talk comics, using the GenericTumblrV1 logic."""
    name = 'theycantalk'
    long_name = 'They Can Talk'
    url = 'http://theycantalk.com'
4064
4065
4066
class Will5NeverCome(GenericTumblrV1):
    """Retriever for the Will 5:00 Never Come comics, using the GenericTumblrV1 logic."""
    name = 'will5'
    long_name = 'Will 5:00 Never Come ?'
    url = 'http://will5nevercome.com'
4071
4072
4073
class Sephko(GenericTumblrV1):
    """Retriever for the Sephko comics, using the GenericTumblrV1 logic."""
    # Also published at http://www.sephko.com
    name = 'sephko'
    long_name = 'Sephko'
    url = 'http://sephko.tumblr.com'
4079
4080
4081
class BlazersAtDawn(GenericTumblrV1):
    """Retriever for the Blazers At Dawn comics, using the GenericTumblrV1 logic."""
    name = 'blazers'
    long_name = 'Blazers At Dawn'
    url = 'http://blazersatdawn.tumblr.com'
4086
4087
4088
# Deactivated because it downloads too many things
class ArtByMoga(GenericEmptyComic, GenericTumblrV1):
    """Retriever for the Art By Moga comics, using the GenericTumblrV1 logic."""
    name = 'moga'
    long_name = 'Art By Moga'
    url = 'http://artbymoga.tumblr.com'
4093
4094
4095
class HorovitzComics(GenericListableComic):
    """Shared logic for the comics hosted on horovitzcomics.com.

    Subclasses are expected to provide `link_re`, a compiled regex whose
    first group captures the comic number in an archive link's href.
    """
    url = 'http://www.horovitzcomics.com'
    _categories = ('HOROVITZ', )
    # Captures year, month and day (in that order) from the image path.
    img_re = re.compile('.*comics/([0-9]*)/([0-9]*)/([0-9]*)/.*$')
    link_re = NotImplemented
    get_url_from_archive_element = join_cls_url_to_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary for a single comic.

        `soup` is the parsed comic page and `link` the archive anchor that
        led to it; the number comes from the link, the date from the path
        of the single image with id 'comic'.
        """
        comic_num = int(cls.link_re.match(link['href']).group(1))
        comic_title = link.string
        comic_imgs = soup.find_all('img', id='comic')
        assert len(comic_imgs) == 1
        date_match = cls.img_re.match(comic_imgs[0]['src'])
        year, month, day = map(int, date_match.groups())
        return {
            'title': comic_title,
            'day': day,
            'month': month,
            'year': year,
            'img': [img['src'] for img in comic_imgs],
            'num': comic_num,
        }

    @classmethod
    def get_archive_elements(cls):
        """Return the archive links matching `link_re`, in reversed page order."""
        archive_soup = get_soup_at_url('http://www.horovitzcomics.com/comics/archive/')
        matching_links = archive_soup.find_all('a', href=cls.link_re)
        return reversed(matching_links)
4126
4127
4128
class HorovitzNew(HorovitzComics):
    """Retriever for the 'new' Horovitz comics (links under /comics/new/)."""
    name = 'horovitznew'
    long_name = 'Horovitz New'
    link_re = re.compile(r'^/comics/new/([0-9]+)$')
4133
4134
4135
class HorovitzClassic(HorovitzComics):
    """Retriever for the 'classic' Horovitz comics (links under /comics/classic/)."""
    name = 'horovitzclassic'
    long_name = 'Horovitz Classic'
    link_re = re.compile(r'^/comics/classic/([0-9]+)$')
4140
4141
4142
class GenericGoComic(GenericNavigableComic):
    """Shared logic for the comics hosted on gocomics.com."""
    _categories = ('GOCOMIC', )
    # Captures the three trailing numeric path components of a comic URL,
    # read as year, month and day.
    url_date_re = re.compile('.*/([0-9]*)/([0-9]*)/([0-9]*)$')

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'beginning' anchor found on the comic's main page."""
        home_soup = get_soup_at_url(cls.url)
        return home_soup.find('a', class_='beginning')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the dated 'next' (if next_) or 'prev' anchor of a page."""
        direction = 'next' if next_ else 'prev'
        return last_soup.find('a', class_=direction, href=cls.url_date_re)

    @classmethod
    def get_url_from_link(cls, link):
        """Resolve the link's href against the gocomics.com root."""
        return urljoin_wrapper('http://www.gocomics.com', link['href'])

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary for a single comic.

        The date is parsed from the comic URL; the image is the last
        'strip' image of the page and the author comes from the page's
        'author' meta tag.
        """
        comic_url = cls.get_url_from_link(link)
        year, month, day = map(int, cls.url_date_re.match(comic_url).groups())
        strip_src = soup.find_all('img', class_='strip')[-1]['src']
        author = soup.find('meta', attrs={'name': 'author'})['content']
        return {
            'day': day,
            'month': month,
            'year': year,
            'img': [strip_src],
            'author': author,
        }
4175
4176
4177
class PearlsBeforeSwine(GenericGoComic):
    """Retriever for the Pearls Before Swine comics hosted on gocomics.com."""
    name = 'pearls'
    long_name = 'Pearls Before Swine'
    url = 'http://www.gocomics.com/pearlsbeforeswine'
4182
4183
4184
class Peanuts(GenericGoComic):
    """Retriever for the Peanuts comics hosted on gocomics.com."""
    name = 'peanuts'
    long_name = 'Peanuts'
    url = 'http://www.gocomics.com/peanuts'
4189
4190
4191
class MattWuerker(GenericGoComic):
    """Retriever for the Matt Wuerker comics hosted on gocomics.com."""
    name = 'wuerker'
    long_name = 'Matt Wuerker'
    url = 'http://www.gocomics.com/mattwuerker'
4196
4197
4198
class TomToles(GenericGoComic):
    """Retriever for the Tom Toles comics hosted on gocomics.com."""
    name = 'toles'
    long_name = 'Tom Toles'
    url = 'http://www.gocomics.com/tomtoles'
4203
4204
4205
class BreakOfDay(GenericGoComic):
    """Retriever for the Break Of Day comics hosted on gocomics.com."""
    name = 'breakofday'
    long_name = 'Break Of Day'
    url = 'http://www.gocomics.com/break-of-day'
4210
4211
4212
class Brevity(GenericGoComic):
    """Retriever for the Brevity comics hosted on gocomics.com."""
    name = 'brevity'
    long_name = 'Brevity'
    url = 'http://www.gocomics.com/brevity'
4217
4218
4219
class MichaelRamirez(GenericGoComic):
    """Retriever for the Michael Ramirez comics hosted on gocomics.com."""
    name = 'ramirez'
    long_name = 'Michael Ramirez'
    url = 'http://www.gocomics.com/michaelramirez'
4224
4225
4226
class MikeLuckovich(GenericGoComic):
    """Retriever for the Mike Luckovich comics hosted on gocomics.com."""
    name = 'luckovich'
    long_name = 'Mike Luckovich'
    url = 'http://www.gocomics.com/mikeluckovich'
4231
4232
4233
class JimBenton(GenericGoComic):
    """Retriever for the Jim Benton comics hosted on gocomics.com."""
    # Also published at http://jimbenton.tumblr.com
    name = 'benton'
    long_name = 'Jim Benton'
    url = 'http://www.gocomics.com/jim-benton-cartoons'
4239
4240
4241
class TheArgyleSweater(GenericGoComic):
    """Retriever for the Argyle Sweater comics hosted on gocomics.com."""
    name = 'argyle'
    long_name = 'Argyle Sweater'
    url = 'http://www.gocomics.com/theargylesweater'
4246
4247
4248
class SunnyStreet(GenericGoComic):
    """Retriever for the Sunny Street comics hosted on gocomics.com."""
    # Also published at http://www.sunnystreetcomics.com
    name = 'sunny'
    long_name = 'Sunny Street'
    url = 'http://www.gocomics.com/sunny-street'
4254
4255
4256
class OffTheMark(GenericGoComic):
    """Retriever for the Off The Mark comics hosted on gocomics.com."""
    # Also published at https://www.offthemark.com
    name = 'offthemark'
    long_name = 'Off The Mark'
    url = 'http://www.gocomics.com/offthemark'
4262
4263
4264
class WuMo(GenericGoComic):
    """Retriever for the WuMo comics hosted on gocomics.com."""
    # Also published at http://wumo.com
    name = 'wumo'
    long_name = 'WuMo'
    url = 'http://www.gocomics.com/wumo'
4270
4271
4272
class LunarBaboon(GenericGoComic):
    """Retriever for the Lunar Baboon comics hosted on gocomics.com."""
    # Also published at http://www.lunarbaboon.com
    # Also published at https://tapastic.com/series/Lunarbaboon
    name = 'lunarbaboon'
    long_name = 'Lunar Baboon'
    url = 'http://www.gocomics.com/lunarbaboon'
4279
4280
4281
class SandersenGocomic(GenericGoComic):
    """Retriever for the Sarah Andersen comics hosted on gocomics.com."""
    # Also published at http://sarahcandersen.com
    # Also published at http://tapastic.com/series/Doodle-Time
    name = 'sandersen-goc'
    long_name = 'Sarah Andersen (from GoComics)'
    url = 'http://www.gocomics.com/sarahs-scribbles'
4288
4289
4290
class SaturdayMorningBreakfastCerealGoComic(GenericGoComic):
    """Retriever for the Saturday Morning Breakfast Cereal comics hosted on gocomics.com."""
    # Also published at http://smbc-comics.tumblr.com
    # Also published at http://www.smbc-comics.com
    name = 'smbc-goc'
    long_name = 'Saturday Morning Breakfast Cereal (from GoComics)'
    url = 'http://www.gocomics.com/saturday-morning-breakfast-cereal'
    _categories = ('SMBC',)
4298
4299
4300
class CalvinAndHobbesGoComic(GenericGoComic):
    """Retriever for the Calvin and Hobbes comics hosted on gocomics.com."""
    # Source is gocomics, not http://marcel-oehler.marcellosendos.ch/comics/ch/
    name = 'calvin-goc'
    long_name = 'Calvin and Hobbes (from GoComics)'
    url = 'http://www.gocomics.com/calvinandhobbes'
4306
4307
4308
class RallGoComic(GenericGoComic):
    """Retriever for the Ted Rall comics hosted on gocomics.com."""
    # Also published at http://rall.com/comic
    name = 'rall-goc'
    long_name = 'Ted Rall (from GoComics)'
    url = 'http://www.gocomics.com/tedrall'
    _categories = ('RALL',)
4315
4316
4317
class TheAwkwardYetiGoComic(GenericGoComic):
    """Retriever for The Awkward Yeti comics hosted on gocomics.com."""
    # Also published at http://larstheyeti.tumblr.com
    # Also published at http://theawkwardyeti.com
    # Also published at https://tapastic.com/series/TheAwkwardYeti
    name = 'yeti-goc'
    long_name = 'The Awkward Yeti (from GoComics)'
    url = 'http://www.gocomics.com/the-awkward-yeti'
    _categories = ('YETI',)
4326
4327
4328
class BerkeleyMewsGoComics(GenericGoComic):
    """Retriever for the Berkeley Mews comics hosted on gocomics.com."""
    # Also published at http://mews.tumblr.com
    # Also published at http://www.berkeleymews.com
    name = 'berkeley-goc'
    long_name = 'Berkeley Mews (from GoComics)'
    url = 'http://www.gocomics.com/berkeley-mews'
    _categories = ('BERKELEY',)
4336
4337
4338
class SheldonGoComics(GenericGoComic):
    """Retriever for the Sheldon comics hosted on gocomics.com."""
    # Also published at http://www.sheldoncomics.com
    name = 'sheldon-goc'
    long_name = 'Sheldon Comics (from GoComics)'
    url = 'http://www.gocomics.com/sheldon'
4344
4345
4346
class FowlLanguageGoComics(GenericGoComic):
    """Retriever for the Fowl Language comics hosted on gocomics.com."""
    # Also published at http://www.fowllanguagecomics.com
    # Also published at http://tapastic.com/series/Fowl-Language-Comics
    # Also published at http://fowllanguagecomics.tumblr.com
    name = 'fowllanguage-goc'
    long_name = 'Fowl Language Comics (from GoComics)'
    url = 'http://www.gocomics.com/fowl-language'
    _categories = ('FOWLLANGUAGE',)
4355
4356
4357
class NickAnderson(GenericGoComic):
    """Retriever for the Nick Anderson comics hosted on gocomics.com."""
    name = 'nickanderson'
    long_name = 'Nick Anderson'
    url = 'http://www.gocomics.com/nickanderson'
4362
4363
4364
class GarfieldGoComics(GenericGoComic):
    """Retriever for the Garfield comics hosted on gocomics.com."""
    # Also published at http://garfield.com
    name = 'garfield-goc'
    long_name = 'Garfield (from GoComics)'
    url = 'http://www.gocomics.com/garfield'
    _categories = ('GARFIELD',)
4371
4372
4373
class DorrisMcGoComics(GenericGoComic):
    """Retriever for the Dorris Mc comics hosted on gocomics.com."""
    # Also published at http://dorrismccomics.com
    name = 'dorrismc-goc'
    long_name = 'Dorris Mc (from GoComics)'
    url = 'http://www.gocomics.com/dorris-mccomics'
4379
4380
4381
class FoxTrot(GenericGoComic):
    """Retriever for the FoxTrot comics hosted on gocomics.com."""
    name = 'foxtrot'
    long_name = 'FoxTrot'
    url = 'http://www.gocomics.com/foxtrot'
4386
4387
4388
class FoxTrotClassics(GenericGoComic):
    """Retriever for the FoxTrot Classics comics hosted on gocomics.com."""
    name = 'foxtrot-classics'
    long_name = 'FoxTrot Classics'
    url = 'http://www.gocomics.com/foxtrotclassics'
4393
4394
4395
class MisterAndMeGoComics(GenericGoComic):
    """Retriever for the Mister & Me comics hosted on gocomics.com."""
    # Also published at http://www.mister-and-me.com
    # Also published at https://tapastic.com/series/Mister-and-Me
    name = 'mister-goc'
    long_name = 'Mister & Me (from GoComics)'
    url = 'http://www.gocomics.com/mister-and-me'
4402
4403
4404
class NonSequitur(GenericGoComic):
    """Retriever for the Non Sequitur (Wiley Miller) comics hosted on gocomics.com."""
    name = 'nonsequitur'
    long_name = 'Non Sequitur'
    url = 'http://www.gocomics.com/nonsequitur'
4409
4410
4411
class GenericTapasticComic(GenericListableComic):
    """Shared logic for the comics hosted on tapastic.com."""
    _categories = ('TAPASTIC', )

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Build the info dictionary for a single episode.

        `archive_elt` is one entry of the episode list (see
        get_archive_elements); its 'publishDate' is a timestamp in
        milliseconds. Returns None (after printing a notice) when the page
        holds no 'art-image' image yet.
        """
        seconds = int(archive_elt['publishDate']) / 1000.0
        # fromtimestamp() without a tz argument yields local time.
        published = datetime.datetime.fromtimestamp(seconds).date()
        images = soup.find_all('img', class_='art-image')
        if not images:
            print("Comic %s is being uploaded, retry later" % cls.get_url_from_archive_element(archive_elt))
            return None
        return {
            'day': published.day,
            'year': published.year,
            'month': published.month,
            'img': [img['src'] for img in images],
            'title': archive_elt['title'],
        }

    @classmethod
    def get_url_from_archive_element(cls, archive_elt):
        """Return the episode URL built from the element's 'id'."""
        return 'http://tapastic.com/episode/' + str(archive_elt['id'])

    @classmethod
    def get_archive_elements(cls):
        """Return the episode list embedded in the series page.

        The list is stored in the javascript part of the page: the first
        line of the form 'episodeList : <json>,' is located and its JSON
        payload decoded. This is the ugly way; no cleaner one is known.
        """
        prefix, suffix = 'episodeList : ', ','
        payloads = []
        for raw_line in urlopen_wrapper(cls.url).readlines():
            text = raw_line.decode('utf-8').strip()
            if text.startswith(prefix) and text.endswith(suffix):
                payloads.append(text[len(prefix):-len(suffix)])
        # IndexError here means no 'episodeList' line was found in the page.
        return json.loads(payloads[0])
4444
4445
4446
class VegetablesForDessert(GenericTapasticComic):
    """Retriever for the Vegetables For Dessert comics hosted on tapastic.com."""
    # Also published at http://vegetablesfordessert.tumblr.com
    name = 'vegetables'
    long_name = 'Vegetables For Dessert'
    url = 'http://tapastic.com/series/vegetablesfordessert'
4452
4453
4454
class FowlLanguageTapa(GenericTapasticComic):
    """Retriever for the Fowl Language comics hosted on tapastic.com."""
    # Also published at http://www.fowllanguagecomics.com
    # Also published at http://fowllanguagecomics.tumblr.com
    # Also published at http://www.gocomics.com/fowl-language
    name = 'fowllanguage-tapa'
    long_name = 'Fowl Language Comics (from Tapastic)'
    url = 'http://tapastic.com/series/Fowl-Language-Comics'
    _categories = ('FOWLLANGUAGE',)
4463
4464
4465
class OscillatingProfundities(GenericTapasticComic):
    """Retriever for the Oscillating Profundities comics hosted on tapastic.com."""
    name = 'oscillating'
    long_name = 'Oscillating Profundities'
    url = 'http://tapastic.com/series/oscillatingprofundities'
4470
4471
4472
class ZnoflatsComics(GenericTapasticComic):
    """Retriever for the Znoflats comics hosted on tapastic.com."""
    name = 'znoflats'
    long_name = 'Znoflats Comics'
    url = 'http://tapastic.com/series/Znoflats-Comics'
4477
4478
4479
class SandersenTapastic(GenericTapasticComic):
    """Retriever for the Sarah Andersen comics hosted on tapastic.com."""
    # Also published at http://sarahcandersen.com
    # Also published at http://www.gocomics.com/sarahs-scribbles
    name = 'sandersen-tapa'
    long_name = 'Sarah Andersen (from Tapastic)'
    url = 'http://tapastic.com/series/Doodle-Time'
4486
4487
4488
class TubeyToonsTapastic(GenericTapasticComic):
    """Retriever for the TubeyToons comics hosted on tapastic.com."""
    # Also published at http://tubeytoons.com
    # Also published at http://tubeytoons.tumblr.com
    name = 'tubeytoons-tapa'
    long_name = 'Tubey Toons (from Tapastic)'
    url = 'http://tapastic.com/series/Tubey-Toons'
    # NOTE(review): 'TUNEYTOONS' looks like a typo for 'TUBEYTOONS'; kept as-is
    # since other classes may share this key - confirm before renaming.
    _categories = ('TUNEYTOONS',)
4496
4497
4498
class AnythingComicTapastic(GenericTapasticComic):
    """Retriever for the Anything Comics hosted on tapastic.com."""
    # Also published at http://www.anythingcomic.com
    name = 'anythingcomic-tapa'
    long_name = 'Anything Comic (from Tapastic)'
    url = 'http://tapastic.com/series/anything'
4504
4505
4506
class UnearthedComicsTapastic(GenericTapasticComic):
    """Retriever for the Unearthed comics hosted on tapastic.com."""
    # Also published at http://unearthedcomics.com
    # Also published at http://unearthedcomics.tumblr.com
    name = 'unearthed-tapa'
    long_name = 'Unearthed Comics (from Tapastic)'
    url = 'http://tapastic.com/series/UnearthedComics'
    _categories = ('UNEARTHED',)
4514
4515
4516
class EverythingsStupidTapastic(GenericTapasticComic):
    """Retriever for the Everything's Stupid comics hosted on tapastic.com."""
    # Also published at http://www.webtoons.com/en/challenge/everythings-stupid/list?title_no=14591
    # Also published at http://everythingsstupid.net
    name = 'stupid-tapa'
    long_name = "Everything's Stupid (from Tapastic)"
    url = 'http://tapastic.com/series/EverythingsStupid'
4523
4524
4525
class JustSayEhTapastic(GenericTapasticComic):
    """Retriever for the Just Say Eh comics hosted on tapastic.com."""
    # Also published at http://www.justsayeh.com
    name = 'justsayeh-tapa'
    long_name = 'Just Say Eh (from Tapastic)'
    url = 'http://tapastic.com/series/Just-Say-Eh'
4531
4532
4533
class ThorsThundershackTapastic(GenericTapasticComic):
    """Retriever for the Thor's Thundershack comics hosted on tapastic.com."""
    # Also published at http://www.thorsthundershack.com
    name = 'thor-tapa'
    long_name = "Thor's Thundershack (from Tapastic)"
    # NOTE(review): the slug ends in 'Thundershac' (no 'k'); presumably that is
    # the actual series URL - verify before "fixing" it.
    url = 'http://tapastic.com/series/Thors-Thundershac'
    _categories = ('THOR',)
4540
4541
4542
class OwlTurdTapastic(GenericTapasticComic):
    """Retriever for the Owl Turd comics hosted on tapastic.com."""
    # Also published at http://owlturd.com
    name = 'owlturd-tapa'
    long_name = 'Owl Turd (from Tapastic)'
    url = 'http://tapastic.com/series/Owl-Turd-Comix'
    _categories = ('OWLTURD',)
4549
4550
4551
class GoneIntoRaptureTapastic(GenericTapasticComic):
    """Retriever for the Gone Into Rapture comics hosted on tapastic.com."""
    # Also published at http://goneintorapture.tumblr.com
    # Also published at http://www.goneintorapture.com
    name = 'rapture-tapa'
    long_name = 'Gone Into Rapture (from Tapastic)'
    url = 'http://tapastic.com/series/Goneintorapture'
4558
4559
4560
class HeckIfIKnowComicsTapa(GenericTapasticComic):
    """Retriever for the Heck If I Know Comics hosted on tapastic.com."""
    # Also published at http://heckifiknowcomics.com
    name = 'heck-tapa'
    long_name = 'Heck if I Know comics (from Tapastic)'
    url = 'http://tapastic.com/series/Regular'
4566
4567
4568
class CheerUpEmoKidTapa(GenericTapasticComic):
    """Retriever for the CheerUpEmoKid comics hosted on tapastic.com."""
    # Also published at http://www.cheerupemokid.com
    # Also published at http://enzocomics.tumblr.com
    name = 'cuek-tapa'
    long_name = 'Cheer Up Emo Kid (from Tapastic)'
    url = 'http://tapastic.com/series/CUEK'
4575
4576
4577
class BigFootJusticeTapa(GenericTapasticComic):
    """Retriever for the Big Foot Justice comics hosted on tapastic.com."""
    # Also published at http://bigfootjustice.com
    name = 'bigfoot-tapa'
    long_name = 'Big Foot Justice (from Tapastic)'
    url = 'http://tapastic.com/series/bigfoot-justice'
4583
4584
4585
class UpAndOutTapa(GenericTapasticComic):
    """Retriever for the Up & Out comics hosted on tapastic.com."""
    # Also published at http://upandoutcomic.tumblr.com
    name = 'upandout-tapa'
    long_name = 'Up And Out (from Tapastic)'
    url = 'http://tapastic.com/series/UP-and-OUT'
4591
4592
4593
class ToonHoleTapa(GenericTapasticComic):
    """Retriever for the Toon Hole comics hosted on tapastic.com."""
    # Also published at http://www.toonhole.com
    name = 'toonhole-tapa'
    long_name = 'Toon Hole (from Tapastic)'
    url = 'http://tapastic.com/series/TOONHOLE'
4599
4600
4601
class AngryAtNothingTapa(GenericTapasticComic):
    """Retriever for the Angry at Nothing comics hosted on tapastic.com."""
    # Also published at http://www.angryatnothing.net
    name = 'angry-tapa'
    long_name = 'Angry At Nothing (from Tapastic)'
    url = 'http://tapastic.com/series/Comics-yeah-definitely-comics-'
4607
4608
4609
class LeleozTapa(GenericTapasticComic):
    """Retriever for the Leleoz comics hosted on tapastic.com."""
    # Also published at http://leleozcomics.tumblr.com
    name = 'leleoz-tapa'
    long_name = 'Leleoz (from Tapastic)'
    url = 'https://tapastic.com/series/Leleoz'
4615
4616
4617
class TheAwkwardYetiTapa(GenericTapasticComic):
    """Retriever for The Awkward Yeti comics hosted on tapastic.com."""
    # Also published at http://www.gocomics.com/the-awkward-yeti
    # Also published at http://theawkwardyeti.com
    # Also published at http://larstheyeti.tumblr.com
    name = 'yeti-tapa'
    long_name = 'The Awkward Yeti (from Tapastic)'
    url = 'https://tapastic.com/series/TheAwkwardYeti'
    _categories = ('YETI',)
4626
4627
4628
class AsPerUsualTapa(GenericTapasticComic):
    """Class to retrieve As Per Usual comics."""
    # Also on http://as-per-usual.tumblr.com
    name = 'usual-tapa'
    long_name = 'As Per Usual (from Tapastic)'
    url = 'https://tapastic.com/series/AsPerUsual'
    # Spelled `_categories` (leading underscore) like every other comic class
    # in this file; the previous `categories` spelling did not match it.
    _categories = ('DAMILEE', )
4635
4636
4637
class HotComicsForCoolPeopleTapa(GenericTapasticComic):
    """Class to retrieve Hot Comics For Cool People."""
    # Also on http://hotcomicsforcoolpeople.tumblr.com
    # Also on http://hotcomics.biz (links to tumblr)
    # Also on http://hcfcp.com (links to tumblr)
    name = 'hotcomics-tapa'
    long_name = 'Hot Comics For Cool People (from Tapastic)'
    url = 'https://tapastic.com/series/Hot-Comics-For-Cool-People'
    # Spelled `_categories` (leading underscore) like every other comic class
    # in this file; the previous `categories` spelling did not match it.
    _categories = ('DAMILEE', )
4646
4647
4648
class OneOneOneOneComicTapa(GenericTapasticComic):
    """Retriever for the 1111 Comics hosted on tapastic.com."""
    # Also published at http://www.1111comics.me
    # Also published at http://comics1111.tumblr.com
    name = '1111-tapa'
    long_name = '1111 Comics (from Tapastic)'
    url = 'https://tapastic.com/series/1111-Comics'
    _categories = ('ONEONEONEONE',)
4656
4657
4658
class TumbleDryTapa(GenericTapasticComic):
    """Class to retrieve Tumble Dry comics."""
    # Also on http://tumbledrycomics.com
    name = 'tumbledry-tapa'
    # Display name fixed: it previously read 'Tumblr Dry', contradicting the
    # comic's name as used in the docstring and both URLs.
    long_name = 'Tumble Dry (from Tapastic)'
    url = 'https://tapastic.com/series/TumbleDryComics'
4664
4665
4666
class DeadlyPanelTapa(GenericTapasticComic):
    """Retriever for the Deadly Panel comics hosted on tapastic.com."""
    # Also published at http://www.deadlypanel.com
    name = 'deadly-tapa'
    long_name = 'Deadly Panel (from Tapastic)'
    url = 'https://tapastic.com/series/deadlypanel'
4672
4673
4674
class ChrisHallbeckMaxiTapa(GenericTapasticComic):
    """Retriever for Chris Hallbeck's Maximumble comics hosted on tapastic.com."""
    # Also published at http://chrishallbeck.tumblr.com
    # Also published at http://maximumble.com
    name = 'hallbeckmaxi-tapa'
    # NOTE(review): 'Hallback' here and in the category looks like a misspelling
    # of 'Hallbeck'; kept because the key may be shared elsewhere - confirm.
    long_name = 'Chris Hallback - Maximumble (from Tapastic)'
    url = 'https://tapastic.com/series/Maximumble'
    _categories = ('HALLBACK',)
4682
4683
4684
class ChrisHallbeckMiniTapa(GenericTapasticComic):
    """Class to retrieve Chris Hallbeck's Minimumble comics (Tapastic edition)."""
    # Also on http://chrishallbeck.tumblr.com
    # Also on http://minimumble.com
    name = 'hallbeckmini-tapa'
    # Display name; author's name spelled "Hallbeck" as in the docstring and URLs.
    long_name = 'Chris Hallbeck - Minimumble (from Tapastic)'
    url = 'https://tapastic.com/series/Minimumble'
    # Category key deliberately left untouched: the same 'HALLBACK' value is
    # used by the sibling Chris Hallbeck classes and must stay identical.
    _categories = ('HALLBACK', )
4692
4693
4694
class ChrisHallbeckBiffTapa(GenericTapasticComic):
    """Class to retrieve Chris Hallbeck's The Book of Biff comics (Tapastic edition)."""
    # Also on http://chrishallbeck.tumblr.com
    # Also on http://thebookofbiff.com
    name = 'hallbeckbiff-tapa'
    # Display name; author's name spelled "Hallbeck" as in the docstring and URLs.
    long_name = 'Chris Hallbeck - The Book of Biff (from Tapastic)'
    url = 'https://tapastic.com/series/Biff'
    # Category key deliberately left untouched: the same 'HALLBACK' value is
    # used by the sibling Chris Hallbeck classes and must stay identical.
    _categories = ('HALLBACK', )
4702
4703
4704
class RandoWisTapa(GenericTapasticComic):
    """Retriever for the RandoWis series hosted on Tapastic."""
    # Other place where the comic is published:
    #  - https://randowis.com
    name = 'randowis-tapa'
    long_name = 'RandoWis (from Tapastic)'
    url = 'https://tapastic.com/series/RandoWis'
4710
4711
4712
class PigeonGazetteTapa(GenericTapasticComic):
    """Retriever for The Pigeon Gazette series hosted on Tapastic."""
    # Other place where the comic is published:
    #  - http://thepigeongazette.tumblr.com
    name = 'pigeon-tapa'
    long_name = 'The Pigeon Gazette (from Tapastic)'
    url = 'https://tapastic.com/series/The-Pigeon-Gazette'
4718
4719
4720
class TheOdd1sOutTapa(GenericTapasticComic):
    """Retriever for The Odd 1s Out series hosted on Tapastic."""
    # Other places where the comic is published:
    #  - http://theodd1sout.com
    #  - http://theodd1sout.tumblr.com
    name = 'theodd-tapa'
    long_name = 'The Odd 1s Out (from Tapastic)'
    url = 'https://tapastic.com/series/Theodd1sout'
4727
4728
4729
class TheWorldIsFlatTapa(GenericTapasticComic):
    """Retriever for The World Is Flat series hosted on Tapastic."""
    # Other place where the comic is published:
    #  - http://theworldisflatcomics.tumblr.com
    name = 'flatworld-tapa'
    long_name = 'The World Is Flat (from Tapastic)'
    url = 'https://tapastic.com/series/The-World-is-Flat'
4735
4736
4737
class MisterAndMeTapa(GenericTapasticComic):
    """Retriever for the Mister & Me series hosted on Tapastic."""
    # Other places where the comic is published:
    #  - http://www.mister-and-me.com
    #  - http://www.gocomics.com/mister-and-me
    name = 'mister-tapa'
    long_name = 'Mister & Me (from Tapastic)'
    url = 'https://tapastic.com/series/Mister-and-Me'
4744
4745
4746
class TalesOfAbsurdityTapa(GenericTapasticComic):
    """Class to retrieve Tales Of Absurdity comics (Tapastic edition)."""
    # Also on http://talesofabsurdity.com
    # Also on http://talesofabsurdity.tumblr.com
    name = 'absurdity-tapa'
    long_name = 'Tales of Absurdity (from Tapastic)'
    # https, like every other Tapastic series URL declared in this file.
    url = 'https://tapastic.com/series/Tales-Of-Absurdity'
    _categories = ('ABSURDITY', )
4754
4755
4756
class BFGFSTapa(GenericTapasticComic):
    """Retriever for the BFGFS series hosted on Tapastic."""
    # Other places where the comic is published:
    #  - http://bfgfs.com
    #  - http://bfgfs.tumblr.com
    name = 'bfgfs-tapa'
    long_name = 'BFGFS (from Tapastic)'
    url = 'https://tapastic.com/series/BFGFS'
4763
4764
4765
class DoodleForFoodTapa(GenericTapasticComic):
    """Retriever for the Doodle For Food series hosted on Tapastic."""
    # Other place where the comic is published:
    #  - http://doodleforfood.com
    name = 'doodle-tapa'
    long_name = 'Doodle For Food (from Tapastic)'
    url = 'https://tapastic.com/series/Doodle-for-Food'
4771
4772
4773
class MrLovensteinTapa(GenericTapasticComic):
    """Retriever for the Mr Lovenstein series hosted on Tapastic."""
    # NOTE(review): the previous "Also on" comment listed
    # https://tapastic.com/series/MrLovenstein, i.e. this class's own URL;
    # presumably it was meant to point at the comic's other site - confirm.
    name = 'mrlovenstein-tapa'
    long_name = 'Mr. Lovenstein (from Tapastic)'
    url = 'https://tapastic.com/series/MrLovenstein'
4779
4780
4781
class CassandraCalinTapa(GenericTapasticComic):
    """Retriever for the C. Cassandra series hosted on Tapastic."""
    # Other places where the comic is published:
    #  - http://cassandracalin.com
    #  - http://c-cassandra.tumblr.com
    name = 'cassandra-tapa'
    long_name = 'Cassandra Calin (from Tapastic)'
    url = 'https://tapastic.com/series/C-Cassandra-comics'
4788
4789
4790
class WafflesAndPancakes(GenericTapasticComic):
    """Retriever for the Waffles And Pancakes series hosted on Tapastic."""
    # Other place where the comic is published:
    #  - http://wandpcomic.com
    name = 'waffles'
    long_name = 'Waffles And Pancakes'
    url = 'https://tapastic.com/series/Waffles-and-Pancakes'
4796
4797
4798
class YesterdaysPopcornTapastic(GenericTapasticComic):
    """Retriever for the Yesterday's Popcorn series hosted on Tapastic."""
    # Other places where the comic is published:
    #  - http://www.yesterdayspopcorn.com
    #  - http://yesterdayspopcorn.tumblr.com
    name = 'popcorn-tapa'
    long_name = 'Yesterday\'s Popcorn (from Tapastic)'
    url = 'https://tapastic.com/series/Yesterdays-Popcorn'
4805
4806
4807
class OurSuperAdventureTapastic(GenericTapasticComic):
    """Retriever for the Our Super Adventure series hosted on Tapastic."""
    # Other place where the comic is published:
    #  - http://www.oursuperadventure.com
    # Further links recorded with it:
    #  - http://sarahssketchbook.tumblr.com
    #  - http://sarahgraley.com
    name = 'superadventure-tapastic'
    long_name = 'Our Super Adventure (from Tapastic)'
    url = 'https://tapastic.com/series/Our-Super-Adventure'
4815
4816
4817
class NamelessPCs(GenericTapasticComic):
    """Retriever for the Nameless PCs series hosted on Tapastic."""
    # Other place where the comic is published:
    #  - http://namelesspcs.com
    name = 'namelesspcs-tapa'
    long_name = 'NamelessPCs (from Tapastic)'
    url = 'https://tapastic.com/series/NamelessPC'
4823
4824
4825
class UbertoolTapa(GenericTapasticComic):
    """Retriever for the Ubertool series hosted on Tapastic."""
    # Other places where the comic is published:
    #  - http://ubertoolcomic.com
    #  - http://ubertool.tumblr.com
    name = 'ubertool-tapa'
    long_name = 'Ubertool (from Tapastic)'
    url = 'https://tapastic.com/series/ubertool'
    _categories = ('UBERTOOL', )
4833
4834
4835
class SmallBlueYonderTapa(GenericTapasticComic):
    """Retriever for the Small Blue Yonder series hosted on Tapastic."""
    # Other place where the comic is published:
    #  - http://www.smallblueyonder.com
    name = 'smallblue-tapa'
    long_name = 'Small Blue Yonder (from Tapastic)'
    url = 'https://tapastic.com/series/Small-Blue-Yonder'
4841
4842
4843
def get_subclasses(klass):
    """Return every direct and indirect subclass of `klass` as a list.

    Direct subclasses come first, followed by the descendants of each direct
    subclass in turn. A class reachable through several inheritance paths is
    listed once per path, so the result may contain duplicates.
    """
    direct = klass.__subclasses__()
    # Recurse on each child; descendants are appended after all direct ones.
    nested = [sub for child in direct for sub in get_subclasses(child)]
    return direct + nested
4849
4850
4851
def remove_st_nd_rd_th_from_date(string):
    """Strip English ordinal suffixes (1st/2nd/3rd/4th) from a date string.

    Only an 'st', 'nd', 'rd' or 'th' that immediately follows a digit is
    removed, so that words containing those letters ('August', 'Monday',
    'Sunday', 'Saturday', ...) are left intact and the result can be fed
    to a strptime-based parser.

    Returns the string with the suffixes removed ('August 1st' -> 'August 1').
    """
    # Lookbehind keeps the digit and drops only the suffix glued to it.
    return re.sub(r'(?<=\d)(?:st|nd|rd|th)', '', string)
4859
4860
4861
def string_to_date(string, date_format, local=DEFAULT_LOCAL):
    """Convert a string to a date object.

    Wrapper around datetime.datetime.strptime which parses `string` according
    to `date_format` while the `local` locale is active (so that locale
    dependent directives such as month names are understood), then restores
    the locale that was active before the call.

    The previous locale is restored even when parsing fails; note that
    locale.setlocale changes the locale for the whole process.

    Raises ValueError (from strptime) if `string` does not match
    `date_format`, and locale.Error if `local` cannot be set.
    """
    # format described in https://docs.python.org/2/library/datetime.html#strftime-and-strptime-behavior
    prev_locale = locale.setlocale(locale.LC_ALL)
    if local == prev_locale:
        # Requested locale already active: nothing to switch or restore.
        return datetime.datetime.strptime(string, date_format).date()
    locale.setlocale(locale.LC_ALL, local)
    try:
        return datetime.datetime.strptime(string, date_format).date()
    finally:
        # Always put the caller's locale back, including on ValueError.
        locale.setlocale(locale.LC_ALL, prev_locale)
4872
4873
4874
# Registry of comic classes, built once when the module is imported.
# Every direct/indirect subclass of GenericComic, de-duplicated.
COMICS = set(get_subclasses(GenericComic))
# Only classes declaring a name are kept.
# NOTE(review): classes whose name is None are presumably generic/abstract
# bases - confirm against GenericComic.
VALID_COMICS = [comic for comic in COMICS if comic.name is not None]
# Lookup from comic name to class; the check fails if two classes share a name.
COMIC_NAMES = {comic.name: comic for comic in VALID_COMICS}
assert len(VALID_COMICS) == len(COMIC_NAMES)
# Same uniqueness check on the Python class names.
CLASS_NAMES = {comic.__name__ for comic in VALID_COMICS}
assert len(VALID_COMICS) == len(CLASS_NAMES)
4880