Completed
Push — master ( 3e6c11...f98615 )
by De
01:02
created

comics.py (7 issues)

1
#! /usr/bin/python3
2
# vim: set expandtab tabstop=4 shiftwidth=4 :
3
"""Module to retrieve webcomics"""
4
5
from comic_abstract import GenericComic, get_date_for_comic
6
import re
7
from datetime import date, timedelta
8
import datetime
9
from urlfunctions import get_soup_at_url, urljoin_wrapper,\
10
    convert_iri_to_plain_ascii_uri, load_json_at_url, urlopen_wrapper
11
import json
12
import locale
13
import urllib
14
15
DEFAULT_LOCAL = 'en_GB.UTF-8'
16
17
18
class Xkcd(GenericComic):
    """Class to retrieve Xkcd comics."""
    name = 'xkcd'
    long_name = 'xkcd'
    url = 'http://xkcd.com'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generator to get the next comic. Implementation of GenericComic's abstract method.

        Iteration starts right after `last_comic['num']` (or at 1 when
        `last_comic` is falsy) and stops at the 'num' value found in the
        `info.0.json` document at the root of the site.
        """
        first_num = last_comic['num'] if last_comic else 0
        last_num = load_json_at_url(
            urljoin_wrapper(cls.url, 'info.0.json'))['num']

        for num in range(first_num + 1, last_num + 1):
            if num == 404:
                # Number 404 is deliberately skipped
                # (presumably there is no such comic - TODO confirm).
                continue
            json_url = urljoin_wrapper(cls.url, '%d/info.0.json' % num)
            comic = load_json_at_url(json_url)
            # 'img' is stored as a list, like in the other comic classes.
            comic['img'] = [comic['img']]
            comic['prefix'] = '%d-' % num
            comic['json_url'] = json_url
            comic['url'] = urljoin_wrapper(cls.url, str(num))
            # The date fields are converted with int() before being stored.
            comic['day'] = int(comic['day'])
            comic['month'] = int(comic['month'])
            comic['year'] = int(comic['year'])
            assert comic['num'] == num
            yield comic
44
45
46
# Helper functions corresponding to get_url_from_link/get_url_from_archive_element
47
48
49
@classmethod
def get_href(cls, link):
    """Implementation of get_url_from_link/get_url_from_archive_element.

    Return the 'href' item of `link` unchanged.
    """
    return link['href']
53
54
55
@classmethod
def join_cls_url_to_href(cls, link):
    """Implementation of get_url_from_link/get_url_from_archive_element.

    Return the 'href' item of `link` joined to `cls.url` with `urljoin_wrapper`.
    """
    return urljoin_wrapper(cls.url, link['href'])
59
60
61
class GenericNavigableComic(GenericComic):
    """Generic class for "navigable" comics : with first/next arrows.

    This class applies to comics where previous and next comics can be
    accessed from a given comic. Once given a starting point (either
    the first comic or the last comic retrieved), it will handle the
    navigation, the retrieval of the soup object and the setting of
    the 'url' attribute on retrieved comics. This limits a lot the
    amount of boilerplate code in the different implementation classes.

    The `get_next_comic` method is implemented in terms of new
    more specialized methods to be implemented/overridden:
        - get_first_comic_link
        - get_navi_link
        - get_comic_info
        - get_url_from_link
    """
    _categories = ('NAVIGABLE', )

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        Sometimes this can be retrieved from any comic page, sometimes from
        the archive page, sometimes it doesn't exist at all and one has
        to iterate backward to find it before hardcoding the result found.
        """
        raise NotImplementedError

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next (or previous - for dev purposes) comic."""
        raise NotImplementedError

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics."""
        raise NotImplementedError

    @classmethod
    def get_url_from_link(cls, link):
        """Get url corresponding to a link. Default implementation is similar to get_href."""
        return link['href']

    @classmethod
    def get_next_link(cls, last_soup):
        """Get link to next comic."""
        link = cls.get_navi_link(last_soup, True)
        cls.log("Next link is %s" % link)
        return link

    @classmethod
    def get_prev_link(cls, last_soup):
        """Get link to previous comic."""
        link = cls.get_navi_link(last_soup, False)
        cls.log("Prev link is %s" % link)
        return link

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generic implementation of get_next_comic for navigable comics.

        Starts from the link following `last_comic['url']`, or from the first
        comic link when `last_comic` is falsy, and yields one dict per comic
        for which `get_comic_info` does not return None. The 'url' key is set
        here, so `get_comic_info` must not provide it.
        """
        url = last_comic['url'] if last_comic else None
        cls.log("starting 'get_next_comic' from %s" % url)
        next_comic = \
            cls.get_next_link(get_soup_at_url(url)) \
            if url else \
            cls.get_first_comic_link()
        cls.log("next/first comic will be %s (url is %s)" % (str(next_comic), url))
        # cls.check_navigation(url)
        while next_comic:
            prev_url, url = url, cls.get_url_from_link(next_comic)
            if prev_url == url:
                # A "next" link leading to the current page: stop here
                # instead of looping forever.
                cls.log("got same url %s" % url)
                break
            cls.log("about to get %s (%s)" % (url, str(next_comic)))
            soup = get_soup_at_url(url)
            comic = cls.get_comic_info(soup, next_comic)
            if comic is not None:
                assert 'url' not in comic
                comic['url'] = url
                yield comic
            next_comic = cls.get_next_link(soup)
            cls.log("next comic will be %s" % str(next_comic))

    @classmethod
    def check_first_link(cls):
        """Check that navigation to first comic seems to be working - for dev purposes."""
        # `import urllib` alone does not guarantee that the `urllib.error`
        # submodule is loaded: import it explicitly before using it.
        import urllib.error
        cls.log("about to check first link")
        ok = True
        firstlink = cls.get_first_comic_link()
        if firstlink is None:
            print("From %s : no first link" % cls.url)
            ok = False
        else:
            firsturl = cls.get_url_from_link(firstlink)
            try:
                get_soup_at_url(firsturl)
            except urllib.error.HTTPError:
                print("From %s : invalid first url" % cls.url)
                ok = False
        cls.log("checked first link -> returned %d" % ok)
        return ok

    @classmethod
    def check_prev_next_links(cls, url):
        """Check that navigation to prev/next from a given URL seems to be working - for dev purposes."""
        cls.log("about to check prev/next from %s" % url)
        ok = True
        if url is None:
            prevlink, nextlink = None, None
        else:
            soup = get_soup_at_url(url)
            prevlink, nextlink = cls.get_prev_link(soup), cls.get_next_link(soup)
        if prevlink is None and nextlink is None:
            print("From %s : no previous nor next" % url)
            ok = False
        else:
            if prevlink:
                # Going backward then forward should lead back to `url`.
                prevurl = cls.get_url_from_link(prevlink)
                prevsoup = get_soup_at_url(prevurl)
                prevnextlink = cls.get_next_link(prevsoup)
                prevnext = cls.get_url_from_link(prevnextlink) if prevnextlink is not None else "NO URL"
                if prevnext != url:
                    print("From %s, going backward then forward leads to %s" % (url, prevnext))
                    ok = False
            if nextlink:
                nexturl = cls.get_url_from_link(nextlink)
                # A next link pointing to the page itself is not followed.
                if nexturl != url:
                    # Going forward then backward should lead back to `url`.
                    nextsoup = get_soup_at_url(nexturl)
                    nextprevlink = cls.get_prev_link(nextsoup)
                    nextprev = cls.get_url_from_link(nextprevlink) if nextprevlink is not None else "NO URL"
                    if nextprev != url:
                        print("From %s, going forward then backward leads to %s" % (url, nextprev))
                        ok = False
        cls.log("checked prev/next from %s -> returned %d" % (url, ok))
        return ok

    @classmethod
    def check_navigation(cls, url):
        """Check that navigation functions seem to be working - for dev purposes."""
        cls.log("about to check navigation from %s" % url)
        first = cls.check_first_link()
        prevnext = cls.check_prev_next_links(url)
        ok = first and prevnext
        cls.log("checked navigation from %s -> returned %d" % (url, ok))
        return ok
207
208
209
class GenericListableComic(GenericComic):
    """Generic class for "listable" comics : with a list of comics (aka 'archive')

    The `get_next_comic` method is implemented in terms of new
    more specialized methods to be implemented/overridden:
        - get_archive_elements
        - get_url_from_archive_element
        - get_comic_info
    """
    _categories = ('LISTABLE', )

    @classmethod
    def get_archive_elements(cls):
        """Get the archive elements (iterable)."""
        raise NotImplementedError

    @classmethod
    def get_url_from_archive_element(cls, archive_elt):
        """Get url corresponding to an archive element."""
        raise NotImplementedError

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Get information about a particular comics."""
        raise NotImplementedError

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generic implementation of get_next_comic for listable comics.

        Archive elements are skipped until the one whose url is
        `last_comic['url']` has been seen; every later element is fetched and
        yielded (when `get_comic_info` does not return None). With a falsy
        `last_comic`, every element is considered.
        """
        waiting_for_url = last_comic['url'] if last_comic else None
        # Materialised as a list because its length is reported below.
        archive_elts = list(cls.get_archive_elements())
        for archive_elt in archive_elts:
            url = cls.get_url_from_archive_element(archive_elt)
            cls.log("considering %s" % url)
            if waiting_for_url is None:
                cls.log("about to get %s (%s)" % (url, str(archive_elt)))
                soup = get_soup_at_url(url)
                comic = cls.get_comic_info(soup, archive_elt)
                if comic is not None:
                    assert 'url' not in comic
                    comic['url'] = url
                    yield comic
            elif waiting_for_url == url:
                # Last retrieved comic found: start yielding from the next one.
                waiting_for_url = None
        if waiting_for_url is not None:
            print("Did not find %s in the %d comics: there might be a problem" %
                  (waiting_for_url, len(archive_elts)))
256
257
# Helper functions corresponding to get_first_comic_link/get_navi_link
258
259
260
@classmethod
def get_link_rel_next(cls, last_soup, next_):
    """Implementation of get_navi_link.

    Look in `last_soup` for a `link` element with rel 'next' (when `next_`
    is true) or rel 'prev' (otherwise).
    """
    return last_soup.find('link', rel='next' if next_ else 'prev')
264
265
266
@classmethod
def get_a_rel_next(cls, last_soup, next_):
    """Implementation of get_navi_link.

    Look in `last_soup` for an `a` element with rel 'next' (when `next_`
    is true) or rel 'prev' (otherwise).
    """
    return last_soup.find('a', rel='next' if next_ else 'prev')
270
271
272
@classmethod
def get_a_navi_navinext(cls, last_soup, next_):
    """Implementation of get_navi_link.

    Look in `last_soup` for an `a` element of class 'navi navi-next' (when
    `next_` is true) or 'navi navi-prev' (otherwise).
    """
    return last_soup.find('a', class_='navi navi-next' if next_ else 'navi navi-prev')
276
277
278
@classmethod
def get_a_navi_comicnavnext_navinext(cls, last_soup, next_):
    """Implementation of get_navi_link.

    Look in `last_soup` for an `a` element of class
    'navi comic-nav-next navi-next' (when `next_` is true) or
    'navi comic-nav-previous navi-prev' (otherwise).
    """
    return last_soup.find('a', class_='navi comic-nav-next navi-next' if next_ else 'navi comic-nav-previous navi-prev')
282
283
284
@classmethod
def get_a_comicnavbase_comicnavnext(cls, last_soup, next_):
    """Implementation of get_navi_link.

    Look in `last_soup` for an `a` element of class
    'comic-nav-base comic-nav-next' (when `next_` is true) or
    'comic-nav-base comic-nav-previous' (otherwise).
    """
    return last_soup.find('a', class_='comic-nav-base comic-nav-next' if next_ else 'comic-nav-base comic-nav-previous')
288
289
290
@classmethod
def get_a_navi_navifirst(cls):
    """Implementation of get_first_comic_link.

    Look on the page at `cls.url` for an `a` element of class 'navi navi-first'.
    """
    return get_soup_at_url(cls.url).find('a', class_='navi navi-first')
294
295
296
@classmethod
def get_div_navfirst_a(cls):
    """Implementation of get_first_comic_link.

    Look on the page at `cls.url` for the first `a` element inside the `div`
    of class "nav-first". Return None when there is no such `div`, instead of
    failing with an AttributeError on the missing element.
    """
    div = get_soup_at_url(cls.url).find('div', class_="nav-first")
    return div.find('a') if div is not None else None
300
301
302
@classmethod
def get_a_comicnavbase_comicnavfirst(cls):
    """Implementation of get_first_comic_link.

    Look on the page at `cls.url` for an `a` element of class
    'comic-nav-base comic-nav-first'.
    """
    return get_soup_at_url(cls.url).find('a', class_='comic-nav-base comic-nav-first')
306
307
308
@classmethod
def simulate_first_link(cls):
    """Implementation of get_first_comic_link creating a link-like object from
    a URL provided by the class.

    The returned object is a dict whose 'href' item is `cls.first_url`.

    Note: The first URL can easily be found using :
    `get_first_comic_link = navigate_to_first_comic`.
    """
    return {'href': cls.first_url}
317
318
319
@classmethod
def navigate_to_first_comic(cls):
    """Implementation of get_first_comic_link navigating from a user provided
    URL to the first comic.

    Sometimes, the first comic cannot be reached directly so to start
    from the first comic one has to go to the previous comic until
    there is no previous comic. Once this URL is reached, it
    is better to hardcode it but for development purposes, it
    is convenient to have an automatic way to find it.

    Then, the URL found can easily be used via `simulate_first_link`.

    The starting URL is read from standard input and every visited URL is
    printed. The result is a link-like dict with the last URL as 'href'.
    """
    url = input("Get starting URL: ")
    print(url)
    comic = cls.get_prev_link(get_soup_at_url(url))
    # Follow "previous" links until a page has none.
    while comic:
        url = cls.get_url_from_link(comic)
        print(url)
        comic = cls.get_prev_link(get_soup_at_url(url))
    return {'href': url}
340
341
342
class GenericEmptyComic(GenericComic):
    """Generic class for comics where nothing is to be done.

    It can be useful to deactivate temporarily comics that do not work
    properly by replacing `class MyComic(GenericWhateverComic)` with
    `class MyComic(GenericEmptyComic, GenericWhateverComic)`."""
    _categories = ('EMPTY', )

    @classmethod
    def get_next_comic(cls, last_comic):
        """Implementation of get_next_comic returning no comics."""
        cls.log("comic is considered as empty - returning no comic")
        return []
355
356
357 View Code Duplication
class ExtraFabulousComics(GenericNavigableComic):
    """Class to retrieve Extra Fabulous Comics."""
    name = 'efc'
    long_name = 'Extra Fabulous Comics'
    url = 'http://extrafabulouscomics.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Images are the `img` elements whose src starts with
        `<cls.url>/wp-content/uploads/`; title and date come from `meta` tags.
        """
        # The url is escaped so that its dots are matched literally.
        img_src_re = re.compile('^%s/wp-content/uploads/' % re.escape(cls.url))
        imgs = soup.find_all('img', src=img_src_re)
        title = soup.find('meta', property='og:title')['content']
        # Only the first 10 characters (parsed as "%Y-%m-%d") are kept.
        date_str = soup.find('meta', property='article:published_time')['content'][:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        return {
            'title': title,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'prefix': title + '-'
        }
381
382
383 View Code Duplication
class GenericLeMondeBlog(GenericNavigableComic):
    """Generic class to retrieve comics from Le Monde blogs."""
    _categories = ('LEMONDE', 'FRANCAIS')
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    # To be overridden by subclasses: read by `simulate_first_link`.
    first_url = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The short link, title and images come from `link`/`meta` tags; the
        date is parsed from the "entry-date" span with the "fr_FR.utf8" locale.
        """
        url2 = soup.find('link', rel='shortlink')['href']
        title = soup.find('meta', property='og:title')['content']
        date_str = soup.find("span", class_="entry-date").string
        day = string_to_date(date_str, "%d %B %Y", "fr_FR.utf8")
        imgs = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'url2': url2,
            'img': [convert_iri_to_plain_ascii_uri(i['content']) for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
406
407
408
class ZepWorld(GenericLeMondeBlog):
    """Class to retrieve Zep World comics."""
    name = "zep"
    long_name = "Zep World"
    url = "http://zepworld.blog.lemonde.fr"
    first_url = "http://zepworld.blog.lemonde.fr/2014/10/31/bientot-le-blog-de-zep/"
414
415
416
class Vidberg(GenericLeMondeBlog):
    """Class to retrieve Vidberg comics."""
    name = 'vidberg'
    long_name = "Vidberg - l'actu en patates"
    url = "http://vidberg.blog.lemonde.fr"
    # Not the first but I didn't find an efficient way to retrieve it
    first_url = "http://vidberg.blog.lemonde.fr/2012/02/09/revue-de-campagne-la-campagne-du-modem-semballe/"
423
424
425
class Plantu(GenericLeMondeBlog):
    """Class to retrieve Plantu comics."""
    name = 'plantu'
    long_name = "Plantu"
    url = "http://plantu.blog.lemonde.fr"
    first_url = "http://plantu.blog.lemonde.fr/2014/10/28/stress-test-a-bruxelles/"
431
432
433
class XavierGorce(GenericLeMondeBlog):
    """Class to retrieve Xavier Gorce comics."""
    name = 'gorce'
    long_name = "Xavier Gorce"
    url = "http://xaviergorce.blog.lemonde.fr"
    first_url = "http://xaviergorce.blog.lemonde.fr/2015/01/09/distinction/"
439
440
441
class CartooningForPeace(GenericLeMondeBlog):
    """Class to retrieve Cartooning For Peace comics."""
    name = 'forpeace'
    long_name = "Cartooning For Peace"
    url = "http://cartooningforpeace.blog.lemonde.fr"
    first_url = "http://cartooningforpeace.blog.lemonde.fr/2014/12/15/bado/"
447
448
449
class Aurel(GenericLeMondeBlog):
    """Class to retrieve Aurel comics."""
    name = 'aurel'
    long_name = "Aurel"
    url = "http://aurel.blog.lemonde.fr"
    first_url = "http://aurel.blog.lemonde.fr/2014/09/29/le-senat-repasse-a-droite/"
455
456
457
class LesCulottees(GenericLeMondeBlog):
    """Class to retrieve Les Culottees comics."""
    name = 'culottees'
    long_name = 'Les Culottees'
    url = "http://lesculottees.blog.lemonde.fr"
    first_url = "http://lesculottees.blog.lemonde.fr/2016/01/11/clementine-delait-femme-a-barbe/"
463
464
465
class UneAnneeAuLycee(GenericLeMondeBlog):
    """Class to retrieve Une Annee Au Lycee comics."""
    name = 'lycee'
    long_name = 'Une Annee au Lycee'
    url = 'http://uneanneeaulycee.blog.lemonde.fr'
    first_url = "http://uneanneeaulycee.blog.lemonde.fr/2016/06/13/la-semaine-du-bac-est-arrivee/"
471
472
473 View Code Duplication
class Rall(GenericNavigableComic):
    """Class to retrieve Ted Rall comics."""
    # Also on http://www.gocomics.com/tedrall
    name = 'rall'
    long_name = "Ted Rall"
    url = "http://rall.com/comic"
    _categories = ('RALL', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    # Not the first but I didn't find an efficient way to retrieve it
    first_url = "http://rall.com/2014/01/30/los-angeles-times-cartoon-well-miss-those-california-flowers"

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Images are the `img` elements of the "entry-content" div, minus the
        last 7 ones.
        """
        title = soup.find('meta', property='og:title')['content']
        author = soup.find("span", class_="author vcard").find("a").string
        date_str = soup.find("span", class_="entry-date").string
        day = string_to_date(date_str, "%B %d, %Y")
        desc = soup.find('meta', property='og:description')['content']
        imgs = soup.find('div', class_='entry-content').find_all('img')
        imgs = imgs[:-7]  # remove social media buttons
        return {
            'title': title,
            'author': author,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'description': desc,
            'img': [i['src'] for i in imgs],
        }
504
505
506
class Dilem(GenericNavigableComic):
    """Class to retrieve Ali Dilem comics."""
    name = 'dilem'
    long_name = 'Ali Dilem'
    url = 'http://information.tv5monde.com/dilem'
    _categories = ('FRANCAIS', )
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = "http://information.tv5monde.com/dilem/2004-06-26"

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Return the `a` element found in the relevant `li`, or None when
        there is no such `li`.
        """
        # prev is next / next is prev
        li = last_soup.find('li', class_='prev' if next_ else 'next')
        return li.find('a') if li else None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics."""
        short_url = soup.find('link', rel='shortlink')['href']
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        imgs = soup.find_all('meta', property='og:image')
        date_str = soup.find('span', property='dc:date')['content']
        # Only the first 10 characters (parsed as "%Y-%m-%d") are kept.
        date_str = date_str[:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        return {
            'short_url': short_url,
            'title': title,
            'img': [i['content'] for i in imgs],
            'day': day.day,
            'month': day.month,
            'year': day.year,
        }
540
541
542
class SpaceAvalanche(GenericNavigableComic):
    """Class to retrieve Space Avalanche comics."""
    name = 'avalanche'
    long_name = 'Space Avalanche'
    url = 'http://www.spaceavalanche.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        The link is a hardcoded dict providing the 'href' and 'title' items
        read by `get_url_from_link` and `get_comic_info`.
        """
        return {'href': "http://www.spaceavalanche.com/2009/02/02/irish-sea/", 'title': "Irish Sea"}

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The title comes from the link and the date from the three numeric
        path components of the url (year, month, day).
        """
        url_date_re = re.compile('.*/([0-9]*)/([0-9]*)/([0-9]*)/.*$')
        title = link['title']
        url = cls.get_url_from_link(link)
        year, month, day = [int(s)
                            for s in url_date_re.match(url).groups()]
        imgs = soup.find("div", class_="entry").find_all("img")
        return {
            'title': title,
            'day': day,
            'month': month,
            'year': year,
            'img': [i['src'] for i in imgs],
        }
570
571
572
class ZenPencils(GenericNavigableComic):
    """Class to retrieve ZenPencils comics."""
    # Also on http://zenpencils.tumblr.com
    # Also on http://www.gocomics.com/zen-pencils
    name = 'zenpencils'
    long_name = 'Zen Pencils'
    url = 'http://zenpencils.com'
    _categories = ('ZENPENCILS', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = "http://zenpencils.com/comic/1-ralph-waldo-emerson-make-them-cry/"

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Images are the `img` elements of the div with id 'comic'; their src
        is joined to `cls.url`.
        """
        imgs = soup.find('div', id='comic').find_all('img')
        # imgs2 = soup.find_all('meta', property='og:image')
        post = soup.find('div', class_='post-content')
        author = post.find("span", class_="post-author").find("a").string
        title = soup.find('meta', property='og:title')['content']
        date_str = post.find('span', class_='post-date').string
        day = string_to_date(date_str, "%B %d, %Y")
        # Sanity checks on the page: at least one image, whose alt and title
        # are equal and either empty or equal to the comic title.
        assert imgs
        assert all(i['alt'] == i['title'] for i in imgs)
        assert all(i['alt'] in (title, "") for i in imgs)
        desc = soup.find('meta', property='og:description')['content']
        return {
            'title': title,
            'description': desc,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
        }
607
608
609
class ItsTheTie(GenericNavigableComic):
    """Class to retrieve It's the tie comics."""
    # Also on http://itsthetie.tumblr.com
    # Also on https://tapastic.com/series/itsthetie
    name = 'tie'
    long_name = "It's the tie"
    url = "http://itsthetie.com"
    _categories = ('TIE', )
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Images are the og:image values followed by the 'data-oversrc' values
        not already listed, without the ones ending with
        "/2016/01/bonus-panel.png".
        """
        title = soup.find('h1', class_='comic-title').find('a').string
        date_str = soup.find('header', class_='comic-meta entry-meta').find('a').string
        day = string_to_date(date_str, "%B %d, %Y")
        # Bonus images may or may not be in meta og:image.
        imgs = soup.find_all('meta', property='og:image')
        imgs_src = [i['content'] for i in imgs]
        bonus = soup.find_all('img', attrs={'data-oversrc': True})
        bonus_src = [b['data-oversrc'] for b in bonus]
        all_imgs_src = imgs_src + [s for s in bonus_src if s not in imgs_src]
        all_imgs_src = [s for s in all_imgs_src if not s.endswith("/2016/01/bonus-panel.png")]
        # The tag meta is optional: default to an empty string.
        tag_meta = soup.find('meta', property='article:tag')
        tags = tag_meta['content'] if tag_meta else ""
        return {
            'title': title,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': all_imgs_src,
            'tags': tags,
        }
643
644
645
class PenelopeBagieu(GenericNavigableComic):
    """Class to retrieve comics from Penelope Bagieu's blog."""
    name = 'bagieu'
    long_name = 'Ma vie est tout a fait fascinante (Bagieu)'
    url = 'http://www.penelope-jolicoeur.com'
    _categories = ('FRANCAIS', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.penelope-jolicoeur.com/2007/02/ma-vie-mon-oeuv.html'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The date is parsed from the "date-header" h2 with the "fr_FR.utf8"
        locale; images are the `img` elements of the "entry-body" div.
        """
        date_str = soup.find('h2', class_='date-header').string
        day = string_to_date(date_str, "%A %d %B %Y", "fr_FR.utf8")
        imgs = soup.find('div', class_='entry-body').find_all('img')
        title = soup.find('h3', class_='entry-header').string
        return {
            'title': title,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
669
670
671 View Code Duplication
class OneOneOneOneComic(GenericNavigableComic):
    """Class to retrieve 1111 Comics."""
    # Also on http://comics1111.tumblr.com
    # Also on https://tapastic.com/series/1111-Comics
    name = '1111'
    long_name = '1111 Comics'
    url = 'http://www.1111comics.me'
    _categories = ('ONEONEONEONE', )
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Title and date come from the comic header; images are the og:image
        meta values.
        """
        title = soup.find('h1', class_='comic-title').find('a').string
        date_str = soup.find('header', class_='comic-meta entry-meta').find('a').string
        day = string_to_date(date_str, "%B %d, %Y")
        imgs = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [i['content'] for i in imgs],
        }
696
697
698 View Code Duplication
class AngryAtNothing(GenericNavigableComic):
    """Class to retrieve Angry at Nothing comics."""
    # Also on http://tapastic.com/series/Comics-yeah-definitely-comics-
    name = 'angry'
    long_name = 'Angry At Nothing'
    url = 'http://www.angryatnothing.net'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Title and date come from the comic header; images are the og:image
        meta values.
        """
        title = soup.find('h1', class_='comic-title').find('a').string
        date_str = soup.find('header', class_='comic-meta entry-meta').find('a').string
        day = string_to_date(date_str, "%B %d, %Y")
        imgs = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [i['content'] for i in imgs],
        }
721
722
723
class NeDroid(GenericNavigableComic):
    """Class to retrieve NeDroid comics."""
    name = 'nedroid'
    long_name = 'NeDroid'
    url = 'http://nedroid.com'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The comic number is extracted from the short link
        (`<cls.url>/?p=<num>`); exactly one image is expected in the div
        with id 'comic', its alt and title giving the comic titles.
        """
        # The url is escaped so that its dots are matched literally.
        short_url_re = re.compile(r'^%s/\?p=([0-9]*)' % re.escape(cls.url))
        short_url = cls.get_url_from_link(soup.find('link', rel='shortlink'))
        num = int(short_url_re.match(short_url).group(1))
        imgs = soup.find('div', id='comic').find_all('img')
        assert len(imgs) == 1
        title = imgs[0]['alt']
        title2 = imgs[0]['title']
        return {
            'short_url': short_url,
            'title': title,
            'title2': title2,
            'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
            'num': num,
        }
749
750
751
class Garfield(GenericNavigableComic):
    """Class to retrieve Garfield comics."""
    # Also on http://www.gocomics.com/garfield
    name = 'garfield'
    long_name = 'Garfield'
    url = 'https://garfield.com'
    _categories = ('GARFIELD', )
    get_first_comic_link = simulate_first_link
    first_url = 'https://garfield.com/comic/1978/06/19'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic."""
        return last_soup.find('a', class_='comic-arrow-right' if next_ else 'comic-arrow-left')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The date is extracted from the url of the link
        (`<cls.url>/comic/<year>/<month>/<day>`).
        """
        url = cls.get_url_from_link(link)
        # The url is escaped so that its dots are matched literally.
        date_re = re.compile('^%s/comic/([0-9]*)/([0-9]*)/([0-9]*)' % re.escape(cls.url))
        year, month, day = [int(s) for s in date_re.match(url).groups()]
        imgs = soup.find('div', class_='comic-display').find_all('img', class_='img-responsive')
        return {
            'month': month,
            'year': year,
            'day': day,
            'img': [i['src'] for i in imgs],
        }
779
780
781 View Code Duplication
class Dilbert(GenericNavigableComic):
    """Class to retrieve Dilbert comics."""
    # Also on http://www.gocomics.com/dilbert-classics
    name = 'dilbert'
    long_name = 'Dilbert'
    url = 'http://dilbert.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://dilbert.com/strip/1989-04-16'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor inside the next/previous navigation div, if any."""
        nav_class = 'nav-comic nav-right' if next_ else 'nav-comic nav-left'
        nav_div = last_soup.find('div', class_=nav_class)
        if not nav_div:
            return None
        return nav_div.find('a')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect comic information from the page's <meta> tags."""
        def meta_content(prop):
            """Content of the first <meta> tag carrying property `prop`."""
            return soup.find('meta', property=prop)['content']

        title = meta_content('og:title')
        img_metas = soup.find_all('meta', property='og:image')
        desc = meta_content('og:description')
        published = string_to_date(
            meta_content('article:publish_date'), "%B %d, %Y")
        author = meta_content('article:author')
        tags = meta_content('article:tag')
        return {
            'title': title,
            'description': desc,
            'img': [meta['content'] for meta in img_metas],
            'author': author,
            'tags': tags,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
817
818
819
class VictimsOfCircumsolar(GenericNavigableComic):
    """Class to retrieve VictimsOfCircumsolar comics."""
    # Also on http://victimsofcomics.tumblr.com
    name = 'circumsolar'
    long_name = 'Victims Of Circumsolar'
    url = 'http://www.victimsofcircumsolar.com'
    get_navi_link = get_a_navi_comicnavnext_navinext
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.victimsofcircumsolar.com/comic/modern-addiction'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, description and image urls for a comic page."""
        # Date is on the archive page
        # The last og:title / og:description meta tag of the page is used.
        title_metas = soup.find_all('meta', property='og:title')
        title = title_metas[-1]['content']
        desc_metas = soup.find_all('meta', property='og:description')
        desc = desc_metas[-1]['content']
        imgs = soup.find('div', id='comic').find_all('img')
        for img in imgs:
            assert img['title'] == img['alt'] == title
        return {
            'title': title,
            'description': desc,
            'img': [img['src'] for img in imgs],
        }
842
843
844
class ThreeWordPhrase(GenericNavigableComic):
    """Class to retrieve Three Word Phrase comics."""
    # Also on http://www.threewordphrase.tumblr.com
    name = 'threeword'
    long_name = 'Three Word Phrase'
    url = 'http://threewordphrase.com'
    get_url_from_link = join_cls_url_to_href
    # Suffixes of image urls that are site decoration, not comic panels.
    _ignored_img_suffixes = (
        'link.gif', '32.png', 'twpbookad.jpg',
        'merchad.jpg', 'header.gif', 'tipjar.jpg')

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        Returns the element wrapping the "first" button image, or None when
        the button is not found on the home page.
        """
        img = get_soup_at_url(cls.url).find('img', src='/firstlink.gif')
        return None if img is None else img.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns None when the button image is missing or when its parent
        carries no href (no comic in that direction).
        """
        img = last_soup.find('img', src='/nextlink.gif' if next_ else '/prevlink.gif')
        if img is None:
            return None
        link = img.parent
        return None if link.get('href') is None else link

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Every <img> of the page whose src does not end with one of the
        known decoration suffixes is treated as part of the comic. Images
        without a src attribute are skipped.
        """
        title = soup.find('title')
        imgs = [img for img in soup.find_all('img')
                if img.get('src') and
                not img['src'].endswith(cls._ignored_img_suffixes)]
        return {
            'title': title.string if title else None,
            'title2': '  '.join(img.get('alt') for img in imgs if img.get('alt')),
            'img': [urljoin_wrapper(cls.url, img['src']) for img in imgs],
        }
876
877
878
class DeadlyPanel(GenericEmptyComic, GenericNavigableComic):
    """Class to retrieve Deadly Panel comics."""
    # Also on https://tapastic.com/series/deadlypanel
    # Also on http://deadlypanel.tumblr.com
    name = 'deadly'
    long_name = 'Deadly Panel'
    url = 'http://www.deadlypanel.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the image urls found in div#comic of a comic page."""
        comic_div = soup.find('div', id='comic')
        imgs = comic_div.find_all('img')
        for img in imgs:
            # alt and title are expected to carry the same text.
            assert img['alt'] == img['title']
        return {
            'img': [img['src'] for img in imgs],
        }
896
897
898 View Code Duplication
class TheGentlemanArmchair(GenericNavigableComic):
    """Class to retrieve The Gentleman Armchair comics."""
    name = 'gentlemanarmchair'
    long_name = 'The Gentleman Armchair'
    url = 'http://thegentlemansarmchair.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, author, publication date and image urls of a comic."""
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        date_span = soup.find('span', class_='post-date')
        published = string_to_date(date_span.string, "%B %d, %Y")
        comic_div = soup.find('div', id='comic')
        return {
            'img': [img['src'] for img in comic_div.find_all('img')],
            'title': title,
            'author': author,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
922
923
924 View Code Duplication
class ImogenQuest(GenericNavigableComic):
    """Class to retrieve Imogen Quest comics."""
    # Also on http://imoquest.tumblr.com
    name = 'imogen'
    long_name = 'Imogen Quest'
    url = 'http://imogenquest.net'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with the publication date ('day', 'month', 'year'),
        the image urls ('img'), the post title ('title'), the hover text
        of the first image ('title2', empty when the page has no image)
        and the author.
        """
        title = soup.find('h2', class_='post-title').string
        author = soup.find("span", class_="post-author").find("a").string
        date_str = soup.find('span', class_='post-date').string
        day = string_to_date(date_str, '%B %d, %Y')
        imgs = soup.find('div', class_='comicpane').find_all('img')
        assert all(i['alt'] == i['title'] for i in imgs)
        # A page without any image must not crash on imgs[0].
        title2 = imgs[0]['title'] if imgs else ''
        return {
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [i['src'] for i in imgs],
            'title': title,
            'title2': title2,
            'author': author,
        }
952
953
954 View Code Duplication
class MyExtraLife(GenericNavigableComic):
    """Class to retrieve My Extra Life comics."""
    name = 'extralife'
    long_name = 'My Extra Life'
    url = 'http://www.myextralife.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the "first comic" navigation anchor of the home page."""
        home = get_soup_at_url(cls.url)
        return home.find('a', class_='comic_nav_link first_comic_link')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, image urls and publication date of a comic page."""
        title = soup.find("h1", class_="comic_title").string
        date_span = soup.find("span", class_="comic_date")
        published = string_to_date(date_span.string, "%B %d, %Y")
        imgs = soup.find_all("img", class_="comic")
        for img in imgs:
            assert img['alt'] == img['title'] == title
        # Images with an empty src are left out.
        srcs = [img['src'] for img in imgs if img["src"]]
        return {
            'title': title,
            'img': srcs,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
981
982
983
class SaturdayMorningBreakfastCereal(GenericNavigableComic):
    """Class to retrieve Saturday Morning Breakfast Cereal comics."""
    # Also on http://www.gocomics.com/saturday-morning-breakfast-cereal
    # Also on http://smbc-comics.tumblr.com
    name = 'smbc'
    long_name = 'Saturday Morning Breakfast Cereal'
    url = 'http://www.smbc-comics.com'
    _categories = ('SMBC', )
    get_navi_link = get_a_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics."""
        return get_soup_at_url(cls.url).find('a', rel='start')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The main image is img#cc-comic. An optional extra image is read
        from div#aftercomic when that div exists and holds an <img> with
        a non-empty src.
        """
        image1 = soup.find('img', id='cc-comic')
        imgs = [image1['src']]
        aftercomic = soup.find('div', id='aftercomic')
        # The div may be absent, or present without any image inside.
        image2 = aftercomic.find('img') if aftercomic else None
        if image2 is not None and image2.get('src'):
            imgs.append(image2['src'])
        date_str = soup.find('div', class_='cc-publishtime').contents[0]
        day = string_to_date(date_str, "%B %d, %Y")
        return {
            'title': image1['title'],
            'img': [urljoin_wrapper(cls.url, i) for i in imgs],
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
1015
1016
1017
class PerryBibleFellowship(GenericListableComic):
    """Class to retrieve Perry Bible Fellowship comics."""
    name = 'pbf'
    long_name = 'Perry Bible Fellowship'
    url = 'http://pbfcomics.com'
    get_url_from_archive_element = join_cls_url_to_href

    @classmethod
    def get_archive_elements(cls):
        """Return the home page links of the form /<num>/, in reverse order."""
        home = get_soup_at_url(cls.url)
        comic_links = home.find_all('a', href=re.compile('^/[0-9]*/$'))
        return reversed(comic_links)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return number, name and image url of the comic behind `link`."""
        comic_url = cls.get_url_from_archive_element(link)
        img_src_re = re.compile('^/archive_b/PBF.*')
        name = link.string
        num = int(link['name'])
        # The link's name attribute and its href must agree on the number.
        assert link['href'] == '/%d/' % num
        imgs = soup.find_all('img', src=img_src_re)
        assert len(imgs) == 1
        assert imgs[0]['alt'] == name
        return {
            'num': num,
            'name': name,
            'img': [urljoin_wrapper(comic_url, img['src']) for img in imgs],
            'prefix': '%d-' % num,
        }
1047
1048
1049 View Code Duplication
class Mercworks(GenericNavigableComic):
    """Class to retrieve Mercworks comics."""
    # Also on http://mercworks.tumblr.com
    name = 'mercworks'
    long_name = 'Mercworks'
    url = 'http://mercworks.net'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect comic information from the page's <meta> tags."""
        title = soup.find('meta', property='og:title')['content']
        desc_meta = soup.find('meta', property='og:description')
        if desc_meta:
            desc = desc_meta['content']
        else:
            desc = ""
        published_meta = soup.find('meta', property='article:published_time')
        # Only the leading YYYY-MM-DD part of the timestamp is parsed.
        iso_day = published_meta['content'][:10]
        published = string_to_date(iso_day, "%Y-%m-%d")
        img_metas = soup.find_all('meta', property='og:image')
        return {
            'img': [meta['content'] for meta in img_metas],
            'title': title,
            'desc': desc,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
1075
1076
1077
class BerkeleyMews(GenericListableComic):
    """Class to retrieve Berkeley Mews comics."""
    # Also on http://mews.tumblr.com
    # Also on http://www.gocomics.com/berkeley-mews
    name = 'berkeley'
    long_name = 'Berkeley Mews'
    url = 'http://www.berkeleymews.com'
    _categories = ('BERKELEY', )
    get_url_from_archive_element = get_href
    comic_num_re = re.compile('%s/\\?p=([0-9]*)$' % url)

    @classmethod
    def get_archive_elements(cls):
        """Return the archive page links to comics, in reverse order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, "?page_id=2"))
        comic_links = archive_soup.find_all('a', href=cls.comic_num_re)
        return reversed(comic_links)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return number, titles, image url and date of a comic.

        The date is parsed from the <y>-<m>-<d>- prefix of the image file
        name; the number comes from the `?p=<num>` part of the comic url.
        """
        img_date_re = re.compile('.*/([0-9]*)-([0-9]*)-([0-9]*)-.*')
        comic_url = cls.get_url_from_archive_element(link)
        num = int(cls.comic_num_re.match(comic_url).group(1))
        img = soup.find('div', id='comic').find('img')
        assert img['alt'] == img['title']
        hover_text = img['title']
        img_url = img['src']
        year, month, day = map(int, img_date_re.match(img_url).groups())
        return {
            'num': num,
            'title': link.string,
            'title2': hover_text,
            'img': [img_url],
            'year': year,
            'month': month,
            'day': day,
        }
1113
1114
1115
class GenericBouletCorp(GenericNavigableComic):
    """Generic class to retrieve BouletCorp comics in different languages."""
    # Also on http://bouletcorp.tumblr.com
    _categories = ('BOULET', )
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the first anchor of the home page's div#centered_nav."""
        nav = get_soup_at_url(cls.url).find('div', id='centered_nav')
        return nav.find_all('a')[0]

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return images, title, hover texts and date of a comic.

        The date is parsed from the <year>/<month>/<day>/ path of the
        comic url.
        """
        comic_url = cls.get_url_from_link(link)
        date_match = re.match(
            '^%s/([0-9]*)/([0-9]*)/([0-9]*)/' % cls.url, comic_url)
        year, month, day = map(int, date_match.groups())
        story = soup.find('div', id='notes').find('div', class_='storycontent')
        imgs = story.find_all('img')
        hover_texts = [img.get('title') for img in imgs]
        texts = '  '.join(text for text in hover_texts if text)
        title = soup.find('title').string
        # Images without a src attribute are left out.
        srcs = [convert_iri_to_plain_ascii_uri(img['src'])
                for img in imgs if img.get('src') is not None]
        return {
            'img': srcs,
            'title': title,
            'texts': texts,
            'year': year,
            'month': month,
            'day': day,
        }
1143
1144
1145
class BouletCorp(GenericBouletCorp):
    """Class to retrieve BouletCorp comics."""
    # Only site-specific attributes are set here; the retrieval methods are
    # the ones defined on GenericBouletCorp.
    name = 'boulet'
    long_name = 'Boulet Corp'
    url = 'http://www.bouletcorp.com'
    # NOTE(review): this rebinds `_categories`, which GenericBouletCorp sets
    # to ('BOULET', ) - confirm how the base classes combine categories
    # across the class hierarchy.
    _categories = ('FRANCAIS', )
1151
1152
1153
class BouletCorpEn(GenericBouletCorp):
    """Class to retrieve EnglishBouletCorp comics."""
    # Only site-specific attributes are set here; the retrieval methods are
    # the ones defined on GenericBouletCorp.
    name = 'boulet_en'
    long_name = 'Boulet Corp English'
    url = 'http://english.bouletcorp.com'
1158
1159
1160 View Code Duplication
class AmazingSuperPowers(GenericNavigableComic):
    """Class to retrieve Amazing Super Powers comics."""
    name = 'asp'
    long_name = 'Amazing Super Powers'
    url = 'http://www.amazingsuperpowers.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, author, image urls and publication date of a comic.

        The title is the space-joined hover text of the comic images.
        """
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        date_span = soup.find('span', class_='post-date')
        published = string_to_date(date_span.string, "%B %d, %Y")
        imgs = soup.find('div', id='comic').find_all('img')
        title = ' '.join([img['title'] for img in imgs])
        for img in imgs:
            assert img['alt'] == img['title']
        return {
            'title': title,
            'author': author,
            'img': [img['src'] for img in imgs],
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
1185
1186
1187
class ToonHole(GenericNavigableComic):
    """Class to retrieve Toon Holes comics."""
    # Also on http://tapastic.com/series/TOONHOLE
    name = 'toonhole'
    long_name = 'Toon Hole'
    url = 'http://www.toonhole.com'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return shortlink, title, date and image urls of a comic.

        The title is the alt text of the first comic image, or an empty
        string when the page has no image.
        """
        short_url = soup.find('link', rel='shortlink')['href']
        entry_meta = soup.find('div', class_='entry-meta')
        published = string_to_date(entry_meta.contents[0].strip(), "%B %d, %Y")
        imgs = soup.find('div', id='comic').find_all('img')
        title = ""
        if imgs:
            first_img = imgs[0]
            title = first_img['alt']
            assert first_img['title'] == title
        return {
            'short_url': short_url,
            'title': title,
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [convert_iri_to_plain_ascii_uri(img['src']) for img in imgs],
        }
1217
1218
1219
class Channelate(GenericNavigableComic):
    """Class to retrieve Channelate comics."""
    name = 'channelate'
    long_name = 'Channelate'
    url = 'http://www.channelate.com'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return author, date, title and image urls of a comic.

        When the page has a div#extrapanelbutton, the page it links to is
        fetched as well and its 'extrapanelimage' images are appended.
        """
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        date_span = soup.find('span', class_='post-date')
        published = string_to_date(date_span.string, '%Y/%m/%d')
        title = soup.find('meta', property='og:title')['content']
        comic_div = soup.find('div', id='comic')
        if comic_div:
            imgs = comic_div.find_all('img')
        else:
            imgs = []
        extra_url = None
        button_div = soup.find('div', id='extrapanelbutton')
        if button_div:
            extra_url = button_div.find('a')['href']
            extra_page = get_soup_at_url(extra_url)
            imgs += extra_page.find_all('img', class_='extrapanelimage')
        return {
            'url_extra': extra_url,
            'title': title,
            'author': author,
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [urljoin_wrapper(cls.url, img['src']) for img in imgs],
        }
1253
1254
1255
class CyanideAndHappiness(GenericNavigableComic):
    """Class to retrieve Cyanide And Happiness comics."""
    name = 'cyanide'
    long_name = 'Cyanide and Happiness'
    url = 'http://explosm.net'
    _categories = ('NSFW', )
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics."""
        return get_soup_at_url(cls.url).find('a', title='Oldest comic')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns None when the navigation anchor is missing from the page or
        carries no href (no comic in that direction).
        """
        # The class name is given without surrounding whitespace: Beautiful
        # Soup matches against whitespace-split class values.
        link = last_soup.find('a', class_='next-comic' if next_ else 'previous-comic')
        if link is None or link.get('href') is None:
            return None
        return link

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The comic number is the second-to-last path component of the og:url
        meta tag; the author is the credit text stripped of its leading
        'by '.
        """
        url2 = soup.find('meta', property='og:url')['content']
        num = int(url2.split('/')[-2])
        date_str = soup.find('h3').find('a').string
        day = string_to_date(date_str, '%Y.%m.%d')
        author = soup.find('small', class_="author-credit-name").string
        assert author.startswith('by ')
        author = author[3:]
        imgs = soup.find_all('img', id='main-comic')
        return {
            'num': num,
            'author': author,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'prefix': '%d-' % num,
            'img': [convert_iri_to_plain_ascii_uri(urljoin_wrapper(cls.url, i['src'])) for i in imgs]
        }
1294
1295
1296
class MrLovenstein(GenericComic):
    """Class to retrieve Mr Lovenstein comics."""
    # Also on https://tapastic.com/series/MrLovenstein
    name = 'mrlovenstein'
    long_name = 'Mr. Lovenstein'
    url = 'http://www.mrlovenstein.com'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generator to get the next comic. Implementation of GenericComic's abstract method.

        The range of comic numbers is derived from the /comic/<num> links
        found on the home page. Retrieval resumes after last_comic['num']
        when a last comic is given. Nothing is yielded when the home page
        holds no comic link.
        """
        # TODO: more info from http://www.mrlovenstein.com/archive
        comic_num_re = re.compile('^/comic/([0-9]*)$')
        # Compiled once here rather than on every iteration of the loop.
        comic_img_re = re.compile('^/images/comics/')
        nums = [int(comic_num_re.match(link['href']).groups()[0])
                for link in get_soup_at_url(cls.url).find_all('a', href=comic_num_re)]
        if not nums:
            # min()/max() would raise ValueError on an empty list.
            return
        first, last = min(nums), max(nums)
        if last_comic:
            first = last_comic['num'] + 1
        for num in range(first, last + 1):
            url = urljoin_wrapper(cls.url, '/comic/%d' % num)
            soup = get_soup_at_url(url)
            imgs = list(reversed(soup.find_all('img', src=comic_img_re)))
            description = soup.find('meta', attrs={'name': 'description'})['content']
            yield {
                'url': url,
                'num': num,
                'texts': '  '.join(t for t in (i.get('title') for i in imgs) if t),
                'img': [urljoin_wrapper(url, i['src']) for i in imgs],
                'description': description,
            }
1326
1327
1328
class DinosaurComics(GenericListableComic):
    """Class to retrieve Dinosaur Comics comics."""
    name = 'dinosaur'
    long_name = 'Dinosaur Comics'
    url = 'http://www.qwantz.com'
    get_url_from_archive_element = get_href
    comic_link_re = re.compile('^%s/index.php\\?comic=([0-9]*)$' % url)

    @classmethod
    def get_archive_elements(cls):
        """Return the archive page links to comics, in reverse order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive.php'))
        comic_links = archive_soup.find_all('a', href=cls.comic_link_re)
        # first link is random -> skip it
        return reversed(comic_links[1:])

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return date, image, title, text and number of a comic.

        The date is the text of the archive link and the 'text' entry is
        the string of the node following that link.
        """
        comic_url = cls.get_url_from_archive_element(link)
        num = int(cls.comic_link_re.match(comic_url).group(1))
        raw_date = link.string
        text = link.next_sibling.string
        published = string_to_date(
            remove_st_nd_rd_th_from_date(raw_date), "%B %d, %Y")
        img = soup.find('img', src=re.compile('^%s/comics/' % cls.url))
        return {
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [img.get('src')],
            'title': img.get('title'),
            'text': text,
            'num': num,
        }
1361 View Code Duplication
1362
1363
class ButterSafe(GenericListableComic):
    """Class to retrieve Butter Safe comics."""
    name = 'butter'
    long_name = 'ButterSafe'
    url = 'http://buttersafe.com'
    get_url_from_archive_element = get_href
    comic_link_re = re.compile('^%s/([0-9]*)/([0-9]*)/([0-9]*)/.*' % url)

    @classmethod
    def get_archive_elements(cls):
        """Return the archive page links to comics, in reverse order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive/'))
        comic_links = archive_soup.find_all('a', href=cls.comic_link_re)
        return reversed(comic_links)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, date (parsed from the comic url) and image url."""
        comic_url = cls.get_url_from_archive_element(link)
        title = link.string
        # The url path is <year>/<month>/<day>/...
        date_parts = cls.comic_link_re.match(comic_url).groups()
        year, month, day = map(int, date_parts)
        img = soup.find('div', id='comic').find('img')
        assert img['alt'] == title
        return {
            'title': title,
            'day': day,
            'month': month,
            'year': year,
            'img': [img['src']],
        }
1391
1392
1393
class CalvinAndHobbes(GenericComic):
    """Class to retrieve Calvin and Hobbes comics."""
    # Also on http://www.gocomics.com/calvinandhobbes/
    name = 'calvin'
    long_name = 'Calvin and Hobbes'
    # This is not through any official webpage but eh...
    url = 'http://marcel-oehler.marcellosendos.ch/comics/ch/'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generator to get the next comic. Implementation of GenericComic's abstract method.

        The index page links to one page per month (href '<year>/<month>/').
        Each month page holds images whose src starts with
        '<year><month><day>'. Only comics dated strictly after the last
        retrieved one (or after 1985-11-01 when there is none) are yielded.
        """
        last_date = get_date_for_comic(
            last_comic) if last_comic else date(1985, 11, 1)
        link_re = re.compile('^([0-9]*)/([0-9]*)/')
        for link in get_soup_at_url(cls.url).find_all('a', href=link_re):
            url = link['href']
            # The string forms are kept: they are reused verbatim in the
            # image pattern and in the image url.
            year_str, month_str = link_re.match(url).groups()
            year, month = int(year_str), int(month_str)
            # Skip months that end before the last retrieved comic.
            if date(year, month, 1) + timedelta(days=31) < last_date:
                continue
            img_re = re.compile('^%s%s([0-9]*)' % (year_str, month_str))
            month_url = urljoin_wrapper(cls.url, url)
            for img in get_soup_at_url(month_url).find_all('img', src=img_re):
                img_src = img['src']
                day = int(img_re.match(img_src).groups()[0])
                comic_date = date(year, month, day)
                if comic_date > last_date:
                    yield {
                        'url': month_url,
                        'year': year,
                        'month': month,
                        'day': day,
                        'img': ['%s%s/%s/%s' % (cls.url, year_str, month_str, img_src)],
                    }
                    last_date = comic_date
1427 View Code Duplication
0 ignored issues
show
This code seems to be duplicated in your project.
Loading history...
1428
1429
class AbstruseGoose(GenericListableComic):
    """Class to retrieve AbstruseGoose Comics."""
    name = 'abstruse'
    long_name = 'Abstruse Goose'
    url = 'http://abstrusegoose.com'
    get_url_from_archive_element = get_href
    comic_url_re = re.compile('^%s/([0-9]*)$' % url)
    comic_img_re = re.compile('^%s/strips/.*' % url)

    @classmethod
    def get_archive_elements(cls):
        """Return the archive page links to comics, in page order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive'))
        return archive_soup.find_all('a', href=cls.comic_url_re)

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Return number, title and image url of the comic behind `archive_elt`."""
        comic_url = cls.get_url_from_archive_element(archive_elt)
        num = int(cls.comic_url_re.match(comic_url).group(1))
        title = archive_elt.string
        strip_img = soup.find('img', src=cls.comic_img_re)
        return {
            'num': num,
            'title': title,
            'img': [strip_img['src']]
        }
1452
1453
1454
class PhDComics(GenericNavigableComic):
    """Class to retrieve PHD Comics."""
    name = 'phd'
    long_name = 'PhD Comics'
    url = 'http://phdcomics.com/comics/archive.php'

    @classmethod
    def get_first_comic_link(cls):
        """Return the parent of the "first" button image, or None if absent."""
        button = get_soup_at_url(cls.url).find(
            'img', src='http://phdcomics.com/comics/images/first_button.gif')
        if button is None:
            return None
        return button.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the parent of the next/prev button image, or None if absent."""
        direction = 'next' if next_ else 'prev'
        button_src = 'http://phdcomics.com/comics/images/%s_button.gif' % direction
        button = last_soup.find('img', src=button_src)
        if button is None:
            return None
        return button.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return image urls and title read from the page's <meta> tags."""
        title_meta = soup.find('meta', attrs={'name': 'twitter:title'})
        title = title_meta['content']
        img_metas = soup.find_all('meta', property='og:image')
        return {
            'img': [meta['content'] for meta in img_metas],
            'title': title,
        }
1483
1484
1485 View Code Duplication
class Octopuns(GenericEmptyComic, GenericNavigableComic):
    """Class to retrieve Octopuns comics."""
    # Also on http://octopuns.tumblr.com
    name = 'octopuns'
    long_name = 'Octopuns'
    url = 'http://www.octopuns.net'

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        Returns the element wrapping the "First" button image, or None when
        the button is not found on the home page.
        """
        img = get_soup_at_url(cls.url).find('img', src=re.compile('.*/First.png'))
        return None if img is None else img.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns None when the button image is missing or when its parent
        carries no href (no comic in that direction).
        """
        img = last_soup.find('img', src=re.compile('.*/Next.png' if next_ else '.*/Back.png'))
        if img is None:
            return None
        link = img.parent
        return None if link.get('href') is None else link

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with the image urls (from the image_src <link>
        tags), the post title and the publication date.
        """
        title = soup.find('h3', class_='post-title entry-title').string
        date_str = soup.find('h2', class_='date-header').string
        day = string_to_date(date_str, "%A, %B %d, %Y")
        imgs = soup.find_all('link', rel='image_src')
        return {
            'img': [i['href'] for i in imgs],
            'title': title,
            'day': day.day,
            'month': day.month,
            'year': day.year,
        }
1517
1518
1519
class Quarktees(GenericNavigableComic):
    """Retriever for the comics published on the Quarktees blog."""
    name = 'quarktees'
    long_name = 'Quarktees'
    url = 'http://www.quarktees.com/blogs/news'
    first_url = 'http://www.quarktees.com/blogs/news/12486621-coming-soon'
    get_first_comic_link = simulate_first_link
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the 'article-next' or 'article-prev' anchor of the page."""
        anchor_id = 'article-next' if next_ else 'article-prev'
        return last_soup.find('a', id=anchor_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect the title and the image URLs of a blog article."""
        og_title = soup.find('meta', property='og:title')
        comic_title = og_title['content']
        article_div = soup.find('div', class_='single-article')
        image_urls = []
        for tag in article_div.find_all('img'):
            # Image sources are resolved against the blog URL.
            image_urls.append(urljoin_wrapper(cls.url, tag['src']))
        return {'title': comic_title, 'img': image_urls}
1543
1544
1545
class OverCompensating(GenericNavigableComic):
    """Retriever for the Over Compensating webcomic."""
    name = 'compensating'
    long_name = 'Over Compensating'
    url = 'http://www.overcompensating.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the home page anchor whose href ends with 'comic=1'."""
        home_page = get_soup_at_url(cls.url)
        return home_page.find('a', href=re.compile('comic=1$'))

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the navigation anchor, selected through its title."""
        anchor_title = 'next comic' if next_ else 'go back already'
        return last_soup.find('a', title=anchor_title)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return number, image URL and title of the comic behind `link`.

        The comic number is parsed from the 'comic=<n>' suffix of the URL.
        """
        strip_src_re = re.compile('^/oc/comics/.*')
        number_re = re.compile('.*comic=([0-9]*)$')
        page_url = cls.get_url_from_link(link)
        comic_number = int(number_re.match(page_url).group(1))
        strip = soup.find('img', src=strip_src_re)
        return {
            'num': comic_number,
            'img': [urljoin_wrapper(page_url, strip['src'])],
            'title': strip.get('title')
        }
1575
1576
1577
class Oglaf(GenericNavigableComic):
    """Retriever for the Oglaf webcomic (flagged NSFW)."""
    name = 'oglaf'
    long_name = 'Oglaf [NSFW]'
    url = 'http://oglaf.com'
    _categories = ('NSFW', )
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the parent of the div with id 'st' on the home page."""
        start_div = get_soup_at_url(cls.url).find("div", id="st")
        return start_div.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the parent of the 'nx'/'pvs' div, or None if it is absent."""
        div_id = "nx" if next_ else "pvs"
        nav_div = last_soup.find("div", id=div_id)
        if not nav_div:
            return None
        return nav_div.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, images and description of a comic page.

        Exactly one title image (inside the 'tt' div) and one strip image
        (id 'strip') are expected; the description joins their titles.
        """
        page_title = soup.find('title').string
        header_imgs = soup.find('div', id='tt').find_all('img')
        assert len(header_imgs) == 1
        strips = soup.find_all('img', id='strip')
        assert len(strips) == 1
        pictures = header_imgs + strips
        hover_texts = [pic['title'] for pic in pictures]
        description = ' '.join(hover_texts)
        return {
            'title': page_title,
            'img': [pic['src'] for pic in pictures],
            'description': description,
        }
1611
1612
1613
class ScandinaviaAndTheWorld(GenericNavigableComic):
    """Retriever for the Scandinavia And The World webcomic."""
    name = 'satw'
    long_name = 'Scandinavia And The World'
    url = 'http://satwcomic.com'
    first_url = 'http://satwcomic.com/sweden-denmark-and-norway'
    get_first_comic_link = simulate_first_link

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor whose accesskey is 'n' (next) or 'p' (previous)."""
        key = 'n' if next_ else 'p'
        return last_soup.find('a', accesskey=key)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, description and image URLs of a comic page."""
        label_meta = soup.find('meta', attrs={'name': 'twitter:label1'})
        comic_title = label_meta['content']
        description_meta = soup.find('meta', property='og:description')
        description = description_meta['content']
        image_urls = []
        for tag in soup.find_all('img', itemprop="image"):
            image_urls.append(tag['src'])
        return {
            'title': comic_title,
            'description': description,
            'img': image_urls,
        }
1637
1638
1639
class SomethingOfThatIlk(GenericEmptyComic):  # Does not exist anymore
    """Placeholder for the Something Of That Ilk comics.

    Only the identifying attributes are declared; no retrieval logic is
    implemented in this class.
    """
    url = 'http://www.somethingofthatilk.com'
    long_name = 'Something Of That Ilk'
    name = 'somethingofthatilk'
1644
1645
1646
class InfiniteMonkeyBusiness(GenericNavigableComic):
    """Retriever for the Infinite Monkey Business webcomic."""
    name = 'monkey'
    long_name = 'Infinite Monkey Business'
    url = 'http://infinitemonkeybusiness.net'
    first_url = 'http://infinitemonkeybusiness.net/comic/pillory/'
    get_first_comic_link = simulate_first_link
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the title and the image URLs found in the 'comic' div."""
        og_title = soup.find('meta', property='og:title')
        comic_title = og_title['content']
        comic_div = soup.find('div', id='comic')
        image_urls = []
        for tag in comic_div.find_all('img'):
            image_urls.append(tag['src'])
        return {'title': comic_title, 'img': image_urls}
1664
1665
1666
class Wondermark(GenericListableComic):
    """Retriever for the Wondermark webcomic, driven by its archive page."""
    name = 'wondermark'
    long_name = 'Wondermark'
    url = 'http://wondermark.com'
    get_url_from_archive_element = get_href

    @classmethod
    def get_archive_elements(cls):
        """Return the archive's bookmark anchors in reversed page order."""
        archive_page = get_soup_at_url(urljoin_wrapper(cls.url, 'archive/'))
        bookmarks = archive_page.find_all('a', rel='bookmark')
        return reversed(bookmarks)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return date, image, title, alt text and tags of an archive entry.

        Entries without a 'comic' div yield an empty image list and empty
        title/alt strings.
        """
        raw_date = soup.find('div', class_='postdate').find('em').string
        published = string_to_date(remove_st_nd_rd_th_from_date(raw_date), "%B %d, %Y")
        # Defaults used when the page holds no comic.
        image_urls, alt_text, comic_title = [], '', ''
        comic_div = soup.find('div', id='comic')
        if comic_div:
            picture = comic_div.find('img')
            image_urls = [picture['src']]
            alt_text = picture['alt']
            assert alt_text == picture['title']
            comic_title = soup.find('meta', property='og:title')['content']
        return {
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': image_urls,
            'title': comic_title,
            'alt': alt_text,
            'tags': ' '.join(
                tag.string
                for tag in soup.find('div', class_='postmeta').find_all('a', rel='tag')),
        }
1703
1704
1705
class WarehouseComic(GenericNavigableComic):
    """Retriever for the Warehouse Comic webcomic."""
    name = 'warehouse'
    long_name = 'Warehouse Comic'
    url = 'http://warehousecomic.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = get_a_navi_navifirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return images, title and publication date of a comic page."""
        comic_title = soup.find('h2', class_='post-title').string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, "%B %d, %Y")
        comic_div = soup.find('div', id='comic')
        image_urls = []
        for tag in comic_div.find_all('img'):
            image_urls.append(tag['src'])
        return {
            'img': image_urls,
            'title': comic_title,
            'day': published.day,
            'month': published.month,
            'year': published.year,
        }
1727
1728
1729
class JustSayEh(GenericNavigableComic):
    """Retriever for the Just Say Eh webcomic."""
    # Also on http://tapastic.com/series/Just-Say-Eh
    name = 'justsayeh'
    long_name = 'Just Say Eh'
    url = 'http://www.justsayeh.com'
    get_navi_link = get_a_navi_comicnavnext_navinext
    get_first_comic_link = get_a_navi_navifirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return images, title and alt text of a comic page.

        Every image is expected to carry identical 'alt' and 'title'
        attributes; the alt text of the first image is reported.
        """
        comic_title = soup.find('h2', class_='post-title').string
        pictures = soup.find("div", id="comic").find_all("img")
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        first_alt = pictures[0]['alt']
        image_urls = []
        for pic in pictures:
            image_urls.append(pic['src'])
        return {
            'img': image_urls,
            'title': comic_title,
            'alt': first_alt,
        }
1750
1751
1752 View Code Duplication
class MouseBearComedy(GenericNavigableComic):
    """Retriever for the Mouse Bear Comedy webcomic."""
    # Also on http://mousebearcomedy.tumblr.com
    name = 'mousebear'
    long_name = 'Mouse Bear Comedy'
    url = 'http://www.mousebearcomedy.com'
    get_navi_link = get_a_navi_comicnavnext_navinext
    get_first_comic_link = get_a_navi_navifirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return date, images, title and author of a comic page.

        Each image's 'alt' and 'title' attributes must equal the post title.
        """
        comic_title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author_name = author_span.find("a").string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, '%B %d, %Y')
        pictures = soup.find("div", id="comic").find_all("img")
        assert all(pic['alt'] == pic['title'] == comic_title for pic in pictures)
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [pic['src'] for pic in pictures],
            'title': comic_title,
            'author': author_name,
        }
1778 View Code Duplication
1779
1780
class BigFootJustice(GenericNavigableComic):
    """Retriever for the Big Foot Justice webcomic."""
    # Also on http://tapastic.com/series/bigfoot-justice
    name = 'bigfoot'
    long_name = 'Big Foot Justice'
    url = 'http://bigfootjustice.com'
    get_navi_link = get_a_navi_comicnavnext_navinext
    get_first_comic_link = get_a_navi_navifirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the image URLs and a title built from the image titles."""
        pictures = soup.find('div', id='comic').find_all('img')
        assert all(pic['title'] == pic['alt'] for pic in pictures)
        # The page title is not used: the images' titles are joined instead.
        hover_texts = [pic['title'] for pic in pictures]
        joined_title = ' '.join(hover_texts)
        image_urls = []
        for pic in pictures:
            image_urls.append(pic['src'])
        return {'img': image_urls, 'title': joined_title}
1799
1800
1801
class RespawnComic(GenericNavigableComic):
    """Class to retrieve Respawn Comic."""
    # Also on http://respawncomic.tumblr.com
    name = 'respawn'
    long_name = 'Respawn Comic'
    # The trailing space that used to end this URL has been removed: it made
    # the base URL malformed.
    url = 'http://respawncomic.com'
    _categories = ('RESPAWN', )
    get_navi_link = get_a_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://respawncomic.com/comic/c0001/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        Title, author and publication date are read from the page's meta
        tags. Images come from the 'og:image' meta tags, minus a fixed set
        of image URLs that are skipped.
        """
        title = soup.find('meta', property='og:title')['content']
        author = soup.find('meta', attrs={'name': 'shareaholic:article_author_name'})['content']
        date_str = soup.find('meta', attrs={'name': 'shareaholic:article_published_time'})['content']
        # Only the first 10 characters are parsed, as a YYYY-MM-DD date.
        day = string_to_date(date_str[:10], "%Y-%m-%d")
        imgs = soup.find_all('meta', property='og:image')
        skip_imgs = {
            'http://respawncomic.com/wp-content/uploads/2016/03/site/HAROLD2.png',
            'http://respawncomic.com/wp-content/uploads/2016/03/site/DEVA.png'
        }
        return {
            'title': title,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [i['content'] for i in imgs if i['content'] not in skip_imgs],
        }
1833
1834
1835 View Code Duplication
class SafelyEndangered(GenericNavigableComic):
    """Retriever for the Safely Endangered webcomic."""
    # Also on http://tumblr.safelyendangered.com
    name = 'endangered'
    long_name = 'Safely Endangered'
    url = 'http://www.safelyendangered.com'
    first_url = 'http://www.safelyendangered.com/comic/ignored/'
    get_first_comic_link = simulate_first_link
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return date, images, title and alt text of a comic page.

        The alt text is taken from the first image; all images must have
        matching 'alt' and 'title' attributes.
        """
        comic_title = soup.find('h2', class_='post-title').string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, '%B %d, %Y')
        pictures = soup.find('div', id='comic').find_all('img')
        first_alt = pictures[0]['alt']
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [pic['src'] for pic in pictures],
            'title': comic_title,
            'alt': first_alt,
        }
1862
1863
1864 View Code Duplication
class PicturesInBoxes(GenericNavigableComic):
    """Retriever for the Pictures In Boxes webcomic."""
    # Also on http://picturesinboxescomic.tumblr.com
    name = 'picturesinboxes'
    long_name = 'Pictures in Boxes'
    url = 'http://www.picturesinboxes.com'
    first_url = 'http://www.picturesinboxes.com/2013/10/26/tetris/'
    get_first_comic_link = simulate_first_link
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return date, images, title and author of a comic page.

        At least one image is required in the 'comicpane' div, and each
        image's 'title' and 'alt' must equal the post title.
        """
        comic_title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author_name = author_span.find("a").string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, '%B %d, %Y')
        pictures = soup.find('div', class_='comicpane').find_all('img')
        assert pictures
        assert all(pic['title'] == pic['alt'] == comic_title for pic in pictures)
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [pic['src'] for pic in pictures],
            'title': comic_title,
            'author': author_name,
        }
1892
1893
1894 View Code Duplication
class Penmen(GenericNavigableComic):
    """Retriever for the Penmen webcomic."""
    name = 'penmen'
    long_name = 'Penmen'
    url = 'http://penmen.com'
    first_url = 'http://penmen.com/index.php/2016/09/12/penmen-announces-grin-big-brand-clothing/'
    get_first_comic_link = simulate_first_link
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, short URL, images, tags and date of a post."""
        page_title = soup.find('title').string
        pictures = soup.find('div', class_='entry-content').find_all('img')
        shortlink = soup.find('link', rel='shortlink')['href']
        tag_names = [anchor.string for anchor in soup.find_all('a', rel='tag')]
        joined_tags = ' '.join(tag_names)
        # Only the first 10 characters of the datetime attribute are parsed.
        iso_date = soup.find('time')['datetime'][:10]
        published = string_to_date(iso_date, "%Y-%m-%d")
        return {
            'title': page_title,
            'short_url': shortlink,
            'img': [pic['src'] for pic in pictures],
            'tags': joined_tags,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
1921
1922
1923
class TheDoghouseDiaries(GenericNavigableComic):
    """Retriever for The Dog House Diaries webcomic."""
    name = 'doghouse'
    long_name = 'The Dog House Diaries'
    url = 'http://thedoghousediaries.com'

    @classmethod
    def get_first_comic_link(cls):
        """Return the anchor with id 'firstlink' from the home page."""
        home_page = get_soup_at_url(cls.url)
        return home_page.find('a', id='firstlink')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor with id 'nextlink' or 'previouslink'."""
        anchor_id = 'nextlink' if next_ else 'previouslink'
        return last_soup.find('a', id=anchor_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return titles, alt text, image URL and number of a comic.

        The number is the last path component of the comic URL.
        """
        strip = soup.find('img', src=re.compile('^dhdcomics/.*'))
        page_url = cls.get_url_from_link(link)
        main_title = soup.find('h2', id='titleheader').string
        sub_title = soup.find('div', id='subtext').string
        hover_text = strip.get('title')
        # The src attribute is stripped before being joined to the page URL.
        image_url = urljoin_wrapper(page_url, strip['src'].strip())
        last_component = page_url.split('/')[-1]
        return {
            'title': main_title,
            'title2': sub_title,
            'alt': hover_text,
            'img': [image_url],
            'num': int(last_component),
        }
1952
1953
1954
class InvisibleBread(GenericListableComic):
    """Class to retrieve Invisible Bread comics."""
    # Also on http://www.gocomics.com/invisible-bread
    name = 'invisiblebread'
    long_name = 'Invisible Bread'
    url = 'http://invisiblebread.com'

    @classmethod
    def get_archive_elements(cls):
        """Get the archive's title cells, in reversed page order."""
        archive_url = urljoin_wrapper(cls.url, 'archives/')
        return reversed(get_soup_at_url(archive_url).find_all('td', class_='archive-title'))

    @classmethod
    def get_url_from_archive_element(cls, td):
        """Get the url of the comic linked from an archive title cell."""
        return td.find('a')['href']

    @classmethod
    def get_comic_info(cls, soup, td):
        """Get information about a particular comic.

        The archive cell provides the title; the sibling preceding it
        provides month and day. The year is the first numeric path component
        of the comic URL.
        """
        url = cls.get_url_from_archive_element(td)
        title = td.find('a').string
        month_and_day = td.previous_sibling.string
        # The site URL is escaped so that its dots only match literal dots.
        link_re = re.compile('^%s/([0-9]+)/' % re.escape(cls.url))
        year = link_re.match(url).groups()[0]
        date_str = month_and_day + ' ' + year
        day = string_to_date(date_str, '%b %d %Y')
        # Only the first image of the 'comic' div is kept.
        imgs = [soup.find('div', id='comic').find('img')]
        assert all(i['title'] == i['alt'] == title for i in imgs)
        return {
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
            'title': title,
        }
1990
1991
1992
class DiscoBleach(GenericEmptyComic):  # Does not work anymore
    """Placeholder for the Disco Bleach comics.

    Only the identifying attributes are declared; no retrieval logic is
    implemented in this class.
    """
    url = 'http://discobleach.com'
    long_name = 'Disco Bleach'
    name = 'discobleach'
1997
1998
1999
class TubeyToons(GenericEmptyComic):  # Does not work anymore
    """Placeholder for the TubeyToons comics.

    Only the identifying attributes are declared; no retrieval logic is
    implemented in this class.
    """
    # Also on http://tapastic.com/series/Tubey-Toons
    # Also on http://tubeytoons.tumblr.com
    url = 'http://tubeytoons.com'
    long_name = 'Tubey Toons'
    name = 'tubeytoons'
    # NOTE(review): the category is spelled 'TUNEYTOONS'; confirm whether
    # other classes rely on this exact value before renaming it.
    _categories = ('TUNEYTOONS', )
2007
2008
2009
class CompletelySeriousComics(GenericNavigableComic):
    """Retriever for the Completely Serious Comics webcomic."""
    name = 'completelyserious'
    long_name = 'Completely Serious Comics'
    url = 'http://completelyseriouscomics.com'
    get_navi_link = get_a_navi_navinext
    get_first_comic_link = get_a_navi_navifirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return date, images, title, alt text and author of a comic page.

        At least one image is required in the 'comicpane' div, and all
        images must share the same 'title' and 'alt' text.
        """
        comic_title = soup.find('h2', class_='post-title').string
        # The author is read from the second child of the post-author span.
        author_span = soup.find('span', class_='post-author')
        author_name = author_span.contents[1].string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, '%B %d, %Y')
        pictures = soup.find('div', class_='comicpane').find_all('img')
        assert pictures
        hover_text = pictures[0]['title']
        assert all(pic['title'] == pic['alt'] == hover_text for pic in pictures)
        return {
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [pic['src'] for pic in pictures],
            'title': comic_title,
            'alt': hover_text,
            'author': author_name,
        }
2037 View Code Duplication
2038
2039
class PoorlyDrawnLines(GenericListableComic):
    """Class to retrieve Poorly Drawn Lines comics."""
    # Also on http://pdlcomics.tumblr.com
    name = 'poorlydrawn'
    long_name = 'Poorly Drawn Lines'
    url = 'http://www.poorlydrawnlines.com'
    _categories = ('POORLYDRAWN', )
    get_url_from_archive_element = get_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        At most one image is expected in the 'post' div. Without an image,
        the image list and the title are empty.
        """
        imgs = soup.find('div', class_='post').find_all('img')
        assert len(imgs) <= 1
        return {
            'img': [i['src'] for i in imgs],
            'title': imgs[0].get('title', "") if imgs else "",
        }

    @classmethod
    def get_archive_elements(cls):
        """Get the archive's comic links, in reversed page order."""
        archive_url = urljoin_wrapper(cls.url, 'archive')
        # The site URL is escaped so that its dots only match literal dots.
        url_re = re.compile('^%s/comic/.' % re.escape(cls.url))
        return reversed(get_soup_at_url(archive_url).find_all('a', href=url_re))
2063
2064
2065 View Code Duplication
class LoadingComics(GenericNavigableComic):
    """Retriever for the Loading Artist webcomic."""
    name = 'loadingartist'
    long_name = 'Loading Artist'
    url = 'http://www.loadingartist.com/latest'

    @classmethod
    def get_first_comic_link(cls):
        """Return the anchor titled 'First' from the latest-comic page."""
        latest_page = get_soup_at_url(cls.url)
        return latest_page.find('a', title="First")

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor titled 'Next' or 'Previous'."""
        anchor_title = 'Next' if next_ else 'Previous'
        return last_soup.find('a', title=anchor_title)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, images and publication date of a comic page.

        Only images whose 'alt' and 'title' attributes are both empty are
        kept.
        """
        comic_title = soup.find('h1').string
        raw_date = soup.find('span', class_='date').string.strip()
        published = string_to_date(raw_date, "%B %d, %Y")
        comic_div = soup.find('div', class_='comic')
        image_urls = []
        for tag in comic_div.find_all('img', alt='', title=''):
            image_urls.append(tag['src'])
        return {
            'title': comic_title,
            'img': image_urls,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2095
2096
2097
class ChuckleADuck(GenericNavigableComic):
    """Retriever for the Chuckle-A-Duck webcomic."""
    name = 'chuckleaduck'
    long_name = 'Chuckle-A-duck'
    url = 'http://chuckleaduck.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = get_div_navfirst_a

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return date, images, title and author of a comic page.

        A page without a 'comic' div yields no image and an empty title.
        """
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(remove_st_nd_rd_th_from_date(raw_date), "%B %d, %Y")
        author_name = soup.find('span', class_='post-author').string
        comic_div = soup.find('div', id='comic')
        if comic_div:
            pictures = comic_div.find_all('img')
        else:
            pictures = []
        # The title is the hover text of the first image, if there is one.
        if pictures:
            comic_title = pictures[0]['title']
        else:
            comic_title = ""
        assert all(pic['title'] == pic['alt'] == comic_title for pic in pictures)
        return {
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [pic['src'] for pic in pictures],
            'title': comic_title,
            'author': author_name,
        }
2123
2124
2125
class DepressedAlien(GenericNavigableComic):
    """Retriever for the Depressed Alien webcomic."""
    name = 'depressedalien'
    long_name = 'Depressed Alien'
    url = 'http://depressedalien.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the parent of the image named 'beginArrow'."""
        home_page = get_soup_at_url(cls.url)
        begin_arrow = home_page.find('img', attrs={'name': 'beginArrow'})
        return begin_arrow.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the parent of the 'rightArrow' or 'leftArrow' image."""
        arrow_name = 'rightArrow' if next_ else 'leftArrow'
        arrow = last_soup.find('img', attrs={'name': arrow_name})
        return arrow.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the title and the image URLs read from the meta tags."""
        title_meta = soup.find('meta', attrs={'name': 'twitter:title'})
        comic_title = title_meta['content']
        image_urls = []
        for meta in soup.find_all('meta', property='og:image'):
            image_urls.append(meta['content'])
        return {'title': comic_title, 'img': image_urls}
2151
2152
2153
class ThingsInSquares(GenericListableComic):
    """Retriever for the Things In Squares webcomic, via its archive table."""
    # This can be retrieved in other languages
    # Also on https://tapastic.com/series/Things-in-Squares
    name = 'squares'
    long_name = 'Things in squares'
    url = 'http://www.thingsinsquares.com'

    @classmethod
    def get_comic_info(cls, soup, tr):
        """Return date, titles, description, tags, images and alt text.

        The archive row `tr` must hold exactly three cells: the second holds
        the link (and title), the third the date.
        """
        _, link_cell, date_cell = tr.find_all('td')
        anchor = link_cell.find('a')
        published = string_to_date(date_cell.string, "%m.%d.%y")
        archive_title = anchor.string
        og_title = soup.find('meta', property='og:title')['content']
        description_meta = soup.find('meta', property='og:description')
        if description_meta:
            description = description_meta['content']
        else:
            description = ''
        tag_values = [meta['content'] for meta in soup.find_all('meta', property='article:tag')]
        joined_tags = ' '.join(tag_values)
        pictures = soup.find('div', class_='entry-content').find_all('img')
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'title': archive_title,
            'title2': og_title,
            'description': description,
            'tags': joined_tags,
            'img': [pic['src'] for pic in pictures],
            'alt': ' '.join(pic['alt'] for pic in pictures),
        }

    @classmethod
    def get_url_from_archive_element(cls, tr):
        """Return the href of the link found in the row's second cell."""
        # Unpacking also checks that the row holds exactly three cells.
        _, link_cell, _ = tr.find_all('td')
        anchor = link_cell.find('a')
        return anchor['href']

    @classmethod
    def get_archive_elements(cls):
        """Return the rows of the archive table body in reversed order."""
        archive_page = get_soup_at_url(urljoin_wrapper(cls.url, 'archive-2'))
        rows = archive_page.find('tbody').find_all('tr')
        return reversed(rows)
2195
2196
2197
class HappleTea(GenericNavigableComic):
    """Retriever for the Happle Tea webcomic."""
    name = 'happletea'
    long_name = 'Happle Tea'
    url = 'http://www.happletea.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = get_a_navi_navifirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, images, alt text, date and author of a comic page.

        All images must have matching 'alt' and 'title' attributes; the alt
        texts are concatenated without a separator.
        """
        pictures = soup.find('div', id='comic').find_all('img')
        post_div = soup.find('div', class_='post-content')
        comic_title = post_div.find('h2', class_='post-title').string
        author_name = post_div.find('a', rel='author').string
        raw_date = post_div.find('span', class_='post-date').string
        published = string_to_date(raw_date, "%B %d, %Y")
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        return {
            'title': comic_title,
            'img': [pic['src'] for pic in pictures],
            'alt': ''.join(pic['alt'] for pic in pictures),
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'author': author_name,
        }
2224
2225
2226
class RockPaperScissors(GenericNavigableComic):
    """Retriever for the Rock Paper Scissors webcomic."""
    name = 'rps'
    long_name = 'Rock Paper Scissors'
    url = 'http://rps-comics.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = get_a_navi_navifirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, transcript, short URL and images of a comic page."""
        page_title = soup.find('title').string
        image_metas = soup.find_all('meta', property='og:image')
        shortlink = soup.find('link', rel='shortlink')['href']
        transcript_div = soup.find('div', id='transcript-content')
        transcript_text = transcript_div.string
        image_urls = []
        for meta in image_metas:
            image_urls.append(meta['content'])
        return {
            'title': page_title,
            'transcript': transcript_text,
            'short_url': shortlink,
            'img': image_urls,
        }
2247
2248
2249
class FatAwesomeComics(GenericNavigableComic):
    """Retriever for the Fat Awesome webcomic."""
    # Also on http://fatawesomecomedy.tumblr.com
    name = 'fatawesome'
    long_name = 'Fat Awesome'
    url = 'http://fatawesome.com/comics'
    first_url = 'http://fatawesome.com/shortbus/'
    get_first_comic_link = simulate_first_link
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, description, tags, alt text, image and date.

        Exactly one image carrying data-recalc-dims="1" is expected; the
        query string of its source URL is dropped.
        """
        title_meta = soup.find('meta', attrs={'name': 'twitter:title'})
        comic_title = title_meta['content']
        description_meta = soup.find('meta', attrs={'name': 'description'})
        description = description_meta['content']
        tag_meta = soup.find('meta', property='article:tag')
        if tag_meta:
            tags = tag_meta['content']
        else:
            tags = ""
        # Only the first 10 characters of the published time are parsed.
        iso_date = soup.find('meta', property='article:published_time')['content'][:10]
        published = string_to_date(iso_date, "%Y-%m-%d")
        pictures = soup.find_all('img', attrs={'data-recalc-dims': "1"})
        assert len(pictures) == 1
        return {
            'title': comic_title,
            'description': description,
            'tags': tags,
            'alt': "".join(pic['alt'] for pic in pictures),
            'img': [pic['src'].rsplit('?', 1)[0] for pic in pictures],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2280
2281
2282
class AnythingComic(GenericListableComic):
    """Retriever for Anything Comic, driven by the site's archive table."""
    # Also on http://tapastic.com/series/anything
    name = 'anythingcomic'
    long_name = 'Anything Comic'
    url = 'http://www.anythingcomic.com'

    @classmethod
    def get_archive_elements(cls):
        """Return the <tr> rows of the archive table that describe comics."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive/'))
        rows = archive_soup.find('table', id='chapter_table').find_all('tr')
        # The first 2 <tr>'s do not correspond to comics
        return rows[2:]

    @classmethod
    def get_url_from_archive_element(cls, tr):
        """Return the absolute comic URL linked from the archive row `tr`."""
        # The unpacking also enforces that the row holds exactly 4 cells.
        _, td_comic, _, _ = tr.find_all('td')
        anchor = td_comic.find('a')
        return urljoin_wrapper(cls.url, anchor['href'])

    @classmethod
    def get_comic_info(cls, soup, tr):
        """Return the info dictionary for one comic.

        Number, title and date come from the archive row `tr`; the image is
        read from the comic page parsed in `soup`.
        """
        td_num, td_comic, td_date, _ = tr.find_all('td')
        num = int(td_num.string)
        title = td_comic.find('a').string
        pics = soup.find_all('img', id='comic_image')
        cleaned_date = remove_st_nd_rd_th_from_date(td_date.string)
        day = string_to_date(cleaned_date, "%B %d, %Y, %I:%M %p")
        assert len(pics) == 1
        assert all(pic.get('alt') == pic.get('title') for pic in pics)
        return {
            'num': num,
            'title': title,
            'alt': pics[0].get('alt', ''),
            'img': [pic['src'] for pic in pics],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
2323
2324
2325
class LonnieMillsap(GenericNavigableComic):
    """Retriever for Lonnie Millsap's comics."""
    name = 'millsap'
    long_name = 'Lonnie Millsap'
    url = 'http://www.lonniemillsap.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.lonniemillsap.com/?p=42'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, author, image URLs and date for the page in `soup`.

        Author, date and images are looked up inside the 'post-content' div.
        """
        title = soup.find('h2', class_='post-title').string
        post = soup.find('div', class_='post-content')
        author_span = post.find("span", class_="post-author")
        author = author_span.find("a").string
        date_span = post.find("span", class_="post-date")
        day = string_to_date(date_span.string, "%B %d, %Y")
        entry = post.find("div", class_="entry")
        pics = entry.find_all("img")
        return {
            'title': title,
            'author': author,
            'img': [pic['src'] for pic in pics],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
2351
2352
2353 View Code Duplication
class LinsEditions(GenericNavigableComic):
    """Retriever for the L.I.N.S. Editions comics."""
    # Also on http://linscomics.tumblr.com
    # Now on https://warandpeas.com
    name = 'lins'
    long_name = 'L.I.N.S. Editions'
    url = 'https://linsedition.com'
    _categories = ('LINS', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'https://linsedition.com/2011/09/07/l-i-n-s/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, image URLs and date, all read from <meta> tags."""
        title = soup.find('meta', property='og:title')['content']
        image_metas = soup.find_all('meta', property='og:image')
        published = soup.find('meta', property='article:published_time')['content']
        # Only the leading YYYY-MM-DD part of the timestamp is parsed.
        day = string_to_date(published[:10], "%Y-%m-%d")
        return {
            'title': title,
            'img': [meta['content'] for meta in image_metas],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
2379
2380
2381
class ThorsThundershack(GenericNavigableComic):
    """Retriever for the Thor's Thundershack comics."""
    # Also on http://tapastic.com/series/Thors-Thundershac
    name = 'thor'
    long_name = 'Thor\'s Thundershack'
    url = 'http://www.thorsthundershack.com'
    _categories = ('THOR', )
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'first navlink' anchor found on the page at cls.url."""
        home_soup = get_soup_at_url(cls.url)
        return home_soup.find('a', class_='first navlink')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the next (or previous) comic link, or None if there is none.

        Anchors whose href is exactly '/comic' are ignored.
        """
        wanted_rel = 'next' if next_ else 'prev'
        candidates = (
            anchor
            for anchor in last_soup.find_all('a', rel=wanted_rel)
            if anchor['href'] != '/comic'
        )
        return next(candidates, None)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the info dictionary for the comic page parsed in `soup`.

        Every image must carry identical 'title' and 'alt' attributes; the
        'alt' of the first image (or "" without images) is reported.
        """
        title = soup.find('meta', attrs={'name': 'description'})["content"]
        description = soup.find('div', itemprop='articleBody').text
        author = soup.find('span', itemprop='author copyrightHolder').string
        pics = soup.find_all('img', itemprop='image')
        assert all(pic['title'] == pic['alt'] for pic in pics)
        if pics:
            alt = pics[0]['alt']
        else:
            alt = ""
        published = soup.find('time', itemprop='datePublished')["datetime"]
        day = string_to_date(published, "%Y-%m-%d %H:%M:%S")
        return {
            'img': [urljoin_wrapper(cls.url, pic['src']) for pic in pics],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'author': author,
            'title': title,
            'alt': alt,
            'description': description,
        }
2424
2425
2426
class GerbilWithAJetpack(GenericNavigableComic):
    """Retriever for the Gerbil With A Jetpack comics."""
    name = 'gerbil'
    long_name = 'Gerbil With A Jetpack'
    url = 'http://gerbilwithajetpack.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the info dictionary for the comic page parsed in `soup`.

        At least one image is required in the 'comic' div, and every image
        must share the same 'alt' and 'title' text.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        posted = soup.find("span", class_="post-date").string
        day = string_to_date(posted, "%B %d, %Y")
        pics = soup.find("div", id="comic").find_all("img")
        alt = pics[0]['alt']
        assert all(pic['alt'] == pic['title'] == alt for pic in pics)
        return {
            'img': [pic['src'] for pic in pics],
            'title': title,
            'alt': alt,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
        }
2453
2454
2455 View Code Duplication
class EveryDayBlues(GenericNavigableComic):
    """Retriever for the Every Day Blues comics."""
    name = "blues"
    long_name = "Every Day Blues"
    url = "http://everydayblues.net"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the info dictionary for the comic page parsed in `soup`.

        The post date is parsed with the "de_DE.utf8" locale. At most one
        image is accepted, and its 'alt' and 'title' must equal the post title.
        """
        title = soup.find("h2", class_="post-title").string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        posted = soup.find("span", class_="post-date").string
        day = string_to_date(posted, "%d. %B %Y", "de_DE.utf8")
        pics = soup.find("div", id="comic").find_all("img")
        assert all(pic['alt'] == pic['title'] == title for pic in pics)
        assert len(pics) <= 1
        return {
            'img': [pic['src'] for pic in pics],
            'title': title,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
        }
2481
2482
2483
class BiterComics(GenericNavigableComic):
    """Retriever for the Biter Comics."""
    name = "biter"
    long_name = "Biter Comics"
    url = "http://www.bitercomics.com"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the info dictionary for the comic page parsed in `soup`.

        Exactly one image is expected in the 'comic' div, with matching
        'alt' and 'title' attributes.
        """
        title = soup.find("h1", class_="entry-title").string
        author_span = soup.find("span", class_="author vcard")
        author = author_span.find("a").string
        posted = soup.find("span", class_="entry-date").string
        day = string_to_date(posted, "%B %d, %Y")
        pics = soup.find("div", id="comic").find_all("img")
        assert all(pic['alt'] == pic['title'] for pic in pics)
        assert len(pics) == 1
        alt = pics[0]['alt']
        return {
            'img': [pic['src'] for pic in pics],
            'title': title,
            'alt': alt,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
        }
2511
2512
2513 View Code Duplication
class TheAwkwardYeti(GenericNavigableComic):
    """Retriever for The Awkward Yeti comics."""
    # Also on http://www.gocomics.com/the-awkward-yeti
    # Also on http://larstheyeti.tumblr.com
    # Also on https://tapastic.com/series/TheAwkwardYeti
    name = 'yeti'
    long_name = 'The Awkward Yeti'
    url = 'http://theawkwardyeti.com'
    _categories = ('YETI', )
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, image URLs and date for the comic page in `soup`."""
        title = soup.find('h2', class_='post-title').string
        posted = soup.find("span", class_="post-date").string
        day = string_to_date(posted, "%B %d, %Y")
        pics = soup.find("div", id="comic").find_all("img")
        # Only the first image is required to have matching alt/title.
        assert all(pic['alt'] == pic['title'] for pic in pics[:1])
        return {
            'img': [pic['src'] for pic in pics],
            'title': title,
            'day': day.day,
            'month': day.month,
            'year': day.year,
        }
2540
2541
2542
class PleasantThoughts(GenericNavigableComic):
    """Retriever for the Pleasant Thoughts comics."""
    name = 'pleasant'
    long_name = 'Pleasant Thoughts'
    url = 'http://pleasant-thoughts.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title and image URLs found inside the 'post-content' div."""
        post = soup.find('div', class_='post-content')
        heading = post.find('h2', class_='post-title')
        title = heading.string
        entry = post.find("div", class_="entry")
        pics = entry.find_all("img")
        return {
            'title': title,
            'img': [pic['src'] for pic in pics],
        }
2560
2561
2562
class MisterAndMe(GenericNavigableComic):
    """Retriever for the Mister & Me comics."""
    # Also on http://www.gocomics.com/mister-and-me
    # Also on https://tapastic.com/series/Mister-and-Me
    name = 'mister'
    long_name = 'Mister & Me'
    url = 'http://www.mister-and-me.com'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the info dictionary for the comic page parsed in `soup`.

        At most one image is accepted; 'alt' is "" when the page has none.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        posted = soup.find("span", class_="post-date").string
        day = string_to_date(posted, "%B %d, %Y")
        pics = soup.find("div", id="comic").find_all("img")
        assert all(pic['alt'] == pic['title'] for pic in pics)
        assert len(pics) <= 1
        if pics:
            alt = pics[0]['alt']
        else:
            alt = ""
        return {
            'img': [pic['src'] for pic in pics],
            'title': title,
            'alt': alt,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
        }
2592
2593
2594
class LastPlaceComics(GenericNavigableComic):
    """Retriever for the Last Place Comics."""
    name = 'lastplace'
    long_name = 'Last Place Comics'
    url = "http://lastplacecomics.com"
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the info dictionary for the comic page parsed in `soup`.

        At most one image is accepted; 'alt' is "" when the page has none.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        posted = soup.find("span", class_="post-date").string
        day = string_to_date(posted, "%B %d, %Y")
        pics = soup.find("div", id="comic").find_all("img")
        assert all(pic['alt'] == pic['title'] for pic in pics)
        assert len(pics) <= 1
        if pics:
            alt = pics[0]['alt']
        else:
            alt = ""
        return {
            'img': [pic['src'] for pic in pics],
            'title': title,
            'alt': alt,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
        }
2622
2623
2624
class TalesOfAbsurdity(GenericNavigableComic):
    """Retriever for the Tales Of Absurdity comics."""
    # Also on http://tapastic.com/series/Tales-Of-Absurdity
    # Also on http://talesofabsurdity.tumblr.com
    name = 'absurdity'
    long_name = 'Tales of Absurdity'
    url = 'http://talesofabsurdity.com'
    _categories = ('ABSURDITY', )
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the info dictionary for the comic page parsed in `soup`.

        'alt' is taken from the first image, or "" when the page has none.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        posted = soup.find("span", class_="post-date").string
        day = string_to_date(posted, "%B %d, %Y")
        pics = soup.find("div", id="comic").find_all("img")
        assert all(pic['alt'] == pic['title'] for pic in pics)
        if pics:
            alt = pics[0]['alt']
        else:
            alt = ""
        return {
            'img': [pic['src'] for pic in pics],
            'title': title,
            'alt': alt,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
        }
2654
2655
2656
class EndlessOrigami(GenericEmptyComic, GenericNavigableComic):
    """Retriever for the Endless Origami comics."""
    name = "origami"
    long_name = "Endless Origami"
    url = "http://endlessorigami.com"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the info dictionary for the comic page parsed in `soup`.

        'alt' is taken from the first image, or "" when the page has none.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        posted = soup.find("span", class_="post-date").string
        day = string_to_date(posted, "%B %d, %Y")
        pics = soup.find("div", id="comic").find_all("img")
        assert all(pic['alt'] == pic['title'] for pic in pics)
        if pics:
            alt = pics[0]['alt']
        else:
            alt = ""
        return {
            'img': [pic['src'] for pic in pics],
            'title': title,
            'alt': alt,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
        }
2683
2684
2685
class PlanC(GenericNavigableComic):
    """Retriever for the Plan C comics."""
    name = 'planc'
    long_name = 'Plan C'
    url = 'http://www.plancomic.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, image URLs and date for the comic page in `soup`."""
        title = soup.find('h2', class_='post-title').string
        posted = soup.find("span", class_="post-date").string
        day = string_to_date(posted, "%B %d, %Y")
        comic_div = soup.find('div', id='comic')
        pics = comic_div.find_all('img')
        return {
            'title': title,
            'img': [pic['src'] for pic in pics],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
2707 View Code Duplication
2708
2709
class BuniComic(GenericNavigableComic):
    """Retriever for the Buni comics."""
    name = 'buni'
    long_name = 'BuniComics'
    url = 'http://www.bunicomic.com'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return image URL and title for the comic page in `soup`.

        Exactly one image is expected; its 'title' attribute is the title.
        """
        comic_div = soup.find('div', id='comic')
        pics = comic_div.find_all('img')
        assert all(pic['alt'] == pic['title'] for pic in pics)
        assert len(pics) == 1
        return {
            'img': [pic['src'] for pic in pics],
            'title': pics[0]['title'],
        }
2727
2728
2729
class GenericCommitStrip(GenericNavigableComic):
    """Shared retriever for Commit Strip; subclasses set the language URLs."""
    get_navi_link = get_a_rel_next
    get_first_comic_link = simulate_first_link
    # To be overridden by each language-specific subclass.
    first_url = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return titles, description and image URLs for the page in `soup`.

        'title2' joins the 'title' attributes of the images with spaces
        (an image without one contributes an empty string).
        """
        desc = soup.find('meta', property='og:description')['content']
        title = soup.find('meta', property='og:title')['content']
        pics = soup.find('div', class_='entry-content').find_all('img')
        pic_titles = [pic.get('title', '') for pic in pics]
        title2 = ' '.join(pic_titles)
        return {
            'title': title,
            'title2': title2,
            'description': desc,
            'img': [
                urljoin_wrapper(cls.url, convert_iri_to_plain_ascii_uri(pic['src']))
                for pic in pics
            ],
        }
2748
2749
2750
class CommitStripFr(GenericCommitStrip):
    """Retriever for the French edition of Commit Strip."""
    name = 'commit_fr'
    long_name = 'Commit Strip (Fr)'
    url = 'http://www.commitstrip.com/fr'
    _categories = ('FRANCAIS', )
    # Page used as the first comic of the series.
    first_url = 'http://www.commitstrip.com/fr/2012/02/22/interview/'
2757
2758
2759
class CommitStripEn(GenericCommitStrip):
    """Retriever for the English edition of Commit Strip."""
    name = 'commit_en'
    long_name = 'Commit Strip (En)'
    url = 'http://www.commitstrip.com/en'
    # Page used as the first comic of the series.
    first_url = 'http://www.commitstrip.com/en/2012/02/22/interview/'
2765
2766
2767 View Code Duplication
class GenericBoumerie(GenericNavigableComic):
    """Shared retriever for Boumeries; subclasses set date format and locale."""
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next
    # Both are passed to string_to_date and must be set by subclasses.
    date_format = NotImplemented
    lang = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the info dictionary for the comic page parsed in `soup`.

        The post date is parsed using cls.date_format and cls.lang.
        """
        title = soup.find('h2', class_='post-title').string
        short_url = soup.find('link', rel='shortlink')['href']
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        posted = soup.find('span', class_='post-date').string
        day = string_to_date(posted, cls.date_format, cls.lang)
        pics = soup.find('div', id='comic').find_all('img')
        assert all(pic['alt'] == pic['title'] for pic in pics)
        return {
            'short_url': short_url,
            'img': [pic['src'] for pic in pics],
            'title': title,
            'author': author,
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
2793
2794
2795
class BoumerieEn(GenericBoumerie):
    """Retriever for the English edition of Boumeries."""
    name = 'boumeries_en'
    long_name = 'Boumeries (En)'
    url = 'http://comics.boumerie.com'
    # Format and locale used to parse the post date.
    date_format = "%B %d, %Y"
    lang = 'en_GB.UTF-8'
2802
2803
2804
class BoumerieFr(GenericBoumerie):
    """Retriever for the French edition of Boumeries."""
    name = 'boumeries_fr'
    long_name = 'Boumeries (Fr)'
    url = 'http://bd.boumerie.com'
    _categories = ('FRANCAIS', )
    # Format and locale used to parse the post date.
    date_format = "%A, %d %B %Y"
    lang = "fr_FR.utf8"
2812
2813
2814 View Code Duplication
class UnearthedComics(GenericNavigableComic):
    """Class to retrieve Unearthed comics."""
    # Also on http://tapastic.com/series/UnearthedComics
    # Also on http://unearthedcomics.tumblr.com
    name = 'unearthed'
    long_name = 'Unearthed Comics'
    url = 'http://unearthedcomics.com'
    _categories = ('UNEARTHED', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://unearthedcomics.com/comics/world-with-turn-signals/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dictionary with the title (first <h1>, else first <h2>,
        else ""), the 'og:description' text ("" when the meta tag is
        missing), the shortlink, the image URLs and the publication date.
        """
        short_url = soup.find('link', rel='shortlink')['href']
        title_elt = soup.find('h1') or soup.find('h2')
        title = title_elt.string if title_elt else ""
        # Store the text of the meta tag, not the tag object itself, so that
        # 'description' is a plain string like in the other comic classes.
        desc_elt = soup.find('meta', property='og:description')
        desc = desc_elt['content'] if desc_elt else ""
        date_str = soup.find('time', class_='published updated hidden')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        post = soup.find('div', class_="entry content entry-content type-portfolio")
        imgs = post.find_all('img')
        return {
            'title': title,
            'description': desc,
            'url2': short_url,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
2846
2847
2848
class Optipess(GenericNavigableComic):
    """Retriever for the Optipess comics."""
    name = 'optipess'
    long_name = 'Optipess'
    url = 'http://www.optipess.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the info dictionary for the comic page parsed in `soup`.

        A page without a 'comic' div yields an empty image list and an
        empty 'alt'; otherwise all images must share one alt/title text.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        comic_div = soup.find('div', id='comic')
        if comic_div:
            pics = comic_div.find_all('img')
        else:
            pics = []
        if pics:
            alt = pics[0]['title']
        else:
            alt = ""
        assert all(pic['alt'] == pic['title'] == alt for pic in pics)
        posted = soup.find('span', class_='post-date').string
        day = string_to_date(posted, "%B %d, %Y")
        return {
            'title': title,
            'alt': alt,
            'author': author,
            'img': [pic['src'] for pic in pics],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
2876
2877
2878
class PainTrainComic(GenericNavigableComic):
    """Class to retrieve Pain Train Comics."""
    name = 'paintrain'
    long_name = 'Pain Train Comics'
    url = 'http://paintraincomic.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The comic number is parsed from the shortlink, which is expected to
        look like '<cls.url>/?p=<digits>'. At least one image is required in
        the 'comic' div and all images must share the same alt/title text.

        Raises AttributeError when the shortlink does not have that form.
        """
        title = soup.find('h2', class_='post-title').string
        short_url = soup.find('link', rel='shortlink')['href']
        # Escape the URL so that its dots only match literal dots, and demand
        # at least one digit so that int() never receives an empty string.
        short_url_re = re.compile(r'^%s/\?p=([0-9]+)' % re.escape(cls.url))
        num = int(short_url_re.match(short_url).group(1))
        imgs = soup.find('div', id='comic').find_all('img')
        alt = imgs[0]['title']
        assert all(i['alt'] == i['title'] == alt for i in imgs)
        date_str = soup.find('span', class_='post-date').string
        day = string_to_date(date_str, "%d/%m/%Y")
        return {
            'short_url': short_url,
            'num': num,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'alt': alt,
            'title': title,
        }
2908
2909
2910
class MoonBeard(GenericNavigableComic):
    """Class to retrieve MoonBeard comics."""
    # Also on http://blog.squiresjam.es/moonbeard
    # Also on http://www.webtoons.com/en/comedy/moon-beard/list?title_no=471
    name = 'moonbeard'
    long_name = 'Moon Beard'
    url = 'http://moonbeard.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The comic number is parsed from the shortlink, which is expected to
        look like '<cls.url>/?p=<digits>'. At least one image is required in
        the 'comic' div and all images must share the same alt/title text.
        Tags are the space-joined contents of the 'article:tag' meta tags.

        Raises AttributeError when the shortlink does not have that form.
        """
        title = soup.find('h2', class_='post-title').string
        short_url = soup.find('link', rel='shortlink')['href']
        # Escape the URL so that its dots only match literal dots, and demand
        # at least one digit so that int() never receives an empty string.
        short_url_re = re.compile(r'^%s/\?p=([0-9]+)' % re.escape(cls.url))
        num = int(short_url_re.match(short_url).group(1))
        imgs = soup.find('div', id='comic').find_all('img')
        alt = imgs[0]['title']
        assert all(i['alt'] == i['title'] == alt for i in imgs)
        date_str = soup.find('span', class_='post-date').string
        day = string_to_date(date_str, "%B %d, %Y")
        tags = ' '.join(t['content'] for t in soup.find_all('meta', property='article:tag'))
        author = soup.find('span', class_='post-author').string
        return {
            'short_url': short_url,
            'num': num,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'title': title,
            'tags': tags,
            'alt': alt,
            'author': author,
        }
2946
2947
2948
class AHammADay(GenericNavigableComic):
    """Class to retrieve class A Hamm A Day comics."""
    name = 'hamm'
    long_name = 'A Hamm A Day'
    url = 'http://www.ahammaday.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.ahammaday.com/today/3/6/french'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns None when the page has no matching <li>, instead of failing
        with AttributeError on a missing navigation item.
        """
        # prev is next / next is prev
        li = last_soup.find('li', class_='previous' if next_ else 'next')
        return li.find('a') if li else None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dictionary with the image URLs (from the <meta> tags with
        itemprop='image'), the 'og:title' title, the author and the
        publication date.
        """
        date_str = soup.find('time', class_='published')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        author = soup.find('span', class_='blog-author').find('a').string
        title = soup.find('meta', property='og:title')['content']
        imgs = soup.find_all('meta', itemprop='image')
        return {
            'img': [i['content'] for i in imgs],
            'title': title,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
        }
2979
2980
2981 View Code Duplication
class LittleLifeLines(GenericNavigableComic):
    """Retriever for the Little Life Lines comics."""
    # Also on https://little-life-lines.tumblr.com
    name = 'life'
    long_name = 'Little Life Lines'
    url = 'http://www.littlelifelines.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.littlelifelines.com/comics/well-done'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor to the next (or previous) comic, or None."""
        # prev is next / next is prev
        li_class = 'prev' if next_ else 'next'
        item = last_soup.find('li', class_=li_class)
        if not item:
            return None
        return item.find('a')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the info dictionary for the comic page parsed in `soup`.

        Only images that have a 'src' attribute are kept; the 'alt' of the
        first of them is reported, so at least one such image is required.
        """
        title = soup.find('meta', property='og:title')['content']
        desc = soup.find('meta', property='og:description')['content']
        published = soup.find('time', class_='published')['datetime']
        day = string_to_date(published, "%Y-%m-%d")
        author = soup.find('a', rel='author').string
        body = soup.find('div', class_="body entry-content")
        pics = [pic for pic in body.find_all('img') if pic.get('src') is not None]
        alt = pics[0]['alt']
        return {
            'title': title,
            'alt': alt,
            'description': desc,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [pic['src'] for pic in pics],
        }
3020
3021
3022
class GenericWordPressInkblot(GenericNavigableComic):
    """Shared retriever for comics published with WordPress and Inkblot."""
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'first webcomic' anchor found on the page at cls.url."""
        home_soup = get_soup_at_url(cls.url)
        return home_soup.find(
            'a',
            class_='webcomic-link webcomic1-link first-webcomic-link first-webcomic1-link')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, date and image URLs for the comic page in `soup`."""
        title = soup.find('meta', property='og:title')['content']
        pics = soup.find('div', class_='webcomic-image').find_all('img')
        published = soup.find('meta', property='article:published_time')['content']
        # Only the leading YYYY-MM-DD part of the timestamp is parsed.
        day = string_to_date(published[:10], "%Y-%m-%d")
        return {
            'title': title,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [pic['src'] for pic in pics],
        }
3045
3046
3047
class EverythingsStupid(GenericWordPressInkblot):
    """Scraper for Everything's Stupid (WordPress/Inkblot site)."""
    # Mirrors:
    #   http://tapastic.com/series/EverythingsStupid
    #   http://www.webtoons.com/en/challenge/everythings-stupid/list?title_no=14591
    #   http://everythingsstupidcomics.tumblr.com
    name = "stupid"
    long_name = "Everything's Stupid"
    url = "http://everythingsstupid.net"
3055
3056
3057
class TheIsmComics(GenericWordPressInkblot):
    """Scraper for The Ism (WordPress/Inkblot site)."""
    # Possibly mirrored on https://tapastic.com/series/TheIsm (?)
    name = "theism"
    long_name = "The Ism"
    url = "http://www.theism-comics.com"
3063
3064
3065
class WoodenPlankStudios(GenericWordPressInkblot):
    """Scraper for Wooden Plank Studios (WordPress/Inkblot site)."""
    name = "woodenplank"
    long_name = "Wooden Plank Studios"
    url = "http://woodenplankstudios.com"
3070
3071
3072
class ElectricBunnyComic(GenericNavigableComic):
    """Scraper for Electric Bunny Comics."""
    # Also on http://electricbunnycomics.tumblr.com
    name = "bunny"
    long_name = "Electric Bunny Comic"
    url = "http://www.electricbunnycomics.com/View/Comic/153/Welcome+to+Hell"
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the element wrapping the image whose alt text is 'First'."""
        first_img = get_soup_at_url(cls.url).find('img', alt='First')
        return first_img.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the parent of the 'Next' (or 'Back') image, None if absent."""
        wanted_alt = 'Back'
        if next_:
            wanted_alt = 'Next'
        nav_img = last_soup.find('img', alt=wanted_alt)
        if not nav_img:
            return None
        return nav_img.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Read the title and image urls from the page's Open Graph metadata."""
        comic_title = soup.find('meta', property='og:title')['content']
        og_images = soup.find_all('meta', property='og:image')
        return {
            'title': comic_title,
            'img': [meta['content'] for meta in og_images],
        }
3100
3101
3102
class SheldonComics(GenericNavigableComic):
    """Scraper for Sheldon comics."""
    # Also on http://www.gocomics.com/sheldon
    name = "sheldon"
    long_name = "Sheldon Comics"
    url = "http://www.sheldoncomics.com"

    @classmethod
    def get_first_comic_link(cls):
        """Return the anchor with id 'nav-first' from the home page."""
        home_page = get_soup_at_url(cls.url)
        return home_page.find("a", id="nav-first")

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the first next/prev anchor that does not target the bare home url."""
        nav_id = "nav-next" if next_ else "nav-prev"
        candidates = last_soup.find_all("a", id=nav_id)
        usable = (a for a in candidates if a['href'] != 'http://www.sheldoncomics.com')
        return next(usable, None)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract the title and image url from the 'comic-foot' section."""
        foot_imgs = soup.find("div", id="comic-foot").find_all("img")
        # Sanity checks: alt and title agree, and exactly one image is present.
        assert all(img['alt'] == img['title'] for img in foot_imgs)
        assert len(foot_imgs) == 1
        comic_title = foot_imgs[0]['title']
        return {
            'title': comic_title,
            'img': [img['src'] for img in foot_imgs],
        }
3133
3134
3135
class Ubertool(GenericNavigableComic):
    """Scraper for Ubertool comics."""
    # Also on http://ubertool.tumblr.com
    # Also on https://tapastic.com/series/ubertool
    name = "ubertool"
    long_name = "Ubertool"
    url = "http://ubertoolcomic.com"
    _categories = ("UBERTOOL",)
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_a_comicnavbase_comicnavnext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, post date and image urls from a comic page."""
        post_title = soup.find('h2', class_='post-title').string
        post_date = soup.find('span', class_='post-date').string
        # Dates are parsed with the "%B %d, %Y" format (full month name first).
        parsed = string_to_date(post_date, "%B %d, %Y")
        comic_imgs = soup.find('div', id='comic').find_all('img')
        return {
            'img': [img['src'] for img in comic_imgs],
            'title': post_title,
            'month': parsed.month,
            'year': parsed.year,
            'day': parsed.day,
        }
3160
3161
3162 View Code Duplication
class EarthExplodes(GenericNavigableComic):
    """Scraper for The Earth Explodes comics."""
    name = "earthexplodes"
    long_name = "The Earth Explodes"
    url = "http://www.earthexplodes.com"
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = "http://www.earthexplodes.com/comics/000/"

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor whose id is 'next' (or 'prev')."""
        anchor_id = 'next' if next_ else 'prev'
        return last_soup.find('a', id=anchor_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract page title, image urls and the first image's title text."""
        page_title = soup.find('title').string
        img_tags = soup.find('div', id='image').find_all('img')
        # The hover text of the first image is stored as 'alt' ('' if missing).
        hover_text = img_tags[0].get('title', '')
        absolute_urls = [urljoin_wrapper(cls.url, tag['src']) for tag in img_tags]
        return {
            'img': absolute_urls,
            'title': page_title,
            'alt': hover_text,
        }
3187
3188
3189 View Code Duplication
class PomComics(GenericNavigableComic):
    """Scraper for Pom Comics / Piece of Me."""
    name = "pom"
    long_name = "Pom Comics / Piece of Me"
    url = "http://www.pomcomic.com"
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the anchor with class 'btn_first' from the home page."""
        home_page = get_soup_at_url(cls.url)
        return home_page.find('a', class_='btn_first')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor with class 'btn_next' (or 'btn_prev')."""
        button_class = 'btn_next' if next_ else 'btn_prev'
        return last_soup.find('a', class_=button_class)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, description, keywords and image urls from a page."""
        comic_name = soup.find('h1', id="comic-name").string
        description = soup.find('meta', property='og:description')['content']
        keywords = soup.find('meta', attrs={'name': 'keywords'})['content']
        img_tags = soup.find('div', class_='comic').find_all('img')
        absolute_urls = [urljoin_wrapper(cls.url, tag['src']) for tag in img_tags]
        return {
            'title': comic_name,
            'desc': description,
            'tags': keywords,
            'img': absolute_urls,
        }
3219
3220
3221
class CubeDrone(GenericNavigableComic):
    """Class to retrieve Cube Drone comics."""
    name = 'cubedrone'
    long_name = 'Cube Drone'
    url = 'http://cube-drone.com/comics'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        The link is the parent of the 'glyphicon-backward' span on the
        home page.
        """
        return get_soup_at_url(cls.url).find('span', class_='glyphicon glyphicon-backward').parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns the parent of the chevron span ('right' for next, 'left' for
        previous), or None when the page has no such span.
        """
        class_ = 'glyphicon glyphicon-chevron-' + ('right' if next_ else 'left')
        span = last_soup.find('span', class_=class_)
        # find() returns None when nothing matches; report "no link" instead
        # of raising AttributeError on None.parent.
        return span.parent if span is not None else None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Title and secondary url come from the twitter meta tags; the hover
        title and alt text come from the first comic image.
        """
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        url2 = soup.find('meta', attrs={'name': 'twitter:url'})['content']
        # date_str = soup.find('h2', class_='comic_title').find('small').string
        # day = string_to_date(date_str, "%B %d, %Y, %I:%M %p")
        imgs = soup.find_all('img', class_='comic img-responsive')
        title2 = imgs[0]['title']
        alt = imgs[0]['alt']
        return {
            'url2': url2,
            'title': title,
            'title2': title2,
            'alt': alt,
            'img': [i['src'] for i in imgs],
        }
3256
3257
3258
class MakeItStoopid(GenericNavigableComic):
    """Scraper for Make It Stoopid comics."""
    name = "stoopid"
    long_name = "Make it stoopid"
    url = "http://makeitstoopid.com/comic.php"

    @classmethod
    def get_nav(cls, soup):
        """Return the five navigation entries of a page.

        The 'cnav' elements are split after the fifth one and both parts are
        asserted to be equal. Entries without an href are replaced by None.
        """
        nav_items = soup.find_all(class_='cnav')
        first_bar = nav_items[:5]
        second_bar = nav_items[5:]
        assert first_bar == second_bar
        # Positions are: begin, prev, archive, next_, end
        return [item if item.get('href') is not None else None for item in first_bar]

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'begin' navigation entry of the home page."""
        home_nav = cls.get_nav(get_soup_at_url(cls.url))
        return home_nav[0]

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the 'next' (index 3) or 'prev' (index 1) navigation entry."""
        position = 3 if next_ else 1
        return cls.get_nav(last_soup)[position]

    @classmethod
    def get_comic_info(cls, soup, link):
        """Take the title from the link and the image urls from the page."""
        comic_title = link['title']
        comic_imgs = soup.find_all('img', id='comicimg')
        return {
            'title': comic_title,
            'img': [img['src'] for img in comic_imgs],
        }
3292
3293
3294 View Code Duplication
class MarketoonistComics(GenericNavigableComic):
    """Scraper for Marketoonist comics."""
    name = "marketoonist"
    long_name = "Marketoonist"
    url = "https://marketoonist.com/cartoons"
    get_first_comic_link = simulate_first_link
    get_navi_link = get_link_rel_next
    first_url = "https://marketoonist.com/2002/10/the-8-types-of-brand-managers-2.html"

    @classmethod
    def get_comic_info(cls, soup, link):
        """Read image urls, publication date and title from the page metadata."""
        og_images = soup.find_all('meta', property='og:image')
        published = soup.find('meta', property='article:published_time')['content']
        # Only the leading YYYY-MM-DD part of the timestamp is parsed.
        pub_date = string_to_date(published[:10], "%Y-%m-%d")
        og_title = soup.find('meta', property='og:title')['content']
        return {
            'img': [meta['content'] for meta in og_images],
            'day': pub_date.day,
            'month': pub_date.month,
            'year': pub_date.year,
            'title': og_title,
        }
3317
3318
3319 View Code Duplication
class ConsoliaComics(GenericNavigableComic):
    """Scraper for Consolia comics."""
    name = "consolia"
    long_name = "consolia"
    url = "https://consolia-comic.com"
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the anchor with class 'first' from the home page."""
        home_page = get_soup_at_url(cls.url)
        return home_page.find('a', class_='first')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor with class 'next' (or 'prev')."""
        anchor_class = 'next' if next_ else 'prev'
        return last_soup.find('a', class_=anchor_class)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, image urls and publication date from a comic page."""
        og_title = soup.find('meta', property='og:title')['content']
        stamp = soup.find('time')["datetime"]
        pub_date = string_to_date(stamp, "%Y-%m-%d")
        og_images = soup.find_all('meta', property='og:image')
        return {
            'title': og_title,
            'img': [meta['content'] for meta in og_images],
            'day': pub_date.day,
            'month': pub_date.month,
            'year': pub_date.year,
        }
3350
3351
3352 View Code Duplication
class TuMourrasMoinsBete(GenericNavigableComic):
    """Scraper for Tu Mourras Moins Bete comics."""
    name = "mourrasmoinsbete"
    long_name = "Tu Mourras Moins Bete"
    url = "http://tumourrasmoinsbete.blogspot.fr"
    _categories = ("FRANCAIS",)
    get_first_comic_link = simulate_first_link
    first_url = "http://tumourrasmoinsbete.blogspot.fr/2008/06/essai.html"

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the blog pager anchor: 'newer' for next, 'older' for previous."""
        if next_:
            pager_id = 'Blog1_blog-pager-newer-link'
        else:
            pager_id = 'Blog1_blog-pager-older-link'
        return last_soup.find('a', id=pager_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract page title, author and the image urls of the article body."""
        page_title = soup.find('title').string
        body_imgs = soup.find('div', itemprop='description articleBody').find_all('img')
        author_name = soup.find('span', itemprop='author').string
        return {
            'img': [img['src'] for img in body_imgs],
            'author': author_name,
            'title': page_title,
        }
3377
3378
3379
class GeekAndPoke(GenericNavigableComic):
    """Class to retrieve Geek And Poke comics."""
    name = 'geek'
    long_name = 'Geek And Poke'
    url = 'http://geek-and-poke.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://geek-and-poke.com/geekandpoke/2006/8/27/a-new-place-for-a-not-so-old-blog.html'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic."""
        # NOTE(review): 'prev-item' is used for next_ and 'next-item' for
        # previous - presumably the site names them from newest to oldest;
        # confirm against the live markup before "fixing" the order.
        return last_soup.find('a', class_='prev-item' if next_ else 'next-item')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with title, alt text of the first image ('' when the
        page has no image), description, author, publication date and the
        absolute image urls.
        """
        title = soup.find('meta', property='og:title')['content']
        desc = soup.find('meta', property='og:description')['content']
        date_str = soup.find('time', class_='published')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        author = soup.find('a', rel='author').string
        # Content lives in either of two containers depending on the post.
        div_content = (soup.find('div', class_="body entry-content") or
                       soup.find('div', class_="special-content"))
        imgs = div_content.find_all('img')
        imgs = [i for i in imgs if i.get('src') is not None]
        # NOTE(review): `'title' not in i` tests the tag's children, not its
        # attributes, so this check likely never compares alt and title.
        # Left unchanged to avoid new assertion failures on existing pages.
        assert all('title' not in i or i['alt'] == i['title'] for i in imgs)
        # Keep 'alt' a string in every case (the fallback used to be a list).
        alt = imgs[0].get('alt', "") if imgs else ""
        return {
            'title': title,
            'alt': alt,
            'description': desc,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
        }
3417
3418
3419 View Code Duplication
class GloryOwlComix(GenericNavigableComic):
    """Scraper for Glory Owl comics."""
    name = "gloryowl"
    long_name = "Glory Owl"
    url = "http://gloryowlcomix.blogspot.fr"
    _categories = ("NSFW", "FRANCAIS")
    get_first_comic_link = simulate_first_link
    first_url = "http://gloryowlcomix.blogspot.fr/2013/02/1_7.html"

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the blog pager anchor: 'newer' for next, 'older' for previous."""
        if next_:
            pager_id = 'Blog1_blog-pager-newer-link'
        else:
            pager_id = 'Blog1_blog-pager-older-link'
        return last_soup.find('a', id=pager_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract page title, author and the 'image_src' link targets."""
        page_title = soup.find('title').string
        image_links = soup.find_all('link', rel='image_src')
        author_name = soup.find('a', rel='author').string
        return {
            'img': [lnk['href'] for lnk in image_links],
            'author': author_name,
            'title': page_title,
        }
3444
3445
3446
class GenericTumblrV1(GenericComic):
    """Base scraper for comics published on Tumblr, read through the V1 API."""
    _categories = ("TUMBLR",)

    @classmethod
    def get_next_comic(cls, last_comic):
        """Yield the comics built from the posts returned by get_posts.

        Posts for which get_comic_info returns None are skipped.
        """
        for post in cls.get_posts(last_comic):
            info = cls.get_comic_info(post)
            if info is None:
                continue
            yield info

    @classmethod
    def get_url_from_post(cls, post):
        """Return the 'url' attribute of a post element."""
        return post['url']

    @classmethod
    def get_api_url(cls):
        """Return the '/api/read/' endpoint joined onto the class url."""
        return urljoin_wrapper(cls.url, '/api/read/')

    @classmethod
    def get_comic_info(cls, post):
        """Build the comic dictionary for a post; None for non-photo posts."""
        if post['type'] != 'photo':
            return None
        tumblr_id = int(post['id'])
        api_url = cls.get_api_url() + '?id=%d' % tumblr_id
        timestamp = int(post['unix-timestamp'])
        published = datetime.datetime.fromtimestamp(timestamp).date()
        caption = post.find('photo-caption')
        if caption:
            title = caption.string
        else:
            title = ""
        tags = ' '.join(tag.string for tag in post.find_all('tag'))
        # Photos may appear in 'photo' tags and/or straight in the post
        photo_tags = post.find_all('photo') or [post]
        # Images are in multiple resolutions - taking the first one
        first_urls = [photo.find('photo-url') for photo in photo_tags]
        return {
            'url': cls.get_url_from_post(post),
            'url2': post['url-with-slug'],
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'title': title,
            'tags': tags,
            'img': [url_tag.string for url_tag in first_urls],
            'tumblr-id': tumblr_id,
            'api_url': api_url,
        }

    @classmethod
    def get_posts(cls, last_comic, nb_post_per_call=10):
        """Get posts using API. nb_post_per_call is max 50.

        The API is paged from newer to older posts (as per the tumblr v1
        api); the collected posts are handed back in chronological order.
        Collection stops at the post whose url matches last_comic['url'].
        If that post is never met, a message is printed and an empty list
        is returned.
        """
        stop_url = last_comic['url'] if last_comic else None
        collected = []
        if last_comic is not None:
            # Tumblr posts get deleted from time to time. If the previously
            # retrieved post is gone, paging through everything to find it
            # would be a long and pointless search: give up early and say so.
            previous_api_url = last_comic['api_url']
            try:
                get_soup_at_url(previous_api_url)
            except urllib.error.HTTPError:
                try:
                    get_soup_at_url(cls.url)
                except urllib.error.HTTPError:
                    print("Did not find previous post nor main url %s" % cls.url)
                else:
                    print("Did not find previous post %s : it might have been deleted" % previous_api_url)
                return reversed(collected)
        api_url = cls.get_api_url()
        summary = get_soup_at_url(api_url).find('posts')
        start = int(summary['start'])
        total = int(summary['total'])
        assert start == 0
        for offset in range(0, total, nb_post_per_call):
            page_url = api_url + '?start=%d&num=%d' % (offset, nb_post_per_call)
            page = get_soup_at_url(page_url).find('posts')
            page_start = int(page['start'])
            page_total = int(page['total'])
            assert offset == page_start, "%d != %d" % (offset, page_start)
            # This may happen and should be handled in the future
            assert total == page_total, "%d != %d" % (total, page_total)
            for post in page.find_all('post'):
                if stop_url and stop_url == cls.get_url_from_post(post):
                    return reversed(collected)
                collected.append(post)
        if stop_url is None:
            return reversed(collected)
        print("Did not find %s : there might be a problem" % stop_url)
        return []
3539
3540
3541
class SaturdayMorningBreakfastCerealTumblr(GenericTumblrV1):
    """Scraper for Saturday Morning Breakfast Cereal, read via the Tumblr V1 API."""
    # Mirrors:
    #   http://www.gocomics.com/saturday-morning-breakfast-cereal
    #   http://www.smbc-comics.com
    name = "smbc-tumblr"
    long_name = "Saturday Morning Breakfast Cereal (from Tumblr)"
    url = "http://smbc-comics.tumblr.com"
    _categories = ("SMBC",)
3549
3550
3551
class IrwinCardozo(GenericTumblrV1):
    """Scraper for Irwin Cardozo comics, read via the Tumblr V1 API."""
    name = "irwinc"
    long_name = "Irwin Cardozo"
    url = "http://irwincardozocomics.tumblr.com"
3556
3557
3558
class AccordingToDevin(GenericTumblrV1):
    """Scraper for According To Devin comics, read via the Tumblr V1 API."""
    name = "devin"
    long_name = "According To Devin"
    url = "http://accordingtodevin.tumblr.com"
3563
3564
3565
class ItsTheTieTumblr(GenericTumblrV1):
    """Scraper for It's the tie comics, read via the Tumblr V1 API."""
    # Mirrors:
    #   http://itsthetie.com
    #   https://tapastic.com/series/itsthetie
    name = 'tie-tumblr'
    long_name = 'It\'s the tie (from Tumblr)'
    url = 'http://itsthetie.tumblr.com'
    _categories = ("TIE",)
3573
3574
3575
class OctopunsTumblr(GenericTumblrV1):
    """Scraper for Octopuns comics, read via the Tumblr V1 API."""
    # Mirror: http://www.octopuns.net
    name = "octopuns-tumblr"
    long_name = "Octopuns (from Tumblr)"
    url = "http://octopuns.tumblr.com"
3581
3582
3583
class PicturesInBoxesTumblr(GenericTumblrV1):
    """Scraper for Pictures In Boxes comics, read via the Tumblr V1 API."""
    # Mirror: http://www.picturesinboxes.com
    name = "picturesinboxes-tumblr"
    long_name = "Pictures in Boxes (from Tumblr)"
    url = "http://picturesinboxescomic.tumblr.com"
3589
3590
3591
class TubeyToonsTumblr(GenericTumblrV1):
    """Scraper for TubeyToons comics, read via the Tumblr V1 API."""
    # Mirrors:
    #   http://tapastic.com/series/Tubey-Toons
    #   http://tubeytoons.com
    name = "tubeytoons-tumblr"
    long_name = "Tubey Toons (from Tumblr)"
    url = "http://tubeytoons.tumblr.com"
    # NOTE(review): the tag reads 'TUNEYTOONS' (not 'TUBEYTOONS'); check the
    # spelling used by any related classes before changing it.
    _categories = ("TUNEYTOONS",)
3599
3600
3601
class UnearthedComicsTumblr(GenericTumblrV1):
    """Scraper for Unearthed comics, read via the Tumblr V1 API."""
    # Mirrors:
    #   http://tapastic.com/series/UnearthedComics
    #   http://unearthedcomics.com
    name = "unearthed-tumblr"
    long_name = "Unearthed Comics (from Tumblr)"
    url = "http://unearthedcomics.tumblr.com"
    _categories = ("UNEARTHED",)
3609
3610
3611
class PieComic(GenericTumblrV1):
    """Scraper for Pie Comic, read via the Tumblr V1 API."""
    name = "pie"
    long_name = "Pie Comic"
    url = 'http://piecomic.tumblr.com'
3616
3617
3618
class MrEthanDiamond(GenericTumblrV1):
    """Scraper for Mr Ethan Diamond comics, read via the Tumblr V1 API."""
    name = "diamond"
    long_name = "Mr Ethan Diamond"
    url = "http://mrethandiamond.tumblr.com"
3623
3624
3625
class Flocci(GenericTumblrV1):
    """Scraper for floccinaucinihilipilification comics, read via the Tumblr V1 API."""
    name = "flocci"
    long_name = "floccinaucinihilipilification"
    url = 'http://floccinaucinihilipilificationa.tumblr.com'
3630
3631
3632
class UpAndOut(GenericTumblrV1):
    """Scraper for Up & Out comics, read via the Tumblr V1 API."""
    # Mirror: http://tapastic.com/series/UP-and-OUT
    name = "upandout"
    long_name = "Up And Out (from Tumblr)"
    url = "http://upandoutcomic.tumblr.com"
3638
3639
3640
class Pundemonium(GenericTumblrV1):
    """Scraper for Pundemonium comics, read via the Tumblr V1 API."""
    name = "pundemonium"
    long_name = "Pundemonium"
    url = "http://monstika.tumblr.com"
3645
3646
3647
class PoorlyDrawnLinesTumblr(GenericTumblrV1):
    """Scraper for Poorly Drawn Lines comics, read via the Tumblr V1 API."""
    # Mirror: http://poorlydrawnlines.com
    name = "poorlydrawn-tumblr"
    long_name = "Poorly Drawn Lines (from Tumblr)"
    url = "http://pdlcomics.tumblr.com"
    _categories = ("POORLYDRAWN",)
3654
3655
3656
class PearShapedComics(GenericTumblrV1):
    """Scraper for Pear Shaped Comics, read via the Tumblr V1 API."""
    name = "pearshaped"
    long_name = "Pear-Shaped Comics"
    url = "http://pearshapedcomics.com"
3661
3662
3663
class PondScumComics(GenericTumblrV1):
    """Scraper for Pond Scum comics, read via the Tumblr V1 API."""
    name = "pond"
    long_name = "Pond Scum"
    url = "http://pondscumcomic.tumblr.com"
3668
3669
3670
class MercworksTumblr(GenericTumblrV1):
    """Scraper for Mercworks comics, read via the Tumblr V1 API."""
    # Mirror: http://mercworks.net
    name = "mercworks-tumblr"
    long_name = "Mercworks (from Tumblr)"
    url = "http://mercworks.tumblr.com"
3676
3677
3678
class OwlTurdTumblr(GenericTumblrV1):
    """Scraper for Owl Turd comics, read via the Tumblr V1 API."""
    # Mirror: http://tapastic.com/series/Owl-Turd-Comix
    name = "owlturd-tumblr"
    long_name = "Owl Turd (from Tumblr)"
    url = "http://owlturdcomix.tumblr.com"
    _categories = ("OWLTURD",)
3685
3686
3687
class VectorBelly(GenericTumblrV1):
    """Scraper for Vector Belly comics, read via the Tumblr V1 API."""
    # Mirror: http://vectorbelly.com
    name = "vector"
    long_name = "Vector Belly"
    url = "http://vectorbelly.tumblr.com"
3693
3694
3695
class GoneIntoRapture(GenericTumblrV1):
    """Scraper for Gone Into Rapture comics, read via the Tumblr V1 API."""
    # Mirrors:
    #   http://goneintorapture.tumblr.com
    #   http://tapastic.com/series/Goneintorapture
    name = "rapture"
    long_name = "Gone Into Rapture"
    url = "http://www.goneintorapture.com"
3702
3703
3704
class TheOatmealTumblr(GenericTumblrV1):
    """Scraper for The Oatmeal comics, read via the Tumblr V1 API."""
    # Mirror: http://theoatmeal.com
    name = "oatmeal-tumblr"
    long_name = "The Oatmeal (from Tumblr)"
    url = "http://oatmeal.tumblr.com"
3710
3711
3712
class HeckIfIKnowComicsTumblr(GenericTumblrV1):
    """Scraper for Heck If I Know comics, read via the Tumblr V1 API."""
    # Mirror: http://tapastic.com/series/Regular
    name = "heck-tumblr"
    long_name = "Heck if I Know comics (from Tumblr)"
    url = "http://heckifiknowcomics.com"
3718
3719
3720
class MyJetPack(GenericTumblrV1):
    """Scraper for My Jet Pack comics, read via the Tumblr V1 API."""
    name = "jetpack"
    long_name = "My Jet Pack"
    url = "http://myjetpack.tumblr.com"
3725
3726
3727
class CheerUpEmoKidTumblr(GenericTumblrV1):
    """Scraper for CheerUpEmoKid comics, read via the Tumblr V1 API."""
    # Mirrors:
    #   http://www.cheerupemokid.com
    #   http://tapastic.com/series/CUEK
    name = "cuek-tumblr"
    long_name = "Cheer Up Emo Kid (from Tumblr)"
    url = "http://enzocomics.tumblr.com"
3734
3735
3736
class ForLackOfABetterComic(GenericTumblrV1):
    """Scraper for For Lack Of A Better Comic, read via the Tumblr V1 API."""
    # Mirror: http://forlackofabettercomic.com
    name = "lack"
    long_name = "For Lack Of A Better Comic"
    url = "http://forlackofabettercomic.tumblr.com"
3742
3743
3744
class ZenPencilsTumblr(GenericTumblrV1):
    """Scraper for ZenPencils comics, read via the Tumblr V1 API."""
    # Mirrors:
    #   http://zenpencils.com
    #   http://www.gocomics.com/zen-pencils
    name = "zenpencils-tumblr"
    long_name = "Zen Pencils (from Tumblr)"
    url = "http://zenpencils.tumblr.com"
    _categories = ("ZENPENCILS",)
3752
3753
3754
class ThreeWordPhraseTumblr(GenericTumblrV1):
    """Scraper for Three Word Phrase comics, read via the Tumblr V1 API."""
    # Mirror: http://threewordphrase.com
    name = "threeword-tumblr"
    long_name = "Three Word Phrase (from Tumblr)"
    url = "http://www.threewordphrase.tumblr.com"
3760
3761
3762
class TimeTrabbleTumblr(GenericTumblrV1):
    """Scraper for Time Trabble comics, read via the Tumblr V1 API."""
    # Mirror: http://timetrabble.com
    name = "timetrabble-tumblr"
    long_name = "Time Trabble (from Tumblr)"
    url = "http://timetrabble.tumblr.com"
3768
3769
3770
class SafelyEndangeredTumblr(GenericTumblrV1):
    """Scraper for Safely Endangered comics, read via the Tumblr V1 API."""
    # Mirror: http://www.safelyendangered.com
    name = "endangered-tumblr"
    long_name = "Safely Endangered (from Tumblr)"
    url = "http://tumblr.safelyendangered.com"
3776
3777
3778
class MouseBearComedyTumblr(GenericTumblrV1):
    """Scraper for Mouse Bear Comedy comics, read via the Tumblr V1 API."""
    # Mirror: http://www.mousebearcomedy.com
    name = "mousebear-tumblr"
    long_name = "Mouse Bear Comedy (from Tumblr)"
    url = "http://mousebearcomedy.tumblr.com"
3784
3785
3786
class BouletCorpTumblr(GenericTumblrV1):
    """Scraper for BouletCorp comics, read via the Tumblr V1 API."""
    # Mirror: http://www.bouletcorp.com
    name = "boulet-tumblr"
    long_name = "Boulet Corp (from Tumblr)"
    url = "http://bouletcorp.tumblr.com"
    _categories = ("BOULET",)
3793
3794
3795
class TheAwkwardYetiTumblr(GenericTumblrV1):
    """Scraper for The Awkward Yeti comics, read via the Tumblr V1 API."""
    # Mirrors:
    #   http://www.gocomics.com/the-awkward-yeti
    #   http://theawkwardyeti.com
    #   https://tapastic.com/series/TheAwkwardYeti
    name = "yeti-tumblr"
    long_name = "The Awkward Yeti (from Tumblr)"
    url = "http://larstheyeti.tumblr.com"
    _categories = ("YETI",)
3804
3805
3806
class NellucNhoj(GenericTumblrV1):
    """Scraper for NellucNhoj comics, read via the Tumblr V1 API."""
    name = "nhoj"
    long_name = "Nelluc Nhoj"
    url = "http://nellucnhoj.com"
3811
3812
3813
class DownTheUpwardSpiralTumblr(GenericTumblrV1):
    """Scraper for Down The Upward Spiral comics, read via the Tumblr V1 API."""
    # Mirror: http://www.downtheupwardspiral.com
    name = "spiral-tumblr"
    long_name = "Down the Upward Spiral (from Tumblr)"
    url = "http://downtheupwardspiral.tumblr.com"
3819
3820
3821
class AsPerUsualTumblr(GenericTumblrV1):
    """Class to retrieve As Per Usual comics."""
    # Also on https://tapastic.com/series/AsPerUsual
    name = 'usual-tumblr'
    long_name = 'As Per Usual (from Tumblr)'
    url = 'http://as-per-usual.tumblr.com'
    # The other comic classes declare their tags as `_categories`; declare it
    # here too so this class is tagged the same way. The original public
    # `categories` name is kept as an alias for anything already reading it.
    _categories = ('DAMILEE', )
    categories = _categories
3828
3829
3830
class HotComicsForCoolPeopleTumblr(GenericTumblrV1):
    """Class to retrieve Hot Comics For Cool People."""
    # Also on https://tapastic.com/series/Hot-Comics-For-Cool-People
    # Also on http://hotcomics.biz (links to tumblr)
    # Also on http://hcfcp.com (links to tumblr)
    name = 'hotcomics-tumblr'
    long_name = 'Hot Comics For Cool People (from Tumblr)'
    url = 'http://hotcomicsforcoolpeople.tumblr.com'
    # The other comic classes declare their tags as `_categories`; declare it
    # here too so this class is tagged the same way. The original public
    # `categories` name is kept as an alias for anything already reading it.
    _categories = ('DAMILEE', )
    categories = _categories
3839
3840
3841
class OneOneOneOneComicTumblr(GenericTumblrV1):
    """Retriever for 1111 Comics, based on GenericTumblrV1."""
    # Also published at http://www.1111comics.me
    # Also published at https://tapastic.com/series/1111-Comics
    name = '1111-tumblr'
    long_name = '1111 Comics (from Tumblr)'
    url = 'http://comics1111.tumblr.com'
    _categories = ('ONEONEONEONE',)
3849
3850
3851
class JhallComicsTumblr(GenericTumblrV1):
    """Retriever for Jhall Comics, based on GenericTumblrV1."""
    # Also published at http://jhallcomics.com
    name = 'jhall-tumblr'
    long_name = 'Jhall Comics (from Tumblr)'
    url = 'http://jhallcomics.tumblr.com'
3857
3858
3859
class BerkeleyMewsTumblr(GenericTumblrV1):
    """Retriever for Berkeley Mews comics, based on GenericTumblrV1."""
    # Also published at http://www.gocomics.com/berkeley-mews
    # Also published at http://www.berkeleymews.com
    name = 'berkeley-tumblr'
    long_name = 'Berkeley Mews (from Tumblr)'
    url = 'http://mews.tumblr.com'
    _categories = ('BERKELEY',)
3867
3868
3869
class JoanCornellaTumblr(GenericTumblrV1):
    """Retriever for Joan Cornella comics, based on GenericTumblrV1."""
    # Also published at http://joancornella.net
    name = 'cornella-tumblr'
    long_name = 'Joan Cornella (from Tumblr)'
    url = 'http://cornellajoan.tumblr.com'
3875
3876
3877
class RespawnComicTumblr(GenericTumblrV1):
    """Retriever for Respawn Comic, based on GenericTumblrV1."""
    # Also published at http://respawncomic.com
    name = 'respawn-tumblr'
    long_name = 'Respawn Comic (from Tumblr)'
    url = 'http://respawncomic.tumblr.com'
3883
3884
3885
class ChrisHallbeckTumblr(GenericTumblrV1):
    """Retriever for Chris Hallbeck comics, based on GenericTumblrV1."""
    # Also published at https://tapastic.com/ChrisHallbeck
    # Also published at http://maximumble.com
    # Also published at http://minimumble.com
    # Also published at http://thebookofbiff.com
    name = 'hallbeck-tumblr'
    # Display name fixed to match the spelling used by the class name and URL.
    long_name = 'Chris Hallbeck (from Tumblr)'
    url = 'http://chrishallbeck.tumblr.com'
    # NOTE(review): the category key is spelled 'HALLBACK'; it is left
    # unchanged because classes outside this section may share it - confirm
    # before renaming.
    _categories = ('HALLBACK', )
3895
3896
3897
class ComicNuggets(GenericTumblrV1):
    """Retriever for Comic Nuggets, based on GenericTumblrV1."""
    name = 'nuggets'
    long_name = 'Comic Nuggets'
    url = 'http://comicnuggets.com'
3902
3903
3904
class PigeonGazetteTumblr(GenericTumblrV1):
    """Retriever for The Pigeon Gazette comics, based on GenericTumblrV1."""
    # Also published at https://tapastic.com/series/The-Pigeon-Gazette
    name = 'pigeon-tumblr'
    long_name = 'The Pigeon Gazette (from Tumblr)'
    url = 'http://thepigeongazette.tumblr.com'
3910
3911
3912
class CancerOwl(GenericTumblrV1):
    """Retriever for Cancer Owl comics, based on GenericTumblrV1."""
    # Also published at http://cancerowl.com
    name = 'cancerowl-tumblr'
    long_name = 'Cancer Owl (from Tumblr)'
    url = 'http://cancerowl.tumblr.com'
3918
3919
3920
class FowlLanguageTumblr(GenericTumblrV1):
    """Retriever for Fowl Language comics, based on GenericTumblrV1."""
    # Also published at http://www.fowllanguagecomics.com
    # Also published at http://tapastic.com/series/Fowl-Language-Comics
    # Also published at http://www.gocomics.com/fowl-language
    name = 'fowllanguage-tumblr'
    long_name = 'Fowl Language Comics (from Tumblr)'
    url = 'http://fowllanguagecomics.tumblr.com'
    _categories = ('FOWLLANGUAGE',)
3929
3930
3931
class TheOdd1sOutTumblr(GenericTumblrV1):
    """Retriever for The Odd 1s Out comics, based on GenericTumblrV1."""
    # Also published at http://theodd1sout.com
    # Also published at https://tapastic.com/series/Theodd1sout
    name = 'theodd-tumblr'
    long_name = 'The Odd 1s Out (from Tumblr)'
    url = 'http://theodd1sout.tumblr.com'
3938
3939
3940
class TheUnderfoldTumblr(GenericTumblrV1):
    """Retriever for The Underfold comics, based on GenericTumblrV1."""
    # Also published at http://theunderfold.com
    name = 'underfold-tumblr'
    long_name = 'The Underfold (from Tumblr)'
    url = 'http://theunderfold.tumblr.com'
3946
3947
3948
class LolNeinTumblr(GenericTumblrV1):
    """Retriever for Lol Nein comics, based on GenericTumblrV1."""
    # Also published at http://lolnein.com
    name = 'lolnein-tumblr'
    long_name = 'Lol Nein (from Tumblr)'
    url = 'http://lolneincom.tumblr.com'
3954
3955
3956
class FatAwesomeComicsTumblr(GenericTumblrV1):
    """Retriever for Fat Awesome Comics, based on GenericTumblrV1."""
    # Also published at http://fatawesome.com/comics
    name = 'fatawesome-tumblr'
    long_name = 'Fat Awesome (from Tumblr)'
    url = 'http://fatawesomecomedy.tumblr.com'
3962
3963
3964
class TheWorldIsFlatTumblr(GenericTumblrV1):
    """Retriever for The World Is Flat comics, based on GenericTumblrV1."""
    # Also published at https://tapastic.com/series/The-World-is-Flat
    name = 'flatworld-tumblr'
    long_name = 'The World Is Flat (from Tumblr)'
    url = 'http://theworldisflatcomics.tumblr.com'
3970
3971
3972
class DorrisMc(GenericTumblrV1):
    """Retriever for Dorris Mc comics, based on GenericTumblrV1."""
    # Also published at http://www.gocomics.com/dorris-mccomics
    name = 'dorrismc'
    long_name = 'Dorris Mc'
    url = 'http://dorrismccomics.com'
3978
3979
3980
class LeleozTumblr(GenericEmptyComic, GenericTumblrV1):
    """Retriever for Leleoz comics, based on GenericTumblrV1.

    GenericEmptyComic is listed first among the bases (presumably to
    deactivate the comic - confirm against GenericEmptyComic).
    """
    # Also published at https://tapastic.com/series/Leleoz
    name = 'leleoz-tumblr'
    long_name = 'Leleoz (from Tumblr)'
    url = 'http://leleozcomics.tumblr.com'
3986
3987
3988
class MoonBeardTumblr(GenericTumblrV1):
    """Retriever for Moon Beard comics, based on GenericTumblrV1."""
    # Also published at http://moonbeard.com
    # Also published at http://www.webtoons.com/en/comedy/moon-beard/list?title_no=471
    name = 'moonbeard-tumblr'
    long_name = 'Moon Beard (from Tumblr)'
    url = 'http://blog.squiresjam.es/moonbeard'
3995
3996
3997
class AComik(GenericTumblrV1):
    """Retriever for A Comik, based on GenericTumblrV1."""
    name = 'comik'
    long_name = 'A Comik'
    url = 'http://acomik.com'
4002
4003
4004
class ClassicRandy(GenericTumblrV1):
    """Retriever for Classic Randy comics, based on GenericTumblrV1."""
    name = 'randy'
    long_name = 'Classic Randy'
    url = 'http://classicrandy.tumblr.com'
4009
4010
4011
class DagssonTumblr(GenericTumblrV1):
    """Retriever for Dagsson comics, based on GenericTumblrV1."""
    # Also published at http://www.dagsson.com
    name = 'dagsson-tumblr'
    long_name = 'Dagsson Hugleikur (from Tumblr)'
    url = 'http://hugleikurdagsson.tumblr.com'
4017
4018
4019
class LinsEditionsTumblr(GenericTumblrV1):
    """Retriever for L.I.N.S. Editions comics, based on GenericTumblrV1."""
    # Also published at https://linsedition.com
    # Now hosted at http://warandpeas.tumblr.com
    name = 'lins-tumblr'
    long_name = 'L.I.N.S. Editions (from Tumblr)'
    url = 'http://linscomics.tumblr.com'
    _categories = ('LINS',)
4027
4028
4029
class WarAndPeasTumblr(GenericTumblrV1):
    """Retriever for War And Peas comics, based on GenericTumblrV1."""
    # Previously hosted at http://linscomics.tumblr.com
    name = 'warandpeas-tumblr'
    long_name = 'War And Peas (from Tumblr)'
    url = 'http://warandpeas.tumblr.com'
    _categories = ('WARANDPEAS',)
4036
4037
4038
class OrigamiHotDish(GenericTumblrV1):
    """Retriever for Origami Hot Dish comics, based on GenericTumblrV1."""
    name = 'origamihotdish'
    long_name = 'Origami Hot Dish'
    url = 'http://origamihotdish.com'
4043
4044
4045
class HitAndMissComicsTumblr(GenericTumblrV1):
    """Retriever for Hit and Miss Comics, based on GenericTumblrV1."""
    name = 'hitandmiss'
    long_name = 'Hit and Miss Comics'
    url = 'http://hitandmisscomics.tumblr.com'
4050
4051
4052
class HMBlanc(GenericTumblrV1):
    """Retriever for HM Blanc comics, based on GenericTumblrV1."""
    name = 'hmblanc'
    long_name = 'HM Blanc'
    url = 'http://hmblanc.tumblr.com'
4057
4058
4059
class TalesOfAbsurdityTumblr(GenericTumblrV1):
    """Retriever for Tales Of Absurdity comics, based on GenericTumblrV1."""
    # Also published at http://talesofabsurdity.com
    # Also published at http://tapastic.com/series/Tales-Of-Absurdity
    name = 'absurdity-tumblr'
    long_name = 'Tales of Absurdity (from Tumblr)'
    url = 'http://talesofabsurdity.tumblr.com'
    _categories = ('ABSURDITY',)
4067
4068
4069
class RobbieAndBobby(GenericTumblrV1):
    """Retriever for Robbie And Bobby comics, based on GenericTumblrV1."""
    # Also published at http://robbieandbobby.com
    name = 'robbie-tumblr'
    long_name = 'Robbie And Bobby (from Tumblr)'
    url = 'http://robbieandbobby.tumblr.com'
4075
4076
4077
class ElectricBunnyComicTumblr(GenericTumblrV1):
    """Retriever for Electric Bunny Comics, based on GenericTumblrV1."""
    # Also published at http://www.electricbunnycomics.com/View/Comic/153/Welcome+to+Hell
    name = 'bunny-tumblr'
    long_name = 'Electric Bunny Comic (from Tumblr)'
    url = 'http://electricbunnycomics.tumblr.com'
4083
4084
4085
class Hoomph(GenericTumblrV1):
    """Retriever for Hoomph comics, based on GenericTumblrV1."""
    name = 'hoomph'
    long_name = 'Hoomph'
    url = 'http://hoom.ph'
4090
4091
4092
class BFGFSTumblr(GenericTumblrV1):
    """Retriever for BFGFS comics, based on GenericTumblrV1."""
    # Also published at https://tapastic.com/series/BFGFS
    # Also published at http://bfgfs.com
    name = 'bfgfs-tumblr'
    long_name = 'BFGFS (from Tumblr)'
    url = 'http://bfgfs.tumblr.com'
4099
4100
4101
class DoodleForFood(GenericTumblrV1):
    """Retriever for Doodle For Food comics, based on GenericTumblrV1."""
    # Also published at http://doodleforfood.com
    name = 'doodle'
    long_name = 'Doodle For Food'
    url = 'http://doodleforfood.com'
4107
4108
4109
class CassandraCalinTumblr(GenericTumblrV1):
    """Retriever for C. Cassandra comics, based on GenericTumblrV1."""
    # Also published at http://cassandracalin.com
    # Also published at https://tapastic.com/series/C-Cassandra-comics
    name = 'cassandra-tumblr'
    long_name = 'Cassandra Calin (from Tumblr)'
    url = 'http://c-cassandra.tumblr.com'
4116
4117
4118
class DougWasTaken(GenericTumblrV1):
    """Retriever for Doug Was Taken comics, based on GenericTumblrV1."""
    name = 'doug'
    long_name = 'Doug Was Taken'
    url = 'http://dougwastaken.tumblr.com'
4123
4124
4125
class MandatoryRollerCoaster(GenericTumblrV1):
    """Retriever for Mandatory Roller Coaster comics, based on GenericTumblrV1."""
    name = 'rollercoaster'
    long_name = 'Mandatory Roller Coaster'
    url = 'http://mandatoryrollercoaster.com'
4130
4131
4132
class CEstPasEnRegardantSesPompes(GenericTumblrV1):
    """Retriever for C'Est Pas En Regardant Ses Pompes (...) comics, based on GenericTumblrV1."""
    name = 'cperspqccltt'
    long_name = 'C Est Pas En Regardant Ses Pompes (...)'
    url = 'http://cperspqccltt.tumblr.com'
4137
4138
4139
class TheGrohlTroll(GenericTumblrV1):
    """Retriever for The Grohl Troll comics, based on GenericTumblrV1."""
    name = 'grohltroll'
    long_name = 'The Grohl Troll'
    url = 'http://thegrohltroll.com'
4144
4145
4146
class WebcomicName(GenericTumblrV1):
    """Retriever for Webcomic Name comics, based on GenericTumblrV1."""
    name = 'webcomicname'
    long_name = 'Webcomic Name'
    url = 'http://webcomicname.com'
4151
4152
4153
class BooksOfAdam(GenericTumblrV1):
    """Retriever for Books of Adam comics, based on GenericTumblrV1."""
    # Also published at http://www.booksofadam.com
    name = 'booksofadam'
    long_name = 'Books of Adam'
    url = 'http://booksofadam.tumblr.com'
4159
4160
4161
class HarkAVagrant(GenericTumblrV1):
    """Retriever for Hark A Vagrant comics, based on GenericTumblrV1."""
    # Also published at http://www.harkavagrant.com
    name = 'hark-tumblr'
    long_name = 'Hark A Vagrant (from Tumblr)'
    url = 'http://beatonna.tumblr.com'
4167
4168
4169
class OurSuperAdventureTumblr(GenericTumblrV1):
    """Retriever for Our Super Adventure comics, based on GenericTumblrV1."""
    # Also published at https://tapastic.com/series/Our-Super-Adventure
    # Also published at http://www.oursuperadventure.com
    # Related link: http://sarahgraley.com
    name = 'superadventure-tumblr'
    long_name = 'Our Super Adventure (from Tumblr)'
    url = 'http://sarahssketchbook.tumblr.com'
4177
4178
4179
class JakeLikesOnions(GenericTumblrV1):
    """Retriever for Jake Likes Onions comics, based on GenericTumblrV1."""
    name = 'jake'
    long_name = 'Jake Likes Onions'
    url = 'http://jakelikesonions.com'
4184
4185
4186
class InYourFaceCake(GenericTumblrV1):
    """Retriever for In Your Face Cake comics, based on GenericTumblrV1."""
    name = 'inyourfacecake-tumblr'
    long_name = 'In Your Face Cake (from Tumblr)'
    url = 'http://in-your-face-cake.tumblr.com'
4191
4192
4193
class Robospunk(GenericTumblrV1):
    """Retriever for Robospunk comics, based on GenericTumblrV1."""
    name = 'robospunk'
    long_name = 'Robospunk'
    url = 'http://robospunk.com'
4198
4199
4200
class BananaTwinky(GenericTumblrV1):
    """Retriever for Banana Twinky comics, based on GenericTumblrV1."""
    name = 'banana'
    long_name = 'Banana Twinky'
    url = 'http://bananatwinky.tumblr.com'
4205
4206
4207
class YesterdaysPopcornTumblr(GenericTumblrV1):
    """Retriever for Yesterday's Popcorn comics, based on GenericTumblrV1."""
    # Also published at http://www.yesterdayspopcorn.com
    # Also published at https://tapastic.com/series/Yesterdays-Popcorn
    name = 'popcorn-tumblr'
    long_name = "Yesterday's Popcorn (from Tumblr)"
    url = 'http://yesterdayspopcorn.tumblr.com'
4214
4215
4216
class TwistedDoodles(GenericTumblrV1):
    """Retriever for Twisted Doodles comics, based on GenericTumblrV1."""
    name = 'twisted'
    long_name = 'Twisted Doodles'
    url = 'http://www.twisteddoodles.com'
4221
4222
4223
class UbertoolTumblr(GenericTumblrV1):
    """Retriever for Ubertool comics, based on GenericTumblrV1."""
    # Also published at http://ubertoolcomic.com
    # Also published at https://tapastic.com/series/ubertool
    name = 'ubertool-tumblr'
    long_name = 'Ubertool (from Tumblr)'
    url = 'http://ubertool.tumblr.com'
    _categories = ('UBERTOOL',)
4231
4232
4233
class LittleLifeLinesTumblr(GenericTumblrV1):
    """Retriever for Little Life Lines comics, based on GenericTumblrV1."""
    # Also published at http://www.littlelifelines.com
    name = 'life-tumblr'
    long_name = 'Little Life Lines (from Tumblr)'
    url = 'https://little-life-lines.tumblr.com'
4239
4240
4241
class TheyCanTalk(GenericTumblrV1):
    """Retriever for They Can Talk comics, based on GenericTumblrV1."""
    name = 'theycantalk'
    long_name = 'They Can Talk'
    url = 'http://theycantalk.com'
4246
4247
4248
class Will5NeverCome(GenericTumblrV1):
    """Retriever for Will 5:00 Never Come comics, based on GenericTumblrV1."""
    name = 'will5'
    long_name = 'Will 5:00 Never Come ?'
    url = 'http://will5nevercome.com'
4253
4254
4255
class Sephko(GenericTumblrV1):
    """Retriever for Sephko comics, based on GenericTumblrV1."""
    # Also published at http://www.sephko.com
    name = 'sephko'
    long_name = 'Sephko'
    url = 'http://sephko.tumblr.com'
4261
4262
4263
class BlazersAtDawn(GenericTumblrV1):
    """Retriever for Blazers At Dawn comics, based on GenericTumblrV1."""
    name = 'blazers'
    long_name = 'Blazers At Dawn'
    url = 'http://blazersatdawn.tumblr.com'
4268
4269
4270
class ArtByMoga(GenericEmptyComic, GenericTumblrV1):  # Deactivated: it downloads too many things
    """Retriever for Art By Moga comics, based on GenericTumblrV1."""
    name = 'moga'
    long_name = 'Art By Moga'
    url = 'http://artbymoga.tumblr.com'
4275
4276
4277
class VerbalVomitTumblr(GenericTumblrV1):
    """Retriever for Verbal Vomit comics, based on GenericTumblrV1."""
    # Also published at http://www.verbal-vomit.com
    name = 'vomit-tumblr'
    long_name = 'Verbal Vomit (from Tumblr)'
    url = 'http://verbalvomits.tumblr.com'
4283
4284
4285
class LibraryComic(GenericTumblrV1):
    """Retriever for LibraryComic, based on GenericTumblrV1."""
    # Also published at http://librarycomic.com
    name = 'library-tumblr'
    long_name = 'LibraryComic (from Tumblr)'
    url = 'http://librarycomic.tumblr.com'
4291
4292
4293
class TizzyStitchBirdTumblr(GenericTumblrV1):
    """Retriever for Tizzy Stitch Bird comics, based on GenericTumblrV1."""
    # Also published at http://tizzystitchbird.com
    # Also published at https://tapastic.com/series/TizzyStitchbird
    # Also published at http://www.webtoons.com/en/challenge/tizzy-stitchbird/list?title_no=50082
    name = 'tizzy-tumblr'
    long_name = 'Tizzy Stitch Bird (from Tumblr)'
    url = 'http://tizzystitchbird.tumblr.com'
4301
4302
4303
class VictimsOfCircumsolarTumblr(GenericTumblrV1):
    """Retriever for Victims Of Circumsolar comics, based on GenericTumblrV1."""
    # Also published at http://www.victimsofcircumsolar.com
    name = 'circumsolar-tumblr'
    long_name = 'Victims Of Circumsolar (from Tumblr)'
    url = 'http://victimsofcomics.tumblr.com'
4309
4310
4311
class RockPaperCynicTumblr(GenericTumblrV1):
    """Retriever for Rock Paper Cynic comics, based on GenericTumblrV1."""
    # Also published at http://www.rockpapercynic.com
    # Also published at https://tapastic.com/series/rockpapercynic
    name = 'rpc-tumblr'
    long_name = 'Rock Paper Cynic (from Tumblr)'
    url = 'http://rockpapercynic.tumblr.com'
4318
4319
4320
class DeadlyPanelTumblr(GenericTumblrV1):
    """Retriever for Deadly Panel comics, based on GenericTumblrV1."""
    # Also published at http://www.deadlypanel.com
    # Also published at https://tapastic.com/series/deadlypanel
    name = 'deadly-tumblr'
    long_name = 'Deadly Panel (from Tumblr)'
    url = 'http://deadlypanel.tumblr.com'
4327
4328
4329
class CatanaComics(GenericTumblrV1):
    """Retriever for Catana comics, based on GenericTumblrV1."""
    name = 'catana'
    long_name = 'Catana'
    url = 'http://www.catanacomics.com'
4334
4335
4336
class ShanghaiTango(GenericTumblrV1):
    """Retriever for the Shanghai Tango comic, based on GenericTumblrV1."""
    name = 'tango'
    long_name = 'Shanghai Tango'
    url = 'http://tango2010weibo.tumblr.com'
4341
4342
4343
class OffTheLeashDogTumblr(GenericTumblrV1):
    """Retriever for Off The Leash Dog comics, based on GenericTumblrV1."""
    # Also published at http://offtheleashdogcartoons.com
    # Also published at http://www.rupertfawcettcartoons.com
    name = 'offtheleash-tumblr'
    long_name = 'Off The Leash Dog (from Tumblr)'
    url = 'http://rupertfawcettsdoggyblog.tumblr.com'
    _categories = ('FAWCETT',)
4351
4352
4353
class ImogenQuestTumblr(GenericTumblrV1):
    """Retriever for Imogen Quest comics, based on GenericTumblrV1."""
    # Also published at http://imogenquest.net
    name = 'imogen-tumblr'
    long_name = 'Imogen Quest (from Tumblr)'
    url = 'http://imoquest.tumblr.com'
4359
4360
4361
class Shitfest(GenericTumblrV1):
    """Retriever for Shitfest comics, based on GenericTumblrV1."""
    name = 'shitfest'
    long_name = 'Shitfest'
    url = 'http://shitfestcomic.com'
4366
4367
4368
class HorovitzComics(GenericListableComic):
    """Base class holding the logic shared by the comics from Horovitz.

    Subclasses are expected to replace link_re with a compiled pattern whose
    first group captures the comic number found in an archive link's href.
    """
    url = 'http://www.horovitzcomics.com'
    _categories = ('HOROVITZ',)
    # The three groups are read as year, month and day (in that order).
    img_re = re.compile(r'.*comics/([0-9]*)/([0-9]*)/([0-9]*)/.*$')
    link_re = NotImplemented
    get_url_from_archive_element = join_cls_url_to_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dict for a single comic.

        The number comes from the link's href, the title from the link text
        and the date from the path of the single image with id 'comic'.
        """
        comic_num = int(cls.link_re.match(link['href']).group(1))
        comic_title = link.string
        comic_imgs = soup.find_all('img', id='comic')
        assert len(comic_imgs) == 1
        date_match = cls.img_re.match(comic_imgs[0]['src'])
        year, month, day = map(int, date_match.groups())
        info = {}
        info['title'] = comic_title
        info['day'] = day
        info['month'] = month
        info['year'] = year
        info['img'] = [img['src'] for img in comic_imgs]
        info['num'] = comic_num
        return info

    @classmethod
    def get_archive_elements(cls):
        """Return the archive links matching link_re, in reverse page order."""
        archive_url = 'http://www.horovitzcomics.com/comics/archive/'
        archive_soup = get_soup_at_url(archive_url)
        matching_links = archive_soup.find_all('a', href=cls.link_re)
        return reversed(matching_links)
4399
4400
4401
class HorovitzNew(HorovitzComics):
    """Retriever for the Horovitz 'new' comics, based on HorovitzComics."""
    name = 'horovitznew'
    long_name = 'Horovitz New'
    # The only group captures the comic number.
    link_re = re.compile(r'^/comics/new/([0-9]+)$')
4406
4407
4408
class HorovitzClassic(HorovitzComics):
    """Retriever for the Horovitz 'classic' comics, based on HorovitzComics."""
    name = 'horovitzclassic'
    long_name = 'Horovitz Classic'
    # The only group captures the comic number.
    link_re = re.compile(r'^/comics/classic/([0-9]+)$')
4413
4414
4415
class GenericGoComic(GenericNavigableComic):
    """Base class holding the logic shared by comics from gocomics.com."""
    _categories = ('GOCOMIC',)

    @classmethod
    def get_first_comic_link(cls):
        """Return the link to the first comic, looked up on the page at cls.url."""
        home_soup = get_soup_at_url(cls.url)
        return home_soup.find('a', class_='fa btn btn-outline-default btn-circle fa-backward sm ')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the link to the next (if next_ is true) or previous comic."""
        # The trailing space is part of the class attribute being matched.
        if next_:
            button_class = 'fa btn btn-outline-default btn-circle fa-caret-right sm '
        else:
            button_class = 'fa btn btn-outline-default btn-circle fa-caret-left sm '
        return last_soup.find('a', class_=button_class)

    @classmethod
    def get_url_from_link(cls, link):
        """Return the link's href joined to the gocomics.com root URL."""
        return urljoin_wrapper('http://www.gocomics.com', link['href'])

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dict for a single comic.

        Date, author and tags are read from the page's 'article:*' meta
        elements; images come from the comic's 'picture' element.
        """
        published = soup.find('meta', property='article:published_time')['content']
        pub_date = string_to_date(published, "%Y-%m-%d")
        picture = soup.find('picture', class_='img-fluid item-comic-image')
        comic_imgs = picture.find_all('img')
        author = soup.find('meta', property='article:author')['content']
        tags = soup.find('meta', property='article:tag')['content']
        img_urls = [urljoin_wrapper(cls.url, img['src']) for img in comic_imgs]
        return {
            'day': pub_date.day,
            'month': pub_date.month,
            'year': pub_date.year,
            'img': img_urls,
            'author': author,
            'tags': tags,
        }
4452
4453
4454
class PearlsBeforeSwine(GenericGoComic):
    """Retriever for Pearls Before Swine comics, based on GenericGoComic."""
    name = 'pearls'
    long_name = 'Pearls Before Swine'
    url = 'http://www.gocomics.com/pearlsbeforeswine'
4459
4460
4461
class Peanuts(GenericGoComic):
    """Retriever for Peanuts comics, based on GenericGoComic."""
    name = 'peanuts'
    long_name = 'Peanuts'
    url = 'http://www.gocomics.com/peanuts'
4466
4467
4468
class MattWuerker(GenericGoComic):
    """Retriever for Matt Wuerker comics, based on GenericGoComic."""
    name = 'wuerker'
    long_name = 'Matt Wuerker'
    url = 'http://www.gocomics.com/mattwuerker'
4473
4474
4475
class TomToles(GenericGoComic):
    """Retriever for Tom Toles comics, based on GenericGoComic."""
    name = 'toles'
    long_name = 'Tom Toles'
    url = 'http://www.gocomics.com/tomtoles'
4480
4481
4482
class BreakOfDay(GenericGoComic):
    """Retriever for Break Of Day comics, based on GenericGoComic."""
    name = 'breakofday'
    long_name = 'Break Of Day'
    url = 'http://www.gocomics.com/break-of-day'
4487
4488
4489
class Brevity(GenericGoComic):
    """Retriever for Brevity comics, based on GenericGoComic."""
    name = 'brevity'
    long_name = 'Brevity'
    url = 'http://www.gocomics.com/brevitypanel'
4494
4495
4496
class MichaelRamirez(GenericGoComic):
    """Retriever for Michael Ramirez comics, based on GenericGoComic."""
    name = 'ramirez'
    long_name = 'Michael Ramirez'
    url = 'http://www.gocomics.com/michaelramirez'
4501
4502
4503
class MikeLuckovich(GenericGoComic):
    """Retriever for Mike Luckovich comics, based on GenericGoComic."""
    name = 'luckovich'
    long_name = 'Mike Luckovich'
    url = 'http://www.gocomics.com/mikeluckovich'
4508
4509
4510
class JimBenton(GenericGoComic):
    """Retriever for Jim Benton comics, based on GenericGoComic."""
    # Also published at http://jimbenton.tumblr.com
    name = 'benton'
    long_name = 'Jim Benton'
    url = 'http://www.gocomics.com/jim-benton-cartoons'
4516
4517
4518
class TheArgyleSweater(GenericGoComic):
    """Retriever for The Argyle Sweater comics, based on GenericGoComic."""
    name = 'argyle'
    long_name = 'Argyle Sweater'
    url = 'http://www.gocomics.com/theargylesweater'
4523
4524
4525
class SunnyStreet(GenericGoComic):
    """Retriever for Sunny Street comics, based on GenericGoComic."""
    # Also published at http://www.sunnystreetcomics.com
    name = 'sunny'
    long_name = 'Sunny Street'
    url = 'http://www.gocomics.com/sunny-street'
4531
4532
4533
class OffTheMark(GenericGoComic):
    """Retriever for Off The Mark comics, based on GenericGoComic."""
    # Also published at https://www.offthemark.com
    name = 'offthemark'
    long_name = 'Off The Mark'
    url = 'http://www.gocomics.com/offthemark'
4539
4540
4541
class WuMo(GenericGoComic):
    """Retriever for WuMo comics, based on GenericGoComic."""
    # Also published at http://wumo.com
    name = 'wumo'
    long_name = 'WuMo'
    url = 'http://www.gocomics.com/wumo'
4547
4548
4549
class LunarBaboon(GenericGoComic):
    """Retriever for Lunar Baboon comics, based on GenericGoComic."""
    # Also published at http://www.lunarbaboon.com
    # Also published at https://tapastic.com/series/Lunarbaboon
    name = 'lunarbaboon'
    long_name = 'Lunar Baboon'
    url = 'http://www.gocomics.com/lunarbaboon'
4556
4557
4558
class SandersenGocomic(GenericGoComic):
    """Retriever for Sarah Andersen comics, based on GenericGoComic."""
    # Also published at http://sarahcandersen.com
    # Also published at http://tapastic.com/series/Doodle-Time
    name = 'sandersen-goc'
    long_name = 'Sarah Andersen (from GoComics)'
    url = 'http://www.gocomics.com/sarahs-scribbles'
4565
4566
4567
class SaturdayMorningBreakfastCerealGoComic(GenericGoComic):
    """Retriever for Saturday Morning Breakfast Cereal comics, based on GenericGoComic."""
    # Also published at http://smbc-comics.tumblr.com
    # Also published at http://www.smbc-comics.com
    name = 'smbc-goc'
    long_name = 'Saturday Morning Breakfast Cereal (from GoComics)'
    url = 'http://www.gocomics.com/saturday-morning-breakfast-cereal'
    _categories = ('SMBC',)
4575
4576
4577
class CalvinAndHobbesGoComic(GenericGoComic):
    """Retriever for Calvin and Hobbes comics, based on GenericGoComic."""
    # Source is gocomics, not http://marcel-oehler.marcellosendos.ch/comics/ch/
    name = 'calvin-goc'
    long_name = 'Calvin and Hobbes (from GoComics)'
    url = 'http://www.gocomics.com/calvinandhobbes'
4583
4584
4585
class RallGoComic(GenericGoComic):
    """Retriever for Ted Rall comics, based on GenericGoComic."""
    # Also published at http://rall.com/comic
    name = 'rall-goc'
    long_name = 'Ted Rall (from GoComics)'
    url = 'http://www.gocomics.com/ted-rall'
    _categories = ('RALL',)
4592
4593
4594
class TheAwkwardYetiGoComic(GenericGoComic):
    """Retriever for The Awkward Yeti comics, based on GenericGoComic."""
    # Also published at http://larstheyeti.tumblr.com
    # Also published at http://theawkwardyeti.com
    # Also published at https://tapastic.com/series/TheAwkwardYeti
    name = 'yeti-goc'
    long_name = 'The Awkward Yeti (from GoComics)'
    url = 'http://www.gocomics.com/the-awkward-yeti'
    _categories = ('YETI',)
4603
4604
4605
class BerkeleyMewsGoComics(GenericGoComic):
    """Retriever for Berkeley Mews comics, based on GenericGoComic."""
    # Also published at http://mews.tumblr.com
    # Also published at http://www.berkeleymews.com
    name = 'berkeley-goc'
    long_name = 'Berkeley Mews (from GoComics)'
    url = 'http://www.gocomics.com/berkeley-mews'
    _categories = ('BERKELEY',)
4613
4614
4615
class SheldonGoComics(GenericGoComic):
    """Retriever for Sheldon comics, based on GenericGoComic."""
    # Also published at http://www.sheldoncomics.com
    name = 'sheldon-goc'
    long_name = 'Sheldon Comics (from GoComics)'
    url = 'http://www.gocomics.com/sheldon'
4621
4622
4623
class FowlLanguageGoComics(GenericGoComic):
    """Retriever for Fowl Language comics, based on GenericGoComic."""
    # Also published at http://www.fowllanguagecomics.com
    # Also published at http://tapastic.com/series/Fowl-Language-Comics
    # Also published at http://fowllanguagecomics.tumblr.com
    name = 'fowllanguage-goc'
    long_name = 'Fowl Language Comics (from GoComics)'
    url = 'http://www.gocomics.com/fowl-language'
    _categories = ('FOWLLANGUAGE',)
4632
4633
4634
class NickAnderson(GenericGoComic):
    """Retriever for Nick Anderson comics, based on GenericGoComic."""
    name = 'nickanderson'
    long_name = 'Nick Anderson'
    url = 'http://www.gocomics.com/nickanderson'
4639
4640
4641
class GarfieldGoComics(GenericGoComic):
    """Retriever for Garfield comics, based on GenericGoComic."""
    # Also published at http://garfield.com
    name = 'garfield-goc'
    long_name = 'Garfield (from GoComics)'
    url = 'http://www.gocomics.com/garfield'
    _categories = ('GARFIELD',)
4648
4649
4650
class DorrisMcGoComics(GenericGoComic):
    """Retriever for Dorris Mc comics, based on GenericGoComic."""
    # Also published at http://dorrismccomics.com
    name = 'dorrismc-goc'
    long_name = 'Dorris Mc (from GoComics)'
    url = 'http://www.gocomics.com/dorris-mccomics'
4656
4657
4658
class FoxTrot(GenericGoComic):
    """Retriever for FoxTrot comics, based on GenericGoComic."""
    name = 'foxtrot'
    long_name = 'FoxTrot'
    url = 'http://www.gocomics.com/foxtrot'
4663
4664
4665
class FoxTrotClassics(GenericGoComic):
    """Retriever for FoxTrot Classics comics, based on GenericGoComic."""
    name = 'foxtrot-classics'
    long_name = 'FoxTrot Classics'
    url = 'http://www.gocomics.com/foxtrotclassics'
4670
4671
4672
class MisterAndMeGoComics(GenericEmptyComic, GenericGoComic):  # Removed ?
    """Retriever for Mister & Me comics, based on GenericGoComic."""
    # Also published at http://www.mister-and-me.com
    # Also published at https://tapastic.com/series/Mister-and-Me
    name = 'mister-goc'
    long_name = 'Mister & Me (from GoComics)'
    url = 'http://www.gocomics.com/mister-and-me'
4679
4680
4681
class NonSequitur(GenericGoComic):
    """Retriever for Non Sequitur (Wiley Miller) comics, based on GenericGoComic."""
    name = 'nonsequitur'
    long_name = 'Non Sequitur'
    url = 'http://www.gocomics.com/nonsequitur'
4686
4687
4688
class GenericTapasticComic(GenericListableComic):
    """Generic class to handle the logic common to comics from tapastic.com."""
    _categories = ('TAPASTIC', )

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Get information about a particular comics.

        archive_elt is one entry of the list returned by
        get_archive_elements; its 'publishDate', 'title' and 'id' keys are
        read here. Returns None (after printing a notice) when the page
        has no 'art-image' image yet.
        """
        # 'publishDate' is divided by 1000 before being used as a timestamp
        # in seconds; the conversion uses the local timezone.
        timestamp = int(archive_elt['publishDate']) / 1000.0
        day = datetime.datetime.fromtimestamp(timestamp).date()
        imgs = soup.find_all('img', class_='art-image')
        if not imgs:
            print("Comic %s is being uploaded, retry later" % cls.get_url_from_archive_element(archive_elt))
            return None
        return {
            'day': day.day,
            'year': day.year,
            'month': day.month,
            'img': [i['src'] for i in imgs],
            'title': archive_elt['title'],
        }

    @classmethod
    def get_url_from_archive_element(cls, archive_elt):
        """Get the episode url corresponding to an archive element."""
        return 'http://tapastic.com/episode/' + str(archive_elt['id'])

    @classmethod
    def get_archive_elements(cls):
        """Get the list of episodes described in the page at cls.url.

        Raises IndexError when no line of the page holds the episode list.
        """
        pref, suff = 'episodeList : ', ','
        # Information is stored in the javascript part
        # I don't know the clean way to get it so this is the ugly way.
        response = urlopen_wrapper(cls.url)
        try:
            raw_lines = response.readlines()
        finally:
            # Release the connection even if reading fails.
            response.close()
        for raw_line in raw_lines:
            line = raw_line.decode('utf-8').strip()
            if line.startswith(pref) and line.endswith(suff):
                # Only the first matching line is used.
                return json.loads(line[len(pref):-len(suff)])
        raise IndexError("No episode list found at %s" % cls.url)
4721
4722
4723
class VegetablesForDessert(GenericTapasticComic):
    """Retriever for Vegetables For Dessert as published on Tapastic.

    Everything but the identifiers and series URL is inherited.
    """
    # Also on http://vegetablesfordessert.tumblr.com
    name = 'vegetables'
    long_name = 'Vegetables For Dessert'
    url = 'http://tapastic.com/series/vegetablesfordessert'
4729
4730
4731
class FowlLanguageTapa(GenericTapasticComic):
    """Retriever for Fowl Language as published on Tapastic.

    Everything but the identifiers, series URL and category tag is
    inherited.
    """
    # Also on http://www.fowllanguagecomics.com
    # Also on http://fowllanguagecomics.tumblr.com
    # Also on http://www.gocomics.com/fowl-language
    name = 'fowllanguage-tapa'
    long_name = 'Fowl Language Comics (from Tapastic)'
    url = 'http://tapastic.com/series/Fowl-Language-Comics'
    _categories = ('FOWLLANGUAGE',)
4740
4741
4742
class OscillatingProfundities(GenericTapasticComic):
    """Retriever for Oscillating Profundities as published on Tapastic.

    Everything but the identifiers and series URL is inherited.
    """
    name = 'oscillating'
    long_name = 'Oscillating Profundities'
    url = 'http://tapastic.com/series/oscillatingprofundities'
4747
4748
4749
class ZnoflatsComics(GenericTapasticComic):
    """Retriever for Znoflats Comics as published on Tapastic.

    Everything but the identifiers and series URL is inherited.
    """
    name = 'znoflats'
    long_name = 'Znoflats Comics'
    url = 'http://tapastic.com/series/Znoflats-Comics'
4754
4755
4756
class SandersenTapastic(GenericTapasticComic):
    """Retriever for Sarah Andersen's comics as published on Tapastic.

    Everything but the identifiers and series URL is inherited.
    """
    # Also on http://sarahcandersen.com
    # Also on http://www.gocomics.com/sarahs-scribbles
    name = 'sandersen-tapa'
    long_name = 'Sarah Andersen (from Tapastic)'
    url = 'http://tapastic.com/series/Doodle-Time'
4763
4764
4765
class TubeyToonsTapastic(GenericTapasticComic):
    """Retriever for TubeyToons as published on Tapastic.

    Everything but the identifiers, series URL and category tag is
    inherited.
    """
    # Also on http://tubeytoons.com
    # Also on http://tubeytoons.tumblr.com
    name = 'tubeytoons-tapa'
    long_name = 'Tubey Toons (from Tapastic)'
    url = 'http://tapastic.com/series/Tubey-Toons'
    # NOTE(review): the tag reads 'TUNEYTOONS', not 'TUBEYTOONS'; it is
    # presumably shared with other TubeyToons classes - check before renaming.
    _categories = ('TUNEYTOONS',)
4773
4774
4775
class AnythingComicTapastic(GenericTapasticComic):
    """Retriever for Anything Comic as published on Tapastic.

    Everything but the identifiers and series URL is inherited.
    """
    # Also on http://www.anythingcomic.com
    name = 'anythingcomic-tapa'
    long_name = 'Anything Comic (from Tapastic)'
    url = 'http://tapastic.com/series/anything'
4781
4782
4783
class UnearthedComicsTapastic(GenericTapasticComic):
    """Retriever for Unearthed Comics as published on Tapastic.

    Everything but the identifiers, series URL and category tag is
    inherited.
    """
    # Also on http://unearthedcomics.com
    # Also on http://unearthedcomics.tumblr.com
    name = 'unearthed-tapa'
    long_name = 'Unearthed Comics (from Tapastic)'
    url = 'http://tapastic.com/series/UnearthedComics'
    _categories = ('UNEARTHED',)
4791
4792
4793
class EverythingsStupidTapastic(GenericTapasticComic):
    """Retriever for Everything's Stupid as published on Tapastic.

    Everything but the identifiers and series URL is inherited.
    """
    # Also on http://www.webtoons.com/en/challenge/everythings-stupid/list?title_no=14591
    # Also on http://everythingsstupid.net
    name = 'stupid-tapa'
    long_name = "Everything's Stupid (from Tapastic)"
    url = 'http://tapastic.com/series/EverythingsStupid'
4800
4801
4802
class JustSayEhTapastic(GenericTapasticComic):
    """Retriever for Just Say Eh as published on Tapastic.

    Everything but the identifiers and series URL is inherited.
    """
    # Also on http://www.justsayeh.com
    name = 'justsayeh-tapa'
    long_name = 'Just Say Eh (from Tapastic)'
    url = 'http://tapastic.com/series/Just-Say-Eh'
4808
4809
4810
class ThorsThundershackTapastic(GenericTapasticComic):
    """Retriever for Thor's Thundershack as published on Tapastic.

    Everything but the identifiers, series URL and category tag is
    inherited.
    """
    # Also on http://www.thorsthundershack.com
    name = 'thor-tapa'
    long_name = "Thor's Thundershack (from Tapastic)"
    url = 'http://tapastic.com/series/Thors-Thundershac'
    _categories = ('THOR',)
4817
4818
4819
class OwlTurdTapastic(GenericTapasticComic):
    """Retriever for Owl Turd as published on Tapastic.

    Everything but the identifiers, series URL and category tag is
    inherited.
    """
    # Also on http://owlturd.com
    name = 'owlturd-tapa'
    long_name = 'Owl Turd (from Tapastic)'
    url = 'http://tapastic.com/series/Owl-Turd-Comix'
    _categories = ('OWLTURD',)
4826
4827
4828
class GoneIntoRaptureTapastic(GenericTapasticComic):
    """Retriever for Gone Into Rapture as published on Tapastic.

    Everything but the identifiers and series URL is inherited.
    """
    # Also on http://goneintorapture.tumblr.com
    # Also on http://www.goneintorapture.com
    name = 'rapture-tapa'
    long_name = 'Gone Into Rapture (from Tapastic)'
    url = 'http://tapastic.com/series/Goneintorapture'
4835
4836
4837
class HeckIfIKnowComicsTapa(GenericTapasticComic):
    """Retriever for Heck If I Know Comics as published on Tapastic.

    Everything but the identifiers and series URL is inherited.
    """
    # Also on http://heckifiknowcomics.com
    name = 'heck-tapa'
    long_name = 'Heck if I Know comics (from Tapastic)'
    url = 'http://tapastic.com/series/Regular'
4843
4844
4845
class CheerUpEmoKidTapa(GenericTapasticComic):
    """Retriever for Cheer Up Emo Kid as published on Tapastic.

    Everything but the identifiers and series URL is inherited.
    """
    # Also on http://www.cheerupemokid.com
    # Also on http://enzocomics.tumblr.com
    name = 'cuek-tapa'
    long_name = 'Cheer Up Emo Kid (from Tapastic)'
    url = 'http://tapastic.com/series/CUEK'
4852
4853
4854
class BigFootJusticeTapa(GenericTapasticComic):
    """Retriever for Big Foot Justice as published on Tapastic.

    Everything but the identifiers and series URL is inherited.
    """
    # Also on http://bigfootjustice.com
    name = 'bigfoot-tapa'
    long_name = 'Big Foot Justice (from Tapastic)'
    url = 'http://tapastic.com/series/bigfoot-justice'
4860
4861
4862
class UpAndOutTapa(GenericTapasticComic):
    """Retriever for Up & Out as published on Tapastic.

    Everything but the identifiers and series URL is inherited.
    """
    # Also on http://upandoutcomic.tumblr.com
    name = 'upandout-tapa'
    long_name = 'Up And Out (from Tapastic)'
    url = 'http://tapastic.com/series/UP-and-OUT'
4868
4869
4870
class ToonHoleTapa(GenericTapasticComic):
    """Retriever for Toon Hole as published on Tapastic.

    Everything but the identifiers and series URL is inherited.
    """
    # Also on http://www.toonhole.com
    name = 'toonhole-tapa'
    long_name = 'Toon Hole (from Tapastic)'
    url = 'http://tapastic.com/series/TOONHOLE'
4876
4877
4878
class AngryAtNothingTapa(GenericTapasticComic):
    """Retriever for Angry At Nothing as published on Tapastic.

    Everything but the identifiers and series URL is inherited.
    """
    # Also on http://www.angryatnothing.net
    name = 'angry-tapa'
    long_name = 'Angry At Nothing (from Tapastic)'
    url = 'http://tapastic.com/series/Comics-yeah-definitely-comics-'
4884
4885
4886
class LeleozTapa(GenericTapasticComic):
    """Retriever for Leleoz as published on Tapastic.

    Everything but the identifiers and series URL is inherited.
    """
    # Also on http://leleozcomics.tumblr.com
    name = 'leleoz-tapa'
    long_name = 'Leleoz (from Tapastic)'
    url = 'https://tapastic.com/series/Leleoz'
4892
4893
4894
class TheAwkwardYetiTapa(GenericTapasticComic):
    """Retriever for The Awkward Yeti as published on Tapastic.

    Everything but the identifiers, series URL and category tag is
    inherited.
    """
    # Also on http://www.gocomics.com/the-awkward-yeti
    # Also on http://theawkwardyeti.com
    # Also on http://larstheyeti.tumblr.com
    name = 'yeti-tapa'
    long_name = 'The Awkward Yeti (from Tapastic)'
    url = 'https://tapastic.com/series/TheAwkwardYeti'
    _categories = ('YETI',)
4903
4904
4905
class AsPerUsualTapa(GenericTapasticComic):
    """Class to retrieve As Per Usual comics.

    Only the identifiers, series URL and category tag are defined here;
    everything else is inherited.
    """
    # Also on http://as-per-usual.tumblr.com
    name = 'usual-tapa'
    long_name = 'As Per Usual (from Tapastic)'
    url = 'https://tapastic.com/series/AsPerUsual'
    # Spelled `_categories` (leading underscore) like the other Tapastic
    # classes and the generic base class.
    _categories = ('DAMILEE', )
4912
4913
4914
class HotComicsForCoolPeopleTapa(GenericTapasticComic):
    """Class to retrieve Hot Comics For Cool People.

    Only the identifiers, series URL and category tag are defined here;
    everything else is inherited.
    """
    # Also on http://hotcomicsforcoolpeople.tumblr.com
    # Also on http://hotcomics.biz (links to tumblr)
    # Also on http://hcfcp.com (links to tumblr)
    name = 'hotcomics-tapa'
    long_name = 'Hot Comics For Cool People (from Tapastic)'
    url = 'https://tapastic.com/series/Hot-Comics-For-Cool-People'
    # Spelled `_categories` (leading underscore) like the other Tapastic
    # classes and the generic base class.
    _categories = ('DAMILEE', )
4923
4924
4925
class OneOneOneOneComicTapa(GenericTapasticComic):
    """Retriever for 1111 Comics as published on Tapastic.

    Everything but the identifiers, series URL and category tag is
    inherited.
    """
    # Also on http://www.1111comics.me
    # Also on http://comics1111.tumblr.com
    name = '1111-tapa'
    long_name = '1111 Comics (from Tapastic)'
    url = 'https://tapastic.com/series/1111-Comics'
    _categories = ('ONEONEONEONE',)
4933
4934
4935
class TumbleDryTapa(GenericTapasticComic):
    """Class to retrieve Tumble Dry comics.

    Only the identifiers and series URL are defined here; everything else
    is inherited.
    """
    # Also on http://tumbledrycomics.com
    name = 'tumbledry-tapa'
    # The comic is called "Tumble Dry" (see the URLs), not "Tumblr Dry".
    long_name = 'Tumble Dry (from Tapastic)'
    url = 'https://tapastic.com/series/TumbleDryComics'
4941
4942
4943
class DeadlyPanelTapa(GenericTapasticComic):
    """Retriever for Deadly Panel as published on Tapastic.

    Everything but the identifiers and series URL is inherited.
    """
    # Also on http://www.deadlypanel.com
    # Also on http://deadlypanel.tumblr.com
    name = 'deadly-tapa'
    long_name = 'Deadly Panel (from Tapastic)'
    url = 'https://tapastic.com/series/deadlypanel'
4950
4951
4952
class ChrisHallbeckMaxiTapa(GenericTapasticComic):
    """Class to retrieve Chris Hallbeck's Maximumble comics from Tapastic.

    Only the identifiers, series URL and category tag are defined here;
    everything else is inherited.
    """
    # Also on http://chrishallbeck.tumblr.com
    # Also on http://maximumble.com
    name = 'hallbeckmaxi-tapa'
    # Author name spelled "Hallbeck", as in the URLs above.
    long_name = 'Chris Hallbeck - Maximumble (from Tapastic)'
    url = 'https://tapastic.com/series/Maximumble'
    # NOTE(review): the tag keeps its 'HALLBACK' spelling because it is
    # presumably shared with other Chris Hallbeck classes - rename them all
    # together or not at all.
    _categories = ('HALLBACK', )
4960
4961
4962
class ChrisHallbeckMiniTapa(GenericTapasticComic):
    """Class to retrieve Chris Hallbeck's Minimumble comics from Tapastic.

    Only the identifiers, series URL and category tag are defined here;
    everything else is inherited.
    """
    # Also on http://chrishallbeck.tumblr.com
    # Also on http://minimumble.com
    name = 'hallbeckmini-tapa'
    # Author name spelled "Hallbeck", as in the URLs above.
    long_name = 'Chris Hallbeck - Minimumble (from Tapastic)'
    url = 'https://tapastic.com/series/Minimumble'
    # NOTE(review): the tag keeps its 'HALLBACK' spelling because it is
    # presumably shared with other Chris Hallbeck classes - rename them all
    # together or not at all.
    _categories = ('HALLBACK', )
4970
4971
4972
class ChrisHallbeckBiffTapa(GenericTapasticComic):
    """Class to retrieve Chris Hallbeck's The Book of Biff comics from Tapastic.

    Only the identifiers, series URL and category tag are defined here;
    everything else is inherited.
    """
    # Also on http://chrishallbeck.tumblr.com
    # Also on http://thebookofbiff.com
    name = 'hallbeckbiff-tapa'
    # Author name spelled "Hallbeck", as in the URLs above.
    long_name = 'Chris Hallbeck - The Book of Biff (from Tapastic)'
    url = 'https://tapastic.com/series/Biff'
    # NOTE(review): the tag keeps its 'HALLBACK' spelling because it is
    # presumably shared with other Chris Hallbeck classes - rename them all
    # together or not at all.
    _categories = ('HALLBACK', )
4980
4981
4982
class RandoWisTapa(GenericTapasticComic):
    """Retriever for RandoWis as published on Tapastic.

    Everything but the identifiers and series URL is inherited.
    """
    # Also on https://randowis.com
    name = 'randowis-tapa'
    long_name = 'RandoWis (from Tapastic)'
    url = 'https://tapastic.com/series/RandoWis'
4988
4989
4990
class PigeonGazetteTapa(GenericTapasticComic):
    """Retriever for The Pigeon Gazette as published on Tapastic.

    Everything but the identifiers and series URL is inherited.
    """
    # Also on http://thepigeongazette.tumblr.com
    name = 'pigeon-tapa'
    long_name = 'The Pigeon Gazette (from Tapastic)'
    url = 'https://tapastic.com/series/The-Pigeon-Gazette'
4996
4997
4998
class TheOdd1sOutTapa(GenericTapasticComic):
    """Retriever for The Odd 1s Out as published on Tapastic.

    Everything but the identifiers and series URL is inherited.
    """
    # Also on http://theodd1sout.com
    # Also on http://theodd1sout.tumblr.com
    name = 'theodd-tapa'
    long_name = 'The Odd 1s Out (from Tapastic)'
    url = 'https://tapastic.com/series/Theodd1sout'
5005
5006
5007
class TheWorldIsFlatTapa(GenericTapasticComic):
    """Retriever for The World Is Flat as published on Tapastic.

    Everything but the identifiers and series URL is inherited.
    """
    # Also on http://theworldisflatcomics.tumblr.com
    name = 'flatworld-tapa'
    long_name = 'The World Is Flat (from Tapastic)'
    url = 'https://tapastic.com/series/The-World-is-Flat'
5013
5014
5015
class MisterAndMeTapa(GenericTapasticComic):
    """Retriever for Mister & Me as published on Tapastic.

    Everything but the identifiers and series URL is inherited.
    """
    # Also on http://www.mister-and-me.com
    # Also on http://www.gocomics.com/mister-and-me
    name = 'mister-tapa'
    long_name = 'Mister & Me (from Tapastic)'
    url = 'https://tapastic.com/series/Mister-and-Me'
5022
5023
5024
class TalesOfAbsurdityTapa(GenericTapasticComic):
    """Retriever for Tales Of Absurdity as published on Tapastic.

    Everything but the identifiers, series URL and category tag is
    inherited.
    """
    # Also on http://talesofabsurdity.com
    # Also on http://talesofabsurdity.tumblr.com
    name = 'absurdity-tapa'
    long_name = 'Tales of Absurdity (from Tapastic)'
    url = 'http://tapastic.com/series/Tales-Of-Absurdity'
    _categories = ('ABSURDITY',)
5032
5033
5034
class BFGFSTapa(GenericTapasticComic):
    """Retriever for BFGFS as published on Tapastic.

    Everything but the identifiers and series URL is inherited.
    """
    # Also on http://bfgfs.com
    # Also on http://bfgfs.tumblr.com
    name = 'bfgfs-tapa'
    long_name = 'BFGFS (from Tapastic)'
    url = 'https://tapastic.com/series/BFGFS'
5041
5042
5043
class DoodleForFoodTapa(GenericTapasticComic):
    """Retriever for Doodle For Food as published on Tapastic.

    Everything but the identifiers and series URL is inherited.
    """
    # Also on http://doodleforfood.com
    name = 'doodle-tapa'
    long_name = 'Doodle For Food (from Tapastic)'
    url = 'https://tapastic.com/series/Doodle-for-Food'
5049
5050
5051
class MrLovensteinTapa(GenericTapasticComic):
    """Retriever for Mr. Lovenstein as published on Tapastic.

    Everything but the identifiers and series URL is inherited.
    """
    # Also on http://www.mrlovenstein.com
    # NOTE(review): this note used to repeat the Tapastic URL below; the
    # address above is presumably the comic's own site - confirm.
    name = 'mrlovenstein-tapa'
    long_name = 'Mr. Lovenstein (from Tapastic)'
    url = 'https://tapastic.com/series/MrLovenstein'
5057
5058
5059
class CassandraCalinTapa(GenericTapasticComic):
    """Retriever for C. Cassandra (Cassandra Calin) as published on Tapastic.

    Everything but the identifiers and series URL is inherited.
    """
    # Also on http://cassandracalin.com
    # Also on http://c-cassandra.tumblr.com
    name = 'cassandra-tapa'
    long_name = 'Cassandra Calin (from Tapastic)'
    url = 'https://tapastic.com/series/C-Cassandra-comics'
5066
5067
5068
class WafflesAndPancakes(GenericTapasticComic):
    """Retriever for Waffles And Pancakes as published on Tapastic.

    Everything but the identifiers and series URL is inherited.
    """
    # Also on http://wandpcomic.com
    name = 'waffles'
    long_name = 'Waffles And Pancakes'
    url = 'https://tapastic.com/series/Waffles-and-Pancakes'
5074
5075
5076
class YesterdaysPopcornTapastic(GenericTapasticComic):
    """Retriever for Yesterday's Popcorn as published on Tapastic.

    Everything but the identifiers and series URL is inherited.
    """
    # Also on http://www.yesterdayspopcorn.com
    # Also on http://yesterdayspopcorn.tumblr.com
    name = 'popcorn-tapa'
    long_name = "Yesterday's Popcorn (from Tapastic)"
    url = 'https://tapastic.com/series/Yesterdays-Popcorn'
5083
5084
5085
class OurSuperAdventureTapastic(GenericTapasticComic):
    """Retriever for Our Super Adventure as published on Tapastic.

    Everything but the identifiers and series URL is inherited.
    """
    # Also on http://www.oursuperadventure.com
    # Also on http://sarahssketchbook.tumblr.com
    # Also on http://sarahgraley.com
    name = 'superadventure-tapastic'
    long_name = 'Our Super Adventure (from Tapastic)'
    url = 'https://tapastic.com/series/Our-Super-Adventure'
5093
5094
5095
class NamelessPCs(GenericTapasticComic):
    """Retriever for Nameless PCs as published on Tapastic.

    Everything but the identifiers and series URL is inherited.
    """
    # Also on http://namelesspcs.com
    name = 'namelesspcs-tapa'
    long_name = 'NamelessPCs (from Tapastic)'
    url = 'https://tapastic.com/series/NamelessPC'
5101
5102
5103
class UbertoolTapa(GenericTapasticComic):
    """Retriever for Ubertool as published on Tapastic.

    Everything but the identifiers, series URL and category tag is
    inherited.
    """
    # Also on http://ubertoolcomic.com
    # Also on http://ubertool.tumblr.com
    name = 'ubertool-tapa'
    long_name = 'Ubertool (from Tapastic)'
    url = 'https://tapastic.com/series/ubertool'
    _categories = ('UBERTOOL',)
5111
5112
5113
class BarteNerdsTapa(GenericTapasticComic):
    """Retriever for BarteNerds as published on Tapastic.

    Everything but the identifiers and series URL is inherited.
    """
    # Also on http://www.bartenerds.com
    name = 'bartenerds-tapa'
    long_name = 'BarteNerds (from Tapastic)'
    url = 'https://tapastic.com/series/BarteNERDS'
5119
5120
5121
class SmallBlueYonderTapa(GenericTapasticComic):
    """Retriever for Small Blue Yonder as published on Tapastic.

    Everything but the identifiers and series URL is inherited.
    """
    # Also on http://www.smallblueyonder.com
    name = 'smallblue-tapa'
    long_name = 'Small Blue Yonder (from Tapastic)'
    url = 'https://tapastic.com/series/Small-Blue-Yonder'
5127
5128
5129
class TizzyStitchBirdTapa(GenericTapasticComic):
    """Retriever for Tizzy Stitch Bird as published on Tapastic.

    Everything but the identifiers and series URL is inherited.
    """
    # Also on http://tizzystitchbird.com
    # Also on http://tizzystitchbird.tumblr.com
    # Also on http://www.webtoons.com/en/challenge/tizzy-stitchbird/list?title_no=50082
    name = 'tizzy-tapa'
    long_name = 'Tizzy Stitch Bird (from Tapastic)'
    url = 'https://tapastic.com/series/TizzyStitchbird'
5137
5138
5139
class RockPaperCynicTapa(GenericTapasticComic):
    """Retriever for Rock Paper Cynic as published on Tapastic.

    Everything but the identifiers and series URL is inherited.
    """
    # Also on http://www.rockpapercynic.com
    # Also on http://rockpapercynic.tumblr.com
    name = 'rpc-tapa'
    long_name = 'Rock Paper Cynic (from Tapastic)'
    url = 'https://tapastic.com/series/rockpapercynic'
5146
5147
5148
class ItsTheTieTapa(GenericTapasticComic):
    """Retriever for It's the tie as published on Tapastic.

    Everything but the identifiers, series URL and category tag is
    inherited.
    """
    # Also on http://itsthetie.com
    # Also on http://itsthetie.tumblr.com
    name = 'tie-tapa'
    long_name = "It's the tie (from Tapastic)"
    url = 'https://tapastic.com/series/itsthetie'
    _categories = ('TIE',)
5156
5157
5158
def get_subclasses(klass):
    """Return the classes deriving from `klass`, directly or indirectly.

    The direct subclasses come first, followed by the descendants of each
    of them in turn. A class reachable through several parents appears once
    per path."""
    direct = klass.__subclasses__()
    descendants = list(direct)
    for child in direct:
        descendants += get_subclasses(child)
    return descendants
5164
5165
5166
def remove_st_nd_rd_th_from_date(string):
    """Function to transform 1st/2nd/3rd/4th in a parsable date format.

    Strips the 'st', 'nd', 'rd' and 'th' ordinal suffixes when they directly
    follow a digit and returns the resulting string. The same letters found
    anywhere else ('August', 'Monday', 'Saturday', 'Thursday'...) are left
    alone so that day and month names stay parsable."""
    # The look-behind keeps the digit and ensures only real ordinals match.
    return re.sub(r'(?<=\d)(?:st|nd|rd|th)', '', string)
5174
5175
5176
def string_to_date(string, date_format, local=DEFAULT_LOCAL):
    """Function to convert string to date object.

    Wrapper around datetime.datetime.strptime: `string` is parsed according
    to `date_format` while the `local` locale is active for LC_ALL, and the
    date part of the result is returned.

    The locale in effect before the call is always restored, including when
    strptime raises ValueError on an unparsable string."""
    # format described in https://docs.python.org/2/library/datetime.html#strftime-and-strptime-behavior
    prev_locale = locale.setlocale(locale.LC_ALL)
    if local == prev_locale:
        # Already in the requested locale: nothing to switch or restore.
        return datetime.datetime.strptime(string, date_format).date()
    locale.setlocale(locale.LC_ALL, local)
    try:
        return datetime.datetime.strptime(string, date_format).date()
    finally:
        # Restore even on failure so that a bad date does not leave the
        # whole process in the wrong locale.
        locale.setlocale(locale.LC_ALL, prev_locale)
5187
5188
5189
# Every direct or indirect subclass of GenericComic defined so far.
COMICS = set(get_subclasses(GenericComic))
# Classes whose name is None are left out of the registries below.
VALID_COMICS = [comic for comic in COMICS if comic.name is not None]
# Lookup table from comic name to class; a length mismatch means that two
# classes share the same name.
COMIC_NAMES = {comic.name: comic for comic in VALID_COMICS}
assert len(COMIC_NAMES) == len(VALID_COMICS)
# Same uniqueness check for the Python class names.
CLASS_NAMES = {comic.__name__ for comic in VALID_COMICS}
assert len(CLASS_NAMES) == len(VALID_COMICS)
5195