Completed
Push — master ( f77710...afb6eb )
by De
01:02
created

comics.py (14 issues)

1
#! /usr/bin/python3
2
# vim: set expandtab tabstop=4 shiftwidth=4 :
3
"""Module to retrieve webcomics"""
4
5
from comic_abstract import GenericComic, get_date_for_comic
6
import re
7
from datetime import date, timedelta
8
import datetime
9
from urlfunctions import get_soup_at_url, urljoin_wrapper,\
10
    convert_iri_to_plain_ascii_uri, load_json_at_url, urlopen_wrapper
11
import json
12
import locale
13
import urllib
14
15
# Name of the default locale (British English, UTF-8). Its consumers are not
# visible in this part of the file - presumably date parsing (TODO confirm).
DEFAULT_LOCAL = 'en_GB.UTF-8'
16
17
18
class Xkcd(GenericComic):
    """Retriever for xkcd comics, based on the site's JSON documents."""
    name = 'xkcd'
    long_name = 'xkcd'
    url = 'http://xkcd.com'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Yield the comics published after `last_comic`.

        Implementation of GenericComic's abstract method. `last_comic` is
        the last comic already retrieved (its 'num' is used) or a falsy
        value to start from the very first comic.
        """
        previous_num = 0
        if last_comic:
            previous_num = last_comic['num']
        latest_info = load_json_at_url(urljoin_wrapper(cls.url, 'info.0.json'))
        newest_num = latest_info['num']

        for num in range(previous_num + 1, newest_num + 1):
            if num == 404:
                # Number 404 is skipped: no JSON document is requested for it.
                continue
            json_url = urljoin_wrapper(cls.url, '%d/info.0.json' % num)
            comic = load_json_at_url(json_url)
            comic.update({
                'img': [comic['img']],
                'prefix': '%d-' % num,
                'json_url': json_url,
                'url': urljoin_wrapper(cls.url, str(num)),
                'day': int(comic['day']),
                'month': int(comic['month']),
                'year': int(comic['year']),
            })
            assert comic['num'] == num
            yield comic
44
45
46
# Helper functions corresponding to get_url_from_link/get_url_from_archive_element
47
48
49
@classmethod
def get_href(cls, link):
    """Return the 'href' value of a link (for get_url_from_link/get_url_from_archive_element)."""
    href = link['href']
    return href
53
54
55
@classmethod
def join_cls_url_to_href(cls, link):
    """Return the link's 'href' joined to the class url (for get_url_from_link/get_url_from_archive_element)."""
    base_url = cls.url
    href = link['href']
    return urljoin_wrapper(base_url, href)
59
60
61
class GenericNavigableComic(GenericComic):
    """Generic class for "navigable" comics : with first/next arrows.

    This class applies to comics where previous and next comics can be
    accessed from a given comic. Once given a starting point (either
    the first comic or the last comic retrieved), it will handle the
    navigation, the retrieval of the soup object and the setting of
    the 'url' attribute on retrieved comics. This greatly limits the
    amount of boilerplate code in the different implementation classes.

    The `get_next_comic` method is implemented in terms of new
    more specialized methods to be implemented/overridden:
        - get_first_comic_link
        - get_navi_link
        - get_comic_info
        - get_url_from_link
    """
    _categories = ('NAVIGABLE', )

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        Sometimes this can be retrieved from any comic page, sometimes on
        the archive page, sometimes it doesn't exist at all and one has
        to iterate backward to find it before hardcoding the result found.
        """
        raise NotImplementedError

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next (or previous - for dev purposes) comic.

        `next_` is truthy to get the next link, falsy to get the previous one.
        """
        raise NotImplementedError

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Implementations return a dict (which must not contain 'url') or
        None when the page is to be ignored.
        """
        raise NotImplementedError

    @classmethod
    def get_url_from_link(cls, link):
        """Get url corresponding to a link. Default implementation is similar to get_href."""
        return link['href']

    @classmethod
    def get_next_link(cls, last_soup):
        """Get link to next comic."""
        link = cls.get_navi_link(last_soup, True)
        cls.log("Next link is %s" % link)
        return link

    @classmethod
    def get_prev_link(cls, last_soup):
        """Get link to previous comic."""
        link = cls.get_navi_link(last_soup, False)
        cls.log("Prev link is %s" % link)
        return link

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generic implementation of get_next_comic for navigable comics.

        Starts from the comic following `last_comic` (or from the first
        comic when `last_comic` is falsy) and yields comics, each with its
        'url' set, until no next link is found or the next link leads to
        the page already visited.
        """
        url = last_comic['url'] if last_comic else None
        cls.log("starting 'get_next_comic' from %s" % url)
        next_comic = \
            cls.get_next_link(get_soup_at_url(url)) \
            if url else \
            cls.get_first_comic_link()
        cls.log("next/first comic will be %s (url is %s)" % (str(next_comic), url))
        # Development aid, disabled by default:
        # cls.check_navigation(url)
        while next_comic:
            prev_url, url = url, cls.get_url_from_link(next_comic)
            if prev_url == url:
                # A "next" link pointing to the current page: stop here.
                cls.log("got same url %s" % url)
                break
            cls.log("about to get %s (%s)" % (url, str(next_comic)))
            soup = get_soup_at_url(url)
            comic = cls.get_comic_info(soup, next_comic)
            if comic is not None:
                assert 'url' not in comic
                comic['url'] = url
                yield comic
            next_comic = cls.get_next_link(soup)
            cls.log("next comic will be %s" % str(next_comic))

    @classmethod
    def check_first_link(cls):
        """Check that navigation to first comic seems to be working - for dev purposes.

        Returns True when a first link exists and its page can be retrieved.
        """
        # Imported explicitly: a bare `import urllib` does not guarantee
        # that the `urllib.error` submodule has been loaded.
        from urllib.error import HTTPError
        cls.log("about to check first link")
        ok = True
        firstlink = cls.get_first_comic_link()
        if firstlink is None:
            print("From %s : no first link" % cls.url)
            ok = False
        else:
            firsturl = cls.get_url_from_link(firstlink)
            try:
                get_soup_at_url(firsturl)
            except HTTPError:
                print("From %s : invalid first url" % cls.url)
                ok = False
        cls.log("checked first link -> returned %d" % ok)
        return ok

    @classmethod
    def check_prev_next_links(cls, url):
        """Check that navigation to prev/next from a given URL seems to be working - for dev purposes.

        Going backward then forward (and forward then backward) from `url`
        is expected to lead back to `url`. Returns True when no
        inconsistency is found.
        """
        cls.log("about to check prev/next from %s" % url)
        ok = True
        if url is None:
            prevlink, nextlink = None, None
        else:
            soup = get_soup_at_url(url)
            prevlink, nextlink = cls.get_prev_link(soup), cls.get_next_link(soup)
        if prevlink is None and nextlink is None:
            print("From %s : no previous nor next" % url)
            ok = False
        else:
            if prevlink:
                prevurl = cls.get_url_from_link(prevlink)
                prevsoup = get_soup_at_url(prevurl)
                prevnextlink = cls.get_next_link(prevsoup)
                prevnext = cls.get_url_from_link(prevnextlink) if prevnextlink is not None else "NO URL"
                if prevnext != url:
                    print("From %s, going backward then forward leads to %s" % (url, prevnext))
                    ok = False
            if nextlink:
                nexturl = cls.get_url_from_link(nextlink)
                # A next link pointing to the page itself is not followed.
                if nexturl != url:
                    nextsoup = get_soup_at_url(nexturl)
                    nextprevlink = cls.get_prev_link(nextsoup)
                    nextprev = cls.get_url_from_link(nextprevlink) if nextprevlink is not None else "NO URL"
                    if nextprev != url:
                        print("From %s, going forward then backward leads to %s" % (url, nextprev))
                        ok = False
        cls.log("checked prev/next from %s -> returned %d" % (url, ok))
        return ok

    @classmethod
    def check_navigation(cls, url):
        """Check that navigation functions seem to be working - for dev purposes.

        Both the first link check and the prev/next check are always run.
        """
        cls.log("about to check navigation from %s" % url)
        first = cls.check_first_link()
        prevnext = cls.check_prev_next_links(url)
        ok = first and prevnext
        cls.log("checked navigation from %s -> returned %d" % (url, ok))
        return ok
207
208
209
class GenericListableComic(GenericComic):
    """Generic class for "listable" comics : comics exposing a list of comics (aka 'archive').

    `get_next_comic` is written in terms of more specialized methods
    to be implemented/overridden:
        - get_archive_elements
        - get_url_from_archive_element
        - get_comic_info
    """
    _categories = ('LISTABLE', )

    @classmethod
    def get_archive_elements(cls):
        """Return an iterable over the archive elements."""
        raise NotImplementedError

    @classmethod
    def get_url_from_archive_element(cls, archive_elt):
        """Return the url matching an archive element."""
        raise NotImplementedError

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Return the information (dict or None) about one comic."""
        raise NotImplementedError

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generic implementation of get_next_comic for listable comics.

        Archive elements are skipped until the url of `last_comic` is met;
        every following element is retrieved and yielded with its 'url'.
        When `last_comic` is falsy, every element is retrieved.
        """
        pending_url = None
        if last_comic:
            pending_url = last_comic['url']
        for archive_elt in cls.get_archive_elements():
            url = cls.get_url_from_archive_element(archive_elt)
            cls.log("considering %s" % url)
            if pending_url is not None:
                # Still looking for the last comic already retrieved.
                if pending_url == url:
                    pending_url = None
                continue
            cls.log("about to get %s (%s)" % (url, str(archive_elt)))
            comic = cls.get_comic_info(get_soup_at_url(url), archive_elt)
            if comic is None:
                continue
            assert 'url' not in comic
            comic['url'] = url
            yield comic
        if pending_url is not None:
            print("Did not find %s : there might be a problem" % pending_url)
254
255
# Helper functions corresponding to get_first_comic_link/get_navi_link
256
257
258
@classmethod
def get_link_rel_next(cls, last_soup, next_):
    """Implementation of get_navi_link based on <link rel="next"/"prev"> tags."""
    if next_:
        rel = 'next'
    else:
        rel = 'prev'
    return last_soup.find('link', rel=rel)
262
263
264
@classmethod
def get_a_rel_next(cls, last_soup, next_):
    """Implementation of get_navi_link based on <a rel="next"/"prev"> tags."""
    if next_:
        rel = 'next'
    else:
        rel = 'prev'
    return last_soup.find('a', rel=rel)
268
269
270
@classmethod
def get_a_navi_navinext(cls, last_soup, next_):
    """Implementation of get_navi_link based on <a class="navi navi-next"/"navi navi-prev"> tags."""
    if next_:
        css_class = 'navi navi-next'
    else:
        css_class = 'navi navi-prev'
    return last_soup.find('a', class_=css_class)
274
275
276
@classmethod
def get_a_navi_comicnavnext_navinext(cls, last_soup, next_):
    """Implementation of get_navi_link based on <a class="navi comic-nav-next navi-next"> (or previous) tags."""
    if next_:
        css_class = 'navi comic-nav-next navi-next'
    else:
        css_class = 'navi comic-nav-previous navi-prev'
    return last_soup.find('a', class_=css_class)
280
281
282
@classmethod
def get_a_comicnavbase_comicnavnext(cls, last_soup, next_):
    """Implementation of get_navi_link based on <a class="comic-nav-base comic-nav-next"> (or previous) tags."""
    if next_:
        css_class = 'comic-nav-base comic-nav-next'
    else:
        css_class = 'comic-nav-base comic-nav-previous'
    return last_soup.find('a', class_=css_class)
286
287
288
@classmethod
def get_a_navi_navifirst(cls):
    """Implementation of get_first_comic_link: <a class="navi navi-first"> on the page at the class url."""
    home_soup = get_soup_at_url(cls.url)
    return home_soup.find('a', class_='navi navi-first')
292
293
294
@classmethod
def get_div_navfirst_a(cls):
    """Implementation of get_first_comic_link.

    Returns the first <a> found in the <div class="nav-first"> of the page
    at the class url, or None when that div (or the link) is missing, like
    the other link-finding helpers do.
    """
    div = get_soup_at_url(cls.url).find('div', class_="nav-first")
    # Explicit None test: the truth value of the object returned by find()
    # is not relied upon here.
    return div.find('a') if div is not None else None
298
299
300
@classmethod
def get_a_comicnavbase_comicnavfirst(cls):
    """Implementation of get_first_comic_link: <a class="comic-nav-base comic-nav-first"> on the page at the class url."""
    home_soup = get_soup_at_url(cls.url)
    return home_soup.find('a', class_='comic-nav-base comic-nav-first')
304
305
306
@classmethod
def simulate_first_link(cls):
    """Implementation of get_first_comic_link building a link-like dict
    from the `first_url` attribute provided by the class.

    Note: the first URL can easily be found using :
    `get_first_comic_link = navigate_to_first_comic`.
    """
    first_link = dict(href=cls.first_url)
    return first_link
315
316
317
@classmethod
def navigate_to_first_comic(cls):
    """Implementation of get_first_comic_link walking back from a URL
    typed by the user until a page without previous link is reached.

    Sometimes the first comic cannot be reached directly, so one has to
    follow the "previous" links until there is none left. Once the
    resulting URL is known, it is better to hardcode it, but for
    development purposes an automatic way to find it is convenient.

    Every visited URL is printed; the last one is returned as a
    link-like dict and can then be used via `simulate_first_link`.
    """
    url = input("Get starting URL: ")
    while True:
        print(url)
        prev_link = cls.get_prev_link(get_soup_at_url(url))
        if not prev_link:
            break
        url = cls.get_url_from_link(prev_link)
    return {'href': url}
338
339
340
class GenericEmptyComic(GenericComic):
    """Generic class for comics for which nothing is retrieved.

    It can be used to temporarily deactivate a comic that does not work
    properly, by replacing `class MyComic(GenericWhateverComic)` with
    `class MyComic(GenericEmptyComic, GenericWhateverComic)`."""
    _categories = ('EMPTY', )

    @classmethod
    def get_next_comic(cls, last_comic):
        """Implementation of get_next_comic: log a message and return no comics."""
        cls.log("comic is considered as empty - returning no comic")
        no_comics = []
        return no_comics
353
354
355
class ExtraFabulousComics(GenericNavigableComic):
    """Class to retrieve Extra Fabulous Comics."""
    name = 'efc'
    long_name = 'Extra Fabulous Comics'
    url = 'http://extrafabulouscomics.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with the title, the image urls (images whose source
        is under the site's wp-content/uploads/), the publication date and
        a file prefix built from the title.
        """
        # The site url is escaped so that its dots only match literal dots.
        img_src_re = re.compile('^%s/wp-content/uploads/' % re.escape(cls.url))
        imgs = soup.find_all('img', src=img_src_re)
        title = soup.find('meta', property='og:title')['content']
        # Only the leading YYYY-MM-DD part of the timestamp is used.
        date_str = soup.find('meta', property='article:published_time')['content'][:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        return {
            'title': title,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'prefix': title + '-'
        }
379
380
381 View Code Duplication
class GenericLeMondeBlog(GenericNavigableComic):
    """Generic class for comics published on Le Monde blogs."""
    _categories = ('LEMONDE', 'FRANCAIS')
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    # To be provided by each concrete blog class.
    first_url = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, short url, images and date of the comic in `soup`."""
        short_link = soup.find('link', rel='shortlink')
        url2 = short_link['href']
        title_meta = soup.find('meta', property='og:title')
        title = title_meta['content']
        date_str = soup.find("span", class_="entry-date").string
        # The date is written in French (e.g. day, month name, year).
        day = string_to_date(date_str, "%d %B %Y", "fr_FR.utf8")
        image_urls = [
            convert_iri_to_plain_ascii_uri(meta['content'])
            for meta in soup.find_all('meta', property='og:image')
        ]
        info = {
            'title': title,
            'url2': url2,
            'img': image_urls,
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
        return info
404
405
406
class ZepWorld(GenericLeMondeBlog):
    """Zep World comics (Le Monde blog)."""
    long_name = "Zep World"
    name = "zep"
    first_url = "http://zepworld.blog.lemonde.fr/2014/10/31/bientot-le-blog-de-zep/"
    url = "http://zepworld.blog.lemonde.fr"
412
413
414
class Vidberg(GenericLeMondeBlog):
    """Vidberg comics (Le Monde blog)."""
    long_name = "Vidberg - l'actu en patates"
    name = 'vidberg'
    # This is not the very first comic: no efficient way to retrieve it was found.
    first_url = "http://vidberg.blog.lemonde.fr/2012/02/09/revue-de-campagne-la-campagne-du-modem-semballe/"
    url = "http://vidberg.blog.lemonde.fr"
421
422
423
class Plantu(GenericLeMondeBlog):
    """Plantu comics (Le Monde blog)."""
    long_name = "Plantu"
    name = 'plantu'
    first_url = "http://plantu.blog.lemonde.fr/2014/10/28/stress-test-a-bruxelles/"
    url = "http://plantu.blog.lemonde.fr"
429
430
431
class XavierGorce(GenericLeMondeBlog):
    """Xavier Gorce comics (Le Monde blog)."""
    long_name = "Xavier Gorce"
    name = 'gorce'
    first_url = "http://xaviergorce.blog.lemonde.fr/2015/01/09/distinction/"
    url = "http://xaviergorce.blog.lemonde.fr"
437
438
439
class CartooningForPeace(GenericLeMondeBlog):
    """Cartooning For Peace comics (Le Monde blog)."""
    long_name = "Cartooning For Peace"
    name = 'forpeace'
    first_url = "http://cartooningforpeace.blog.lemonde.fr/2014/12/15/bado/"
    url = "http://cartooningforpeace.blog.lemonde.fr"
445
446
447
class Aurel(GenericLeMondeBlog):
    """Aurel comics (Le Monde blog)."""
    long_name = "Aurel"
    name = 'aurel'
    first_url = "http://aurel.blog.lemonde.fr/2014/09/29/le-senat-repasse-a-droite/"
    url = "http://aurel.blog.lemonde.fr"
453
454
455
class LesCulottees(GenericLeMondeBlog):
    """Les Culottees comics (Le Monde blog)."""
    long_name = 'Les Culottees'
    name = 'culottees'
    first_url = "http://lesculottees.blog.lemonde.fr/2016/01/11/clementine-delait-femme-a-barbe/"
    url = "http://lesculottees.blog.lemonde.fr"
461
462
463
class UneAnneeAuLycee(GenericLeMondeBlog):
    """Une Annee Au Lycee comics (Le Monde blog)."""
    long_name = 'Une Annee au Lycee'
    name = 'lycee'
    first_url = "http://uneanneeaulycee.blog.lemonde.fr/2016/06/13/la-semaine-du-bac-est-arrivee/"
    url = 'http://uneanneeaulycee.blog.lemonde.fr'
469
470
471 View Code Duplication
class Rall(GenericNavigableComic):
    """Retriever for Ted Rall comics."""
    # Also on http://www.gocomics.com/tedrall
    name = 'rall'
    long_name = "Ted Rall"
    url = "http://rall.com/comic"
    _categories = ('RALL', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    # This is not the very first comic: no efficient way to retrieve it was found.
    first_url = "http://rall.com/2014/01/30/los-angeles-times-cartoon-well-miss-those-california-flowers"

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, author, date, description and images of the comic in `soup`."""
        title = soup.find('meta', property='og:title')['content']
        author_span = soup.find("span", class_="author vcard")
        author = author_span.find("a").string
        date_str = soup.find("span", class_="entry-date").string
        day = string_to_date(date_str, "%B %d, %Y")
        desc = soup.find('meta', property='og:description')['content']
        entry_imgs = soup.find('div', class_='entry-content').find_all('img')
        # The last 7 images are dropped (social media buttons).
        comic_imgs = entry_imgs[:-7]
        info = {
            'title': title,
            'author': author,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'description': desc,
            'img': [img['src'] for img in comic_imgs],
        }
        return info
502
503
504
class Dilem(GenericNavigableComic):
    """Retriever for Ali Dilem comics."""
    name = 'dilem'
    long_name = 'Ali Dilem'
    url = 'http://information.tv5monde.com/dilem'
    _categories = ('FRANCAIS', )
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = "http://information.tv5monde.com/dilem/2004-06-26"

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the link to the next (or previous) comic, None if there is none."""
        # On this site the classes are swapped: prev is next / next is prev.
        li_class = 'prev' if next_ else 'next'
        li = last_soup.find('li', class_=li_class)
        if not li:
            return None
        return li.find('a')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return short url, title, images and date of the comic in `soup`."""
        short_url = soup.find('link', rel='shortlink')['href']
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        image_metas = soup.find_all('meta', property='og:image')
        raw_date = soup.find('span', property='dc:date')['content']
        # Only the leading YYYY-MM-DD part is parsed.
        day = string_to_date(raw_date[:10], "%Y-%m-%d")
        info = {
            'short_url': short_url,
            'title': title,
            'img': [meta['content'] for meta in image_metas],
            'day': day.day,
            'month': day.month,
            'year': day.year,
        }
        return info
538
539
540
class SpaceAvalanche(GenericNavigableComic):
    """Retriever for Space Avalanche comics."""
    name = 'avalanche'
    long_name = 'Space Avalanche'
    url = 'http://www.spaceavalanche.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return a hardcoded link-like dict for the first comic."""
        first_link = {
            'href': "http://www.spaceavalanche.com/2009/02/02/irish-sea/",
            'title': "Irish Sea",
        }
        return first_link

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, date (read from the url) and images of the comic."""
        url_date_re = re.compile('.*/([0-9]*)/([0-9]*)/([0-9]*)/.*$')
        title = link['title']
        url = cls.get_url_from_link(link)
        # The three numeric groups of the url are year, month and day.
        year, month, day = map(int, url_date_re.match(url).groups())
        entry = soup.find("div", class_="entry")
        imgs = entry.find_all("img")
        info = {
            'title': title,
            'day': day,
            'month': month,
            'year': year,
            'img': [img['src'] for img in imgs],
        }
        return info
568
569
570
class ZenPencils(GenericNavigableComic):
    """Retriever for Zen Pencils comics."""
    # Also on http://zenpencils.tumblr.com
    # Also on http://www.gocomics.com/zen-pencils
    name = 'zenpencils'
    long_name = 'Zen Pencils'
    url = 'http://zenpencils.com'
    _categories = ('ZENPENCILS', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = "http://zenpencils.com/comic/1-ralph-waldo-emerson-make-them-cry/"

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, description, author, date and images of the comic in `soup`."""
        comic_div = soup.find('div', id='comic')
        imgs = comic_div.find_all('img')
        # imgs2 = soup.find_all('meta', property='og:image')
        post = soup.find('div', class_='post-content')
        author = post.find("span", class_="post-author").find("a").string
        title = soup.find('meta', property='og:title')['content']
        date_str = post.find('span', class_='post-date').string
        day = string_to_date(date_str, "%B %d, %Y")
        # Sanity checks on the images found in the page.
        assert imgs
        assert all(i['alt'] == i['title'] for i in imgs)
        assert all(i['alt'] in (title, "") for i in imgs)
        desc = soup.find('meta', property='og:description')['content']
        info = {
            'title': title,
            'description': desc,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [urljoin_wrapper(cls.url, img['src']) for img in imgs],
        }
        return info
605
606
607
class ItsTheTie(GenericNavigableComic):
    """Retriever for It's the tie comics."""
    # Also on http://itsthetie.tumblr.com
    # Also on https://tapastic.com/series/itsthetie
    name = 'tie'
    long_name = "It's the tie"
    url = "http://itsthetie.com"
    _categories = ('TIE', )
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, date, images (bonus ones included) and tags of the comic in `soup`."""
        title = soup.find('h1', class_='comic-title').find('a').string
        meta_header = soup.find('header', class_='comic-meta entry-meta')
        date_str = meta_header.find('a').string
        day = string_to_date(date_str, "%B %d, %Y")
        # Bonus images may or may not be in meta og:image.
        imgs_src = [meta['content'] for meta in soup.find_all('meta', property='og:image')]
        bonus_src = [img['data-oversrc'] for img in soup.find_all('img', attrs={'data-oversrc': True})]
        merged_src = list(imgs_src)
        for src in bonus_src:
            if src not in imgs_src:
                merged_src.append(src)
        # The generic "bonus panel" image is filtered out.
        all_imgs_src = [src for src in merged_src if not src.endswith("/2016/01/bonus-panel.png")]
        tag_meta = soup.find('meta', property='article:tag')
        if tag_meta:
            tags = tag_meta['content']
        else:
            tags = ""
        info = {
            'title': title,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': all_imgs_src,
            'tags': tags,
        }
        return info
641
642
643
class PenelopeBagieu(GenericNavigableComic):
    """Retriever for the comics of Penelope Bagieu's blog."""
    name = 'bagieu'
    long_name = 'Ma vie est tout a fait fascinante (Bagieu)'
    url = 'http://www.penelope-jolicoeur.com'
    _categories = ('FRANCAIS', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.penelope-jolicoeur.com/2007/02/ma-vie-mon-oeuv.html'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, images and date of the comic in `soup`."""
        date_header = soup.find('h2', class_='date-header')
        # The date is written in French, weekday name included.
        day = string_to_date(date_header.string, "%A %d %B %Y", "fr_FR.utf8")
        entry_body = soup.find('div', class_='entry-body')
        imgs = entry_body.find_all('img')
        title = soup.find('h3', class_='entry-header').string
        info = {
            'title': title,
            'img': [img['src'] for img in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
        return info
667
668
669 View Code Duplication
class OneOneOneOneComic(GenericEmptyComic, GenericNavigableComic):
    """Retriever for 1111 Comics."""
    # Also on http://comics1111.tumblr.com
    # Also on https://tapastic.com/series/1111-Comics
    name = '1111'
    long_name = '1111 Comics'
    url = 'http://www.1111comics.me'
    _categories = ('ONEONEONEONE', )
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, date and images of the comic in `soup`."""
        title_heading = soup.find('h1', class_='comic-title')
        title = title_heading.find('a').string
        meta_header = soup.find('header', class_='comic-meta entry-meta')
        date_str = meta_header.find('a').string
        day = string_to_date(date_str, "%B %d, %Y")
        image_metas = soup.find_all('meta', property='og:image')
        info = {
            'title': title,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [meta['content'] for meta in image_metas],
        }
        return info
694
695
696 View Code Duplication
class AngryAtNothing(GenericNavigableComic):
    """Retriever for Angry at Nothing comics."""
    # Also on http://tapastic.com/series/Comics-yeah-definitely-comics-
    name = 'angry'
    long_name = 'Angry At Nothing'
    url = 'http://www.angryatnothing.net'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, date and images of the comic in `soup`."""
        title_heading = soup.find('h1', class_='comic-title')
        title = title_heading.find('a').string
        meta_header = soup.find('header', class_='comic-meta entry-meta')
        date_str = meta_header.find('a').string
        day = string_to_date(date_str, "%B %d, %Y")
        image_metas = soup.find_all('meta', property='og:image')
        info = {
            'title': title,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [meta['content'] for meta in image_metas],
        }
        return info
719
720
721
class NeDroid(GenericNavigableComic):
    """Class to retrieve NeDroid comics."""
    name = 'nedroid'
    long_name = 'NeDroid'
    url = 'http://nedroid.com'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with the short url, the comic number read from that
        short url, the two titles ('alt' and 'title' of the image) and the
        image url. Exactly one image is expected in the 'comic' div.
        """
        # The site url is escaped so that its dots only match literal dots.
        short_url_re = re.compile('^%s/\\?p=([0-9]*)' % re.escape(cls.url))
        short_url = cls.get_url_from_link(soup.find('link', rel='shortlink'))
        num = int(short_url_re.match(short_url).groups()[0])
        imgs = soup.find('div', id='comic').find_all('img')
        assert len(imgs) == 1
        title = imgs[0]['alt']
        title2 = imgs[0]['title']
        return {
            'short_url': short_url,
            'title': title,
            'title2': title2,
            'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
            'num': num,
        }
747
748
749
class Garfield(GenericNavigableComic):
    """Class to retrieve Garfield comics."""
    # Also on http://www.gocomics.com/garfield
    name = 'garfield'
    long_name = 'Garfield'
    url = 'https://garfield.com'
    _categories = ('GARFIELD', )
    get_first_comic_link = simulate_first_link
    first_url = 'https://garfield.com/comic/1978/06/19'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic."""
        return last_soup.find('a', class_='comic-arrow-right' if next_ else 'comic-arrow-left')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The date is read from the comic url (<url>/comic/<year>/<month>/<day>);
        the images are the 'img-responsive' ones of the 'comic-display' div.
        """
        url = cls.get_url_from_link(link)
        # The site url is escaped so that its dots only match literal dots.
        date_re = re.compile('^%s/comic/([0-9]*)/([0-9]*)/([0-9]*)' % re.escape(cls.url))
        year, month, day = [int(s) for s in date_re.match(url).groups()]
        imgs = soup.find('div', class_='comic-display').find_all('img', class_='img-responsive')
        return {
            'month': month,
            'year': year,
            'day': day,
            'img': [i['src'] for i in imgs],
        }
777
778
779
class Dilbert(GenericNavigableComic):
    """Retriever for the Dilbert comic strips."""
    # Also on http://www.gocomics.com/dilbert-classics
    name = 'dilbert'
    long_name = 'Dilbert'
    url = 'http://dilbert.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://dilbert.com/strip/1989-04-16'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor inside the right (next) or left (previous) nav div, if any."""
        nav_class = 'nav-comic nav-right' if next_ else 'nav-comic nav-left'
        nav_div = last_soup.find('div', class_=nav_class)
        if not nav_div:
            return None
        return nav_div.find('a')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, description, images, author, tags and date from the page's meta tags."""
        def meta_content(prop):
            """Content of the first meta tag with the given property."""
            return soup.find('meta', property=prop)['content']

        title = meta_content('og:title')
        image_metas = soup.find_all('meta', property='og:image')
        description = meta_content('og:description')
        published = string_to_date(meta_content('article:publish_date'), "%B %d, %Y")
        author = meta_content('article:author')
        tags = meta_content('article:tag')
        return {
            'title': title,
            'description': description,
            'img': [meta['content'] for meta in image_metas],
            'author': author,
            'tags': tags,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
815
816
817
class VictimsOfCircumsolar(GenericNavigableComic):
    """Retriever for the Victims Of Circumsolar comics."""
    # Also on http://victimsofcomics.tumblr.com
    name = 'circumsolar'
    long_name = 'Victims Of Circumsolar'
    url = 'http://www.victimsofcircumsolar.com'
    get_navi_link = get_a_navi_comicnavnext_navinext
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.victimsofcircumsolar.com/comic/modern-addiction'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, description and image urls of a comic page.

        Title and description come from the last matching 'og:' meta tag.
        """
        # Date is on the archive page
        title_metas = soup.find_all('meta', property='og:title')
        comic_title = title_metas[-1]['content']
        desc_metas = soup.find_all('meta', property='og:description')
        description = desc_metas[-1]['content']
        pictures = soup.find('div', id='comic').find_all('img')
        for pic in pictures:
            assert pic['title'] == pic['alt'] == comic_title
        return {
            'title': comic_title,
            'description': description,
            'img': [pic['src'] for pic in pictures],
        }
840
841
842
class ThreeWordPhrase(GenericNavigableComic):
    """Class to retrieve Three Word Phrase comics."""
    # Also on http://www.threewordphrase.tumblr.com
    name = 'threeword'
    long_name = 'Three Word Phrase'
    url = 'http://threewordphrase.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to the first comic.

        Returns the parent element of the 'first' button image, or None
        when that image is not found on the home page.
        """
        img = get_soup_at_url(cls.url).find('img', src='/firstlink.gif')
        return None if img is None else img.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns None when the button image is missing or when its parent
        element has no 'href' attribute.
        """
        img = last_soup.find('img', src='/nextlink.gif' if next_ else '/prevlink.gif')
        if img is None:
            # No navigation button on this page: nothing to follow.
            return None
        link = img.parent
        return None if link.get('href') is None else link

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        Every image of the page is kept except those whose source ends
        with one of the listed site-decoration file names.
        """
        title = soup.find('title')
        imgs = [img for img in soup.find_all('img')
                if not img['src'].endswith(
                    ('link.gif', '32.png', 'twpbookad.jpg',
                     'merchad.jpg', 'header.gif', 'tipjar.jpg'))]
        return {
            'title': title.string if title else None,
            'title2': '  '.join(img.get('alt') for img in imgs if img.get('alt')),
            'img': [urljoin_wrapper(cls.url, img['src']) for img in imgs],
        }
874
875
876
class DeadlyPanel(GenericEmptyComic, GenericNavigableComic):
    """Retriever for the Deadly Panel comics."""
    # Also on https://tapastic.com/series/deadlypanel
    name = 'deadly'
    long_name = 'Deadly Panel'
    url = 'http://www.deadlypanel.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the urls of the images found in the 'comic' div."""
        pictures = soup.find('div', id='comic').find_all('img')
        for pic in pictures:
            assert pic['alt'] == pic['title']
        return {
            'img': [pic['src'] for pic in pictures],
        }
893
894
895 View Code Duplication
class TheGentlemanArmchair(GenericNavigableComic):
    """Retriever for The Gentleman Armchair comics."""
    name = 'gentlemanarmchair'
    long_name = 'The Gentleman Armchair'
    url = 'http://thegentlemansarmchair.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return images, title, author and publication date of a comic page."""
        post_title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        post_author = author_span.find("a").string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find('div', id='comic').find_all('img')
        return {
            'img': [pic['src'] for pic in pictures],
            'title': post_title,
            'author': post_author,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
919
920
921 View Code Duplication
class MyExtraLife(GenericNavigableComic):
    """Retriever for the My Extra Life comics."""
    name = 'extralife'
    long_name = 'My Extra Life'
    url = 'http://www.myextralife.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'first comic' navigation anchor of the home page."""
        home = get_soup_at_url(cls.url)
        return home.find('a', class_='comic_nav_link first_comic_link')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, image urls (empty sources skipped) and date of a comic page."""
        comic_title = soup.find("h1", class_="comic_title").string
        raw_date = soup.find("span", class_="comic_date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find_all("img", class_="comic")
        for pic in pictures:
            assert pic['alt'] == pic['title'] == comic_title
        return {
            'title': comic_title,
            'img': [pic['src'] for pic in pictures if pic["src"]],
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
948
949
950
class SaturdayMorningBreakfastCereal(GenericNavigableComic):
    """Class to retrieve Saturday Morning Breakfast Cereal comics."""
    # Also on http://www.gocomics.com/saturday-morning-breakfast-cereal
    # Also on http://smbc-comics.tumblr.com
    name = 'smbc'
    long_name = 'Saturday Morning Breakfast Cereal'
    url = 'http://www.smbc-comics.com'
    _categories = ('SMBC', )
    get_navi_link = get_a_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Get link to the first comic (the anchor with rel='start')."""
        return get_soup_at_url(cls.url).find('a', rel='start')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        The main image is the one with id 'cc-comic'. A second image is
        added when the 'aftercomic' div exists and holds an image with a
        non-empty source.
        """
        image1 = soup.find('img', id='cc-comic')
        image_url1 = image1['src']
        aftercomic = soup.find('div', id='aftercomic')
        # The bonus panel is optional: the div may be absent, or present
        # without any usable image inside.
        bonus_img = aftercomic.find('img') if aftercomic else None
        image_url2 = bonus_img.get('src') if bonus_img is not None else None
        imgs = [image_url1] + ([image_url2] if image_url2 else [])
        date_str = soup.find('div', class_='cc-publishtime').contents[0]
        day = string_to_date(date_str, "%B %d, %Y")
        return {
            'title': image1['title'],
            'img': [urljoin_wrapper(cls.url, i) for i in imgs],
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
982
983
984
class PerryBibleFellowship(GenericListableComic):
    """Retriever for the Perry Bible Fellowship comics."""
    name = 'pbf'
    long_name = 'Perry Bible Fellowship'
    url = 'http://pbfcomics.com'
    get_url_from_archive_element = join_cls_url_to_href

    @classmethod
    def get_archive_elements(cls):
        """Return the home page anchors whose href is '/<digits>/', in reverse page order."""
        link_pattern = re.compile('^/[0-9]*/$')
        anchors = get_soup_at_url(cls.url).find_all('a', href=link_pattern)
        return reversed(anchors)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return number, name, image url and file prefix for an archive link."""
        comic_url = cls.get_url_from_archive_element(link)
        img_pattern = re.compile('^/archive_b/PBF.*')
        comic_name = link.string
        comic_num = int(link['name'])
        assert link['href'] == '/%d/' % comic_num
        pictures = soup.find_all('img', src=img_pattern)
        assert len(pictures) == 1
        assert pictures[0]['alt'] == comic_name
        return {
            'num': comic_num,
            'name': comic_name,
            'img': [urljoin_wrapper(comic_url, pic['src']) for pic in pictures],
            'prefix': '%d-' % comic_num,
        }
1014
1015
1016
class Mercworks(GenericNavigableComic):
    """Retriever for the Mercworks comics."""
    # Also on http://mercworks.tumblr.com
    name = 'mercworks'
    long_name = 'Mercworks'
    url = 'http://mercworks.net'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return images, title, description and date read from the page's meta tags.

        The description defaults to an empty string when its meta tag is
        absent.
        """
        comic_title = soup.find('meta', property='og:title')['content']
        desc_meta = soup.find('meta', property='og:description')
        if desc_meta:
            description = desc_meta['content']
        else:
            description = ""
        published_time = soup.find('meta', property='article:published_time')['content']
        # Only the leading 'YYYY-MM-DD' part of the value is parsed
        published = string_to_date(published_time[:10], "%Y-%m-%d")
        image_metas = soup.find_all('meta', property='og:image')
        return {
            'img': [meta['content'] for meta in image_metas],
            'title': comic_title,
            'desc': description,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
1042
1043
1044
class BerkeleyMews(GenericListableComic):
    """Retriever for the Berkeley Mews comics."""
    # Also on http://mews.tumblr.com
    # Also on http://www.gocomics.com/berkeley-mews
    name = 'berkeley'
    long_name = 'Berkeley Mews'
    url = 'http://www.berkeleymews.com'
    _categories = ('BERKELEY', )
    get_url_from_archive_element = get_href
    # Matches comic urls ending in '?p=<number>' and captures the number
    comic_num_re = re.compile('%s/\\?p=([0-9]*)$' % url)

    @classmethod
    def get_archive_elements(cls):
        """Return the comic anchors of the archive page, in reverse page order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, "?page_id=2"))
        anchors = archive_soup.find_all('a', href=cls.comic_num_re)
        return reversed(anchors)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return number, titles, image url and date for an archive link.

        The date is parsed from the 'Y-M-D-' part of the image url.
        """
        date_pattern = re.compile('.*/([0-9]*)-([0-9]*)-([0-9]*)-.*')
        comic_url = cls.get_url_from_archive_element(link)
        comic_num = int(cls.comic_num_re.match(comic_url).group(1))
        picture = soup.find('div', id='comic').find('img')
        assert picture['alt'] == picture['title']
        hover_text = picture['title']
        picture_url = picture['src']
        year, month, day = (int(part) for part in date_pattern.match(picture_url).groups())
        return {
            'num': comic_num,
            'title': link.string,
            'title2': hover_text,
            'img': [picture_url],
            'year': year,
            'month': month,
            'day': day,
        }
1080
1081
1082
class GenericBouletCorp(GenericNavigableComic):
    """Shared retriever logic for the BouletCorp comics in their various languages."""
    # Also on http://bouletcorp.tumblr.com
    _categories = ('BOULET', )
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the first anchor of the 'centered_nav' div of the home page."""
        nav_div = get_soup_at_url(cls.url).find('div', id='centered_nav')
        return nav_div.find_all('a')[0]

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return images, title, hover texts and date (parsed from the url) of a comic."""
        comic_url = cls.get_url_from_link(link)
        url_pattern = re.compile('^%s/([0-9]*)/([0-9]*)/([0-9]*)/' % cls.url)
        year, month, day = (int(part) for part in url_pattern.match(comic_url).groups())
        story = soup.find('div', id='notes').find('div', class_='storycontent')
        pictures = story.find_all('img')
        hover_texts = [pic.get('title') for pic in pictures]
        texts = '  '.join(text for text in hover_texts if text)
        page_title = soup.find('title').string
        return {
            'img': [convert_iri_to_plain_ascii_uri(pic['src'])
                    for pic in pictures if pic.get('src') is not None],
            'title': page_title,
            'texts': texts,
            'year': year,
            'month': month,
            'day': day,
        }
1110
1111
1112
class BouletCorp(GenericBouletCorp):
    """Retriever for the BouletCorp comics served from www.bouletcorp.com."""
    name = 'boulet'
    long_name = 'Boulet Corp'
    url = 'http://www.bouletcorp.com'
    # NOTE(review): how this combines with GenericBouletCorp._categories is
    # decided by the base classes, which are not visible here.
    _categories = ('FRANCAIS', )
1118
1119
1120
class BouletCorpEn(GenericBouletCorp):
    """Retriever for the BouletCorp comics served from english.bouletcorp.com."""
    name = 'boulet_en'
    long_name = 'Boulet Corp English'
    url = 'http://english.bouletcorp.com'
1125
1126
1127 View Code Duplication
class AmazingSuperPowers(GenericNavigableComic):
    """Retriever for the Amazing Super Powers comics."""
    name = 'asp'
    long_name = 'Amazing Super Powers'
    url = 'http://www.amazingsuperpowers.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title (joined image titles), author, image urls and date of a comic."""
        author_span = soup.find("span", class_="post-author")
        post_author = author_span.find("a").string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find('div', id='comic').find_all('img')
        joined_title = ' '.join(pic['title'] for pic in pictures)
        for pic in pictures:
            assert pic['alt'] == pic['title']
        return {
            'title': joined_title,
            'author': post_author,
            'img': [pic['src'] for pic in pictures],
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
1152
1153
1154
class ToonHole(GenericNavigableComic):
    """Retriever for the Toon Hole comics."""
    # Also on http://tapastic.com/series/TOONHOLE
    name = 'toonhole'
    long_name = 'Toon Hole'
    url = 'http://www.toonhole.com'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return short url, title, date and image urls of a comic page.

        The title is the 'alt' text of the first image, or an empty string
        when the 'comic' div holds no image.
        """
        shortlink = soup.find('link', rel='shortlink')['href']
        raw_date = soup.find('div', class_='entry-meta').contents[0].strip()
        published = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find('div', id='comic').find_all('img')
        comic_title = ""
        if pictures:
            first_pic = pictures[0]
            comic_title = first_pic['alt']
            assert first_pic['title'] == comic_title
        return {
            'short_url': shortlink,
            'title': comic_title,
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [convert_iri_to_plain_ascii_uri(pic['src']) for pic in pictures],
        }
1184
1185
1186
class Channelate(GenericNavigableComic):
    """Retriever for the Channelate comics."""
    name = 'channelate'
    long_name = 'Channelate'
    url = 'http://www.channelate.com'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, author, date and images of a comic, extra panel included.

        When the page has an 'extrapanelbutton' div, the page it links to
        is fetched and its 'extrapanelimage' images are appended.
        """
        author_span = soup.find("span", class_="post-author")
        post_author = author_span.find("a").string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, '%Y/%m/%d')
        comic_title = soup.find('meta', property='og:title')['content']
        comic_div = soup.find('div', id='comic')
        pictures = comic_div.find_all('img') if comic_div else []
        extra_url = None
        button_div = soup.find('div', id='extrapanelbutton')
        if button_div:
            extra_url = button_div.find('a')['href']
            extra_page = get_soup_at_url(extra_url)
            pictures.extend(extra_page.find_all('img', class_='extrapanelimage'))
        return {
            'url_extra': extra_url,
            'title': comic_title,
            'author': post_author,
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [urljoin_wrapper(cls.url, pic['src']) for pic in pictures],
        }
1220
1221
1222
class CyanideAndHappiness(GenericNavigableComic):
    """Class to retrieve Cyanide And Happiness comics."""
    name = 'cyanide'
    long_name = 'Cyanide and Happiness'
    url = 'http://explosm.net'
    _categories = ('NSFW', )
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to the first comic (the anchor titled 'Oldest comic')."""
        return get_soup_at_url(cls.url).find('a', title='Oldest comic')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns None when the navigation anchor is missing or carries no
        'href' attribute.
        """
        # No stray whitespace in the class name: BeautifulSoup compares it
        # with the whitespace-split values of the 'class' attribute, so a
        # trailing space could never match.
        link = last_soup.find('a', class_='next-comic' if next_ else 'previous-comic')
        if link is None:
            return None
        return None if link.get('href') is None else link

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        The comic number is the second-to-last '/'-separated part of the
        'og:url' meta content; the author credit is expected to start
        with 'by ', which is stripped.
        """
        url2 = soup.find('meta', property='og:url')['content']
        num = int(url2.split('/')[-2])
        date_str = soup.find('h3').find('a').string
        day = string_to_date(date_str, '%Y.%m.%d')
        author = soup.find('small', class_="author-credit-name").string
        assert author.startswith('by ')
        author = author[3:]
        imgs = soup.find_all('img', id='main-comic')
        return {
            'num': num,
            'author': author,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'prefix': '%d-' % num,
            'img': [convert_iri_to_plain_ascii_uri(urljoin_wrapper(cls.url, i['src'])) for i in imgs]
        }
1261
1262
1263
class MrLovenstein(GenericComic):
    """Class to retrieve Mr Lovenstein comics."""
    # Also on https://tapastic.com/series/MrLovenstein
    name = 'mrlovenstein'
    long_name = 'Mr. Lovenstein'
    url = 'http://www.mrlovenstein.com'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generator to get the next comic. Implementation of GenericComic's abstract method.

        The range of comic numbers is taken from the '/comic/<num>' links
        of the home page; every number from the one after last_comic (or
        from the smallest linked number) up to the largest is fetched.
        """
        # TODO: more info from http://www.mrlovenstein.com/archive
        comic_num_re = re.compile('^/comic/([0-9]*)$')
        # Compiled once: the pattern does not depend on the comic fetched.
        comic_img_re = re.compile('^/images/comics/')
        nums = [int(comic_num_re.match(link['href']).groups()[0])
                for link in get_soup_at_url(cls.url).find_all('a', href=comic_num_re)]
        first, last = min(nums), max(nums)
        if last_comic:
            first = last_comic['num'] + 1
        for num in range(first, last + 1):
            url = urljoin_wrapper(cls.url, '/comic/%d' % num)
            soup = get_soup_at_url(url)
            # Images are stored in the reverse of their order in the page
            imgs = list(reversed(soup.find_all('img', src=comic_img_re)))
            description = soup.find('meta', attrs={'name': 'description'})['content']
            yield {
                'url': url,
                'num': num,
                'texts': '  '.join(t for t in (i.get('title') for i in imgs) if t),
                'img': [urljoin_wrapper(url, i['src']) for i in imgs],
                'description': description,
            }
1293
1294
1295
class DinosaurComics(GenericListableComic):
    """Retriever for the Dinosaur Comics comics."""
    name = 'dinosaur'
    long_name = 'Dinosaur Comics'
    url = 'http://www.qwantz.com'
    get_url_from_archive_element = get_href
    # Matches comic urls and captures the comic number
    comic_link_re = re.compile('^%s/index.php\\?comic=([0-9]*)$' % url)

    @classmethod
    def get_archive_elements(cls):
        """Return the comic anchors of the archive page, in reverse page order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive.php'))
        anchors = archive_soup.find_all('a', href=cls.comic_link_re)
        # first link is random -> skip it
        return reversed(anchors[1:])

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return date, image url, title, archive text and number for an archive link.

        The date is the text of the archive link; the text is taken from
        the node that follows the link.
        """
        comic_url = cls.get_url_from_archive_element(link)
        comic_num = int(cls.comic_link_re.match(comic_url).group(1))
        raw_date = link.string
        following_text = link.next_sibling.string
        published = string_to_date(remove_st_nd_rd_th_from_date(raw_date), "%B %d, %Y")
        img_pattern = re.compile('^%s/comics/' % cls.url)
        picture = soup.find('img', src=img_pattern)
        return {
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [picture.get('src')],
            'title': picture.get('title'),
            'text': following_text,
            'num': comic_num,
        }
1328
1329
1330
class ButterSafe(GenericListableComic):
    """Retriever for the Butter Safe comics."""
    name = 'butter'
    long_name = 'ButterSafe'
    url = 'http://buttersafe.com'
    get_url_from_archive_element = get_href
    # Matches comic urls and captures the three numeric path parts (read as year, month, day)
    comic_link_re = re.compile('^%s/([0-9]*)/([0-9]*)/([0-9]*)/.*' % url)

    @classmethod
    def get_archive_elements(cls):
        """Return the comic anchors of the archive page, in reverse page order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive/'))
        anchors = archive_soup.find_all('a', href=cls.comic_link_re)
        return reversed(anchors)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, date (parsed from the url) and image url for an archive link."""
        comic_url = cls.get_url_from_archive_element(link)
        comic_title = link.string
        year, month, day = (int(part) for part in cls.comic_link_re.match(comic_url).groups())
        picture = soup.find('div', id='comic').find('img')
        assert picture['alt'] == comic_title
        return {
            'title': comic_title,
            'day': day,
            'month': month,
            'year': year,
            'img': [picture['src']],
        }
1358
1359
1360
class CalvinAndHobbes(GenericComic):
    """Class to retrieve Calvin and Hobbes comics."""
    # Also on http://www.gocomics.com/calvinandhobbes/
    name = 'calvin'
    long_name = 'Calvin and Hobbes'
    # This is not through any official webpage but eh...
    url = 'http://marcel-oehler.marcellosendos.ch/comics/ch/'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generator to get the next comic. Implementation of GenericComic's abstract method.

        Walks the '<year>/<month>/' links of the index page and, for each
        month that may hold comics newer than the last retrieved one,
        yields the images whose date is strictly after the last date seen.
        """
        last_date = get_date_for_comic(
            last_comic) if last_comic else date(1985, 11, 1)
        link_re = re.compile('^([0-9]*)/([0-9]*)/')
        for link in get_soup_at_url(cls.url).find_all('a', href=link_re):
            url = link['href']
            # The raw strings are kept: they are reused verbatim to build
            # the image pattern and the image url below.
            year, month = link_re.match(url).groups()
            year_num, month_num = int(year), int(month)
            # A month starting more than 31 days before last_date cannot
            # contain anything new.
            if date(year_num, month_num, 1) + timedelta(days=31) < last_date:
                continue
            img_re = re.compile('^%s%s([0-9]*)' % (year, month))
            month_url = urljoin_wrapper(cls.url, url)
            for img in get_soup_at_url(month_url).find_all('img', src=img_re):
                img_src = img['src']
                day = int(img_re.match(img_src).groups()[0])
                comic_date = date(year_num, month_num, day)
                if comic_date > last_date:
                    yield {
                        'url': month_url,
                        'year': year_num,
                        'month': month_num,
                        'day': day,
                        'img': ['%s%s/%s/%s' % (cls.url, year, month, img_src)],
                    }
                    last_date = comic_date
1394
1395
1396
class AbstruseGoose(GenericListableComic):
    """Retriever for the Abstruse Goose comics."""
    name = 'abstruse'
    long_name = 'Abstruse Goose'
    url = 'http://abstrusegoose.com'
    get_url_from_archive_element = get_href
    # Matches comic urls and captures the comic number
    comic_url_re = re.compile('^%s/([0-9]*)$' % url)
    # Matches the urls of the comic strip images
    comic_img_re = re.compile('^%s/strips/.*' % url)

    @classmethod
    def get_archive_elements(cls):
        """Return the comic anchors of the archive page, in page order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive'))
        return archive_soup.find_all('a', href=cls.comic_url_re)

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Return number, title and image url for an archive link."""
        page_url = cls.get_url_from_archive_element(archive_elt)
        comic_num = int(cls.comic_url_re.match(page_url).group(1))
        return {
            'num': comic_num,
            'title': archive_elt.string,
            'img': [soup.find('img', src=cls.comic_img_re)['src']]
        }
1419
1420
1421
class PhDComics(GenericNavigableComic):
    """Retriever for the PhD Comics."""
    name = 'phd'
    long_name = 'PhD Comics'
    url = 'http://phdcomics.com/comics/archive.php'

    @classmethod
    def get_first_comic_link(cls):
        """Return the parent of the 'first' button image, or None when it is absent."""
        button = get_soup_at_url(cls.url).find(
            'img', src='http://phdcomics.com/comics/images/first_button.gif')
        if button is None:
            return None
        return button.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the parent of the 'next'/'prev' button image, or None when it is absent."""
        direction = 'next' if next_ else 'prev'
        button_src = 'http://phdcomics.com/comics/images/%s_button.gif' % direction
        button = last_soup.find('img', src=button_src)
        if button is None:
            return None
        return button.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the image urls and title read from the page's meta tags."""
        comic_title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        image_metas = soup.find_all('meta', property='og:image')
        return {
            'img': [meta['content'] for meta in image_metas],
            'title': comic_title,
        }
1450
1451
1452
class Octopuns(GenericEmptyComic, GenericNavigableComic):
    """Class to retrieve Octopuns comics."""
    # Also on http://octopuns.tumblr.com
    name = 'octopuns'
    long_name = 'Octopuns'
    url = 'http://www.octopuns.net'

    @classmethod
    def get_first_comic_link(cls):
        """Get link to the first comic.

        Returns the parent element of the 'First.png' button image, or
        None when that image is not found on the home page.
        """
        img = get_soup_at_url(cls.url).find('img', src=re.compile('.*/First.png'))
        return None if img is None else img.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns None when the button image is missing or when its parent
        element has no 'href' attribute.
        """
        img = last_soup.find('img', src=re.compile('.*/Next.png' if next_ else '.*/Back.png'))
        if img is None:
            # No navigation button on this page: nothing to follow.
            return None
        link = img.parent
        return None if link.get('href') is None else link

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        Returns the image urls (from the 'image_src' link tags), the post
        title and the date parsed from the date header.
        """
        title = soup.find('h3', class_='post-title entry-title').string
        date_str = soup.find('h2', class_='date-header').string
        day = string_to_date(date_str, "%A, %B %d, %Y")
        imgs = soup.find_all('link', rel='image_src')
        return {
            'img': [i['href'] for i in imgs],
            'title': title,
            'day': day.day,
            'month': day.month,
            'year': day.year,
        }
1484
1485
1486
class Quarktees(GenericNavigableComic):
    """Retriever for the Quarktees comics."""
    name = 'quarktees'
    long_name = 'Quarktees'
    url = 'http://www.quarktees.com/blogs/news'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.quarktees.com/blogs/news/12486621-coming-soon'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the 'article-next' (next) or 'article-prev' (previous) anchor."""
        anchor_id = 'article-next' if next_ else 'article-prev'
        return last_soup.find('a', id=anchor_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the title and the urls of the images of the article."""
        comic_title = soup.find('meta', property='og:title')['content']
        pictures = soup.find('div', class_='single-article').find_all('img')
        return {
            'title': comic_title,
            'img': [urljoin_wrapper(cls.url, pic['src']) for pic in pictures],
        }
1510
1511
1512
class OverCompensating(GenericNavigableComic):
    """Retriever for the Over Compensating comics."""
    name = 'compensating'
    long_name = 'Over Compensating'
    url = 'http://www.overcompensating.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the home-page anchor whose href ends with 'comic=1'."""
        home = get_soup_at_url(cls.url)
        return home.find('a', href=re.compile('comic=1$'))

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor titled 'next comic' or 'go back already'."""
        wanted_title = 'next comic' if next_ else 'go back already'
        return last_soup.find('a', title=wanted_title)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic record: number, image url and title.

        The number is taken from the trailing 'comic=<digits>' part of the
        comic url; the image is the first <img> whose src starts with
        '/oc/comics/', and its ``title`` attribute becomes the comic title.
        """
        picture_src_re = re.compile('^/oc/comics/.*')
        number_re = re.compile('.*comic=([0-9]*)$')
        page_url = cls.get_url_from_link(link)
        number = int(number_re.match(page_url).group(1))
        picture = soup.find('img', src=picture_src_re)
        return {
            'num': number,
            'img': [urljoin_wrapper(page_url, picture['src'])],
            'title': picture.get('title')
        }
1542
1543
1544
class Oglaf(GenericNavigableComic):
    """Retriever for the Oglaf comics (categorised as NSFW)."""
    name = 'oglaf'
    long_name = 'Oglaf [NSFW]'
    url = 'http://oglaf.com'
    _categories = ('NSFW', )
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the parent of the div with id 'st' on the home page."""
        start_div = get_soup_at_url(cls.url).find("div", id="st")
        return start_div.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the parent of the 'nx' (next) or 'pvs' (previous) div, or None."""
        arrow_div = last_soup.find("div", id="nx" if next_ else "pvs")
        if not arrow_div:
            return None
        return arrow_div.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic record from the title image and the strip image.

        Exactly one image is expected in the 'tt' div and exactly one image
        with id 'strip'; their ``title`` attributes are joined with a space to
        form the description.
        """
        page_title = soup.find('title').string
        heading_pics = soup.find('div', id='tt').find_all('img')
        assert len(heading_pics) == 1
        strip_pics = soup.find_all('img', id='strip')
        assert len(strip_pics) == 1
        pictures = heading_pics + strip_pics
        hover_text = ' '.join(pic['title'] for pic in pictures)
        return {
            'title': page_title,
            'img': [pic['src'] for pic in pictures],
            'description': hover_text,
        }
1578
1579
1580
class ScandinaviaAndTheWorld(GenericNavigableComic):
    """Retriever for the Scandinavia And The World comics."""
    name = 'satw'
    long_name = 'Scandinavia And The World'
    url = 'http://satwcomic.com'
    get_first_comic_link = simulate_first_link
    first_url = 'http://satwcomic.com/sweden-denmark-and-norway'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor whose accesskey is 'n' (next) or 'p' (previous)."""
        access_key = 'n' if next_ else 'p'
        return last_soup.find('a', accesskey=access_key)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Read title and description from <meta> tags and list the images.

        The title comes from the 'twitter:label1' meta tag, the description
        from 'og:description', and the images are every <img itemprop="image">.
        """
        label = soup.find('meta', attrs={'name': 'twitter:label1'})['content']
        summary = soup.find('meta', property='og:description')['content']
        pictures = soup.find_all('img', itemprop="image")
        return {
            'title': label,
            'description': summary,
            'img': [pic['src'] for pic in pictures],
        }
1604
1605
1606
class SomethingOfThatIlk(GenericEmptyComic):
    """Placeholder for the Something Of That Ilk comics.

    The comic does not exist anymore.
    """
    name = 'somethingofthatilk'
    long_name = 'Something Of That Ilk'
    url = 'http://www.somethingofthatilk.com'
1611
1612
1613
class InfiniteMonkeyBusiness(GenericNavigableComic):
    """Retriever for the Infinite Monkey Business comics."""
    name = 'monkey'
    long_name = 'Infinite Monkey Business'
    url = 'http://infinitemonkeybusiness.net'
    get_navi_link = get_a_navi_comicnavnext_navinext
    get_first_comic_link = simulate_first_link
    first_url = 'http://infinitemonkeybusiness.net/comic/pillory/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the og:title and the src of every image in the 'comic' div."""
        og_title = soup.find('meta', property='og:title')['content']
        comic_div = soup.find('div', id='comic')
        pictures = comic_div.find_all('img')
        return {
            'title': og_title,
            'img': [pic['src'] for pic in pictures],
        }
1631
1632
1633
class Wondermark(GenericListableComic):
    """Retriever for the Wondermark comics, driven by the archive page."""
    name = 'wondermark'
    long_name = 'Wondermark'
    url = 'http://wondermark.com'
    get_url_from_archive_element = get_href

    @classmethod
    def get_archive_elements(cls):
        """Return the rel='bookmark' anchors of the archive page, reversed."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive/'))
        bookmarks = archive_soup.find_all('a', rel='bookmark')
        return reversed(bookmarks)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic record from a post page.

        The date comes from the <em> inside the 'postdate' div. When the page
        has no 'comic' div, the record has no image and empty title/alt.
        """
        raw_date = soup.find('div', class_='postdate').find('em').string
        published = string_to_date(remove_st_nd_rd_th_from_date(raw_date), "%B %d, %Y")
        comic_div = soup.find('div', id='comic')
        if not comic_div:
            sources, alt_text, og_title = [], '', ''
        else:
            picture = comic_div.find('img')
            sources = [picture['src']]
            alt_text = picture['alt']
            assert alt_text == picture['title']
            og_title = soup.find('meta', property='og:title')['content']
        return {
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': sources,
            'title': og_title,
            'alt': alt_text,
            'tags': ' '.join(
                tag.string
                for tag in soup.find('div', class_='postmeta').find_all('a', rel='tag')),
        }
1670
1671
1672
class WarehouseComic(GenericNavigableComic):
    """Retriever for the Warehouse Comic comics."""
    name = 'warehouse'
    long_name = 'Warehouse Comic'
    url = 'http://warehousecomic.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return images, title and publication date read from the post page."""
        heading = soup.find('h2', class_='post-title').string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find('div', id='comic').find_all('img')
        return {
            'img': [pic['src'] for pic in pictures],
            'title': heading,
            'day': published.day,
            'month': published.month,
            'year': published.year,
        }
1694
1695
1696
class JustSayEh(GenericNavigableComic):
    """Retriever for the Just Say Eh comics."""
    # Also on http://tapastic.com/series/Just-Say-Eh
    name = 'justsayeh'
    long_name = 'Just Say Eh'
    url = 'http://www.justsayeh.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return images, title and the alt text of the first comic image.

        Every image in the 'comic' div is expected to have identical ``alt``
        and ``title`` attributes.
        """
        heading = soup.find('h2', class_='post-title').string
        pictures = soup.find("div", id="comic").find_all("img")
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        first_alt = pictures[0]['alt']
        return {
            'img': [pic['src'] for pic in pictures],
            'title': heading,
            'alt': first_alt,
        }
1717
1718
1719 View Code Duplication
class MouseBearComedy(GenericNavigableComic):
    """Retriever for the Mouse Bear Comedy comics."""
    # Also on http://mousebearcomedy.tumblr.com
    name = 'mousebear'
    long_name = 'Mouse Bear Comedy'
    url = 'http://www.mousebearcomedy.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return date, images, title and author read from the post page.

        Every image in the 'comic' div is expected to carry the post title as
        both its ``alt`` and ``title`` attribute.
        """
        heading = soup.find('h2', class_='post-title').string
        writer = soup.find("span", class_="post-author").find("a").string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, '%B %d, %Y')
        pictures = soup.find("div", id="comic").find_all("img")
        assert all(pic['alt'] == pic['title'] == heading for pic in pictures)
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [pic['src'] for pic in pictures],
            'title': heading,
            'author': writer,
        }
1745
1746
1747
class BigFootJustice(GenericNavigableComic):
    """Retriever for the Big Foot Justice comics."""
    # Also on http://tapastic.com/series/bigfoot-justice
    name = 'bigfoot'
    long_name = 'Big Foot Justice'
    url = 'http://bigfootjustice.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the images of the 'comic' div and a title built from them.

        The title is the space-joined ``title`` attributes of the images, each
        of which is expected to equal the image's ``alt`` attribute.
        """
        pictures = soup.find('div', id='comic').find_all('img')
        assert all(pic['title'] == pic['alt'] for pic in pictures)
        joined_titles = ' '.join(pic['title'] for pic in pictures)
        return {
            'img': [pic['src'] for pic in pictures],
            'title': joined_titles,
        }
1766
1767
1768
class RespawnComic(GenericNavigableComic):
    """Class to retrieve Respawn Comic."""
    # Also on http://respawncomic.tumblr.com
    name = 'respawn'
    long_name = 'Respawn Comic'
    # The literal previously ended with a stray space, which does not belong
    # in a URL.
    url = 'http://respawncomic.com'
    _categories = ('RESPAWN', )
    get_navi_link = get_a_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://respawncomic.com/comic/c0001/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Title, author and publication date are read from the page's <meta>
        tags (og:title and the two shareaholic entries). The images are the
        og:image entries, except the two urls listed in ``skip_imgs``.
        """
        title = soup.find('meta', property='og:title')['content']
        author = soup.find('meta', attrs={'name': 'shareaholic:article_author_name'})['content']
        date_str = soup.find('meta', attrs={'name': 'shareaholic:article_published_time'})['content']
        # Only the leading YYYY-MM-DD part of the timestamp is parsed.
        date_str = date_str[:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        imgs = soup.find_all('meta', property='og:image')
        # NOTE(review): presumably site artwork rather than comic content
        # (both live under .../site/) -- confirm.
        skip_imgs = {
            'http://respawncomic.com/wp-content/uploads/2016/03/site/HAROLD2.png',
            'http://respawncomic.com/wp-content/uploads/2016/03/site/DEVA.png'
        }
        return {
            'title': title,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [i['content'] for i in imgs if i['content'] not in skip_imgs],
        }
1800
1801
1802
class SafelyEndangered(GenericNavigableComic):
    """Retriever for the Safely Endangered comics."""
    # Also on http://tumblr.safelyendangered.com
    name = 'endangered'
    long_name = 'Safely Endangered'
    url = 'http://www.safelyendangered.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.safelyendangered.com/comic/ignored/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return date, images, title and the first image's alt text.

        Every image in the 'comic' div is expected to have identical ``alt``
        and ``title`` attributes.
        """
        heading = soup.find('h2', class_='post-title').string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, '%B %d, %Y')
        pictures = soup.find('div', id='comic').find_all('img')
        first_alt = pictures[0]['alt']
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [pic['src'] for pic in pictures],
            'title': heading,
            'alt': first_alt,
        }
1829
1830
1831 View Code Duplication
class PicturesInBoxes(GenericNavigableComic):
    """Retriever for the Pictures In Boxes comics."""
    # Also on http://picturesinboxescomic.tumblr.com
    name = 'picturesinboxes'
    long_name = 'Pictures in Boxes'
    url = 'http://www.picturesinboxes.com'
    get_navi_link = get_a_navi_navinext
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.picturesinboxes.com/2013/10/26/tetris/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return date, images, title and author read from the post page.

        At least one image is expected in the 'comicpane' div, and each one is
        expected to carry the post title as both ``title`` and ``alt``.
        """
        heading = soup.find('h2', class_='post-title').string
        writer = soup.find("span", class_="post-author").find("a").string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, '%B %d, %Y')
        pictures = soup.find('div', class_='comicpane').find_all('img')
        assert pictures
        assert all(pic['title'] == pic['alt'] == heading for pic in pictures)
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [pic['src'] for pic in pictures],
            'title': heading,
            'author': writer,
        }
1859
1860
1861
class Penmen(GenericNavigableComic):
    """Retriever for the Penmen comics."""
    name = 'penmen'
    long_name = 'Penmen'
    url = 'http://penmen.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://penmen.com/index.php/2016/09/12/penmen-announces-grin-big-brand-clothing/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, short url, images, tags and date of a post.

        The date is the first ten characters (YYYY-MM-DD) of the ``datetime``
        attribute of the first <time> element.
        """
        page_title = soup.find('title').string
        pictures = soup.find('div', class_='entry-content').find_all('img')
        shortlink = soup.find('link', rel='shortlink')['href']
        tag_text = ' '.join(tag.string for tag in soup.find_all('a', rel='tag'))
        iso_date = soup.find('time')['datetime'][:10]
        published = string_to_date(iso_date, "%Y-%m-%d")
        return {
            'title': page_title,
            'short_url': shortlink,
            'img': [pic['src'] for pic in pictures],
            'tags': tag_text,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
1888
1889
1890
class TheDoghouseDiaries(GenericNavigableComic):
    """Retriever for The Dog House Diaries comics."""
    name = 'doghouse'
    long_name = 'The Dog House Diaries'
    url = 'http://thedoghousediaries.com'

    @classmethod
    def get_first_comic_link(cls):
        """Return the home-page anchor with id 'firstlink'."""
        home = get_soup_at_url(cls.url)
        return home.find('a', id='firstlink')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor with id 'nextlink' or 'previouslink'."""
        anchor_id = 'nextlink' if next_ else 'previouslink'
        return last_soup.find('a', id=anchor_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic record from the page and its url.

        The image is the first <img> whose src starts with 'dhdcomics/'; the
        comic number is the last '/'-separated component of the comic url.
        """
        picture = soup.find('img', src=re.compile('^dhdcomics/.*'))
        page_url = cls.get_url_from_link(link)
        return {
            'title': soup.find('h2', id='titleheader').string,
            'title2': soup.find('div', id='subtext').string,
            'alt': picture.get('title'),
            'img': [urljoin_wrapper(page_url, picture['src'].strip())],
            'num': int(page_url.rsplit('/', 1)[-1]),
        }
1919
1920
1921
class InvisibleBread(GenericListableComic):
    """Class to retrieve Invisible Bread comics."""
    # Also on http://www.gocomics.com/invisible-bread
    name = 'invisiblebread'
    long_name = 'Invisible Bread'
    url = 'http://invisiblebread.com'

    @classmethod
    def get_archive_elements(cls):
        """Get the 'archive-title' cells of the archive page, in reverse order."""
        archive_url = urljoin_wrapper(cls.url, 'archives/')
        return reversed(get_soup_at_url(archive_url).find_all('td', class_='archive-title'))

    @classmethod
    def get_url_from_archive_element(cls, td):
        """Get url corresponding to an archive element (href of its first anchor)."""
        return td.find('a')['href']

    @classmethod
    def get_comic_info(cls, soup, td):
        """Get information about a particular comics.

        The title comes from the archive cell's anchor, month and day from the
        cell preceding it, and the year from the first numeric path component
        of the comic url.
        """
        url = cls.get_url_from_archive_element(td)
        title = td.find('a').string
        month_and_day = td.previous_sibling.string
        # cls.url is literal text, not a pattern: escape it so that its '.'
        # does not match arbitrary characters.
        link_re = re.compile('^%s/([0-9]+)/' % re.escape(cls.url))
        year = link_re.match(url).groups()[0]
        date_str = month_and_day + ' ' + year
        day = string_to_date(date_str, '%b %d %Y')
        imgs = [soup.find('div', id='comic').find('img')]
        assert all(i['title'] == i['alt'] == title for i in imgs)
        return {
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
            'title': title,
        }
1957
1958
1959
class DiscoBleach(GenericEmptyComic):
    """Placeholder for the Disco Bleach comics.

    Retrieval does not work anymore.
    """
    name = 'discobleach'
    long_name = 'Disco Bleach'
    url = 'http://discobleach.com'
1964
1965
1966
class TubeyToons(GenericEmptyComic):
    """Placeholder for the TubeyToons comics.

    Retrieval does not work anymore.
    """
    # Also on http://tapastic.com/series/Tubey-Toons
    # Also on http://tubeytoons.tumblr.com
    name = 'tubeytoons'
    long_name = 'Tubey Toons'
    url = 'http://tubeytoons.com'
    # NOTE(review): 'TUNEYTOONS' looks like a typo of 'TUBEYTOONS' -- check the
    # other classes sharing this category before renaming it.
    _categories = ('TUNEYTOONS', )
1974
1975
1976
class CompletelySeriousComics(GenericNavigableComic):
    """Retriever for the Completely Serious comics."""
    name = 'completelyserious'
    long_name = 'Completely Serious Comics'
    url = 'http://completelyseriouscomics.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return date, images, title, alt text and author of a post.

        The author is the second child of the 'post-author' span. At least one
        image is expected in the 'comicpane' div, and all images are expected
        to share the same ``title``/``alt`` text as the first one.
        """
        heading = soup.find('h2', class_='post-title').string
        writer = soup.find('span', class_='post-author').contents[1].string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, '%B %d, %Y')
        pictures = soup.find('div', class_='comicpane').find_all('img')
        assert pictures
        hover_text = pictures[0]['title']
        assert all(pic['title'] == pic['alt'] == hover_text for pic in pictures)
        return {
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [pic['src'] for pic in pictures],
            'title': heading,
            'alt': hover_text,
            'author': writer,
        }
2004
2005
2006
class PoorlyDrawnLines(GenericListableComic):
    """Class to retrieve Poorly Drawn Lines comics."""
    # Also on http://pdlcomics.tumblr.com
    name = 'poorlydrawn'
    long_name = 'Poorly Drawn Lines'
    url = 'http://poorlydrawnlines.com'
    _categories = ('POORLYDRAWN', )
    get_url_from_archive_element = get_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        At most one image is expected in the 'post' div; when there is none,
        the image list and the title are both empty.
        """
        imgs = soup.find('div', class_='post').find_all('img')
        assert len(imgs) <= 1
        return {
            'img': [i['src'] for i in imgs],
            'title': imgs[0].get('title', "") if imgs else "",
        }

    @classmethod
    def get_archive_elements(cls):
        """Get the archive page's comic anchors, in reverse order.

        An anchor qualifies when its href starts with '<cls.url>/comic/'
        followed by at least one character.
        """
        archive_url = urljoin_wrapper(cls.url, 'archive')
        # cls.url is literal text, not a pattern: escape it so that its '.'
        # does not match arbitrary characters.
        url_re = re.compile('^%s/comic/.' % re.escape(cls.url))
        return reversed(get_soup_at_url(archive_url).find_all('a', href=url_re))
2030
2031
2032
class LoadingComics(GenericNavigableComic):
    """Retriever for the Loading Artist comics."""
    name = 'loadingartist'
    long_name = 'Loading Artist'
    url = 'http://www.loadingartist.com/latest'

    @classmethod
    def get_first_comic_link(cls):
        """Return the anchor titled "First" on the page at cls.url."""
        latest = get_soup_at_url(cls.url)
        return latest.find('a', title="First")

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor titled 'Next' or 'Previous'."""
        wanted_title = 'Next' if next_ else 'Previous'
        return last_soup.find('a', title=wanted_title)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, images and date of a comic page.

        Only the images of the 'comic' div whose ``alt`` and ``title`` are both
        empty strings are kept.
        """
        heading = soup.find('h1').string
        raw_date = soup.find('span', class_='date').string.strip()
        published = string_to_date(raw_date, "%B %d, %Y")
        comic_div = soup.find('div', class_='comic')
        pictures = comic_div.find_all('img', alt='', title='')
        return {
            'title': heading,
            'img': [pic['src'] for pic in pictures],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2062
2063
2064
class ChuckleADuck(GenericNavigableComic):
    """Retriever for the Chuckle-A-Duck comics."""
    name = 'chuckleaduck'
    long_name = 'Chuckle-A-duck'
    url = 'http://chuckleaduck.com'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return date, images, title and author of a post.

        A page without a 'comic' div yields no image and an empty title;
        otherwise the first image's ``title`` is used and every image is
        expected to share it as both ``title`` and ``alt``.
        """
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(remove_st_nd_rd_th_from_date(raw_date), "%B %d, %Y")
        writer = soup.find('span', class_='post-author').string
        comic_div = soup.find('div', id='comic')
        pictures = comic_div.find_all('img') if comic_div else []
        if pictures:
            heading = pictures[0]['title']
        else:
            heading = ""
        assert all(pic['title'] == pic['alt'] == heading for pic in pictures)
        return {
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [pic['src'] for pic in pictures],
            'title': heading,
            'author': writer,
        }
2090
2091
2092
class DepressedAlien(GenericNavigableComic):
    """Retriever for the Depressed Alien comics."""
    name = 'depressedalien'
    long_name = 'Depressed Alien'
    url = 'http://depressedalien.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the parent of the home-page image named 'beginArrow'."""
        begin_arrow = get_soup_at_url(cls.url).find('img', attrs={'name': 'beginArrow'})
        return begin_arrow.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the parent of the image named 'rightArrow' or 'leftArrow'."""
        arrow_name = 'rightArrow' if next_ else 'leftArrow'
        arrow = last_soup.find('img', attrs={'name': arrow_name})
        return arrow.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the twitter:title and the og:image urls of the page."""
        twitter_title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        image_metas = soup.find_all('meta', property='og:image')
        return {
            'title': twitter_title,
            'img': [meta['content'] for meta in image_metas],
        }
2118
2119
2120
class ThingsInSquares(GenericListableComic):
    """Retriever for the Things In Squares comics, driven by the archive table."""
    # This can be retrieved in other languages
    # Also on https://tapastic.com/series/Things-in-Squares
    name = 'squares'
    long_name = 'Things in squares'
    url = 'http://www.thingsinsquares.com'

    @classmethod
    def get_comic_info(cls, soup, tr):
        """Build the comic record from an archive row and the comic page.

        The row must hold exactly three cells: the second provides the link
        (and title), the third the date in "%m.%d.%y" format. The remaining
        fields are read from the comic page itself.
        """
        _, link_cell, date_cell = tr.find_all('td')
        anchor = link_cell.find('a')
        published = string_to_date(date_cell.string, "%m.%d.%y")
        row_title = anchor.string
        og_title = soup.find('meta', property='og:title')['content']
        desc_meta = soup.find('meta', property='og:description')
        if desc_meta:
            summary = desc_meta['content']
        else:
            summary = ''
        tag_text = ' '.join(
            meta['content'] for meta in soup.find_all('meta', property='article:tag'))
        pictures = soup.find('div', class_='entry-content').find_all('img')
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'title': row_title,
            'title2': og_title,
            'description': summary,
            'tags': tag_text,
            'img': [pic['src'] for pic in pictures],
            'alt': ' '.join(pic['alt'] for pic in pictures),
        }

    @classmethod
    def get_url_from_archive_element(cls, tr):
        """Return the href of the anchor in the second of the row's three cells."""
        _, link_cell, _ = tr.find_all('td')
        anchor = link_cell.find('a')
        return anchor['href']

    @classmethod
    def get_archive_elements(cls):
        """Return the rows of the archive page's first <tbody>, reversed."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive-2'))
        rows = archive_soup.find('tbody').find_all('tr')
        return reversed(rows)
2162
2163
2164
class HappleTea(GenericNavigableComic):
    """Retriever for the Happle Tea comics."""
    name = 'happletea'
    long_name = 'Happle Tea'
    url = 'http://www.happletea.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, images, alt text, date and author of a post.

        Title, author and date are looked up inside the 'post-content' div.
        Every image in the 'comic' div is expected to have identical ``alt``
        and ``title`` attributes; the alt texts are concatenated.
        """
        pictures = soup.find('div', id='comic').find_all('img')
        post_div = soup.find('div', class_='post-content')
        heading = post_div.find('h2', class_='post-title').string
        writer = post_div.find('a', rel='author').string
        raw_date = post_div.find('span', class_='post-date').string
        published = string_to_date(raw_date, "%B %d, %Y")
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        return {
            'title': heading,
            'img': [pic['src'] for pic in pictures],
            'alt': ''.join(pic['alt'] for pic in pictures),
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'author': writer,
        }
2191
2192
2193
class RockPaperScissors(GenericNavigableComic):
    """Retriever for the Rock Paper Scissors comics."""
    name = 'rps'
    long_name = 'Rock Paper Scissors'
    url = 'http://rps-comics.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, transcript, short url and og:image urls of the page."""
        page_title = soup.find('title').string
        image_metas = soup.find_all('meta', property='og:image')
        shortlink = soup.find('link', rel='shortlink')['href']
        transcript_text = soup.find('div', id='transcript-content').string
        return {
            'title': page_title,
            'transcript': transcript_text,
            'short_url': shortlink,
            'img': [meta['content'] for meta in image_metas],
        }
2214
2215
2216
class FatAwesomeComics(GenericNavigableComic):
    """Retriever for the Fat Awesome comics."""
    # Also on http://fatawesomecomedy.tumblr.com
    name = 'fatawesome'
    long_name = 'Fat Awesome'
    url = 'http://fatawesome.com/comics'
    get_navi_link = get_a_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://fatawesome.com/shortbus/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, description, tags, alt text, image and date of a post.

        Exactly one image with data-recalc-dims="1" is expected; everything
        after the last '?' of its src is dropped. Tags default to an empty
        string when the page has no article:tag meta element.
        """
        twitter_title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        summary = soup.find('meta', attrs={'name': 'description'})['content']
        tag_meta = soup.find('meta', property='article:tag')
        if tag_meta:
            tag_text = tag_meta['content']
        else:
            tag_text = ""
        iso_date = soup.find('meta', property='article:published_time')['content'][:10]
        published = string_to_date(iso_date, "%Y-%m-%d")
        pictures = soup.find_all('img', attrs={'data-recalc-dims': "1"})
        assert len(pictures) == 1
        return {
            'title': twitter_title,
            'description': summary,
            'tags': tag_text,
            'alt': "".join(pic['alt'] for pic in pictures),
            'img': [pic['src'].rsplit('?', 1)[0] for pic in pictures],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2247
2248
2249
class AnythingComic(GenericListableComic):
    """Retriever for the Anything Comic comics, driven by the archive table."""
    # Also on http://tapastic.com/series/anything
    name = 'anythingcomic'
    long_name = 'Anything Comic'
    url = 'http://www.anythingcomic.com'

    @classmethod
    def get_archive_elements(cls):
        """Return the rows of the 'chapter_table' table describing comics."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive/'))
        rows = archive_soup.find('table', id='chapter_table').find_all('tr')
        # The first 2 <tr>'s do not correspond to comics
        return rows[2:]

    @classmethod
    def get_url_from_archive_element(cls, tr):
        """Return the absolute url of the anchor in the row's second cell.

        The row must hold exactly four cells.
        """
        _, comic_cell, _, _ = tr.find_all('td')
        anchor = comic_cell.find('a')
        return urljoin_wrapper(cls.url, anchor['href'])

    @classmethod
    def get_comic_info(cls, soup, tr):
        """Build the comic record from an archive row and the comic page.

        The row's four cells provide, in order, the number, the link/title and
        the date (the last one is ignored). Exactly one image with id
        'comic_image' is expected on the page.
        """
        num_cell, comic_cell, date_cell, _ = tr.find_all('td')
        number = int(num_cell.string)
        heading = comic_cell.find('a').string
        pictures = soup.find_all('img', id='comic_image')
        raw_date = date_cell.string
        published = string_to_date(
            remove_st_nd_rd_th_from_date(raw_date), "%B %d, %Y, %I:%M %p")
        assert len(pictures) == 1
        assert all(pic.get('alt') == pic.get('title') for pic in pictures)
        return {
            'num': number,
            'title': heading,
            'alt': pictures[0].get('alt', ''),
            'img': [pic['src'] for pic in pictures],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2290
2291
2292
class LonnieMillsap(GenericNavigableComic):
    """Retriever for the comics of Lonnie Millsap."""
    name = 'millsap'
    long_name = 'Lonnie Millsap'
    url = 'http://www.lonniemillsap.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.lonniemillsap.com/?p=42'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, author, images and date read from a comic page."""
        heading = soup.find('h2', class_='post-title')
        comic_title = heading.string
        content = soup.find('div', class_='post-content')
        author_span = content.find("span", class_="post-author")
        writer = author_span.find("a").string
        raw_date = content.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        pictures = content.find("div", class_="entry").find_all("img")
        info = {
            'title': comic_title,
            'author': writer,
            'img': [pic['src'] for pic in pictures],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
        return info
2318
2319
2320
class LinsEditions(GenericNavigableComic):
    """Retriever for L.I.N.S. Editions comics."""
    # Also on http://linscomics.tumblr.com
    # Now on https://warandpeas.com
    name = 'lins'
    long_name = 'L.I.N.S. Editions'
    url = 'https://linsedition.com'
    _categories = ('LINS', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'https://linsedition.com/2011/09/07/l-i-n-s/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, images and date taken from the page's <meta> tags."""
        comic_title = soup.find('meta', property='og:title')['content']
        image_metas = soup.find_all('meta', property='og:image')
        timestamp = soup.find('meta', property='article:published_time')['content']
        # Only the first 10 characters (the YYYY-MM-DD part) are parsed.
        published = string_to_date(timestamp[:10], "%Y-%m-%d")
        info = {
            'title': comic_title,
            'img': [meta['content'] for meta in image_metas],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
        return info
2346
2347
2348
class ThorsThundershack(GenericNavigableComic):
    """Retriever for Thor's Thundershack comics."""
    # Also on http://tapastic.com/series/Thors-Thundershac
    name = 'thor'
    long_name = 'Thor\'s Thundershack'
    url = 'http://www.thorsthundershack.com'
    _categories = ('THOR', )
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'first' navigation link found on the home page."""
        home = get_soup_at_url(cls.url)
        return home.find('a', class_='first navlink')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the next (or previous) comic link, or None if there is none."""
        rel = 'next' if next_ else 'prev'
        # Links pointing to the bare '/comic' page are not usable targets.
        usable = (anchor for anchor in last_soup.find_all('a', rel=rel)
                  if anchor['href'] != '/comic')
        return next(usable, None)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return images, date, author, title, alt text and description."""
        comic_title = soup.find('meta', attrs={'name': 'description'})["content"]
        body_text = soup.find('div', itemprop='articleBody').text
        writer = soup.find('span', itemprop='author copyrightHolder').string
        pictures = soup.find_all('img', itemprop='image')
        assert all(pic['title'] == pic['alt'] for pic in pictures)
        if pictures:
            alt_text = pictures[0]['alt']
        else:
            alt_text = ""
        raw_date = soup.find('time', itemprop='datePublished')["datetime"]
        published = string_to_date(raw_date, "%Y-%m-%d %H:%M:%S")
        info = {
            'img': [urljoin_wrapper(cls.url, pic['src']) for pic in pictures],
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'author': writer,
            'title': comic_title,
            'alt': alt_text,
            'description': body_text,
        }
        return info
2391
2392
2393 View Code Duplication
class GerbilWithAJetpack(GenericNavigableComic):
    """Class to retrieve GerbilWithAJetpack comics."""
    name = 'gerbil'
    long_name = 'Gerbil With A Jetpack'
    url = 'http://gerbilwithajetpack.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        Args:
            soup: parsed page of the comic.
            link: navigation link that led to the page (unused here).

        Returns:
            A dict with the keys 'img', 'title', 'alt', 'author', 'day',
            'month' and 'year'.
        """
        title = soup.find('h2', class_='post-title').string
        author = soup.find("span", class_="post-author").find("a").string
        date_str = soup.find("span", class_="post-date").string
        day = string_to_date(date_str, "%B %d, %Y")
        imgs = soup.find("div", id="comic").find_all("img")
        # A page without any image yields an empty alt text instead of an
        # IndexError, as the other classes of this file do.
        alt = imgs[0]['alt'] if imgs else ""
        assert all(i['alt'] == i['title'] == alt for i in imgs)
        return {
            'img': [i['src'] for i in imgs],
            'title': title,
            'alt': alt,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
2420
2421
2422 View Code Duplication
class EveryDayBlues(GenericNavigableComic):
    """Retriever for Every Day Blues comics."""
    name = "blues"
    long_name = "Every Day Blues"
    url = "http://everydayblues.net"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return images, title, author and date read from a comic page."""
        comic_title = soup.find("h2", class_="post-title").string
        author_span = soup.find("span", class_="post-author")
        writer = author_span.find("a").string
        raw_date = soup.find("span", class_="post-date").string
        # The date is parsed with the "de_DE.utf8" locale.
        published = string_to_date(raw_date, "%d. %B %Y", "de_DE.utf8")
        pictures = soup.find("div", id="comic").find_all("img")
        assert all(pic['alt'] == pic['title'] == comic_title for pic in pictures)
        assert len(pictures) <= 1
        info = {
            'img': [pic['src'] for pic in pictures],
            'title': comic_title,
            'author': writer,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
        return info
2448
2449
2450
class BiterComics(GenericNavigableComic):
    """Retriever for Biter Comics."""
    name = "biter"
    long_name = "Biter Comics"
    url = "http://www.bitercomics.com"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return image, title, alt text, author and date of a comic page."""
        comic_title = soup.find("h1", class_="entry-title").string
        vcard = soup.find("span", class_="author vcard")
        writer = vcard.find("a").string
        raw_date = soup.find("span", class_="entry-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find("div", id="comic").find_all("img")
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        # Exactly one image is expected, so indexing below is safe.
        assert len(pictures) == 1
        alt_text = pictures[0]['alt']
        info = {
            'img': [pic['src'] for pic in pictures],
            'title': comic_title,
            'alt': alt_text,
            'author': writer,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
        return info
2478
2479
2480 View Code Duplication
class TheAwkwardYeti(GenericNavigableComic):
    """Retriever for The Awkward Yeti comics."""
    # Also on http://www.gocomics.com/the-awkward-yeti
    # Also on http://larstheyeti.tumblr.com
    # Also on https://tapastic.com/series/TheAwkwardYeti
    name = 'yeti'
    long_name = 'The Awkward Yeti'
    url = 'http://theawkwardyeti.com'
    _categories = ('YETI', )
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return images, title and date read from a comic page."""
        comic_title = soup.find('h2', class_='post-title').string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find("div", id="comic").find_all("img")
        # Only the first image is required to have matching alt and title.
        assert all(pic['alt'] == pic['title'] for pic in pictures[:1])
        info = {
            'img': [pic['src'] for pic in pictures],
            'title': comic_title,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
        return info
2507
2508
2509
class PleasantThoughts(GenericNavigableComic):
    """Retriever for Pleasant Thoughts comics."""
    name = 'pleasant'
    long_name = 'Pleasant Thoughts'
    url = 'http://pleasant-thoughts.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the title and images found in the post content."""
        content = soup.find('div', class_='post-content')
        comic_title = content.find('h2', class_='post-title').string
        entry = content.find("div", class_="entry")
        pictures = entry.find_all("img")
        info = {
            'title': comic_title,
            'img': [pic['src'] for pic in pictures],
        }
        return info
2527
2528
2529
class MisterAndMe(GenericNavigableComic):
    """Retriever for Mister & Me comics."""
    # Also on http://www.gocomics.com/mister-and-me
    # Also on https://tapastic.com/series/Mister-and-Me
    name = 'mister'
    long_name = 'Mister & Me'
    url = 'http://www.mister-and-me.com'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return image, title, alt text, author and date of a comic page."""
        comic_title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        writer = author_span.find("a").string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find("div", id="comic").find_all("img")
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        assert len(pictures) <= 1
        # Pages without an image get an empty alt text.
        if pictures:
            alt_text = pictures[0]['alt']
        else:
            alt_text = ""
        info = {
            'img': [pic['src'] for pic in pictures],
            'title': comic_title,
            'alt': alt_text,
            'author': writer,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
        return info
2559
2560
2561
class LastPlaceComics(GenericNavigableComic):
    """Retriever for Last Place Comics."""
    name = 'lastplace'
    long_name = 'Last Place Comics'
    url = "http://lastplacecomics.com"
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return image, title, alt text, author and date of a comic page."""
        heading = soup.find('h2', class_='post-title')
        comic_title = heading.string
        writer = soup.find("span", class_="post-author").find("a").string
        date_span = soup.find("span", class_="post-date")
        published = string_to_date(date_span.string, "%B %d, %Y")
        comic_div = soup.find("div", id="comic")
        pictures = comic_div.find_all("img")
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        assert len(pictures) <= 1
        alt_text = ""
        if pictures:
            alt_text = pictures[0]['alt']
        info = {
            'img': [pic['src'] for pic in pictures],
            'title': comic_title,
            'alt': alt_text,
            'author': writer,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
        return info
2589
2590
2591
class TalesOfAbsurdity(GenericNavigableComic):
    """Retriever for Tales of Absurdity comics."""
    # Also on http://tapastic.com/series/Tales-Of-Absurdity
    # Also on http://talesofabsurdity.tumblr.com
    name = 'absurdity'
    long_name = 'Tales of Absurdity'
    url = 'http://talesofabsurdity.com'
    _categories = ('ABSURDITY', )
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return images, title, alt text, author and date of a comic page."""
        comic_title = soup.find('h2', class_='post-title').string
        writer = soup.find("span", class_="post-author").find("a").string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find("div", id="comic").find_all("img")
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        # The alt text of the first image, when there is one, is reported.
        if pictures:
            alt_text = pictures[0]['alt']
        else:
            alt_text = ""
        info = {
            'img': [pic['src'] for pic in pictures],
            'title': comic_title,
            'alt': alt_text,
            'author': writer,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
        return info
2621
2622
2623
class EndlessOrigami(GenericEmptyComic, GenericNavigableComic):
    """Retriever for Endless Origami comics."""
    name = "origami"
    long_name = "Endless Origami"
    url = "http://endlessorigami.com"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return images, title, alt text, author and date of a comic page."""
        title_elt = soup.find('h2', class_='post-title')
        comic_title = title_elt.string
        author_elt = soup.find("span", class_="post-author").find("a")
        writer = author_elt.string
        date_elt = soup.find("span", class_="post-date")
        published = string_to_date(date_elt.string, "%B %d, %Y")
        pictures = soup.find("div", id="comic").find_all("img")
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        alt_text = ""
        if pictures:
            alt_text = pictures[0]['alt']
        info = {
            'img': [pic['src'] for pic in pictures],
            'title': comic_title,
            'alt': alt_text,
            'author': writer,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
        return info
2650
2651
2652
class PlanC(GenericNavigableComic):
    """Retriever for Plan C comics."""
    name = 'planc'
    long_name = 'Plan C'
    url = 'http://www.plancomic.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, images and date read from a comic page."""
        comic_title = soup.find('h2', class_='post-title').string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        comic_div = soup.find('div', id='comic')
        pictures = comic_div.find_all('img')
        info = {
            'title': comic_title,
            'img': [pic['src'] for pic in pictures],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
        return info
2674
2675
2676
class BuniComic(GenericNavigableComic):
    """Retriever for Buni comics."""
    name = 'buni'
    long_name = 'BuniComics'
    url = 'http://www.bunicomic.com'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the image of a comic page and its title attribute."""
        comic_div = soup.find('div', id='comic')
        pictures = comic_div.find_all('img')
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        # Exactly one image is expected; its title is used as comic title.
        assert len(pictures) == 1
        info = {
            'img': [pic['src'] for pic in pictures],
            'title': pictures[0]['title'],
        }
        return info
2694
2695
2696
class GenericCommitStrip(GenericNavigableComic):
    """Common retriever for Commit Strip, shared by the language variants.

    Subclasses are expected to provide `first_url` (left as NotImplemented
    here).
    """
    get_navi_link = get_a_rel_next
    get_first_comic_link = simulate_first_link
    first_url = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, image titles, description and images of a page."""
        description = soup.find('meta', property='og:description')['content']
        comic_title = soup.find('meta', property='og:title')['content']
        pictures = soup.find('div', class_='entry-content').find_all('img')
        # Images without a title attribute contribute an empty string.
        joined_titles = ' '.join(pic.get('title', '') for pic in pictures)
        info = {
            'title': comic_title,
            'title2': joined_titles,
            'description': description,
            'img': [
                urljoin_wrapper(
                    cls.url, convert_iri_to_plain_ascii_uri(pic['src']))
                for pic in pictures
            ],
        }
        return info
2715
2716
2717
class CommitStripFr(GenericCommitStrip):
    """French edition of Commit Strip."""
    name = 'commit_fr'
    long_name = 'Commit Strip (Fr)'
    _categories = ('FRANCAIS', )
    url = 'http://www.commitstrip.com/fr'
    # Page used as the starting point of the navigation.
    first_url = 'http://www.commitstrip.com/fr/2012/02/22/interview/'
2724
2725
2726
class CommitStripEn(GenericCommitStrip):
    """English edition of Commit Strip."""
    name = 'commit_en'
    long_name = 'Commit Strip (En)'
    url = 'http://www.commitstrip.com/en'
    # Page used as the starting point of the navigation.
    first_url = 'http://www.commitstrip.com/en/2012/02/22/interview/'
2732
2733
2734 View Code Duplication
class GenericBoumerie(GenericNavigableComic):
    """Common retriever for Boumeries comics, shared by language variants.

    Subclasses are expected to provide `date_format` and `lang`, which are
    passed to `string_to_date` when parsing the post date.
    """
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next
    date_format = NotImplemented
    lang = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return short url, images, title, author and date of a page."""
        comic_title = soup.find('h2', class_='post-title').string
        shortlink = soup.find('link', rel='shortlink')['href']
        writer = soup.find("span", class_="post-author").find("a").string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, cls.date_format, cls.lang)
        pictures = soup.find('div', id='comic').find_all('img')
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        info = {
            'short_url': shortlink,
            'img': [pic['src'] for pic in pictures],
            'title': comic_title,
            'author': writer,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
        return info
2760
2761
2762
class BoumerieEn(GenericBoumerie):
    """English edition of Boumeries."""
    name = 'boumeries_en'
    long_name = 'Boumeries (En)'
    url = 'http://comics.boumerie.com'
    # Locale and format used to parse the post date.
    lang = 'en_GB.UTF-8'
    date_format = "%B %d, %Y"
2769
2770
2771
class BoumerieFr(GenericBoumerie):
    """French edition of Boumeries."""
    name = 'boumeries_fr'
    long_name = 'Boumeries (Fr)'
    url = 'http://bd.boumerie.com'
    _categories = ('FRANCAIS', )
    # Locale and format used to parse the post date.
    lang = "fr_FR.utf8"
    date_format = "%A, %d %B %Y"
2779
2780
2781 View Code Duplication
class UnearthedComics(GenericNavigableComic):
    """Class to retrieve Unearthed comics."""
    # Also on http://tapastic.com/series/UnearthedComics
    # Also on http://unearthedcomics.tumblr.com
    name = 'unearthed'
    long_name = 'Unearthed Comics'
    url = 'http://unearthedcomics.com'
    _categories = ('UNEARTHED', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://unearthedcomics.com/comics/world-with-turn-signals/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        Args:
            soup: parsed page of the comic.
            link: navigation link that led to the page (unused here).

        Returns:
            A dict with the keys 'title', 'description', 'url2', 'img',
            'month', 'year' and 'day'. 'description' is the content of the
            og:description <meta> tag, or None when the page has no such tag.
        """
        short_url = soup.find('link', rel='shortlink')['href']
        title_elt = soup.find('h1') or soup.find('h2')
        title = title_elt.string if title_elt else ""
        # Store the text of the <meta> tag, not the tag object itself; a
        # missing tag still yields None as before.
        desc_elt = soup.find('meta', property='og:description')
        desc = desc_elt['content'] if desc_elt is not None else None
        date_str = soup.find('time', class_='published updated hidden')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        post = soup.find('div', class_="entry content entry-content type-portfolio")
        imgs = post.find_all('img')
        return {
            'title': title,
            'description': desc,
            'url2': short_url,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
2813
2814
2815
class Optipess(GenericNavigableComic):
    """Retriever for Optipess comics."""
    name = 'optipess'
    long_name = 'Optipess'
    url = 'http://www.optipess.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, alt text, author, images and date of a comic page."""
        comic_title = soup.find('h2', class_='post-title').string
        writer = soup.find("span", class_="post-author").find("a").string
        comic_div = soup.find('div', id='comic')
        # Pages without a comic div are reported with no image.
        if comic_div:
            pictures = comic_div.find_all('img')
        else:
            pictures = []
        if pictures:
            alt_text = pictures[0]['title']
        else:
            alt_text = ""
        assert all(pic['alt'] == pic['title'] == alt_text for pic in pictures)
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, "%B %d, %Y")
        info = {
            'title': comic_title,
            'alt': alt_text,
            'author': writer,
            'img': [pic['src'] for pic in pictures],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
        return info
2843
2844
2845
class PainTrainComic(GenericNavigableComic):
    """Class to retrieve Pain Train Comics."""
    name = 'paintrain'
    long_name = 'Pain Train Comics'
    url = 'http://paintraincomic.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        Args:
            soup: parsed page of the comic.
            link: navigation link that led to the page (unused here).

        Returns:
            A dict with the keys 'short_url', 'num', 'img', 'month', 'year',
            'day', 'alt' and 'title'. 'num' is the integer found after
            '?p=' in the page's shortlink.
        """
        title = soup.find('h2', class_='post-title').string
        short_url = soup.find('link', rel='shortlink')['href']
        # The url is escaped so that its dots are matched literally instead
        # of acting as regex wildcards.
        short_url_re = re.compile(r'^%s/\?p=([0-9]*)' % re.escape(cls.url))
        num = int(short_url_re.match(short_url).group(1))
        imgs = soup.find('div', id='comic').find_all('img')
        alt = imgs[0]['title']
        assert all(i['alt'] == i['title'] == alt for i in imgs)
        date_str = soup.find('span', class_='post-date').string
        day = string_to_date(date_str, "%d/%m/%Y")
        return {
            'short_url': short_url,
            'num': num,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'alt': alt,
            'title': title,
        }
2875
2876
2877
class MoonBeard(GenericNavigableComic):
    """Class to retrieve MoonBeard comics."""
    # Also on http://blog.squiresjam.es/moonbeard
    # Also on http://www.webtoons.com/en/comedy/moon-beard/list?title_no=471
    name = 'moonbeard'
    long_name = 'Moon Beard'
    url = 'http://moonbeard.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        Args:
            soup: parsed page of the comic.
            link: navigation link that led to the page (unused here).

        Returns:
            A dict with the keys 'short_url', 'num', 'img', 'month', 'year',
            'day', 'title', 'tags', 'alt' and 'author'. 'num' is the integer
            found after '?p=' in the page's shortlink and 'tags' is the
            space-joined content of the article:tag <meta> tags.
        """
        title = soup.find('h2', class_='post-title').string
        short_url = soup.find('link', rel='shortlink')['href']
        # The url is escaped so that its dots are matched literally instead
        # of acting as regex wildcards.
        short_url_re = re.compile(r'^%s/\?p=([0-9]*)' % re.escape(cls.url))
        num = int(short_url_re.match(short_url).group(1))
        imgs = soup.find('div', id='comic').find_all('img')
        alt = imgs[0]['title']
        assert all(i['alt'] == i['title'] == alt for i in imgs)
        date_str = soup.find('span', class_='post-date').string
        day = string_to_date(date_str, "%B %d, %Y")
        tags = ' '.join(t['content'] for t in soup.find_all('meta', property='article:tag'))
        author = soup.find('span', class_='post-author').string
        return {
            'short_url': short_url,
            'num': num,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'title': title,
            'tags': tags,
            'alt': alt,
            'author': author,
        }
2913
2914
2915
class AHamADay(GenericNavigableComic):
    """Class to retrieve class A Ham A Day comics."""
    name = 'ham'
    long_name = 'A Ham A Day'
    url = 'http://www.ahammaday.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.ahammaday.com/today/3/6/french'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Args:
            last_soup: parsed page of the comic to navigate from.
            next_: True to get the next comic, False for the previous one.

        Returns:
            The <a> element of the navigation item, or None when the page
            has no such item (or the item holds no link).
        """
        # prev is next / next is prev
        li = last_soup.find('li', class_='previous' if next_ else 'next')
        # A missing <li> means there is nowhere to go: return None instead
        # of failing with an AttributeError on None.
        return li.find('a') if li is not None else None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        Args:
            soup: parsed page of the comic.
            link: navigation link that led to the page (unused here).

        Returns:
            A dict with the keys 'img', 'title', 'author', 'day', 'month'
            and 'year'.
        """
        date_str = soup.find('time', class_='published')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        author = soup.find('span', class_='blog-author').find('a').string
        title = soup.find('meta', property='og:title')['content']
        imgs = soup.find_all('meta', itemprop='image')
        return {
            'img': [i['content'] for i in imgs],
            'title': title,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
        }
2946
2947
2948
class LittleLifeLines(GenericNavigableComic):
    """Retriever for Little Life Lines comics."""
    # Also on https://little-life-lines.tumblr.com
    name = 'life'
    long_name = 'Little Life Lines'
    url = 'http://www.littlelifelines.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.littlelifelines.com/comics/well-done'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the next (or previous) comic link, or None if missing."""
        # prev is next / next is prev
        css_class = 'prev' if next_ else 'next'
        item = last_soup.find('li', class_=css_class)
        if item:
            return item.find('a')
        return None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, alt, description, author, date and images."""
        comic_title = soup.find('meta', property='og:title')['content']
        description = soup.find('meta', property='og:description')['content']
        raw_date = soup.find('time', class_='published')['datetime']
        published = string_to_date(raw_date, "%Y-%m-%d")
        writer = soup.find('a', rel='author').string
        body = soup.find('div', class_="body entry-content")
        # Images lacking a src attribute are ignored.
        pictures = [pic for pic in body.find_all('img')
                    if pic.get('src') is not None]
        alt_text = pictures[0]['alt']
        info = {
            'title': comic_title,
            'alt': alt_text,
            'description': description,
            'author': writer,
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [pic['src'] for pic in pictures],
        }
        return info
2987
2988
2989
class GenericWordPressInkblot(GenericNavigableComic):
    """Common retriever for sites built on WordPress with Inkblot."""
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'first webcomic' link found on the home page."""
        first_link_classes = 'webcomic-link webcomic1-link first-webcomic-link first-webcomic1-link'
        home = get_soup_at_url(cls.url)
        return home.find('a', class_=first_link_classes)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, date and images read from a comic page."""
        comic_title = soup.find('meta', property='og:title')['content']
        pictures = soup.find('div', class_='webcomic-image').find_all('img')
        timestamp = soup.find('meta', property='article:published_time')['content']
        # Only the first 10 characters (the YYYY-MM-DD part) are parsed.
        published = string_to_date(timestamp[:10], "%Y-%m-%d")
        info = {
            'title': comic_title,
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [pic['src'] for pic in pictures],
        }
        return info
3012
3013
3014
class EverythingsStupid(GenericWordPressInkblot):
    """Retriever for Everything's Stupid comics."""
    # Also on http://tapastic.com/series/EverythingsStupid
    # Also on http://www.webtoons.com/en/challenge/everythings-stupid/list?title_no=14591
    # Also on http://everythingsstupidcomics.tumblr.com
    url = 'http://everythingsstupid.net'
    name = 'stupid'
    long_name = "Everything's Stupid"
3022
3023
3024
class TheIsmComics(GenericWordPressInkblot):
    """Retriever for The Ism comics."""
    # Also on https://tapastic.com/series/TheIsm (?)
    url = 'http://www.theism-comics.com'
    name = 'theism'
    long_name = "The Ism"
3030
3031
3032
class WoodenPlankStudios(GenericWordPressInkblot):
    """Retriever for Wooden Plank Studios comics."""
    url = 'http://woodenplankstudios.com'
    name = 'woodenplank'
    long_name = 'Wooden Plank Studios'
3037
3038
3039
class ElectricBunnyComic(GenericNavigableComic):
    """Electric Bunny Comics, navigated through the First/Next/Back images."""
    # Also published at http://electricbunnycomics.tumblr.com
    name = 'bunny'
    long_name = 'Electric Bunny Comic'
    url = 'http://www.electricbunnycomics.com/View/Comic/153/Welcome+to+Hell'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the element wrapping the image whose alt text is 'First'."""
        first_img = get_soup_at_url(cls.url).find('img', alt='First')
        return first_img.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the element wrapping the 'Next'/'Back' image, or None if absent."""
        wanted_alt = 'Next' if next_ else 'Back'
        nav_img = last_soup.find('img', alt=wanted_alt)
        if nav_img:
            return nav_img.parent
        return None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title, image urls) from the page's og: meta tags."""
        og_title = soup.find('meta', property='og:title')
        title = og_title['content']
        og_images = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'img': [meta['content'] for meta in og_images],
        }
3067
3068
3069
class SheldonComics(GenericNavigableComic):
    """Sheldon comics, navigated through the nav-first/nav-next/nav-prev anchors."""
    # Also published at http://www.gocomics.com/sheldon
    name = 'sheldon'
    long_name = 'Sheldon Comics'
    url = 'http://www.sheldoncomics.com'

    @classmethod
    def get_first_comic_link(cls):
        """Return the anchor with id 'nav-first' from the page at cls.url."""
        home_soup = get_soup_at_url(cls.url)
        return home_soup.find("a", id="nav-first")

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the first next/previous anchor not pointing at the home page, or None."""
        nav_id = "nav-next" if next_ else "nav-prev"
        candidates = (
            anchor for anchor in last_soup.find_all("a", id=nav_id)
            if anchor['href'] != 'http://www.sheldoncomics.com'
        )
        return next(candidates, None)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title, image url) from the 'comic-foot' div."""
        foot = soup.find("div", id="comic-foot")
        imgs = foot.find_all("img")
        # Each image is expected to carry identical alt and title texts ...
        assert all(img['alt'] == img['title'] for img in imgs)
        # ... and exactly one image is expected.
        assert len(imgs) == 1
        only_img = imgs[0]
        return {
            'title': only_img['title'],
            'img': [img['src'] for img in imgs],
        }
3100
3101
3102
class Ubertool(GenericNavigableComic):
    """Ubertool comics, navigated with the shared comicnavbase helpers."""
    # Also published at http://ubertool.tumblr.com
    # Also published at https://tapastic.com/series/ubertool
    name = 'ubertool'
    long_name = 'Ubertool'
    url = 'http://ubertoolcomic.com'
    _categories = ('UBERTOOL',)
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_a_comicnavbase_comicnavnext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title, date, image urls) for one comic page."""
        title = soup.find('h2', class_='post-title').string
        post_date = soup.find('span', class_='post-date').string
        # Dates are parsed with the "%B %d, %Y" format.
        day = string_to_date(post_date, "%B %d, %Y")
        comic_div = soup.find('div', id='comic')
        img_urls = [img['src'] for img in comic_div.find_all('img')]
        return {
            'img': img_urls,
            'title': title,
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
3127
3128
3129
class EarthExplodes(GenericNavigableComic):
    """The Earth Explodes comics, navigated through the 'next'/'prev' anchors."""
    name = 'earthexplodes'
    long_name = 'The Earth Explodes'
    url = 'http://www.earthexplodes.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.earthexplodes.com/comics/000/'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor with id 'next' or 'prev' depending on next_."""
        anchor_id = 'next' if next_ else 'prev'
        return last_soup.find('a', id=anchor_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title, alt text, absolute image urls)."""
        title = soup.find('title').string
        image_div = soup.find('div', id='image')
        imgs = image_div.find_all('img')
        # The alt text comes from the first image's title attribute, if any.
        first_img = imgs[0]
        alt = first_img.get('title', '')
        absolute_urls = [urljoin_wrapper(cls.url, img['src']) for img in imgs]
        return {
            'img': absolute_urls,
            'title': title,
            'alt': alt,
        }
3154
3155
3156
class PomComics(GenericNavigableComic):
    """Pom Comics, navigated through the btn_first/btn_next/btn_prev anchors."""
    name = 'pom'
    long_name = 'Pom Comics / Piece of Me'
    url = 'http://www.pomcomic.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the anchor with class 'btn_first' from the page at cls.url."""
        home_soup = get_soup_at_url(cls.url)
        return home_soup.find('a', class_='btn_first')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the 'btn_next' or 'btn_prev' anchor depending on next_."""
        button_class = 'btn_next' if next_ else 'btn_prev'
        return last_soup.find('a', class_=button_class)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title, description, tags, absolute image urls)."""
        title = soup.find('h1', id="comic-name").string
        desc = soup.find('meta', property='og:description')['content']
        # 'name' clashes with find()'s own parameter, hence the attrs dict.
        tags = soup.find('meta', attrs={'name': 'keywords'})['content']
        comic_div = soup.find('div', class_='comic')
        imgs = comic_div.find_all('img')
        return {
            'title': title,
            'desc': desc,
            'tags': tags,
            'img': [urljoin_wrapper(cls.url, img['src']) for img in imgs],
        }
3186
3187
3188
class CubeDrone(GenericNavigableComic):
    """Class to retrieve Cube Drone comics.

    Navigation links are located through the glyphicon <span> elements
    they contain: the link is the parent of the matching span.
    """
    name = 'cubedrone'
    long_name = 'Cube Drone'
    url = 'http://cube-drone.com/comics'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics (parent of the 'glyphicon-backward' span)."""
        return get_soup_at_url(cls.url).find('span', class_='glyphicon glyphicon-backward').parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns the parent of the chevron span ('right' for next, 'left'
        for previous), or None when the page has no such span.
        """
        class_ = 'glyphicon glyphicon-chevron-' + ('right' if next_ else 'left')
        span = last_soup.find('span', class_=class_)
        # find() yields None when nothing matches: return None like the other
        # comics do rather than failing with AttributeError on `.parent`.
        return span.parent if span is not None else None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with the twitter title/url metadata, the title and
        alt attributes of the first comic image, and the image urls.
        """
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        url2 = soup.find('meta', attrs={'name': 'twitter:url'})['content']
        # date_str = soup.find('h2', class_='comic_title').find('small').string
        # day = string_to_date(date_str, "%B %d, %Y, %I:%M %p")
        imgs = soup.find_all('img', class_='comic img-responsive')
        title2 = imgs[0]['title']
        alt = imgs[0]['alt']
        return {
            'url2': url2,
            'title': title,
            'title2': title2,
            'alt': alt,
            'img': [i['src'] for i in imgs],
        }
3223
3224
3225
class MakeItStoopid(GenericNavigableComic):
    """Make It Stoopid comics, navigated through the 'cnav' elements."""
    name = 'stoopid'
    long_name = 'Make it stoopid'
    url = 'http://makeitstoopid.com/comic.php'

    @classmethod
    def get_nav(cls, soup):
        """Return the first five 'cnav' elements, with None for those lacking an href.

        The elements after the fifth are asserted to be equal to the first five.
        """
        nav_elts = soup.find_all(class_='cnav')
        first_half = nav_elts[:5]
        second_half = nav_elts[5:]
        assert first_half == second_half
        # Expected order: begin, prev, archive, next_, end
        links = []
        for elt in first_half:
            links.append(elt if elt.get('href') is not None else None)
        return links

    @classmethod
    def get_first_comic_link(cls):
        """Return the first navigation element of the page at cls.url."""
        nav = cls.get_nav(get_soup_at_url(cls.url))
        return nav[0]

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the navigation element at index 3 (next) or 1 (previous)."""
        position = 3 if next_ else 1
        return cls.get_nav(last_soup)[position]

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict; the title is taken from the link's title attribute."""
        title = link['title']
        comic_imgs = soup.find_all('img', id='comicimg')
        return {
            'title': title,
            'img': [img['src'] for img in comic_imgs],
        }
3259
3260
3261
class MarketoonistComics(GenericNavigableComic):
    """Marketoonist comics, navigated through the rel=next links."""
    name = 'marketoonist'
    long_name = 'Marketoonist'
    url = 'https://marketoonist.com/cartoons'
    get_first_comic_link = simulate_first_link
    get_navi_link = get_link_rel_next
    first_url = 'https://marketoonist.com/2002/10/the-8-types-of-brand-managers-2.html'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (image urls, date, title) from the page's meta tags."""
        og_images = soup.find_all('meta', property='og:image')
        published = soup.find('meta', property='article:published_time')['content']
        # Only the first 10 characters (YYYY-MM-DD) of the timestamp are parsed.
        day = string_to_date(published[:10], "%Y-%m-%d")
        title = soup.find('meta', property='og:title')['content']
        info = {'img': [meta['content'] for meta in og_images]}
        info['day'] = day.day
        info['month'] = day.month
        info['year'] = day.year
        info['title'] = title
        return info
3284
3285
3286
class ConsoliaComics(GenericNavigableComic):
    """Consolia comics, navigated through the first/next/prev anchors."""
    name = 'consolia'
    long_name = 'consolia'
    url = 'https://consolia-comic.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the anchor with class 'first' from the page at cls.url."""
        home_soup = get_soup_at_url(cls.url)
        return home_soup.find('a', class_='first')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the 'next' or 'prev' anchor depending on next_."""
        anchor_class = 'next' if next_ else 'prev'
        return last_soup.find('a', class_=anchor_class)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title, image urls, date) for one comic page."""
        title = soup.find('meta', property='og:title')['content']
        # The date is read from the datetime attribute of the first <time> tag.
        time_tag = soup.find('time')
        day = string_to_date(time_tag["datetime"], "%Y-%m-%d")
        og_images = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'img': [meta['content'] for meta in og_images],
            'day': day.day,
            'month': day.month,
            'year': day.year,
        }
3317
3318
3319
class TuMourrasMoinsBete(GenericNavigableComic):
    """Tu Mourras Moins Bete comics, navigated through the blog pager links."""
    name = 'mourrasmoinsbete'
    long_name = 'Tu Mourras Moins Bete'
    url = 'http://tumourrasmoinsbete.blogspot.fr'
    _categories = ('FRANCAIS',)
    get_first_comic_link = simulate_first_link
    first_url = 'http://tumourrasmoinsbete.blogspot.fr/2008/06/essai.html'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the 'newer' (next) or 'older' (previous) pager anchor."""
        if next_:
            pager_id = 'Blog1_blog-pager-newer-link'
        else:
            pager_id = 'Blog1_blog-pager-older-link'
        return last_soup.find('a', id=pager_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (image urls, author, title) for one post."""
        title = soup.find('title').string
        body = soup.find('div', itemprop='description articleBody')
        imgs = body.find_all('img')
        author = soup.find('span', itemprop='author').string
        return {
            'img': [img['src'] for img in imgs],
            'author': author,
            'title': title,
        }
3344
3345
3346
class GeekAndPoke(GenericNavigableComic):
    """Class to retrieve Geek And Poke comics."""
    name = 'geek'
    long_name = 'Geek And Poke'
    url = 'http://geek-and-poke.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://geek-and-poke.com/geekandpoke/2006/8/27/a-new-place-for-a-not-so-old-blog.html'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic."""
        # NOTE(review): 'prev-item' is looked up for the next comic and
        # 'next-item' for the previous one - presumably the site orders posts
        # newest-first; confirm before "fixing" this apparent inversion.
        return last_soup.find('a', class_='prev-item' if next_ else 'next-item')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with the title, description, author, publication
        date, absolute image urls and the alt text of the first image
        (an empty string when the post has no image).
        """
        title = soup.find('meta', property='og:title')['content']
        desc = soup.find('meta', property='og:description')['content']
        date_str = soup.find('time', class_='published')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        author = soup.find('a', rel='author').string
        # The content lives in one of two differently-classed divs.
        div_content = (soup.find('div', class_="body entry-content") or
                       soup.find('div', class_="special-content"))
        imgs = div_content.find_all('img')
        # Images without a src attribute cannot be downloaded: ignore them.
        imgs = [i for i in imgs if i.get('src') is not None]
        # NOTE(review): `'title' not in i` looks like it tests the tag's
        # children rather than its attributes, which would make this assert
        # always pass - confirm; `i.has_attr('title')` may have been intended.
        assert all('title' not in i or i['alt'] == i['title'] for i in imgs)
        # Always a string, so that 'alt' has the same type with or without images.
        alt = imgs[0].get('alt', "") if imgs else ""
        return {
            'title': title,
            'alt': alt,
            'description': desc,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
        }
3384
3385
3386
class GloryOwlComix(GenericNavigableComic):
    """Glory Owl comics, navigated through the blog pager links."""
    name = 'gloryowl'
    long_name = 'Glory Owl'
    url = 'http://gloryowlcomix.blogspot.fr'
    _categories = ('NSFW', 'FRANCAIS')
    get_first_comic_link = simulate_first_link
    first_url = 'http://gloryowlcomix.blogspot.fr/2013/02/1_7.html'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the 'newer' (next) or 'older' (previous) pager anchor."""
        if next_:
            pager_id = 'Blog1_blog-pager-newer-link'
        else:
            pager_id = 'Blog1_blog-pager-older-link'
        return last_soup.find('a', id=pager_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (image urls, author, title) for one post."""
        title = soup.find('title').string
        image_links = soup.find_all('link', rel='image_src')
        author = soup.find('a', rel='author').string
        return {
            'img': [lnk['href'] for lnk in image_links],
            'author': author,
            'title': title,
        }
3411
3412
3413
class GenericTumblrV1(GenericComic):
    """Base class for comics retrieved from Tumblr through the V1 API."""
    _categories = ('TUMBLR',)

    @classmethod
    def get_next_comic(cls, last_comic):
        """Yield the info dict of each post returned by get_posts, skipping posts without one."""
        for post in cls.get_posts(last_comic):
            info = cls.get_comic_info(post)
            if info is None:
                continue
            yield info

    @classmethod
    def get_url_from_post(cls, post):
        """Return the value of the post's 'url' attribute."""
        return post['url']

    @classmethod
    def get_api_url(cls):
        """Return the '/api/read/' url joined onto cls.url."""
        return urljoin_wrapper(cls.url, '/api/read/')

    @classmethod
    def get_comic_info(cls, post):
        """Build the info dict for a post, or return None if it is not a 'photo' post."""
        if post['type'] != 'photo':
            return None
        tumblr_id = int(post['id'])
        api_url = cls.get_api_url() + '?id=%d' % (tumblr_id)
        timestamp = int(post['unix-timestamp'])
        day = datetime.datetime.fromtimestamp(timestamp).date()
        caption = post.find('photo-caption')
        if caption:
            title = caption.string
        else:
            title = ""
        tags = ' '.join(tag.string for tag in post.find_all('tag'))
        # Photos may sit in nested 'photo' tags and/or directly in the post.
        photo_tags = post.find_all('photo') or [post]
        # Several resolutions are listed for each photo: keep the first one.
        photo_urls = [photo.find('photo-url') for photo in photo_tags]
        return {
            'url': cls.get_url_from_post(post),
            'url2': post['url-with-slug'],
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'title': title,
            'tags': tags,
            'img': [photo_url.string for photo_url in photo_urls],
            'tumblr-id': tumblr_id,
            'api_url': api_url,
        }

    @classmethod
    def get_posts(cls, last_comic, nb_post_per_call=10):
        """Return the posts published after last_comic, oldest first.

        Pages of nb_post_per_call posts (noted as 50 at most) are requested
        from the API, which lists posts from newer to older; accumulation
        stops at the post whose url matches last_comic's url and the
        accumulated posts are handed back in reverse (chronological) order.
        Nothing is returned when the previous post cannot be fetched or is
        never encountered.
        """
        if last_comic:
            waiting_for_url = last_comic['url']
        else:
            waiting_for_url = None
        posts_acc = []
        if last_comic is not None:
            # Tumblr posts get deleted from time to time. If the previous
            # post is gone, a lot of time could be spent looking for
            # something that does not exist: fail early and clearly instead.
            last_api_url = last_comic['api_url']
            try:
                get_soup_at_url(last_api_url)
            except urllib.error.HTTPError:
                try:
                    get_soup_at_url(cls.url)
                except urllib.error.HTTPError:
                    print("Did not find previous post nor main url %s" % cls.url)
                else:
                    print("Did not find previous post %s : it might have been deleted" % last_api_url)
                return reversed(posts_acc)
        api_url = cls.get_api_url()
        first_page = get_soup_at_url(api_url).find('posts')
        start, total = int(first_page['start']), int(first_page['total'])
        assert start == 0
        for offset in range(0, total, nb_post_per_call):
            page_url = api_url + '?start=%d&num=%d' % (offset, nb_post_per_call)
            page = get_soup_at_url(page_url).find('posts')
            page_start, page_total = int(page['start']), int(page['total'])
            assert offset == page_start, "%d != %d" % (offset, page_start)
            # This may happen and should be handled in the future
            assert total == page_total, "%d != %d" % (total, page_total)
            for post in page.find_all('post'):
                if waiting_for_url and waiting_for_url == cls.get_url_from_post(post):
                    return reversed(posts_acc)
                posts_acc.append(post)
        if waiting_for_url is None:
            return reversed(posts_acc)
        print("Did not find %s : there might be a problem" % waiting_for_url)
        return []
3506
3507
3508
class SaturdayMorningBreakfastCerealTumblr(GenericTumblrV1):
    """Saturday Morning Breakfast Cereal comics, fetched through the Tumblr V1 API."""
    # Also published at http://www.gocomics.com/saturday-morning-breakfast-cereal
    # Also published at http://www.smbc-comics.com
    name = 'smbc-tumblr'
    long_name = 'Saturday Morning Breakfast Cereal (from Tumblr)'
    url = 'http://smbc-comics.tumblr.com'
    _categories = ('SMBC',)
3516
3517
3518
class IrwinCardozo(GenericTumblrV1):
    """Irwin Cardozo comics, fetched through the Tumblr V1 API."""
    name = 'irwinc'
    long_name = 'Irwin Cardozo'
    url = 'http://irwincardozocomics.tumblr.com'
3523
3524
3525
class AccordingToDevin(GenericTumblrV1):
    """According To Devin comics, fetched through the Tumblr V1 API."""
    name = 'devin'
    long_name = 'According To Devin'
    url = 'http://accordingtodevin.tumblr.com'
3530
3531
3532
class ItsTheTieTumblr(GenericTumblrV1):
    """It's the tie comics, fetched through the Tumblr V1 API."""
    # Also published at http://itsthetie.com
    # Also published at https://tapastic.com/series/itsthetie
    name = 'tie-tumblr'
    long_name = "It's the tie (from Tumblr)"
    url = "http://itsthetie.tumblr.com"
    _categories = ('TIE',)
3540
3541
3542
class OctopunsTumblr(GenericTumblrV1):
    """Octopuns comics, fetched through the Tumblr V1 API."""
    # Also published at http://www.octopuns.net
    name = 'octopuns-tumblr'
    long_name = 'Octopuns (from Tumblr)'
    url = 'http://octopuns.tumblr.com'
3548
3549
3550
class PicturesInBoxesTumblr(GenericTumblrV1):
    """Pictures In Boxes comics, fetched through the Tumblr V1 API."""
    # Also published at http://www.picturesinboxes.com
    name = 'picturesinboxes-tumblr'
    long_name = 'Pictures in Boxes (from Tumblr)'
    url = 'http://picturesinboxescomic.tumblr.com'
3556
3557
3558
class TubeyToonsTumblr(GenericTumblrV1):
    """Tubey Toons comics, fetched through the Tumblr V1 API."""
    # Also published at http://tapastic.com/series/Tubey-Toons
    # Also published at http://tubeytoons.com
    name = 'tubeytoons-tumblr'
    long_name = 'Tubey Toons (from Tumblr)'
    url = 'http://tubeytoons.tumblr.com'
    # NOTE(review): 'TUNEYTOONS' looks like a typo for 'TUBEYTOONS'; left
    # untouched since other classes may rely on the same category key.
    _categories = ('TUNEYTOONS',)
3566
3567
3568
class UnearthedComicsTumblr(GenericTumblrV1):
    """Unearthed Comics, fetched through the Tumblr V1 API."""
    # Also published at http://tapastic.com/series/UnearthedComics
    # Also published at http://unearthedcomics.com
    name = 'unearthed-tumblr'
    long_name = 'Unearthed Comics (from Tumblr)'
    url = 'http://unearthedcomics.tumblr.com'
    _categories = ('UNEARTHED',)
3576
3577
3578
class PieComic(GenericTumblrV1):
    """Pie Comic comics, fetched through the Tumblr V1 API."""
    name = 'pie'
    long_name = 'Pie Comic'
    url = "http://piecomic.tumblr.com"
3583
3584
3585
class MrEthanDiamond(GenericTumblrV1):
    """Mr Ethan Diamond comics, fetched through the Tumblr V1 API."""
    name = 'diamond'
    long_name = 'Mr Ethan Diamond'
    url = 'http://mrethandiamond.tumblr.com'
3590
3591
3592
class Flocci(GenericTumblrV1):
    """floccinaucinihilipilification comics, fetched through the Tumblr V1 API."""
    name = 'flocci'
    long_name = 'floccinaucinihilipilification'
    url = "http://floccinaucinihilipilificationa.tumblr.com"
3597
3598
3599
class UpAndOut(GenericTumblrV1):
    """Up & Out comics, fetched through the Tumblr V1 API."""
    # Also published at http://tapastic.com/series/UP-and-OUT
    name = 'upandout'
    long_name = 'Up And Out (from Tumblr)'
    url = 'http://upandoutcomic.tumblr.com'
3605
3606
3607
class Pundemonium(GenericTumblrV1):
    """Pundemonium comics, fetched through the Tumblr V1 API."""
    name = 'pundemonium'
    long_name = 'Pundemonium'
    url = 'http://monstika.tumblr.com'
3612
3613
3614
class PoorlyDrawnLinesTumblr(GenericTumblrV1):
    """Poorly Drawn Lines comics, fetched through the Tumblr V1 API."""
    # Also published at http://poorlydrawnlines.com
    name = 'poorlydrawn-tumblr'
    long_name = 'Poorly Drawn Lines (from Tumblr)'
    url = 'http://pdlcomics.tumblr.com'
    _categories = ('POORLYDRAWN',)
3621
3622
3623
class PearShapedComics(GenericTumblrV1):
    """Pear Shaped Comics, fetched through the Tumblr V1 API."""
    name = 'pearshaped'
    long_name = 'Pear-Shaped Comics'
    url = 'http://pearshapedcomics.com'
3628
3629
3630
class PondScumComics(GenericTumblrV1):
    """Pond Scum comics, fetched through the Tumblr V1 API."""
    name = 'pond'
    long_name = 'Pond Scum'
    url = 'http://pondscumcomic.tumblr.com'
3635
3636
3637
class MercworksTumblr(GenericTumblrV1):
    """Mercworks comics, fetched through the Tumblr V1 API."""
    # Also published at http://mercworks.net
    name = 'mercworks-tumblr'
    long_name = 'Mercworks (from Tumblr)'
    url = 'http://mercworks.tumblr.com'
3643
3644
3645
class OwlTurdTumblr(GenericTumblrV1):
    """Owl Turd comics, fetched through the Tumblr V1 API."""
    # Also published at http://tapastic.com/series/Owl-Turd-Comix
    name = 'owlturd-tumblr'
    long_name = 'Owl Turd (from Tumblr)'
    url = 'http://owlturd.com'
    _categories = ('OWLTURD',)
3652
3653
3654
class VectorBelly(GenericTumblrV1):
    """Vector Belly comics, fetched through the Tumblr V1 API."""
    # Also published at http://vectorbelly.com
    name = 'vector'
    long_name = 'Vector Belly'
    url = 'http://vectorbelly.tumblr.com'
3660
3661
3662
class GoneIntoRapture(GenericTumblrV1):
    """Gone Into Rapture comics, fetched through the Tumblr V1 API."""
    # Also published at http://goneintorapture.tumblr.com
    # Also published at http://tapastic.com/series/Goneintorapture
    name = 'rapture'
    long_name = 'Gone Into Rapture'
    url = 'http://www.goneintorapture.com'
3669
3670
3671
class TheOatmealTumblr(GenericTumblrV1):
    """The Oatmeal comics, fetched through the Tumblr V1 API."""
    # Also published at http://theoatmeal.com
    name = 'oatmeal-tumblr'
    long_name = 'The Oatmeal (from Tumblr)'
    url = 'http://oatmeal.tumblr.com'
3677
3678
3679
class HeckIfIKnowComicsTumblr(GenericTumblrV1):
    """Heck If I Know comics, fetched through the Tumblr V1 API."""
    # Also published at http://tapastic.com/series/Regular
    name = 'heck-tumblr'
    long_name = 'Heck if I Know comics (from Tumblr)'
    url = 'http://heckifiknowcomics.com'
3685
3686
3687
class MyJetPack(GenericTumblrV1):
    """My Jet Pack comics, fetched through the Tumblr V1 API."""
    name = 'jetpack'
    long_name = 'My Jet Pack'
    url = 'http://myjetpack.tumblr.com'
3692
3693
3694
class CheerUpEmoKidTumblr(GenericTumblrV1):
    """Cheer Up Emo Kid comics, fetched through the Tumblr V1 API."""
    # Also published at http://www.cheerupemokid.com
    # Also published at http://tapastic.com/series/CUEK
    name = 'cuek-tumblr'
    long_name = 'Cheer Up Emo Kid (from Tumblr)'
    url = 'http://enzocomics.tumblr.com'
3701
3702
3703
class ForLackOfABetterComic(GenericTumblrV1):
    """For Lack Of A Better Comic, fetched through the Tumblr V1 API."""
    # Also published at http://forlackofabettercomic.com
    name = 'lack'
    long_name = 'For Lack Of A Better Comic'
    url = 'http://forlackofabettercomic.tumblr.com'
3709
3710
3711
class ZenPencilsTumblr(GenericTumblrV1):
    """Zen Pencils comics, fetched through the Tumblr V1 API."""
    # Also published at http://zenpencils.com
    # Also published at http://www.gocomics.com/zen-pencils
    name = 'zenpencils-tumblr'
    long_name = 'Zen Pencils (from Tumblr)'
    url = 'http://zenpencils.tumblr.com'
    _categories = ('ZENPENCILS',)
3719
3720
3721
class ThreeWordPhraseTumblr(GenericTumblrV1):
    """Three Word Phrase comics, fetched through the Tumblr V1 API."""
    # Also published at http://threewordphrase.com
    name = 'threeword-tumblr'
    long_name = 'Three Word Phrase (from Tumblr)'
    url = 'http://www.threewordphrase.tumblr.com'
3727
3728
3729
class TimeTrabbleTumblr(GenericTumblrV1):
    """Time Trabble comics, fetched through the Tumblr V1 API."""
    # Also published at http://timetrabble.com
    name = 'timetrabble-tumblr'
    long_name = 'Time Trabble (from Tumblr)'
    url = 'http://timetrabble.tumblr.com'
3735
3736
3737
class SafelyEndangeredTumblr(GenericTumblrV1):
    """Safely Endangered comics, fetched through the Tumblr V1 API."""
    # Also published at http://www.safelyendangered.com
    name = 'endangered-tumblr'
    long_name = 'Safely Endangered (from Tumblr)'
    url = 'http://tumblr.safelyendangered.com'
3743
3744
3745
class MouseBearComedyTumblr(GenericTumblrV1):
    """Mouse Bear Comedy comics, fetched through the Tumblr V1 API."""
    # Also published at http://www.mousebearcomedy.com
    name = 'mousebear-tumblr'
    long_name = 'Mouse Bear Comedy (from Tumblr)'
    url = 'http://mousebearcomedy.tumblr.com'
3751
3752
3753
class BouletCorpTumblr(GenericTumblrV1):
    """BouletCorp comics, fetched through the Tumblr V1 API."""
    # Also published at http://www.bouletcorp.com
    name = 'boulet-tumblr'
    long_name = 'Boulet Corp (from Tumblr)'
    url = 'http://bouletcorp.tumblr.com'
    _categories = ('BOULET',)
3760
3761
3762
class TheAwkwardYetiTumblr(GenericTumblrV1):
    """The Awkward Yeti comics, fetched through the Tumblr V1 API."""
    # Also published at http://www.gocomics.com/the-awkward-yeti
    # Also published at http://theawkwardyeti.com
    # Also published at https://tapastic.com/series/TheAwkwardYeti
    name = 'yeti-tumblr'
    long_name = 'The Awkward Yeti (from Tumblr)'
    url = 'http://larstheyeti.tumblr.com'
    _categories = ('YETI',)
3771
3772
3773
class NellucNhoj(GenericTumblrV1):
    """Nelluc Nhoj comics, fetched through the Tumblr V1 API."""
    name = 'nhoj'
    long_name = 'Nelluc Nhoj'
    url = 'http://nellucnhoj.com'
3778
3779
3780
class DownTheUpwardSpiralTumblr(GenericTumblrV1):
    """Down The Upward Spiral comics, fetched through the Tumblr V1 API."""
    # Also published at http://www.downtheupwardspiral.com
    name = 'spiral-tumblr'
    long_name = 'Down the Upward Spiral (from Tumblr)'
    url = 'http://downtheupwardspiral.tumblr.com'
3786
3787
3788
class AsPerUsualTumblr(GenericTumblrV1):
    """Class to retrieve As Per Usual comics."""
    # Also on https://tapastic.com/series/AsPerUsual
    name = 'usual-tumblr'
    long_name = 'As Per Usual (from Tumblr)'
    url = 'http://as-per-usual.tumblr.com'
    # Declared as `_categories` (with the leading underscore), the attribute
    # name used by the other comic classes in this file.
    _categories = ('DAMILEE', )
3795
3796
3797
class HotComicsForCoolPeopleTumblr(GenericTumblrV1):
    """Class to retrieve Hot Comics For Cool People."""
    # Also on https://tapastic.com/series/Hot-Comics-For-Cool-People
    # Also on http://hotcomics.biz (links to tumblr)
    # Also on http://hcfcp.com (links to tumblr)
    name = 'hotcomics-tumblr'
    long_name = 'Hot Comics For Cool People (from Tumblr)'
    url = 'http://hotcomicsforcoolpeople.tumblr.com'
    # Declared as `_categories` (with the leading underscore), the attribute
    # name used by the other comic classes in this file.
    _categories = ('DAMILEE', )
3806
3807
3808
class OneOneOneOneComicTumblr(GenericTumblrV1):
    """1111 Comics, fetched through the Tumblr V1 API."""
    # Also published at http://www.1111comics.me
    # Also published at https://tapastic.com/series/1111-Comics
    name = '1111-tumblr'
    long_name = '1111 Comics (from Tumblr)'
    url = 'http://comics1111.tumblr.com'
    _categories = ('ONEONEONEONE',)
3816
3817
3818
class JhallComicsTumblr(GenericTumblrV1):
    """Jhall Comics, fetched through the Tumblr V1 API."""
    # Also published at http://jhallcomics.com
    name = 'jhall-tumblr'
    long_name = 'Jhall Comics (from Tumblr)'
    url = 'http://jhallcomics.tumblr.com'
3824
3825
3826
class BerkeleyMewsTumblr(GenericTumblrV1):
    """Berkeley Mews comics, fetched through the Tumblr V1 API."""
    # Also published at http://www.gocomics.com/berkeley-mews
    # Also published at http://www.berkeleymews.com
    name = 'berkeley-tumblr'
    long_name = 'Berkeley Mews (from Tumblr)'
    url = 'http://mews.tumblr.com'
    _categories = ('BERKELEY',)
3834
3835
3836
class JoanCornellaTumblr(GenericTumblrV1):
    """Retriever for Joan Cornella comics, built on GenericTumblrV1."""
    # Also published at http://joancornella.net
    name = 'cornella-tumblr'
    long_name = 'Joan Cornella (from Tumblr)'
    url = 'http://cornellajoan.tumblr.com'
3842
3843
3844
class RespawnComicTumblr(GenericTumblrV1):
    """Retriever for Respawn Comic, built on GenericTumblrV1."""
    # Also published at http://respawncomic.com
    name = 'respawn-tumblr'
    long_name = 'Respawn Comic (from Tumblr)'
    url = 'http://respawncomic.tumblr.com'
3850
3851
3852
class ChrisHallbeckTumblr(GenericTumblrV1):
    """Retriever for Chris Hallbeck comics, built on GenericTumblrV1."""
    # Also published at https://tapastic.com/ChrisHallbeck
    # Also published at http://maximumble.com
    # Also published at http://minimumble.com
    # Also published at http://thebookofbiff.com
    name = 'hallbeck-tumblr'
    # Display name spelled "Hallbeck", matching the class name and the URL.
    long_name = 'Chris Hallbeck (from Tumblr)'
    url = 'http://chrishallbeck.tumblr.com'
    # NOTE(review): the category key keeps its historical 'HALLBACK' spelling
    # because other classes outside this view may share it - confirm before
    # renaming.
    _categories = ('HALLBACK', )
3862
3863
3864
class ComicNuggets(GenericTumblrV1):
    """Retriever for Comic Nuggets, built on GenericTumblrV1."""
    name = 'nuggets'
    long_name = 'Comic Nuggets'
    url = 'http://comicnuggets.com'
3869
3870
3871
class PigeonGazetteTumblr(GenericTumblrV1):
    """Retriever for The Pigeon Gazette comics, built on GenericTumblrV1."""
    # Also published at https://tapastic.com/series/The-Pigeon-Gazette
    name = 'pigeon-tumblr'
    long_name = 'The Pigeon Gazette (from Tumblr)'
    url = 'http://thepigeongazette.tumblr.com'
3877
3878
3879
class CancerOwl(GenericTumblrV1):
    """Retriever for Cancer Owl comics, built on GenericTumblrV1."""
    # Also published at http://cancerowl.com
    name = 'cancerowl-tumblr'
    long_name = 'Cancer Owl (from Tumblr)'
    url = 'http://cancerowl.tumblr.com'
3885
3886
3887
class FowlLanguageTumblr(GenericTumblrV1):
    """Retriever for Fowl Language comics, built on GenericTumblrV1."""
    # Also published at http://www.fowllanguagecomics.com
    # Also published at http://tapastic.com/series/Fowl-Language-Comics
    # Also published at http://www.gocomics.com/fowl-language
    name = 'fowllanguage-tumblr'
    long_name = 'Fowl Language Comics (from Tumblr)'
    url = 'http://fowllanguagecomics.tumblr.com'
    _categories = ('FOWLLANGUAGE', )
3896
3897
3898
class TheOdd1sOutTumblr(GenericTumblrV1):
    """Retriever for The Odd 1s Out comics, built on GenericTumblrV1."""
    # Also published at http://theodd1sout.com
    # Also published at https://tapastic.com/series/Theodd1sout
    name = 'theodd-tumblr'
    long_name = 'The Odd 1s Out (from Tumblr)'
    url = 'http://theodd1sout.tumblr.com'
3905
3906
3907
class TheUnderfoldTumblr(GenericTumblrV1):
    """Retriever for The Underfold comics, built on GenericTumblrV1."""
    # Also published at http://theunderfold.com
    name = 'underfold-tumblr'
    long_name = 'The Underfold (from Tumblr)'
    url = 'http://theunderfold.tumblr.com'
3913
3914
3915
class LolNeinTumblr(GenericTumblrV1):
    """Retriever for Lol Nein comics, built on GenericTumblrV1."""
    # Also published at http://lolnein.com
    name = 'lolnein-tumblr'
    long_name = 'Lol Nein (from Tumblr)'
    url = 'http://lolneincom.tumblr.com'
3921
3922
3923
class FatAwesomeComicsTumblr(GenericTumblrV1):
    """Retriever for Fat Awesome Comics, built on GenericTumblrV1."""
    # Also published at http://fatawesome.com/comics
    name = 'fatawesome-tumblr'
    long_name = 'Fat Awesome (from Tumblr)'
    url = 'http://fatawesomecomedy.tumblr.com'
3929
3930
3931
class TheWorldIsFlatTumblr(GenericTumblrV1):
    """Retriever for The World Is Flat comics, built on GenericTumblrV1."""
    # Also published at https://tapastic.com/series/The-World-is-Flat
    name = 'flatworld-tumblr'
    long_name = 'The World Is Flat (from Tumblr)'
    url = 'http://theworldisflatcomics.tumblr.com'
3937
3938
3939
class DorrisMc(GenericTumblrV1):
    """Retriever for Dorris Mc comics, built on GenericTumblrV1."""
    # Also published at http://www.gocomics.com/dorris-mccomics
    name = 'dorrismc'
    long_name = 'Dorris Mc'
    url = 'http://dorrismccomics.com'
3945
3946
3947
class LeleozTumblr(GenericEmptyComic, GenericTumblrV1):
    """Retriever for Leleoz comics (GenericEmptyComic + GenericTumblrV1)."""
    # Also published at https://tapastic.com/series/Leleoz
    name = 'leleoz-tumblr'
    long_name = 'Leleoz (from Tumblr)'
    url = 'http://leleozcomics.tumblr.com'
3953
3954
3955
class MoonBeardTumblr(GenericTumblrV1):
    """Retriever for MoonBeard comics, built on GenericTumblrV1."""
    # Also published at http://moonbeard.com
    # Also published at http://www.webtoons.com/en/comedy/moon-beard/list?title_no=471
    name = 'moonbeard-tumblr'
    long_name = 'Moon Beard (from Tumblr)'
    url = 'http://blog.squiresjam.es/moonbeard'
3962
3963
3964
class AComik(GenericTumblrV1):
    """Retriever for A Comik, built on GenericTumblrV1."""
    name = 'comik'
    long_name = 'A Comik'
    url = 'http://acomik.com'
3969
3970
3971
class ClassicRandy(GenericTumblrV1):
    """Retriever for Classic Randy comics, built on GenericTumblrV1."""
    name = 'randy'
    long_name = 'Classic Randy'
    url = 'http://classicrandy.tumblr.com'
3976
3977
3978
class DagssonTumblr(GenericTumblrV1):
    """Retriever for Dagsson comics, built on GenericTumblrV1."""
    # Also published at http://www.dagsson.com
    name = 'dagsson-tumblr'
    long_name = 'Dagsson Hugleikur (from Tumblr)'
    url = 'http://hugleikurdagsson.tumblr.com'
3984
3985
3986
class LinsEditionsTumblr(GenericTumblrV1):
    """Retriever for L.I.N.S. Editions comics, built on GenericTumblrV1."""
    # Also published at https://linsedition.com
    # Now continued at http://warandpeas.tumblr.com
    name = 'lins-tumblr'
    long_name = 'L.I.N.S. Editions (from Tumblr)'
    url = 'http://linscomics.tumblr.com'
    _categories = ('LINS', )
3994
3995
3996
class WarAndPeasTumblr(GenericTumblrV1):
    """Retriever for War And Peas comics, built on GenericTumblrV1."""
    # Formerly at http://linscomics.tumblr.com
    name = 'warandpeas-tumblr'
    long_name = 'War And Peas (from Tumblr)'
    url = 'http://warandpeas.tumblr.com'
    _categories = ('WARANDPEAS', )
4003
4004
4005
class OrigamiHotDish(GenericTumblrV1):
    """Retriever for Origami Hot Dish comics, built on GenericTumblrV1."""
    name = 'origamihotdish'
    long_name = 'Origami Hot Dish'
    url = 'http://origamihotdish.com'
4010
4011
4012
class HitAndMissComicsTumblr(GenericTumblrV1):
    """Retriever for Hit and Miss Comics, built on GenericTumblrV1."""
    name = 'hitandmiss'
    long_name = 'Hit and Miss Comics'
    url = 'http://hitandmisscomics.tumblr.com'
4017
4018
4019
class HMBlanc(GenericTumblrV1):
    """Retriever for HM Blanc comics, built on GenericTumblrV1."""
    name = 'hmblanc'
    long_name = 'HM Blanc'
    url = 'http://hmblanc.tumblr.com'
4024
4025
4026
class TalesOfAbsurdityTumblr(GenericTumblrV1):
    """Retriever for Tales Of Absurdity comics, built on GenericTumblrV1."""
    # Also published at http://talesofabsurdity.com
    # Also published at http://tapastic.com/series/Tales-Of-Absurdity
    name = 'absurdity-tumblr'
    long_name = 'Tales of Absurdity (from Tumblr)'
    url = 'http://talesofabsurdity.tumblr.com'
    _categories = ('ABSURDITY', )
4034
4035
4036
class RobbieAndBobby(GenericTumblrV1):
    """Retriever for Robbie And Bobby comics, built on GenericTumblrV1."""
    # Also published at http://robbieandbobby.com
    name = 'robbie-tumblr'
    long_name = 'Robbie And Bobby (from Tumblr)'
    url = 'http://robbieandbobby.tumblr.com'
4042
4043
4044
class ElectricBunnyComicTumblr(GenericTumblrV1):
    """Retriever for Electric Bunny Comics, built on GenericTumblrV1."""
    # Also published at http://www.electricbunnycomics.com/View/Comic/153/Welcome+to+Hell
    name = 'bunny-tumblr'
    long_name = 'Electric Bunny Comic (from Tumblr)'
    url = 'http://electricbunnycomics.tumblr.com'
4050
4051
4052
class Hoomph(GenericTumblrV1):
    """Retriever for Hoomph comics, built on GenericTumblrV1."""
    name = 'hoomph'
    long_name = 'Hoomph'
    url = 'http://hoom.ph'
4057
4058
4059
class BFGFSTumblr(GenericTumblrV1):
    """Retriever for BFGFS comics, built on GenericTumblrV1."""
    # Also published at https://tapastic.com/series/BFGFS
    # Also published at http://bfgfs.com
    name = 'bfgfs-tumblr'
    long_name = 'BFGFS (from Tumblr)'
    url = 'http://bfgfs.tumblr.com'
4066
4067
4068
class DoodleForFood(GenericTumblrV1):
    """Retriever for Doodle For Food comics, built on GenericTumblrV1."""
    # Also published at http://doodleforfood.com
    name = 'doodle'
    long_name = 'Doodle For Food'
    url = 'http://doodleforfood.com'
4074
4075
4076
class CassandraCalinTumblr(GenericTumblrV1):
    """Retriever for C. Cassandra comics, built on GenericTumblrV1."""
    # Also published at http://cassandracalin.com
    # Also published at https://tapastic.com/series/C-Cassandra-comics
    name = 'cassandra-tumblr'
    long_name = 'Cassandra Calin (from Tumblr)'
    url = 'http://c-cassandra.tumblr.com'
4083
4084
4085
class DougWasTaken(GenericTumblrV1):
    """Retriever for Doug Was Taken comics, built on GenericTumblrV1."""
    name = 'doug'
    long_name = 'Doug Was Taken'
    url = 'http://dougwastaken.tumblr.com'
4090
4091
4092
class MandatoryRollerCoaster(GenericTumblrV1):
    """Retriever for Mandatory Roller Coaster comics, built on GenericTumblrV1."""
    name = 'rollercoaster'
    long_name = 'Mandatory Roller Coaster'
    url = 'http://mandatoryrollercoaster.com'
4097
4098
4099
class CEstPasEnRegardantSesPompes(GenericTumblrV1):
    """Retriever for C'Est Pas En Regardant Ses Pompes (...) comics, built on GenericTumblrV1."""
    name = 'cperspqccltt'
    long_name = 'C Est Pas En Regardant Ses Pompes (...)'
    url = 'http://cperspqccltt.tumblr.com'
4104
4105
4106
class TheGrohlTroll(GenericTumblrV1):
    """Retriever for The Grohl Troll comics, built on GenericTumblrV1."""
    name = 'grohltroll'
    long_name = 'The Grohl Troll'
    url = 'http://thegrohltroll.com'
4111
4112
4113
class WebcomicName(GenericTumblrV1):
    """Retriever for Webcomic Name comics, built on GenericTumblrV1."""
    name = 'webcomicname'
    long_name = 'Webcomic Name'
    url = 'http://webcomicname.com'
4118
4119
4120
class BooksOfAdam(GenericTumblrV1):
    """Retriever for Books of Adam comics, built on GenericTumblrV1."""
    # Also published at http://www.booksofadam.com
    name = 'booksofadam'
    long_name = 'Books of Adam'
    url = 'http://booksofadam.tumblr.com'
4126
4127
4128
class HarkAVagrant(GenericTumblrV1):
    """Retriever for Hark A Vagrant comics, built on GenericTumblrV1."""
    # Also published at http://www.harkavagrant.com
    name = 'hark-tumblr'
    long_name = 'Hark A Vagrant (from Tumblr)'
    url = 'http://beatonna.tumblr.com'
4134
4135
4136
class OurSuperAdventureTumblr(GenericTumblrV1):
    """Retriever for Our Super Adventure comics, built on GenericTumblrV1."""
    # Also published at https://tapastic.com/series/Our-Super-Adventure
    # Also published at http://www.oursuperadventure.com
    # Related link: http://sarahgraley.com
    name = 'superadventure-tumblr'
    long_name = 'Our Super Adventure (from Tumblr)'
    url = 'http://sarahssketchbook.tumblr.com'
4144
4145
4146
class JakeLikesOnions(GenericTumblrV1):
    """Retriever for Jake Likes Onions comics, built on GenericTumblrV1."""
    name = 'jake'
    long_name = 'Jake Likes Onions'
    url = 'http://jakelikesonions.com'
4151
4152
4153
class InYourFaceCake(GenericTumblrV1):
    """Retriever for In Your Face Cake comics, built on GenericTumblrV1."""
    name = 'inyourfacecake-tumblr'
    long_name = 'In Your Face Cake (from Tumblr)'
    url = 'http://in-your-face-cake.tumblr.com'
4158
4159
4160
class Robospunk(GenericTumblrV1):
    """Retriever for Robospunk comics, built on GenericTumblrV1."""
    name = 'robospunk'
    long_name = 'Robospunk'
    url = 'http://robospunk.com'
4165
4166
4167
class BananaTwinky(GenericTumblrV1):
    """Retriever for Banana Twinky comics, built on GenericTumblrV1."""
    name = 'banana'
    long_name = 'Banana Twinky'
    url = 'http://bananatwinky.tumblr.com'
4172
4173
4174
class YesterdaysPopcornTumblr(GenericTumblrV1):
    """Retriever for Yesterday's Popcorn comics, built on GenericTumblrV1."""
    # Also published at http://www.yesterdayspopcorn.com
    # Also published at https://tapastic.com/series/Yesterdays-Popcorn
    name = 'popcorn-tumblr'
    long_name = 'Yesterday\'s Popcorn (from Tumblr)'
    url = 'http://yesterdayspopcorn.tumblr.com'
4181
4182
4183
class TwistedDoodles(GenericTumblrV1):
    """Retriever for Twisted Doodles comics, built on GenericTumblrV1."""
    name = 'twisted'
    long_name = 'Twisted Doodles'
    url = 'http://www.twisteddoodles.com'
4188
4189
4190
class UbertoolTumblr(GenericTumblrV1):
    """Retriever for Ubertool comics, built on GenericTumblrV1."""
    # Also published at http://ubertoolcomic.com
    # Also published at https://tapastic.com/series/ubertool
    name = 'ubertool-tumblr'
    long_name = 'Ubertool (from Tumblr)'
    url = 'http://ubertool.tumblr.com'
    _categories = ('UBERTOOL', )
4198
4199
4200
class LittleLifeLinesTumblr(GenericTumblrV1):
    """Retriever for Little Life Lines comics, built on GenericTumblrV1."""
    # Also published at http://www.littlelifelines.com
    name = 'life-tumblr'
    long_name = 'Little Life Lines (from Tumblr)'
    url = 'https://little-life-lines.tumblr.com'
4206
4207
4208
class TheyCanTalk(GenericTumblrV1):
    """Retriever for They Can Talk comics, built on GenericTumblrV1."""
    name = 'theycantalk'
    long_name = 'They Can Talk'
    url = 'http://theycantalk.com'
4213
4214
4215
class Will5NeverCome(GenericTumblrV1):
    """Retriever for Will 5:00 Never Come comics, built on GenericTumblrV1."""
    name = 'will5'
    long_name = 'Will 5:00 Never Come ?'
    url = 'http://will5nevercome.com'
4220
4221
4222
class Sephko(GenericTumblrV1):
    """Retriever for Sephko comics, built on GenericTumblrV1."""
    # Also published at http://www.sephko.com
    name = 'sephko'
    long_name = 'Sephko'
    url = 'http://sephko.tumblr.com'
4228
4229
4230
class BlazersAtDawn(GenericTumblrV1):
    """Retriever for Blazers At Dawn comics, built on GenericTumblrV1."""
    name = 'blazers'
    long_name = 'Blazers At Dawn'
    url = 'http://blazersatdawn.tumblr.com'
4235
4236
4237
class ArtByMoga(GenericEmptyComic, GenericTumblrV1):
    """Retriever for Art By Moga comics (GenericEmptyComic + GenericTumblrV1)."""
    # Deactivated because it downloads too many things
    name = 'moga'
    long_name = 'Art By Moga'
    url = 'http://artbymoga.tumblr.com'
4242
4243
4244
class VerbalVomitTumblr(GenericTumblrV1):
    """Retriever for Verbal Vomit comics, built on GenericTumblrV1."""
    # Also published at http://www.verbal-vomit.com
    name = 'vomit-tumblr'
    long_name = 'Verbal Vomit (from Tumblr)'
    url = 'http://verbalvomits.tumblr.com'
4250
4251
4252
class LibraryComic(GenericTumblrV1):
    """Retriever for LibraryComic, built on GenericTumblrV1."""
    # Also published at http://librarycomic.com
    name = 'library-tumblr'
    long_name = 'LibraryComic (from Tumblr)'
    url = 'http://librarycomic.tumblr.com'
4258
4259
4260
class TizzyStitchBirdTumblr(GenericTumblrV1):
    """Retriever for Tizzy Stitch Bird comics, built on GenericTumblrV1."""
    # Also published at http://tizzystitchbird.com
    # Also published at https://tapastic.com/series/TizzyStitchbird
    # Also published at http://www.webtoons.com/en/challenge/tizzy-stitchbird/list?title_no=50082
    name = 'tizzy-tumblr'
    long_name = 'Tizzy Stitch Bird (from Tumblr)'
    url = 'http://tizzystitchbird.tumblr.com'
4268
4269
4270
class VictimsOfCircumsolarTumblr(GenericTumblrV1):
    """Retriever for Victims Of Circumsolar comics, built on GenericTumblrV1."""
    # Also published at http://www.victimsofcircumsolar.com
    name = 'circumsolar-tumblr'
    long_name = 'Victims Of Circumsolar (from Tumblr)'
    url = 'http://victimsofcomics.tumblr.com'
4276
4277
4278
class RockPaperCynicTumblr(GenericTumblrV1):
    """Retriever for Rock Paper Cynic comics, built on GenericTumblrV1."""
    # Also published at http://www.rockpapercynic.com
    # Also published at https://tapastic.com/series/rockpapercynic
    name = 'rpc-tumblr'
    long_name = 'Rock Paper Cynic (from Tumblr)'
    url = 'http://rockpapercynic.tumblr.com'
4285
4286
4287
class CatanaComics(GenericTumblrV1):
    """Retriever for Catana comics, built on GenericTumblrV1."""
    name = 'catana'
    long_name = 'Catana'
    url = 'http://www.catanacomics.com'
4292
4293
4294
class OffTheLeashDogTumblr(GenericTumblrV1):
    """Retriever for Off The Leash Dog comics, built on GenericTumblrV1."""
    # Also published at http://offtheleashdogcartoons.com
    # Also published at http://www.rupertfawcettcartoons.com
    name = 'offtheleash-tumblr'
    long_name = 'Off The Leash Dog (from Tumblr)'
    url = 'http://rupertfawcettsdoggyblog.tumblr.com'
    _categories = ('FAWCETT', )
4302
4303
4304
class HorovitzComics(GenericListableComic):
    """Shared scraping logic for the comics hosted on horovitzcomics.com.

    Concrete subclasses provide ``link_re``, a compiled pattern matched
    against archive link hrefs whose first group is the comic number.
    """
    url = 'http://www.horovitzcomics.com'
    _categories = ('HOROVITZ', )
    # Three numeric groups read as (year, month, day) from the image path.
    img_re = re.compile('.*comics/([0-9]*)/([0-9]*)/([0-9]*)/.*$')
    # Placeholder: each subclass overrides this with its own pattern.
    link_re = NotImplemented
    get_url_from_archive_element = join_cls_url_to_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic description from its page and its archive link.

        The number comes from the link href (via ``link_re``), the title
        from the link text and the date from the path of the single
        ``<img id="comic">`` element found in ``soup``.
        """
        href_match = cls.link_re.match(link['href'])
        comic_num = int(href_match.groups()[0])
        comic_title = link.string
        comic_imgs = soup.find_all('img', id='comic')
        assert len(comic_imgs) == 1
        date_parts = cls.img_re.match(comic_imgs[0]['src']).groups()
        year, month, day = map(int, date_parts)
        info = {
            'title': comic_title,
            'day': day,
            'month': month,
            'year': year,
            'img': [img['src'] for img in comic_imgs],
            'num': comic_num,
        }
        return info

    @classmethod
    def get_archive_elements(cls):
        """Return the archive links matching ``link_re``, in reversed page order."""
        archive_url = 'http://www.horovitzcomics.com/comics/archive/'
        archive_soup = get_soup_at_url(archive_url)
        matching_links = archive_soup.find_all('a', href=cls.link_re)
        return reversed(matching_links)
4335
4336
4337
class HorovitzNew(HorovitzComics):
    """Retriever for the "new" Horovitz comics (links under /comics/new/)."""
    name = 'horovitznew'
    long_name = 'Horovitz New'
    link_re = re.compile('^/comics/new/([0-9]+)$')
4342
4343
4344
class HorovitzClassic(HorovitzComics):
    """Retriever for the "classic" Horovitz comics (links under /comics/classic/)."""
    name = 'horovitzclassic'
    long_name = 'Horovitz Classic'
    link_re = re.compile('^/comics/classic/([0-9]+)$')
4349
4350
4351
class GenericGoComic(GenericNavigableComic):
    """Shared scraping logic for the comics hosted on gocomics.com."""
    _categories = ('GOCOMIC', )

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'backward' button link found on the comic's main page."""
        main_page = get_soup_at_url(cls.url)
        return main_page.find('a', class_='fa btn btn-outline-default btn-circle fa-backward sm ')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the next (``next_`` true) or previous navigation link."""
        if next_:
            css_class = 'fa btn btn-outline-default btn-circle fa-caret-right sm '
        else:
            css_class = 'fa btn btn-outline-default btn-circle fa-caret-left sm '
        return last_soup.find('a', class_=css_class)

    @classmethod
    def get_url_from_link(cls, link):
        """Resolve the link's href against the gocomics.com root URL."""
        return urljoin_wrapper('http://www.gocomics.com', link['href'])

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic description from the metadata of its page.

        Date, author and tags are read from ``<meta>`` properties; images
        are the ``<img>`` elements inside the comic ``<picture>`` block.
        """
        published = soup.find('meta', property='article:published_time')['content']
        pub_date = string_to_date(published, "%Y-%m-%d")
        picture = soup.find('picture', class_='img-fluid item-comic-image')
        comic_imgs = picture.find_all('img')
        author = soup.find('meta', property='article:author')['content']
        tags = soup.find('meta', property='article:tag')['content']
        info = {
            'day': pub_date.day,
            'month': pub_date.month,
            'year': pub_date.year,
            'img': [img['src'] for img in comic_imgs],
            'author': author,
            'tags': tags,
        }
        return info
4388
4389
4390
class PearlsBeforeSwine(GenericGoComic):
    """Retriever for Pearls Before Swine comics hosted on gocomics.com."""
    name = 'pearls'
    long_name = 'Pearls Before Swine'
    url = 'http://www.gocomics.com/pearlsbeforeswine'
4395
4396
4397
class Peanuts(GenericGoComic):
    """Retriever for Peanuts comics hosted on gocomics.com."""
    name = 'peanuts'
    long_name = 'Peanuts'
    url = 'http://www.gocomics.com/peanuts'
4402
4403
4404
class MattWuerker(GenericGoComic):
    """Retriever for Matt Wuerker comics hosted on gocomics.com."""
    name = 'wuerker'
    long_name = 'Matt Wuerker'
    url = 'http://www.gocomics.com/mattwuerker'
4409
4410
4411
class TomToles(GenericGoComic):
    """Retriever for Tom Toles comics hosted on gocomics.com."""
    name = 'toles'
    long_name = 'Tom Toles'
    url = 'http://www.gocomics.com/tomtoles'
4416
4417
4418
class BreakOfDay(GenericGoComic):
    """Retriever for Break Of Day comics hosted on gocomics.com."""
    name = 'breakofday'
    long_name = 'Break Of Day'
    url = 'http://www.gocomics.com/break-of-day'
4423
4424
4425
class Brevity(GenericGoComic):
    """Retriever for Brevity comics hosted on gocomics.com."""
    name = 'brevity'
    long_name = 'Brevity'
    url = 'http://www.gocomics.com/brevitypanel'
4430
4431
4432
class MichaelRamirez(GenericGoComic):
    """Retriever for Michael Ramirez comics hosted on gocomics.com."""
    name = 'ramirez'
    long_name = 'Michael Ramirez'
    url = 'http://www.gocomics.com/michaelramirez'
4437
4438
4439
class MikeLuckovich(GenericGoComic):
    """Retriever for Mike Luckovich comics hosted on gocomics.com."""
    name = 'luckovich'
    long_name = 'Mike Luckovich'
    url = 'http://www.gocomics.com/mikeluckovich'
4444
4445
4446
class JimBenton(GenericGoComic):
    """Retriever for Jim Benton comics hosted on gocomics.com."""
    # Also published at http://jimbenton.tumblr.com
    name = 'benton'
    long_name = 'Jim Benton'
    url = 'http://www.gocomics.com/jim-benton-cartoons'
4452
4453
4454
class TheArgyleSweater(GenericGoComic):
    """Retriever for The Argyle Sweater comics hosted on gocomics.com."""
    name = 'argyle'
    long_name = 'Argyle Sweater'
    url = 'http://www.gocomics.com/theargylesweater'
4459
4460
4461
class SunnyStreet(GenericGoComic):
    """Retriever for Sunny Street comics hosted on gocomics.com."""
    # Also published at http://www.sunnystreetcomics.com
    name = 'sunny'
    long_name = 'Sunny Street'
    url = 'http://www.gocomics.com/sunny-street'
4467
4468
4469
class OffTheMark(GenericGoComic):
    """Retriever for Off The Mark comics hosted on gocomics.com."""
    # Also published at https://www.offthemark.com
    name = 'offthemark'
    long_name = 'Off The Mark'
    url = 'http://www.gocomics.com/offthemark'
4475
4476
4477
class WuMo(GenericGoComic):
    """Retriever for WuMo comics hosted on gocomics.com."""
    # Also published at http://wumo.com
    name = 'wumo'
    long_name = 'WuMo'
    url = 'http://www.gocomics.com/wumo'
4483
4484
4485
class LunarBaboon(GenericGoComic):
    """Retriever for Lunar Baboon comics hosted on gocomics.com."""
    # Also published at http://www.lunarbaboon.com
    # Also published at https://tapastic.com/series/Lunarbaboon
    name = 'lunarbaboon'
    long_name = 'Lunar Baboon'
    url = 'http://www.gocomics.com/lunarbaboon'
4492
4493
4494
class SandersenGocomic(GenericGoComic):
    """Retriever for Sarah Andersen comics hosted on gocomics.com."""
    # Also published at http://sarahcandersen.com
    # Also published at http://tapastic.com/series/Doodle-Time
    name = 'sandersen-goc'
    long_name = 'Sarah Andersen (from GoComics)'
    url = 'http://www.gocomics.com/sarahs-scribbles'
4501
4502
4503
class SaturdayMorningBreakfastCerealGoComic(GenericGoComic):
    """Retriever for Saturday Morning Breakfast Cereal comics hosted on gocomics.com."""
    # Also published at http://smbc-comics.tumblr.com
    # Also published at http://www.smbc-comics.com
    name = 'smbc-goc'
    long_name = 'Saturday Morning Breakfast Cereal (from GoComics)'
    url = 'http://www.gocomics.com/saturday-morning-breakfast-cereal'
    _categories = ('SMBC', )
4511
4512
4513
class CalvinAndHobbesGoComic(GenericGoComic):
    """Retriever for Calvin and Hobbes comics hosted on gocomics.com."""
    # Sourced from gocomics, not http://marcel-oehler.marcellosendos.ch/comics/ch/
    name = 'calvin-goc'
    long_name = 'Calvin and Hobbes (from GoComics)'
    url = 'http://www.gocomics.com/calvinandhobbes'
4519
4520
4521
class RallGoComic(GenericGoComic):
    """Retriever for Ted Rall comics hosted on gocomics.com."""
    # Also published at http://rall.com/comic
    name = 'rall-goc'
    long_name = "Ted Rall (from GoComics)"
    url = "http://www.gocomics.com/ted-rall"
    _categories = ('RALL', )
4528
4529
4530
class TheAwkwardYetiGoComic(GenericGoComic):
    """Retriever for The Awkward Yeti comics hosted on gocomics.com."""
    # Also published at http://larstheyeti.tumblr.com
    # Also published at http://theawkwardyeti.com
    # Also published at https://tapastic.com/series/TheAwkwardYeti
    name = 'yeti-goc'
    long_name = 'The Awkward Yeti (from GoComics)'
    url = 'http://www.gocomics.com/the-awkward-yeti'
    _categories = ('YETI', )
4539
4540
4541
class BerkeleyMewsGoComics(GenericGoComic):
    """Retriever for Berkeley Mews comics hosted on gocomics.com."""
    # Also published at http://mews.tumblr.com
    # Also published at http://www.berkeleymews.com
    name = 'berkeley-goc'
    long_name = 'Berkeley Mews (from GoComics)'
    url = 'http://www.gocomics.com/berkeley-mews'
    _categories = ('BERKELEY', )
4549
4550
4551
class SheldonGoComics(GenericGoComic):
    """Retriever for Sheldon comics hosted on gocomics.com."""
    # Also published at http://www.sheldoncomics.com
    name = 'sheldon-goc'
    long_name = 'Sheldon Comics (from GoComics)'
    url = 'http://www.gocomics.com/sheldon'
4557
4558
4559
class FowlLanguageGoComics(GenericGoComic):
    """Retriever for Fowl Language comics hosted on gocomics.com."""
    # Also published at http://www.fowllanguagecomics.com
    # Also published at http://tapastic.com/series/Fowl-Language-Comics
    # Also published at http://fowllanguagecomics.tumblr.com
    name = 'fowllanguage-goc'
    long_name = 'Fowl Language Comics (from GoComics)'
    url = 'http://www.gocomics.com/fowl-language'
    _categories = ('FOWLLANGUAGE', )
4568
4569
4570
class NickAnderson(GenericGoComic):
    """Retriever for Nick Anderson comics hosted on gocomics.com."""
    name = 'nickanderson'
    long_name = 'Nick Anderson'
    url = 'http://www.gocomics.com/nickanderson'
4575
4576
4577
class GarfieldGoComics(GenericGoComic):
    """Retriever for Garfield comics hosted on gocomics.com."""
    # Also published at http://garfield.com
    name = 'garfield-goc'
    long_name = 'Garfield (from GoComics)'
    url = 'http://www.gocomics.com/garfield'
    _categories = ('GARFIELD', )
4584
4585
4586
class DorrisMcGoComics(GenericGoComic):
    """Retriever for Dorris Mc comics hosted on gocomics.com."""
    # Also published at http://dorrismccomics.com
    name = 'dorrismc-goc'
    long_name = 'Dorris Mc (from GoComics)'
    url = 'http://www.gocomics.com/dorris-mccomics'
4592
4593
4594
class FoxTrot(GenericGoComic):
    """Retriever for FoxTrot comics hosted on gocomics.com."""
    name = 'foxtrot'
    long_name = 'FoxTrot'
    url = 'http://www.gocomics.com/foxtrot'
4599
4600
4601
class FoxTrotClassics(GenericGoComic):
    """Retriever for FoxTrot Classics comics hosted on gocomics.com."""
    name = 'foxtrot-classics'
    long_name = 'FoxTrot Classics'
    url = 'http://www.gocomics.com/foxtrotclassics'
4606
4607
4608
class MisterAndMeGoComics(GenericEmptyComic, GenericGoComic):
    """Retriever for Mister & Me comics (GenericEmptyComic + GenericGoComic)."""
    # Removed ? (status of the comic on gocomics.com is unconfirmed)
    # Also published at http://www.mister-and-me.com
    # Also published at https://tapastic.com/series/Mister-and-Me
    name = 'mister-goc'
    long_name = 'Mister & Me (from GoComics)'
    url = 'http://www.gocomics.com/mister-and-me'
4615
4616
4617
class NonSequitur(GenericGoComic):
    """Retriever for Non Sequitur (Wiley Miller) comics hosted on gocomics.com."""
    name = 'nonsequitur'
    long_name = 'Non Sequitur'
    url = 'http://www.gocomics.com/nonsequitur'
4622
4623
4624
class GenericTapasticComic(GenericListableComic):
    """Generic class to handle the logic common to comics from tapastic.com.

    Archive elements are the dictionaries decoded from the ``episodeList``
    JavaScript literal embedded in the series page; this class reads their
    ``id``, ``title`` and ``publishDate`` entries.
    """
    _categories = ('TAPASTIC', )

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Get information about a particular comics.

        Returns a dictionary with the date, title and image URLs, or None
        (after printing a notice) when the episode page holds no
        ``art-image`` picture.
        """
        # publishDate is scaled down by 1000 before conversion
        # (presumably milliseconds since the epoch - TODO confirm).
        timestamp = int(archive_elt['publishDate']) / 1000.0
        # NOTE(review): fromtimestamp() converts using the local timezone.
        day = datetime.datetime.fromtimestamp(timestamp).date()
        imgs = soup.find_all('img', class_='art-image')
        if not imgs:
            print("Comic %s is being uploaded, retry later" % cls.get_url_from_archive_element(archive_elt))
            return None
        # imgs is known to be non-empty past the guard above.
        return {
            'day': day.day,
            'year': day.year,
            'month': day.month,
            'img': [i['src'] for i in imgs],
            'title': archive_elt['title'],
        }

    @classmethod
    def get_url_from_archive_element(cls, archive_elt):
        """Build the episode URL from the archive element's ``id``."""
        return 'http://tapastic.com/episode/' + str(archive_elt['id'])

    @classmethod
    def get_archive_elements(cls):
        """Return the episode list decoded from the series page.

        Raises IndexError when the page holds no ``episodeList`` line.
        """
        pref, suff = 'episodeList : ', ','
        # Information is stored in the javascript part
        # I don't know the clean way to get it so this is the ugly way.
        for raw_line in urlopen_wrapper(cls.url).readlines():
            line = raw_line.decode('utf-8').strip()
            if line.startswith(pref) and line.endswith(suff):
                # Drop the prefix and the trailing comma to keep only the
                # JSON literal; the first matching line wins.
                return json.loads(line[len(pref):-len(suff)])
        raise IndexError("No 'episodeList' line found at %s" % cls.url)
4657
4658
4659
class VegetablesForDessert(GenericTapasticComic):
    """Retriever for Vegetables For Dessert comics hosted on tapastic.com."""
    # Also published at http://vegetablesfordessert.tumblr.com
    name = 'vegetables'
    long_name = 'Vegetables For Dessert'
    url = 'http://tapastic.com/series/vegetablesfordessert'
4665
4666
4667
class FowlLanguageTapa(GenericTapasticComic):
    """Retriever for Fowl Language comics hosted on tapastic.com."""
    # Also published at http://www.fowllanguagecomics.com
    # Also published at http://fowllanguagecomics.tumblr.com
    # Also published at http://www.gocomics.com/fowl-language
    name = 'fowllanguage-tapa'
    long_name = 'Fowl Language Comics (from Tapastic)'
    url = 'http://tapastic.com/series/Fowl-Language-Comics'
    _categories = ('FOWLLANGUAGE', )
4676
4677
4678
class OscillatingProfundities(GenericTapasticComic):
    """Retriever for Oscillating Profundities comics hosted on tapastic.com."""
    name = 'oscillating'
    long_name = 'Oscillating Profundities'
    url = 'http://tapastic.com/series/oscillatingprofundities'
4683
4684
4685
class ZnoflatsComics(GenericTapasticComic):
    """Retriever for Znoflats comics hosted on tapastic.com."""
    name = 'znoflats'
    long_name = 'Znoflats Comics'
    url = 'http://tapastic.com/series/Znoflats-Comics'
4690
4691
4692
class SandersenTapastic(GenericTapasticComic):
    """Retriever for Sarah Andersen comics hosted on tapastic.com."""
    # Also published at http://sarahcandersen.com
    # Also published at http://www.gocomics.com/sarahs-scribbles
    name = 'sandersen-tapa'
    long_name = 'Sarah Andersen (from Tapastic)'
    url = 'http://tapastic.com/series/Doodle-Time'
4699
4700
4701
class TubeyToonsTapastic(GenericTapasticComic):
    """Retriever for the Tubey Toons series hosted on Tapastic."""
    # The same comic is also published at:
    #   http://tubeytoons.com
    #   http://tubeytoons.tumblr.com
    name = 'tubeytoons-tapa'
    long_name = 'Tubey Toons (from Tapastic)'
    url = 'http://tapastic.com/series/Tubey-Toons'
    # NOTE(review): the key reads 'TUNEY', not 'TUBEY'. It is kept verbatim
    # because sibling classes for this comic presumably share it - confirm
    # before renaming.
    _categories = ('TUNEYTOONS',)
4709
4710
4711
class AnythingComicTapastic(GenericTapasticComic):
    """Retriever for the Anything Comic series hosted on Tapastic."""
    # The same comic is also published at:
    #   http://www.anythingcomic.com
    name = 'anythingcomic-tapa'
    long_name = 'Anything Comic (from Tapastic)'
    url = 'http://tapastic.com/series/anything'
4717
4718
4719
class UnearthedComicsTapastic(GenericTapasticComic):
    """Retriever for the Unearthed Comics series hosted on Tapastic."""
    # The same comic is also published at:
    #   http://unearthedcomics.com
    #   http://unearthedcomics.tumblr.com
    name = 'unearthed-tapa'
    long_name = 'Unearthed Comics (from Tapastic)'
    url = 'http://tapastic.com/series/UnearthedComics'
    _categories = ('UNEARTHED',)
4727
4728
4729
class EverythingsStupidTapastic(GenericTapasticComic):
    """Retriever for the Everything's Stupid series hosted on Tapastic."""
    # The same comic is also published at:
    #   http://www.webtoons.com/en/challenge/everythings-stupid/list?title_no=14591
    #   http://everythingsstupid.net
    name = 'stupid-tapa'
    long_name = "Everything's Stupid (from Tapastic)"
    url = 'http://tapastic.com/series/EverythingsStupid'
4736
4737
4738
class JustSayEhTapastic(GenericTapasticComic):
    """Retriever for the Just Say Eh series hosted on Tapastic."""
    # The same comic is also published at:
    #   http://www.justsayeh.com
    name = 'justsayeh-tapa'
    long_name = 'Just Say Eh (from Tapastic)'
    url = 'http://tapastic.com/series/Just-Say-Eh'
4744
4745
4746
class ThorsThundershackTapastic(GenericTapasticComic):
    """Retriever for the Thor's Thundershack series hosted on Tapastic."""
    # The same comic is also published at:
    #   http://www.thorsthundershack.com
    name = 'thor-tapa'
    long_name = "Thor's Thundershack (from Tapastic)"
    # NOTE(review): the slug ends in 'Thundershac' (no final 'k'); presumably
    # that is the real Tapastic slug - confirm before "fixing" it.
    url = 'http://tapastic.com/series/Thors-Thundershac'
    _categories = ('THOR',)
4753
4754
4755
class OwlTurdTapastic(GenericTapasticComic):
    """Retriever for the Owl Turd series hosted on Tapastic."""
    # The same comic is also published at:
    #   http://owlturd.com
    name = 'owlturd-tapa'
    long_name = 'Owl Turd (from Tapastic)'
    url = 'http://tapastic.com/series/Owl-Turd-Comix'
    _categories = ('OWLTURD',)
4762
4763
4764
class GoneIntoRaptureTapastic(GenericTapasticComic):
    """Retriever for the Gone Into Rapture series hosted on Tapastic."""
    # The same comic is also published at:
    #   http://goneintorapture.tumblr.com
    #   http://www.goneintorapture.com
    name = 'rapture-tapa'
    long_name = 'Gone Into Rapture (from Tapastic)'
    url = 'http://tapastic.com/series/Goneintorapture'
4771
4772
4773
class HeckIfIKnowComicsTapa(GenericTapasticComic):
    """Retriever for Heck If I Know Comics (Tapastic series 'Regular')."""
    # The same comic is also published at:
    #   http://heckifiknowcomics.com
    name = 'heck-tapa'
    long_name = 'Heck if I Know comics (from Tapastic)'
    url = 'http://tapastic.com/series/Regular'
4779
4780
4781
class CheerUpEmoKidTapa(GenericTapasticComic):
    """Retriever for Cheer Up Emo Kid (Tapastic series 'CUEK')."""
    # The same comic is also published at:
    #   http://www.cheerupemokid.com
    #   http://enzocomics.tumblr.com
    name = 'cuek-tapa'
    long_name = 'Cheer Up Emo Kid (from Tapastic)'
    url = 'http://tapastic.com/series/CUEK'
4788
4789
4790
class BigFootJusticeTapa(GenericTapasticComic):
    """Retriever for the Big Foot Justice series hosted on Tapastic."""
    # The same comic is also published at:
    #   http://bigfootjustice.com
    name = 'bigfoot-tapa'
    long_name = 'Big Foot Justice (from Tapastic)'
    url = 'http://tapastic.com/series/bigfoot-justice'
4796
4797
4798
class UpAndOutTapa(GenericTapasticComic):
    """Retriever for the Up And Out series hosted on Tapastic."""
    # The same comic is also published at:
    #   http://upandoutcomic.tumblr.com
    name = 'upandout-tapa'
    long_name = 'Up And Out (from Tapastic)'
    url = 'http://tapastic.com/series/UP-and-OUT'
4804
4805
4806
class ToonHoleTapa(GenericTapasticComic):
    """Retriever for the Toon Hole series hosted on Tapastic."""
    # The same comic is also published at:
    #   http://www.toonhole.com
    name = 'toonhole-tapa'
    long_name = 'Toon Hole (from Tapastic)'
    url = 'http://tapastic.com/series/TOONHOLE'
4812
4813
4814
class AngryAtNothingTapa(GenericTapasticComic):
    """Retriever for the Angry At Nothing series hosted on Tapastic."""
    # The same comic is also published at:
    #   http://www.angryatnothing.net
    name = 'angry-tapa'
    long_name = 'Angry At Nothing (from Tapastic)'
    # The trailing dash is part of the series slug as written here.
    url = 'http://tapastic.com/series/Comics-yeah-definitely-comics-'
4820
4821
4822
class LeleozTapa(GenericTapasticComic):
    """Retriever for the Leleoz series hosted on Tapastic."""
    # The same comic is also published at:
    #   http://leleozcomics.tumblr.com
    name = 'leleoz-tapa'
    long_name = 'Leleoz (from Tapastic)'
    url = 'https://tapastic.com/series/Leleoz'
4828
4829
4830
class TheAwkwardYetiTapa(GenericTapasticComic):
    """Retriever for The Awkward Yeti series hosted on Tapastic."""
    # The same comic is also published at:
    #   http://www.gocomics.com/the-awkward-yeti
    #   http://theawkwardyeti.com
    #   http://larstheyeti.tumblr.com
    name = 'yeti-tapa'
    long_name = 'The Awkward Yeti (from Tapastic)'
    url = 'https://tapastic.com/series/TheAwkwardYeti'
    _categories = ('YETI',)
4839
4840
4841
class AsPerUsualTapa(GenericTapasticComic):
    """Retriever for the As Per Usual series hosted on Tapastic."""
    # The same comic is also published at:
    #   http://as-per-usual.tumblr.com
    name = 'usual-tapa'
    long_name = 'As Per Usual (from Tapastic)'
    url = 'https://tapastic.com/series/AsPerUsual'
    # Spelled `_categories` (leading underscore) like every other comic class
    # in this module; the former `categories` attribute did not match that
    # convention, so the category was presumably never picked up.
    _categories = ('DAMILEE', )
4848
4849
4850
class HotComicsForCoolPeopleTapa(GenericTapasticComic):
    """Retriever for the Hot Comics For Cool People series hosted on Tapastic."""
    # The same comic is also published at:
    #   http://hotcomicsforcoolpeople.tumblr.com
    #   http://hotcomics.biz (links to tumblr)
    #   http://hcfcp.com (links to tumblr)
    name = 'hotcomics-tapa'
    long_name = 'Hot Comics For Cool People (from Tapastic)'
    url = 'https://tapastic.com/series/Hot-Comics-For-Cool-People'
    # Spelled `_categories` (leading underscore) like every other comic class
    # in this module; the former `categories` attribute did not match that
    # convention, so the category was presumably never picked up.
    _categories = ('DAMILEE', )
4859
4860
4861
class OneOneOneOneComicTapa(GenericTapasticComic):
    """Retriever for the 1111 Comics series hosted on Tapastic."""
    # The same comic is also published at:
    #   http://www.1111comics.me
    #   http://comics1111.tumblr.com
    name = '1111-tapa'
    long_name = '1111 Comics (from Tapastic)'
    url = 'https://tapastic.com/series/1111-Comics'
    _categories = ('ONEONEONEONE',)
4869
4870
4871
class TumbleDryTapa(GenericTapasticComic):
    """Retriever for the Tumble Dry series hosted on Tapastic."""
    # The same comic is also published at:
    #   http://tumbledrycomics.com
    name = 'tumbledry-tapa'
    # Display name fixed: it used to read 'Tumblr Dry', which contradicts the
    # comic's own name as used by its website and its Tapastic slug.
    long_name = 'Tumble Dry (from Tapastic)'
    url = 'https://tapastic.com/series/TumbleDryComics'
4877
4878
4879
class DeadlyPanelTapa(GenericTapasticComic):
    """Retriever for the Deadly Panel series hosted on Tapastic."""
    # The same comic is also published at:
    #   http://www.deadlypanel.com
    name = 'deadly-tapa'
    long_name = 'Deadly Panel (from Tapastic)'
    url = 'https://tapastic.com/series/deadlypanel'
4885
4886
4887
class ChrisHallbeckMaxiTapa(GenericTapasticComic):
    """Retriever for Chris Hallbeck's Maximumble series hosted on Tapastic."""
    # The same comic is also published at:
    #   http://chrishallbeck.tumblr.com
    #   http://maximumble.com
    name = 'hallbeckmaxi-tapa'
    # Author's name fixed in the display string ('Hallback' -> 'Hallbeck').
    long_name = 'Chris Hallbeck - Maximumble (from Tapastic)'
    url = 'https://tapastic.com/series/Maximumble'
    # NOTE(review): the category key keeps the 'HALLBACK' spelling on purpose;
    # it is presumably shared with other Chris Hallbeck classes - rename them
    # all together or not at all.
    _categories = ('HALLBACK', )
4895
4896
4897
class ChrisHallbeckMiniTapa(GenericTapasticComic):
    """Retriever for Chris Hallbeck's Minimumble series hosted on Tapastic."""
    # The same comic is also published at:
    #   http://chrishallbeck.tumblr.com
    #   http://minimumble.com
    name = 'hallbeckmini-tapa'
    # Author's name fixed in the display string ('Hallback' -> 'Hallbeck').
    long_name = 'Chris Hallbeck - Minimumble (from Tapastic)'
    url = 'https://tapastic.com/series/Minimumble'
    # NOTE(review): the category key keeps the 'HALLBACK' spelling on purpose;
    # it is presumably shared with other Chris Hallbeck classes - rename them
    # all together or not at all.
    _categories = ('HALLBACK', )
4905
4906
4907
class ChrisHallbeckBiffTapa(GenericTapasticComic):
    """Retriever for Chris Hallbeck's The Book of Biff series hosted on Tapastic."""
    # The same comic is also published at:
    #   http://chrishallbeck.tumblr.com
    #   http://thebookofbiff.com
    name = 'hallbeckbiff-tapa'
    # Author's name fixed in the display string ('Hallback' -> 'Hallbeck').
    long_name = 'Chris Hallbeck - The Book of Biff (from Tapastic)'
    url = 'https://tapastic.com/series/Biff'
    # NOTE(review): the category key keeps the 'HALLBACK' spelling on purpose;
    # it is presumably shared with other Chris Hallbeck classes - rename them
    # all together or not at all.
    _categories = ('HALLBACK', )
4915
4916
4917
class RandoWisTapa(GenericTapasticComic):
    """Retriever for the RandoWis series hosted on Tapastic."""
    # The same comic is also published at:
    #   https://randowis.com
    name = 'randowis-tapa'
    long_name = 'RandoWis (from Tapastic)'
    url = 'https://tapastic.com/series/RandoWis'
4923
4924
4925
class PigeonGazetteTapa(GenericTapasticComic):
    """Retriever for The Pigeon Gazette series hosted on Tapastic."""
    # The same comic is also published at:
    #   http://thepigeongazette.tumblr.com
    name = 'pigeon-tapa'
    long_name = 'The Pigeon Gazette (from Tapastic)'
    url = 'https://tapastic.com/series/The-Pigeon-Gazette'
4931
4932
4933
class TheOdd1sOutTapa(GenericTapasticComic):
    """Retriever for The Odd 1s Out series hosted on Tapastic."""
    # The same comic is also published at:
    #   http://theodd1sout.com
    #   http://theodd1sout.tumblr.com
    name = 'theodd-tapa'
    long_name = 'The Odd 1s Out (from Tapastic)'
    url = 'https://tapastic.com/series/Theodd1sout'
4940
4941
4942
class TheWorldIsFlatTapa(GenericTapasticComic):
    """Retriever for The World Is Flat series hosted on Tapastic."""
    # The same comic is also published at:
    #   http://theworldisflatcomics.tumblr.com
    name = 'flatworld-tapa'
    long_name = 'The World Is Flat (from Tapastic)'
    url = 'https://tapastic.com/series/The-World-is-Flat'
4948
4949
4950
class MisterAndMeTapa(GenericTapasticComic):
    """Retriever for the Mister & Me series hosted on Tapastic."""
    # The same comic is also published at:
    #   http://www.mister-and-me.com
    #   http://www.gocomics.com/mister-and-me
    name = 'mister-tapa'
    long_name = 'Mister & Me (from Tapastic)'
    url = 'https://tapastic.com/series/Mister-and-Me'
4957
4958
4959
class TalesOfAbsurdityTapa(GenericTapasticComic):
    """Retriever for the Tales of Absurdity series hosted on Tapastic."""
    # The same comic is also published at:
    #   http://talesofabsurdity.com
    #   http://talesofabsurdity.tumblr.com
    name = 'absurdity-tapa'
    long_name = 'Tales of Absurdity (from Tapastic)'
    url = 'http://tapastic.com/series/Tales-Of-Absurdity'
    _categories = ('ABSURDITY',)
4967
4968
4969
class BFGFSTapa(GenericTapasticComic):
    """Retriever for the BFGFS series hosted on Tapastic."""
    # The same comic is also published at:
    #   http://bfgfs.com
    #   http://bfgfs.tumblr.com
    name = 'bfgfs-tapa'
    long_name = 'BFGFS (from Tapastic)'
    url = 'https://tapastic.com/series/BFGFS'
4976
4977
4978
class DoodleForFoodTapa(GenericTapasticComic):
    """Retriever for the Doodle For Food series hosted on Tapastic."""
    # The same comic is also published at:
    #   http://doodleforfood.com
    name = 'doodle-tapa'
    long_name = 'Doodle For Food (from Tapastic)'
    url = 'https://tapastic.com/series/Doodle-for-Food'
4984
4985
4986
class MrLovensteinTapa(GenericTapasticComic):
    """Retriever for the Mr. Lovenstein series hosted on Tapastic."""
    # NOTE(review): the earlier "Also on" note pointed back at this very
    # Tapastic URL. The comic's own site is presumably
    # http://www.mrlovenstein.com - confirm.
    name = 'mrlovenstein-tapa'
    long_name = 'Mr. Lovenstein (from Tapastic)'
    url = 'https://tapastic.com/series/MrLovenstein'
4992
4993
4994
class CassandraCalinTapa(GenericTapasticComic):
    """Retriever for Cassandra Calin's C. Cassandra series hosted on Tapastic."""
    # The same comic is also published at:
    #   http://cassandracalin.com
    #   http://c-cassandra.tumblr.com
    name = 'cassandra-tapa'
    long_name = 'Cassandra Calin (from Tapastic)'
    url = 'https://tapastic.com/series/C-Cassandra-comics'
5001
5002
5003
class WafflesAndPancakes(GenericTapasticComic):
    """Retriever for the Waffles And Pancakes series hosted on Tapastic."""
    # The same comic is also published at:
    #   http://wandpcomic.com
    name = 'waffles'
    long_name = 'Waffles And Pancakes'
    url = 'https://tapastic.com/series/Waffles-and-Pancakes'
5009
5010
5011
class YesterdaysPopcornTapastic(GenericTapasticComic):
    """Retriever for the Yesterday's Popcorn series hosted on Tapastic."""
    # The same comic is also published at:
    #   http://www.yesterdayspopcorn.com
    #   http://yesterdayspopcorn.tumblr.com
    name = 'popcorn-tapa'
    long_name = "Yesterday's Popcorn (from Tapastic)"
    url = 'https://tapastic.com/series/Yesterdays-Popcorn'
5018
5019
5020
class OurSuperAdventureTapastic(GenericTapasticComic):
    """Retriever for the Our Super Adventure series hosted on Tapastic."""
    # The same comic is also published at:
    #   http://www.oursuperadventure.com
    # Related sites listed with it:
    #   http://sarahssketchbook.tumblr.com
    #   http://sarahgraley.com
    # Unlike most sibling classes, the short name ends in '-tapastic', not '-tapa'.
    name = 'superadventure-tapastic'
    long_name = 'Our Super Adventure (from Tapastic)'
    url = 'https://tapastic.com/series/Our-Super-Adventure'
5028
5029
5030
class NamelessPCs(GenericTapasticComic):
    """Retriever for the Nameless PCs series hosted on Tapastic."""
    # The same comic is also published at:
    #   http://namelesspcs.com
    name = 'namelesspcs-tapa'
    long_name = 'NamelessPCs (from Tapastic)'
    url = 'https://tapastic.com/series/NamelessPC'
5036
5037
5038
class UbertoolTapa(GenericTapasticComic):
    """Retriever for the Ubertool series hosted on Tapastic."""
    # The same comic is also published at:
    #   http://ubertoolcomic.com
    #   http://ubertool.tumblr.com
    name = 'ubertool-tapa'
    long_name = 'Ubertool (from Tapastic)'
    url = 'https://tapastic.com/series/ubertool'
    _categories = ('UBERTOOL',)
5046
5047
5048
class BarteNerdsTapa(GenericTapasticComic):
    """Retriever for the BarteNerds series hosted on Tapastic."""
    # The same comic is also published at:
    #   http://www.bartenerds.com
    name = 'bartenerds-tapa'
    long_name = 'BarteNerds (from Tapastic)'
    url = 'https://tapastic.com/series/BarteNERDS'
5054
5055
5056
class SmallBlueYonderTapa(GenericTapasticComic):
    """Retriever for the Small Blue Yonder series hosted on Tapastic."""
    # The same comic is also published at:
    #   http://www.smallblueyonder.com
    name = 'smallblue-tapa'
    long_name = 'Small Blue Yonder (from Tapastic)'
    url = 'https://tapastic.com/series/Small-Blue-Yonder'
5062
5063
5064
class TizzyStitchBirdTapa(GenericTapasticComic):
    """Retriever for the Tizzy Stitch Bird series hosted on Tapastic."""
    # The same comic is also published at:
    #   http://tizzystitchbird.com
    #   http://tizzystitchbird.tumblr.com
    #   http://www.webtoons.com/en/challenge/tizzy-stitchbird/list?title_no=50082
    name = 'tizzy-tapa'
    long_name = 'Tizzy Stitch Bird (from Tapastic)'
    url = 'https://tapastic.com/series/TizzyStitchbird'
5072
5073
5074
class RockPaperCynicTapa(GenericTapasticComic):
    """Retriever for the Rock Paper Cynic series hosted on Tapastic."""
    # The same comic is also published at:
    #   http://www.rockpapercynic.com
    #   http://rockpapercynic.tumblr.com
    name = 'rpc-tapa'
    long_name = 'Rock Paper Cynic (from Tapastic)'
    url = 'https://tapastic.com/series/rockpapercynic'
5081
5082
5083
def get_subclasses(klass):
    """Return every class deriving from klass, directly or transitively.

    The direct subclasses come first, followed by the descendants of each
    of them in turn. No de-duplication is done: a class reachable through
    several parents appears once per path.
    """
    children = klass.__subclasses__()
    descendants = [sub for child in children for sub in get_subclasses(child)]
    return children + descendants
5089
5090
5091
def remove_st_nd_rd_th_from_date(string):
    """Strip English ordinal suffixes so that the day number can be parsed.

    "1st", "2nd", "3rd" and "4th" become "1", "2", "3" and "4". A suffix is
    removed only when it directly follows a digit and ends a word, so month
    and day names containing those letters ("August", "Monday", "Sunday",
    "Saturday") are left intact. The former blind str.replace chain mangled
    them and only patched "August" back.

    Returns the cleaned string; an input without ordinals is returned as is.
    """
    return re.sub(r'(?<=\d)(?:st|nd|rd|th)\b', '', string)
5099
5100
5101
def string_to_date(string, date_format, local=DEFAULT_LOCAL):
    """Convert a string to a date object.

    Wrapper around datetime.datetime.strptime which parses under a given
    locale, so that locale-dependent directives (e.g. month or day names)
    are understood in the expected language.

    Arguments:
        string: text to parse.
        date_format: strptime format, as described in
            https://docs.python.org/2/library/datetime.html#strftime-and-strptime-behavior
        local: name of the locale to parse with.

    Returns the parsed datetime.date.

    Raises ValueError if the string does not match the format and
    locale.Error if the requested locale is not available.
    """
    prev_locale = locale.setlocale(locale.LC_ALL)
    if local == prev_locale:
        # Already running under the requested locale: nothing to switch.
        return datetime.datetime.strptime(string, date_format).date()
    locale.setlocale(locale.LC_ALL, local)
    try:
        return datetime.datetime.strptime(string, date_format).date()
    finally:
        # The locale is process-wide state: restore it even when parsing
        # fails, otherwise a single bad date would leave it switched.
        locale.setlocale(locale.LC_ALL, prev_locale)
5112
5113
5114
# Registry of comic classes, built once at import time from the class
# hierarchy rooted at GenericComic.
COMICS = set(get_subclasses(GenericComic))
# Only classes with a name are kept (a None name presumably marks an
# abstract/intermediate base class - confirm against GenericComic).
VALID_COMICS = [comic for comic in COMICS if comic.name is not None]
# Lookup by short name; the assertion fails if two classes share a name.
COMIC_NAMES = {comic.name: comic for comic in VALID_COMICS}
assert len(COMIC_NAMES) == len(VALID_COMICS)
# Class identifiers must be unique as well.
CLASS_NAMES = {comic.__name__ for comic in VALID_COMICS}
assert len(CLASS_NAMES) == len(VALID_COMICS)
5120