Completed
Push — master ( ec8079...b4c51c )
by De
01:07
created

comics.py (35 issues)

Code
1
#! /usr/bin/python3
2
# vim: set expandtab tabstop=4 shiftwidth=4 :
3
"""Module to retrieve webcomics"""
4
5
from comic_abstract import GenericComic, get_date_for_comic
6
import re
7
from datetime import date, timedelta
8
import datetime
9
from urlfunctions import get_soup_at_url, urljoin_wrapper,\
10
    convert_iri_to_plain_ascii_uri, load_json_at_url, urlopen_wrapper
11
import json
12
import locale
13
import urllib
14
15
DEFAULT_LOCAL = 'en_GB.UTF-8'
16
17
18
class Xkcd(GenericComic):
    """Retriever for xkcd comics, driven by the site's JSON endpoints."""
    name = 'xkcd'
    long_name = 'xkcd'
    url = 'http://xkcd.com'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Yield the comics following `last_comic`.

        Implementation of GenericComic's abstract method. Numbering resumes
        right after last_comic['num'] (or starts at 1 when `last_comic` is
        falsy) and runs up to the 'num' reported by the site's info.0.json.
        """
        start = last_comic['num'] + 1 if last_comic else 1
        latest_info = load_json_at_url(urljoin_wrapper(cls.url, 'info.0.json'))
        newest = latest_info['num']

        for num in range(start, newest + 1):
            if num == 404:
                # Number 404 is never requested.
                continue
            json_url = urljoin_wrapper(cls.url, '%d/info.0.json' % num)
            comic = load_json_at_url(json_url)
            comic.update({
                'img': [comic['img']],
                'prefix': '%d-' % num,
                'json_url': json_url,
                'url': urljoin_wrapper(cls.url, str(num)),
                'day': int(comic['day']),
                'month': int(comic['month']),
                'year': int(comic['year']),
            })
            assert comic['num'] == num
            yield comic
44
45
46
# Helper functions corresponding to get_url_from_link/get_url_from_archive_element
47
48
49
@classmethod
def get_href(cls, link):
    """Return the 'href' entry of `link`.

    Usable as an implementation of get_url_from_link or
    get_url_from_archive_element.
    """
    href = link['href']
    return href
53
54
55
@classmethod
def join_cls_url_to_href(cls, link):
    """Return the link's 'href' joined onto the class `url`.

    Usable as an implementation of get_url_from_link or
    get_url_from_archive_element.
    """
    base, href = cls.url, link['href']
    return urljoin_wrapper(base, href)
59
60
61
class GenericNavigableComic(GenericComic):
    """Generic class for "navigable" comics: with first/next arrows.

    This class applies to comics where previous and next comics can be
    accessed from a given comic. Once given a starting point (either
    the first comic or the last comic retrieved), it handles the
    navigation, the retrieval of the soup object and the setting of
    the 'url' attribute on retrieved comics. This greatly limits the
    amount of boilerplate code in the different implementation classes.

    The `get_next_comic` method is implemented in terms of more
    specialized methods to be implemented/overridden:
        - get_first_comic_link
        - get_navi_link
        - get_comic_info
        - get_url_from_link
    """
    _categories = ('NAVIGABLE', )

    @classmethod
    def get_first_comic_link(cls):
        """Get link to the first comic.

        Sometimes this can be retrieved from any comic page, sometimes from
        the archive page, sometimes it doesn't exist at all and one has
        to iterate backward to find it before hardcoding the result found.
        """
        raise NotImplementedError

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next (or previous - for dev purposes) comic.

        `next_` is True to look for the next comic, False for the previous.
        """
        raise NotImplementedError

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        The returned dict must not contain a 'url' key (it is added by
        `get_next_comic`); returning None makes the comic be skipped.
        """
        raise NotImplementedError

    @classmethod
    def get_url_from_link(cls, link):
        """Get url corresponding to a link. Default implementation is similar to get_href."""
        return link['href']

    @classmethod
    def get_next_link(cls, last_soup):
        """Get link to next comic (and log it)."""
        link = cls.get_navi_link(last_soup, True)
        cls.log("Next link is %s" % link)
        return link

    @classmethod
    def get_prev_link(cls, last_soup):
        """Get link to previous comic (and log it)."""
        link = cls.get_navi_link(last_soup, False)
        cls.log("Prev link is %s" % link)
        return link

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generic implementation of get_next_comic for navigable comics.

        Starts after last_comic['url'] when `last_comic` is given, otherwise
        from the first comic link, and yields one info dict per comic with
        its 'url' key set.
        """
        url = last_comic['url'] if last_comic else None
        cls.log("starting 'get_next_comic' from %s" % url)
        if url:
            next_comic = cls.get_next_link(get_soup_at_url(url))
        else:
            next_comic = cls.get_first_comic_link()
        cls.log("next/first comic will be %s (url is %s)" % (str(next_comic), url))
        # For development, navigation can be verified here with:
        # cls.check_navigation(url)
        while next_comic:
            prev_url, url = url, cls.get_url_from_link(next_comic)
            if prev_url == url:
                # A page linking to itself would loop forever: stop here.
                cls.log("got same url %s" % url)
                break
            cls.log("about to get %s (%s)" % (url, str(next_comic)))
            soup = get_soup_at_url(url)
            comic = cls.get_comic_info(soup, next_comic)
            if comic is not None:
                assert 'url' not in comic
                comic['url'] = url
                yield comic
            next_comic = cls.get_next_link(soup)
            cls.log("next comic will be %s" % str(next_comic))

    @classmethod
    def check_first_link(cls):
        """Check that navigation to first comic seems to be working - for dev purposes.

        Returns True when a first link exists and its URL can be fetched.
        """
        # `import urllib` alone does not guarantee that the `urllib.error`
        # submodule is loaded, so import it explicitly before using it in
        # the `except` clause below.
        import urllib.error

        cls.log("about to check first link")
        ok = True
        firstlink = cls.get_first_comic_link()
        if firstlink is None:
            print("From %s : no first link" % cls.url)
            ok = False
        else:
            firsturl = cls.get_url_from_link(firstlink)
            try:
                get_soup_at_url(firsturl)
            except urllib.error.HTTPError:
                print("From %s : invalid first url" % cls.url)
                ok = False
        cls.log("checked first link -> returned %d" % ok)
        return ok

    @classmethod
    def check_prev_next_links(cls, url):
        """Check that navigation to prev/next from a given URL seems to be working - for dev purposes.

        Going backward then forward (and forward then backward) is expected
        to lead back to `url`. Returns True when no inconsistency is found.
        """
        cls.log("about to check prev/next from %s" % url)
        ok = True
        if url is None:
            prevlink, nextlink = None, None
        else:
            soup = get_soup_at_url(url)
            prevlink, nextlink = cls.get_prev_link(soup), cls.get_next_link(soup)
        if prevlink is None and nextlink is None:
            print("From %s : no previous nor next" % url)
            ok = False
        else:
            if prevlink:
                prevurl = cls.get_url_from_link(prevlink)
                prevsoup = get_soup_at_url(prevurl)
                prevnextlink = cls.get_next_link(prevsoup)
                prevnext = cls.get_url_from_link(prevnextlink) if prevnextlink is not None else "NO URL"
                if prevnext != url:
                    print("From %s, going backward then forward leads to %s" % (url, prevnext))
                    ok = False
            if nextlink:
                nexturl = cls.get_url_from_link(nextlink)
                # A "next" link pointing to the page itself is not followed.
                if nexturl != url:
                    nextsoup = get_soup_at_url(nexturl)
                    nextprevlink = cls.get_prev_link(nextsoup)
                    nextprev = cls.get_url_from_link(nextprevlink) if nextprevlink is not None else "NO URL"
                    if nextprev != url:
                        print("From %s, going forward then backward leads to %s" % (url, nextprev))
                        ok = False
        cls.log("checked prev/next from %s -> returned %d" % (url, ok))
        return ok

    @classmethod
    def check_navigation(cls, url):
        """Check that navigation functions seem to be working - for dev purposes.

        Runs both `check_first_link` and `check_prev_next_links(url)` and
        returns True only when both succeed.
        """
        cls.log("about to check navigation from %s" % url)
        first = cls.check_first_link()
        prevnext = cls.check_prev_next_links(url)
        ok = first and prevnext
        cls.log("checked navigation from %s -> returned %d" % (url, ok))
        return ok
207
208
209
class GenericListableComic(GenericComic):
    """Generic class for "listable" comics: with a list of comics (aka 'archive').

    The `get_next_comic` method is implemented in terms of more
    specialized methods to be implemented/overridden:
        - get_archive_elements
        - get_url_from_archive_element
        - get_comic_info
    """
    _categories = ('LISTABLE', )

    @classmethod
    def get_archive_elements(cls):
        """Return an iterable of archive elements."""
        raise NotImplementedError

    @classmethod
    def get_url_from_archive_element(cls, archive_elt):
        """Return the url matching an archive element."""
        raise NotImplementedError

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Return the information dict for one comic (or None to skip it)."""
        raise NotImplementedError

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generic implementation of get_next_comic for listable comics.

        Archive elements are skipped until the url of `last_comic` has been
        seen; every element after it is fetched and yielded.
        """
        pending_url = last_comic['url'] if last_comic else None
        for elt in cls.get_archive_elements():
            url = cls.get_url_from_archive_element(elt)
            cls.log("considering %s" % url)
            if pending_url is not None:
                # Still looking for the last retrieved comic.
                if pending_url == url:
                    pending_url = None
                continue
            cls.log("about to get %s (%s)" % (url, str(elt)))
            soup = get_soup_at_url(url)
            info = cls.get_comic_info(soup, elt)
            if info is None:
                continue
            assert 'url' not in info
            info['url'] = url
            yield info
        if pending_url is not None:
            print("Did not find %s : there might be a problem" % pending_url)
254
255
# Helper functions corresponding to get_first_comic_link/get_navi_link
256
257
258
@classmethod
def get_link_rel_next(cls, last_soup, next_):
    """Implementation of get_navi_link based on <link rel="next"/"prev">."""
    rel = 'next' if next_ else 'prev'
    return last_soup.find('link', rel=rel)
262
263
264
@classmethod
def get_a_rel_next(cls, last_soup, next_):
    """Implementation of get_navi_link based on <a rel="next"/"prev">."""
    rel = 'next' if next_ else 'prev'
    return last_soup.find('a', rel=rel)
268
269
270
@classmethod
def get_a_navi_navinext(cls, last_soup, next_):
    """Implementation of get_navi_link based on <a class="navi navi-next"/"navi navi-prev">."""
    css_class = 'navi navi-next' if next_ else 'navi navi-prev'
    return last_soup.find('a', class_=css_class)
274
275
276
@classmethod
def get_a_navi_comicnavnext_navinext(cls, last_soup, next_):
    """Implementation of get_navi_link based on the 'navi comic-nav-* navi-*' anchors."""
    if next_:
        css_class = 'navi comic-nav-next navi-next'
    else:
        css_class = 'navi comic-nav-previous navi-prev'
    return last_soup.find('a', class_=css_class)
280
281
282
@classmethod
def get_a_comicnavbase_comicnavnext(cls, last_soup, next_):
    """Implementation of get_navi_link based on the 'comic-nav-base comic-nav-*' anchors."""
    if next_:
        css_class = 'comic-nav-base comic-nav-next'
    else:
        css_class = 'comic-nav-base comic-nav-previous'
    return last_soup.find('a', class_=css_class)
286
287
288
@classmethod
def get_a_navi_navifirst(cls):
    """Implementation of get_first_comic_link: the <a class="navi navi-first"> of the page at `cls.url`."""
    home_soup = get_soup_at_url(cls.url)
    return home_soup.find('a', class_='navi navi-first')
292
293
294
@classmethod
def get_div_navfirst_a(cls):
    """Implementation of get_first_comic_link.

    Returns the <a> found inside the <div class="nav-first"> of the page at
    `cls.url`, or None when that div is not present (instead of failing
    with an AttributeError on the missing div).
    """
    div = get_soup_at_url(cls.url).find('div', class_="nav-first")
    if div is None:
        return None
    return div.find('a')
298
299
300
@classmethod
def get_a_comicnavbase_comicnavfirst(cls):
    """Implementation of get_first_comic_link: the 'comic-nav-base comic-nav-first' anchor of the page at `cls.url`."""
    home_soup = get_soup_at_url(cls.url)
    return home_soup.find('a', class_='comic-nav-base comic-nav-first')
304
305
306
@classmethod
def simulate_first_link(cls):
    """Implementation of get_first_comic_link building a link-like dict
    from the `first_url` attribute provided by the class.

    Note: the first URL can easily be found using:
    `get_first_comic_link = navigate_to_first_comic`.
    """
    fake_link = {'href': cls.first_url}
    return fake_link
315
316
317
@classmethod
def navigate_to_first_comic(cls):
    """Implementation of get_first_comic_link navigating from a user-provided
    URL back to the first comic.

    Sometimes the first comic cannot be reached directly, so one has to
    follow the "previous" links until there is none left. Once that URL is
    known it is better to hardcode it, but for development purposes it is
    convenient to have an automatic way to find it.

    The URL found can then easily be used via `simulate_first_link`.
    """
    url = input("Get starting URL: ")
    while True:
        print(url)
        prev_link = cls.get_prev_link(get_soup_at_url(url))
        if not prev_link:
            # No previous comic: `url` is the first one.
            return {'href': url}
        url = cls.get_url_from_link(prev_link)
338
339
340
class GenericEmptyComic(GenericComic):
    """Generic class for comics where nothing is to be done.

    It can be useful to temporarily deactivate comics that do not work
    properly by replacing `class MyComic(GenericWhateverComic)` with
    `class MyComic(GenericEmptyComic, GenericWhateverComic)`.
    """
    _categories = ('EMPTY', )

    @classmethod
    def get_next_comic(cls, last_comic):
        """Implementation of get_next_comic returning no comics."""
        cls.log("comic is considered as empty - returning no comic")
        no_comics = []
        return no_comics
353
354
355 View Code Duplication
class ExtraFabulousComics(GenericNavigableComic):
    """Class to retrieve Extra Fabulous Comics."""
    name = 'efc'
    long_name = 'Extra Fabulous Comics'
    url = 'http://extrafabulouscomics.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        Images are the <img> tags whose src starts with the site's
        wp-content/uploads path; title and date come from the og:title and
        article:published_time meta tags.
        """
        # re.escape keeps the dots of the site URL literal instead of
        # letting them match any character.
        img_src_re = re.compile('^%s/wp-content/uploads/' % re.escape(cls.url))
        imgs = soup.find_all('img', src=img_src_re)
        title = soup.find('meta', property='og:title')['content']
        # Only the leading YYYY-MM-DD part of the timestamp is used.
        date_str = soup.find('meta', property='article:published_time')['content'][:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        return {
            'title': title,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'prefix': title + '-'
        }
379
380
381 View Code Duplication
class GenericLeMondeBlog(GenericNavigableComic):
    """Generic class to retrieve comics from Le Monde blogs.

    Subclasses are expected to provide `first_url`.
    """
    _categories = ('LEMONDE', 'FRANCAIS')
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dict for the comic found in `soup`."""
        shortlink = soup.find('link', rel='shortlink')['href']
        og_title = soup.find('meta', property='og:title')['content']
        raw_date = soup.find("span", class_="entry-date").string
        # The date is parsed with the French locale.
        published = string_to_date(raw_date, "%d %B %Y", "fr_FR.utf8")
        og_images = soup.find_all('meta', property='og:image')
        img_urls = [convert_iri_to_plain_ascii_uri(meta['content']) for meta in og_images]
        return {
            'title': og_title,
            'url2': shortlink,
            'img': img_urls,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
404
405
406
class ZepWorld(GenericLeMondeBlog):
    """Zep World comics (Le Monde blog)."""
    long_name = 'Zep World'
    name = 'zep'
    first_url = 'http://zepworld.blog.lemonde.fr/2014/10/31/bientot-le-blog-de-zep/'
    url = 'http://zepworld.blog.lemonde.fr'
412
413
414
class Vidberg(GenericLeMondeBlog):
    """Vidberg comics (Le Monde blog)."""
    long_name = "Vidberg - l'actu en patates"
    name = 'vidberg'
    # Starting point only: this is not the very first comic, no efficient
    # way to retrieve that one was found.
    first_url = 'http://vidberg.blog.lemonde.fr/2012/02/09/revue-de-campagne-la-campagne-du-modem-semballe/'
    url = 'http://vidberg.blog.lemonde.fr'
421
422
423
class Plantu(GenericLeMondeBlog):
    """Plantu comics (Le Monde blog)."""
    long_name = 'Plantu'
    name = 'plantu'
    first_url = 'http://plantu.blog.lemonde.fr/2014/10/28/stress-test-a-bruxelles/'
    url = 'http://plantu.blog.lemonde.fr'
429
430
431
class XavierGorce(GenericLeMondeBlog):
    """Xavier Gorce comics (Le Monde blog)."""
    long_name = 'Xavier Gorce'
    name = 'gorce'
    first_url = 'http://xaviergorce.blog.lemonde.fr/2015/01/09/distinction/'
    url = 'http://xaviergorce.blog.lemonde.fr'
437
438
439
class CartooningForPeace(GenericLeMondeBlog):
    """Cartooning For Peace comics (Le Monde blog)."""
    long_name = 'Cartooning For Peace'
    name = 'forpeace'
    first_url = 'http://cartooningforpeace.blog.lemonde.fr/2014/12/15/bado/'
    url = 'http://cartooningforpeace.blog.lemonde.fr'
445
446
447
class Aurel(GenericLeMondeBlog):
    """Aurel comics (Le Monde blog)."""
    long_name = 'Aurel'
    name = 'aurel'
    first_url = 'http://aurel.blog.lemonde.fr/2014/09/29/le-senat-repasse-a-droite/'
    url = 'http://aurel.blog.lemonde.fr'
453
454
455
class LesCulottees(GenericLeMondeBlog):
    """Les Culottees comics (Le Monde blog)."""
    long_name = 'Les Culottees'
    name = 'culottees'
    first_url = 'http://lesculottees.blog.lemonde.fr/2016/01/11/clementine-delait-femme-a-barbe/'
    url = 'http://lesculottees.blog.lemonde.fr'
461
462
463
class UneAnneeAuLycee(GenericLeMondeBlog):
    """Une Annee Au Lycee comics (Le Monde blog)."""
    long_name = 'Une Annee au Lycee'
    name = 'lycee'
    first_url = 'http://uneanneeaulycee.blog.lemonde.fr/2016/06/13/la-semaine-du-bac-est-arrivee/'
    url = 'http://uneanneeaulycee.blog.lemonde.fr'
469
470
471
class Rall(GenericNavigableComic):
    """Class to retrieve Ted Rall comics."""
    # Also on http://www.gocomics.com/tedrall
    name = 'rall'
    long_name = "Ted Rall"
    url = "http://rall.com/comic"
    _categories = ('RALL', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    # Starting point only: this is not the very first comic, no efficient
    # way to retrieve that one was found.
    first_url = "http://rall.com/2014/01/30/los-angeles-times-cartoon-well-miss-those-california-flowers"

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dict for the comic found in `soup`."""
        og_title = soup.find('meta', property='og:title')['content']
        author_name = soup.find("span", class_="author vcard").find("a").string
        raw_date = soup.find("span", class_="entry-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        og_desc = soup.find('meta', property='og:description')['content']
        content_imgs = soup.find('div', class_='entry-content').find_all('img')
        # The last 7 images are social media buttons: drop them.
        comic_imgs = content_imgs[:-7]
        return {
            'title': og_title,
            'author': author_name,
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'description': og_desc,
            'img': [img['src'] for img in comic_imgs],
        }
502
503
504
class Dilem(GenericNavigableComic):
    """Class to retrieve Ali Dilem comics."""
    name = 'dilem'
    long_name = 'Ali Dilem'
    url = 'http://information.tv5monde.com/dilem'
    _categories = ('FRANCAIS', )
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = "http://information.tv5monde.com/dilem/2004-06-26"

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic."""
        # The site's classes are swapped: 'prev' leads to the next comic
        # and 'next' to the previous one.
        css_class = 'prev' if next_ else 'next'
        item = last_soup.find('li', class_=css_class)
        if not item:
            return None
        return item.find('a')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dict for the comic found in `soup`."""
        shortlink = soup.find('link', rel='shortlink')['href']
        twitter_title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        og_images = soup.find_all('meta', property='og:image')
        # Only the leading YYYY-MM-DD part of the dc:date value is used.
        raw_date = soup.find('span', property='dc:date')['content'][:10]
        published = string_to_date(raw_date, "%Y-%m-%d")
        return {
            'short_url': shortlink,
            'title': twitter_title,
            'img': [meta['content'] for meta in og_images],
            'day': published.day,
            'month': published.month,
            'year': published.year,
        }
538
539
540
class SpaceAvalanche(GenericNavigableComic):
    """Class to retrieve Space Avalanche comics."""
    name = 'avalanche'
    long_name = 'Space Avalanche'
    url = 'http://www.spaceavalanche.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return a hardcoded link-like dict for the first comic."""
        first_link = {
            'href': "http://www.spaceavalanche.com/2009/02/02/irish-sea/",
            'title': "Irish Sea",
        }
        return first_link

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dict; title and date come from `link`."""
        url_date_re = re.compile('.*/([0-9]*)/([0-9]*)/([0-9]*)/.*$')
        link_title = link['title']
        comic_url = cls.get_url_from_link(link)
        # The date is read from the /year/month/day/ part of the URL.
        year, month, day = map(int, url_date_re.match(comic_url).groups())
        entry_imgs = soup.find("div", class_="entry").find_all("img")
        return {
            'title': link_title,
            'day': day,
            'month': month,
            'year': year,
            'img': [img['src'] for img in entry_imgs],
        }
568
569
570
class ZenPencils(GenericNavigableComic):
    """Class to retrieve ZenPencils comics."""
    # Also on http://zenpencils.tumblr.com
    # Also on http://www.gocomics.com/zen-pencils
    name = 'zenpencils'
    long_name = 'Zen Pencils'
    url = 'http://zenpencils.com'
    _categories = ('ZENPENCILS', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = "http://zenpencils.com/comic/1-ralph-waldo-emerson-make-them-cry/"

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dict for the comic found in `soup`."""
        comic_imgs = soup.find('div', id='comic').find_all('img')
        post_div = soup.find('div', class_='post-content')
        author_name = post_div.find("span", class_="post-author").find("a").string
        og_title = soup.find('meta', property='og:title')['content']
        raw_date = post_div.find('span', class_='post-date').string
        published = string_to_date(raw_date, "%B %d, %Y")
        # Sanity checks on the page structure.
        assert comic_imgs
        assert all(img['alt'] == img['title'] for img in comic_imgs)
        assert all(img['alt'] in (og_title, "") for img in comic_imgs)
        og_desc = soup.find('meta', property='og:description')['content']
        img_urls = [urljoin_wrapper(cls.url, img['src']) for img in comic_imgs]
        return {
            'title': og_title,
            'description': og_desc,
            'author': author_name,
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': img_urls,
        }
605
606
607
class ItsTheTie(GenericNavigableComic):
    """Class to retrieve It's the tie comics."""
    # Also on http://itsthetie.tumblr.com
    # Also on https://tapastic.com/series/itsthetie
    name = 'tie'
    long_name = "It's the tie"
    url = "http://itsthetie.com"
    _categories = ('TIE', )
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dict for the comic found in `soup`."""
        comic_title = soup.find('h1', class_='comic-title').find('a').string
        raw_date = soup.find('header', class_='comic-meta entry-meta').find('a').string
        published = string_to_date(raw_date, "%B %d, %Y")
        # Bonus images may or may not be in meta og:image.
        main_srcs = [meta['content'] for meta in soup.find_all('meta', property='og:image')]
        bonus_tags = soup.find_all('img', attrs={'data-oversrc': True})
        bonus_srcs = [tag['data-oversrc'] for tag in bonus_tags]
        # Append only the bonus images not already listed.
        merged = main_srcs + [src for src in bonus_srcs if src not in main_srcs]
        img_srcs = [src for src in merged if not src.endswith("/2016/01/bonus-panel.png")]
        tag_meta = soup.find('meta', property='article:tag')
        if tag_meta:
            tags = tag_meta['content']
        else:
            tags = ""
        return {
            'title': comic_title,
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': img_srcs,
            'tags': tags,
        }
641
642
643
class PenelopeBagieu(GenericNavigableComic):
    """Class to retrieve comics from Penelope Bagieu's blog."""
    name = 'bagieu'
    long_name = 'Ma vie est tout a fait fascinante (Bagieu)'
    url = 'http://www.penelope-jolicoeur.com'
    _categories = ('FRANCAIS', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.penelope-jolicoeur.com/2007/02/ma-vie-mon-oeuv.html'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dict for the comic found in `soup`."""
        raw_date = soup.find('h2', class_='date-header').string
        # The date is parsed with the French locale.
        published = string_to_date(raw_date, "%A %d %B %Y", "fr_FR.utf8")
        body_imgs = soup.find('div', class_='entry-body').find_all('img')
        entry_title = soup.find('h3', class_='entry-header').string
        return {
            'title': entry_title,
            'img': [img['src'] for img in body_imgs],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
667
668
669 View Code Duplication
class OneOneOneOneComic(GenericNavigableComic):
    """Class to retrieve 1111 Comics."""
    # Also on http://comics1111.tumblr.com
    # Also on https://tapastic.com/series/1111-Comics
    name = '1111'
    long_name = '1111 Comics'
    url = 'http://www.1111comics.me'
    _categories = ('ONEONEONEONE', )
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dict for the comic found in `soup`."""
        comic_title = soup.find('h1', class_='comic-title').find('a').string
        raw_date = soup.find('header', class_='comic-meta entry-meta').find('a').string
        published = string_to_date(raw_date, "%B %d, %Y")
        og_images = soup.find_all('meta', property='og:image')
        img_urls = [meta['content'] for meta in og_images]
        return {
            'title': comic_title,
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': img_urls,
        }
694
695
696 View Code Duplication
class AngryAtNothing(GenericNavigableComic):
    """Class to retrieve Angry at Nothing comics."""
    # Also on http://tapastic.com/series/Comics-yeah-definitely-comics-
    name = 'angry'
    long_name = 'Angry At Nothing'
    url = 'http://www.angryatnothing.net'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dict for the comic found in `soup`."""
        title_anchor = soup.find('h1', class_='comic-title').find('a')
        comic_title = title_anchor.string
        date_anchor = soup.find('header', class_='comic-meta entry-meta').find('a')
        published = string_to_date(date_anchor.string, "%B %d, %Y")
        og_images = soup.find_all('meta', property='og:image')
        return {
            'title': comic_title,
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [meta['content'] for meta in og_images],
        }
719
720
721
class NeDroid(GenericNavigableComic):
    """Class to retrieve NeDroid comics."""
    name = 'nedroid'
    long_name = 'NeDroid'
    url = 'http://nedroid.com'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        The comic number is read from the shortlink ('?p=<num>') and the
        date from the file name of the single comic image.
        """
        # re.escape keeps the dot of the site URL literal in the pattern.
        short_url_re = re.compile('^%s/\\?p=([0-9]*)' % re.escape(cls.url))
        comic_url_re = re.compile('//nedroid.com/comics/([0-9]*)-([0-9]*)-([0-9]*).*')
        short_url = cls.get_url_from_link(soup.find('link', rel='shortlink'))
        num = int(short_url_re.match(short_url).groups()[0])
        imgs = soup.find('div', id='comic').find_all('img')
        # Check the single-image assumption before indexing into the list,
        # so a violation is reported by the assertion itself.
        assert len(imgs) == 1
        img = imgs[0]
        year, month, day = [int(s) for s in comic_url_re.match(img['src']).groups()]
        title = img['alt']
        title2 = img['title']
        return {
            'short_url': short_url,
            'title': title,
            'title2': title2,
            'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
            'day': day,
            'month': month,
            'year': year,
            'num': num,
        }
752
753
754
class Garfield(GenericNavigableComic):
    """Retrieve Garfield comics from garfield.com."""
    # Also on http://www.gocomics.com/garfield
    name = 'garfield'
    long_name = 'Garfield'
    url = 'https://garfield.com'
    _categories = ('GARFIELD', )
    get_first_comic_link = simulate_first_link
    first_url = 'https://garfield.com/comic/1978/06/19'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the arrow link leading to the next (or previous) comic."""
        arrow_class = 'comic-arrow-right' if next_ else 'comic-arrow-left'
        return last_soup.find('a', class_=arrow_class)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict of a comic; the date is read from its URL."""
        comic_url = cls.get_url_from_link(link)
        date_pattern = re.compile('^%s/comic/([0-9]*)/([0-9]*)/([0-9]*)' % cls.url)
        # The URL path holds year, month and day, in that order.
        year, month, day = (int(part) for part in date_pattern.match(comic_url).groups())
        display = soup.find('div', class_='comic-display')
        pictures = display.find_all('img', class_='img-responsive')
        return {
            'month': month,
            'year': year,
            'day': day,
            'img': [picture['src'] for picture in pictures],
        }
782
783
784
class Dilbert(GenericNavigableComic):
    """Retrieve Dilbert strips from dilbert.com."""
    # Also on http://www.gocomics.com/dilbert-classics
    name = 'dilbert'
    long_name = 'Dilbert'
    url = 'http://dilbert.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://dilbert.com/strip/1989-04-16'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the link to the next (or previous) strip, None if absent."""
        if next_:
            nav_class = 'nav-comic nav-right'
        else:
            nav_class = 'nav-comic nav-left'
        nav_div = last_soup.find('div', class_=nav_class)
        if not nav_div:
            return None
        return nav_div.find('a')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict of a strip from the meta tags of its page."""
        def meta_content(prop):
            """Return the 'content' of the first meta tag with that property."""
            return soup.find('meta', property=prop)['content']

        title = meta_content('og:title')
        image_metas = soup.find_all('meta', property='og:image')
        desc = meta_content('og:description')
        published = string_to_date(meta_content('article:publish_date'), "%B %d, %Y")
        author = meta_content('article:author')
        tags = meta_content('article:tag')
        return {
            'title': title,
            'description': desc,
            'img': [meta['content'] for meta in image_metas],
            'author': author,
            'tags': tags,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
820
821
822
class VictimsOfCircumsolar(GenericNavigableComic):
    """Retrieve Victims Of Circumsolar comics."""
    name = 'circumsolar'
    long_name = 'Victims Of Circumsolar'
    url = 'http://www.victimsofcircumsolar.com'
    get_navi_link = get_a_navi_comicnavnext_navinext
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.victimsofcircumsolar.com/comic/modern-addiction'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title, description, images) of a comic."""
        # Date is on the archive page
        # When several og:* tags are present, the last one is used.
        title_metas = soup.find_all('meta', property='og:title')
        title = title_metas[-1]['content']
        desc_metas = soup.find_all('meta', property='og:description')
        desc = desc_metas[-1]['content']
        comic_div = soup.find('div', id='comic')
        pictures = comic_div.find_all('img')
        assert all(pic['title'] == pic['alt'] == title for pic in pictures)
        return {
            'title': title,
            'description': desc,
            'img': [pic['src'] for pic in pictures],
        }
844
845
846
class ThreeWordPhrase(GenericNavigableComic):
    """Retrieve Three Word Phrase comics."""
    # Also on http://www.threewordphrase.tumblr.com
    name = 'threeword'
    long_name = 'Three Word Phrase'
    url = 'http://threewordphrase.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the element wrapping the 'first' button of the home page."""
        first_button = get_soup_at_url(cls.url).find('img', src='/firstlink.gif')
        return first_button.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the element wrapping the next/previous button, None if it has no href."""
        button_src = '/nextlink.gif' if next_ else '/prevlink.gif'
        wrapper = last_soup.find('img', src=button_src).parent
        if wrapper.get('href') is None:
            return None
        return wrapper

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict of a comic from every image not filtered out by file name."""
        title = soup.find('title')
        # Images whose source ends with one of these are left out of the result.
        skipped_suffixes = ('link.gif', '32.png', 'twpbookad.jpg',
                            'merchad.jpg', 'header.gif', 'tipjar.jpg')
        pictures = []
        for picture in soup.find_all('img'):
            if not picture['src'].endswith(skipped_suffixes):
                pictures.append(picture)
        return {
            'title': title.string if title else None,
            'title2': '  '.join(pic.get('alt') for pic in pictures if pic.get('alt')),
            'img': [urljoin_wrapper(cls.url, pic['src']) for pic in pictures],
        }
878
879
880
class DeadlyPanel(GenericEmptyComic, GenericNavigableComic):
    """Retrieve Deadly Panel comics."""
    # Also on https://tapastic.com/series/deadlypanel
    name = 'deadly'
    long_name = 'Deadly Panel'
    url = 'http://www.deadlypanel.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict of a comic: only the image sources are kept."""
        comic_div = soup.find('div', id='comic')
        pictures = comic_div.find_all('img')
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        return {
            'img': [pic['src'] for pic in pictures],
        }
897
898
899 View Code Duplication
class TheGentlemanArmchair(GenericNavigableComic):
    """Retrieve The Gentleman Armchair comics."""
    name = 'gentlemanarmchair'
    long_name = 'The Gentleman Armchair'
    url = 'http://thegentlemansarmchair.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (images, title, author, date) of a comic."""
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        date_span = soup.find('span', class_='post-date')
        published = string_to_date(date_span.string, "%B %d, %Y")
        pictures = soup.find('div', id='comic').find_all('img')
        return {
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'author': author,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
923
924
925
class MyExtraLife(GenericNavigableComic):
    """Retrieve My Extra Life comics."""
    name = 'extralife'
    long_name = 'My Extra Life'
    url = 'http://www.myextralife.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'first comic' navigation link of the home page."""
        home = get_soup_at_url(cls.url)
        return home.find('a', class_='comic_nav_link first_comic_link')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title, images, date) of a comic."""
        title = soup.find("h1", class_="comic_title").string
        date_span = soup.find("span", class_="comic_date")
        published = string_to_date(date_span.string, "%B %d, %Y")
        pictures = soup.find_all("img", class_="comic")
        assert all(pic['alt'] == pic['title'] == title for pic in pictures)
        return {
            'title': title,
            # Images with an empty source are dropped.
            'img': [pic['src'] for pic in pictures if pic["src"]],
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
952
953
954
class SaturdayMorningBreakfastCereal(GenericNavigableComic):
    """Retrieve Saturday Morning Breakfast Cereal comics."""
    # Also on http://www.gocomics.com/saturday-morning-breakfast-cereal
    # Also on http://smbc-comics.tumblr.com
    name = 'smbc'
    long_name = 'Saturday Morning Breakfast Cereal'
    url = 'http://www.smbc-comics.com'
    _categories = ('SMBC', )
    get_navi_link = get_a_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the rel='start' link of the home page."""
        home = get_soup_at_url(cls.url)
        return home.find('a', rel='start')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict of a comic: main image plus the optional 'aftercomic' one."""
        main_img = soup.find('img', id='cc-comic')
        sources = [main_img['src']]
        bonus_div = soup.find('div', id='aftercomic')
        if bonus_div:
            bonus_src = bonus_div.find('img')['src']
            # An empty source is not added to the list.
            if bonus_src:
                sources.append(bonus_src)
        publish_div = soup.find('div', class_='cc-publishtime')
        published = string_to_date(publish_div.contents[0], "%B %d, %Y")
        return {
            'title': main_img['title'],
            'img': [urljoin_wrapper(cls.url, src) for src in sources],
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
986
987
988
class PerryBibleFellowship(GenericListableComic):
    """Retrieve Perry Bible Fellowship comics."""
    name = 'pbf'
    long_name = 'Perry Bible Fellowship'
    url = 'http://pbfcomics.com'
    get_url_from_archive_element = join_cls_url_to_href

    @classmethod
    def get_archive_elements(cls):
        """Return the numbered comic links of the home page, in reversed page order."""
        numbered_href = re.compile('^/[0-9]*/$')
        home = get_soup_at_url(cls.url)
        return reversed(home.find_all('a', href=numbered_href))

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (number, name, image, prefix) of a comic."""
        page_url = cls.get_url_from_archive_element(link)
        strip_src = re.compile('^/archive_b/PBF.*')
        name = link.string
        num = int(link['name'])
        target = link['href']
        # The link target is expected to be the comic number between slashes.
        assert target == '/%d/' % num
        pictures = soup.find_all('img', src=strip_src)
        assert len(pictures) == 1
        assert pictures[0]['alt'] == name
        return {
            'num': num,
            'name': name,
            'img': [urljoin_wrapper(page_url, pic['src']) for pic in pictures],
            'prefix': '%d-' % num,
        }
1018
1019
1020 View Code Duplication
class Mercworks(GenericNavigableComic):
    """Retrieve Mercworks comics."""
    # Also on http://mercworks.tumblr.com
    name = 'mercworks'
    long_name = 'Mercworks'
    url = 'http://mercworks.net'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict of a comic from the meta tags of its page."""
        title = soup.find('meta', property='og:title')['content']
        desc_meta = soup.find('meta', property='og:description')
        # The description tag is optional: fall back on an empty string.
        if desc_meta:
            desc = desc_meta['content']
        else:
            desc = ""
        published_meta = soup.find('meta', property='article:published_time')
        # Only the first 10 characters are parsed, as a "%Y-%m-%d" date.
        published = string_to_date(published_meta['content'][:10], "%Y-%m-%d")
        image_metas = soup.find_all('meta', property='og:image')
        return {
            'img': [meta['content'] for meta in image_metas],
            'title': title,
            'desc': desc,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
1046
1047
1048
class BerkeleyMews(GenericListableComic):
    """Retrieve Berkeley Mews comics."""
    # Also on http://mews.tumblr.com
    # Also on http://www.gocomics.com/berkeley-mews
    name = 'berkeley'
    long_name = 'Berkeley Mews'
    url = 'http://www.berkeleymews.com'
    _categories = ('BERKELEY', )
    get_url_from_archive_element = get_href
    # Matches the comic URLs and captures the comic number.
    comic_num_re = re.compile('%s/\\?p=([0-9]*)$' % url)

    @classmethod
    def get_archive_elements(cls):
        """Return the comic links of the archive page, in reversed page order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, "?page_id=2"))
        return reversed(archive_soup.find_all('a', href=cls.comic_num_re))

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict of a comic; the date is read from the image URL."""
        comic_date_re = re.compile('.*/([0-9]*)-([0-9]*)-([0-9]*)-.*')
        comic_url = cls.get_url_from_archive_element(link)
        num = int(cls.comic_num_re.match(comic_url).group(1))
        picture = soup.find('div', id='comic').find('img')
        assert picture['alt'] == picture['title']
        title2 = picture['title']
        picture_url = picture['src']
        # The three numeric groups of the image URL are year, month and day.
        year, month, day = (int(part) for part in comic_date_re.match(picture_url).groups())
        return {
            'num': num,
            'title': link.string,
            'title2': title2,
            'img': [picture_url],
            'year': year,
            'month': month,
            'day': day,
        }
1084
1085
1086
class GenericBouletCorp(GenericNavigableComic):
    """Common base to retrieve BouletCorp comics, whatever the language."""
    # Also on http://bouletcorp.tumblr.com
    _categories = ('BOULET', )
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the first link of the 'centered_nav' div of the home page."""
        nav_div = get_soup_at_url(cls.url).find('div', id='centered_nav')
        return nav_div.find_all('a')[0]

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict of a comic; the date is read from its URL."""
        comic_url = cls.get_url_from_link(link)
        date_pattern = re.compile('^%s/([0-9]*)/([0-9]*)/([0-9]*)/' % cls.url)
        year, month, day = (int(part) for part in date_pattern.match(comic_url).groups())
        story = soup.find('div', id='notes').find('div', class_='storycontent')
        pictures = story.find_all('img')
        # Join the non-empty image titles, separated by two spaces.
        picture_titles = (pic.get('title') for pic in pictures)
        texts = '  '.join(text for text in picture_titles if text)
        title = soup.find('title').string
        return {
            'img': [convert_iri_to_plain_ascii_uri(pic['src']) for pic in pictures if pic.get('src') is not None],
            'title': title,
            'texts': texts,
            'year': year,
            'month': month,
            'day': day,
        }
1114
1115
1116
class BouletCorp(GenericBouletCorp):
    """Retrieve BouletCorp comics from www.bouletcorp.com."""
    name = 'boulet'
    long_name = 'Boulet Corp'
    url = 'http://www.bouletcorp.com'
    _categories = ('FRANCAIS', )
1122
1123
1124
class BouletCorpEn(GenericBouletCorp):
    """Retrieve BouletCorp comics from english.bouletcorp.com."""
    name = 'boulet_en'
    long_name = 'Boulet Corp English'
    url = 'http://english.bouletcorp.com'
1129
1130
1131 View Code Duplication
class AmazingSuperPowers(GenericNavigableComic):
    """Retrieve Amazing Super Powers comics."""
    name = 'asp'
    long_name = 'Amazing Super Powers'
    url = 'http://www.amazingsuperpowers.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title, author, images, date) of a comic."""
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        date_span = soup.find('span', class_='post-date')
        published = string_to_date(date_span.string, "%B %d, %Y")
        pictures = soup.find('div', id='comic').find_all('img')
        # The title is made of the titles of all the images.
        title = ' '.join(pic['title'] for pic in pictures)
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        return {
            'title': title,
            'author': author,
            'img': [pic['src'] for pic in pictures],
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
1156
1157
1158
class ToonHole(GenericNavigableComic):
    """Retrieve Toon Hole comics."""
    # Also on http://tapastic.com/series/TOONHOLE
    name = 'toonhole'
    long_name = 'Toon Hole'
    url = 'http://www.toonhole.com'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict of a comic; the title comes from the first image, if any."""
        short_url = soup.find('link', rel='shortlink')['href']
        meta_div = soup.find('div', class_='entry-meta')
        published = string_to_date(meta_div.contents[0].strip(), "%B %d, %Y")
        pictures = soup.find('div', id='comic').find_all('img')
        # Without any image, the title is left empty.
        title = ""
        if pictures:
            first = pictures[0]
            title = first['alt']
            assert first['title'] == title
        return {
            'short_url': short_url,
            'title': title,
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [convert_iri_to_plain_ascii_uri(pic['src']) for pic in pictures],
        }
1188
1189
1190
class Channelate(GenericNavigableComic):
    """Retrieve Channelate comics."""
    name = 'channelate'
    long_name = 'Channelate'
    url = 'http://www.channelate.com'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict of a comic, images of the extra panel page included."""
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        date_span = soup.find('span', class_='post-date')
        published = string_to_date(date_span.string, '%Y/%m/%d')
        title = soup.find('meta', property='og:title')['content']
        comic_div = soup.find('div', id='comic')
        if comic_div:
            pictures = comic_div.find_all('img')
        else:
            pictures = []
        extra_url = None
        extra_button = soup.find('div', id='extrapanelbutton')
        if extra_button:
            # The extra panel is on another page which has to be fetched.
            extra_url = extra_button.find('a')['href']
            extra_page = get_soup_at_url(extra_url)
            pictures.extend(extra_page.find_all('img', class_='extrapanelimage'))
        return {
            'url_extra': extra_url,
            'title': title,
            'author': author,
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [urljoin_wrapper(cls.url, pic['src']) for pic in pictures],
        }
1224
1225
1226
class CyanideAndHappiness(GenericNavigableComic):
    """Retrieve Cyanide And Happiness comics from explosm.net."""
    name = 'cyanide'
    long_name = 'Cyanide and Happiness'
    url = 'http://explosm.net'
    _categories = ('NSFW', )
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'Oldest comic' link of the home page."""
        home = get_soup_at_url(cls.url)
        return home.find('a', title='Oldest comic')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the link to the next (or previous) comic, None if it has no href."""
        nav_class = 'next-comic' if next_ else 'previous-comic '
        anchor = last_soup.find('a', class_=nav_class)
        if anchor.get('href') is None:
            return None
        return anchor

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (number, author, date, images) of a comic."""
        canonical_url = soup.find('meta', property='og:url')['content']
        # The comic number is the second to last '/'-separated part of the URL.
        num = int(canonical_url.split('/')[-2])
        date_link = soup.find('h3').find('a')
        published = string_to_date(date_link.string, '%Y.%m.%d')
        credit = soup.find('small', class_="author-credit-name").string
        assert credit.startswith('by ')
        author = credit[3:]
        pictures = soup.find_all('img', id='main-comic')
        return {
            'num': num,
            'author': author,
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'prefix': '%d-' % num,
            'img': [convert_iri_to_plain_ascii_uri(urljoin_wrapper(cls.url, pic['src'])) for pic in pictures]
        }
1265
1266
1267
class MrLovenstein(GenericComic):
    """Retrieve Mr Lovenstein comics."""
    # Also on https://tapastic.com/series/MrLovenstein
    name = 'mrlovenstein'
    long_name = 'Mr. Lovenstein'
    url = 'http://www.mrlovenstein.com'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Yield the comics following last_comic. Implementation of GenericComic's abstract method."""
        # TODO: more info from http://www.mrlovenstein.com/archive
        comic_num_re = re.compile('^/comic/([0-9]*)$')
        picture_src_re = re.compile('^/images/comics/')
        home_links = get_soup_at_url(cls.url).find_all('a', href=comic_num_re)
        nums = [int(comic_num_re.match(anchor['href']).group(1)) for anchor in home_links]
        lowest, highest = min(nums), max(nums)
        # Resume right after the last retrieved comic when there is one.
        start = last_comic['num'] + 1 if last_comic else lowest
        for num in range(start, highest + 1):
            url = urljoin_wrapper(cls.url, '/comic/%d' % num)
            soup = get_soup_at_url(url)
            pictures = list(reversed(soup.find_all('img', src=picture_src_re)))
            description = soup.find('meta', attrs={'name': 'description'})['content']
            picture_titles = (pic.get('title') for pic in pictures)
            yield {
                'url': url,
                'num': num,
                'texts': '  '.join(text for text in picture_titles if text),
                'img': [urljoin_wrapper(url, pic['src']) for pic in pictures],
                'description': description,
            }
1297
1298
1299
class DinosaurComics(GenericListableComic):
    """Retrieve Dinosaur Comics from qwantz.com."""
    name = 'dinosaur'
    long_name = 'Dinosaur Comics'
    url = 'http://www.qwantz.com'
    get_url_from_archive_element = get_href
    # Matches the comic URLs and captures the comic number.
    comic_link_re = re.compile('^%s/index.php\\?comic=([0-9]*)$' % url)

    @classmethod
    def get_archive_elements(cls):
        """Return the comic links of the archive page, in reversed page order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive.php'))
        comic_links = archive_soup.find_all('a', href=cls.comic_link_re)
        # first link is random -> skip it
        return reversed(comic_links[1:])

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict of a comic; date and text come from the archive link."""
        comic_url = cls.get_url_from_archive_element(link)
        num = int(cls.comic_link_re.match(comic_url).group(1))
        date_str = link.string
        text = link.next_sibling.string
        published = string_to_date(remove_st_nd_rd_th_from_date(date_str), "%B %d, %Y")
        picture_src_re = re.compile('^%s/comics/' % cls.url)
        picture = soup.find('img', src=picture_src_re)
        return {
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [picture.get('src')],
            'title': picture.get('title'),
            'text': text,
            'num': num,
        }
1332
1333
1334
class ButterSafe(GenericListableComic):
    """Retrieve ButterSafe comics."""
    name = 'butter'
    long_name = 'ButterSafe'
    url = 'http://buttersafe.com'
    get_url_from_archive_element = get_href
    # Matches the comic URLs and captures year, month and day.
    comic_link_re = re.compile('^%s/([0-9]*)/([0-9]*)/([0-9]*)/.*' % url)

    @classmethod
    def get_archive_elements(cls):
        """Return the comic links of the archive page, in reversed page order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive/'))
        return reversed(archive_soup.find_all('a', href=cls.comic_link_re))

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict of a comic; the date is read from its URL."""
        comic_url = cls.get_url_from_archive_element(link)
        title = link.string
        date_match = cls.comic_link_re.match(comic_url)
        year, month, day = (int(part) for part in date_match.groups())
        picture = soup.find('div', id='comic').find('img')
        assert picture['alt'] == title
        return {
            'title': title,
            'day': day,
            'month': month,
            'year': year,
            'img': [picture['src']],
        }
1362
1363
1364
class CalvinAndHobbes(GenericComic):
    """Class to retrieve Calvin and Hobbes comics."""
    # Also on http://www.gocomics.com/calvinandhobbes/
    name = 'calvin'
    long_name = 'Calvin and Hobbes'
    # This is not through any official webpage but eh...
    url = 'http://marcel-oehler.marcellosendos.ch/comics/ch/'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generator to get the next comic. Implementation of GenericComic's abstract method.

        Follows the "year/month/" links of the index page and yields one
        dict (keys 'url', 'year', 'month', 'day', 'img') per image dated
        strictly after the last retrieved comic, or after 1985-11-01 when
        last_comic is empty.
        """
        last_date = get_date_for_comic(
            last_comic) if last_comic else date(1985, 11, 1)
        link_re = re.compile('^([0-9]*)/([0-9]*)/')
        for link in get_soup_at_url(cls.url).find_all('a', href=link_re):
            url = link['href']
            # The captured texts are kept as they are reused verbatim in the
            # image name pattern and in the image URL; the numeric values
            # are computed once for the date arithmetic.
            year_str, month_str = link_re.match(url).groups()
            year, month = int(year_str), int(month_str)
            # Only open the months that may hold comics after last_date.
            if date(year, month, 1) + timedelta(days=31) >= last_date:
                img_re = re.compile('^%s%s([0-9]*)' % (year_str, month_str))
                month_url = urljoin_wrapper(cls.url, url)
                for img in get_soup_at_url(month_url).find_all('img', src=img_re):
                    img_src = img['src']
                    # The digits following year and month in the name are the day.
                    day = int(img_re.match(img_src).groups()[0])
                    comic_date = date(year, month, day)
                    if comic_date > last_date:
                        yield {
                            'url': month_url,
                            'year': year,
                            'month': month,
                            'day': day,
                            'img': ['%s%s/%s/%s' % (cls.url, year_str, month_str, img_src)],
                        }
                        last_date = comic_date
1398
1399
1400
class AbstruseGoose(GenericListableComic):
    """Retrieve Abstruse Goose comics."""
    name = 'abstruse'
    long_name = 'Abstruse Goose'
    url = 'http://abstrusegoose.com'
    get_url_from_archive_element = get_href
    # Matches the comic URLs and captures the comic number.
    comic_url_re = re.compile('^%s/([0-9]*)$' % url)
    # Matches the sources of the strip images.
    comic_img_re = re.compile('^%s/strips/.*' % url)

    @classmethod
    def get_archive_elements(cls):
        """Return the comic links of the archive page, in page order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive'))
        return archive_soup.find_all('a', href=cls.comic_url_re)

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Build the info dict (number, title, image) of a comic."""
        page_url = cls.get_url_from_archive_element(archive_elt)
        num = int(cls.comic_url_re.match(page_url).group(1))
        return {
            'num': num,
            'title': archive_elt.string,
            'img': [soup.find('img', src=cls.comic_img_re)['src']]
        }
1423
1424
1425
class PhDComics(GenericNavigableComic):
    """Retrieve PhD Comics."""
    name = 'phd'
    long_name = 'PhD Comics'
    url = 'http://phdcomics.com/comics/archive.php'

    @classmethod
    def get_first_comic_link(cls):
        """Return the element wrapping the 'first' button, None if there is no such button."""
        archive_soup = get_soup_at_url(cls.url)
        button = archive_soup.find('img', src='http://phdcomics.com/comics/images/first_button.gif')
        if button is None:
            return None
        return button.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the element wrapping the next/prev button, None if there is no such button."""
        direction = 'next' if next_ else 'prev'
        button_src = 'http://phdcomics.com/comics/images/%s_button.gif' % direction
        button = last_soup.find('img', src=button_src)
        if button is None:
            return None
        return button.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (images, title) of a comic from its meta tags."""
        title_meta = soup.find('meta', attrs={'name': 'twitter:title'})
        title = title_meta['content']
        image_metas = soup.find_all('meta', property='og:image')
        return {
            'img': [meta['content'] for meta in image_metas],
            'title': title,
        }
1454
1455
1456
class Octopuns(GenericEmptyComic, GenericNavigableComic):
    """Retrieve Octopuns comics."""
    # Also on http://octopuns.tumblr.com
    name = 'octopuns'
    long_name = 'Octopuns'
    url = 'http://www.octopuns.net'

    @classmethod
    def get_first_comic_link(cls):
        """Return the element wrapping the 'First' button of the home page."""
        first_button = get_soup_at_url(cls.url).find('img', src=re.compile('.*/First.png'))
        return first_button.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the element wrapping the Next/Back button, None if it has no href."""
        button_pattern = '.*/Next.png' if next_ else '.*/Back.png'
        wrapper = last_soup.find('img', src=re.compile(button_pattern)).parent
        if wrapper.get('href') is None:
            return None
        return wrapper

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (images, title, date) of a comic."""
        title = soup.find('h3', class_='post-title entry-title').string
        date_header = soup.find('h2', class_='date-header')
        published = string_to_date(date_header.string, "%A, %B %d, %Y")
        image_links = soup.find_all('link', rel='image_src')
        return {
            'img': [image_link['href'] for image_link in image_links],
            'title': title,
            'day': published.day,
            'month': published.month,
            'year': published.year,
        }
1488
1489
1490
class Quarktees(GenericNavigableComic):
    """Retriever for the comics posted on the Quarktees blog."""
    name = 'quarktees'
    long_name = 'Quarktees'
    url = 'http://www.quarktees.com/blogs/news'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.quarktees.com/blogs/news/12486621-coming-soon'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor leading to the next (or previous) article."""
        anchor_id = 'article-next' if next_ else 'article-prev'
        return last_soup.find('a', id=anchor_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the title and the absolute image URLs of an article."""
        title = soup.find('meta', property='og:title')['content']
        article = soup.find('div', class_='single-article')
        image_urls = []
        for img in article.find_all('img'):
            # Image sources are resolved against the class url.
            image_urls.append(urljoin_wrapper(cls.url, img['src']))
        return {
            'title': title,
            'img': image_urls,
        }
1514
1515
1516
class OverCompensating(GenericNavigableComic):
    """Retriever for the Over Compensating webcomic."""
    name = 'compensating'
    long_name = 'Over Compensating'
    url = 'http://www.overcompensating.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the anchor whose href ends with ``comic=1``."""
        first_href_re = re.compile('comic=1$')
        return get_soup_at_url(cls.url).find('a', href=first_href_re)

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor leading to the next (or previous) comic."""
        anchor_title = 'next comic' if next_ else 'go back already'
        return last_soup.find('a', title=anchor_title)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the number, the image URL and the title of a comic.

        The number is parsed from the ``comic=<n>`` suffix of the comic url.
        """
        comic_url = cls.get_url_from_link(link)
        num_match = re.compile('.*comic=([0-9]*)$').match(comic_url)
        num = int(num_match.groups()[0])
        img = soup.find('img', src=re.compile('^/oc/comics/.*'))
        return {
            'num': num,
            'img': [urljoin_wrapper(comic_url, img['src'])],
            'title': img.get('title')
        }
1546
1547
1548
class Oglaf(GenericNavigableComic):
    """Retriever for the Oglaf webcomic (flagged NSFW)."""
    name = 'oglaf'
    long_name = 'Oglaf [NSFW]'
    url = 'http://oglaf.com'
    _categories = ('NSFW', )
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the parent element of the ``div`` with id ``st``."""
        start_div = get_soup_at_url(cls.url).find("div", id="st")
        return start_div.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the parent of the next/previous ``div``, None if it is missing."""
        div_id = "nx" if next_ else "pvs"
        nav_div = last_soup.find("div", id=div_id)
        if not nav_div:
            return None
        return nav_div.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the title, the images and a description of a comic.

        Exactly one title image and one strip image are expected; the
        description joins their ``title`` attributes.
        """
        page_title = soup.find('title').string
        header_imgs = soup.find('div', id='tt').find_all('img')
        assert len(header_imgs) == 1
        strip_imgs = soup.find_all('img', id='strip')
        assert len(strip_imgs) == 1
        all_imgs = header_imgs + strip_imgs
        img_titles = [img['title'] for img in all_imgs]
        return {
            'title': page_title,
            'img': [img['src'] for img in all_imgs],
            'description': ' '.join(img_titles),
        }
1582
1583
1584
class ScandinaviaAndTheWorld(GenericNavigableComic):
    """Retriever for the Scandinavia And The World webcomic."""
    name = 'satw'
    long_name = 'Scandinavia And The World'
    url = 'http://satwcomic.com'
    get_first_comic_link = simulate_first_link
    first_url = 'http://satwcomic.com/sweden-denmark-and-norway'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor with access key 'n' (next) or 'p' (previous)."""
        access_key = 'n' if next_ else 'p'
        return last_soup.find('a', accesskey=access_key)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the title, the description and the images of a comic."""
        label_meta = soup.find('meta', attrs={'name': 'twitter:label1'})
        title = label_meta['content']
        desc_meta = soup.find('meta', property='og:description')
        desc = desc_meta['content']
        image_urls = []
        for img in soup.find_all('img', itemprop="image"):
            image_urls.append(img['src'])
        return {
            'title': title,
            'description': desc,
            'img': image_urls,
        }
1608
1609
1610
class SomethingOfThatIlk(GenericEmptyComic):
    """Placeholder for the Something Of That Ilk webcomic."""
    # Does not exist anymore
    name = 'somethingofthatilk'
    long_name = 'Something Of That Ilk'
    url = 'http://www.somethingofthatilk.com'
1615
1616
1617
class InfiniteMonkeyBusiness(GenericNavigableComic):
    """Retriever for the Infinite Monkey Business webcomic."""
    name = 'monkey'
    long_name = 'Infinite Monkey Business'
    url = 'http://infinitemonkeybusiness.net'
    get_navi_link = get_a_navi_comicnavnext_navinext
    get_first_comic_link = simulate_first_link
    first_url = 'http://infinitemonkeybusiness.net/comic/pillory/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the title and the image URLs of a comic page."""
        title = soup.find('meta', property='og:title')['content']
        comic_div = soup.find('div', id='comic')
        image_urls = []
        for img in comic_div.find_all('img'):
            image_urls.append(img['src'])
        return {
            'title': title,
            'img': image_urls,
        }
1635
1636
1637
class Wondermark(GenericListableComic):
    """Retriever for the Wondermark webcomic, driven by its archive page."""
    name = 'wondermark'
    long_name = 'Wondermark'
    url = 'http://wondermark.com'
    get_url_from_archive_element = get_href

    @classmethod
    def get_archive_elements(cls):
        """Return the bookmark anchors of the archive page in reversed order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive/'))
        bookmarks = archive_soup.find_all('a', rel='bookmark')
        return reversed(bookmarks)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the date, image, title, alt text and tags of a post.

        Posts without a ``comic`` div yield an empty image list and empty
        title/alt strings.
        """
        date_str = soup.find('div', class_='postdate').find('em').string
        published = string_to_date(remove_st_nd_rd_th_from_date(date_str), "%B %d, %Y")
        # Defaults used when the post carries no comic.
        img_src, alt, title = [], '', ''
        comic_div = soup.find('div', id='comic')
        if comic_div:
            img = comic_div.find('img')
            img_src = [img['src']]
            alt = img['alt']
            assert alt == img['title']
            title = soup.find('meta', property='og:title')['content']
        tag_links = soup.find('div', class_='postmeta').find_all('a', rel='tag')
        return {
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': img_src,
            'title': title,
            'alt': alt,
            'tags': ' '.join(tag.string for tag in tag_links),
        }
1674
1675
1676
class WarehouseComic(GenericNavigableComic):
    """Retriever for the Warehouse Comic webcomic."""
    name = 'warehouse'
    long_name = 'Warehouse Comic'
    url = 'http://warehousecomic.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the images, the title and the publication date of a comic."""
        title = soup.find('h2', class_='post-title').string
        post_date = soup.find('span', class_='post-date')
        published = string_to_date(post_date.string, "%B %d, %Y")
        comic_imgs = soup.find('div', id='comic').find_all('img')
        return {
            'img': [img['src'] for img in comic_imgs],
            'title': title,
            'day': published.day,
            'month': published.month,
            'year': published.year,
        }
1698
1699
1700
class JustSayEh(GenericNavigableComic):
    """Retriever for the Just Say Eh webcomic."""
    # Also on http://tapastic.com/series/Just-Say-Eh
    name = 'justsayeh'
    long_name = 'Just Say Eh'
    url = 'http://www.justsayeh.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the images, the title and the alt text of a comic.

        Every image is expected to have identical ``alt`` and ``title``
        attributes; the alt text of the first image is reported.
        """
        title = soup.find('h2', class_='post-title').string
        comic_imgs = soup.find("div", id="comic").find_all("img")
        assert all(img['alt'] == img['title'] for img in comic_imgs)
        first_alt = comic_imgs[0]['alt']
        return {
            'img': [img['src'] for img in comic_imgs],
            'title': title,
            'alt': first_alt,
        }
1721
1722
1723 View Code Duplication
class MouseBearComedy(GenericNavigableComic):
    """Retriever for the Mouse Bear Comedy webcomic."""
    # Also on http://mousebearcomedy.tumblr.com
    name = 'mousebear'
    long_name = 'Mouse Bear Comedy'
    url = 'http://www.mousebearcomedy.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the date, the images, the title and the author of a comic.

        Each image is expected to use the post title as both ``alt`` and
        ``title`` attribute.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        post_date = soup.find("span", class_="post-date")
        published = string_to_date(post_date.string, '%B %d, %Y')
        comic_imgs = soup.find("div", id="comic").find_all("img")
        assert all(img['alt'] == img['title'] == title for img in comic_imgs)
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [img['src'] for img in comic_imgs],
            'title': title,
            'author': author,
        }
1749
1750
1751 View Code Duplication
class BigFootJustice(GenericNavigableComic):
    """Retriever for the Big Foot Justice webcomic."""
    # Also on http://tapastic.com/series/bigfoot-justice
    name = 'bigfoot'
    long_name = 'Big Foot Justice'
    url = 'http://bigfootjustice.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the images and a title built from the image titles."""
        comic_imgs = soup.find('div', id='comic').find_all('img')
        assert all(img['title'] == img['alt'] for img in comic_imgs)
        img_titles = [img['title'] for img in comic_imgs]
        return {
            'img': [img['src'] for img in comic_imgs],
            'title': ' '.join(img_titles),
        }
1770
1771
1772
class RespawnComic(GenericNavigableComic):
    """Class to retrieve Respawn Comic."""
    # Also on http://respawncomic.tumblr.com
    name = 'respawn'
    long_name = 'Respawn Comic'
    # The original literal ended with a stray space, which is not part of
    # the site address.
    url = 'http://respawncomic.com'
    _categories = ('RESPAWN', )
    get_navi_link = get_a_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://respawncomic.com/comic/c0001/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with the title, the author, the publication date
        (day/month/year) and the image URLs taken from the ``og:image``
        meta tags, minus the site-wide images listed in ``skip_imgs``.
        """
        title = soup.find('meta', property='og:title')['content']
        author = soup.find('meta', attrs={'name': 'shareaholic:article_author_name'})['content']
        date_str = soup.find('meta', attrs={'name': 'shareaholic:article_published_time'})['content']
        # Only the leading YYYY-MM-DD part of the timestamp is parsed.
        day = string_to_date(date_str[:10], "%Y-%m-%d")
        imgs = soup.find_all('meta', property='og:image')
        # og:image entries that are excluded from the comic's images.
        skip_imgs = {
            'http://respawncomic.com/wp-content/uploads/2016/03/site/HAROLD2.png',
            'http://respawncomic.com/wp-content/uploads/2016/03/site/DEVA.png'
        }
        return {
            'title': title,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [i['content'] for i in imgs if i['content'] not in skip_imgs],
        }
1804
1805
1806 View Code Duplication
class SafelyEndangered(GenericNavigableComic):
    """Retriever for the Safely Endangered webcomic."""
    # Also on http://tumblr.safelyendangered.com
    name = 'endangered'
    long_name = 'Safely Endangered'
    url = 'http://www.safelyendangered.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.safelyendangered.com/comic/ignored/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the date, the images, the title and the alt text of a comic.

        The alt text is the one of the first image; all images are expected
        to have matching ``alt`` and ``title`` attributes.
        """
        title = soup.find('h2', class_='post-title').string
        post_date = soup.find('span', class_='post-date')
        published = string_to_date(post_date.string, '%B %d, %Y')
        comic_imgs = soup.find('div', id='comic').find_all('img')
        first_alt = comic_imgs[0]['alt']
        assert all(img['alt'] == img['title'] for img in comic_imgs)
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [img['src'] for img in comic_imgs],
            'title': title,
            'alt': first_alt,
        }
1833
1834
1835 View Code Duplication
class PicturesInBoxes(GenericNavigableComic):
    """Retriever for the Pictures In Boxes webcomic."""
    # Also on http://picturesinboxescomic.tumblr.com
    name = 'picturesinboxes'
    long_name = 'Pictures in Boxes'
    url = 'http://www.picturesinboxes.com'
    get_navi_link = get_a_navi_navinext
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.picturesinboxes.com/2013/10/26/tetris/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the date, the images, the title and the author of a comic.

        At least one image is expected, each using the post title as both
        ``title`` and ``alt`` attribute.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        post_date = soup.find('span', class_='post-date')
        published = string_to_date(post_date.string, '%B %d, %Y')
        pane_imgs = soup.find('div', class_='comicpane').find_all('img')
        assert pane_imgs
        assert all(img['title'] == img['alt'] == title for img in pane_imgs)
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [img['src'] for img in pane_imgs],
            'title': title,
            'author': author,
        }
1863
1864
1865
class Penmen(GenericEmptyComic):
    """Placeholder for the Penmen webcomic."""
    name = 'penmen'
    long_name = 'Penmen'
    url = 'http://penmen.com'
1870
1871
1872
class TheDoghouseDiaries(GenericNavigableComic):
    """Retriever for The Dog House Diaries webcomic."""
    name = 'doghouse'
    long_name = 'The Dog House Diaries'
    url = 'http://thedoghousediaries.com'

    @classmethod
    def get_first_comic_link(cls):
        """Return the anchor with id ``firstlink`` from the home page."""
        home_soup = get_soup_at_url(cls.url)
        return home_soup.find('a', id='firstlink')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor leading to the next (or previous) comic."""
        anchor_id = 'nextlink' if next_ else 'previouslink'
        return last_soup.find('a', id=anchor_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the titles, the alt text, the image and the number of a comic.

        The number is the last path component of the comic url.
        """
        img = soup.find('img', src=re.compile('^dhdcomics/.*'))
        comic_url = cls.get_url_from_link(link)
        title = soup.find('h2', id='titleheader').string
        subtitle = soup.find('div', id='subtext').string
        alt = img.get('title')
        # The src attribute is stripped before being joined to the url.
        img_url = urljoin_wrapper(comic_url, img['src'].strip())
        return {
            'title': title,
            'title2': subtitle,
            'alt': alt,
            'img': [img_url],
            'num': int(comic_url.rsplit('/', 1)[-1]),
        }
1901
1902
1903
class InvisibleBread(GenericListableComic):
    """Class to retrieve Invisible Bread comics."""
    # Also on http://www.gocomics.com/invisible-bread
    name = 'invisiblebread'
    long_name = 'Invisible Bread'
    url = 'http://invisiblebread.com'

    @classmethod
    def get_archive_elements(cls):
        """Get the archive elements (``td.archive-title`` cells), reversed."""
        archive_url = urljoin_wrapper(cls.url, 'archives/')
        return reversed(get_soup_at_url(archive_url).find_all('td', class_='archive-title'))

    @classmethod
    def get_url_from_archive_element(cls, td):
        """Get url corresponding to an archive element."""
        return td.find('a')['href']

    @classmethod
    def get_comic_info(cls, soup, td):
        """Get information about a particular comics.

        The year is parsed from the comic url and combined with the month
        and day read from the sibling preceding ``td``.
        """
        url = cls.get_url_from_archive_element(td)
        title = td.find('a').string
        month_and_day = td.previous_sibling.string
        # Escape the site url so that its dots are matched literally.
        link_re = re.compile('^%s/([0-9]+)/' % re.escape(cls.url))
        year = link_re.match(url).group(1)
        date_str = month_and_day + ' ' + year
        day = string_to_date(date_str, '%b %d %Y')
        # A single image is read from the comic div.
        imgs = [soup.find('div', id='comic').find('img')]
        assert all(i['title'] == i['alt'] == title for i in imgs)
        return {
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
            'title': title,
        }
1939
1940
1941
class DiscoBleach(GenericEmptyComic):
    """Placeholder for the Disco Bleach webcomic."""
    # Does not work anymore
    name = 'discobleach'
    long_name = 'Disco Bleach'
    url = 'http://discobleach.com'
1946
1947
1948
class TubeyToons(GenericEmptyComic):
    """Placeholder for the Tubey Toons webcomic."""
    # Does not work anymore
    # Also on http://tapastic.com/series/Tubey-Toons
    # Also on http://tubeytoons.tumblr.com
    name = 'tubeytoons'
    long_name = 'Tubey Toons'
    url = 'http://tubeytoons.com'
    _categories = ('TUNEYTOONS', )
1956
1957
1958
class CompletelySeriousComics(GenericNavigableComic):
    """Retriever for the Completely Serious Comics webcomic."""
    name = 'completelyserious'
    long_name = 'Completely Serious Comics'
    url = 'http://completelyseriouscomics.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the date, images, title, alt text and author of a comic.

        At least one image is expected and all images must share the same
        ``title``/``alt`` text, which is reported as the alt text.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find('span', class_='post-author')
        # The author is read from the second child of the author span.
        author = author_span.contents[1].string
        post_date = soup.find('span', class_='post-date')
        published = string_to_date(post_date.string, '%B %d, %Y')
        pane_imgs = soup.find('div', class_='comicpane').find_all('img')
        assert pane_imgs
        shared_alt = pane_imgs[0]['title']
        assert all(img['title'] == img['alt'] == shared_alt for img in pane_imgs)
        return {
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [img['src'] for img in pane_imgs],
            'title': title,
            'alt': shared_alt,
            'author': author,
        }
1986
1987
1988
class PoorlyDrawnLines(GenericListableComic):
    """Class to retrieve Poorly Drawn Lines comics."""
    # Also on http://pdlcomics.tumblr.com
    name = 'poorlydrawn'
    long_name = 'Poorly Drawn Lines'
    url = 'http://poorlydrawnlines.com'
    _categories = ('POORLYDRAWN', )
    get_url_from_archive_element = get_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        At most one image is expected in the post; the title is the image's
        ``title`` attribute, or an empty string when it is missing or when
        the post has no image.
        """
        imgs = soup.find('div', class_='post').find_all('img')
        assert len(imgs) <= 1
        return {
            'img': [i['src'] for i in imgs],
            'title': imgs[0].get('title', "") if imgs else "",
        }

    @classmethod
    def get_archive_elements(cls):
        """Get the archive anchors pointing to ``<url>/comic/...``, reversed."""
        archive_url = urljoin_wrapper(cls.url, 'archive')
        # Escape the site url so that its dots are matched literally.
        url_re = re.compile('^%s/comic/.' % re.escape(cls.url))
        return reversed(get_soup_at_url(archive_url).find_all('a', href=url_re))
2012
2013
2014
class LoadingComics(GenericNavigableComic):
    """Retriever for the Loading Artist webcomic."""
    name = 'loadingartist'
    long_name = 'Loading Artist'
    url = 'http://www.loadingartist.com/latest'

    @classmethod
    def get_first_comic_link(cls):
        """Return the anchor titled "First" from the latest comic page."""
        latest_soup = get_soup_at_url(cls.url)
        return latest_soup.find('a', title="First")

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor titled 'Next' or 'Previous'."""
        anchor_title = 'Next' if next_ else 'Previous'
        return last_soup.find('a', title=anchor_title)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the title, the images and the publication date of a comic.

        Only images with empty ``alt`` and ``title`` attributes inside the
        ``comic`` div are kept.
        """
        title = soup.find('h1').string
        raw_date = soup.find('span', class_='date').string
        published = string_to_date(raw_date.strip(), "%B %d, %Y")
        comic_div = soup.find('div', class_='comic')
        comic_imgs = comic_div.find_all('img', alt='', title='')
        return {
            'title': title,
            'img': [img['src'] for img in comic_imgs],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2044
2045
2046
class ChuckleADuck(GenericNavigableComic):
    """Retriever for the Chuckle-A-Duck webcomic."""
    name = 'chuckleaduck'
    long_name = 'Chuckle-A-duck'
    url = 'http://chuckleaduck.com'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the date, the images, the title and the author of a comic.

        Pages without a ``comic`` div yield no image and an empty title;
        otherwise the title is the ``title`` attribute shared by the images.
        """
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(remove_st_nd_rd_th_from_date(raw_date), "%B %d, %Y")
        author = soup.find('span', class_='post-author').string
        comic_div = soup.find('div', id='comic')
        if comic_div:
            comic_imgs = comic_div.find_all('img')
        else:
            comic_imgs = []
        if comic_imgs:
            title = comic_imgs[0]['title']
        else:
            title = ""
        assert all(img['title'] == img['alt'] == title for img in comic_imgs)
        return {
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [img['src'] for img in comic_imgs],
            'title': title,
            'author': author,
        }
2072
2073
2074
class DepressedAlien(GenericNavigableComic):
    """Retriever for the Depressed Alien webcomic."""
    name = 'depressedalien'
    long_name = 'Depressed Alien'
    url = 'http://depressedalien.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the parent element of the image named ``beginArrow``."""
        home_soup = get_soup_at_url(cls.url)
        begin_arrow = home_soup.find('img', attrs={'name': 'beginArrow'})
        return begin_arrow.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the parent element of the right (next) or left arrow image."""
        arrow_name = 'rightArrow' if next_ else 'leftArrow'
        arrow = last_soup.find('img', attrs={'name': arrow_name})
        return arrow.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the title and the image URLs read from the meta tags."""
        title_meta = soup.find('meta', attrs={'name': 'twitter:title'})
        title = title_meta['content']
        image_urls = []
        for meta in soup.find_all('meta', property='og:image'):
            image_urls.append(meta['content'])
        return {
            'title': title,
            'img': image_urls,
        }
2100
2101
2102
class ThingsInSquares(GenericListableComic):
    """Retriever for the Things In Squares webcomic, driven by its archive."""
    # This can be retrieved in other languages
    # Also on https://tapastic.com/series/Things-in-Squares
    name = 'squares'
    long_name = 'Things in squares'
    url = 'http://www.thingsinsquares.com'

    @classmethod
    def get_comic_info(cls, soup, tr):
        """Return the date, titles, description, tags, images and alt text.

        The date and the first title come from the archive row ``tr`` (three
        cells expected); everything else is read from the comic page.
        """
        _, title_cell, date_cell = tr.find_all('td')
        anchor = title_cell.find('a')
        published = string_to_date(date_cell.string, "%m.%d.%y")
        title = anchor.string
        title2 = soup.find('meta', property='og:title')['content']
        desc_meta = soup.find('meta', property='og:description')
        if desc_meta:
            description = desc_meta['content']
        else:
            description = ''
        tag_metas = soup.find_all('meta', property='article:tag')
        tags = ' '.join(meta['content'] for meta in tag_metas)
        content_imgs = soup.find('div', class_='entry-content').find_all('img')
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'title': title,
            'title2': title2,
            'description': description,
            'tags': tags,
            'img': [img['src'] for img in content_imgs],
            'alt': ' '.join(img['alt'] for img in content_imgs),
        }

    @classmethod
    def get_url_from_archive_element(cls, tr):
        """Return the href of the anchor in the second cell of ``tr``."""
        _, title_cell, _ = tr.find_all('td')
        anchor = title_cell.find('a')
        return anchor['href']

    @classmethod
    def get_archive_elements(cls):
        """Return the rows of the archive table body in reversed order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive-2'))
        rows = archive_soup.find('tbody').find_all('tr')
        return reversed(rows)
2144
2145
2146
class HappleTea(GenericNavigableComic):
    """Retriever for the Happle Tea webcomic."""
    name = 'happletea'
    long_name = 'Happle Tea'
    url = 'http://www.happletea.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the title, images, alt text, date and author of a comic.

        The alt text is the concatenation of the ``alt`` attributes of the
        images, which must equal their ``title`` attributes.
        """
        comic_imgs = soup.find('div', id='comic').find_all('img')
        post = soup.find('div', class_='post-content')
        title = post.find('h2', class_='post-title').string
        author = post.find('a', rel='author').string
        raw_date = post.find('span', class_='post-date').string
        published = string_to_date(raw_date, "%B %d, %Y")
        assert all(img['alt'] == img['title'] for img in comic_imgs)
        return {
            'title': title,
            'img': [img['src'] for img in comic_imgs],
            'alt': ''.join(img['alt'] for img in comic_imgs),
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'author': author,
        }
2173
2174
2175
class FatAwesomeComics(GenericNavigableComic):
    """Retriever for the Fat Awesome webcomic."""
    # Also on http://fatawesomecomedy.tumblr.com
    name = 'fatawesome'
    long_name = 'Fat Awesome'
    url = 'http://fatawesome.com/comics'
    get_navi_link = get_a_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://fatawesome.com/shortbus/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the title, description, tags, alt text, image and date.

        Exactly one image flagged with ``data-recalc-dims="1"`` is expected;
        the query string of its source is dropped.
        """
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        description = soup.find('meta', attrs={'name': 'description'})['content']
        tag_meta = soup.find('meta', property='article:tag')
        if tag_meta:
            tags = tag_meta['content']
        else:
            tags = ""
        published_meta = soup.find('meta', property='article:published_time')
        # Only the leading YYYY-MM-DD part of the timestamp is parsed.
        published = string_to_date(published_meta['content'][:10], "%Y-%m-%d")
        comic_imgs = soup.find_all('img', attrs={'data-recalc-dims': "1"})
        assert len(comic_imgs) == 1
        return {
            'title': title,
            'description': description,
            'tags': tags,
            'alt': "".join(img['alt'] for img in comic_imgs),
            'img': [img['src'].rsplit('?', 1)[0] for img in comic_imgs],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2206
2207
2208
class AnythingComic(GenericListableComic):
    """Retriever for the Anything Comic webcomic, driven by its archive."""
    # Also on http://tapastic.com/series/anything
    name = 'anythingcomic'
    long_name = 'Anything Comic'
    url = 'http://www.anythingcomic.com'

    @classmethod
    def get_archive_elements(cls):
        """Return the rows of the chapter table that correspond to comics."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive/'))
        rows = archive_soup.find('table', id='chapter_table').find_all('tr')
        # The first 2 <tr>'s do not correspond to comics
        return rows[2:]

    @classmethod
    def get_url_from_archive_element(cls, tr):
        """Return the absolute comic url for an archive row (four cells)."""
        _, comic_cell, _, _ = tr.find_all('td')
        anchor = comic_cell.find('a')
        return urljoin_wrapper(cls.url, anchor['href'])

    @classmethod
    def get_comic_info(cls, soup, tr):
        """Return the number, title, alt text, image and date of a comic.

        Number, title and date come from the archive row ``tr``; the single
        expected image comes from the comic page.
        """
        num_cell, comic_cell, date_cell, _ = tr.find_all('td')
        num = int(num_cell.string)
        title = comic_cell.find('a').string
        comic_imgs = soup.find_all('img', id='comic_image')
        cleaned_date = remove_st_nd_rd_th_from_date(date_cell.string)
        published = string_to_date(cleaned_date, "%B %d, %Y, %I:%M %p")
        assert len(comic_imgs) == 1
        assert all(img.get('alt') == img.get('title') for img in comic_imgs)
        return {
            'num': num,
            'title': title,
            'alt': comic_imgs[0].get('alt', ''),
            'img': [img['src'] for img in comic_imgs],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2249
2250
2251
class LonnieMillsap(GenericNavigableComic):
    """Retriever for Lonnie Millsap's comics.

    Navigation and first-comic lookup are delegated to the shared
    `get_link_rel_next` and `simulate_first_link` helpers.
    """
    name = 'millsap'
    long_name = 'Lonnie Millsap'
    url = 'http://www.lonniemillsap.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.lonniemillsap.com/?p=42'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, author, image urls and date read from a comic page."""
        heading = soup.find('h2', class_='post-title').string
        # Author, date and images are all looked up inside the post body.
        content = soup.find('div', class_='post-content')
        writer = content.find("span", class_="post-author").find("a").string
        raw_date = content.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        images = content.find("div", class_="entry").find_all("img")
        return {
            'title': heading,
            'author': writer,
            'img': [image['src'] for image in images],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2277
2278
2279
class LinsEditions(GenericNavigableComic):
    """Retriever for L.I.N.S. Editions comics, read from page meta tags."""
    # Also on http://linscomics.tumblr.com
    # Now on https://warandpeas.com
    name = 'lins'
    long_name = 'L.I.N.S. Editions'
    url = 'https://linsedition.com'
    _categories = ('LINS', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'https://linsedition.com/2011/09/07/l-i-n-s/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, image urls and date taken from the page metadata."""
        heading = soup.find('meta', property='og:title')['content']
        image_metas = soup.find_all('meta', property='og:image')
        timestamp = soup.find('meta', property='article:published_time')['content']
        # Only the first 10 characters (the "YYYY-MM-DD" part) are parsed.
        published = string_to_date(timestamp[:10], "%Y-%m-%d")
        return {
            'title': heading,
            'img': [meta['content'] for meta in image_metas],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2305
2306
2307
class ThorsThundershack(GenericNavigableComic):
    """Retriever for Thor's Thundershack comics."""
    # Also on http://tapastic.com/series/Thors-Thundershac
    name = 'thor'
    long_name = 'Thor\'s Thundershack'
    url = 'http://www.thorsthundershack.com'
    _categories = ('THOR', )
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'first' navigation anchor found on the home page."""
        home = get_soup_at_url(cls.url)
        return home.find('a', class_='first navlink')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the next (or previous) comic anchor, or None if there is none."""
        wanted_rel = 'next' if next_ else 'prev'
        candidates = last_soup.find_all('a', rel=wanted_rel)
        # Anchors whose href is the bare '/comic' page are skipped.
        return next((a for a in candidates if a['href'] != '/comic'), None)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the comic record (images, date, author, texts) for a page."""
        heading = soup.find('meta', attrs={'name': 'description'})["content"]
        body_text = soup.find('div', itemprop='articleBody').text
        writer = soup.find('span', itemprop='author copyrightHolder').string
        images = soup.find_all('img', itemprop='image')
        for image in images:
            assert image['title'] == image['alt']
        if images:
            hover_text = images[0]['alt']
        else:
            hover_text = ""
        raw_date = soup.find('time', itemprop='datePublished')["datetime"]
        published = string_to_date(raw_date, "%Y-%m-%d %H:%M:%S")
        return {
            # Image sources are resolved against the site url.
            'img': [urljoin_wrapper(cls.url, image['src']) for image in images],
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'author': writer,
            'title': heading,
            'alt': hover_text,
            'description': body_text,
        }
2350
2351
2352 View Code Duplication
class GerbilWithAJetpack(GenericNavigableComic):
    """Retriever for Gerbil With A Jetpack comics."""
    name = 'gerbil'
    long_name = 'Gerbil With A Jetpack'
    url = 'http://gerbilwithajetpack.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return images, title, alt text, author and date for a comic page."""
        heading = soup.find('h2', class_='post-title').string
        writer = soup.find("span", class_="post-author").find("a").string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        images = soup.find("div", id="comic").find_all("img")
        # The first image's alt text is the reference all images must match.
        hover_text = images[0]['alt']
        for image in images:
            assert image['alt'] == image['title'] == hover_text
        return {
            'img': [image['src'] for image in images],
            'title': heading,
            'alt': hover_text,
            'author': writer,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2379
2380
2381 View Code Duplication
class EveryDayBlues(GenericNavigableComic):
    """Retriever for Every Day Blues comics (dates parsed with a German locale)."""
    name = "blues"
    long_name = "Every Day Blues"
    url = "http://everydayblues.net"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return images, title, author and date for a comic page."""
        heading = soup.find("h2", class_="post-title").string
        writer = soup.find("span", class_="post-author").find("a").string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%d. %B %Y", "de_DE.utf8")
        images = soup.find("div", id="comic").find_all("img")
        # Every image must carry the post title as both alt and title text.
        for image in images:
            assert image['alt'] == image['title'] == heading
        assert len(images) <= 1
        return {
            'img': [image['src'] for image in images],
            'title': heading,
            'author': writer,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2407
2408
2409
class BiterComics(GenericNavigableComic):
    """Retriever for Biter Comics."""
    name = "biter"
    long_name = "Biter Comics"
    url = "http://www.bitercomics.com"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return images, title, alt text, author and date for a comic page."""
        heading = soup.find("h1", class_="entry-title").string
        writer = soup.find("span", class_="author vcard").find("a").string
        raw_date = soup.find("span", class_="entry-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        images = soup.find("div", id="comic").find_all("img")
        for image in images:
            assert image['alt'] == image['title']
        # Exactly one image is expected per page.
        assert len(images) == 1
        hover_text = images[0]['alt']
        return {
            'img': [image['src'] for image in images],
            'title': heading,
            'alt': hover_text,
            'author': writer,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2437
2438
2439 View Code Duplication
class TheAwkwardYeti(GenericNavigableComic):
    """Retriever for The Awkward Yeti comics."""
    # Also on http://www.gocomics.com/the-awkward-yeti
    # Also on http://larstheyeti.tumblr.com
    # Also on https://tapastic.com/series/TheAwkwardYeti
    name = 'yeti'
    long_name = 'The Awkward Yeti'
    url = 'http://theawkwardyeti.com'
    _categories = ('YETI', )
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return images, title and date for a comic page."""
        heading = soup.find('h2', class_='post-title').string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        images = soup.find("div", id="comic").find_all("img")
        # Only the first image is required to have matching alt/title text.
        assert all(image['alt'] == image['title'] for image in images[:1])
        return {
            'img': [image['src'] for image in images],
            'title': heading,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2466
2467
2468
class PleasantThoughts(GenericNavigableComic):
    """Retriever for Pleasant Thoughts comics."""
    name = 'pleasant'
    long_name = 'Pleasant Thoughts'
    url = 'http://pleasant-thoughts.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the title and image urls found in the post content."""
        content = soup.find('div', class_='post-content')
        heading = content.find('h2', class_='post-title').string
        entry = content.find("div", class_="entry")
        image_urls = [image['src'] for image in entry.find_all("img")]
        return {
            'title': heading,
            'img': image_urls,
        }
2486
2487
2488
class MisterAndMe(GenericNavigableComic):
    """Retriever for Mister & Me comics."""
    # Also on http://www.gocomics.com/mister-and-me
    # Also on https://tapastic.com/series/Mister-and-Me
    name = 'mister'
    long_name = 'Mister & Me'
    url = 'http://www.mister-and-me.com'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return images, title, alt text, author and date for a comic page."""
        heading = soup.find('h2', class_='post-title').string
        writer = soup.find("span", class_="post-author").find("a").string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        images = soup.find("div", id="comic").find_all("img")
        for image in images:
            assert image['alt'] == image['title']
        # A page may have no image at all, but never more than one.
        assert len(images) <= 1
        if images:
            hover_text = images[0]['alt']
        else:
            hover_text = ""
        return {
            'img': [image['src'] for image in images],
            'title': heading,
            'alt': hover_text,
            'author': writer,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2518
2519
2520
class LastPlaceComics(GenericNavigableComic):
    """Retriever for Last Place Comics."""
    name = 'lastplace'
    long_name = 'Last Place Comics'
    url = "http://lastplacecomics.com"
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return images, title, alt text, author and date for a comic page."""
        heading = soup.find('h2', class_='post-title').string
        writer = soup.find("span", class_="post-author").find("a").string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        images = soup.find("div", id="comic").find_all("img")
        for image in images:
            assert image['alt'] == image['title']
        # Zero or one image is accepted.
        assert len(images) <= 1
        if images:
            hover_text = images[0]['alt']
        else:
            hover_text = ""
        return {
            'img': [image['src'] for image in images],
            'title': heading,
            'alt': hover_text,
            'author': writer,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2548
2549
2550
class TalesOfAbsurdity(GenericNavigableComic):
    """Retriever for Tales of Absurdity comics."""
    # Also on http://tapastic.com/series/Tales-Of-Absurdity
    # Also on http://talesofabsurdity.tumblr.com
    name = 'absurdity'
    long_name = 'Tales of Absurdity'
    url = 'http://talesofabsurdity.com'
    _categories = ('ABSURDITY', )
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return images, title, alt text, author and date for a comic page."""
        heading = soup.find('h2', class_='post-title').string
        writer = soup.find("span", class_="post-author").find("a").string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        images = soup.find("div", id="comic").find_all("img")
        for image in images:
            assert image['alt'] == image['title']
        # Pages without any image yield an empty alt text.
        if images:
            hover_text = images[0]['alt']
        else:
            hover_text = ""
        return {
            'img': [image['src'] for image in images],
            'title': heading,
            'alt': hover_text,
            'author': writer,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2580
2581
2582
class EndlessOrigami(GenericEmptyComic, GenericNavigableComic):
    """Retriever for Endless Origami comics."""
    name = "origami"
    long_name = "Endless Origami"
    url = "http://endlessorigami.com"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return images, title, alt text, author and date for a comic page."""
        heading = soup.find('h2', class_='post-title').string
        writer = soup.find("span", class_="post-author").find("a").string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        images = soup.find("div", id="comic").find_all("img")
        for image in images:
            assert image['alt'] == image['title']
        # Pages without any image yield an empty alt text.
        if images:
            hover_text = images[0]['alt']
        else:
            hover_text = ""
        return {
            'img': [image['src'] for image in images],
            'title': heading,
            'alt': hover_text,
            'author': writer,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2609
2610
2611
class PlanC(GenericNavigableComic):
    """Retriever for Plan C comics."""
    name = 'planc'
    long_name = 'Plan C'
    url = 'http://www.plancomic.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, image urls and date for a comic page."""
        heading = soup.find('h2', class_='post-title').string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        comic_div = soup.find('div', id='comic')
        image_urls = [image['src'] for image in comic_div.find_all('img')]
        return {
            'title': heading,
            'img': image_urls,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2633
2634
2635 View Code Duplication
class BuniComic(GenericNavigableComic):
    """Retriever for Buni comics."""
    name = 'buni'
    long_name = 'BuniComics'
    url = 'http://www.bunicomic.com'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the image urls and the title (taken from the image itself)."""
        images = soup.find('div', id='comic').find_all('img')
        for image in images:
            assert image['alt'] == image['title']
        # Exactly one image is expected; its title text names the comic.
        assert len(images) == 1
        image_urls = [image['src'] for image in images]
        return {
            'img': image_urls,
            'title': images[0]['title'],
        }
2653
2654
2655
class GenericCommitStrip(GenericNavigableComic):
    """Shared retrieval logic for the Commit Strip language editions.

    Subclasses are expected to replace the `first_url` placeholder.
    """
    get_navi_link = get_a_rel_next
    get_first_comic_link = simulate_first_link
    first_url = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return titles, description and image urls for a comic page."""
        summary = soup.find('meta', property='og:description')['content']
        heading = soup.find('meta', property='og:title')['content']
        images = soup.find('div', class_='entry-content').find_all('img')
        # Secondary title: the images' title texts joined by spaces
        # (an image without a title contributes an empty string).
        hover_titles = [image.get('title', '') for image in images]
        secondary_title = ' '.join(hover_titles)
        image_urls = []
        for image in images:
            # Sources are converted to plain ASCII URIs, then resolved
            # against the edition url.
            ascii_src = convert_iri_to_plain_ascii_uri(image['src'])
            image_urls.append(urljoin_wrapper(cls.url, ascii_src))
        return {
            'title': heading,
            'title2': secondary_title,
            'description': summary,
            'img': image_urls,
        }
2674
2675
2676
class CommitStripFr(GenericCommitStrip):
    """French edition of Commit Strip."""
    name = "commit_fr"
    long_name = "Commit Strip (Fr)"
    url = "http://www.commitstrip.com/fr"
    _categories = ("FRANCAIS", )
    # Replaces the NotImplemented placeholder of GenericCommitStrip.
    first_url = "http://www.commitstrip.com/fr/2012/02/22/interview/"
2683
2684
2685
class CommitStripEn(GenericCommitStrip):
    """English edition of Commit Strip."""
    name = "commit_en"
    long_name = "Commit Strip (En)"
    url = "http://www.commitstrip.com/en"
    # Replaces the NotImplemented placeholder of GenericCommitStrip.
    first_url = "http://www.commitstrip.com/en/2012/02/22/interview/"
2691
2692
2693 View Code Duplication
class GenericBoumerie(GenericNavigableComic):
    """Shared retrieval logic for the Boumeries language editions.

    Subclasses are expected to replace the `date_format` and `lang`
    placeholders, which are passed to `string_to_date`.
    """
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next
    date_format = NotImplemented
    lang = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return short url, images, title, author and date for a comic page."""
        heading = soup.find('h2', class_='post-title').string
        shortlink = soup.find('link', rel='shortlink')['href']
        writer = soup.find("span", class_="post-author").find("a").string
        raw_date = soup.find('span', class_='post-date').string
        # Format and locale come from the concrete edition class.
        published = string_to_date(raw_date, cls.date_format, cls.lang)
        images = soup.find('div', id='comic').find_all('img')
        for image in images:
            assert image['alt'] == image['title']
        return {
            'short_url': shortlink,
            'img': [image['src'] for image in images],
            'title': heading,
            'author': writer,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2719
2720
2721
class BoumerieEn(GenericBoumerie):
    """English edition of Boumeries."""
    name = "boumeries_en"
    long_name = "Boumeries (En)"
    url = "http://comics.boumerie.com"
    # Date layout and locale used by the inherited get_comic_info.
    date_format = "%B %d, %Y"
    lang = "en_GB.UTF-8"
2728
2729
2730
class BoumerieFr(GenericBoumerie):
    """French edition of Boumeries."""
    name = "boumeries_fr"
    long_name = "Boumeries (Fr)"
    url = "http://bd.boumerie.com"
    _categories = ("FRANCAIS", )
    # Date layout and locale used by the inherited get_comic_info.
    date_format = "%A, %d %B %Y"
    lang = "fr_FR.utf8"
2738
2739
2740
class UnearthedComics(GenericNavigableComic):
    """Class to retrieve Unearthed comics."""
    # Also on http://tapastic.com/series/UnearthedComics
    # Also on http://unearthedcomics.tumblr.com
    name = 'unearthed'
    long_name = 'Unearthed Comics'
    url = 'http://unearthedcomics.com'
    _categories = ('UNEARTHED', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://unearthedcomics.com/comics/world-with-turn-signals/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        Returns a dict with the title (empty string when the page has no
        h1/h2), the og:description text (None when the meta tag or its
        content is missing), the short url, the image urls and the
        publication date.
        """
        short_url = soup.find('link', rel='shortlink')['href']
        title_elt = soup.find('h1') or soup.find('h2')
        title = title_elt.string if title_elt else ""
        # Store the text of the meta tag, not the tag object itself, so that
        # 'description' is a plain string like in the other comic classes.
        desc_elt = soup.find('meta', property='og:description')
        desc = desc_elt.get('content') if desc_elt is not None else None
        date_str = soup.find('time', class_='published updated hidden')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        post = soup.find('div', class_="entry content entry-content type-portfolio")
        imgs = post.find_all('img')
        return {
            'title': title,
            'description': desc,
            'url2': short_url,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
2772
2773
2774
class Optipess(GenericNavigableComic):
    """Retriever for Optipess comics."""
    name = 'optipess'
    long_name = 'Optipess'
    url = 'http://www.optipess.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, alt text, author, image urls and date for a page."""
        heading = soup.find('h2', class_='post-title').string
        writer = soup.find("span", class_="post-author").find("a").string
        # The comic container may be absent; the page then has no images.
        comic_div = soup.find('div', id='comic')
        if comic_div:
            images = comic_div.find_all('img')
        else:
            images = []
        if images:
            hover_text = images[0]['title']
        else:
            hover_text = ""
        for image in images:
            assert image['alt'] == image['title'] == hover_text
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, "%B %d, %Y")
        return {
            'title': heading,
            'alt': hover_text,
            'author': writer,
            'img': [image['src'] for image in images],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2802
2803
2804
class PainTrainComic(GenericNavigableComic):
    """Class to retrieve Pain Train Comics."""
    name = 'paintrain'
    long_name = 'Pain Train Comics'
    url = 'http://paintraincomic.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        The comic number is parsed from the page's shortlink, which must
        look like '<cls.url>/?p=<digits>'. Returns a dict with the short
        url, number, image urls, publication date, alt text and title.
        """
        title = soup.find('h2', class_='post-title').string
        short_url = soup.find('link', rel='shortlink')['href']
        # The site url is escaped so that its dots only match literal dots
        # instead of acting as regex wildcards.
        short_url_re = re.compile(r'^%s/\?p=([0-9]*)' % re.escape(cls.url))
        num = int(short_url_re.match(short_url).groups()[0])
        imgs = soup.find('div', id='comic').find_all('img')
        # The first image's title is the reference all images must match.
        alt = imgs[0]['title']
        assert all(i['alt'] == i['title'] == alt for i in imgs)
        date_str = soup.find('span', class_='post-date').string
        day = string_to_date(date_str, "%d/%m/%Y")
        return {
            'short_url': short_url,
            'num': num,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'alt': alt,
            'title': title,
        }
2834
2835
2836
class MoonBeard(GenericNavigableComic):
    """Class to retrieve MoonBeard comics."""
    # Also on http://blog.squiresjam.es/moonbeard
    # Also on http://www.webtoons.com/en/comedy/moon-beard/list?title_no=471
    name = 'moonbeard'
    long_name = 'Moon Beard'
    url = 'http://moonbeard.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        The comic number is parsed from the page's shortlink, which must
        look like '<cls.url>/?p=<digits>'. Returns a dict with the short
        url, number, image urls, publication date, title, tags, alt text
        and author.
        """
        title = soup.find('h2', class_='post-title').string
        short_url = soup.find('link', rel='shortlink')['href']
        # The site url is escaped so that its dots only match literal dots
        # instead of acting as regex wildcards.
        short_url_re = re.compile(r'^%s/\?p=([0-9]*)' % re.escape(cls.url))
        num = int(short_url_re.match(short_url).groups()[0])
        imgs = soup.find('div', id='comic').find_all('img')
        # The first image's title is the reference all images must match.
        alt = imgs[0]['title']
        assert all(i['alt'] == i['title'] == alt for i in imgs)
        date_str = soup.find('span', class_='post-date').string
        day = string_to_date(date_str, "%B %d, %Y")
        # All article:tag meta values, joined by spaces.
        tags = ' '.join(t['content'] for t in soup.find_all('meta', property='article:tag'))
        # NOTE(review): sibling classes read the <a> nested in the
        # post-author span; here the span's own string is used - confirm
        # this matches the site's markup.
        author = soup.find('span', class_='post-author').string
        return {
            'short_url': short_url,
            'num': num,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'title': title,
            'tags': tags,
            'alt': alt,
            'author': author,
        }
2872
2873
2874
class AHamADay(GenericNavigableComic):
    """Class to retrieve A Ham A Day comics."""
    name = 'ham'
    long_name = 'A Ham A Day'
    url = 'http://www.ahammaday.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.ahammaday.com/today/3/6/french'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns the anchor found in the relevant navigation item, or None
        when the page has no such item (or the item holds no anchor).
        """
        # prev is next / next is prev
        li = last_soup.find('li', class_='previous' if next_ else 'next')
        # Guard against a missing navigation item instead of raising
        # AttributeError, as LittleLifeLines.get_navi_link does.
        return li.find('a') if li is not None else None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        Returns a dict with the image urls (from the itemprop='image' meta
        tags), the og:title, the author and the publication date.
        """
        date_str = soup.find('time', class_='published')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        author = soup.find('span', class_='blog-author').find('a').string
        title = soup.find('meta', property='og:title')['content']
        imgs = soup.find_all('meta', itemprop='image')
        return {
            'img': [i['content'] for i in imgs],
            'title': title,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
        }
2905
2906
2907
class LittleLifeLines(GenericNavigableComic):
    """Retriever for Little Life Lines comics."""
    # Also on https://little-life-lines.tumblr.com
    name = 'life'
    long_name = 'Little Life Lines'
    url = 'http://www.littlelifelines.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.littlelifelines.com/comics/well-done'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the next (or previous) comic anchor, or None if missing."""
        # The site's labels are swapped: prev is next / next is prev
        item_class = 'prev' if next_ else 'next'
        item = last_soup.find('li', class_=item_class)
        if not item:
            return None
        return item.find('a')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, alt text, description, author, date and image urls."""
        heading = soup.find('meta', property='og:title')['content']
        summary = soup.find('meta', property='og:description')['content']
        raw_date = soup.find('time', class_='published')['datetime']
        published = string_to_date(raw_date, "%Y-%m-%d")
        writer = soup.find('a', rel='author').string
        body = soup.find('div', class_="body entry-content")
        # Images without a src attribute are dropped.
        images = []
        for image in body.find_all('img'):
            if image.get('src') is not None:
                images.append(image)
        hover_text = images[0]['alt']
        return {
            'title': heading,
            'alt': hover_text,
            'description': summary,
            'author': writer,
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [image['src'] for image in images],
        }
2946
2947
2948
class GenericWordPressInkblot(GenericNavigableComic):
    """Shared retrieval logic for comics using WordPress with Inkblot."""
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'first webcomic' anchor found on the site's main page."""
        first_link_classes = 'webcomic-link webcomic1-link first-webcomic-link first-webcomic1-link'
        home = get_soup_at_url(cls.url)
        return home.find('a', class_=first_link_classes)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, date and image urls for a comic page."""
        heading = soup.find('meta', property='og:title')['content']
        images = soup.find('div', class_='webcomic-image').find_all('img')
        timestamp = soup.find('meta', property='article:published_time')['content']
        # Only the first 10 characters (the "YYYY-MM-DD" part) are parsed.
        published = string_to_date(timestamp[:10], "%Y-%m-%d")
        return {
            'title': heading,
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [image['src'] for image in images],
        }
2971
2972
2973
class EverythingsStupid(GenericWordPressInkblot):
    """Retriever for Everything's Stupid comics (WordPress/Inkblot site)."""
    # Also on http://tapastic.com/series/EverythingsStupid
    # Also on http://www.webtoons.com/en/challenge/everythings-stupid/list?title_no=14591
    # Also on http://everythingsstupidcomics.tumblr.com
    name = "stupid"
    long_name = "Everything's Stupid"
    url = "http://everythingsstupid.net"
2981
2982
2983
class TheIsmComics(GenericWordPressInkblot):
    """Retriever for The Ism comics (WordPress/Inkblot site)."""
    # Also on https://tapastic.com/series/TheIsm (?)
    name = "theism"
    long_name = "The Ism"
    url = "http://www.theism-comics.com"
2989
2990
2991
class WoodenPlankStudios(GenericWordPressInkblot):
    """Class to retrieve Wooden Plank Studios comics.

    All retrieval logic is inherited from GenericWordPressInkblot; only
    the identifying attributes are defined here.
    """
    name = 'woodenplank'
    long_name = 'Wooden Plank Studios'
    url = 'http://woodenplankstudios.com'
2996
2997
2998
class ElectricBunnyComic(GenericNavigableComic):
    """Class to retrieve Electric Bunny Comics.

    Navigation elements are looked up through images whose `alt` text is
    'First', 'Next' or 'Back'; the parent element of the image is returned
    as the link.
    """
    # Also on http://electricbunnycomics.tumblr.com
    name = 'bunny'
    long_name = 'Electric Bunny Comic'
    url = 'http://www.electricbunnycomics.com/View/Comic/153/Welcome+to+Hell'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics."""
        first_img = get_soup_at_url(cls.url).find('img', alt='First')
        return first_img.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns None when the corresponding navigation image is absent.
        """
        wanted_alt = 'Next' if next_ else 'Back'
        nav_img = last_soup.find('img', alt=wanted_alt)
        if not nav_img:
            return None
        return nav_img.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Title and image urls both come from the page's Open Graph metadata.
        """
        title = soup.find('meta', property='og:title')['content']
        og_images = soup.find_all('meta', property='og:image')
        img_urls = [meta['content'] for meta in og_images]
        return {
            'title': title,
            'img': img_urls,
        }
3026
3027
3028
class SheldonComics(GenericNavigableComic):
    """Class to retrieve Sheldon comics."""
    # Also on http://www.gocomics.com/sheldon
    name = 'sheldon'
    long_name = 'Sheldon Comics'
    url = 'http://www.sheldoncomics.com'

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics."""
        return get_soup_at_url(cls.url).find("a", id="nav-first")

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns the first 'nav-next' (or 'nav-prev') anchor whose href is
        not the site's home url, or None when there is no such anchor.
        """
        nav_id = "nav-next" if next_ else "nav-prev"
        for link in last_soup.find_all("a", id=nav_id):
            # Anchors pointing at the bare home page are skipped (presumably
            # placeholders at the ends of the archive - to be confirmed).
            # The class url is used rather than a second hard-coded copy.
            if link['href'] != cls.url:
                return link
        return None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Exactly one image is expected in the 'comic-foot' div and its
        'alt' and 'title' attributes are expected to be identical; the
        title of the comic is taken from that image.
        """
        imgs = soup.find("div", id="comic-foot").find_all("img")
        assert all(i['alt'] == i['title'] for i in imgs)
        assert len(imgs) == 1
        title = imgs[0]['title']
        return {
            'title': title,
            'img': [i['src'] for i in imgs],
        }
3059
3060
3061
class Ubertool(GenericNavigableComic):
    """Class to retrieve Ubertool comics.

    Navigation is delegated to the shared 'comicnavbase' helpers.
    """
    # Also on http://ubertool.tumblr.com
    # Also on https://tapastic.com/series/ubertool
    name = 'ubertool'
    long_name = 'Ubertool'
    url = 'http://ubertoolcomic.com'
    _categories = ('UBERTOOL', )
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_a_comicnavbase_comicnavnext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with the image urls, the title and the publication
        date (day, month, year) parsed from the 'post-date' span.
        """
        title = soup.find('h2', class_='post-title').string
        post_date = soup.find('span', class_='post-date')
        # Dates are written like "January 31, 2015".
        day = string_to_date(post_date.string, "%B %d, %Y")
        comic_div = soup.find('div', id='comic')
        img_urls = [img['src'] for img in comic_div.find_all('img')]
        return {
            'img': img_urls,
            'title': title,
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
3086
3087
3088
class EarthExplodes(GenericNavigableComic):
    """Class to retrieve The Earth Explodes comics.

    The first comic is not searched on the site: its url is given
    explicitly through `first_url`.
    """
    name = 'earthexplodes'
    long_name = 'The Earth Explodes'
    url = 'http://www.earthexplodes.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.earthexplodes.com/comics/000/'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic."""
        anchor_id = 'next' if next_ else 'prev'
        return last_soup.find('a', id=anchor_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The 'alt' entry holds the 'title' attribute of the first image
        (empty string when the attribute is missing).
        """
        title = soup.find('title').string
        image_div = soup.find('div', id='image')
        imgs = image_div.find_all('img')
        first_img = imgs[0]
        alt = first_img.get('title', '')
        # Image sources are joined to the site url before being returned.
        img_urls = [urljoin_wrapper(cls.url, img['src']) for img in imgs]
        return {
            'img': img_urls,
            'title': title,
            'alt': alt,
        }
3113
3114
3115
class CubeDrone(GenericNavigableComic):
    """Class to retrieve Cube Drone comics.

    Navigation links are found through the glyphicon spans they contain:
    the parent element of the span is returned as the link.
    """
    name = 'cubedrone'
    long_name = 'Cube Drone'
    url = 'http://cube-drone.com/comics'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics."""
        home_soup = get_soup_at_url(cls.url)
        backward_icon = home_soup.find('span', class_='glyphicon glyphicon-backward')
        return backward_icon.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic."""
        direction = 'right' if next_ else 'left'
        chevron = last_soup.find('span', class_='glyphicon glyphicon-chevron-' + direction)
        return chevron.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Title and secondary url come from the Twitter card metadata;
        'title2' and 'alt' come from the first comic image.
        """
        twitter_title = soup.find('meta', attrs={'name': 'twitter:title'})
        title = twitter_title['content']
        twitter_url = soup.find('meta', attrs={'name': 'twitter:url'})
        url2 = twitter_url['content']
        # Date retrieval is currently disabled:
        # date_str = soup.find('h2', class_='comic_title').find('small').string
        # day = string_to_date(date_str, "%B %d, %Y, %I:%M %p")
        imgs = soup.find_all('img', class_='comic img-responsive')
        main_img = imgs[0]
        title2 = main_img['title']
        alt = main_img['alt']
        return {
            'url2': url2,
            'title': title,
            'title2': title2,
            'alt': alt,
            'img': [img['src'] for img in imgs],
        }
3150
3151
3152
class MakeItStoopid(GenericNavigableComic):
    """Class to retrieve Make It Stoopid Comics.

    All navigation goes through `get_nav`, which returns the five
    navigation elements of a page in a fixed order.
    """
    name = 'stoopid'
    long_name = 'Make it stoopid'
    url = 'http://makeitstoopid.com/comic.php'

    @classmethod
    def get_nav(cls, soup):
        """Get the navigation elements from soup object.

        The elements of class 'cnav' are expected to appear twice on the
        page (first five equal to the remaining ones). The result lists
        the first five - begin, prev, archive, next, end - where elements
        without an 'href' attribute are replaced by None.
        """
        nav_elts = soup.find_all(class_='cnav')
        first_set = nav_elts[:5]
        second_set = nav_elts[5:]
        assert first_set == second_set
        result = []
        for elt in first_set:
            has_target = elt.get('href') is not None
            result.append(elt if has_target else None)
        return result

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics."""
        home_nav = cls.get_nav(get_soup_at_url(cls.url))
        return home_nav[0]

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic."""
        # Index 3 is the 'next' element, index 1 the 'prev' one.
        position = 3 if next_ else 1
        return cls.get_nav(last_soup)[position]

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The title is read from the 'title' attribute of the link that led
        to the page.
        """
        title = link['title']
        comic_imgs = soup.find_all('img', id='comicimg')
        return {
            'title': title,
            'img': [img['src'] for img in comic_imgs],
        }
3186
3187
3188
class MarketoonistComics(GenericNavigableComic):
    """Class to retrieve Marketoonist Comics.

    The first comic url is given explicitly through `first_url` and
    navigation relies on the shared `get_link_rel_next` helper.
    """
    name = 'marketoonist'
    long_name = 'Marketoonist'
    url = 'https://marketoonist.com/cartoons'
    get_first_comic_link = simulate_first_link
    get_navi_link = get_link_rel_next
    first_url = 'https://marketoonist.com/2002/10/the-8-types-of-brand-managers-2.html'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Images, publication date and title are all read from the page's
        meta tags.
        """
        og_images = soup.find_all('meta', property='og:image')
        published = soup.find('meta', property='article:published_time')['content']
        # Only the first 10 characters (the YYYY-MM-DD part) are parsed.
        day = string_to_date(published[:10], "%Y-%m-%d")
        title = soup.find('meta', property='og:title')['content']
        info = {
            'img': [meta['content'] for meta in og_images],
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'title': title,
        }
        return info
3211
3212
3213
class ConsoliaComics(GenericNavigableComic):
    """Class to retrieve Consolia comics.

    Navigation anchors are looked up inside spans of class 'first',
    'prev' and 'next'.
    """
    name = 'consolia'
    long_name = 'consolia'
    url = 'https://consolia-comic.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics."""
        first_span = get_soup_at_url(cls.url).find('span', class_='first')
        return first_span.find('a')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic."""
        span_class = 'next' if next_ else 'prev'
        nav_span = last_soup.find('span', class_=span_class)
        return nav_span.find('a')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The 'alt' entry holds the 'title' attribute of the first image of
        the 'comic' div; the date comes from the page's `time` element.
        """
        title = soup.find('meta', property='og:title')['content']
        time_tag = soup.find('time')
        day = string_to_date(time_tag["datetime"], "%Y-%m-%d")
        comic_div = soup.find('div', id='comic')
        imgs = comic_div.find_all('img')
        alt = imgs[0]['title']
        # Retrieval of the accompanying text is currently disabled:
        # article = soup.find('div', id='blag')
        # text = article.encode_contents()
        return {
            'title': title,
            'alt': alt,
            'img': [img['src'] for img in imgs],
            # 'text': text,
            'day': day.day,
            'month': day.month,
            'year': day.year,
        }
3249
3250
3251
class TuMourrasMoinsBete(GenericNavigableComic):
    """Class to retrieve Tu Mourras Moins Bete comics.

    The first comic url is given explicitly through `first_url`;
    navigation uses the blog pager links.
    """
    name = 'mourrasmoinsbete'
    long_name = 'Tu Mourras Moins Bete'
    url = 'http://tumourrasmoinsbete.blogspot.fr'
    _categories = ('FRANCAIS', )
    get_first_comic_link = simulate_first_link
    first_url = 'http://tumourrasmoinsbete.blogspot.fr/2008/06/essai.html'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic."""
        # The "newer" pager link leads to the next comic, "older" to the previous one.
        if next_:
            pager_id = 'Blog1_blog-pager-newer-link'
        else:
            pager_id = 'Blog1_blog-pager-older-link'
        return last_soup.find('a', id=pager_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with the image urls found in the article body, the
        author and the page title.
        """
        title = soup.find('title').string
        article_body = soup.find('div', itemprop='description articleBody')
        imgs = article_body.find_all('img')
        author = soup.find('span', itemprop='author').string
        return {
            'img': [img['src'] for img in imgs],
            'author': author,
            'title': title,
        }
3276
3277
3278
class GeekAndPoke(GenericNavigableComic):
    """Class to retrieve Geek And Poke comics.

    The first comic url is given explicitly through `first_url`.
    """
    name = 'geek'
    long_name = 'Geek And Poke'
    url = 'http://geek-and-poke.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://geek-and-poke.com/geekandpoke/2006/8/27/a-new-place-for-a-not-so-old-blog.html'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic."""
        # NOTE(review): the 'prev-item' class is used for the next comic and
        # 'next-item' for the previous one - presumably because the site
        # orders its posts from newest to oldest; confirm against the site.
        return last_soup.find('a', class_='prev-item' if next_ else 'next-item')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with the title, the alternative text of the first
        image (empty string when there is no image or no 'alt' attribute),
        the description, the author, the publication date (day, month,
        year) and the image urls joined to the site url.
        """
        title = soup.find('meta', property='og:title')['content']
        desc = soup.find('meta', property='og:description')['content']
        date_str = soup.find('time', class_='published')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        author = soup.find('a', rel='author').string
        # The content lives in one of two differently named divs.
        div_content = (soup.find('div', class_="body entry-content") or
                       soup.find('div', class_="special-content"))
        imgs = div_content.find_all('img')
        # Images without a 'src' attribute cannot be downloaded: drop them.
        imgs = [i for i in imgs if i.get('src') is not None]
        # NOTE(review): `'title' not in i` tests the children of the tag,
        # not its attributes, so this check may never look at the 'title'
        # attribute. Left unchanged to avoid introducing new failures -
        # confirm the intent before tightening it.
        assert all('title' not in i or i['alt'] == i['title'] for i in imgs)
        # 'alt' is a string in every case (it used to be an empty list when
        # no image was found, unlike in every other comic).
        alt = imgs[0].get('alt', "") if imgs else ""
        return {
            'title': title,
            'alt': alt,
            'description': desc,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
        }
3316
3317
3318
class GloryOwlComix(GenericNavigableComic):
    """Class to retrieve Glory Owl comics.

    The first comic url is given explicitly through `first_url`;
    navigation uses the blog pager links.
    """
    name = 'gloryowl'
    long_name = 'Glory Owl'
    url = 'http://gloryowlcomix.blogspot.fr'
    _categories = ('NSFW', 'FRANCAIS')
    get_first_comic_link = simulate_first_link
    first_url = 'http://gloryowlcomix.blogspot.fr/2013/02/1_7.html'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic."""
        # The "newer" pager link leads to the next comic, "older" to the previous one.
        if next_:
            pager_id = 'Blog1_blog-pager-newer-link'
        else:
            pager_id = 'Blog1_blog-pager-older-link'
        return last_soup.find('a', id=pager_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Image urls are read from the page's `link rel="image_src"`
        elements.
        """
        title = soup.find('title').string
        image_links = soup.find_all('link', rel='image_src')
        author = soup.find('a', rel='author').string
        return {
            'img': [elt['href'] for elt in image_links],
            'author': author,
            'title': title,
        }
3343
3344
3345
class GenericTumblrV1(GenericComic):
    """Generic class to retrieve comics from Tumblr using the V1 API.

    Posts are listed through the '/api/read/' endpoint of the blog; only
    posts of type 'photo' are turned into comics.
    """
    _categories = ('TUMBLR', )

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generic implementation of get_next_comic for Tumblr comics."""
        for post in cls.get_posts(last_comic):
            comic = cls.get_comic_info(post)
            # Posts which are not comics (None) are silently skipped.
            if comic is None:
                continue
            yield comic

    @classmethod
    def get_url_from_post(cls, post):
        """Get the url of a post (its 'url' attribute)."""
        return post['url']

    @classmethod
    def get_api_url(cls):
        """Get the url of the API endpoint for the blog."""
        return urljoin_wrapper(cls.url, '/api/read/')

    @classmethod
    def get_comic_info(cls, post):
        """Get information about a particular comics.

        Returns None for any post whose type is not 'photo', otherwise a
        dict describing the comic (urls, date, title, tags, images, tumblr
        id and url of the API call for that single post).
        """
        if post['type'] != 'photo':
            return None
        tumblr_id = int(post['id'])
        api_url = cls.get_api_url() + '?id=%d' % (tumblr_id)
        timestamp = int(post['unix-timestamp'])
        day = datetime.datetime.fromtimestamp(timestamp).date()
        caption = post.find('photo-caption')
        if caption:
            title = caption.string
        else:
            title = ""
        tags = ' '.join(t.string for t in post.find_all('tag'))
        # Photos may appear in 'photo' tags and/or straight in the post
        photo_tags = post.find_all('photo') or [post]
        # Images are in multiple resolutions - taking the first one
        first_urls = [photo.find('photo-url') for photo in photo_tags]
        return {
            'url': cls.get_url_from_post(post),
            'url2': post['url-with-slug'],
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'title': title,
            'tags': tags,
            'img': [elt.string for elt in first_urls],
            'tumblr-id': tumblr_id,
            'api_url': api_url,
        }

    @classmethod
    def get_posts(cls, last_comic, nb_post_per_call=10):
        """Get posts using API. nb_post_per_call is max 50.

        Posts are retrieved from newer to older as per the tumblr v1 api
        but are returned in chronological order.

        When `last_comic` is provided, only the posts newer than it are
        returned. Nothing is returned when the previous post cannot be
        fetched anymore or when its url is not found among the posts.
        """
        waiting_for_url = last_comic['url'] if last_comic else None
        newest_first = []
        if last_comic is not None:
            # Sometimes, tumblr posts are deleted. When previous post is deleted, we
            # might end up spending a lot of time looking for something that
            # doesn't exist. Failing early and clearly might be a better option.
            last_api_url = last_comic['api_url']
            try:
                get_soup_at_url(last_api_url)
            except urllib.error.HTTPError:
                # Tell apart a missing post from an unreachable blog.
                try:
                    get_soup_at_url(cls.url)
                except urllib.error.HTTPError:
                    print("Did not find previous post nor main url %s" % cls.url)
                else:
                    print("Did not find previous post %s : it might have been deleted" % last_api_url)
                return reversed(newest_first)
        api_url = cls.get_api_url()
        # First call only gives the total number of posts.
        summary = get_soup_at_url(api_url).find('posts')
        start = int(summary['start'])
        total = int(summary['total'])
        assert start == 0
        for starting_num in range(0, total, nb_post_per_call):
            page_url = api_url + '?start=%d&num=%d' % (starting_num, nb_post_per_call)
            page = get_soup_at_url(page_url).find('posts')
            page_start = int(page['start'])
            page_total = int(page['total'])
            assert starting_num == page_start, "%d != %d" % (starting_num, page_start)
            # This may happen and should be handled in the future
            assert total == page_total, "%d != %d" % (total, page_total)
            for post in page.find_all('post'):
                # Stop as soon as the last known post is reached.
                if waiting_for_url and waiting_for_url == cls.get_url_from_post(post):
                    return reversed(newest_first)
                newest_first.append(post)
        if waiting_for_url is None:
            return reversed(newest_first)
        print("Did not find %s : there might be a problem" % waiting_for_url)
        return []
3438
3439
3440
class SaturdayMorningBreakfastCerealTumblr(GenericTumblrV1):
    """Class to retrieve Saturday Morning Breakfast Cereal comics.

    All retrieval logic is inherited from GenericTumblrV1; only the
    identifying attributes are defined here.
    """
    # Also on http://www.gocomics.com/saturday-morning-breakfast-cereal
    # Also on http://www.smbc-comics.com
    name = 'smbc-tumblr'
    long_name = 'Saturday Morning Breakfast Cereal (from Tumblr)'
    url = 'http://smbc-comics.tumblr.com'
    _categories = ('SMBC', )
3448
3449
3450
class IrwinCardozo(GenericTumblrV1):
    """Class to retrieve Irwin Cardozo Comics.

    All retrieval logic is inherited from GenericTumblrV1; only the
    identifying attributes are defined here.
    """
    name = 'irwinc'
    long_name = 'Irwin Cardozo'
    url = 'http://irwincardozocomics.tumblr.com'
3455
3456
3457
class AccordingToDevin(GenericTumblrV1):
    """Class to retrieve According To Devin comics.

    All retrieval logic is inherited from GenericTumblrV1; only the
    identifying attributes are defined here.
    """
    name = 'devin'
    long_name = 'According To Devin'
    url = 'http://accordingtodevin.tumblr.com'
3462
3463
3464
class ItsTheTieTumblr(GenericTumblrV1):
    """Class to retrieve It's the tie comics.

    All retrieval logic is inherited from GenericTumblrV1; only the
    identifying attributes are defined here.
    """
    # Also on http://itsthetie.com
    # Also on https://tapastic.com/series/itsthetie
    name = 'tie-tumblr'
    long_name = "It's the tie (from Tumblr)"
    url = "http://itsthetie.tumblr.com"
    _categories = ('TIE', )
3472
3473
3474
class OctopunsTumblr(GenericTumblrV1):
    """Class to retrieve Octopuns comics.

    All retrieval logic is inherited from GenericTumblrV1; only the
    identifying attributes are defined here.
    """
    # Also on http://www.octopuns.net
    name = 'octopuns-tumblr'
    long_name = 'Octopuns (from Tumblr)'
    url = 'http://octopuns.tumblr.com'
3480
3481
3482
class PicturesInBoxesTumblr(GenericTumblrV1):
    """Class to retrieve Pictures In Boxes comics.

    All retrieval logic is inherited from GenericTumblrV1; only the
    identifying attributes are defined here.
    """
    # Also on http://www.picturesinboxes.com
    name = 'picturesinboxes-tumblr'
    long_name = 'Pictures in Boxes (from Tumblr)'
    url = 'http://picturesinboxescomic.tumblr.com'
3488
3489
3490
class TubeyToonsTumblr(GenericTumblrV1):
    """Class to retrieve TubeyToons comics.

    All retrieval logic is inherited from GenericTumblrV1; only the
    identifying attributes are defined here.
    """
    # Also on http://tapastic.com/series/Tubey-Toons
    # Also on http://tubeytoons.com
    name = 'tubeytoons-tumblr'
    long_name = 'Tubey Toons (from Tumblr)'
    url = 'http://tubeytoons.tumblr.com'
    # NOTE(review): 'TUNEYTOONS' differs from the 'Tubey' spelling used
    # elsewhere in this class - check other users of this category before
    # renaming it.
    _categories = ('TUNEYTOONS', )
3498
3499
3500
class UnearthedComicsTumblr(GenericTumblrV1):
    """Class to retrieve Unearthed comics.

    All retrieval logic is inherited from GenericTumblrV1; only the
    identifying attributes are defined here.
    """
    # Also on http://tapastic.com/series/UnearthedComics
    # Also on http://unearthedcomics.com
    name = 'unearthed-tumblr'
    long_name = 'Unearthed Comics (from Tumblr)'
    url = 'http://unearthedcomics.tumblr.com'
    _categories = ('UNEARTHED', )
3508
3509
3510
class PieComic(GenericTumblrV1):
    """Class to retrieve Pie Comic comics.

    All retrieval logic is inherited from GenericTumblrV1; only the
    identifying attributes are defined here.
    """
    name = 'pie'
    long_name = 'Pie Comic'
    url = "http://piecomic.tumblr.com"
3515
3516
3517
class MrEthanDiamond(GenericTumblrV1):
    """Class to retrieve Mr Ethan Diamond comics.

    All retrieval logic is inherited from GenericTumblrV1; only the
    identifying attributes are defined here.
    """
    name = 'diamond'
    long_name = 'Mr Ethan Diamond'
    url = 'http://mrethandiamond.tumblr.com'
3522
3523
3524
class Flocci(GenericTumblrV1):
    """Class to retrieve floccinaucinihilipilification comics.

    All retrieval logic is inherited from GenericTumblrV1; only the
    identifying attributes are defined here.
    """
    name = 'flocci'
    long_name = 'floccinaucinihilipilification'
    url = "http://floccinaucinihilipilificationa.tumblr.com"
3529
3530
3531
class UpAndOut(GenericTumblrV1):
    """Class to retrieve Up & Out comics.

    All retrieval logic is inherited from GenericTumblrV1; only the
    identifying attributes are defined here.
    """
    # Also on http://tapastic.com/series/UP-and-OUT
    name = 'upandout'
    long_name = 'Up And Out (from Tumblr)'
    url = 'http://upandoutcomic.tumblr.com'
3537
3538
3539
class Pundemonium(GenericTumblrV1):
    """Class to retrieve Pundemonium comics.

    All retrieval logic is inherited from GenericTumblrV1; only the
    identifying attributes are defined here.
    """
    name = 'pundemonium'
    long_name = 'Pundemonium'
    url = 'http://monstika.tumblr.com'
3544
3545
3546
class PoorlyDrawnLinesTumblr(GenericTumblrV1):
    """Class to retrieve Poorly Drawn Lines comics.

    All retrieval logic is inherited from GenericTumblrV1; only the
    identifying attributes are defined here.
    """
    # Also on http://poorlydrawnlines.com
    name = 'poorlydrawn-tumblr'
    long_name = 'Poorly Drawn Lines (from Tumblr)'
    url = 'http://pdlcomics.tumblr.com'
    _categories = ('POORLYDRAWN', )
3553
3554
3555
class PearShapedComics(GenericTumblrV1):
    """Class to retrieve Pear Shaped Comics.

    All retrieval logic is inherited from GenericTumblrV1; only the
    identifying attributes are defined here.
    """
    name = 'pearshaped'
    long_name = 'Pear-Shaped Comics'
    url = 'http://pearshapedcomics.com'
3560
3561
3562
class PondScumComics(GenericTumblrV1):
    """Class to retrieve Pond Scum Comics.

    All retrieval logic is inherited from GenericTumblrV1; only the
    identifying attributes are defined here.
    """
    name = 'pond'
    long_name = 'Pond Scum'
    url = 'http://pondscumcomic.tumblr.com'
3567
3568
3569
class MercworksTumblr(GenericTumblrV1):
    """Class to retrieve Mercworks comics.

    All retrieval logic is inherited from GenericTumblrV1; only the
    identifying attributes are defined here.
    """
    # Also on http://mercworks.net
    name = 'mercworks-tumblr'
    long_name = 'Mercworks (from Tumblr)'
    url = 'http://mercworks.tumblr.com'
3575
3576
3577
class OwlTurdTumblr(GenericTumblrV1):
    """Class to retrieve Owl Turd comics.

    All retrieval logic is inherited from GenericTumblrV1; only the
    identifying attributes are defined here.
    """
    # Also on http://tapastic.com/series/Owl-Turd-Comix
    name = 'owlturd-tumblr'
    long_name = 'Owl Turd (from Tumblr)'
    url = 'http://owlturd.com'
    _categories = ('OWLTURD', )
3584
3585
3586
class VectorBelly(GenericTumblrV1):
    """Class to retrieve Vector Belly comics.

    All retrieval logic is inherited from GenericTumblrV1; only the
    identifying attributes are defined here.
    """
    # Also on http://vectorbelly.com
    name = 'vector'
    long_name = 'Vector Belly'
    url = 'http://vectorbelly.tumblr.com'
3592
3593
3594
class GoneIntoRapture(GenericTumblrV1):
    """Class to retrieve Gone Into Rapture comics.

    All retrieval logic is inherited from GenericTumblrV1; only the
    identifying attributes are defined here.
    """
    # Also on http://goneintorapture.tumblr.com
    # Also on http://tapastic.com/series/Goneintorapture
    name = 'rapture'
    long_name = 'Gone Into Rapture'
    url = 'http://www.goneintorapture.com'
3601
3602
3603
class TheOatmealTumblr(GenericTumblrV1):
    """Class to retrieve The Oatmeal comics.

    All retrieval logic is inherited from GenericTumblrV1; only the
    identifying attributes are defined here.
    """
    # Also on http://theoatmeal.com
    name = 'oatmeal-tumblr'
    long_name = 'The Oatmeal (from Tumblr)'
    url = 'http://oatmeal.tumblr.com'
3609
3610
3611
class HeckIfIKnowComicsTumblr(GenericTumblrV1):
    """Class to retrieve Heck If I Know Comics.

    All retrieval logic is inherited from GenericTumblrV1; only the
    identifying attributes are defined here.
    """
    # Also on http://tapastic.com/series/Regular
    name = 'heck-tumblr'
    long_name = 'Heck if I Know comics (from Tumblr)'
    url = 'http://heckifiknowcomics.com'
3617
3618
3619
class MyJetPack(GenericTumblrV1):
    """Class to retrieve My Jet Pack comics.

    All retrieval logic is inherited from GenericTumblrV1; only the
    identifying attributes are defined here.
    """
    name = 'jetpack'
    long_name = 'My Jet Pack'
    url = 'http://myjetpack.tumblr.com'
3624
3625
3626
class CheerUpEmoKidTumblr(GenericTumblrV1):
    """Class to retrieve CheerUpEmoKid comics.

    All retrieval logic is inherited from GenericTumblrV1; only the
    identifying attributes are defined here.
    """
    # Also on http://www.cheerupemokid.com
    # Also on http://tapastic.com/series/CUEK
    name = 'cuek-tumblr'
    long_name = 'Cheer Up Emo Kid (from Tumblr)'
    url = 'http://enzocomics.tumblr.com'
3633
3634
3635
class ForLackOfABetterComic(GenericTumblrV1):
    """Class to retrieve For Lack Of A Better Comics.

    All retrieval logic is inherited from GenericTumblrV1; only the
    identifying attributes are defined here.
    """
    # Also on http://forlackofabettercomic.com
    name = 'lack'
    long_name = 'For Lack Of A Better Comic'
    url = 'http://forlackofabettercomic.tumblr.com'
3641
3642
3643
class ZenPencilsTumblr(GenericTumblrV1):
    """Class to retrieve ZenPencils comics.

    All retrieval logic is inherited from GenericTumblrV1; only the
    identifying attributes are defined here.
    """
    # Also on http://zenpencils.com
    # Also on http://www.gocomics.com/zen-pencils
    name = 'zenpencils-tumblr'
    long_name = 'Zen Pencils (from Tumblr)'
    url = 'http://zenpencils.tumblr.com'
    _categories = ('ZENPENCILS', )
3651
3652
3653
class ThreeWordPhraseTumblr(GenericTumblrV1):
    """Class to retrieve Three Word Phrase comics.

    All retrieval logic is inherited from GenericTumblrV1; only the
    identifying attributes are defined here.
    """
    # Also on http://threewordphrase.com
    name = 'threeword-tumblr'
    long_name = 'Three Word Phrase (from Tumblr)'
    url = 'http://www.threewordphrase.tumblr.com'
3659
3660
3661
class TimeTrabbleTumblr(GenericTumblrV1):
    """Class to retrieve Time Trabble comics.

    All retrieval logic is inherited from GenericTumblrV1; only the
    identifying attributes are defined here.
    """
    # Also on http://timetrabble.com
    name = 'timetrabble-tumblr'
    long_name = 'Time Trabble (from Tumblr)'
    url = 'http://timetrabble.tumblr.com'
3667
3668
3669
class SafelyEndangeredTumblr(GenericTumblrV1):
    """Class to retrieve Safely Endangered comics.

    All retrieval logic is inherited from GenericTumblrV1; only the
    identifying attributes are defined here.
    """
    # Also on http://www.safelyendangered.com
    name = 'endangered-tumblr'
    long_name = 'Safely Endangered (from Tumblr)'
    url = 'http://tumblr.safelyendangered.com'
3675
3676
3677
class MouseBearComedyTumblr(GenericTumblrV1):
    """Class to retrieve Mouse Bear Comedy comics.

    All retrieval logic is inherited from GenericTumblrV1; only the
    identifying attributes are defined here.
    """
    # Also on http://www.mousebearcomedy.com
    name = 'mousebear-tumblr'
    long_name = 'Mouse Bear Comedy (from Tumblr)'
    url = 'http://mousebearcomedy.tumblr.com'
3683
3684
3685
class BouletCorpTumblr(GenericTumblrV1):
    """Class to retrieve BouletCorp comics.

    All retrieval logic is inherited from GenericTumblrV1; only the
    identifying attributes are defined here.
    """
    # Also on http://www.bouletcorp.com
    name = 'boulet-tumblr'
    long_name = 'Boulet Corp (from Tumblr)'
    url = 'http://bouletcorp.tumblr.com'
    _categories = ('BOULET', )
3692
3693
3694
class TheAwkwardYetiTumblr(GenericTumblrV1):
    """Class to retrieve The Awkward Yeti comics.

    All retrieval logic is inherited from GenericTumblrV1; only the
    identifying attributes are defined here.
    """
    # Also on http://www.gocomics.com/the-awkward-yeti
    # Also on http://theawkwardyeti.com
    # Also on https://tapastic.com/series/TheAwkwardYeti
    name = 'yeti-tumblr'
    long_name = 'The Awkward Yeti (from Tumblr)'
    url = 'http://larstheyeti.tumblr.com'
    _categories = ('YETI', )
3703
3704
3705
class NellucNhoj(GenericTumblrV1):
    """Class to retrieve NellucNhoj comics.

    All retrieval logic is inherited from GenericTumblrV1; only the
    identifying attributes are defined here.
    """
    name = 'nhoj'
    long_name = 'Nelluc Nhoj'
    url = 'http://nellucnhoj.com'
3710
3711
3712
class DownTheUpwardSpiralTumblr(GenericTumblrV1):
    """Class to retrieve Down The Upward Spiral comics.

    All retrieval logic is inherited from GenericTumblrV1; only the
    identifying attributes are defined here.
    """
    # Also on http://www.downtheupwardspiral.com
    name = 'spiral-tumblr'
    long_name = 'Down the Upward Spiral (from Tumblr)'
    url = 'http://downtheupwardspiral.tumblr.com'
3718
3719
3720
class AsPerUsualTumblr(GenericTumblrV1):
    """Class to retrieve As Per Usual comics.

    All retrieval logic is inherited from GenericTumblrV1; only the
    identifying attributes are defined here.
    """
    # Also on https://tapastic.com/series/AsPerUsual
    name = 'usual-tumblr'
    long_name = 'As Per Usual (from Tumblr)'
    url = 'http://as-per-usual.tumblr.com'
    # Declared as `_categories`, the attribute name used by every other
    # comic class in this file (it was previously spelled `categories`).
    _categories = ('DAMILEE', )
3727
3728
3729
class HotComicsForCoolPeopleTumblr(GenericTumblrV1):
    """Class to retrieve Hot Comics For Cool People.

    All retrieval logic is inherited from GenericTumblrV1; only the
    identifying attributes are defined here.
    """
    # Also on https://tapastic.com/series/Hot-Comics-For-Cool-People
    # Also on http://hotcomics.biz (links to tumblr)
    # Also on http://hcfcp.com (links to tumblr)
    name = 'hotcomics-tumblr'
    long_name = 'Hot Comics For Cool People (from Tumblr)'
    url = 'http://hotcomicsforcoolpeople.tumblr.com'
    # Declared as `_categories`, the attribute name used by every other
    # comic class in this file (it was previously spelled `categories`).
    _categories = ('DAMILEE', )
3738
3739
3740
class OneOneOneOneComicTumblr(GenericTumblrV1):
    """Class to retrieve 1111 Comics.

    All retrieval logic is inherited from GenericTumblrV1; only the
    identifying attributes are defined here.
    """
    # Also on http://www.1111comics.me
    # Also on https://tapastic.com/series/1111-Comics
    name = '1111-tumblr'
    long_name = '1111 Comics (from Tumblr)'
    url = 'http://comics1111.tumblr.com'
    _categories = ('ONEONEONEONE', )
3748
3749
3750
class JhallComicsTumblr(GenericTumblrV1):
    """Class to retrieve Jhall Comics.

    All retrieval logic is inherited from GenericTumblrV1; only the
    identifying attributes are defined here.
    """
    # Also on http://jhallcomics.com
    name = 'jhall-tumblr'
    long_name = 'Jhall Comics (from Tumblr)'
    url = 'http://jhallcomics.tumblr.com'
3756
3757
3758
class BerkeleyMewsTumblr(GenericTumblrV1):
    """Class to retrieve Berkeley Mews comics.

    All retrieval logic is inherited from GenericTumblrV1; only the
    identifying attributes are defined here.
    """
    # Also on http://www.gocomics.com/berkeley-mews
    # Also on http://www.berkeleymews.com
    name = 'berkeley-tumblr'
    long_name = 'Berkeley Mews (from Tumblr)'
    url = 'http://mews.tumblr.com'
    _categories = ('BERKELEY', )
3766
3767
3768
class JoanCornellaTumblr(GenericTumblrV1):
    """Class to retrieve Joan Cornella comics.

    All retrieval logic is inherited from GenericTumblrV1; only the
    identifying attributes are defined here.
    """
    # Also on http://joancornella.net
    name = 'cornella-tumblr'
    long_name = 'Joan Cornella (from Tumblr)'
    url = 'http://cornellajoan.tumblr.com'
3774
3775
3776
class RespawnComicTumblr(GenericTumblrV1):
    """Class to retrieve Respawn Comic.

    All retrieval logic is inherited from GenericTumblrV1; only the
    identifying attributes are defined here.
    """
    # Also on http://respawncomic.com
    name = 'respawn-tumblr'
    long_name = 'Respawn Comic (from Tumblr)'
    url = 'http://respawncomic.tumblr.com'
3782
3783
3784
class ChrisHallbeckTumblr(GenericTumblrV1):
    """Chris Hallbeck comics (logic inherited from GenericTumblrV1)."""
    # Also on https://tapastic.com/ChrisHallbeck
    # Also on http://maximumble.com
    # Also on http://minimumble.com
    # Also on http://thebookofbiff.com
    name = 'hallbeck-tumblr'
    # Display name: spelling corrected from 'Hallback' to match the author's
    # name as it appears in the class name and in every URL of this class.
    long_name = 'Chris Hallbeck (from Tumblr)'
    url = 'http://chrishallbeck.tumblr.com'
    # NOTE(review): the category keeps its 'HALLBACK' spelling on purpose; it
    # is an identifier that other classes may share -- rename everywhere or
    # not at all.
    _categories = ('HALLBACK', )
3794
3795
3796
class ComicNuggets(GenericTumblrV1):
    """Comic Nuggets (logic inherited from GenericTumblrV1)."""
    name = "nuggets"
    long_name = "Comic Nuggets"
    url = "http://comicnuggets.com"
3801
3802
3803
class PigeonGazetteTumblr(GenericTumblrV1):
    """The Pigeon Gazette comics (logic inherited from GenericTumblrV1)."""
    # Also on https://tapastic.com/series/The-Pigeon-Gazette
    name = "pigeon-tumblr"
    long_name = "The Pigeon Gazette (from Tumblr)"
    url = "http://thepigeongazette.tumblr.com"
3809
3810
3811
class CancerOwl(GenericTumblrV1):
    """Cancer Owl comics (logic inherited from GenericTumblrV1)."""
    # Also on http://cancerowl.com
    name = "cancerowl-tumblr"
    long_name = "Cancer Owl (from Tumblr)"
    url = "http://cancerowl.tumblr.com"
3817
3818
3819
class FowlLanguageTumblr(GenericTumblrV1):
    """Fowl Language comics (logic inherited from GenericTumblrV1)."""
    # Also on http://www.fowllanguagecomics.com
    # Also on http://tapastic.com/series/Fowl-Language-Comics
    # Also on http://www.gocomics.com/fowl-language
    name = "fowllanguage-tumblr"
    long_name = "Fowl Language Comics (from Tumblr)"
    url = "http://fowllanguagecomics.tumblr.com"
    _categories = ("FOWLLANGUAGE",)
3828
3829
3830
class TheOdd1sOutTumblr(GenericTumblrV1):
    """The Odd 1s Out comics (logic inherited from GenericTumblrV1)."""
    # Also on http://theodd1sout.com
    # Also on https://tapastic.com/series/Theodd1sout
    name = "theodd-tumblr"
    long_name = "The Odd 1s Out (from Tumblr)"
    url = "http://theodd1sout.tumblr.com"
3837
3838
3839
class TheUnderfoldTumblr(GenericTumblrV1):
    """The Underfold comics (logic inherited from GenericTumblrV1)."""
    # Also on http://theunderfold.com
    name = "underfold-tumblr"
    long_name = "The Underfold (from Tumblr)"
    url = "http://theunderfold.tumblr.com"
3845
3846
3847
class LolNeinTumblr(GenericTumblrV1):
    """Lol Nein comics (logic inherited from GenericTumblrV1)."""
    # Also on http://lolnein.com
    name = "lolnein-tumblr"
    long_name = "Lol Nein (from Tumblr)"
    url = "http://lolneincom.tumblr.com"
3853
3854
3855
class FatAwesomeComicsTumblr(GenericTumblrV1):
    """Fat Awesome Comics (logic inherited from GenericTumblrV1)."""
    # Also on http://fatawesome.com/comics
    name = "fatawesome-tumblr"
    long_name = "Fat Awesome (from Tumblr)"
    url = "http://fatawesomecomedy.tumblr.com"
3861
3862
3863
class TheWorldIsFlatTumblr(GenericTumblrV1):
    """The World Is Flat comics (logic inherited from GenericTumblrV1)."""
    # Also on https://tapastic.com/series/The-World-is-Flat
    name = "flatworld-tumblr"
    long_name = "The World Is Flat (from Tumblr)"
    url = "http://theworldisflatcomics.tumblr.com"
3869
3870
3871
class DorrisMc(GenericTumblrV1):
    """Dorris Mc comics (logic inherited from GenericTumblrV1)."""
    # Also on http://www.gocomics.com/dorris-mccomics
    name = "dorrismc"
    long_name = "Dorris Mc"
    url = "http://dorrismccomics.com"
3877
3878
3879
class LeleozTumblr(GenericEmptyComic, GenericTumblrV1):
    """Leleoz comics (logic inherited from GenericEmptyComic and GenericTumblrV1)."""
    # Also on https://tapastic.com/series/Leleoz
    name = "leleoz-tumblr"
    long_name = "Leleoz (from Tumblr)"
    url = "http://leleozcomics.tumblr.com"
3885
3886
3887
class MoonBeardTumblr(GenericTumblrV1):
    """MoonBeard comics (logic inherited from GenericTumblrV1)."""
    # Also on http://moonbeard.com
    # Also on http://www.webtoons.com/en/comedy/moon-beard/list?title_no=471
    name = "moonbeard-tumblr"
    long_name = "Moon Beard (from Tumblr)"
    url = "http://blog.squiresjam.es/moonbeard"
3894
3895
3896
class AComik(GenericTumblrV1):
    """A Comik (logic inherited from GenericTumblrV1)."""
    name = "comik"
    long_name = "A Comik"
    url = "http://acomik.com"
3901
3902
3903
class ClassicRandy(GenericTumblrV1):
    """Classic Randy comics (logic inherited from GenericTumblrV1)."""
    name = "randy"
    long_name = "Classic Randy"
    url = "http://classicrandy.tumblr.com"
3908
3909
3910
class DagssonTumblr(GenericTumblrV1):
    """Dagsson comics (logic inherited from GenericTumblrV1)."""
    # Also on http://www.dagsson.com
    name = "dagsson-tumblr"
    long_name = "Dagsson Hugleikur (from Tumblr)"
    url = "http://hugleikurdagsson.tumblr.com"
3916
3917
3918
class LinsEditionsTumblr(GenericTumblrV1):
    """L.I.N.S. Editions comics (logic inherited from GenericTumblrV1)."""
    # Also on https://linsedition.com
    # Now on http://warandpeas.tumblr.com
    name = "lins-tumblr"
    long_name = "L.I.N.S. Editions (from Tumblr)"
    url = "http://linscomics.tumblr.com"
    _categories = ("LINS",)
3926
3927
3928
class WarAndPeasTumblr(GenericTumblrV1):
    """War And Peas comics (logic inherited from GenericTumblrV1)."""
    # Was on http://linscomics.tumblr.com
    name = "warandpeas-tumblr"
    long_name = "War And Peas (from Tumblr)"
    url = "http://warandpeas.tumblr.com"
    _categories = ("WARANDPEAS",)
3935
3936
3937
class OrigamiHotDish(GenericTumblrV1):
    """Origami Hot Dish comics (logic inherited from GenericTumblrV1)."""
    name = "origamihotdish"
    long_name = "Origami Hot Dish"
    url = "http://origamihotdish.com"
3942
3943
3944
class HitAndMissComicsTumblr(GenericTumblrV1):
    """Hit and Miss Comics (logic inherited from GenericTumblrV1)."""
    name = "hitandmiss"
    long_name = "Hit and Miss Comics"
    url = "http://hitandmisscomics.tumblr.com"
3949
3950
3951
class HMBlanc(GenericTumblrV1):
    """HM Blanc comics (logic inherited from GenericTumblrV1)."""
    name = "hmblanc"
    long_name = "HM Blanc"
    url = "http://hmblanc.tumblr.com"
3956
3957
3958
class TalesOfAbsurdityTumblr(GenericTumblrV1):
    """Tales Of Absurdity comics (logic inherited from GenericTumblrV1)."""
    # Also on http://talesofabsurdity.com
    # Also on http://tapastic.com/series/Tales-Of-Absurdity
    name = "absurdity-tumblr"
    long_name = "Tales of Absurdity (from Tumblr)"
    url = "http://talesofabsurdity.tumblr.com"
    _categories = ("ABSURDITY",)
3966
3967
3968
class RobbieAndBobby(GenericTumblrV1):
    """Robbie And Bobby comics (logic inherited from GenericTumblrV1)."""
    # Also on http://robbieandbobby.com
    name = "robbie-tumblr"
    long_name = "Robbie And Bobby (from Tumblr)"
    url = "http://robbieandbobby.tumblr.com"
3974
3975
3976
class ElectricBunnyComicTumblr(GenericTumblrV1):
    """Electric Bunny Comics (logic inherited from GenericTumblrV1)."""
    # Also on http://www.electricbunnycomics.com/View/Comic/153/Welcome+to+Hell
    name = "bunny-tumblr"
    long_name = "Electric Bunny Comic (from Tumblr)"
    url = "http://electricbunnycomics.tumblr.com"
3982
3983
3984
class Hoomph(GenericTumblrV1):
    """Hoomph comics (logic inherited from GenericTumblrV1)."""
    name = "hoomph"
    long_name = "Hoomph"
    url = "http://hoom.ph"
3989
3990
3991
class BFGFSTumblr(GenericTumblrV1):
    """BFGFS comics (logic inherited from GenericTumblrV1)."""
    # Also on https://tapastic.com/series/BFGFS
    # Also on http://bfgfs.com
    name = "bfgfs-tumblr"
    long_name = "BFGFS (from Tumblr)"
    url = "http://bfgfs.tumblr.com"
3998
3999
4000
class DoodleForFood(GenericTumblrV1):
    """Doodle For Food comics (logic inherited from GenericTumblrV1)."""
    # Also on http://doodleforfood.com
    name = "doodle"
    long_name = "Doodle For Food"
    url = "http://doodleforfood.com"
4006
4007
4008
class CassandraCalinTumblr(GenericTumblrV1):
    """C. Cassandra comics (logic inherited from GenericTumblrV1)."""
    # Also on http://cassandracalin.com
    # Also on https://tapastic.com/series/C-Cassandra-comics
    name = "cassandra-tumblr"
    long_name = "Cassandra Calin (from Tumblr)"
    url = "http://c-cassandra.tumblr.com"
4015
4016
4017
class DougWasTaken(GenericTumblrV1):
    """Doug Was Taken comics (logic inherited from GenericTumblrV1)."""
    name = "doug"
    long_name = "Doug Was Taken"
    url = "http://dougwastaken.tumblr.com"
4022
4023
4024
class MandatoryRollerCoaster(GenericTumblrV1):
    """Mandatory Roller Coaster comics (logic inherited from GenericTumblrV1)."""
    name = "rollercoaster"
    long_name = "Mandatory Roller Coaster"
    url = "http://mandatoryrollercoaster.com"
4029
4030
4031
class CEstPasEnRegardantSesPompes(GenericTumblrV1):
    """C'Est Pas En Regardant Ses Pompes (...) comics (logic inherited from GenericTumblrV1)."""
    name = "cperspqccltt"
    long_name = "C Est Pas En Regardant Ses Pompes (...)"
    url = "http://cperspqccltt.tumblr.com"
4036
4037
4038
class TheGrohlTroll(GenericTumblrV1):
    """The Grohl Troll comics (logic inherited from GenericTumblrV1)."""
    name = "grohltroll"
    long_name = "The Grohl Troll"
    url = "http://thegrohltroll.com"
4043
4044
4045
class WebcomicName(GenericTumblrV1):
    """Webcomic Name comics (logic inherited from GenericTumblrV1)."""
    name = "webcomicname"
    long_name = "Webcomic Name"
    url = "http://webcomicname.com"
4050
4051
4052
class BooksOfAdam(GenericTumblrV1):
    """Books of Adam comics (logic inherited from GenericTumblrV1)."""
    # Also on http://www.booksofadam.com
    name = "booksofadam"
    long_name = "Books of Adam"
    url = "http://booksofadam.tumblr.com"
4058
4059
4060
class HarkAVagrant(GenericTumblrV1):
    """Hark A Vagrant comics (logic inherited from GenericTumblrV1)."""
    # Also on http://www.harkavagrant.com
    name = "hark-tumblr"
    long_name = "Hark A Vagrant (from Tumblr)"
    url = "http://beatonna.tumblr.com"
4066
4067
4068
class OurSuperAdventureTumblr(GenericTumblrV1):
    """Our Super Adventure comics (logic inherited from GenericTumblrV1)."""
    # Also on https://tapastic.com/series/Our-Super-Adventure
    # Also on http://www.oursuperadventure.com
    # http://sarahgraley.com
    name = "superadventure-tumblr"
    long_name = "Our Super Adventure (from Tumblr)"
    url = "http://sarahssketchbook.tumblr.com"
4076
4077
4078
class JakeLikesOnions(GenericTumblrV1):
    """Jake Likes Onions comics (logic inherited from GenericTumblrV1)."""
    name = "jake"
    long_name = "Jake Likes Onions"
    url = "http://jakelikesonions.com"
4083
4084
4085
class InYourFaceCake(GenericTumblrV1):
    """In Your Face Cake comics (logic inherited from GenericTumblrV1)."""
    name = "inyourfacecake-tumblr"
    long_name = "In Your Face Cake (from Tumblr)"
    url = "http://in-your-face-cake.tumblr.com"
4090
4091
4092
class Robospunk(GenericTumblrV1):
    """Robospunk comics (logic inherited from GenericTumblrV1)."""
    name = "robospunk"
    long_name = "Robospunk"
    url = "http://robospunk.com"
4097
4098
4099
class BananaTwinky(GenericTumblrV1):
    """Banana Twinky comics (logic inherited from GenericTumblrV1)."""
    name = "banana"
    long_name = "Banana Twinky"
    url = "http://bananatwinky.tumblr.com"
4104
4105
4106
class YesterdaysPopcornTumblr(GenericTumblrV1):
    """Yesterday's Popcorn comics (logic inherited from GenericTumblrV1)."""
    # Also on http://www.yesterdayspopcorn.com
    # Also on https://tapastic.com/series/Yesterdays-Popcorn
    name = "popcorn-tumblr"
    long_name = "Yesterday's Popcorn (from Tumblr)"
    url = "http://yesterdayspopcorn.tumblr.com"
4113
4114
4115
class TwistedDoodles(GenericTumblrV1):
    """Twisted Doodles comics (logic inherited from GenericTumblrV1)."""
    name = "twisted"
    long_name = "Twisted Doodles"
    url = "http://www.twisteddoodles.com"
4120
4121
4122
class UbertoolTumblr(GenericTumblrV1):
    """Ubertool comics (logic inherited from GenericTumblrV1)."""
    # Also on http://ubertoolcomic.com
    # Also on https://tapastic.com/series/ubertool
    name = "ubertool-tumblr"
    long_name = "Ubertool (from Tumblr)"
    url = "http://ubertool.tumblr.com"
    _categories = ("UBERTOOL",)
4130
4131
4132
class LittleLifeLinesTumblr(GenericTumblrV1):
    """Little Life Lines comics (logic inherited from GenericTumblrV1)."""
    # Also on http://www.littlelifelines.com
    name = "life-tumblr"
    long_name = "Little Life Lines (from Tumblr)"
    url = "https://little-life-lines.tumblr.com"
4138
4139
4140
class TheyCanTalk(GenericTumblrV1):
    """They Can Talk comics (logic inherited from GenericTumblrV1)."""
    name = "theycantalk"
    long_name = "They Can Talk"
    url = "http://theycantalk.com"
4145
4146
4147
class Will5NeverCome(GenericTumblrV1):
    """Will 5:00 Never Come comics (logic inherited from GenericTumblrV1)."""
    name = "will5"
    long_name = "Will 5:00 Never Come ?"
    url = "http://will5nevercome.com"
4152
4153
4154
class Sephko(GenericTumblrV1):
    """Sephko Comics (logic inherited from GenericTumblrV1)."""
    # Also on http://www.sephko.com
    name = "sephko"
    long_name = "Sephko"
    url = "http://sephko.tumblr.com"
4160
4161
4162
class BlazersAtDawn(GenericTumblrV1):
    """Blazers At Dawn Comics (logic inherited from GenericTumblrV1)."""
    name = "blazers"
    long_name = "Blazers At Dawn"
    url = "http://blazersatdawn.tumblr.com"
4167
4168
4169
class ArtByMoga(GenericEmptyComic, GenericTumblrV1):  # Deactivated because it downloads too many things
    """Art By Moga Comics (logic inherited from GenericEmptyComic and GenericTumblrV1)."""
    name = "moga"
    long_name = "Art By Moga"
    url = "http://artbymoga.tumblr.com"
4174
4175
4176
class VerbalVomitTumblr(GenericTumblrV1):
    """Verbal Vomit comics (logic inherited from GenericTumblrV1)."""
    # Also on http://www.verbal-vomit.com
    name = "vomit-tumblr"
    long_name = "Verbal Vomit (from Tumblr)"
    url = "http://verbalvomits.tumblr.com"
4182
4183
4184 View Code Duplication
class LibraryComic(GenericTumblrV1):
    """LibraryComic (logic inherited from GenericTumblrV1)."""
    # Also on http://librarycomic.com
    name = "library-tumblr"
    long_name = "LibraryComic (from Tumblr)"
    url = "http://librarycomic.tumblr.com"
4190
4191
4192
class TizzyStitchBirdTumblr(GenericTumblrV1):
    """Tizzy Stitch Bird comics (logic inherited from GenericTumblrV1)."""
    # Also on http://tizzystitchbird.com
    # Also on https://tapastic.com/series/TizzyStitchbird
    # Also on http://www.webtoons.com/en/challenge/tizzy-stitchbird/list?title_no=50082
    name = "tizzy-tumblr"
    long_name = "Tizzy Stitch Bird (from Tumblr)"
    url = "http://tizzystitchbird.tumblr.com"
4200
4201
4202
class HorovitzComics(GenericListableComic):
    """Shared logic for the comic series hosted on horovitzcomics.com.

    Subclasses must set `link_re` to a compiled pattern that matches the
    archive hrefs of their series and captures the comic number as its
    first group.
    """
    url = "http://www.horovitzcomics.com"
    _categories = ("HOROVITZ",)
    # Captures the three numeric path components that follow "comics/" in an
    # image URL; get_comic_info reads them as year, month and day.
    img_re = re.compile(".*comics/([0-9]*)/([0-9]*)/([0-9]*)/.*$")
    # Placeholder: each concrete series provides its own pattern.
    link_re = NotImplemented
    get_url_from_archive_element = join_cls_url_to_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dict for the comic behind an archive link.

        `link` is the archive anchor (its href gives the comic number, its
        string the title); `soup` is the parsed comic page, expected to
        contain exactly one <img id="comic"> whose src encodes the date.
        """
        num = int(cls.link_re.match(link["href"]).group(1))
        title = link.string
        comic_imgs = soup.find_all("img", id="comic")
        assert len(comic_imgs) == 1
        date_parts = cls.img_re.match(comic_imgs[0]["src"]).groups()
        year, month, day = (int(part) for part in date_parts)
        info = {
            "title": title,
            "day": day,
            "month": month,
            "year": year,
            "img": [img["src"] for img in comic_imgs],
            "num": num,
        }
        return info

    @classmethod
    def get_archive_elements(cls):
        """Return the archive page links matching `link_re`, in reverse page order."""
        archive_soup = get_soup_at_url("http://www.horovitzcomics.com/comics/archive/")
        series_links = archive_soup.find_all("a", href=cls.link_re)
        return reversed(series_links)
4233
4234
4235
class HorovitzNew(HorovitzComics):
    """Horovitz "new" comics: archive links of the form /comics/new/<number>."""
    name = "horovitznew"
    long_name = "Horovitz New"
    link_re = re.compile(r"^/comics/new/([0-9]+)$")
4240
4241
4242
class HorovitzClassic(HorovitzComics):
    """Horovitz "classic" comics: archive links of the form /comics/classic/<number>."""
    name = "horovitzclassic"
    long_name = "Horovitz Classic"
    link_re = re.compile(r"^/comics/classic/([0-9]+)$")
4247
4248
4249
class GenericGoComic(GenericNavigableComic):
    """Common behaviour for the comics hosted on gocomics.com."""
    _categories = ("GOCOMIC",)

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'backward' button anchor found on the comic's main page."""
        # The trailing space is part of the class attribute being matched.
        first_css = "fa btn btn-outline-default btn-circle fa-backward sm "
        main_page = get_soup_at_url(cls.url)
        return main_page.find("a", class_=first_css)

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the next (if `next_` is true) or previous navigation anchor."""
        if next_:
            arrow_css = "fa btn btn-outline-default btn-circle fa-caret-right sm "
        else:
            arrow_css = "fa btn btn-outline-default btn-circle fa-caret-left sm "
        return last_soup.find("a", class_=arrow_css)

    @classmethod
    def get_url_from_link(cls, link):
        """Resolve the link's href against the gocomics.com root."""
        return urljoin_wrapper("http://www.gocomics.com", link["href"])

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dict from the comic page.

        Date, author and tags are read from the page's `article:*` meta
        tags; images come from the 'item-comic-image' <picture> element.
        """
        def meta_content(prop):
            # Content of the first <meta property=prop> tag of the page.
            return soup.find("meta", property=prop)["content"]

        published = string_to_date(meta_content("article:published_time"), "%Y-%m-%d")
        picture = soup.find("picture", class_="img-fluid item-comic-image")
        comic_imgs = picture.find_all("img")
        author = meta_content("article:author")
        tags = meta_content("article:tag")
        return {
            "day": published.day,
            "month": published.month,
            "year": published.year,
            "img": [img["src"] for img in comic_imgs],
            "author": author,
            "tags": tags,
        }
4286
4287
4288
class PearlsBeforeSwine(GenericGoComic):
    """Pearls Before Swine comics (logic inherited from GenericGoComic)."""
    name = "pearls"
    long_name = "Pearls Before Swine"
    url = "http://www.gocomics.com/pearlsbeforeswine"
4293
4294
4295
class Peanuts(GenericGoComic):
    """Peanuts comics (logic inherited from GenericGoComic)."""
    name = "peanuts"
    long_name = "Peanuts"
    url = "http://www.gocomics.com/peanuts"
4300
4301
4302
class MattWuerker(GenericGoComic):
    """Matt Wuerker comics (logic inherited from GenericGoComic)."""
    name = "wuerker"
    long_name = "Matt Wuerker"
    url = "http://www.gocomics.com/mattwuerker"
4307
4308
4309
class TomToles(GenericGoComic):
    """Tom Toles comics (logic inherited from GenericGoComic)."""
    name = "toles"
    long_name = "Tom Toles"
    url = "http://www.gocomics.com/tomtoles"
4314
4315
4316
class BreakOfDay(GenericGoComic):
    """Break Of Day comics (logic inherited from GenericGoComic)."""
    name = "breakofday"
    long_name = "Break Of Day"
    url = "http://www.gocomics.com/break-of-day"
4321
4322
4323
class Brevity(GenericGoComic):
    """Brevity comics (logic inherited from GenericGoComic)."""
    name = "brevity"
    long_name = "Brevity"
    url = "http://www.gocomics.com/brevitypanel"
4328
4329
4330
class MichaelRamirez(GenericGoComic):
    """Michael Ramirez comics (logic inherited from GenericGoComic)."""
    name = "ramirez"
    long_name = "Michael Ramirez"
    url = "http://www.gocomics.com/michaelramirez"
4335
4336
4337
class MikeLuckovich(GenericGoComic):
    """Mike Luckovich comics (logic inherited from GenericGoComic)."""
    name = "luckovich"
    long_name = "Mike Luckovich"
    url = "http://www.gocomics.com/mikeluckovich"
4342
4343
4344
class JimBenton(GenericGoComic):
    """Jim Benton comics (logic inherited from GenericGoComic)."""
    # Also on http://jimbenton.tumblr.com
    name = "benton"
    long_name = "Jim Benton"
    url = "http://www.gocomics.com/jim-benton-cartoons"
4350
4351
4352
class TheArgyleSweater(GenericGoComic):
    """The Argyle Sweater comics (logic inherited from GenericGoComic)."""
    name = "argyle"
    long_name = "Argyle Sweater"
    url = "http://www.gocomics.com/theargylesweater"
4357
4358
4359
class SunnyStreet(GenericGoComic):
    """Sunny Street comics (logic inherited from GenericGoComic)."""
    # Also on http://www.sunnystreetcomics.com
    name = "sunny"
    long_name = "Sunny Street"
    url = "http://www.gocomics.com/sunny-street"
4365
4366
4367
class OffTheMark(GenericGoComic):
    """Off The Mark comics (logic inherited from GenericGoComic)."""
    # Also on https://www.offthemark.com
    name = "offthemark"
    long_name = "Off The Mark"
    url = "http://www.gocomics.com/offthemark"
4373
4374
4375
class WuMo(GenericGoComic):
    """WuMo comics (logic inherited from GenericGoComic)."""
    # Also on http://wumo.com
    name = "wumo"
    long_name = "WuMo"
    url = "http://www.gocomics.com/wumo"
4381
4382
4383
class LunarBaboon(GenericGoComic):
    """Lunar Baboon comics (logic inherited from GenericGoComic)."""
    # Also on http://www.lunarbaboon.com
    # Also on https://tapastic.com/series/Lunarbaboon
    name = "lunarbaboon"
    long_name = "Lunar Baboon"
    url = "http://www.gocomics.com/lunarbaboon"
4390
4391
4392
class SandersenGocomic(GenericGoComic):
    """Sarah Andersen comics (logic inherited from GenericGoComic)."""
    # Also on http://sarahcandersen.com
    # Also on http://tapastic.com/series/Doodle-Time
    name = "sandersen-goc"
    long_name = "Sarah Andersen (from GoComics)"
    url = "http://www.gocomics.com/sarahs-scribbles"
4399
4400
4401
class SaturdayMorningBreakfastCerealGoComic(GenericGoComic):
    """Saturday Morning Breakfast Cereal comics (logic inherited from GenericGoComic)."""
    # Also on http://smbc-comics.tumblr.com
    # Also on http://www.smbc-comics.com
    name = "smbc-goc"
    long_name = "Saturday Morning Breakfast Cereal (from GoComics)"
    url = "http://www.gocomics.com/saturday-morning-breakfast-cereal"
    _categories = ("SMBC",)
4409
4410
4411
class CalvinAndHobbesGoComic(GenericGoComic):
    """Calvin and Hobbes comics (logic inherited from GenericGoComic)."""
    # From gocomics, not http://marcel-oehler.marcellosendos.ch/comics/ch/
    name = "calvin-goc"
    long_name = "Calvin and Hobbes (from GoComics)"
    url = "http://www.gocomics.com/calvinandhobbes"
4417
4418
4419
class RallGoComic(GenericGoComic):
    """Ted Rall comics (logic inherited from GenericGoComic)."""
    # Also on http://rall.com/comic
    name = "rall-goc"
    long_name = "Ted Rall (from GoComics)"
    url = "http://www.gocomics.com/ted-rall"
    _categories = ("RALL",)
4426
4427
4428
class TheAwkwardYetiGoComic(GenericGoComic):
    """The Awkward Yeti comics (logic inherited from GenericGoComic)."""
    # Also on http://larstheyeti.tumblr.com
    # Also on http://theawkwardyeti.com
    # Also on https://tapastic.com/series/TheAwkwardYeti
    name = "yeti-goc"
    long_name = "The Awkward Yeti (from GoComics)"
    url = "http://www.gocomics.com/the-awkward-yeti"
    _categories = ("YETI",)
4437
4438
4439
class BerkeleyMewsGoComics(GenericGoComic):
    """Berkeley Mews comics (logic inherited from GenericGoComic)."""
    # Also on http://mews.tumblr.com
    # Also on http://www.berkeleymews.com
    name = "berkeley-goc"
    long_name = "Berkeley Mews (from GoComics)"
    url = "http://www.gocomics.com/berkeley-mews"
    _categories = ("BERKELEY",)
4447
4448
4449
class SheldonGoComics(GenericGoComic):
    """Sheldon comics (logic inherited from GenericGoComic)."""
    # Also on http://www.sheldoncomics.com
    name = "sheldon-goc"
    long_name = "Sheldon Comics (from GoComics)"
    url = "http://www.gocomics.com/sheldon"
4455
4456
4457
class FowlLanguageGoComics(GenericGoComic):
    """Fowl Language comics (logic inherited from GenericGoComic)."""
    # Also on http://www.fowllanguagecomics.com
    # Also on http://tapastic.com/series/Fowl-Language-Comics
    # Also on http://fowllanguagecomics.tumblr.com
    name = "fowllanguage-goc"
    long_name = "Fowl Language Comics (from GoComics)"
    url = "http://www.gocomics.com/fowl-language"
    _categories = ("FOWLLANGUAGE",)
4466
4467
4468
class NickAnderson(GenericGoComic):
    """Nick Anderson comics (logic inherited from GenericGoComic)."""
    name = "nickanderson"
    long_name = "Nick Anderson"
    url = "http://www.gocomics.com/nickanderson"
4473
4474
4475
class GarfieldGoComics(GenericGoComic):
    """Garfield comics (logic inherited from GenericGoComic)."""
    # Also on http://garfield.com
    name = "garfield-goc"
    long_name = "Garfield (from GoComics)"
    url = "http://www.gocomics.com/garfield"
    _categories = ("GARFIELD",)
4482
4483
4484
class DorrisMcGoComics(GenericGoComic):
    """Dorris Mc comics (logic inherited from GenericGoComic)."""
    # Also on http://dorrismccomics.com
    name = "dorrismc-goc"
    long_name = "Dorris Mc (from GoComics)"
    url = "http://www.gocomics.com/dorris-mccomics"
4490
4491
4492
class FoxTrot(GenericGoComic):
    """FoxTrot comics (logic inherited from GenericGoComic)."""
    name = "foxtrot"
    long_name = "FoxTrot"
    url = "http://www.gocomics.com/foxtrot"
4497
4498
4499
class FoxTrotClassics(GenericGoComic):
    """FoxTrot Classics comics (logic inherited from GenericGoComic)."""
    name = "foxtrot-classics"
    long_name = "FoxTrot Classics"
    url = "http://www.gocomics.com/foxtrotclassics"
4504
4505
4506
class MisterAndMeGoComics(GenericEmptyComic, GenericGoComic):  # Removed ?
    """Mister & Me comics (logic inherited from GenericEmptyComic and GenericGoComic)."""
    # Also on http://www.mister-and-me.com
    # Also on https://tapastic.com/series/Mister-and-Me
    name = "mister-goc"
    long_name = "Mister & Me (from GoComics)"
    url = "http://www.gocomics.com/mister-and-me"
4513
4514
4515
class NonSequitur(GenericGoComic):
    """Non Sequitur (Wiley Miller) comics (logic inherited from GenericGoComic)."""
    name = "nonsequitur"
    long_name = "Non Sequitur"
    url = "http://www.gocomics.com/nonsequitur"
4520
4521
4522
class GenericTapasticComic(GenericListableComic):
    """Common behaviour for the comics hosted on tapastic.com."""
    _categories = ("TAPASTIC",)

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Build the information dict for one episode.

        `archive_elt` is one entry of the list returned by
        get_archive_elements (its 'publishDate', 'title' and 'id' keys are
        read); `soup` is the parsed episode page. Returns None, after
        printing a notice, when the page has no 'art-image' <img> yet.
        """
        # publishDate is divided by 1000 before being handed to
        # fromtimestamp -- presumably a millisecond epoch; confirm.
        seconds = int(archive_elt["publishDate"]) / 1000.0
        published = datetime.datetime.fromtimestamp(seconds).date()
        art_imgs = soup.find_all("img", class_="art-image")
        if not art_imgs:
            episode_url = cls.get_url_from_archive_element(archive_elt)
            print("Comic %s is being uploaded, retry later" % episode_url)
            return None
        return {
            "day": published.day,
            "year": published.year,
            "month": published.month,
            "img": [img["src"] for img in art_imgs],
            "title": archive_elt["title"],
        }

    @classmethod
    def get_url_from_archive_element(cls, archive_elt):
        """Return the episode URL built from the element's 'id'."""
        return "http://tapastic.com/episode/" + str(archive_elt["id"])

    @classmethod
    def get_archive_elements(cls):
        """Return the episode list embedded in the series page's javascript.

        The page is scanned line by line for a stripped line that starts
        with 'episodeList : ' and ends with ','; the text in between is
        parsed as JSON. Raises IndexError when no such line exists.
        """
        pref, suff = "episodeList : ", ","
        payloads = []
        for raw_line in urlopen_wrapper(cls.url).readlines():
            text = raw_line.decode("utf-8").strip()
            if text.startswith(pref) and text.endswith(suff):
                # Keep only what sits between the prefix and the trailing comma.
                payloads.append(text[len(pref):-len(suff)])
        return json.loads(payloads[0])
4555
4556
4557
class VegetablesForDessert(GenericTapasticComic):
    """Vegetables For Dessert comics (logic inherited from GenericTapasticComic)."""
    # Also on http://vegetablesfordessert.tumblr.com
    name = "vegetables"
    long_name = "Vegetables For Dessert"
    url = "http://tapastic.com/series/vegetablesfordessert"
4563
4564
4565
class FowlLanguageTapa(GenericTapasticComic):
    """Fowl Language comics (logic inherited from GenericTapasticComic)."""
    # Also on http://www.fowllanguagecomics.com
    # Also on http://fowllanguagecomics.tumblr.com
    # Also on http://www.gocomics.com/fowl-language
    name = "fowllanguage-tapa"
    long_name = "Fowl Language Comics (from Tapastic)"
    url = "http://tapastic.com/series/Fowl-Language-Comics"
    _categories = ("FOWLLANGUAGE",)
4574
4575
4576
class OscillatingProfundities(GenericTapasticComic):
    """Oscillating Profundities comics (logic inherited from GenericTapasticComic)."""
    name = "oscillating"
    long_name = "Oscillating Profundities"
    url = "http://tapastic.com/series/oscillatingprofundities"
4581
4582
4583
class ZnoflatsComics(GenericTapasticComic):
    """Znoflats comics (logic inherited from GenericTapasticComic)."""
    name = "znoflats"
    long_name = "Znoflats Comics"
    url = "http://tapastic.com/series/Znoflats-Comics"
4588
4589
4590
class SandersenTapastic(GenericTapasticComic):
    """Sarah Andersen comics (logic inherited from GenericTapasticComic)."""
    # Also on http://sarahcandersen.com
    # Also on http://www.gocomics.com/sarahs-scribbles
    name = "sandersen-tapa"
    long_name = "Sarah Andersen (from Tapastic)"
    url = "http://tapastic.com/series/Doodle-Time"
4597
4598
4599
class TubeyToonsTapastic(GenericTapasticComic):
    """TubeyToons comics (logic inherited from GenericTapasticComic)."""
    # Also on http://tubeytoons.com
    # Also on http://tubeytoons.tumblr.com
    name = "tubeytoons-tapa"
    long_name = "Tubey Toons (from Tapastic)"
    url = "http://tapastic.com/series/Tubey-Toons"
    # NOTE(review): 'TUNEYTOONS' looks like a typo for 'TUBEYTOONS'; kept
    # as-is because the category may be shared with other classes -- confirm.
    _categories = ("TUNEYTOONS",)
4607
4608
4609
class AnythingComicTapastic(GenericTapasticComic):
    """Anything Comics (logic inherited from GenericTapasticComic)."""
    # Also on http://www.anythingcomic.com
    name = "anythingcomic-tapa"
    long_name = "Anything Comic (from Tapastic)"
    url = "http://tapastic.com/series/anything"
4615
4616
4617
class UnearthedComicsTapastic(GenericTapasticComic):
    """Unearthed comics (logic inherited from GenericTapasticComic)."""
    # Also on http://unearthedcomics.com
    # Also on http://unearthedcomics.tumblr.com
    name = "unearthed-tapa"
    long_name = "Unearthed Comics (from Tapastic)"
    url = "http://tapastic.com/series/UnearthedComics"
    _categories = ("UNEARTHED",)
4625
4626
4627
class EverythingsStupidTapastic(GenericTapasticComic):
    """Everything's Stupid comics (logic inherited from GenericTapasticComic)."""
    # Also on http://www.webtoons.com/en/challenge/everythings-stupid/list?title_no=14591
    # Also on http://everythingsstupid.net
    name = "stupid-tapa"
    long_name = "Everything's Stupid (from Tapastic)"
    url = "http://tapastic.com/series/EverythingsStupid"
4634
4635
4636
class JustSayEhTapastic(GenericTapasticComic):
    """Just Say Eh comics (logic inherited from GenericTapasticComic)."""
    # Also on http://www.justsayeh.com
    name = "justsayeh-tapa"
    long_name = "Just Say Eh (from Tapastic)"
    url = "http://tapastic.com/series/Just-Say-Eh"
4642
4643
4644
class ThorsThundershackTapastic(GenericTapasticComic):
    """Tapastic edition of the Thor's Thundershack webcomic.

    Pure configuration: only class attributes are set here and no method
    of GenericTapasticComic is overridden.
    """
    # Same comic is also published on:
    #   http://www.thorsthundershack.com
    name = 'thor-tapa'
    long_name = "Thor's Thundershack (from Tapastic)"
    url = 'http://tapastic.com/series/Thors-Thundershac'
    _categories = ('THOR',)
4651
4652
4653
class OwlTurdTapastic(GenericTapasticComic):
    """Tapastic edition of the Owl Turd webcomic.

    Pure configuration: only class attributes are set here and no method
    of GenericTapasticComic is overridden.
    """
    # Same comic is also published on:
    #   http://owlturd.com
    name = 'owlturd-tapa'
    long_name = 'Owl Turd (from Tapastic)'
    url = 'http://tapastic.com/series/Owl-Turd-Comix'
    _categories = ('OWLTURD',)
4660
4661
4662
class GoneIntoRaptureTapastic(GenericTapasticComic):
    """Tapastic edition of the Gone Into Rapture webcomic.

    Pure configuration: only class attributes are set here and no method
    of GenericTapasticComic is overridden.
    """
    # Same comic is also published on:
    #   http://goneintorapture.tumblr.com
    #   http://www.goneintorapture.com
    name = 'rapture-tapa'
    long_name = 'Gone Into Rapture (from Tapastic)'
    url = 'http://tapastic.com/series/Goneintorapture'
4669
4670
4671
class HeckIfIKnowComicsTapa(GenericTapasticComic):
    """Tapastic edition of the Heck If I Know Comics webcomic.

    Pure configuration: only class attributes are set here and no method
    of GenericTapasticComic is overridden.
    """
    # Same comic is also published on:
    #   http://heckifiknowcomics.com
    name = 'heck-tapa'
    long_name = 'Heck if I Know comics (from Tapastic)'
    url = 'http://tapastic.com/series/Regular'
4677
4678
4679
class CheerUpEmoKidTapa(GenericTapasticComic):
    """Tapastic edition of the Cheer Up Emo Kid webcomic.

    Pure configuration: only class attributes are set here and no method
    of GenericTapasticComic is overridden.
    """
    # Same comic is also published on:
    #   http://www.cheerupemokid.com
    #   http://enzocomics.tumblr.com
    name = 'cuek-tapa'
    long_name = 'Cheer Up Emo Kid (from Tapastic)'
    url = 'http://tapastic.com/series/CUEK'
4686
4687
4688
class BigFootJusticeTapa(GenericTapasticComic):
    """Tapastic edition of the Big Foot Justice webcomic.

    Pure configuration: only class attributes are set here and no method
    of GenericTapasticComic is overridden.
    """
    # Same comic is also published on:
    #   http://bigfootjustice.com
    name = 'bigfoot-tapa'
    long_name = 'Big Foot Justice (from Tapastic)'
    url = 'http://tapastic.com/series/bigfoot-justice'
4694
4695
4696
class UpAndOutTapa(GenericTapasticComic):
    """Tapastic edition of the Up & Out webcomic.

    Pure configuration: only class attributes are set here and no method
    of GenericTapasticComic is overridden.
    """
    # Same comic is also published on:
    #   http://upandoutcomic.tumblr.com
    name = 'upandout-tapa'
    long_name = 'Up And Out (from Tapastic)'
    url = 'http://tapastic.com/series/UP-and-OUT'
4702
4703
4704
class ToonHoleTapa(GenericTapasticComic):
    """Tapastic edition of the Toon Hole webcomic.

    Pure configuration: only class attributes are set here and no method
    of GenericTapasticComic is overridden.
    """
    # Same comic is also published on:
    #   http://www.toonhole.com
    name = 'toonhole-tapa'
    long_name = 'Toon Hole (from Tapastic)'
    url = 'http://tapastic.com/series/TOONHOLE'
4710
4711
4712
class AngryAtNothingTapa(GenericTapasticComic):
    """Tapastic edition of the Angry At Nothing webcomic.

    Pure configuration: only class attributes are set here and no method
    of GenericTapasticComic is overridden.
    """
    # Same comic is also published on:
    #   http://www.angryatnothing.net
    name = 'angry-tapa'
    long_name = 'Angry At Nothing (from Tapastic)'
    url = 'http://tapastic.com/series/Comics-yeah-definitely-comics-'
4718
4719
4720
class LeleozTapa(GenericTapasticComic):
    """Tapastic edition of the Leleoz webcomic.

    Pure configuration: only class attributes are set here and no method
    of GenericTapasticComic is overridden.
    """
    # Same comic is also published on:
    #   http://leleozcomics.tumblr.com
    name = 'leleoz-tapa'
    long_name = 'Leleoz (from Tapastic)'
    url = 'https://tapastic.com/series/Leleoz'
4726
4727
4728
class TheAwkwardYetiTapa(GenericTapasticComic):
    """Tapastic edition of The Awkward Yeti webcomic.

    Pure configuration: only class attributes are set here and no method
    of GenericTapasticComic is overridden.
    """
    # Same comic is also published on:
    #   http://www.gocomics.com/the-awkward-yeti
    #   http://theawkwardyeti.com
    #   http://larstheyeti.tumblr.com
    name = 'yeti-tapa'
    long_name = 'The Awkward Yeti (from Tapastic)'
    url = 'https://tapastic.com/series/TheAwkwardYeti'
    _categories = ('YETI',)
4737
4738
4739
class AsPerUsualTapa(GenericTapasticComic):
    """Tapastic edition of the As Per Usual webcomic.

    Pure configuration: only class attributes are set here and no method
    of GenericTapasticComic is overridden.
    """
    # Same comic is also published on:
    #   http://as-per-usual.tumblr.com
    name = 'usual-tapa'
    long_name = 'As Per Usual (from Tapastic)'
    url = 'https://tapastic.com/series/AsPerUsual'
    # Uses the underscore-prefixed attribute name shared by the other comic
    # classes; a plain `categories` attribute is not what they declare.
    _categories = ('DAMILEE',)
4746
4747
4748
class HotComicsForCoolPeopleTapa(GenericTapasticComic):
    """Tapastic edition of the Hot Comics For Cool People webcomic.

    Pure configuration: only class attributes are set here and no method
    of GenericTapasticComic is overridden.
    """
    # Same comic is also published on:
    #   http://hotcomicsforcoolpeople.tumblr.com
    #   http://hotcomics.biz (links to tumblr)
    #   http://hcfcp.com (links to tumblr)
    name = 'hotcomics-tapa'
    long_name = 'Hot Comics For Cool People (from Tapastic)'
    url = 'https://tapastic.com/series/Hot-Comics-For-Cool-People'
    # Uses the underscore-prefixed attribute name shared by the other comic
    # classes; a plain `categories` attribute is not what they declare.
    _categories = ('DAMILEE',)
4757
4758
4759
class OneOneOneOneComicTapa(GenericTapasticComic):
    """Tapastic edition of the 1111 Comics webcomic.

    Pure configuration: only class attributes are set here and no method
    of GenericTapasticComic is overridden.
    """
    # Same comic is also published on:
    #   http://www.1111comics.me
    #   http://comics1111.tumblr.com
    name = '1111-tapa'
    long_name = '1111 Comics (from Tapastic)'
    url = 'https://tapastic.com/series/1111-Comics'
    _categories = ('ONEONEONEONE',)
4767
4768
4769
class TumbleDryTapa(GenericTapasticComic):
    """Tapastic edition of the Tumble Dry webcomic.

    Pure configuration: only class attributes are set here and no method
    of GenericTapasticComic is overridden.
    """
    # Same comic is also published on:
    #   http://tumbledrycomics.com
    name = 'tumbledry-tapa'
    # Display name follows the comic's actual title ("Tumble", not "Tumblr").
    long_name = 'Tumble Dry (from Tapastic)'
    url = 'https://tapastic.com/series/TumbleDryComics'
4775
4776
4777
class DeadlyPanelTapa(GenericTapasticComic):
    """Tapastic edition of the Deadly Panel webcomic.

    Pure configuration: only class attributes are set here and no method
    of GenericTapasticComic is overridden.
    """
    # Same comic is also published on:
    #   http://www.deadlypanel.com
    name = 'deadly-tapa'
    long_name = 'Deadly Panel (from Tapastic)'
    url = 'https://tapastic.com/series/deadlypanel'
4783
4784
4785
class ChrisHallbeckMaxiTapa(GenericTapasticComic):
    """Tapastic edition of Chris Hallbeck's Maximumble webcomic.

    Pure configuration: only class attributes are set here and no method
    of GenericTapasticComic is overridden.
    """
    # Same comic is also published on:
    #   http://chrishallbeck.tumblr.com
    #   http://maximumble.com
    name = 'hallbeckmaxi-tapa'
    # Author name spelled as in the class name and the URLs above.
    long_name = 'Chris Hallbeck - Maximumble (from Tapastic)'
    url = 'https://tapastic.com/series/Maximumble'
    # NOTE(review): the category key keeps its 'HALLBACK' spelling;
    # presumably it must stay equal to the key used by the other Hallbeck
    # classes - confirm before renaming it.
    _categories = ('HALLBACK',)
4793
4794
4795
class ChrisHallbeckMiniTapa(GenericTapasticComic):
    """Tapastic edition of Chris Hallbeck's Minimumble webcomic.

    Pure configuration: only class attributes are set here and no method
    of GenericTapasticComic is overridden.
    """
    # Same comic is also published on:
    #   http://chrishallbeck.tumblr.com
    #   http://minimumble.com
    name = 'hallbeckmini-tapa'
    # Author name spelled as in the class name and the URLs above.
    long_name = 'Chris Hallbeck - Minimumble (from Tapastic)'
    url = 'https://tapastic.com/series/Minimumble'
    # NOTE(review): the category key keeps its 'HALLBACK' spelling;
    # presumably it must stay equal to the key used by the other Hallbeck
    # classes - confirm before renaming it.
    _categories = ('HALLBACK',)
4803
4804
4805
class ChrisHallbeckBiffTapa(GenericTapasticComic):
    """Tapastic edition of Chris Hallbeck's The Book of Biff webcomic.

    Pure configuration: only class attributes are set here and no method
    of GenericTapasticComic is overridden.
    """
    # Same comic is also published on:
    #   http://chrishallbeck.tumblr.com
    #   http://thebookofbiff.com
    name = 'hallbeckbiff-tapa'
    # Author name spelled as in the class name and the URLs above.
    long_name = 'Chris Hallbeck - The Book of Biff (from Tapastic)'
    url = 'https://tapastic.com/series/Biff'
    # NOTE(review): the category key keeps its 'HALLBACK' spelling;
    # presumably it must stay equal to the key used by the other Hallbeck
    # classes - confirm before renaming it.
    _categories = ('HALLBACK',)
4813
4814
4815
class RandoWisTapa(GenericTapasticComic):
    """Tapastic edition of the RandoWis webcomic.

    Pure configuration: only class attributes are set here and no method
    of GenericTapasticComic is overridden.
    """
    # Same comic is also published on:
    #   https://randowis.com
    name = 'randowis-tapa'
    long_name = 'RandoWis (from Tapastic)'
    url = 'https://tapastic.com/series/RandoWis'
4821
4822
4823
class PigeonGazetteTapa(GenericTapasticComic):
    """Tapastic edition of The Pigeon Gazette webcomic.

    Pure configuration: only class attributes are set here and no method
    of GenericTapasticComic is overridden.
    """
    # Same comic is also published on:
    #   http://thepigeongazette.tumblr.com
    name = 'pigeon-tapa'
    long_name = 'The Pigeon Gazette (from Tapastic)'
    url = 'https://tapastic.com/series/The-Pigeon-Gazette'
4829
4830
4831
class TheOdd1sOutTapa(GenericTapasticComic):
    """Tapastic edition of The Odd 1s Out webcomic.

    Pure configuration: only class attributes are set here and no method
    of GenericTapasticComic is overridden.
    """
    # Same comic is also published on:
    #   http://theodd1sout.com
    #   http://theodd1sout.tumblr.com
    name = 'theodd-tapa'
    long_name = 'The Odd 1s Out (from Tapastic)'
    url = 'https://tapastic.com/series/Theodd1sout'
4838
4839
4840
class TheWorldIsFlatTapa(GenericTapasticComic):
    """Tapastic edition of The World Is Flat webcomic.

    Pure configuration: only class attributes are set here and no method
    of GenericTapasticComic is overridden.
    """
    # Same comic is also published on:
    #   http://theworldisflatcomics.tumblr.com
    name = 'flatworld-tapa'
    long_name = 'The World Is Flat (from Tapastic)'
    url = 'https://tapastic.com/series/The-World-is-Flat'
4846
4847
4848
class MisterAndMeTapa(GenericTapasticComic):
    """Tapastic edition of the Mister & Me webcomic.

    Pure configuration: only class attributes are set here and no method
    of GenericTapasticComic is overridden.
    """
    # Same comic is also published on:
    #   http://www.mister-and-me.com
    #   http://www.gocomics.com/mister-and-me
    name = 'mister-tapa'
    long_name = 'Mister & Me (from Tapastic)'
    url = 'https://tapastic.com/series/Mister-and-Me'
4855
4856
4857
class TalesOfAbsurdityTapa(GenericTapasticComic):
    """Tapastic edition of the Tales Of Absurdity webcomic.

    Pure configuration: only class attributes are set here and no method
    of GenericTapasticComic is overridden.
    """
    # Same comic is also published on:
    #   http://talesofabsurdity.com
    #   http://talesofabsurdity.tumblr.com
    name = 'absurdity-tapa'
    long_name = 'Tales of Absurdity (from Tapastic)'
    url = 'http://tapastic.com/series/Tales-Of-Absurdity'
    _categories = ('ABSURDITY',)
4865
4866
4867
class BFGFSTapa(GenericTapasticComic):
    """Tapastic edition of the BFGFS webcomic.

    Pure configuration: only class attributes are set here and no method
    of GenericTapasticComic is overridden.
    """
    # Same comic is also published on:
    #   http://bfgfs.com
    #   http://bfgfs.tumblr.com
    name = 'bfgfs-tapa'
    long_name = 'BFGFS (from Tapastic)'
    url = 'https://tapastic.com/series/BFGFS'
4874
4875
4876
class DoodleForFoodTapa(GenericTapasticComic):
    """Tapastic edition of the Doodle For Food webcomic.

    Pure configuration: only class attributes are set here and no method
    of GenericTapasticComic is overridden.
    """
    # Same comic is also published on:
    #   http://doodleforfood.com
    name = 'doodle-tapa'
    long_name = 'Doodle For Food (from Tapastic)'
    url = 'https://tapastic.com/series/Doodle-for-Food'
4882
4883
4884
class MrLovensteinTapa(GenericTapasticComic):
    """Tapastic edition of the Mr. Lovenstein webcomic.

    Pure configuration: only class attributes are set here and no method
    of GenericTapasticComic is overridden.
    """
    # Also on  https://tapastic.com/series/MrLovenstein
    # NOTE(review): the "Also on" link above is the same address as `url`
    # below; presumably the comic's own site was meant - confirm and replace.
    name = 'mrlovenstein-tapa'
    long_name = 'Mr. Lovenstein (from Tapastic)'
    url = 'https://tapastic.com/series/MrLovenstein'
4890
4891
4892
class CassandraCalinTapa(GenericTapasticComic):
    """Tapastic edition of the C. Cassandra (Cassandra Calin) webcomic.

    Pure configuration: only class attributes are set here and no method
    of GenericTapasticComic is overridden.
    """
    # Same comic is also published on:
    #   http://cassandracalin.com
    #   http://c-cassandra.tumblr.com
    name = 'cassandra-tapa'
    long_name = 'Cassandra Calin (from Tapastic)'
    url = 'https://tapastic.com/series/C-Cassandra-comics'
4899
4900
4901
class WafflesAndPancakes(GenericTapasticComic):
    """Waffles And Pancakes webcomic, retrieved from its Tapastic series.

    Pure configuration: only class attributes are set here and no method
    of GenericTapasticComic is overridden.
    """
    # Same comic is also published on:
    #   http://wandpcomic.com
    name = 'waffles'
    long_name = 'Waffles And Pancakes'
    url = 'https://tapastic.com/series/Waffles-and-Pancakes'
4907
4908
4909
class YesterdaysPopcornTapastic(GenericTapasticComic):
    """Tapastic edition of the Yesterday's Popcorn webcomic.

    Pure configuration: only class attributes are set here and no method
    of GenericTapasticComic is overridden.
    """
    # Same comic is also published on:
    #   http://www.yesterdayspopcorn.com
    #   http://yesterdayspopcorn.tumblr.com
    name = 'popcorn-tapa'
    long_name = "Yesterday's Popcorn (from Tapastic)"
    url = 'https://tapastic.com/series/Yesterdays-Popcorn'
4916
4917
4918
class OurSuperAdventureTapastic(GenericTapasticComic):
    """Tapastic edition of the Our Super Adventure webcomic.

    Pure configuration: only class attributes are set here and no method
    of GenericTapasticComic is overridden.
    """
    # Same comic is also published on:
    #   http://www.oursuperadventure.com
    # Other links listed alongside it:
    #   http://sarahssketchbook.tumblr.com
    #   http://sarahgraley.com
    name = 'superadventure-tapastic'
    long_name = 'Our Super Adventure (from Tapastic)'
    url = 'https://tapastic.com/series/Our-Super-Adventure'
4926
4927
4928
class NamelessPCs(GenericTapasticComic):
    """Tapastic edition of the Nameless PCs webcomic.

    Pure configuration: only class attributes are set here and no method
    of GenericTapasticComic is overridden.
    """
    # Same comic is also published on:
    #   http://namelesspcs.com
    name = 'namelesspcs-tapa'
    long_name = 'NamelessPCs (from Tapastic)'
    url = 'https://tapastic.com/series/NamelessPC'
4934
4935
4936
class UbertoolTapa(GenericTapasticComic):
    """Tapastic edition of the Ubertool webcomic.

    Pure configuration: only class attributes are set here and no method
    of GenericTapasticComic is overridden.
    """
    # Same comic is also published on:
    #   http://ubertoolcomic.com
    #   http://ubertool.tumblr.com
    name = 'ubertool-tapa'
    long_name = 'Ubertool (from Tapastic)'
    url = 'https://tapastic.com/series/ubertool'
    _categories = ('UBERTOOL',)
4944
4945
4946
class BarteNerdsTapa(GenericTapasticComic):
    """Tapastic edition of the BarteNerds webcomic.

    Pure configuration: only class attributes are set here and no method
    of GenericTapasticComic is overridden.
    """
    # Same comic is also published on:
    #   http://www.bartenerds.com
    name = 'bartenerds-tapa'
    long_name = 'BarteNerds (from Tapastic)'
    url = 'https://tapastic.com/series/BarteNERDS'
4952
4953
4954
class SmallBlueYonderTapa(GenericTapasticComic):
    """Tapastic edition of the Small Blue Yonder webcomic.

    Pure configuration: only class attributes are set here and no method
    of GenericTapasticComic is overridden.
    """
    # Same comic is also published on:
    #   http://www.smallblueyonder.com
    name = 'smallblue-tapa'
    long_name = 'Small Blue Yonder (from Tapastic)'
    url = 'https://tapastic.com/series/Small-Blue-Yonder'
4960
4961
4962
class TizzyStitchBirdTapa(GenericTapasticComic):
    """Tapastic edition of the Tizzy Stitch Bird webcomic.

    Pure configuration: only class attributes are set here and no method
    of GenericTapasticComic is overridden.
    """
    # Same comic is also published on:
    #   http://tizzystitchbird.com
    #   http://tizzystitchbird.tumblr.com
    #   http://www.webtoons.com/en/challenge/tizzy-stitchbird/list?title_no=50082
    name = 'tizzy-tapa'
    long_name = 'Tizzy Stitch Bird (from Tapastic)'
    url = 'https://tapastic.com/series/TizzyStitchbird'
4970
4971
4972
def get_subclasses(klass):
    """Return every class deriving from klass, directly or not.

    The direct subclasses come first, followed by the descendants of each
    of them in turn. A class reachable through several parents appears
    once per path.
    """
    children = klass.__subclasses__()
    descendants = [
        grandchild
        for child in children
        for grandchild in get_subclasses(child)
    ]
    return children + descendants
4978
4979
4980
def remove_st_nd_rd_th_from_date(string):
    """Strip English ordinal suffixes (1st/2nd/3rd/4th) from a date string.

    Only a 'st', 'nd', 'rd' or 'th' directly following a digit is removed,
    so the same letters inside words ('August', 'Monday', 'Saturday') are
    left untouched and the result stays parsable as a date.

    Args:
        string: text containing a date, e.g. 'August 21st, 2015'.

    Returns:
        The text without the ordinal suffixes, e.g. 'August 21, 2015'.
    """
    # The lookbehind anchors the suffix to a preceding digit; a blind
    # str.replace would also mangle day and month names.
    return re.sub(r'(?<=\d)(?:st|nd|rd|th)', '', string)
4988
4989
4990
def string_to_date(string, date_format, local=DEFAULT_LOCAL):
    """Convert a string to a date object.

    Wrapper around datetime.datetime.strptime which temporarily switches
    the process locale (LC_ALL) to `local` while parsing.

    Args:
        string: text to parse.
        date_format: strptime format, as described in
            https://docs.python.org/2/library/datetime.html#strftime-and-strptime-behavior
        local: locale name to parse with.

    Returns:
        The parsed datetime.date.

    Raises:
        ValueError: if strptime cannot parse `string` with `date_format`.
        locale.Error: if `local` cannot be set.
    """
    # Calling setlocale without a value only queries the current setting.
    prev_locale = locale.setlocale(locale.LC_ALL)
    if local == prev_locale:
        return datetime.datetime.strptime(string, date_format).date()
    locale.setlocale(locale.LC_ALL, local)
    try:
        return datetime.datetime.strptime(string, date_format).date()
    finally:
        # Restore the previous locale even when parsing fails, so a bad
        # date string does not leave the whole process in another locale.
        locale.setlocale(locale.LC_ALL, prev_locale)
5001
5002
5003
# Every direct or indirect subclass of GenericComic; stored as a set so a
# class reached through several parents is kept only once.
COMICS = set(get_subclasses(GenericComic))
# Classes whose name is None are left out of the lookup tables below.
VALID_COMICS = [c for c in COMICS if c.name is not None]
# Lookup table from comic name to comic class.
COMIC_NAMES = {c.name: c for c in VALID_COMICS}
# Two classes sharing a name would silently shadow each other in the table,
# so fail at import time and report the offending names.
assert len(VALID_COMICS) == len(COMIC_NAMES), (
    'Comic names must be unique, duplicated: %s' % sorted(
        {c.name for c in VALID_COMICS if COMIC_NAMES[c.name] is not c}))
CLASS_NAMES = {c.__name__ for c in VALID_COMICS}
assert len(VALID_COMICS) == len(CLASS_NAMES), (
    'Class names must be unique, duplicated: %s' % sorted(
        n for n in CLASS_NAMES
        if sum(c.__name__ == n for c in VALID_COMICS) > 1))
5009