Completed
Push — master ( f3d2c0...39361d )
by De
01:11
created

comics.py (34 issues)

Code
1
#! /usr/bin/python3
2
# vim: set expandtab tabstop=4 shiftwidth=4 :
3
"""Module to retrieve webcomics"""
4
5
from comic_abstract import GenericComic, get_date_for_comic
6
import re
7
from datetime import date, timedelta
8
import datetime
9
from urlfunctions import get_soup_at_url, urljoin_wrapper,\
10
    convert_iri_to_plain_ascii_uri, load_json_at_url, urlopen_wrapper
11
import json
12
import locale
13
import urllib
14
15
DEFAULT_LOCAL = 'en_GB.UTF-8'
16
17
18
class Xkcd(GenericComic):
    """Retriever for xkcd comics, driven by the site's JSON endpoints."""
    name = 'xkcd'
    long_name = 'xkcd'
    url = 'http://xkcd.com'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Yield every comic published after `last_comic`.

        Implementation of GenericComic's abstract method. `last_comic` is
        either a falsy value (start from number 1) or a mapping holding the
        'num' of the last comic retrieved. Each yielded item is the JSON
        mapping of a comic, enriched with 'img' (as a list), 'prefix',
        'json_url', 'url' and integer 'day'/'month'/'year'.
        """
        start = (last_comic['num'] if last_comic else 0) + 1
        latest = load_json_at_url(urljoin_wrapper(cls.url, 'info.0.json'))['num']

        for num in range(start, latest + 1):
            if num == 404:
                # Number 404 is never requested.
                continue
            json_url = urljoin_wrapper(cls.url, '%d/info.0.json' % num)
            comic = load_json_at_url(json_url)
            comic.update({
                'img': [comic['img']],
                'prefix': '%d-' % num,
                'json_url': json_url,
                'url': urljoin_wrapper(cls.url, str(num)),
                'day': int(comic['day']),
                'month': int(comic['month']),
                'year': int(comic['year']),
            })
            assert comic['num'] == num
            yield comic
44
45
46
# Helper functions corresponding to get_url_from_link/get_url_from_archive_element
47
48
49
@classmethod
def get_href(cls, link):
    """Return the 'href' entry of `link`.

    Meant to be assigned in a class body as its get_url_from_link or
    get_url_from_archive_element implementation.
    """
    href = link['href']
    return href
53
54
55
@classmethod
def join_cls_url_to_href(cls, link):
    """Return the 'href' entry of `link` joined onto the class' `url`.

    Meant to be assigned in a class body as its get_url_from_link or
    get_url_from_archive_element implementation.
    """
    href = link['href']
    return urljoin_wrapper(cls.url, href)
59
60
61
class GenericNavigableComic(GenericComic):
    """Generic class for "navigable" comics : with first/next arrows.

    This class applies to comics where previous and next comics can be
    accessed from a given comic. Once given a starting point (either
    the first comic or the last comic retrieved), it will handle the
    navigation, the retrieval of the soup object and the setting of
    the 'url' attribute on retrieved comics. This limits a lot the
    amount of boilerplate code in the different implementation classes.

    The `get_next_comic` method is implemented in terms of new
    more specialized methods to be implemented/overridden:
        - get_first_comic_link
        - get_navi_link
        - get_comic_info
        - get_url_from_link
    """
    _categories = ('NAVIGABLE', )

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        Sometimes this can be retrieved from any comic page, sometimes from
        the archive page, sometimes it doesn't exist at all and one has
        to iterate backward to find it before hardcoding the result found.
        """
        raise NotImplementedError

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next (or previous - for dev purposes) comic.

        `next_` is truthy to get the next link, falsy to get the previous one.
        """
        raise NotImplementedError

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict without a 'url' key (it is set by `get_next_comic`)
        or None when the page is to be skipped.
        """
        raise NotImplementedError

    @classmethod
    def get_url_from_link(cls, link):
        """Get url corresponding to a link. Default implementation is similar to get_href."""
        return link['href']

    @classmethod
    def get_next_link(cls, last_soup):
        """Get link to next comic."""
        link = cls.get_navi_link(last_soup, True)
        cls.log("Next link is %s" % link)
        return link

    @classmethod
    def get_prev_link(cls, last_soup):
        """Get link to previous comic."""
        link = cls.get_navi_link(last_soup, False)
        cls.log("Prev link is %s" % link)
        return link

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generic implementation of get_next_comic for navigable comics.

        Starts after `last_comic` (a mapping with a 'url' key) when provided,
        from the first comic link otherwise, and follows "next" links until
        there is none or until a link points back to the current url.
        """
        url = last_comic['url'] if last_comic else None
        cls.log("starting 'get_next_comic' from %s" % url)
        if url:
            next_comic = cls.get_next_link(get_soup_at_url(url))
        else:
            next_comic = cls.get_first_comic_link()
        cls.log("next/first comic will be %s (url is %s)" % (str(next_comic), url))
        # cls.check_navigation(url)
        while next_comic:
            prev_url, url = url, cls.get_url_from_link(next_comic)
            if prev_url == url:
                # A "next" link pointing to the current page: stop here
                # instead of looping on the same page.
                cls.log("got same url %s" % url)
                break
            cls.log("about to get %s (%s)" % (url, str(next_comic)))
            soup = get_soup_at_url(url)
            comic = cls.get_comic_info(soup, next_comic)
            if comic is not None:
                assert 'url' not in comic
                comic['url'] = url
                yield comic
            next_comic = cls.get_next_link(soup)
            cls.log("next comic will be %s" % str(next_comic))

    @classmethod
    def check_first_link(cls):
        """Check that navigation to first comic seems to be working - for dev purposes.

        Returns True when a first link exists and its page can be fetched
        without an HTTP error, False otherwise.
        """
        # `import urllib` alone does not guarantee that the `urllib.error`
        # submodule is loaded: import it explicitly before referencing it.
        import urllib.error
        cls.log("about to check first link")
        ok = True
        firstlink = cls.get_first_comic_link()
        if firstlink is None:
            print("From %s : no first link" % cls.url)
            ok = False
        else:
            firsturl = cls.get_url_from_link(firstlink)
            try:
                get_soup_at_url(firsturl)
            except urllib.error.HTTPError:
                print("From %s : invalid first url" % cls.url)
                ok = False
        cls.log("checked first link -> returned %d" % ok)
        return ok

    @classmethod
    def check_prev_next_links(cls, url):
        """Check that navigation to prev/next from a given URL seems to be working - for dev purposes.

        Going backward then forward (and forward then backward) from `url`
        is expected to lead back to `url`. Returns True when all the checks
        that could be performed passed, False otherwise.
        """
        cls.log("about to check prev/next from %s" % url)
        ok = True
        if url is None:
            prevlink, nextlink = None, None
        else:
            soup = get_soup_at_url(url)
            prevlink, nextlink = cls.get_prev_link(soup), cls.get_next_link(soup)
        if prevlink is None and nextlink is None:
            print("From %s : no previous nor next" % url)
            ok = False
        else:
            if prevlink:
                prevurl = cls.get_url_from_link(prevlink)
                prevsoup = get_soup_at_url(prevurl)
                prevnextlink = cls.get_next_link(prevsoup)
                prevnext = cls.get_url_from_link(prevnextlink) if prevnextlink is not None else "NO URL"
                if prevnext != url:
                    print("From %s, going backward then forward leads to %s" % (url, prevnext))
                    ok = False
            if nextlink:
                nexturl = cls.get_url_from_link(nextlink)
                # A next link pointing to the page itself is not followed.
                if nexturl != url:
                    nextsoup = get_soup_at_url(nexturl)
                    nextprevlink = cls.get_prev_link(nextsoup)
                    nextprev = cls.get_url_from_link(nextprevlink) if nextprevlink is not None else "NO URL"
                    if nextprev != url:
                        print("From %s, going forward then backward leads to %s" % (url, nextprev))
                        ok = False
        cls.log("checked prev/next from %s -> returned %d" % (url, ok))
        return ok

    @classmethod
    def check_navigation(cls, url):
        """Check that navigation functions seem to be working - for dev purposes.

        Runs both `check_first_link` and `check_prev_next_links` and returns
        True only when both succeeded.
        """
        cls.log("about to check navigation from %s" % url)
        first = cls.check_first_link()
        prevnext = cls.check_prev_next_links(url)
        ok = first and prevnext
        cls.log("checked navigation from %s -> returned %d" % (url, ok))
        return ok
207
208
209
class GenericListableComic(GenericComic):
    """Generic class for "listable" comics : with a list of comics (aka 'archive')

    The `get_next_comic` method is implemented in terms of new
    more specialized methods to be implemented/overridden:
        - get_archive_elements
        - get_url_from_archive_element
        - get_comic_info
    """
    _categories = ('LISTABLE', )

    @classmethod
    def get_archive_elements(cls):
        """Return an iterable over the elements of the archive."""
        raise NotImplementedError

    @classmethod
    def get_url_from_archive_element(cls, archive_elt):
        """Return the url an archive element points to."""
        raise NotImplementedError

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Return the information about one comic (or None to skip it)."""
        raise NotImplementedError

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generic implementation of get_next_comic for listable comics.

        Archive elements are walked in order. When `last_comic` is provided,
        elements are skipped up to and including the one whose url is the
        url of `last_comic`; every following element is fetched and yielded.
        """
        waiting_for_url = last_comic['url'] if last_comic else None
        for archive_elt in cls.get_archive_elements():
            url = cls.get_url_from_archive_element(archive_elt)
            cls.log("considering %s" % url)
            if waiting_for_url is not None:
                # Still looking for the last comic retrieved: nothing is
                # fetched until it has been seen.
                if waiting_for_url == url:
                    waiting_for_url = None
                continue
            cls.log("about to get %s (%s)" % (url, str(archive_elt)))
            soup = get_soup_at_url(url)
            comic = cls.get_comic_info(soup, archive_elt)
            if comic is None:
                continue
            assert 'url' not in comic
            comic['url'] = url
            yield comic
        if waiting_for_url is not None:
            print("Did not find %s : there might be a problem" % waiting_for_url)
254
255
# Helper functions corresponding to get_first_comic_link/get_navi_link
256
257
258
@classmethod
def get_link_rel_next(cls, last_soup, next_):
    """Implementation of get_navi_link based on <link rel="next|prev">."""
    rel = 'next' if next_ else 'prev'
    return last_soup.find('link', rel=rel)
262
263
264
@classmethod
def get_a_rel_next(cls, last_soup, next_):
    """Implementation of get_navi_link based on <a rel="next|prev">."""
    rel = 'next' if next_ else 'prev'
    return last_soup.find('a', rel=rel)
268
269
270
@classmethod
def get_a_navi_navinext(cls, last_soup, next_):
    """Implementation of get_navi_link based on <a class="navi navi-next|navi-prev">."""
    if next_:
        css_class = 'navi navi-next'
    else:
        css_class = 'navi navi-prev'
    return last_soup.find('a', class_=css_class)
274
275
276
@classmethod
def get_a_navi_comicnavnext_navinext(cls, last_soup, next_):
    """Implementation of get_navi_link based on the 'navi comic-nav-* navi-*' classes of <a> tags."""
    if next_:
        css_class = 'navi comic-nav-next navi-next'
    else:
        css_class = 'navi comic-nav-previous navi-prev'
    return last_soup.find('a', class_=css_class)
280
281
282
@classmethod
def get_a_comicnavbase_comicnavnext(cls, last_soup, next_):
    """Implementation of get_navi_link based on the 'comic-nav-base comic-nav-*' classes of <a> tags."""
    if next_:
        css_class = 'comic-nav-base comic-nav-next'
    else:
        css_class = 'comic-nav-base comic-nav-previous'
    return last_soup.find('a', class_=css_class)
286
287
288
@classmethod
def get_a_navi_navifirst(cls):
    """Implementation of get_first_comic_link: <a class="navi navi-first"> on the page at `cls.url`."""
    home_soup = get_soup_at_url(cls.url)
    return home_soup.find('a', class_='navi navi-first')
292
293
294
@classmethod
def get_div_navfirst_a(cls):
    """Implementation of get_first_comic_link.

    Looks, on the page at `cls.url`, for the <a> inside <div class="nav-first">.
    Returns None (instead of failing with an AttributeError) when that div
    is not on the page, so that callers can handle a missing first link.
    """
    div = get_soup_at_url(cls.url).find('div', class_="nav-first")
    return div.find('a') if div else None
298
299
300
@classmethod
def get_a_comicnavbase_comicnavfirst(cls):
    """Implementation of get_first_comic_link: <a class="comic-nav-base comic-nav-first"> on the page at `cls.url`."""
    home_soup = get_soup_at_url(cls.url)
    return home_soup.find('a', class_='comic-nav-base comic-nav-first')
304
305
306
@classmethod
def simulate_first_link(cls):
    """Implementation of get_first_comic_link creating a link-like object from
    a URL provided by the class (attribute `first_url`).

    Raises NotImplementedError when the class does not provide a usable
    `first_url` (attribute missing or left to the `NotImplemented`
    placeholder) instead of returning a link that cannot be followed.

    Note: The first URL can easily be found using :
    `get_first_comic_link = navigate_to_first_comic`.
    """
    first_url = getattr(cls, 'first_url', NotImplemented)
    if first_url is NotImplemented:
        raise NotImplementedError("%r must define 'first_url' to use simulate_first_link" % (cls,))
    return {'href': first_url}
315
316
317
@classmethod
def navigate_to_first_comic(cls):
    """Implementation of get_first_comic_link walking backward from a URL
    typed by the user until a page without a previous link is reached.

    Some sites give no direct access to their first comic, so the only
    way to find it is to follow "previous" links until there is none.
    This is convenient during development; once the URL is known, it is
    better to hardcode it and to rely on `simulate_first_link`.

    Every URL visited is printed. The result is a link-like dict whose
    'href' is the last URL visited.
    """
    url = input("Get starting URL: ")
    while True:
        print(url)
        prev_link = cls.get_prev_link(get_soup_at_url(url))
        if not prev_link:
            return {'href': url}
        url = cls.get_url_from_link(prev_link)
338
339
340
class GenericEmptyComic(GenericComic):
    """Generic class for comics for which nothing is retrieved.

    Handy to switch off temporarily a comic that does not work
    properly: replace `class MyComic(GenericWhateverComic)` with
    `class MyComic(GenericEmptyComic, GenericWhateverComic)`."""
    _categories = ('EMPTY', )

    @classmethod
    def get_next_comic(cls, last_comic):
        """Implementation of get_next_comic: log and return an empty list."""
        cls.log("comic is considered as empty - returning no comic")
        no_comic = []
        return no_comic
353
354
355 View Code Duplication
class ExtraFabulousComics(GenericNavigableComic):
    """Class to retrieve Extra Fabulous Comics."""
    name = 'efc'
    long_name = 'Extra Fabulous Comics'
    url = 'http://extrafabulouscomics.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Images are the <img> tags whose 'src' starts with the upload
        directory of the site; title and date come from <meta> tags.
        """
        # The url is escaped so that its dots only match literal dots.
        img_src_re = re.compile('^%s/wp-content/uploads/' % re.escape(cls.url))
        imgs = soup.find_all('img', src=img_src_re)
        title = soup.find('meta', property='og:title')['content']
        # Only the first 10 characters (parsed as "%Y-%m-%d") are kept.
        date_str = soup.find('meta', property='article:published_time')['content'][:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        return {
            'title': title,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'prefix': title + '-'
        }
379
380
381 View Code Duplication
class GenericLeMondeBlog(GenericNavigableComic):
    """Shared base class for the comics hosted on Le Monde blogs."""
    _categories = ('LEMONDE', 'FRANCAIS')
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    # Placeholder: overridden by each blog class in this file.
    first_url = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the dict describing the comic found on the page `soup`."""
        url2 = soup.find('link', rel='shortlink')['href']
        title = soup.find('meta', property='og:title')['content']
        date_str = soup.find("span", class_="entry-date").string
        # Date is parsed with a French locale passed to string_to_date.
        day = string_to_date(date_str, "%d %B %Y", "fr_FR.utf8")
        img_urls = [
            convert_iri_to_plain_ascii_uri(meta['content'])
            for meta in soup.find_all('meta', property='og:image')
        ]
        return {
            'title': title,
            'url2': url2,
            'img': img_urls,
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
404
405
406
class ZepWorld(GenericLeMondeBlog):
    """Zep World comics (Le Monde blog)."""
    name = 'zep'
    long_name = 'Zep World'
    url = 'http://zepworld.blog.lemonde.fr'
    first_url = 'http://zepworld.blog.lemonde.fr/2014/10/31/bientot-le-blog-de-zep/'
412
413
414
class Vidberg(GenericLeMondeBlog):
    """Vidberg comics (Le Monde blog)."""
    name = "vidberg"
    long_name = "Vidberg - l'actu en patates"
    url = 'http://vidberg.blog.lemonde.fr'
    # This is not the very first post: no efficient way to retrieve it was found.
    first_url = 'http://vidberg.blog.lemonde.fr/2012/02/09/revue-de-campagne-la-campagne-du-modem-semballe/'
421
422
423
class Plantu(GenericLeMondeBlog):
    """Plantu comics (Le Monde blog)."""
    name = "plantu"
    long_name = 'Plantu'
    url = 'http://plantu.blog.lemonde.fr'
    first_url = 'http://plantu.blog.lemonde.fr/2014/10/28/stress-test-a-bruxelles/'
429
430
431
class XavierGorce(GenericLeMondeBlog):
    """Xavier Gorce comics (Le Monde blog)."""
    name = "gorce"
    long_name = 'Xavier Gorce'
    url = 'http://xaviergorce.blog.lemonde.fr'
    first_url = 'http://xaviergorce.blog.lemonde.fr/2015/01/09/distinction/'
437
438
439
class CartooningForPeace(GenericLeMondeBlog):
    """Cartooning For Peace comics (Le Monde blog)."""
    name = "forpeace"
    long_name = 'Cartooning For Peace'
    url = 'http://cartooningforpeace.blog.lemonde.fr'
    first_url = 'http://cartooningforpeace.blog.lemonde.fr/2014/12/15/bado/'
445
446
447
class Aurel(GenericLeMondeBlog):
    """Aurel comics (Le Monde blog)."""
    name = "aurel"
    long_name = 'Aurel'
    url = 'http://aurel.blog.lemonde.fr'
    first_url = 'http://aurel.blog.lemonde.fr/2014/09/29/le-senat-repasse-a-droite/'
453
454
455
class LesCulottees(GenericLeMondeBlog):
    """Les Culottees comics (Le Monde blog)."""
    name = "culottees"
    long_name = "Les Culottees"
    url = 'http://lesculottees.blog.lemonde.fr'
    first_url = 'http://lesculottees.blog.lemonde.fr/2016/01/11/clementine-delait-femme-a-barbe/'
461
462
463
class UneAnneeAuLycee(GenericLeMondeBlog):
    """Une Annee Au Lycee comics (Le Monde blog)."""
    name = "lycee"
    long_name = "Une Annee au Lycee"
    url = "http://uneanneeaulycee.blog.lemonde.fr"
    first_url = 'http://uneanneeaulycee.blog.lemonde.fr/2016/06/13/la-semaine-du-bac-est-arrivee/'
469
470
471
class Rall(GenericNavigableComic):
    """Retriever for Ted Rall comics."""
    # Also on http://www.gocomics.com/tedrall
    name = 'rall'
    long_name = "Ted Rall"
    url = "http://rall.com/comic"
    _categories = ('RALL', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    # This is not the very first comic: no efficient way to retrieve it was found.
    first_url = "http://rall.com/2014/01/30/los-angeles-times-cartoon-well-miss-those-california-flowers"

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the dict describing the comic found on the page `soup`."""
        title = soup.find('meta', property='og:title')['content']
        author_span = soup.find("span", class_="author vcard")
        author = author_span.find("a").string
        date_str = soup.find("span", class_="entry-date").string
        day = string_to_date(date_str, "%B %d, %Y")
        desc = soup.find('meta', property='og:description')['content']
        content_imgs = soup.find('div', class_='entry-content').find_all('img')
        # The last 7 images are dropped (social media buttons).
        comic_imgs = content_imgs[:-7]
        return {
            'title': title,
            'author': author,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'description': desc,
            'img': [img['src'] for img in comic_imgs],
        }
502
503
504
class Dilem(GenericNavigableComic):
    """Retriever for Ali Dilem comics."""
    name = 'dilem'
    long_name = 'Ali Dilem'
    url = 'http://information.tv5monde.com/dilem'
    _categories = ('FRANCAIS', )
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = "http://information.tv5monde.com/dilem/2004-06-26"

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the <a> leading to the next (or previous) comic, None if there is none."""
        # Classes are swapped on purpose: the 'prev' item is used to go
        # to the next comic and the 'next' item to go to the previous one.
        li_class = 'prev' if next_ else 'next'
        li = last_soup.find('li', class_=li_class)
        if li:
            return li.find('a')
        return None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the dict describing the comic found on the page `soup`."""
        short_url = soup.find('link', rel='shortlink')['href']
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        imgs = soup.find_all('meta', property='og:image')
        # Only the first 10 characters (parsed as "%Y-%m-%d") are kept.
        date_str = soup.find('span', property='dc:date')['content'][:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        return {
            'short_url': short_url,
            'title': title,
            'img': [meta['content'] for meta in imgs],
            'day': day.day,
            'month': day.month,
            'year': day.year,
        }
538
539
540
class SpaceAvalanche(GenericNavigableComic):
    """Retriever for Space Avalanche comics."""
    name = 'avalanche'
    long_name = 'Space Avalanche'
    url = 'http://www.spaceavalanche.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return a hardcoded link-like dict ('href' and 'title') for the first comic."""
        first = {
            'href': "http://www.spaceavalanche.com/2009/02/02/irish-sea/",
            'title': "Irish Sea",
        }
        return first

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the dict describing the comic found on the page `soup`.

        The title comes from `link` and the date from the three numeric
        path components of the url of `link`.
        """
        url_date_re = re.compile('.*/([0-9]*)/([0-9]*)/([0-9]*)/.*$')
        title = link['title']
        url = cls.get_url_from_link(link)
        year, month, day = map(int, url_date_re.match(url).groups())
        entry_imgs = soup.find("div", class_="entry").find_all("img")
        return {
            'title': title,
            'day': day,
            'month': month,
            'year': year,
            'img': [img['src'] for img in entry_imgs],
        }
568
569
570
class ZenPencils(GenericNavigableComic):
    """Retriever for ZenPencils comics."""
    # Also on http://zenpencils.tumblr.com
    # Also on http://www.gocomics.com/zen-pencils
    name = 'zenpencils'
    long_name = 'Zen Pencils'
    url = 'http://zenpencils.com'
    _categories = ('ZENPENCILS', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = "http://zenpencils.com/comic/1-ralph-waldo-emerson-make-them-cry/"

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the dict describing the comic found on the page `soup`.

        Sanity checks (assertions): at least one image, and for every image
        'alt' equals 'title' and is either the page title or empty.
        """
        imgs = soup.find('div', id='comic').find_all('img')
        # imgs2 = soup.find_all('meta', property='og:image')
        post = soup.find('div', class_='post-content')
        author = post.find("span", class_="post-author").find("a").string
        title = soup.find('meta', property='og:title')['content']
        date_str = post.find('span', class_='post-date').string
        day = string_to_date(date_str, "%B %d, %Y")
        assert imgs
        assert all(img['alt'] == img['title'] for img in imgs)
        assert all(img['alt'] in (title, "") for img in imgs)
        desc = soup.find('meta', property='og:description')['content']
        img_urls = [urljoin_wrapper(cls.url, img['src']) for img in imgs]
        return {
            'title': title,
            'description': desc,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': img_urls,
        }
605
606
607
class ItsTheTie(GenericNavigableComic):
    """Retriever for It's the tie comics."""
    # Also on http://itsthetie.tumblr.com
    # Also on https://tapastic.com/series/itsthetie
    name = 'tie'
    long_name = "It's the tie"
    url = "http://itsthetie.com"
    _categories = ('TIE', )
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the dict describing the comic found on the page `soup`."""
        title = soup.find('h1', class_='comic-title').find('a').string
        date_str = soup.find('header', class_='comic-meta entry-meta').find('a').string
        day = string_to_date(date_str, "%B %d, %Y")
        # Bonus images may or may not be in meta og:image: both sources
        # are merged, without duplicates.
        imgs_src = [meta['content'] for meta in soup.find_all('meta', property='og:image')]
        bonus_src = [img['data-oversrc'] for img in soup.find_all('img', attrs={'data-oversrc': True})]
        all_imgs_src = []
        for src in imgs_src + [b for b in bonus_src if b not in imgs_src]:
            # The generic "bonus panel" picture is not kept.
            if not src.endswith("/2016/01/bonus-panel.png"):
                all_imgs_src.append(src)
        tag_meta = soup.find('meta', property='article:tag')
        if tag_meta:
            tags = tag_meta['content']
        else:
            tags = ""
        return {
            'title': title,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': all_imgs_src,
            'tags': tags,
        }
641
642
643
class PenelopeBagieu(GenericNavigableComic):
    """Retriever for the comics of Penelope Bagieu's blog."""
    name = 'bagieu'
    long_name = 'Ma vie est tout a fait fascinante (Bagieu)'
    url = 'http://www.penelope-jolicoeur.com'
    _categories = ('FRANCAIS', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.penelope-jolicoeur.com/2007/02/ma-vie-mon-oeuv.html'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the dict describing the comic found on the page `soup`."""
        date_str = soup.find('h2', class_='date-header').string
        # Date is parsed with a French locale passed to string_to_date.
        day = string_to_date(date_str, "%A %d %B %Y", "fr_FR.utf8")
        body_imgs = soup.find('div', class_='entry-body').find_all('img')
        title = soup.find('h3', class_='entry-header').string
        return {
            'title': title,
            'img': [img['src'] for img in body_imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
667
668
669
class OneOneOneOneComic(GenericNavigableComic):
    """Retriever for 1111 Comics."""
    # Also on http://comics1111.tumblr.com
    # Also on https://tapastic.com/series/1111-Comics
    name = '1111'
    long_name = '1111 Comics'
    url = 'http://www.1111comics.me'
    _categories = ('ONEONEONEONE', )
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the dict describing the comic found on the page `soup`."""
        title_link = soup.find('h1', class_='comic-title').find('a')
        title = title_link.string
        date_link = soup.find('header', class_='comic-meta entry-meta').find('a')
        day = string_to_date(date_link.string, "%B %d, %Y")
        img_urls = [meta['content'] for meta in soup.find_all('meta', property='og:image')]
        return {
            'title': title,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': img_urls,
        }
694
695
696
class AngryAtNothing(GenericNavigableComic):
    """Retriever for Angry at Nothing comics."""
    # Also on http://tapastic.com/series/Comics-yeah-definitely-comics-
    name = 'angry'
    long_name = 'Angry At Nothing'
    url = 'http://www.angryatnothing.net'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the dict describing the comic found on the page `soup`."""
        title_link = soup.find('h1', class_='comic-title').find('a')
        title = title_link.string
        date_link = soup.find('header', class_='comic-meta entry-meta').find('a')
        day = string_to_date(date_link.string, "%B %d, %Y")
        img_urls = [meta['content'] for meta in soup.find_all('meta', property='og:image')]
        return {
            'title': title,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': img_urls,
        }
719
720
721
class NeDroid(GenericNavigableComic):
    """Class to retrieve NeDroid comics."""
    name = 'nedroid'
    long_name = 'NeDroid'
    url = 'http://nedroid.com'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The comic number is read from the shortlink of the page and the
        date from the 'src' of the single image in <div id="comic">.
        """
        # Urls are escaped so that their dots only match literal dots.
        short_url_re = re.compile(r'^%s/\?p=([0-9]*)' % re.escape(cls.url))
        comic_url_re = re.compile(r'//nedroid\.com/comics/([0-9]*)-([0-9]*)-([0-9]*).*')
        short_url = cls.get_url_from_link(soup.find('link', rel='shortlink'))
        num = int(short_url_re.match(short_url).groups()[0])
        imgs = soup.find('div', id='comic').find_all('img')
        # Checked before using imgs[0] so that an unexpected page fails on
        # this assertion rather than on an IndexError.
        assert len(imgs) == 1
        img = imgs[0]
        year, month, day = [int(s) for s in comic_url_re.match(img['src']).groups()]
        title = img['alt']
        title2 = img['title']
        return {
            'short_url': short_url,
            'title': title,
            'title2': title2,
            'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
            'day': day,
            'month': month,
            'year': year,
            'num': num,
        }
752
753
754
class Garfield(GenericNavigableComic):
    """Retriever for the Garfield comic strips published on garfield.com."""
    # Also on http://www.gocomics.com/garfield
    name = 'garfield'
    long_name = 'Garfield'
    url = 'https://garfield.com'
    _categories = ('GARFIELD', )
    get_first_comic_link = simulate_first_link
    first_url = 'https://garfield.com/comic/1978/06/19'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the right-arrow (next) or left-arrow (previous) anchor."""
        if next_:
            arrow_class = 'comic-arrow-right'
        else:
            arrow_class = 'comic-arrow-left'
        return last_soup.find('a', class_=arrow_class)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic description from the link URL and the page.

        The date is parsed from the year/month/day components of the comic
        URL; the images are the 'img-responsive' images found inside the
        'comic-display' div.
        """
        comic_url = cls.get_url_from_link(link)
        date_match = re.match(
            '^%s/comic/([0-9]*)/([0-9]*)/([0-9]*)' % cls.url, comic_url)
        year, month, day = (int(part) for part in date_match.groups())
        display = soup.find('div', class_='comic-display')
        pictures = display.find_all('img', class_='img-responsive')
        return {
            'month': month,
            'year': year,
            'day': day,
            'img': [picture['src'] for picture in pictures],
        }
782
783
784
class Dilbert(GenericNavigableComic):
    """Retriever for the Dilbert comic strips published on dilbert.com."""
    # Also on http://www.gocomics.com/dilbert-classics
    name = 'dilbert'
    long_name = 'Dilbert'
    url = 'http://dilbert.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://dilbert.com/strip/1989-04-16'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor inside the next/previous navigation div.

        Returns None when the navigation div is not found on the page.
        """
        if next_:
            nav_class = 'nav-comic nav-right'
        else:
            nav_class = 'nav-comic nav-left'
        container = last_soup.find('div', class_=nav_class)
        if not container:
            return None
        return container.find('a')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect the comic description from the page's meta tags.

        The `link` argument is not used here.
        """
        def meta_content(prop):
            """Return the 'content' of the first meta tag with this property."""
            return soup.find('meta', property=prop)['content']

        title = meta_content('og:title')
        image_metas = soup.find_all('meta', property='og:image')
        desc = meta_content('og:description')
        published = string_to_date(
            meta_content('article:publish_date'), "%B %d, %Y")
        author = meta_content('article:author')
        tags = meta_content('article:tag')
        return {
            'title': title,
            'description': desc,
            'img': [meta['content'] for meta in image_metas],
            'author': author,
            'tags': tags,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
820
821
822
class VictimsOfCircumsolar(GenericNavigableComic):
    """Retriever for the Victims Of Circumsolar comics."""
    # Also on http://victimsofcomics.tumblr.com
    name = 'circumsolar'
    long_name = 'Victims Of Circumsolar'
    url = 'http://www.victimsofcircumsolar.com'
    get_navi_link = get_a_navi_comicnavnext_navinext
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.victimsofcircumsolar.com/comic/modern-addiction'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect title, description and image URLs from a comic page.

        Title and description come from the last 'og:title' and
        'og:description' meta tags; every image in the 'comic' div is
        expected to carry the title as both its 'title' and 'alt'.
        """
        # Date is on the archive page
        title_metas = soup.find_all('meta', property='og:title')
        title = title_metas[-1]['content']
        desc_metas = soup.find_all('meta', property='og:description')
        desc = desc_metas[-1]['content']
        pictures = soup.find('div', id='comic').find_all('img')
        assert all(pic['title'] == pic['alt'] == title for pic in pictures)
        return {
            'title': title,
            'description': desc,
            'img': [pic['src'] for pic in pictures],
        }
845
846
847
class ThreeWordPhrase(GenericNavigableComic):
    """Retriever for the Three Word Phrase comics."""
    # Also on http://www.threewordphrase.tumblr.com
    name = 'threeword'
    long_name = 'Three Word Phrase'
    url = 'http://threewordphrase.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the element wrapping the 'first' button on the home page."""
        first_button = get_soup_at_url(cls.url).find('img', src='/firstlink.gif')
        return first_button.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the element wrapping the next/previous button.

        Returns None when that element has no 'href' attribute.
        """
        button_src = '/nextlink.gif' if next_ else '/prevlink.gif'
        wrapper = last_soup.find('img', src=button_src).parent
        if wrapper.get('href') is None:
            return None
        return wrapper

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect the page title, image alt texts and image URLs.

        Images whose source ends with one of the known site-decoration file
        names are left out.
        """
        ignored_suffixes = ('link.gif', '32.png', 'twpbookad.jpg',
                            'merchad.jpg', 'header.gif', 'tipjar.jpg')
        title_tag = soup.find('title')
        comic_imgs = []
        for candidate in soup.find_all('img'):
            if not candidate['src'].endswith(ignored_suffixes):
                comic_imgs.append(candidate)
        return {
            'title': title_tag.string if title_tag else None,
            'title2': '  '.join(pic.get('alt') for pic in comic_imgs if pic.get('alt')),
            'img': [urljoin_wrapper(cls.url, pic['src']) for pic in comic_imgs],
        }
879
880
881
class DeadlyPanel(GenericEmptyComic, GenericNavigableComic):
    """Retriever for the Deadly Panel comics."""
    # Also on https://tapastic.com/series/deadlypanel
    name = 'deadly'
    long_name = 'Deadly Panel'
    url = 'http://www.deadlypanel.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the sources of the images found in the 'comic' div.

        Each image is expected to have identical 'alt' and 'title' values.
        """
        comic_div = soup.find('div', id='comic')
        pictures = comic_div.find_all('img')
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        sources = [pic['src'] for pic in pictures]
        return {
            'img': sources,
        }
898
899
900
class TheGentlemanArmchair(GenericNavigableComic):
    """Retriever for The Gentleman Armchair comics."""
    name = 'gentlemanarmchair'
    long_name = 'The Gentleman Armchair'
    url = 'http://thegentlemansarmchair.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect images, title, author and date from a comic page.

        The date is read from the 'post-date' span and parsed with the
        "%B %d, %Y" format. The `link` argument is not used here.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        date_span = soup.find('span', class_='post-date')
        published = string_to_date(date_span.string, "%B %d, %Y")
        pictures = soup.find('div', id='comic').find_all('img')
        return {
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'author': author,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
924
925 View Code Duplication
0 ignored issues
show
This code seems to be duplicated in your project.
Loading history...
926
class MyExtraLife(GenericNavigableComic):
    """Retriever for the My Extra Life comics."""
    name = 'extralife'
    long_name = 'My Extra Life'
    url = 'http://www.myextralife.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'first comic' navigation anchor of the home page."""
        home = get_soup_at_url(cls.url)
        return home.find('a', class_='comic_nav_link first_comic_link')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect title, image URLs and date from a comic page.

        Every image of class 'comic' is expected to carry the page title as
        both 'alt' and 'title'; images with an empty source are left out.
        """
        title = soup.find("h1", class_="comic_title").string
        date_text = soup.find("span", class_="comic_date").string
        published = string_to_date(date_text, "%B %d, %Y")
        pictures = soup.find_all("img", class_="comic")
        assert all(pic['alt'] == pic['title'] == title for pic in pictures)
        return {
            'title': title,
            'img': [pic['src'] for pic in pictures if pic["src"]],
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
953
954
955
class SaturdayMorningBreakfastCereal(GenericNavigableComic):
    """Retriever for the Saturday Morning Breakfast Cereal comics."""
    # Also on http://www.gocomics.com/saturday-morning-breakfast-cereal
    # Also on http://smbc-comics.tumblr.com
    name = 'smbc'
    long_name = 'Saturday Morning Breakfast Cereal'
    url = 'http://www.smbc-comics.com'
    _categories = ('SMBC', )
    get_navi_link = get_a_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the anchor marked rel='start' on the home page."""
        home = get_soup_at_url(cls.url)
        return home.find('a', rel='start')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect title, image URLs and date from a comic page.

        The main image is the one with id 'cc-comic'; when an 'aftercomic'
        div is present and its image has a non-empty source, that image is
        appended as a second one.
        """
        main_img = soup.find('img', id='cc-comic')
        sources = [main_img['src']]
        bonus_div = soup.find('div', id='aftercomic')
        if bonus_div:
            bonus_src = bonus_div.find('img')['src']
            if bonus_src:
                sources.append(bonus_src)
        publish_div = soup.find('div', class_='cc-publishtime')
        published = string_to_date(publish_div.contents[0], "%B %d, %Y")
        return {
            'title': main_img['title'],
            'img': [urljoin_wrapper(cls.url, src) for src in sources],
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
987
988
989
class PerryBibleFellowship(GenericListableComic):
    """Retriever for the Perry Bible Fellowship comics."""
    name = 'pbf'
    long_name = 'Perry Bible Fellowship'
    url = 'http://pbfcomics.com'
    get_url_from_archive_element = join_cls_url_to_href

    @classmethod
    def get_archive_elements(cls):
        """Return the home page's numeric comic links, in reversed order."""
        numeric_href_re = re.compile('^/[0-9]*/$')
        home = get_soup_at_url(cls.url)
        return reversed(home.find_all('a', href=numeric_href_re))

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect number, name and image URL for one archive link.

        The link's 'name' attribute holds the comic number and must agree
        with its href; the page must contain exactly one comic image whose
        'alt' equals the link text.
        """
        comic_url = cls.get_url_from_archive_element(link)
        archive_img_re = re.compile('^/archive_b/PBF.*')
        name = link.string
        num = int(link['name'])
        assert link['href'] == '/%d/' % num
        pictures = soup.find_all('img', src=archive_img_re)
        assert len(pictures) == 1
        assert pictures[0]['alt'] == name
        return {
            'num': num,
            'name': name,
            'img': [urljoin_wrapper(comic_url, pic['src']) for pic in pictures],
            'prefix': '%d-' % num,
        }
1019
1020 View Code Duplication
0 ignored issues
show
This code seems to be duplicated in your project.
Loading history...
1021
class Mercworks(GenericNavigableComic):
    """Retriever for the Mercworks comics."""
    # Also on http://mercworks.tumblr.com
    name = 'mercworks'
    long_name = 'Mercworks'
    url = 'http://mercworks.net'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect images, title, description and date from the meta tags.

        The description defaults to an empty string when the page has no
        'og:description' tag. Only the first 10 characters of the
        'article:published_time' value are parsed, as "%Y-%m-%d".
        """
        title = soup.find('meta', property='og:title')['content']
        desc_meta = soup.find('meta', property='og:description')
        if desc_meta:
            desc = desc_meta['content']
        else:
            desc = ""
        timestamp = soup.find('meta', property='article:published_time')['content']
        published = string_to_date(timestamp[:10], "%Y-%m-%d")
        image_metas = soup.find_all('meta', property='og:image')
        return {
            'img': [meta['content'] for meta in image_metas],
            'title': title,
            'desc': desc,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
1047
1048
1049
class BerkeleyMews(GenericListableComic):
    """Retriever for the Berkeley Mews comics."""
    # Also on http://mews.tumblr.com
    # Also on http://www.gocomics.com/berkeley-mews
    name = 'berkeley'
    long_name = 'Berkeley Mews'
    url = 'http://www.berkeleymews.com'
    _categories = ('BERKELEY', )
    get_url_from_archive_element = get_href
    # Matches comic URLs of the form <url>/?p=<number>.
    comic_num_re = re.compile('%s/\\?p=([0-9]*)$' % url)

    @classmethod
    def get_archive_elements(cls):
        """Return the archive page's comic links, in reversed order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, "?page_id=2"))
        comic_links = archive_soup.find_all('a', href=cls.comic_num_re)
        return reversed(comic_links)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect number, titles, image URL and date for one archive link.

        The number is parsed from the comic URL and the date from the
        year-month-day prefix of the image file name. The image's 'alt'
        and 'title' are expected to be identical.
        """
        img_date_re = re.compile('.*/([0-9]*)-([0-9]*)-([0-9]*)-.*')
        comic_url = cls.get_url_from_archive_element(link)
        num = int(cls.comic_num_re.match(comic_url).groups()[0])
        picture = soup.find('div', id='comic').find('img')
        assert picture['alt'] == picture['title']
        title2 = picture['title']
        img_url = picture['src']
        year, month, day = (int(part) for part in img_date_re.match(img_url).groups())
        return {
            'num': num,
            'title': link.string,
            'title2': title2,
            'img': [img_url],
            'year': year,
            'month': month,
            'day': day,
        }
1085
1086
1087
class GenericBouletCorp(GenericNavigableComic):
    """Shared retriever logic for the BouletCorp comics in any language."""
    # Also on http://bouletcorp.tumblr.com
    _categories = ('BOULET', )
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the first anchor of the home page's 'centered_nav' div."""
        navigation = get_soup_at_url(cls.url).find('div', id='centered_nav')
        return navigation.find_all('a')[0]

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect images, title, image texts and date for one comic.

        The date is parsed from the year/month/day components of the comic
        URL. Images come from the 'storycontent' div inside the 'notes'
        div; images without a 'src' attribute are left out of the image
        list, and the non-empty image titles are joined into 'texts'.
        """
        comic_url = cls.get_url_from_link(link)
        date_match = re.match(
            '^%s/([0-9]*)/([0-9]*)/([0-9]*)/' % cls.url, comic_url)
        year, month, day = (int(part) for part in date_match.groups())
        story = soup.find('div', id='notes').find('div', class_='storycontent')
        pictures = story.find_all('img')
        img_titles = (pic.get('title') for pic in pictures)
        texts = '  '.join(text for text in img_titles if text)
        title = soup.find('title').string
        return {
            'img': [convert_iri_to_plain_ascii_uri(pic['src'])
                    for pic in pictures if pic.get('src') is not None],
            'title': title,
            'texts': texts,
            'year': year,
            'month': month,
            'day': day,
        }
1115
1116
1117
class BouletCorp(GenericBouletCorp):
    """Retriever for the BouletCorp comics served from www.bouletcorp.com."""
    name = 'boulet'
    long_name = 'Boulet Corp'
    url = 'http://www.bouletcorp.com'
    # Shadows the `_categories` attribute declared on GenericBouletCorp.
    _categories = ('FRANCAIS', )
1123
1124
1125
class BouletCorpEn(GenericBouletCorp):
    """Retriever for the BouletCorp comics served from english.bouletcorp.com."""
    name = 'boulet_en'
    long_name = 'Boulet Corp English'
    url = 'http://english.bouletcorp.com'
1130
1131
1132
class AmazingSuperPowers(GenericNavigableComic):
    """Retriever for the Amazing Super Powers comics."""
    name = 'asp'
    long_name = 'Amazing Super Powers'
    url = 'http://www.amazingsuperpowers.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect title, author, image URLs and date from a comic page.

        The title is the space-joined 'title' of every image in the 'comic'
        div; each image's 'alt' is expected to equal its 'title'.
        """
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        date_text = soup.find('span', class_='post-date').string
        published = string_to_date(date_text, "%B %d, %Y")
        pictures = soup.find('div', id='comic').find_all('img')
        title = ' '.join(pic['title'] for pic in pictures)
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        return {
            'title': title,
            'author': author,
            'img': [pic['src'] for pic in pictures],
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
1157
1158
1159
class ToonHole(GenericNavigableComic):
    """Retriever for the Toon Hole comics."""
    # Also on http://tapastic.com/series/TOONHOLE
    name = 'toonhole'
    long_name = 'Toon Hole'
    url = 'http://www.toonhole.com'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect short URL, title, date and image URLs from a comic page.

        The title is the 'alt' of the first image in the 'comic' div (which
        must equal its 'title'), or an empty string when there is no image.
        """
        short_url = soup.find('link', rel='shortlink')['href']
        meta_div = soup.find('div', class_='entry-meta')
        published = string_to_date(meta_div.contents[0].strip(), "%B %d, %Y")
        pictures = soup.find('div', id='comic').find_all('img')
        title = ""
        if pictures:
            first_picture = pictures[0]
            title = first_picture['alt']
            assert first_picture['title'] == title
        return {
            'short_url': short_url,
            'title': title,
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [convert_iri_to_plain_ascii_uri(pic['src']) for pic in pictures],
        }
1189
1190
1191
class Channelate(GenericNavigableComic):
    """Retriever for the Channelate comics."""
    name = 'channelate'
    long_name = 'Channelate'
    url = 'http://www.channelate.com'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect title, author, date and image URLs from a comic page.

        When the page has an 'extrapanelbutton' div, the page it links to
        is fetched as well and its 'extrapanelimage' images are appended
        to the images of the 'comic' div.
        """
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        date_text = soup.find('span', class_='post-date').string
        published = string_to_date(date_text, '%Y/%m/%d')
        title = soup.find('meta', property='og:title')['content']
        comic_div = soup.find('div', id='comic')
        if comic_div:
            pictures = comic_div.find_all('img')
        else:
            pictures = []
        extra_url = None
        extra_button = soup.find('div', id='extrapanelbutton')
        if extra_button:
            extra_url = extra_button.find('a')['href']
            extra_page = get_soup_at_url(extra_url)
            pictures.extend(extra_page.find_all('img', class_='extrapanelimage'))
        return {
            'url_extra': extra_url,
            'title': title,
            'author': author,
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [urljoin_wrapper(cls.url, pic['src']) for pic in pictures],
        }
1225
1226
1227
class CyanideAndHappiness(GenericNavigableComic):
    """Retriever for the Cyanide and Happiness comics."""
    name = 'cyanide'
    long_name = 'Cyanide and Happiness'
    url = 'http://explosm.net'
    _categories = ('NSFW', )
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the home page anchor titled 'Oldest comic'."""
        home = get_soup_at_url(cls.url)
        return home.find('a', title='Oldest comic')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the next/previous comic anchor.

        Returns None when that anchor has no 'href' attribute.
        """
        if next_:
            anchor_class = 'next-comic'
        else:
            anchor_class = 'previous-comic '
        anchor = last_soup.find('a', class_=anchor_class)
        if anchor.get('href') is None:
            return None
        return anchor

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect number, author, date and image URLs from a comic page.

        The number is the second-to-last '/'-separated component of the
        'og:url' value. The author credit must start with 'by ', which is
        stripped from the stored author name.
        """
        canonical_url = soup.find('meta', property='og:url')['content']
        num = int(canonical_url.split('/')[-2])
        date_text = soup.find('h3').find('a').string
        published = string_to_date(date_text, '%Y.%m.%d')
        credit = soup.find('small', class_="author-credit-name").string
        assert credit.startswith('by ')
        author = credit[3:]
        pictures = soup.find_all('img', id='main-comic')
        return {
            'num': num,
            'author': author,
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'prefix': '%d-' % num,
            'img': [convert_iri_to_plain_ascii_uri(urljoin_wrapper(cls.url, pic['src']))
                    for pic in pictures]
        }
1266
1267
1268
class MrLovenstein(GenericComic):
    """Retriever for the Mr. Lovenstein comics."""
    # Also on https://tapastic.com/series/MrLovenstein
    name = 'mrlovenstein'
    long_name = 'Mr. Lovenstein'
    url = 'http://www.mrlovenstein.com'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Yield the comics following `last_comic`, in increasing number order.

        The range of comic numbers is taken from the '/comic/<num>' links of
        the home page; iteration starts right after `last_comic['num']`, or
        at the smallest linked number when `last_comic` is falsy.
        """
        # TODO: more info from http://www.mrlovenstein.com/archive
        comic_num_re = re.compile('^/comic/([0-9]*)$')
        home_links = get_soup_at_url(cls.url).find_all('a', href=comic_num_re)
        nums = [int(comic_num_re.match(anchor['href']).groups()[0])
                for anchor in home_links]
        oldest, newest = min(nums), max(nums)
        start = last_comic['num'] + 1 if last_comic else oldest
        for num in range(start, newest + 1):
            comic_url = urljoin_wrapper(cls.url, '/comic/%d' % num)
            page = get_soup_at_url(comic_url)
            found = page.find_all('img', src=re.compile('^/images/comics/'))
            pictures = list(reversed(found))
            description = page.find('meta', attrs={'name': 'description'})['content']
            img_titles = (pic.get('title') for pic in pictures)
            yield {
                'url': comic_url,
                'num': num,
                'texts': '  '.join(text for text in img_titles if text),
                'img': [urljoin_wrapper(comic_url, pic['src']) for pic in pictures],
                'description': description,
            }
1298
1299
1300
class DinosaurComics(GenericListableComic):
    """Retriever for the Dinosaur Comics comics."""
    name = 'dinosaur'
    long_name = 'Dinosaur Comics'
    url = 'http://www.qwantz.com'
    get_url_from_archive_element = get_href
    # Matches comic URLs of the form <url>/index.php?comic=<number>.
    comic_link_re = re.compile('^%s/index.php\\?comic=([0-9]*)$' % url)

    @classmethod
    def get_archive_elements(cls):
        """Return the archive page's comic links, in reversed order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive.php'))
        comic_links = archive_soup.find_all('a', href=cls.comic_link_re)
        # first link is random -> skip it
        return reversed(comic_links[1:])

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect date, image, title, text and number for one archive link.

        The number is parsed from the comic URL, the date from the link
        text (after removing ordinal suffixes) and the text from the node
        following the link in the archive page.
        """
        comic_url = cls.get_url_from_archive_element(link)
        num = int(cls.comic_link_re.match(comic_url).groups()[0])
        date_text = link.string
        text = link.next_sibling.string
        published = string_to_date(
            remove_st_nd_rd_th_from_date(date_text), "%B %d, %Y")
        picture = soup.find('img', src=re.compile('^%s/comics/' % cls.url))
        return {
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [picture.get('src')],
            'title': picture.get('title'),
            'text': text,
            'num': num,
        }
1333
1334
1335
class ButterSafe(GenericListableComic):
    """Retriever for the ButterSafe comics."""
    name = 'butter'
    long_name = 'ButterSafe'
    url = 'http://buttersafe.com'
    get_url_from_archive_element = get_href
    # Matches comic URLs of the form <url>/<year>/<month>/<day>/...
    comic_link_re = re.compile('^%s/([0-9]*)/([0-9]*)/([0-9]*)/.*' % url)

    @classmethod
    def get_archive_elements(cls):
        """Return the archive page's comic links, in reversed order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive/'))
        comic_links = archive_soup.find_all('a', href=cls.comic_link_re)
        return reversed(comic_links)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect title, date and image URL for one archive link.

        The date is parsed from the comic URL; the 'alt' of the image in
        the 'comic' div is expected to equal the link text.
        """
        comic_url = cls.get_url_from_archive_element(link)
        title = link.string
        date_parts = cls.comic_link_re.match(comic_url).groups()
        year, month, day = (int(part) for part in date_parts)
        picture = soup.find('div', id='comic').find('img')
        assert picture['alt'] == title
        return {
            'title': title,
            'day': day,
            'month': month,
            'year': year,
            'img': [picture['src']],
        }
1363
1364
1365
class CalvinAndHobbes(GenericComic):
    """Class to retrieve Calvin and Hobbes comics."""
    # Also on http://www.gocomics.com/calvinandhobbes/
    name = 'calvin'
    long_name = 'Calvin and Hobbes'
    # This is not through any official webpage but eh...
    url = 'http://marcel-oehler.marcellosendos.ch/comics/ch/'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generator to get the next comic. Implementation of GenericComic's abstract method.

        Walks the '<year>/<month>/' links of the index page, opens each month
        page that may still hold unseen strips and yields one dict per image
        dated strictly after the last comic seen so far. When `last_comic`
        is falsy, the starting date is 1985-11-01.
        """
        last_date = get_date_for_comic(
            last_comic) if last_comic else date(1985, 11, 1)
        link_re = re.compile('^([0-9]*)/([0-9]*)/')
        for link in get_soup_at_url(cls.url).find_all('a', href=link_re):
            url = link['href']
            # Keep the matched strings for the image regex and URL (used
            # verbatim there) and convert to integers only once.
            year, month = link_re.match(url).groups()
            year_num, month_num = int(year), int(month)
            # First of the month + 31 days is never before the end of that
            # month, so only months entirely before last_date are skipped.
            if date(year_num, month_num, 1) + timedelta(days=31) < last_date:
                continue
            img_re = re.compile('^%s%s([0-9]*)' % (year, month))
            month_url = urljoin_wrapper(cls.url, url)
            for img in get_soup_at_url(month_url).find_all('img', src=img_re):
                img_src = img['src']
                day = int(img_re.match(img_src).groups()[0])
                comic_date = date(year_num, month_num, day)
                if comic_date > last_date:
                    yield {
                        'url': month_url,
                        'year': year_num,
                        'month': month_num,
                        'day': day,
                        'img': ['%s%s/%s/%s' % (cls.url, year, month, img_src)],
                    }
                    last_date = comic_date
1399
1400
1401
class AbstruseGoose(GenericListableComic):
    """Retriever for the Abstruse Goose comics."""
    name = 'abstruse'
    long_name = 'Abstruse Goose'
    url = 'http://abstrusegoose.com'
    get_url_from_archive_element = get_href
    # Matches comic URLs of the form <url>/<number>.
    comic_url_re = re.compile('^%s/([0-9]*)$' % url)
    # Matches image sources located under <url>/strips/.
    comic_img_re = re.compile('^%s/strips/.*' % url)

    @classmethod
    def get_archive_elements(cls):
        """Return the comic links of the archive page, in page order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive'))
        return archive_soup.find_all('a', href=cls.comic_url_re)

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Collect number, title and image URL for one archive link.

        The number is parsed from the comic URL, the title is the link text
        and the image is the first one whose source is under '/strips/'.
        """
        comic_url = cls.get_url_from_archive_element(archive_elt)
        url_match = cls.comic_url_re.match(comic_url)
        num = int(url_match.groups()[0])
        return {
            'num': num,
            'title': archive_elt.string,
            'img': [soup.find('img', src=cls.comic_img_re)['src']]
        }
1424
1425
1426
class PhDComics(GenericNavigableComic):
    """Retriever for the PhD Comics comics."""
    name = 'phd'
    long_name = 'PhD Comics'
    url = 'http://phdcomics.com/comics/archive.php'

    @classmethod
    def get_first_comic_link(cls):
        """Return the element wrapping the 'first' button image, or None."""
        page = get_soup_at_url(cls.url)
        button = page.find(
            'img', src='http://phdcomics.com/comics/images/first_button.gif')
        if button is None:
            return None
        return button.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the element wrapping the next/prev button image, or None."""
        direction = 'next' if next_ else 'prev'
        button_src = 'http://phdcomics.com/comics/images/%s_button.gif' % direction
        button = last_soup.find('img', src=button_src)
        if button is None:
            return None
        return button.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect image URLs and title from the page's meta tags.

        The `link` argument is not used here.
        """
        title_meta = soup.find('meta', attrs={'name': 'twitter:title'})
        title = title_meta['content']
        image_metas = soup.find_all('meta', property='og:image')
        return {
            'img': [meta['content'] for meta in image_metas],
            'title': title,
        }
1455
1456 View Code Duplication
0 ignored issues
show
This code seems to be duplicated in your project.
Loading history...
1457
class Octopuns(GenericEmptyComic, GenericNavigableComic):
    """Retriever for the Octopuns comics."""
    # Also on http://octopuns.tumblr.com
    name = 'octopuns'
    long_name = 'Octopuns'
    url = 'http://www.octopuns.net'

    @classmethod
    def get_first_comic_link(cls):
        """Return the element wrapping the 'First.png' button image."""
        home = get_soup_at_url(cls.url)
        first_button = home.find('img', src=re.compile('.*/First.png'))
        return first_button.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the element wrapping the Next/Back button image.

        Returns None when that element has no 'href' attribute.
        """
        button_pattern = '.*/Next.png' if next_ else '.*/Back.png'
        wrapper = last_soup.find('img', src=re.compile(button_pattern)).parent
        if wrapper.get('href') is None:
            return None
        return wrapper

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect image URLs, title and date from a comic page.

        The date is read from the 'date-header' heading and parsed with the
        "%A, %B %d, %Y" format. The `link` argument is not used here.
        """
        title = soup.find('h3', class_='post-title entry-title').string
        date_text = soup.find('h2', class_='date-header').string
        published = string_to_date(date_text, "%A, %B %d, %Y")
        image_links = soup.find_all('link', rel='image_src')
        return {
            'img': [image_link['href'] for image_link in image_links],
            'title': title,
            'day': published.day,
            'month': published.month,
            'year': published.year,
        }
1489
1490
1491
class Quarktees(GenericNavigableComic):
    """Retrieve the Quarktees comics, navigating from article to article."""
    name = 'quarktees'
    long_name = 'Quarktees'
    url = 'http://www.quarktees.com/blogs/news'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.quarktees.com/blogs/news/12486621-coming-soon'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the 'article-next' or 'article-prev' anchor of the page."""
        anchor_id = 'article-next' if next_ else 'article-prev'
        return last_soup.find('a', id=anchor_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect the title and the image URLs of one article.

        Image sources found in the 'single-article' div are joined to
        `cls.url`; `link` is not used here.
        """
        title = soup.find('meta', property='og:title')['content']
        article = soup.find('div', class_='single-article')
        img_tags = article.find_all('img')
        return {
            'title': title,
            'img': [urljoin_wrapper(cls.url, tag['src']) for tag in img_tags],
        }
1515
1516
1517
class OverCompensating(GenericNavigableComic):
    """Retrieve the Over Compensating comics."""
    name = 'compensating'
    long_name = 'Over Compensating'
    url = 'http://www.overcompensating.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the anchor of the home page whose href ends with 'comic=1'."""
        home_soup = get_soup_at_url(cls.url)
        return home_soup.find('a', href=re.compile('comic=1$'))

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor titled 'next comic' or 'go back already'."""
        wanted_title = 'next comic' if next_ else 'go back already'
        return last_soup.find('a', title=wanted_title)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the number, image URL and title of a comic.

        The number is taken from the 'comic=<digits>' suffix of the comic URL;
        the image is the first one whose src starts with '/oc/comics/'.
        """
        comic_url = cls.get_url_from_link(link)
        number_match = re.match('.*comic=([0-9]*)$', comic_url)
        num = int(number_match.group(1))
        img = soup.find('img', src=re.compile('^/oc/comics/.*'))
        img_url = urljoin_wrapper(comic_url, img['src'])
        return {
            'num': num,
            'img': [img_url],
            'title': img.get('title')
        }
1547
1548
1549
class Oglaf(GenericNavigableComic):
    """Retrieve the Oglaf comics (categorised as NSFW)."""
    name = 'oglaf'
    long_name = 'Oglaf [NSFW]'
    url = 'http://oglaf.com'
    _categories = ('NSFW', )
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the parent element of the div with id 'st' on the home page."""
        home_soup = get_soup_at_url(cls.url)
        return home_soup.find("div", id="st").parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the parent of the 'nx' (next) or 'pvs' (previous) div, if any."""
        div = last_soup.find("div", id="nx" if next_ else "pvs")
        if not div:
            return None
        return div.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the title, image sources and description of a comic.

        Exactly one image is expected in the 'tt' div and exactly one image
        with id 'strip'; the description joins their 'title' attributes.
        """
        page_title = soup.find('title').string
        header_imgs = soup.find('div', id='tt').find_all('img')
        assert len(header_imgs) == 1
        comic_imgs = soup.find_all('img', id='strip')
        assert len(comic_imgs) == 1
        all_imgs = header_imgs + comic_imgs
        img_titles = [tag['title'] for tag in all_imgs]
        return {
            'title': page_title,
            'img': [tag['src'] for tag in all_imgs],
            'description': ' '.join(img_titles),
        }
1583
1584
1585
class ScandinaviaAndTheWorld(GenericNavigableComic):
    """Retrieve the Scandinavia And The World comics."""
    name = 'satw'
    long_name = 'Scandinavia And The World'
    url = 'http://satwcomic.com'
    get_first_comic_link = simulate_first_link
    first_url = 'http://satwcomic.com/sweden-denmark-and-norway'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor whose accesskey is 'n' (next) or 'p' (previous)."""
        key = 'n' if next_ else 'p'
        return last_soup.find('a', accesskey=key)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the title, description and image sources of a comic.

        Title and description come from meta tags; images are the 'img' tags
        with itemprop 'image'. `link` is not used here.
        """
        title_meta = soup.find('meta', attrs={'name': 'twitter:label1'})
        title = title_meta['content']
        desc_meta = soup.find('meta', property='og:description')
        desc = desc_meta['content']
        img_tags = soup.find_all('img', itemprop="image")
        return {
            'title': title,
            'description': desc,
            'img': [tag['src'] for tag in img_tags],
        }
1609
1610
1611
class SomethingOfThatIlk(GenericEmptyComic):  # Does not exist anymore
    """Placeholder for the Something Of That Ilk comics (no longer available)."""
    name = 'somethingofthatilk'
    long_name = 'Something Of That Ilk'
    url = 'http://www.somethingofthatilk.com'
1616
1617
1618
class InfiniteMonkeyBusiness(GenericNavigableComic):
    """Retrieve the Infinite Monkey Business comics."""
    name = 'monkey'
    long_name = 'Infinite Monkey Business'
    url = 'http://infinitemonkeybusiness.net'
    get_navi_link = get_a_navi_comicnavnext_navinext
    get_first_comic_link = simulate_first_link
    first_url = 'http://infinitemonkeybusiness.net/comic/pillory/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the 'og:title' title and the images of the 'comic' div."""
        title = soup.find('meta', property='og:title')['content']
        comic_div = soup.find('div', id='comic')
        img_tags = comic_div.find_all('img')
        return {
            'title': title,
            'img': [tag['src'] for tag in img_tags],
        }
1636
1637
1638
class Wondermark(GenericListableComic):
    """Retrieve the Wondermark comics from the archive page."""
    name = 'wondermark'
    long_name = 'Wondermark'
    url = 'http://wondermark.com'
    get_url_from_archive_element = get_href

    @classmethod
    def get_archive_elements(cls):
        """Return the rel='bookmark' anchors of the archive page, reversed."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive/'))
        bookmarks = archive_soup.find_all('a', rel='bookmark')
        return reversed(bookmarks)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return date, image, title, alt text and tags of a comic page.

        When the page has no 'comic' div, the image list is empty and both
        the title and the alt text are empty strings.
        """
        date_str = soup.find('div', class_='postdate').find('em').string
        day = string_to_date(remove_st_nd_rd_th_from_date(date_str), "%B %d, %Y")
        # Defaults used when the page has no comic div.
        img_src, alt, title = [], '', ''
        comic_div = soup.find('div', id='comic')
        if comic_div:
            img = comic_div.find('img')
            img_src = [img['src']]
            alt = img['alt']
            assert alt == img['title']
            title = soup.find('meta', property='og:title')['content']
        tag_links = soup.find('div', class_='postmeta').find_all('a', rel='tag')
        return {
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': img_src,
            'title': title,
            'alt': alt,
            'tags': ' '.join(t.string for t in tag_links),
        }
1675
1676
1677
class WarehouseComic(GenericNavigableComic):
    """Retrieve the Warehouse Comic comics."""
    name = 'warehouse'
    long_name = 'Warehouse Comic'
    url = 'http://warehousecomic.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the images, title and publication date of a comic page."""
        title = soup.find('h2', class_='post-title').string
        post_date = soup.find('span', class_='post-date').string
        day = string_to_date(post_date, "%B %d, %Y")
        img_tags = soup.find('div', id='comic').find_all('img')
        return {
            'img': [tag['src'] for tag in img_tags],
            'title': title,
            'day': day.day,
            'month': day.month,
            'year': day.year,
        }
1699
1700
1701
class JustSayEh(GenericNavigableComic):
    """Retrieve the Just Say Eh comics."""
    # Also on http://tapastic.com/series/Just-Say-Eh
    name = 'justsayeh'
    long_name = 'Just Say Eh'
    url = 'http://www.justsayeh.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the images, title and alt text of a comic page.

        Every image of the 'comic' div must have identical 'alt' and 'title'
        attributes; the alt text of the first image is reported.
        """
        title = soup.find('h2', class_='post-title').string
        img_tags = soup.find("div", id="comic").find_all("img")
        assert all(tag['alt'] == tag['title'] for tag in img_tags)
        first_alt = img_tags[0]['alt']
        return {
            'img': [tag['src'] for tag in img_tags],
            'title': title,
            'alt': first_alt,
        }
1722
1723
1724
class MouseBearComedy(GenericNavigableComic):
    """Retrieve the Mouse Bear Comedy comics."""
    # Also on http://mousebearcomedy.tumblr.com
    name = 'mousebear'
    long_name = 'Mouse Bear Comedy'
    url = 'http://www.mousebearcomedy.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return date, images, title and author of a comic page.

        Every image of the 'comic' div must carry the post title as both its
        'alt' and 'title' attributes.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        post_date = soup.find("span", class_="post-date").string
        day = string_to_date(post_date, '%B %d, %Y')
        img_tags = soup.find("div", id="comic").find_all("img")
        assert all(tag['alt'] == tag['title'] == title for tag in img_tags)
        return {
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [tag['src'] for tag in img_tags],
            'title': title,
            'author': author,
        }
1750
1751
1752 View Code Duplication
class BigFootJustice(GenericNavigableComic):
    """Retrieve the Big Foot Justice comics."""
    # Also on http://tapastic.com/series/bigfoot-justice
    name = 'bigfoot'
    long_name = 'Big Foot Justice'
    url = 'http://bigfootjustice.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the images of the 'comic' div and a title built from them.

        The title is the space-joined 'title' attributes of the images, each
        of which must equal the image's 'alt' attribute.
        """
        img_tags = soup.find('div', id='comic').find_all('img')
        assert all(tag['title'] == tag['alt'] for tag in img_tags)
        img_titles = [tag['title'] for tag in img_tags]
        return {
            'img': [tag['src'] for tag in img_tags],
            'title': ' '.join(img_titles),
        }
1771
1772
1773
class RespawnComic(GenericNavigableComic):
    """Class to retrieve Respawn Comic."""
    # Also on http://respawncomic.tumblr.com
    name = 'respawn'
    long_name = 'Respawn Comic'
    # The URL must not carry trailing whitespace (it used to end with a space).
    url = 'http://respawncomic.com'
    _categories = ('RESPAWN', )
    get_navi_link = get_a_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://respawncomic.com/comic/c0001/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with the title, the author, the publication date
        (day/month/year) and the image URLs, all read from meta tags of
        `soup`. `link` is not used here.
        """
        title = soup.find('meta', property='og:title')['content']
        author = soup.find('meta', attrs={'name': 'shareaholic:article_author_name'})['content']
        date_str = soup.find('meta', attrs={'name': 'shareaholic:article_published_time'})['content']
        # Only the leading "YYYY-MM-DD" part of the timestamp is parsed.
        date_str = date_str[:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        imgs = soup.find_all('meta', property='og:image')
        # 'og:image' entries that are excluded from the comic's image list.
        skip_imgs = {
            'http://respawncomic.com/wp-content/uploads/2016/03/site/HAROLD2.png',
            'http://respawncomic.com/wp-content/uploads/2016/03/site/DEVA.png'
        }
        return {
            'title': title,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [i['content'] for i in imgs if i['content'] not in skip_imgs],
        }
1805
1806 View Code Duplication
0 ignored issues
show
This code seems to be duplicated in your project.
Loading history...
1807
class SafelyEndangered(GenericNavigableComic):
    """Retrieve the Safely Endangered comics."""
    # Also on http://tumblr.safelyendangered.com
    name = 'endangered'
    long_name = 'Safely Endangered'
    url = 'http://www.safelyendangered.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.safelyendangered.com/comic/ignored/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return date, images, title and alt text of a comic page.

        The alt text is the one of the first image of the 'comic' div; every
        image must have identical 'alt' and 'title' attributes.
        """
        title = soup.find('h2', class_='post-title').string
        post_date = soup.find('span', class_='post-date').string
        day = string_to_date(post_date, '%B %d, %Y')
        img_tags = soup.find('div', id='comic').find_all('img')
        first_alt = img_tags[0]['alt']
        assert all(tag['alt'] == tag['title'] for tag in img_tags)
        return {
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [tag['src'] for tag in img_tags],
            'title': title,
            'alt': first_alt,
        }
1834
1835
1836
class PicturesInBoxes(GenericNavigableComic):
    """Retrieve the Pictures In Boxes comics."""
    # Also on http://picturesinboxescomic.tumblr.com
    name = 'picturesinboxes'
    long_name = 'Pictures in Boxes'
    url = 'http://www.picturesinboxes.com'
    get_navi_link = get_a_navi_navinext
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.picturesinboxes.com/2013/10/26/tetris/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return date, images, title and author of a comic page.

        At least one image is expected in the 'comicpane' div, and each one
        must carry the post title as both 'title' and 'alt' attributes.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        post_date = soup.find('span', class_='post-date').string
        day = string_to_date(post_date, '%B %d, %Y')
        img_tags = soup.find('div', class_='comicpane').find_all('img')
        assert img_tags
        assert all(tag['title'] == tag['alt'] == title for tag in img_tags)
        return {
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [tag['src'] for tag in img_tags],
            'title': title,
            'author': author,
        }
1864
1865 View Code Duplication
0 ignored issues
show
This code seems to be duplicated in your project.
Loading history...
1866
class Penmen(GenericNavigableComic):
    """Retrieve the Penmen comics."""
    name = 'penmen'
    long_name = 'Penmen'
    url = 'http://penmen.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://penmen.com/index.php/2016/09/12/penmen-announces-grin-big-brand-clothing/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, short URL, images, tags and date of a comic page.

        The date is parsed from the first 10 characters of the 'datetime'
        attribute of the first 'time' tag.
        """
        title = soup.find('title').string
        img_tags = soup.find('div', class_='entry-content').find_all('img')
        short_url = soup.find('link', rel='shortlink')['href']
        tag_links = soup.find_all('a', rel='tag')
        tags = ' '.join(t.string for t in tag_links)
        iso_date = soup.find('time')['datetime'][:10]
        day = string_to_date(iso_date, "%Y-%m-%d")
        return {
            'title': title,
            'short_url': short_url,
            'img': [tag['src'] for tag in img_tags],
            'tags': tags,
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
1893
1894
1895
class TheDoghouseDiaries(GenericNavigableComic):
    """Retrieve The Dog House Diaries comics."""
    name = 'doghouse'
    long_name = 'The Dog House Diaries'
    url = 'http://thedoghousediaries.com'

    @classmethod
    def get_first_comic_link(cls):
        """Return the anchor with id 'firstlink' from the home page."""
        home_soup = get_soup_at_url(cls.url)
        return home_soup.find('a', id='firstlink')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor with id 'nextlink' or 'previouslink'."""
        anchor_id = 'nextlink' if next_ else 'previouslink'
        return last_soup.find('a', id=anchor_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return titles, alt text, image URL and number of a comic.

        The image is the first one whose src starts with 'dhdcomics/'; the
        number is the last '/'-separated component of the comic URL.
        """
        img = soup.find('img', src=re.compile('^dhdcomics/.*'))
        comic_url = cls.get_url_from_link(link)
        title = soup.find('h2', id='titleheader').string
        subtitle = soup.find('div', id='subtext').string
        alt = img.get('title')
        # The src attribute is stripped of surrounding whitespace before joining.
        img_url = urljoin_wrapper(comic_url, img['src'].strip())
        num = int(comic_url.rsplit('/', 1)[-1])
        return {
            'title': title,
            'title2': subtitle,
            'alt': alt,
            'img': [img_url],
            'num': num,
        }
1924
1925
1926
class InvisibleBread(GenericListableComic):
    """Class to retrieve Invisible Bread comics."""
    # Also on http://www.gocomics.com/invisible-bread
    name = 'invisiblebread'
    long_name = 'Invisible Bread'
    url = 'http://invisiblebread.com'

    @classmethod
    def get_archive_elements(cls):
        """Get the archive elements (iterable).

        They are the 'archive-title' table cells of the archive page,
        returned in reversed order.
        """
        archive_url = urljoin_wrapper(cls.url, 'archives/')
        return reversed(get_soup_at_url(archive_url).find_all('td', class_='archive-title'))

    @classmethod
    def get_url_from_archive_element(cls, td):
        """Get url corresponding to an archive element (href of its first anchor)."""
        return td.find('a')['href']

    @classmethod
    def get_comic_info(cls, soup, td):
        """Get information about a particular comics.

        The title comes from the archive cell `td`, the month and day from its
        previous sibling and the year from the comic URL. The single image of
        the 'comic' div must carry the title as both 'title' and 'alt'.
        """
        url = cls.get_url_from_archive_element(td)
        title = td.find('a').string
        month_and_day = td.previous_sibling.string
        # Escape the base URL so that its dots only match literal dots.
        link_re = re.compile('^%s/([0-9]+)/' % re.escape(cls.url))
        year = link_re.match(url).groups()[0]
        date_str = month_and_day + ' ' + year
        day = string_to_date(date_str, '%b %d %Y')
        # Only the first image of the comic div is considered, so the list
        # always has exactly one element.
        imgs = [soup.find('div', id='comic').find('img')]
        assert all(i['title'] == i['alt'] == title for i in imgs)
        return {
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
            'title': title,
        }
1962
1963
1964
class DiscoBleach(GenericEmptyComic):  # Does not work anymore
    """Placeholder for the Disco Bleach comics (retrieval no longer works)."""
    name = 'discobleach'
    long_name = 'Disco Bleach'
    url = 'http://discobleach.com'
1969
1970
1971
class TubeyToons(GenericEmptyComic):  # Does not work anymore
    """Placeholder for the TubeyToons comics (retrieval no longer works)."""
    # Also on http://tapastic.com/series/Tubey-Toons
    # Also on http://tubeytoons.tumblr.com
    name = 'tubeytoons'
    long_name = 'Tubey Toons'
    url = 'http://tubeytoons.com'
    _categories = ('TUNEYTOONS', )
1979
1980 View Code Duplication
0 ignored issues
show
This code seems to be duplicated in your project.
Loading history...
1981
class CompletelySeriousComics(GenericNavigableComic):
    """Retrieve the Completely Serious comics."""
    name = 'completelyserious'
    long_name = 'Completely Serious Comics'
    url = 'http://completelyseriouscomics.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return date, images, title, alt text and author of a comic page.

        The author is the second child of the 'post-author' span. At least
        one image is expected in the 'comicpane' div, and all images must
        share the same 'title' and 'alt' text.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find('span', class_='post-author')
        author = author_span.contents[1].string
        post_date = soup.find('span', class_='post-date').string
        day = string_to_date(post_date, '%B %d, %Y')
        img_tags = soup.find('div', class_='comicpane').find_all('img')
        assert img_tags
        alt = img_tags[0]['title']
        assert all(tag['title'] == tag['alt'] == alt for tag in img_tags)
        return {
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [tag['src'] for tag in img_tags],
            'title': title,
            'alt': alt,
            'author': author,
        }
2009
2010
2011
class PoorlyDrawnLines(GenericListableComic):
    """Class to retrieve Poorly Drawn Lines comics."""
    # Also on http://pdlcomics.tumblr.com
    name = 'poorlydrawn'
    long_name = 'Poorly Drawn Lines'
    url = 'http://poorlydrawnlines.com'
    _categories = ('POORLYDRAWN', )
    get_url_from_archive_element = get_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        At most one image is expected in the 'post' div; the title is the
        'title' attribute of that image, or an empty string when the image or
        the attribute is missing. `link` is not used here.
        """
        imgs = soup.find('div', class_='post').find_all('img')
        assert len(imgs) <= 1
        return {
            'img': [i['src'] for i in imgs],
            'title': imgs[0].get('title', "") if imgs else "",
        }

    @classmethod
    def get_archive_elements(cls):
        """Get the archive elements (iterable).

        They are the anchors of the archive page whose href starts with
        '<cls.url>/comic/' followed by at least one character, returned in
        reversed order.
        """
        archive_url = urljoin_wrapper(cls.url, 'archive')
        # Escape the base URL so that its dots only match literal dots.
        url_re = re.compile('^%s/comic/.' % re.escape(cls.url))
        return reversed(get_soup_at_url(archive_url).find_all('a', href=url_re))
2035
2036 View Code Duplication
0 ignored issues
show
This code seems to be duplicated in your project.
Loading history...
2037
class LoadingComics(GenericNavigableComic):
    """Retrieve the Loading Artist comics."""
    name = 'loadingartist'
    long_name = 'Loading Artist'
    url = 'http://www.loadingartist.com/latest'

    @classmethod
    def get_first_comic_link(cls):
        """Return the anchor titled "First" from the page at `cls.url`."""
        latest_soup = get_soup_at_url(cls.url)
        return latest_soup.find('a', title="First")

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor titled 'Next' or 'Previous'."""
        wanted_title = 'Next' if next_ else 'Previous'
        return last_soup.find('a', title=wanted_title)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the title, images and date of a comic page.

        Only the images of the 'comic' div found with empty 'alt' and 'title'
        filters are kept; the date text is stripped before parsing.
        """
        title = soup.find('h1').string
        date_text = soup.find('span', class_='date').string.strip()
        day = string_to_date(date_text, "%B %d, %Y")
        comic_div = soup.find('div', class_='comic')
        img_tags = comic_div.find_all('img', alt='', title='')
        return {
            'title': title,
            'img': [tag['src'] for tag in img_tags],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
2067
2068 View Code Duplication
0 ignored issues
show
This code seems to be duplicated in your project.
Loading history...
2069
class ChuckleADuck(GenericNavigableComic):
    """Retrieve the Chuckle-A-Duck comics."""
    name = 'chuckleaduck'
    long_name = 'Chuckle-A-duck'
    url = 'http://chuckleaduck.com'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return date, images, title and author of a comic page.

        When the page has no 'comic' div the image list is empty and the
        title is an empty string; otherwise every image must share the
        'title' of the first one as both 'title' and 'alt'.
        """
        post_date = soup.find('span', class_='post-date').string
        day = string_to_date(remove_st_nd_rd_th_from_date(post_date), "%B %d, %Y")
        author = soup.find('span', class_='post-author').string
        comic_div = soup.find('div', id='comic')
        if comic_div:
            img_tags = comic_div.find_all('img')
        else:
            img_tags = []
        if img_tags:
            title = img_tags[0]['title']
        else:
            title = ""
        assert all(tag['title'] == tag['alt'] == title for tag in img_tags)
        return {
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [tag['src'] for tag in img_tags],
            'title': title,
            'author': author,
        }
2095
2096
2097
class DepressedAlien(GenericNavigableComic):
    """Class to retrieve Depressed Alien Comics."""
    name = 'depressedalien'
    long_name = 'Depressed Alien'
    url = 'http://depressedalien.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        The link is the parent element of the image named 'beginArrow' on the
        page at `cls.url`.
        """
        return get_soup_at_url(cls.url).find('img', attrs={'name': 'beginArrow'}).parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns the parent of the image named 'rightArrow' (or 'leftArrow'
        when `next_` is false), or None when the page has no such image.
        """
        img = last_soup.find('img', attrs={'name': 'rightArrow' if next_ else 'leftArrow'})
        # A missing arrow means there is no link in that direction; do not
        # fail with an AttributeError when dereferencing `.parent` on None.
        return None if img is None else img.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with the 'twitter:title' title and the 'og:image'
        image URLs of `soup`. `link` is not used here.
        """
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        imgs = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'img': [i['content'] for i in imgs],
        }
2123
2124
2125
class ThingsInSquares(GenericListableComic):
    """Retrieve the Things In Squares comics from the archive table."""
    # This can be retrieved in other languages
    # Also on https://tapastic.com/series/Things-in-Squares
    name = 'squares'
    long_name = 'Things in squares'
    url = 'http://www.thingsinsquares.com'

    @classmethod
    def get_comic_info(cls, soup, tr):
        """Return date, titles, description, tags, images and alt text.

        The archive row `tr` must hold exactly three cells: the second one
        provides the title anchor and the third one the date. The remaining
        fields come from the comic page `soup`.
        """
        _, title_cell, date_cell = tr.find_all('td')
        anchor = title_cell.find('a')
        day = string_to_date(date_cell.string, "%m.%d.%y")
        title = anchor.string
        title2 = soup.find('meta', property='og:title')['content']
        desc_meta = soup.find('meta', property='og:description')
        if desc_meta:
            description = desc_meta['content']
        else:
            description = ''
        tag_metas = soup.find_all('meta', property='article:tag')
        tags = ' '.join(meta['content'] for meta in tag_metas)
        img_tags = soup.find('div', class_='entry-content').find_all('img')
        return {
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'title': title,
            'title2': title2,
            'description': description,
            'tags': tags,
            'img': [tag['src'] for tag in img_tags],
            'alt': ' '.join(tag['alt'] for tag in img_tags),
        }

    @classmethod
    def get_url_from_archive_element(cls, tr):
        """Return the href of the anchor in the second of the row's three cells."""
        _, title_cell, _ = tr.find_all('td')
        return title_cell.find('a')['href']

    @classmethod
    def get_archive_elements(cls):
        """Return the rows of the archive page's table body, reversed."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive-2'))
        rows = archive_soup.find('tbody').find_all('tr')
        return reversed(rows)
2167
2168
2169
class HappleTea(GenericNavigableComic):
    """Retrieve the Happle Tea comics."""
    name = 'happletea'
    long_name = 'Happle Tea'
    url = 'http://www.happletea.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, images, alt text, date and author of a comic page.

        Title, author and date are read from the 'post-content' div; the alt
        text concatenates (with no separator) the 'alt' of every image of the
        'comic' div, each of which must equal the image's 'title'.
        """
        img_tags = soup.find('div', id='comic').find_all('img')
        post = soup.find('div', class_='post-content')
        title = post.find('h2', class_='post-title').string
        author = post.find('a', rel='author').string
        post_date = post.find('span', class_='post-date').string
        day = string_to_date(post_date, "%B %d, %Y")
        assert all(tag['alt'] == tag['title'] for tag in img_tags)
        return {
            'title': title,
            'img': [tag['src'] for tag in img_tags],
            'alt': ''.join(tag['alt'] for tag in img_tags),
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'author': author,
        }
2196
2197
2198
class FatAwesomeComics(GenericNavigableComic):
    """Retrieve the Fat Awesome comics."""
    # Also on http://fatawesomecomedy.tumblr.com
    name = 'fatawesome'
    long_name = 'Fat Awesome'
    url = 'http://fatawesome.com/comics'
    get_navi_link = get_a_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://fatawesome.com/shortbus/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, description, tags, alt text, image and date.

        Exactly one image with attribute data-recalc-dims="1" is expected;
        anything after the last '?' of its src is dropped. Tags default to an
        empty string when the page has no 'article:tag' meta.
        """
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        description = soup.find('meta', attrs={'name': 'description'})['content']
        tag_meta = soup.find('meta', property='article:tag')
        if tag_meta:
            tags = tag_meta['content']
        else:
            tags = ""
        published = soup.find('meta', property='article:published_time')['content']
        day = string_to_date(published[:10], "%Y-%m-%d")
        img_tags = soup.find_all('img', attrs={'data-recalc-dims': "1"})
        assert len(img_tags) == 1
        return {
            'title': title,
            'description': description,
            'tags': tags,
            'alt': "".join(tag['alt'] for tag in img_tags),
            'img': [tag['src'].rsplit('?', 1)[0] for tag in img_tags],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
2229
2230
2231
class AnythingComic(GenericListableComic):
    """Retriever for Anything Comic, driven by the site's archive table."""
    # Also on http://tapastic.com/series/anything
    name = 'anythingcomic'
    long_name = 'Anything Comic'
    url = 'http://www.anythingcomic.com'

    @classmethod
    def get_archive_elements(cls):
        """Return the <tr> rows of the archive table describing comics."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive/'))
        rows = archive_soup.find('table', id='chapter_table').find_all('tr')
        # The first 2 <tr>'s do not correspond to comics
        return rows[2:]

    @classmethod
    def get_url_from_archive_element(cls, tr):
        """Return the absolute url of the comic listed in archive row `tr`."""
        # A row is expected to hold exactly four cells; only the second is used.
        _, comic_cell, _, _ = tr.find_all('td')
        anchor = comic_cell.find('a')
        return urljoin_wrapper(cls.url, anchor['href'])

    @classmethod
    def get_comic_info(cls, soup, tr):
        """Collect the metadata of a comic.

        `soup` is the parsed comic page and `tr` the archive row it was
        reached from; number, title and date are read from the row.
        """
        num_cell, comic_cell, date_cell, _ = tr.find_all('td')
        num = int(num_cell.string)
        title = comic_cell.find('a').string
        pictures = soup.find_all('img', id='comic_image')
        cleaned_date = remove_st_nd_rd_th_from_date(date_cell.string)
        published = string_to_date(cleaned_date, "%B %d, %Y, %I:%M %p")
        assert len(pictures) == 1
        assert all(pic.get('alt') == pic.get('title') for pic in pictures)
        return {
            'num': num,
            'title': title,
            'alt': pictures[0].get('alt', ''),
            'img': [pic['src'] for pic in pictures],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2272
2273
2274
class LonnieMillsap(GenericNavigableComic):
    """Retriever for Lonnie Millsap's comics."""
    name = 'millsap'
    long_name = 'Lonnie Millsap'
    url = 'http://www.lonniemillsap.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.lonniemillsap.com/?p=42'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect the metadata of the comic found in the parsed page `soup`.

        `link` is not used. Author, date and images are all looked up
        inside the 'post-content' div.
        """
        title = soup.find('h2', class_='post-title').string
        content = soup.find('div', class_='post-content')
        author_span = content.find("span", class_="post-author")
        author = author_span.find("a").string
        raw_date = content.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        pictures = content.find("div", class_="entry").find_all("img")
        return {
            'title': title,
            'author': author,
            'img': [pic['src'] for pic in pictures],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2300
2301
2302
class LinsEditions(GenericNavigableComic):
    """Retriever for the L.I.N.S. Editions comics."""
    # Also on http://linscomics.tumblr.com
    # Now on https://warandpeas.com
    name = 'lins'
    long_name = 'L.I.N.S. Editions'
    url = 'https://linsedition.com'
    _categories = ('LINS', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'https://linsedition.com/2011/09/07/l-i-n-s/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect the metadata of the comic found in the parsed page `soup`.

        `link` is not used. Everything is read from <meta> elements.
        """
        title = soup.find('meta', property='og:title')['content']
        image_metas = soup.find_all('meta', property='og:image')
        published_meta = soup.find('meta', property='article:published_time')
        # Only the leading YYYY-MM-DD part of the timestamp is parsed.
        published = string_to_date(published_meta['content'][:10], "%Y-%m-%d")
        return {
            'title': title,
            'img': [meta['content'] for meta in image_metas],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2328
2329
2330
class ThorsThundershack(GenericNavigableComic):
    """Retriever for the Thor's Thundershack comics."""
    # Also on http://tapastic.com/series/Thors-Thundershac
    name = 'thor'
    long_name = 'Thor\'s Thundershack'
    url = 'http://www.thorsthundershack.com'
    _categories = ('THOR', )
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'first navlink' anchor found on the home page."""
        home = get_soup_at_url(cls.url)
        return home.find('a', class_='first navlink')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the link to the next (or previous) comic, None if absent.

        Anchors whose href is '/comic' are skipped.
        """
        rel = 'next' if next_ else 'prev'
        candidates = (
            anchor for anchor in last_soup.find_all('a', rel=rel)
            if anchor['href'] != '/comic'
        )
        return next(candidates, None)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect the metadata of the comic found in the parsed page `soup`.

        `link` is not used. Image urls are joined to the site url.
        """
        title = soup.find('meta', attrs={'name': 'description'})["content"]
        description = soup.find('div', itemprop='articleBody').text
        author = soup.find('span', itemprop='author copyrightHolder').string
        pictures = soup.find_all('img', itemprop='image')
        assert all(pic['title'] == pic['alt'] for pic in pictures)
        if pictures:
            alt = pictures[0]['alt']
        else:
            alt = ""
        raw_date = soup.find('time', itemprop='datePublished')["datetime"]
        published = string_to_date(raw_date, "%Y-%m-%d %H:%M:%S")
        return {
            'img': [urljoin_wrapper(cls.url, pic['src']) for pic in pictures],
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'author': author,
            'title': title,
            'alt': alt,
            'description': description,
        }
2373
2374
2375
class GerbilWithAJetpack(GenericNavigableComic):
    """Retriever for the Gerbil With A Jetpack comics."""
    name = 'gerbil'
    long_name = 'Gerbil With A Jetpack'
    url = 'http://gerbilwithajetpack.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect the metadata of the comic found in the parsed page `soup`.

        `link` is not used. At least one image is required in the 'comic'
        div; its alt text is the one reported.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        comic_div = soup.find("div", id="comic")
        pictures = comic_div.find_all("img")
        alt = pictures[0]['alt']
        # Every image must carry the same text as both alt and title.
        assert all(pic['alt'] == pic['title'] == alt for pic in pictures)
        return {
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'alt': alt,
            'author': author,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2402
2403
2404
class EveryDayBlues(GenericNavigableComic):
    """Retriever for the Every Day Blues comics."""
    name = "blues"
    long_name = "Every Day Blues"
    url = "http://everydayblues.net"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect the metadata of the comic found in the parsed page `soup`.

        `link` is not used. The date is parsed with the "de_DE.utf8"
        locale; at most one image is expected, with alt and title both
        equal to the post title.
        """
        title = soup.find("h2", class_="post-title").string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%d. %B %Y", "de_DE.utf8")
        pictures = soup.find("div", id="comic").find_all("img")
        assert all(pic['alt'] == pic['title'] == title for pic in pictures)
        assert len(pictures) <= 1
        return {
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'author': author,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2430
2431 View Code Duplication
0 ignored issues
show
This code seems to be duplicated in your project.
Loading history...
2432
class BiterComics(GenericNavigableComic):
    """Retriever for the Biter Comics."""
    name = "biter"
    long_name = "Biter Comics"
    url = "http://www.bitercomics.com"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect the metadata of the comic found in the parsed page `soup`.

        `link` is not used. Exactly one image is expected in the 'comic'
        div, with identical alt and title.
        """
        title = soup.find("h1", class_="entry-title").string
        author_span = soup.find("span", class_="author vcard")
        author = author_span.find("a").string
        raw_date = soup.find("span", class_="entry-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find("div", id="comic").find_all("img")
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        assert len(pictures) == 1
        only_picture = pictures[0]
        alt = only_picture['alt']
        return {
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'alt': alt,
            'author': author,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2460
2461
2462
class TheAwkwardYeti(GenericNavigableComic):
    """Retriever for The Awkward Yeti comics."""
    # Also on http://www.gocomics.com/the-awkward-yeti
    # Also on http://larstheyeti.tumblr.com
    # Also on https://tapastic.com/series/TheAwkwardYeti
    name = 'yeti'
    long_name = 'The Awkward Yeti'
    url = 'http://theawkwardyeti.com'
    _categories = ('YETI', )
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect the metadata of the comic found in the parsed page `soup`.

        `link` is not used.
        """
        title = soup.find('h2', class_='post-title').string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find("div", id="comic").find_all("img")
        # Only the first image is required to have matching alt and title.
        assert all(pic['alt'] == pic['title'] for pic in pictures[:1])
        return {
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2489
2490
2491
class PleasantThoughts(GenericNavigableComic):
    """Retriever for the Pleasant Thoughts comics."""
    name = 'pleasant'
    long_name = 'Pleasant Thoughts'
    url = 'http://pleasant-thoughts.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the title and image urls of the comic in the page `soup`.

        `link` is not used. Both values are looked up inside the
        'post-content' div.
        """
        content = soup.find('div', class_='post-content')
        title = content.find('h2', class_='post-title').string
        entry = content.find("div", class_="entry")
        pictures = entry.find_all("img")
        return {
            'title': title,
            'img': [pic['src'] for pic in pictures],
        }
2509
2510 View Code Duplication
0 ignored issues
show
This code seems to be duplicated in your project.
Loading history...
2511
class MisterAndMe(GenericNavigableComic):
    """Retriever for the Mister & Me comics."""
    # Also on http://www.gocomics.com/mister-and-me
    # Also on https://tapastic.com/series/Mister-and-Me
    name = 'mister'
    long_name = 'Mister & Me'
    url = 'http://www.mister-and-me.com'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect the metadata of the comic found in the parsed page `soup`.

        `link` is not used. At most one image is expected; the alt text is
        empty when the page has none.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find("div", id="comic").find_all("img")
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        assert len(pictures) <= 1
        if pictures:
            alt = pictures[0]['alt']
        else:
            alt = ""
        return {
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'alt': alt,
            'author': author,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2541
2542
2543
class LastPlaceComics(GenericNavigableComic):
    """Retriever for the Last Place Comics."""
    name = 'lastplace'
    long_name = 'Last Place Comics'
    url = "http://lastplacecomics.com"
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect the metadata of the comic found in the parsed page `soup`.

        `link` is not used. At most one image is expected; the alt text is
        empty when the page has none.
        """
        heading = soup.find('h2', class_='post-title')
        title = heading.string
        author = soup.find("span", class_="post-author").find("a").string
        date_span = soup.find("span", class_="post-date")
        published = string_to_date(date_span.string, "%B %d, %Y")
        comic_div = soup.find("div", id="comic")
        pictures = comic_div.find_all("img")
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        assert len(pictures) <= 1
        alt = ""
        if pictures:
            alt = pictures[0]['alt']
        return {
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'alt': alt,
            'author': author,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2571
2572 View Code Duplication
0 ignored issues
show
This code seems to be duplicated in your project.
Loading history...
2573
class TalesOfAbsurdity(GenericNavigableComic):
    """Retriever for the Tales Of Absurdity comics."""
    # Also on http://tapastic.com/series/Tales-Of-Absurdity
    # Also on http://talesofabsurdity.tumblr.com
    name = 'absurdity'
    long_name = 'Tales of Absurdity'
    url = 'http://talesofabsurdity.com'
    _categories = ('ABSURDITY', )
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect the metadata of the comic found in the parsed page `soup`.

        `link` is not used. The alt text comes from the first image and is
        empty when the page has none.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find("div", id="comic").find_all("img")
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        if pictures:
            alt = pictures[0]['alt']
        else:
            alt = ""
        return {
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'alt': alt,
            'author': author,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2603
2604 View Code Duplication
0 ignored issues
show
This code seems to be duplicated in your project.
Loading history...
2605
class EndlessOrigami(GenericEmptyComic, GenericNavigableComic):
    """Retriever for the Endless Origami comics."""
    name = "origami"
    long_name = "Endless Origami"
    url = "http://endlessorigami.com"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect the metadata of the comic found in the parsed page `soup`.

        `link` is not used. The alt text comes from the first image and is
        empty when the page has none.
        """
        heading = soup.find('h2', class_='post-title')
        title = heading.string
        author = soup.find("span", class_="post-author").find("a").string
        date_span = soup.find("span", class_="post-date")
        published = string_to_date(date_span.string, "%B %d, %Y")
        comic_div = soup.find("div", id="comic")
        pictures = comic_div.find_all("img")
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        alt = ""
        if pictures:
            alt = pictures[0]['alt']
        return {
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'alt': alt,
            'author': author,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2632
2633
2634
class PlanC(GenericNavigableComic):
    """Retriever for the Plan C comics."""
    name = 'planc'
    long_name = 'Plan C'
    url = 'http://www.plancomic.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect the metadata of the comic found in the parsed page `soup`.

        `link` is not used.
        """
        title = soup.find('h2', class_='post-title').string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        comic_div = soup.find('div', id='comic')
        pictures = comic_div.find_all('img')
        return {
            'title': title,
            'img': [pic['src'] for pic in pictures],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2656
2657
2658 View Code Duplication
class BuniComic(GenericNavigableComic):
    """Retriever for the Buni comics."""
    name = 'buni'
    long_name = 'BuniComics'
    url = 'http://www.bunicomic.com'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the image url and title of the comic in the page `soup`.

        `link` is not used. Exactly one image is expected in the 'comic'
        div; its title attribute is used as the comic title.
        """
        comic_div = soup.find('div', id='comic')
        pictures = comic_div.find_all('img')
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        assert len(pictures) == 1
        return {
            'img': [pic['src'] for pic in pictures],
            'title': pictures[0]['title'],
        }
2676
2677
2678
class GenericCommitStrip(GenericNavigableComic):
    """Shared retriever for Commit Strip; subclasses set the language data."""
    get_navi_link = get_a_rel_next
    get_first_comic_link = simulate_first_link
    # To be overridden by each language-specific subclass.
    first_url = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect the metadata of the comic found in the parsed page `soup`.

        `link` is not used. 'title2' joins the title attributes of the
        images, using an empty string for images without one.
        """
        description = soup.find('meta', property='og:description')['content']
        title = soup.find('meta', property='og:title')['content']
        pictures = soup.find('div', class_='entry-content').find_all('img')
        image_titles = ' '.join(pic.get('title', '') for pic in pictures)
        return {
            'title': title,
            'title2': image_titles,
            'description': description,
            # Sources are converted to plain ASCII uris, then joined to the site url.
            'img': [
                urljoin_wrapper(cls.url, convert_iri_to_plain_ascii_uri(pic['src']))
                for pic in pictures
            ],
        }
2697
2698
2699
class CommitStripFr(GenericCommitStrip):
    """Retriever for the French edition of Commit Strip."""
    name = 'commit_fr'
    long_name = 'Commit Strip (Fr)'
    url = 'http://www.commitstrip.com/fr'
    _categories = ('FRANCAIS', )
    # Url used as the first comic of the series.
    first_url = 'http://www.commitstrip.com/fr/2012/02/22/interview/'
2706
2707
2708
class CommitStripEn(GenericCommitStrip):
    """Retriever for the English edition of Commit Strip."""
    name = 'commit_en'
    long_name = 'Commit Strip (En)'
    url = 'http://www.commitstrip.com/en'
    # Url used as the first comic of the series.
    first_url = 'http://www.commitstrip.com/en/2012/02/22/interview/'
2714
2715
2716
class GenericBoumerie(GenericNavigableComic):
    """Shared retriever for Boumeries; subclasses set the language data."""
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next
    # Both are to be overridden by each language-specific subclass.
    date_format = NotImplemented
    lang = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect the metadata of the comic found in the parsed page `soup`.

        `link` is not used. The date is parsed with the subclass's
        `date_format` and `lang`.
        """
        title = soup.find('h2', class_='post-title').string
        short_url = soup.find('link', rel='shortlink')['href']
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, cls.date_format, cls.lang)
        pictures = soup.find('div', id='comic').find_all('img')
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        return {
            'short_url': short_url,
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'author': author,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2742
2743
2744
class BoumerieEn(GenericBoumerie):
    """Retriever for the English edition of Boumeries."""
    name = 'boumeries_en'
    long_name = 'Boumeries (En)'
    url = 'http://comics.boumerie.com'
    # Date pattern and locale handed to the date parsing of the base class.
    date_format = "%B %d, %Y"
    lang = 'en_GB.UTF-8'
2751
2752
2753
class BoumerieFr(GenericBoumerie):
    """Retriever for the French edition of Boumeries."""
    name = 'boumeries_fr'
    long_name = 'Boumeries (Fr)'
    url = 'http://bd.boumerie.com'
    _categories = ('FRANCAIS', )
    # Date pattern and locale handed to the date parsing of the base class.
    date_format = "%A, %d %B %Y"
    lang = "fr_FR.utf8"
2761
2762
2763
class UnearthedComics(GenericNavigableComic):
    """Class to retrieve Unearthed comics."""
    # Also on http://tapastic.com/series/UnearthedComics
    # Also on http://unearthedcomics.tumblr.com
    name = 'unearthed'
    long_name = 'Unearthed Comics'
    url = 'http://unearthedcomics.com'
    _categories = ('UNEARTHED', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://unearthedcomics.com/comics/world-with-turn-signals/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        `soup` is the parsed comic page; `link` is not used.
        Returns a dict with the title (empty when the page has neither
        <h1> nor <h2>), the description (None when the page has no
        og:description meta element), the short url, the image urls and
        the publication date.
        """
        short_url = soup.find('link', rel='shortlink')['href']
        title_elt = soup.find('h1') or soup.find('h2')
        title = title_elt.string if title_elt else ""
        # Keep the text of the description, not the <meta> element itself:
        # the element is optional, hence the None fallback.
        desc_elt = soup.find('meta', property='og:description')
        desc = desc_elt.get('content') if desc_elt else None
        date_str = soup.find('time', class_='published updated hidden')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        post = soup.find('div', class_="entry content entry-content type-portfolio")
        imgs = post.find_all('img')
        return {
            'title': title,
            'description': desc,
            'url2': short_url,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
2795
2796 View Code Duplication
0 ignored issues
show
This code seems to be duplicated in your project.
Loading history...
2797
class Optipess(GenericNavigableComic):
    """Retriever for the Optipess comics."""
    name = 'optipess'
    long_name = 'Optipess'
    url = 'http://www.optipess.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect the metadata of the comic found in the parsed page `soup`.

        `link` is not used. A page without a 'comic' div yields no image
        and an empty alt text.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        comic_div = soup.find('div', id='comic')
        if comic_div:
            pictures = comic_div.find_all('img')
        else:
            pictures = []
        if pictures:
            alt = pictures[0]['title']
        else:
            alt = ""
        # Every image must carry the same text as both alt and title.
        assert all(pic['alt'] == pic['title'] == alt for pic in pictures)
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, "%B %d, %Y")
        return {
            'title': title,
            'alt': alt,
            'author': author,
            'img': [pic['src'] for pic in pictures],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2825
2826
2827
class PainTrainComic(GenericNavigableComic):
    """Class to retrieve Pain Train Comics."""
    name = 'paintrain'
    long_name = 'Pain Train Comics'
    url = 'http://paintraincomic.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        `soup` is the parsed comic page; `link` is not used.
        The comic number is the `p` parameter of the page's shortlink,
        which must start with the site url. At least one image is
        required in the 'comic' div.
        """
        title = soup.find('h2', class_='post-title').string
        short_url = soup.find('link', rel='shortlink')['href']
        # The site url is escaped so that its dots only match literal dots.
        short_url_re = re.compile(r'^%s/\?p=([0-9]*)' % re.escape(cls.url))
        num = int(short_url_re.match(short_url).group(1))
        imgs = soup.find('div', id='comic').find_all('img')
        alt = imgs[0]['title']
        # Every image must carry the same text as both alt and title.
        assert all(i['alt'] == i['title'] == alt for i in imgs)
        date_str = soup.find('span', class_='post-date').string
        day = string_to_date(date_str, "%d/%m/%Y")
        return {
            'short_url': short_url,
            'num': num,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'alt': alt,
            'title': title,
        }
2857
2858
2859
class MoonBeard(GenericNavigableComic):
    """Class to retrieve MoonBeard comics."""
    # Also on http://blog.squiresjam.es/moonbeard
    # Also on http://www.webtoons.com/en/comedy/moon-beard/list?title_no=471
    name = 'moonbeard'
    long_name = 'Moon Beard'
    url = 'http://moonbeard.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        `soup` is the parsed comic page; `link` is not used.
        The comic number is the `p` parameter of the page's shortlink,
        which must start with the site url. At least one image is
        required in the 'comic' div. Tags are the space-joined contents
        of the article:tag meta elements.
        """
        title = soup.find('h2', class_='post-title').string
        short_url = soup.find('link', rel='shortlink')['href']
        # The site url is escaped so that its dots only match literal dots.
        short_url_re = re.compile(r'^%s/\?p=([0-9]*)' % re.escape(cls.url))
        num = int(short_url_re.match(short_url).group(1))
        imgs = soup.find('div', id='comic').find_all('img')
        alt = imgs[0]['title']
        # Every image must carry the same text as both alt and title.
        assert all(i['alt'] == i['title'] == alt for i in imgs)
        date_str = soup.find('span', class_='post-date').string
        day = string_to_date(date_str, "%B %d, %Y")
        tags = ' '.join(t['content'] for t in soup.find_all('meta', property='article:tag'))
        author = soup.find('span', class_='post-author').string
        return {
            'short_url': short_url,
            'num': num,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'title': title,
            'tags': tags,
            'alt': alt,
            'author': author,
        }
2895
2896
2897
class AHamADay(GenericNavigableComic):
    """Retriever for the A Ham A Day comics."""
    name = 'ham'
    long_name = 'A Ham A Day'
    url = 'http://www.ahammaday.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.ahammaday.com/today/3/6/french'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor leading to the next (or previous) comic."""
        # prev is next / next is prev
        item_class = 'previous' if next_ else 'next'
        item = last_soup.find('li', class_=item_class)
        return item.find('a')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect the metadata of the comic found in the parsed page `soup`.

        `link` is not used. Image urls come from <meta itemprop="image">
        elements.
        """
        raw_date = soup.find('time', class_='published')['datetime']
        published = string_to_date(raw_date, "%Y-%m-%d")
        author_span = soup.find('span', class_='blog-author')
        author = author_span.find('a').string
        title = soup.find('meta', property='og:title')['content']
        image_metas = soup.find_all('meta', itemprop='image')
        return {
            'img': [meta['content'] for meta in image_metas],
            'title': title,
            'author': author,
            'day': published.day,
            'month': published.month,
            'year': published.year,
        }
2928
2929
2930
class LittleLifeLines(GenericNavigableComic):
    """Retriever for the Little Life Lines comics."""
    # Also on https://little-life-lines.tumblr.com
    name = 'life'
    long_name = 'Little Life Lines'
    url = 'http://www.littlelifelines.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.littlelifelines.com/comics/well-done'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor to the next (or previous) comic, None if absent."""
        # prev is next / next is prev
        item = last_soup.find('li', class_='prev' if next_ else 'next')
        if not item:
            return None
        return item.find('a')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect the metadata of the comic found in the parsed page `soup`.

        `link` is not used. Images without a src attribute are ignored and
        at least one image with a src is required.
        """
        title = soup.find('meta', property='og:title')['content']
        description = soup.find('meta', property='og:description')['content']
        raw_date = soup.find('time', class_='published')['datetime']
        published = string_to_date(raw_date, "%Y-%m-%d")
        author = soup.find('a', rel='author').string
        body = soup.find('div', class_="body entry-content")
        pictures = [
            pic for pic in body.find_all('img')
            if pic.get('src') is not None
        ]
        alt = pictures[0]['alt']
        return {
            'title': title,
            'alt': alt,
            'description': description,
            'author': author,
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [pic['src'] for pic in pictures],
        }
2969
2970
2971
class GenericWordPressInkblot(GenericNavigableComic):
    """Shared scraper for sites built on WordPress with the Inkblot theme."""
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'first comic' link found on the site's main page."""
        first_link_class = 'webcomic-link webcomic1-link first-webcomic-link first-webcomic1-link'
        home = get_soup_at_url(cls.url)
        return home.find('a', class_=first_link_class)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, publication date and image URLs from a comic page."""
        title = soup.find('meta', property='og:title')['content']
        pictures = soup.find('div', class_='webcomic-image').find_all('img')
        published = soup.find('meta', property='article:published_time')['content']
        # Only the first 10 characters are parsed, as a YYYY-MM-DD date.
        day = string_to_date(published[:10], "%Y-%m-%d")
        sources = [img['src'] for img in pictures]
        return {
            'title': title,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': sources,
        }
2994
2995
2996
class EverythingsStupid(GenericWordPressInkblot):
    """Scraper for Everything's Stupid comics (WordPress/Inkblot site)."""
    # Also published at:
    #  - http://tapastic.com/series/EverythingsStupid
    #  - http://www.webtoons.com/en/challenge/everythings-stupid/list?title_no=14591
    #  - http://everythingsstupidcomics.tumblr.com
    name = 'stupid'
    long_name = "Everything's Stupid"
    url = 'http://everythingsstupid.net'
3004
3005
3006
class TheIsmComics(GenericWordPressInkblot):
    """Scraper for The Ism comics (WordPress/Inkblot site)."""
    # Possibly also at https://tapastic.com/series/TheIsm (unconfirmed)
    name = 'theism'
    long_name = "The Ism"
    url = 'http://www.theism-comics.com'
3012
3013
3014
class WoodenPlankStudios(GenericWordPressInkblot):
    """Scraper for Wooden Plank Studios comics (WordPress/Inkblot site)."""
    name = 'woodenplank'
    long_name = 'Wooden Plank Studios'
    url = 'http://woodenplankstudios.com'
3019
3020
3021
class ElectricBunnyComic(GenericNavigableComic):
    """Scraper for Electric Bunny Comics."""
    # Also published at http://electricbunnycomics.tumblr.com
    name = 'bunny'
    long_name = 'Electric Bunny Comic'
    url = 'http://www.electricbunnycomics.com/View/Comic/153/Welcome+to+Hell'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the element wrapping the 'First' navigation image."""
        first_img = get_soup_at_url(cls.url).find('img', alt='First')
        return first_img.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the element wrapping the 'Next' (or 'Back') image, or None."""
        label = 'Next' if next_ else 'Back'
        button = last_soup.find('img', alt=label)
        if not button:
            return None
        return button.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract the title and image URLs from the page's Open Graph metadata."""
        title = soup.find('meta', property='og:title')['content']
        image_metas = soup.find_all('meta', property='og:image')
        sources = [meta['content'] for meta in image_metas]
        return {
            'title': title,
            'img': sources,
        }
3049
3050
3051
class SheldonComics(GenericNavigableComic):
    """Scraper for Sheldon comics."""
    # Also published at http://www.gocomics.com/sheldon
    name = 'sheldon'
    long_name = 'Sheldon Comics'
    url = 'http://www.sheldoncomics.com'

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'nav-first' link found on the main page."""
        home = get_soup_at_url(cls.url)
        return home.find("a", id="nav-first")

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the first next/previous link that is not the bare home page, or None."""
        wanted_id = "nav-next" if next_ else "nav-prev"
        candidates = last_soup.find_all("a", id=wanted_id)
        # Links whose href is exactly the home page URL are skipped.
        return next(
            (a for a in candidates if a['href'] != 'http://www.sheldoncomics.com'),
            None)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract the title and image URL from the 'comic-foot' block."""
        pictures = soup.find("div", id="comic-foot").find_all("img")
        # Sanity checks: alt and title must agree, and exactly one image is expected.
        assert all(img['alt'] == img['title'] for img in pictures)
        assert len(pictures) == 1
        title = pictures[0]['title']
        sources = [img['src'] for img in pictures]
        return {
            'title': title,
            'img': sources,
        }
3082
3083
3084
class Ubertool(GenericNavigableComic):
    """Scraper for Ubertool comics."""
    # Also published at:
    #  - http://ubertool.tumblr.com
    #  - https://tapastic.com/series/ubertool
    name = 'ubertool'
    long_name = 'Ubertool'
    url = 'http://ubertoolcomic.com'
    _categories = ('UBERTOOL',)
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_a_comicnavbase_comicnavnext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, publication date and image URLs from a comic page."""
        title = soup.find('h2', class_='post-title').string
        published = soup.find('span', class_='post-date').string
        # The date is parsed with the "%B %d, %Y" format (full month name first).
        day = string_to_date(published, "%B %d, %Y")
        pictures = soup.find('div', id='comic').find_all('img')
        sources = [img['src'] for img in pictures]
        return {
            'img': sources,
            'title': title,
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
3109
3110
3111
class EarthExplodes(GenericNavigableComic):
    """Scraper for The Earth Explodes comics."""
    name = 'earthexplodes'
    long_name = 'The Earth Explodes'
    url = 'http://www.earthexplodes.com'
    first_url = 'http://www.earthexplodes.com/comics/000/'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the link with id 'next' (or 'prev'), or None when absent."""
        wanted_id = 'next' if next_ else 'prev'
        return last_soup.find('a', id=wanted_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract the page title, hover text and image URLs."""
        title = soup.find('title').string
        pictures = soup.find('div', id='image').find_all('img')
        # The first image's 'title' attribute is used as alt text ('' when missing).
        alt = pictures[0].get('title', '')
        # Image sources are joined onto the site URL.
        sources = [urljoin_wrapper(cls.url, img['src']) for img in pictures]
        return {
            'img': sources,
            'title': title,
            'alt': alt,
        }
3136
3137
3138
class CubeDrone(GenericNavigableComic):
    """Class to retrieve Cube Drone comics."""
    name = 'cubedrone'
    long_name = 'Cube Drone'
    url = 'http://cube-drone.com/comics'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        Returns the parent element of the 'backward' glyphicon span found
        on the main page.
        """
        return get_soup_at_url(cls.url).find('span', class_='glyphicon glyphicon-backward').parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns the parent element of the chevron span ('right' when next_
        is truthy, 'left' otherwise), or None when the page has no such span.
        """
        class_ = 'glyphicon glyphicon-chevron-' + ('right' if next_ else 'left')
        span = last_soup.find('span', class_=class_)
        # find() yields None when nothing matches; report "no link" instead
        # of failing with AttributeError on None.parent.
        return span.parent if span is not None else None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Title and canonical URL come from the twitter:* meta tags; the
        secondary title and alt text come from the first comic image.
        """
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        url2 = soup.find('meta', attrs={'name': 'twitter:url'})['content']
        # Date extraction is currently disabled:
        # date_str = soup.find('h2', class_='comic_title').find('small').string
        # day = string_to_date(date_str, "%B %d, %Y, %I:%M %p")
        imgs = soup.find_all('img', class_='comic img-responsive')
        title2 = imgs[0]['title']
        alt = imgs[0]['alt']
        return {
            'url2': url2,
            'title': title,
            'title2': title2,
            'alt': alt,
            'img': [i['src'] for i in imgs],
        }
3173
3174
3175
class MakeItStoopid(GenericNavigableComic):
    """Scraper for Make It Stoopid comics."""
    name = 'stoopid'
    long_name = 'Make it stoopid'
    url = 'http://makeitstoopid.com/comic.php'

    @classmethod
    def get_nav(cls, soup):
        """Return the navigation elements, with None for entries lacking an href.

        All 'cnav' elements are split into the first five and the rest; the
        two groups are asserted to be equal. By position the entries are:
        begin, prev, archive, next, end.
        """
        all_nav = soup.find_all(class_='cnav')
        first_bar = all_nav[:5]
        second_bar = all_nav[5:]
        assert first_bar == second_bar
        return [elt if elt.get('href') is not None else None for elt in first_bar]

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'begin' navigation element of the main page."""
        home = get_soup_at_url(cls.url)
        return cls.get_nav(home)[0]

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the 'next' (index 3) or 'prev' (index 1) navigation element."""
        position = 3 if next_ else 1
        return cls.get_nav(last_soup)[position]

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic info from the link's title and the 'comicimg' images."""
        title = link['title']
        pictures = soup.find_all('img', id='comicimg')
        sources = [img['src'] for img in pictures]
        return {
            'title': title,
            'img': sources,
        }
3209
3210
3211
class MarketoonistComics(GenericNavigableComic):
    """Scraper for Marketoonist comics."""
    name = 'marketoonist'
    long_name = 'Marketoonist'
    url = 'https://marketoonist.com/cartoons'
    first_url = 'https://marketoonist.com/2002/10/the-8-types-of-brand-managers-2.html'
    get_first_comic_link = simulate_first_link
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract images, publication date and title from the page metadata."""
        image_metas = soup.find_all('meta', property='og:image')
        published = soup.find('meta', property='article:published_time')['content']
        # Only the first 10 characters are parsed, as a YYYY-MM-DD date.
        day = string_to_date(published[:10], "%Y-%m-%d")
        title = soup.find('meta', property='og:title')['content']
        sources = [meta['content'] for meta in image_metas]
        return {
            'img': sources,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'title': title,
        }
3234
3235
3236
class ConsoliaComics(GenericNavigableComic):
    """Scraper for Consolia comics."""
    name = 'consolia'
    long_name = 'consolia'
    url = 'https://consolia-comic.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the link with class 'first' found on the main page."""
        home = get_soup_at_url(cls.url)
        return home.find('a', class_='first')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the link with class 'next' (or 'prev'), or None when absent."""
        wanted = 'next' if next_ else 'prev'
        return last_soup.find('a', class_=wanted)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, image URLs and publication date from a comic page."""
        title = soup.find('meta', property='og:title')['content']
        published = soup.find('time')["datetime"]
        day = string_to_date(published, "%Y-%m-%d")
        image_metas = soup.find_all('meta', property='og:image')
        sources = [meta['content'] for meta in image_metas]
        return {
            'title': title,
            'img': sources,
            'day': day.day,
            'month': day.month,
            'year': day.year,
        }
3267
3268
3269
class TuMourrasMoinsBete(GenericNavigableComic):
    """Scraper for Tu Mourras Moins Bete comics."""
    name = 'mourrasmoinsbete'
    long_name = 'Tu Mourras Moins Bete'
    url = 'http://tumourrasmoinsbete.blogspot.fr'
    _categories = ('FRANCAIS',)
    first_url = 'http://tumourrasmoinsbete.blogspot.fr/2008/06/essai.html'
    get_first_comic_link = simulate_first_link

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the 'newer' (or 'older') pager link, or None when absent."""
        if next_:
            pager_id = 'Blog1_blog-pager-newer-link'
        else:
            pager_id = 'Blog1_blog-pager-older-link'
        return last_soup.find('a', id=pager_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract the page title, author and image URLs from a post."""
        title = soup.find('title').string
        post_body = soup.find('div', itemprop='description articleBody')
        pictures = post_body.find_all('img')
        author = soup.find('span', itemprop='author').string
        sources = [img['src'] for img in pictures]
        return {
            'img': sources,
            'author': author,
            'title': title,
        }
3294
3295
3296
class GeekAndPoke(GenericNavigableComic):
    """Class to retrieve Geek And Poke comics."""
    name = 'geek'
    long_name = 'Geek And Poke'
    url = 'http://geek-and-poke.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://geek-and-poke.com/geekandpoke/2006/8/27/a-new-place-for-a-not-so-old-blog.html'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        The class names are looked up inverted: 'prev-item' when next_ is
        truthy, 'next-item' otherwise. Returns None when no link is found.
        """
        return last_soup.find('a', class_='prev-item' if next_ else 'next-item')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with title, alt text, description, author, date
        parts and absolute image URLs. 'alt' is the alt text of the first
        image, or an empty string when the post has no usable image.
        """
        title = soup.find('meta', property='og:title')['content']
        desc = soup.find('meta', property='og:description')['content']
        date_str = soup.find('time', class_='published')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        author = soup.find('a', rel='author').string
        # Posts use one of two content containers.
        div_content = (soup.find('div', class_="body entry-content") or
                       soup.find('div', class_="special-content"))
        imgs = div_content.find_all('img')
        # Ignore <img> tags without a 'src' attribute.
        imgs = [i for i in imgs if i.get('src') is not None]
        # NOTE(review): on a BeautifulSoup Tag, `in` tests child nodes rather
        # than attributes, so this check may always pass - confirm before
        # tightening it (has_attr is the attribute test).
        assert all('title' not in i or i['alt'] == i['title'] for i in imgs)
        # Fall back to an empty string (not a list) so 'alt' is always text.
        alt = imgs[0].get('alt', "") if imgs else ""
        return {
            'title': title,
            'alt': alt,
            'description': desc,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
        }
3334
3335
3336
class GloryOwlComix(GenericNavigableComic):
    """Scraper for Glory Owl comics."""
    name = 'gloryowl'
    long_name = 'Glory Owl'
    url = 'http://gloryowlcomix.blogspot.fr'
    _categories = ('NSFW', 'FRANCAIS')
    first_url = 'http://gloryowlcomix.blogspot.fr/2013/02/1_7.html'
    get_first_comic_link = simulate_first_link

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the 'newer' (or 'older') pager link, or None when absent."""
        if next_:
            pager_id = 'Blog1_blog-pager-newer-link'
        else:
            pager_id = 'Blog1_blog-pager-older-link'
        return last_soup.find('a', id=pager_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract the page title, author and image URLs from a post."""
        title = soup.find('title').string
        # Images are taken from the <link rel="image_src"> elements.
        image_links = soup.find_all('link', rel='image_src')
        author = soup.find('a', rel='author').string
        sources = [tag['href'] for tag in image_links]
        return {
            'img': sources,
            'author': author,
            'title': title,
        }
3361
3362
3363
class GenericTumblrV1(GenericComic):
    """Shared scraper for comics hosted on Tumblr, using the V1 read API."""
    _categories = ('TUMBLR',)

    @classmethod
    def get_next_comic(cls, last_comic):
        """Yield info for each post returned by get_posts, skipping non-comics."""
        for post in cls.get_posts(last_comic):
            info = cls.get_comic_info(post)
            if info is None:
                continue
            yield info

    @classmethod
    def get_url_from_post(cls, post):
        """Return the 'url' attribute of a post element."""
        return post['url']

    @classmethod
    def get_api_url(cls):
        """Return the API endpoint: '/api/read/' joined onto the class url."""
        return urljoin_wrapper(cls.url, '/api/read/')

    @classmethod
    def get_comic_info(cls, post):
        """Build the comic info dict for a post, or None if it is not a photo post."""
        if post['type'] != 'photo':
            return None
        tumblr_id = int(post['id'])
        api_url = cls.get_api_url() + '?id=%d' % tumblr_id
        timestamp = int(post['unix-timestamp'])
        day = datetime.datetime.fromtimestamp(timestamp).date()
        caption = post.find('photo-caption')
        title = caption.string if caption else ""
        tags = ' '.join(tag.string for tag in post.find_all('tag'))
        # Photos may sit in nested 'photo' elements and/or directly in the post.
        photo_holders = post.find_all('photo')
        if not photo_holders:
            photo_holders = [post]
        # Each photo comes in several resolutions; only the first is kept.
        photo_urls = [holder.find('photo-url') for holder in photo_holders]
        return {
            'url': cls.get_url_from_post(post),
            'url2': post['url-with-slug'],
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'title': title,
            'tags': tags,
            'img': [photo_url.string for photo_url in photo_urls],
            'tumblr-id': tumblr_id,
            'api_url': api_url,
        }

    @classmethod
    def get_posts(cls, last_comic, nb_post_per_call=10):
        """Return the posts newer than last_comic, in chronological order.

        The API is paged nb_post_per_call posts at a time (max 50) and lists
        posts from newest to oldest; the accumulated posts are reversed
        before being returned. When last_comic is given, paging stops at the
        post whose url matches it. An empty list is returned when that post
        is never reached.
        """
        waiting_for_url = last_comic['url'] if last_comic else None
        collected = []
        if last_comic is not None:
            # Tumblr posts are sometimes deleted. If the previous post is
            # gone we could page through everything looking for it in vain,
            # so check for it first and stop early with a clear message.
            last_api_url = last_comic['api_url']
            try:
                get_soup_at_url(last_api_url)
            except urllib.error.HTTPError:
                try:
                    get_soup_at_url(cls.url)
                except urllib.error.HTTPError:
                    print("Did not find previous post nor main url %s" % cls.url)
                else:
                    print("Did not find previous post %s : it might have been deleted" % last_api_url)
                return reversed(collected)
        api_url = cls.get_api_url()
        first_page = get_soup_at_url(api_url).find('posts')
        start, total = int(first_page['start']), int(first_page['total'])
        assert start == 0
        for offset in range(0, total, nb_post_per_call):
            page_url = api_url + '?start=%d&num=%d' % (offset, nb_post_per_call)
            page = get_soup_at_url(page_url).find('posts')
            page_start, page_total = int(page['start']), int(page['total'])
            assert offset == page_start, "%d != %d" % (offset, page_start)
            # The total may change while paging; not handled yet.
            assert total == page_total, "%d != %d" % (total, page_total)
            for post in page.find_all('post'):
                if waiting_for_url and waiting_for_url == cls.get_url_from_post(post):
                    return reversed(collected)
                collected.append(post)
        if waiting_for_url is None:
            return reversed(collected)
        print("Did not find %s : there might be a problem" % waiting_for_url)
        return []
3456
3457
3458
class SaturdayMorningBreakfastCerealTumblr(GenericTumblrV1):
    """Scraper for Saturday Morning Breakfast Cereal comics (Tumblr)."""
    # Also published at:
    #  - http://www.gocomics.com/saturday-morning-breakfast-cereal
    #  - http://www.smbc-comics.com
    name = 'smbc-tumblr'
    long_name = 'Saturday Morning Breakfast Cereal (from Tumblr)'
    url = 'http://smbc-comics.tumblr.com'
    _categories = ('SMBC',)
3466
3467
3468
class IrwinCardozo(GenericTumblrV1):
    """Scraper for Irwin Cardozo comics (Tumblr)."""
    name = 'irwinc'
    long_name = 'Irwin Cardozo'
    url = 'http://irwincardozocomics.tumblr.com'
3473
3474
3475
class AccordingToDevin(GenericTumblrV1):
    """Scraper for According To Devin comics (Tumblr)."""
    name = 'devin'
    long_name = 'According To Devin'
    url = 'http://accordingtodevin.tumblr.com'
3480
3481
3482
class ItsTheTieTumblr(GenericTumblrV1):
    """Scraper for It's the tie comics (Tumblr)."""
    # Also published at:
    #  - http://itsthetie.com
    #  - https://tapastic.com/series/itsthetie
    name = 'tie-tumblr'
    long_name = "It's the tie (from Tumblr)"
    url = "http://itsthetie.tumblr.com"
    _categories = ('TIE',)
3490
3491
3492
class OctopunsTumblr(GenericTumblrV1):
    """Scraper for Octopuns comics (Tumblr)."""
    # Also published at http://www.octopuns.net
    name = 'octopuns-tumblr'
    long_name = 'Octopuns (from Tumblr)'
    url = 'http://octopuns.tumblr.com'
3498
3499
3500
class PicturesInBoxesTumblr(GenericTumblrV1):
    """Scraper for Pictures In Boxes comics (Tumblr)."""
    # Also published at http://www.picturesinboxes.com
    name = 'picturesinboxes-tumblr'
    long_name = 'Pictures in Boxes (from Tumblr)'
    url = 'http://picturesinboxescomic.tumblr.com'
3506
3507
3508
class TubeyToonsTumblr(GenericTumblrV1):
    """Scraper for TubeyToons comics (Tumblr)."""
    # Also published at:
    #  - http://tapastic.com/series/Tubey-Toons
    #  - http://tubeytoons.com
    name = 'tubeytoons-tumblr'
    long_name = 'Tubey Toons (from Tumblr)'
    url = 'http://tubeytoons.tumblr.com'
    # NOTE(review): 'TUNEYTOONS' looks like a misspelling of 'TUBEYTOONS';
    # confirm what other classes use for this category before renaming it.
    _categories = ('TUNEYTOONS',)
3516
3517
3518
class UnearthedComicsTumblr(GenericTumblrV1):
    """Scraper for Unearthed comics (Tumblr)."""
    # Also published at:
    #  - http://tapastic.com/series/UnearthedComics
    #  - http://unearthedcomics.com
    name = 'unearthed-tumblr'
    long_name = 'Unearthed Comics (from Tumblr)'
    url = 'http://unearthedcomics.tumblr.com'
    _categories = ('UNEARTHED',)
3526
3527
3528
class PieComic(GenericTumblrV1):
    """Scraper for Pie Comic comics (Tumblr)."""
    name = 'pie'
    long_name = 'Pie Comic'
    url = "http://piecomic.tumblr.com"
3533
3534
3535
class MrEthanDiamond(GenericTumblrV1):
    """Scraper for Mr Ethan Diamond comics (Tumblr)."""
    name = 'diamond'
    long_name = 'Mr Ethan Diamond'
    url = 'http://mrethandiamond.tumblr.com'
3540
3541
3542
class Flocci(GenericTumblrV1):
    """Scraper for floccinaucinihilipilification comics (Tumblr)."""
    name = 'flocci'
    long_name = 'floccinaucinihilipilification'
    url = "http://floccinaucinihilipilificationa.tumblr.com"
3547
3548
3549
class UpAndOut(GenericTumblrV1):
    """Scraper for Up & Out comics (Tumblr)."""
    # Also published at http://tapastic.com/series/UP-and-OUT
    name = 'upandout'
    long_name = 'Up And Out (from Tumblr)'
    url = 'http://upandoutcomic.tumblr.com'
3555
3556
3557
class Pundemonium(GenericTumblrV1):
    """Scraper for Pundemonium comics (Tumblr)."""
    name = 'pundemonium'
    long_name = 'Pundemonium'
    url = 'http://monstika.tumblr.com'
3562
3563
3564
class PoorlyDrawnLinesTumblr(GenericTumblrV1):
    """Scraper for Poorly Drawn Lines comics (Tumblr)."""
    # Also published at http://poorlydrawnlines.com
    name = 'poorlydrawn-tumblr'
    long_name = 'Poorly Drawn Lines (from Tumblr)'
    url = 'http://pdlcomics.tumblr.com'
    _categories = ('POORLYDRAWN',)
3571
3572
3573
class PearShapedComics(GenericTumblrV1):
    """Scraper for Pear-Shaped Comics (via the Tumblr V1 API)."""
    name = 'pearshaped'
    long_name = 'Pear-Shaped Comics'
    url = 'http://pearshapedcomics.com'
3578
3579
3580
class PondScumComics(GenericTumblrV1):
    """Scraper for Pond Scum comics (Tumblr)."""
    name = 'pond'
    long_name = 'Pond Scum'
    url = 'http://pondscumcomic.tumblr.com'
3585
3586
3587
class MercworksTumblr(GenericTumblrV1):
    """Scraper for Mercworks comics (Tumblr)."""
    # Also published at http://mercworks.net
    name = 'mercworks-tumblr'
    long_name = 'Mercworks (from Tumblr)'
    url = 'http://mercworks.tumblr.com'
3593
3594
3595
class OwlTurdTumblr(GenericTumblrV1):
    """Scraper for Owl Turd comics (via the Tumblr V1 API)."""
    # Also published at http://tapastic.com/series/Owl-Turd-Comix
    name = 'owlturd-tumblr'
    long_name = 'Owl Turd (from Tumblr)'
    url = 'http://owlturd.com'
    _categories = ('OWLTURD',)
3602
3603
3604
class VectorBelly(GenericTumblrV1):
    """Scraper for Vector Belly comics (Tumblr)."""
    # Also published at http://vectorbelly.com
    name = 'vector'
    long_name = 'Vector Belly'
    url = 'http://vectorbelly.tumblr.com'
3610
3611
3612
class GoneIntoRapture(GenericTumblrV1):
    """Scraper for Gone Into Rapture comics (via the Tumblr V1 API)."""
    # Also published at:
    #  - http://goneintorapture.tumblr.com
    #  - http://tapastic.com/series/Goneintorapture
    name = 'rapture'
    long_name = 'Gone Into Rapture'
    url = 'http://www.goneintorapture.com'
3619
3620
3621
class TheOatmealTumblr(GenericTumblrV1):
    """Scraper for The Oatmeal comics (Tumblr)."""
    # Also published at http://theoatmeal.com
    name = 'oatmeal-tumblr'
    long_name = 'The Oatmeal (from Tumblr)'
    url = 'http://oatmeal.tumblr.com'
3627
3628
3629
class HeckIfIKnowComicsTumblr(GenericTumblrV1):
    """Scraper for Heck If I Know comics (via the Tumblr V1 API)."""
    # Also published at http://tapastic.com/series/Regular
    name = 'heck-tumblr'
    long_name = 'Heck if I Know comics (from Tumblr)'
    url = 'http://heckifiknowcomics.com'
3635
3636
3637
class MyJetPack(GenericTumblrV1):
    """Scraper for My Jet Pack comics (Tumblr)."""
    name = 'jetpack'
    long_name = 'My Jet Pack'
    url = 'http://myjetpack.tumblr.com'
3642
3643
3644
class CheerUpEmoKidTumblr(GenericTumblrV1):
    """Scraper for Cheer Up Emo Kid comics (Tumblr)."""
    # Also published at:
    #  - http://www.cheerupemokid.com
    #  - http://tapastic.com/series/CUEK
    name = 'cuek-tumblr'
    long_name = 'Cheer Up Emo Kid (from Tumblr)'
    url = 'http://enzocomics.tumblr.com'
3651
3652
3653
class ForLackOfABetterComic(GenericTumblrV1):
    """Scraper for For Lack Of A Better Comic (Tumblr)."""
    # Also published at http://forlackofabettercomic.com
    name = 'lack'
    long_name = 'For Lack Of A Better Comic'
    url = 'http://forlackofabettercomic.tumblr.com'
3659
3660
3661
class ZenPencilsTumblr(GenericTumblrV1):
    """Scraper for Zen Pencils comics (Tumblr)."""
    # Also published at:
    #  - http://zenpencils.com
    #  - http://www.gocomics.com/zen-pencils
    name = 'zenpencils-tumblr'
    long_name = 'Zen Pencils (from Tumblr)'
    url = 'http://zenpencils.tumblr.com'
    _categories = ('ZENPENCILS',)
3669
3670
3671
class ThreeWordPhraseTumblr(GenericTumblrV1):
    """Scraper for Three Word Phrase comics (Tumblr)."""
    # Also published at http://threewordphrase.com
    name = 'threeword-tumblr'
    long_name = 'Three Word Phrase (from Tumblr)'
    url = 'http://www.threewordphrase.tumblr.com'
3677
3678
3679
class TimeTrabbleTumblr(GenericTumblrV1):
    """Scraper for Time Trabble comics (Tumblr)."""
    # Also published at http://timetrabble.com
    name = 'timetrabble-tumblr'
    long_name = 'Time Trabble (from Tumblr)'
    url = 'http://timetrabble.tumblr.com'
3685
3686
3687
class SafelyEndangeredTumblr(GenericTumblrV1):
    """Scraper for Safely Endangered comics (Tumblr)."""
    # Also published at http://www.safelyendangered.com
    name = 'endangered-tumblr'
    long_name = 'Safely Endangered (from Tumblr)'
    url = 'http://tumblr.safelyendangered.com'
3693
3694
3695
class MouseBearComedyTumblr(GenericTumblrV1):
    """Scraper for Mouse Bear Comedy comics (Tumblr)."""
    # Also published at http://www.mousebearcomedy.com
    name = 'mousebear-tumblr'
    long_name = 'Mouse Bear Comedy (from Tumblr)'
    url = 'http://mousebearcomedy.tumblr.com'
3701
3702
3703
class BouletCorpTumblr(GenericTumblrV1):
    """Scraper for BouletCorp comics (Tumblr)."""
    # Also published at http://www.bouletcorp.com
    name = 'boulet-tumblr'
    long_name = 'Boulet Corp (from Tumblr)'
    url = 'http://bouletcorp.tumblr.com'
    _categories = ('BOULET',)
3710
3711
3712
class TheAwkwardYetiTumblr(GenericTumblrV1):
    """Scraper for The Awkward Yeti comics (Tumblr)."""
    # Also published at:
    #  - http://www.gocomics.com/the-awkward-yeti
    #  - http://theawkwardyeti.com
    #  - https://tapastic.com/series/TheAwkwardYeti
    name = 'yeti-tumblr'
    long_name = 'The Awkward Yeti (from Tumblr)'
    url = 'http://larstheyeti.tumblr.com'
    _categories = ('YETI',)
3721
3722
3723
class NellucNhoj(GenericTumblrV1):
    """Scraper for Nelluc Nhoj comics (via the Tumblr V1 API)."""
    name = 'nhoj'
    long_name = 'Nelluc Nhoj'
    url = 'http://nellucnhoj.com'
3728
3729
3730
class DownTheUpwardSpiralTumblr(GenericTumblrV1):
    """Scraper for Down The Upward Spiral comics (Tumblr)."""
    # Also published at http://www.downtheupwardspiral.com
    name = 'spiral-tumblr'
    long_name = 'Down the Upward Spiral (from Tumblr)'
    url = 'http://downtheupwardspiral.tumblr.com'
3736
3737
3738
class AsPerUsualTumblr(GenericTumblrV1):
    """Class to retrieve As Per Usual comics."""
    # Also on https://tapastic.com/series/AsPerUsual
    name = 'usual-tumblr'
    long_name = 'As Per Usual (from Tumblr)'
    url = 'http://as-per-usual.tumblr.com'
    # Declared as `_categories`, the attribute name every other comic class
    # uses for its category tuple (was `categories`).
    _categories = ('DAMILEE', )
3745
3746
3747
class HotComicsForCoolPeopleTumblr(GenericTumblrV1):
    """Class to retrieve Hot Comics For Cool People."""
    # Also on https://tapastic.com/series/Hot-Comics-For-Cool-People
    # Also on http://hotcomics.biz (links to tumblr)
    # Also on http://hcfcp.com (links to tumblr)
    name = 'hotcomics-tumblr'
    long_name = 'Hot Comics For Cool People (from Tumblr)'
    url = 'http://hotcomicsforcoolpeople.tumblr.com'
    # Declared as `_categories`, the attribute name every other comic class
    # uses for its category tuple (was `categories`).
    _categories = ('DAMILEE', )
3756
3757
3758
class OneOneOneOneComicTumblr(GenericTumblrV1):
3759
    """Class to retrieve 1111 Comics."""
3760
    # Also on http://www.1111comics.me
3761
    # Also on https://tapastic.com/series/1111-Comics
3762
    name = '1111-tumblr'
3763
    long_name = '1111 Comics (from Tumblr)'
3764
    url = 'http://comics1111.tumblr.com'
3765
    _categories = ('ONEONEONEONE', )
3766
3767
3768
class JhallComicsTumblr(GenericTumblrV1):
3769
    """Class to retrieve Jhall Comics."""
3770
    # Also on http://jhallcomics.com
3771
    name = 'jhall-tumblr'
3772
    long_name = 'Jhall Comics (from Tumblr)'
3773
    url = 'http://jhallcomics.tumblr.com'
3774
3775
3776
class BerkeleyMewsTumblr(GenericTumblrV1):
3777
    """Class to retrieve Berkeley Mews comics."""
3778
    # Also on http://www.gocomics.com/berkeley-mews
3779
    # Also on http://www.berkeleymews.com
3780
    name = 'berkeley-tumblr'
3781
    long_name = 'Berkeley Mews (from Tumblr)'
3782
    url = 'http://mews.tumblr.com'
3783
    _categories = ('BERKELEY', )
3784
3785
3786
class JoanCornellaTumblr(GenericTumblrV1):
3787
    """Class to retrieve Joan Cornella comics."""
3788
    # Also on http://joancornella.net
3789
    name = 'cornella-tumblr'
3790
    long_name = 'Joan Cornella (from Tumblr)'
3791
    url = 'http://cornellajoan.tumblr.com'
3792
3793
3794
class RespawnComicTumblr(GenericTumblrV1):
3795
    """Class to retrieve Respawn Comic."""
3796
    # Also on http://respawncomic.com
3797
    name = 'respawn-tumblr'
3798
    long_name = 'Respawn Comic (from Tumblr)'
3799
    url = 'http://respawncomic.tumblr.com'
3800
3801
3802
class ChrisHallbeckTumblr(GenericTumblrV1):
    """Class to retrieve Chris Hallbeck comics."""
    # Also on https://tapastic.com/ChrisHallbeck
    # Also on http://maximumble.com
    # Also on http://minimumble.com
    # Also on http://thebookofbiff.com
    name = 'hallbeck-tumblr'
    # Display name spelled like the author's name in the class name and URL.
    long_name = 'Chris Hallbeck (from Tumblr)'
    url = 'http://chrishallbeck.tumblr.com'
    # NOTE(review): the category key keeps its existing spelling; it is
    # presumably shared with other classes of the file - verify before renaming.
    _categories = ('HALLBACK', )
3812
3813
3814
class ComicNuggets(GenericTumblrV1):
3815
    """Class to retrieve Comic Nuggets."""
3816
    name = 'nuggets'
3817
    long_name = 'Comic Nuggets'
3818
    url = 'http://comicnuggets.com'
3819
3820
3821
class PigeonGazetteTumblr(GenericTumblrV1):
3822
    """Class to retrieve The Pigeon Gazette comics."""
3823
    # Also on https://tapastic.com/series/The-Pigeon-Gazette
3824
    name = 'pigeon-tumblr'
3825
    long_name = 'The Pigeon Gazette (from Tumblr)'
3826
    url = 'http://thepigeongazette.tumblr.com'
3827
3828
3829
class CancerOwl(GenericTumblrV1):
3830
    """Class to retrieve Cancer Owl comics."""
3831
    # Also on http://cancerowl.com
3832
    name = 'cancerowl-tumblr'
3833
    long_name = 'Cancer Owl (from Tumblr)'
3834
    url = 'http://cancerowl.tumblr.com'
3835
3836
3837
class FowlLanguageTumblr(GenericTumblrV1):
3838
    """Class to retrieve Fowl Language comics."""
3839
    # Also on http://www.fowllanguagecomics.com
3840
    # Also on http://tapastic.com/series/Fowl-Language-Comics
3841
    # Also on http://www.gocomics.com/fowl-language
3842
    name = 'fowllanguage-tumblr'
3843
    long_name = 'Fowl Language Comics (from Tumblr)'
3844
    url = 'http://fowllanguagecomics.tumblr.com'
3845
    _categories = ('FOWLLANGUAGE', )
3846
3847
3848
class TheOdd1sOutTumblr(GenericTumblrV1):
3849
    """Class to retrieve The Odd 1s Out comics."""
3850
    # Also on http://theodd1sout.com
3851
    # Also on https://tapastic.com/series/Theodd1sout
3852
    name = 'theodd-tumblr'
3853
    long_name = 'The Odd 1s Out (from Tumblr)'
3854
    url = 'http://theodd1sout.tumblr.com'
3855
3856
3857
class TheUnderfoldTumblr(GenericTumblrV1):
3858
    """Class to retrieve The Underfold comics."""
3859
    # Also on http://theunderfold.com
3860
    name = 'underfold-tumblr'
3861
    long_name = 'The Underfold (from Tumblr)'
3862
    url = 'http://theunderfold.tumblr.com'
3863
3864
3865
class LolNeinTumblr(GenericTumblrV1):
3866
    """Class to retrieve Lol Nein comics."""
3867
    # Also on http://lolnein.com
3868
    name = 'lolnein-tumblr'
3869
    long_name = 'Lol Nein (from Tumblr)'
3870
    url = 'http://lolneincom.tumblr.com'
3871
3872
3873
class FatAwesomeComicsTumblr(GenericTumblrV1):
3874
    """Class to retrieve Fat Awesome Comics."""
3875
    # Also on http://fatawesome.com/comics
3876
    name = 'fatawesome-tumblr'
3877
    long_name = 'Fat Awesome (from Tumblr)'
3878
    url = 'http://fatawesomecomedy.tumblr.com'
3879
3880
3881
class TheWorldIsFlatTumblr(GenericTumblrV1):
3882
    """Class to retrieve The World Is Flat Comics."""
3883
    # Also on https://tapastic.com/series/The-World-is-Flat
3884
    name = 'flatworld-tumblr'
3885
    long_name = 'The World Is Flat (from Tumblr)'
3886
    url = 'http://theworldisflatcomics.tumblr.com'
3887
3888
3889
class DorrisMc(GenericTumblrV1):
3890
    """Class to retrieve Dorris Mc Comics"""
3891
    # Also on http://www.gocomics.com/dorris-mccomics
3892
    name = 'dorrismc'
3893
    long_name = 'Dorris Mc'
3894
    url = 'http://dorrismccomics.com'
3895
3896
3897
class LeleozTumblr(GenericEmptyComic, GenericTumblrV1):
3898
    """Class to retrieve Leleoz comics."""
3899
    # Also on https://tapastic.com/series/Leleoz
3900
    name = 'leleoz-tumblr'
3901
    long_name = 'Leleoz (from Tumblr)'
3902
    url = 'http://leleozcomics.tumblr.com'
3903
3904
3905
class MoonBeardTumblr(GenericTumblrV1):
3906
    """Class to retrieve MoonBeard comics."""
3907
    # Also on http://moonbeard.com
3908
    # Also on http://www.webtoons.com/en/comedy/moon-beard/list?title_no=471
3909
    name = 'moonbeard-tumblr'
3910
    long_name = 'Moon Beard (from Tumblr)'
3911
    url = 'http://blog.squiresjam.es/moonbeard'
3912
3913
3914
class AComik(GenericTumblrV1):
3915
    """Class to retrieve A Comik"""
3916
    name = 'comik'
3917
    long_name = 'A Comik'
3918
    url = 'http://acomik.com'
3919
3920
3921
class ClassicRandy(GenericTumblrV1):
3922
    """Class to retrieve Classic Randy comics."""
3923
    name = 'randy'
3924
    long_name = 'Classic Randy'
3925
    url = 'http://classicrandy.tumblr.com'
3926
3927
3928
class DagssonTumblr(GenericTumblrV1):
3929
    """Class to retrieve Dagsson comics."""
3930
    # Also on http://www.dagsson.com
3931
    name = 'dagsson-tumblr'
3932
    long_name = 'Dagsson Hugleikur (from Tumblr)'
3933
    url = 'http://hugleikurdagsson.tumblr.com'
3934
3935
3936
class LinsEditionsTumblr(GenericTumblrV1):
3937
    """Class to retrieve L.I.N.S. Editions comics."""
3938
    # Also on https://linsedition.com
3939
    # Now on http://warandpeas.tumblr.com
3940
    name = 'lins-tumblr'
3941
    long_name = 'L.I.N.S. Editions (from Tumblr)'
3942
    url = 'http://linscomics.tumblr.com'
3943
    _categories = ('LINS', )
3944
3945
3946
class WarAndPeasTumblr(GenericTumblrV1):
3947
    """Class to retrieve War And Peas comics."""
3948
    # Was on http://linscomics.tumblr.com
3949
    name = 'warandpeas-tumblr'
3950
    long_name = 'War And Peas (from Tumblr)'
3951
    url = 'http://warandpeas.tumblr.com'
3952
    _categories = ('WARANDPEAS', )
3953
3954
3955
class OrigamiHotDish(GenericTumblrV1):
3956
    """Class to retrieve Origami Hot Dish comics."""
3957
    name = 'origamihotdish'
3958
    long_name = 'Origami Hot Dish'
3959
    url = 'http://origamihotdish.com'
3960
3961
3962
class HitAndMissComicsTumblr(GenericTumblrV1):
3963
    """Class to retrieve Hit and Miss Comics."""
3964
    name = 'hitandmiss'
3965
    long_name = 'Hit and Miss Comics'
3966
    url = 'http://hitandmisscomics.tumblr.com'
3967
3968
3969
class HMBlanc(GenericTumblrV1):
3970
    """Class to retrieve HM Blanc comics."""
3971
    name = 'hmblanc'
3972
    long_name = 'HM Blanc'
3973
    url = 'http://hmblanc.tumblr.com'
3974
3975
3976
class TalesOfAbsurdityTumblr(GenericTumblrV1):
3977
    """Class to retrieve Tales Of Absurdity comics."""
3978
    # Also on http://talesofabsurdity.com
3979
    # Also on http://tapastic.com/series/Tales-Of-Absurdity
3980
    name = 'absurdity-tumblr'
3981
    long_name = 'Tales of Absurdity (from Tumblr)'
3982
    url = 'http://talesofabsurdity.tumblr.com'
3983
    _categories = ('ABSURDITY', )
3984
3985
3986
class RobbieAndBobby(GenericTumblrV1):
3987
    """Class to retrieve Robbie And Bobby comics."""
3988
    # Also on http://robbieandbobby.com
3989
    name = 'robbie-tumblr'
3990
    long_name = 'Robbie And Bobby (from Tumblr)'
3991
    url = 'http://robbieandbobby.tumblr.com'
3992
3993
3994
class ElectricBunnyComicTumblr(GenericTumblrV1):
3995
    """Class to retrieve Electric Bunny Comics."""
3996
    # Also on http://www.electricbunnycomics.com/View/Comic/153/Welcome+to+Hell
3997
    name = 'bunny-tumblr'
3998
    long_name = 'Electric Bunny Comic (from Tumblr)'
3999
    url = 'http://electricbunnycomics.tumblr.com'
4000
4001
4002
class Hoomph(GenericTumblrV1):
4003
    """Class to retrieve Hoomph comics."""
4004
    name = 'hoomph'
4005
    long_name = 'Hoomph'
4006
    url = 'http://hoom.ph'
4007
4008
4009
class BFGFSTumblr(GenericTumblrV1):
4010
    """Class to retrieve BFGFS comics."""
4011
    # Also on https://tapastic.com/series/BFGFS
4012
    # Also on http://bfgfs.com
4013
    name = 'bfgfs-tumblr'
4014
    long_name = 'BFGFS (from Tumblr)'
4015
    url = 'http://bfgfs.tumblr.com'
4016
4017
4018
class DoodleForFood(GenericTumblrV1):
4019
    """Class to retrieve Doodle For Food comics."""
4020
    # Also on http://doodleforfood.com
4021
    name = 'doodle'
4022
    long_name = 'Doodle For Food'
4023
    url = 'http://doodleforfood.com'
4024
4025
4026
class CassandraCalinTumblr(GenericTumblrV1):
4027
    """Class to retrieve C. Cassandra comics."""
4028
    # Also on http://cassandracalin.com
4029
    # Also on https://tapastic.com/series/C-Cassandra-comics
4030
    name = 'cassandra-tumblr'
4031
    long_name = 'Cassandra Calin (from Tumblr)'
4032
    url = 'http://c-cassandra.tumblr.com'
4033
4034
4035
class DougWasTaken(GenericTumblrV1):
4036
    """Class to retrieve Doug Was Taken comics."""
4037
    name = 'doug'
4038
    long_name = 'Doug Was Taken'
4039
    url = 'http://dougwastaken.tumblr.com'
4040
4041
4042
class MandatoryRollerCoaster(GenericTumblrV1):
4043
    """Class to retrieve Mandatory Roller Coaster comics."""
4044
    name = 'rollercoaster'
4045
    long_name = 'Mandatory Roller Coaster'
4046
    url = 'http://mandatoryrollercoaster.com'
4047
4048
4049
class CEstPasEnRegardantSesPompes(GenericTumblrV1):
4050
    """Class to retrieve C'Est Pas En Regardant Ses Pompes (...)  comics."""
4051
    name = 'cperspqccltt'
4052
    long_name = 'C Est Pas En Regardant Ses Pompes (...)'
4053
    url = 'http://cperspqccltt.tumblr.com'
4054
4055
4056
class TheGrohlTroll(GenericTumblrV1):
4057
    """Class to retrieve The Grohl Troll comics."""
4058
    name = 'grohltroll'
4059
    long_name = 'The Grohl Troll'
4060
    url = 'http://thegrohltroll.com'
4061
4062
4063
class WebcomicName(GenericTumblrV1):
4064
    """Class to retrieve Webcomic Name comics."""
4065
    name = 'webcomicname'
4066
    long_name = 'Webcomic Name'
4067
    url = 'http://webcomicname.com'
4068
4069
4070
class BooksOfAdam(GenericTumblrV1):
4071
    """Class to retrieve Books of Adam comics."""
4072
    # Also on http://www.booksofadam.com
4073
    name = 'booksofadam'
4074
    long_name = 'Books of Adam'
4075
    url = 'http://booksofadam.tumblr.com'
4076
4077
4078
class HarkAVagrant(GenericTumblrV1):
4079
    """Class to retrieve Hark A Vagrant comics."""
4080
    # Also on http://www.harkavagrant.com
4081
    name = 'hark-tumblr'
4082
    long_name = 'Hark A Vagrant (from Tumblr)'
4083
    url = 'http://beatonna.tumblr.com'
4084
4085
4086
class OurSuperAdventureTumblr(GenericTumblrV1):
4087
    """Class to retrieve Our Super Adventure comics."""
4088
    # Also on https://tapastic.com/series/Our-Super-Adventure
4089
    # Also on http://www.oursuperadventure.com
4090
    # http://sarahgraley.com
4091
    name = 'superadventure-tumblr'
4092
    long_name = 'Our Super Adventure (from Tumblr)'
4093
    url = 'http://sarahssketchbook.tumblr.com'
4094
4095
4096
class JakeLikesOnions(GenericTumblrV1):
4097
    """Class to retrieve Jake Likes Onions comics."""
4098
    name = 'jake'
4099
    long_name = 'Jake Likes Onions'
4100
    url = 'http://jakelikesonions.com'
4101
4102
4103
class InYourFaceCake(GenericTumblrV1):
4104
    """Class to retrieve In Your Face Cake comics."""
4105
    name = 'inyourfacecake-tumblr'
4106
    long_name = 'In Your Face Cake (from Tumblr)'
4107
    url = 'http://in-your-face-cake.tumblr.com'
4108
4109
4110
class Robospunk(GenericTumblrV1):
4111
    """Class to retrieve Robospunk comics."""
4112
    name = 'robospunk'
4113
    long_name = 'Robospunk'
4114
    url = 'http://robospunk.com'
4115
4116
4117
class BananaTwinky(GenericTumblrV1):
4118
    """Class to retrieve Banana Twinky comics."""
4119
    name = 'banana'
4120
    long_name = 'Banana Twinky'
4121
    url = 'http://bananatwinky.tumblr.com'
4122
4123
4124
class YesterdaysPopcornTumblr(GenericTumblrV1):
4125
    """Class to retrieve Yesterday's Popcorn comics."""
4126
    # Also on http://www.yesterdayspopcorn.com
4127
    # Also on https://tapastic.com/series/Yesterdays-Popcorn
4128
    name = 'popcorn-tumblr'
4129
    long_name = 'Yesterday\'s Popcorn (from Tumblr)'
4130
    url = 'http://yesterdayspopcorn.tumblr.com'
4131
4132
4133
class TwistedDoodles(GenericTumblrV1):
4134
    """Class to retrieve Twisted Doodles comics."""
4135
    name = 'twisted'
4136
    long_name = 'Twisted Doodles'
4137
    url = 'http://www.twisteddoodles.com'
4138
4139
4140
class UbertoolTumblr(GenericTumblrV1):
4141
    """Class to retrieve Ubertool comics."""
4142
    # Also on http://ubertoolcomic.com
4143
    # Also on https://tapastic.com/series/ubertool
4144
    name = 'ubertool-tumblr'
4145
    long_name = 'Ubertool (from Tumblr)'
4146
    url = 'http://ubertool.tumblr.com'
4147
    _categories = ('UBERTOOL', )
4148
4149
4150
class LittleLifeLinesTumblr(GenericTumblrV1):
4151
    """Class to retrieve Little Life Lines comics."""
4152
    # Also on http://www.littlelifelines.com
4153
    name = 'life-tumblr'
4154
    long_name = 'Little Life Lines (from Tumblr)'
4155
    url = 'https://little-life-lines.tumblr.com'
4156
4157
4158
class TheyCanTalk(GenericTumblrV1):
4159
    """Class to retrieve They Can Talk comics."""
4160
    name = 'theycantalk'
4161
    long_name = 'They Can Talk'
4162
    url = 'http://theycantalk.com'
4163
4164
4165
class Will5NeverCome(GenericTumblrV1):
4166
    """Class to retrieve Will 5:00 Never Come comics."""
4167
    name = 'will5'
4168
    long_name = 'Will 5:00 Never Come ?'
4169
    url = 'http://will5nevercome.com'
4170
4171
4172
class Sephko(GenericTumblrV1):
4173
    """Class to retrieve Sephko Comics."""
4174
    # Also on http://www.sephko.com
4175
    name = 'sephko'
4176
    long_name = 'Sephko'
4177
    url = 'http://sephko.tumblr.com'
4178
4179
4180
class BlazersAtDawn(GenericTumblrV1):
4181
    """Class to retrieve Blazers At Dawn Comics."""
4182
    name = 'blazers'
4183
    long_name = 'Blazers At Dawn'
4184
    url = 'http://blazersatdawn.tumblr.com'
4185
4186
4187
class ArtByMoga(GenericEmptyComic, GenericTumblrV1):  # Deactivated because it downloads too many things
4188
    """Class to retrieve Art By Moga Comics."""
4189
    name = 'moga'
4190
    long_name = 'Art By Moga'
4191
    url = 'http://artbymoga.tumblr.com'
4192
4193
4194
class VerbalVomitTumblr(GenericTumblrV1):
4195
    """Class to retrieve Verbal Vomit comics."""
4196
    # Also on http://www.verbal-vomit.com
4197
    name = 'vomit-tumblr'
4198
    long_name = 'Verbal Vomit (from Tumblr)'
4199
    url = 'http://verbalvomits.tumblr.com'
4200
4201
4202
class LibraryComic(GenericTumblrV1):
4203
    """Class to retrieve LibraryComic."""
4204
    # Also on http://librarycomic.com
4205
    name = 'library-tumblr'
4206
    long_name = 'LibraryComic (from Tumblr)'
4207
    url = 'http://librarycomic.tumblr.com'
4208
4209
4210
class TizzyStitchBirdTumblr(GenericTumblrV1):
4211
    """Class to retrieve Tizzy Stitch Bird comics."""
4212
    # Also on http://tizzystitchbird.com
4213
    # Also on https://tapastic.com/series/TizzyStitchbird
4214
    # Also on http://www.webtoons.com/en/challenge/tizzy-stitchbird/list?title_no=50082
4215
    name = 'tizzy-tumblr'
4216
    long_name = 'Tizzy Stitch Bird (from Tumblr)'
4217
    url = 'http://tizzystitchbird.tumblr.com'
4218
4219
4220
class VictimsOfCircumsolarTumblr(GenericTumblrV1):
4221
    """Class to retrieve VictimsOfCircumsolar comics."""
4222
    # Also on http://www.victimsofcircumsolar.com
4223
    name = 'circumsolar-tumblr'
4224
    long_name = 'Victims Of Circumsolar (from Tumblr)'
4225
    url = 'http://victimsofcomics.tumblr.com'
4226
4227
4228
class HorovitzComics(GenericListableComic):
    """Base class holding what the different Horovitz comics share.

    Concrete subclasses must replace `link_re` with a compiled pattern
    whose first group captures the comic number in an archive link href.
    """
    url = 'http://www.horovitzcomics.com'
    _categories = ('HOROVITZ', )
    # The three groups are read as year, month and day of the comic image.
    img_re = re.compile('.*comics/([0-9]*)/([0-9]*)/([0-9]*)/.*$')
    # Placeholder: each subclass supplies its own pattern.
    link_re = NotImplemented
    get_url_from_archive_element = join_cls_url_to_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect title, date, images and number for one comic.

        `soup` is the parsed comic page and `link` the archive anchor
        leading to it. Exactly one <img id="comic"> is expected.
        """
        link_match = cls.link_re.match(link['href'])
        comic_num = int(link_match.group(1))
        comic_title = link.string
        comic_imgs = soup.find_all('img', id='comic')
        assert len(comic_imgs) == 1
        # The publication date is only available through the image path.
        date_match = cls.img_re.match(comic_imgs[0]['src'])
        year, month, day = map(int, date_match.groups())
        img_urls = [img['src'] for img in comic_imgs]
        return {
            'title': comic_title,
            'day': day,
            'month': month,
            'year': year,
            'img': img_urls,
            'num': comic_num,
        }

    @classmethod
    def get_archive_elements(cls):
        """Return the archive anchors matching `link_re`, in reversed page order."""
        archive_url = 'http://www.horovitzcomics.com/comics/archive/'
        archive_soup = get_soup_at_url(archive_url)
        matching_links = archive_soup.find_all('a', href=cls.link_re)
        return reversed(matching_links)
4259
4260
4261
class HorovitzNew(HorovitzComics):
    """Horovitz comics whose archive links look like /comics/new/<number>."""
    name = 'horovitznew'
    long_name = 'Horovitz New'
    # First group captures the comic number.
    link_re = re.compile(r'^/comics/new/([0-9]+)$')
4266
4267
4268
class HorovitzClassic(HorovitzComics):
    """Horovitz comics whose archive links look like /comics/classic/<number>."""
    name = 'horovitzclassic'
    long_name = 'Horovitz Classic'
    # First group captures the comic number.
    link_re = re.compile(r'^/comics/classic/([0-9]+)$')
4273
4274
4275
class GenericGoComic(GenericNavigableComic):
    """Shared logic for the comics retrieved from gocomics.com."""
    _categories = ('GOCOMIC', )

    @classmethod
    def get_first_comic_link(cls):
        """Return the anchor pointing to the first comic, found on the comic's main page."""
        # The full class attribute (trailing space included) is matched as is.
        first_css = 'fa btn btn-outline-default btn-circle fa-backward sm '
        main_page = get_soup_at_url(cls.url)
        return main_page.find('a', class_=first_css)

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor to the next comic if `next_` is true, else to the previous one."""
        if next_:
            css = 'fa btn btn-outline-default btn-circle fa-caret-right sm '
        else:
            css = 'fa btn btn-outline-default btn-circle fa-caret-left sm '
        return last_soup.find('a', class_=css)

    @classmethod
    def get_url_from_link(cls, link):
        """Join the href of `link` to the gocomics.com root URL."""
        return urljoin_wrapper('http://www.gocomics.com', link['href'])

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (date, images, author, tags) from a comic page."""

        def meta_content(prop):
            """Return the 'content' of the first <meta> with the given property."""
            return soup.find('meta', property=prop)['content']

        published = string_to_date(
            meta_content('article:published_time'), "%Y-%m-%d")
        picture = soup.find('picture', class_='img-fluid item-comic-image')
        comic_imgs = picture.find_all('img')
        author = meta_content('article:author')
        # Only the first 'article:tag' meta element is read.
        tags = meta_content('article:tag')
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [img['src'] for img in comic_imgs],
            'author': author,
            'tags': tags,
        }
4312
4313
4314
class PearlsBeforeSwine(GenericGoComic):
4315
    """Class to retrieve Pearls Before Swine comics."""
4316
    name = 'pearls'
4317
    long_name = 'Pearls Before Swine'
4318
    url = 'http://www.gocomics.com/pearlsbeforeswine'
4319
4320
4321
class Peanuts(GenericGoComic):
4322
    """Class to retrieve Peanuts comics."""
4323
    name = 'peanuts'
4324
    long_name = 'Peanuts'
4325
    url = 'http://www.gocomics.com/peanuts'
4326
4327
4328
class MattWuerker(GenericGoComic):
4329
    """Class to retrieve Matt Wuerker comics."""
4330
    name = 'wuerker'
4331
    long_name = 'Matt Wuerker'
4332
    url = 'http://www.gocomics.com/mattwuerker'
4333
4334
4335
class TomToles(GenericGoComic):
4336
    """Class to retrieve Tom Toles comics."""
4337
    name = 'toles'
4338
    long_name = 'Tom Toles'
4339
    url = 'http://www.gocomics.com/tomtoles'
4340
4341
4342
class BreakOfDay(GenericGoComic):
4343
    """Class to retrieve Break Of Day comics."""
4344
    name = 'breakofday'
4345
    long_name = 'Break Of Day'
4346
    url = 'http://www.gocomics.com/break-of-day'
4347
4348
4349
class Brevity(GenericGoComic):
4350
    """Class to retrieve Brevity comics."""
4351
    name = 'brevity'
4352
    long_name = 'Brevity'
4353
    url = 'http://www.gocomics.com/brevitypanel'
4354
4355
4356
class MichaelRamirez(GenericGoComic):
4357
    """Class to retrieve Michael Ramirez comics."""
4358
    name = 'ramirez'
4359
    long_name = 'Michael Ramirez'
4360
    url = 'http://www.gocomics.com/michaelramirez'
4361
4362
4363
class MikeLuckovich(GenericGoComic):
4364
    """Class to retrieve Mike Luckovich comics."""
4365
    name = 'luckovich'
4366
    long_name = 'Mike Luckovich'
4367
    url = 'http://www.gocomics.com/mikeluckovich'
4368
4369
4370
class JimBenton(GenericGoComic):
4371
    """Class to retrieve Jim Benton comics."""
4372
    # Also on http://jimbenton.tumblr.com
4373
    name = 'benton'
4374
    long_name = 'Jim Benton'
4375
    url = 'http://www.gocomics.com/jim-benton-cartoons'
4376
4377
4378
class TheArgyleSweater(GenericGoComic):
4379
    """Class to retrieve the Argyle Sweater comics."""
4380
    name = 'argyle'
4381
    long_name = 'Argyle Sweater'
4382
    url = 'http://www.gocomics.com/theargylesweater'
4383
4384
4385
class SunnyStreet(GenericGoComic):
4386
    """Class to retrieve Sunny Street comics."""
4387
    # Also on http://www.sunnystreetcomics.com
4388
    name = 'sunny'
4389
    long_name = 'Sunny Street'
4390
    url = 'http://www.gocomics.com/sunny-street'
4391
4392
4393
class OffTheMark(GenericGoComic):
4394
    """Class to retrieve Off The Mark comics."""
4395
    # Also on https://www.offthemark.com
4396
    name = 'offthemark'
4397
    long_name = 'Off The Mark'
4398
    url = 'http://www.gocomics.com/offthemark'
4399
4400
4401
class WuMo(GenericGoComic):
4402
    """Class to retrieve WuMo comics."""
4403
    # Also on http://wumo.com
4404
    name = 'wumo'
4405
    long_name = 'WuMo'
4406
    url = 'http://www.gocomics.com/wumo'
4407
4408
4409
class LunarBaboon(GenericGoComic):
4410
    """Class to retrieve Lunar Baboon comics."""
4411
    # Also on http://www.lunarbaboon.com
4412
    # Also on https://tapastic.com/series/Lunarbaboon
4413
    name = 'lunarbaboon'
4414
    long_name = 'Lunar Baboon'
4415
    url = 'http://www.gocomics.com/lunarbaboon'
4416
4417
4418
class SandersenGocomic(GenericGoComic):
4419
    """Class to retrieve Sarah Andersen comics."""
4420
    # Also on http://sarahcandersen.com
4421
    # Also on http://tapastic.com/series/Doodle-Time
4422
    name = 'sandersen-goc'
4423
    long_name = 'Sarah Andersen (from GoComics)'
4424
    url = 'http://www.gocomics.com/sarahs-scribbles'
4425
4426
4427
class SaturdayMorningBreakfastCerealGoComic(GenericGoComic):
4428
    """Class to retrieve Saturday Morning Breakfast Cereal comics."""
4429
    # Also on http://smbc-comics.tumblr.com
4430
    # Also on http://www.smbc-comics.com
4431
    name = 'smbc-goc'
4432
    long_name = 'Saturday Morning Breakfast Cereal (from GoComics)'
4433
    url = 'http://www.gocomics.com/saturday-morning-breakfast-cereal'
4434
    _categories = ('SMBC', )
4435
4436
4437
class CalvinAndHobbesGoComic(GenericGoComic):
4438
    """Class to retrieve Calvin and Hobbes comics."""
4439
    # From gocomics, not http://marcel-oehler.marcellosendos.ch/comics/ch/
4440
    name = 'calvin-goc'
4441
    long_name = 'Calvin and Hobbes (from GoComics)'
4442
    url = 'http://www.gocomics.com/calvinandhobbes'
4443
4444
4445
class RallGoComic(GenericGoComic):
4446
    """Class to retrieve Ted Rall comics."""
4447
    # Also on http://rall.com/comic
4448
    name = 'rall-goc'
4449
    long_name = "Ted Rall (from GoComics)"
4450
    url = "http://www.gocomics.com/ted-rall"
4451
    _categories = ('RALL', )
4452
4453
4454
class TheAwkwardYetiGoComic(GenericGoComic):
4455
    """Class to retrieve The Awkward Yeti comics."""
4456
    # Also on http://larstheyeti.tumblr.com
4457
    # Also on http://theawkwardyeti.com
4458
    # Also on https://tapastic.com/series/TheAwkwardYeti
4459
    name = 'yeti-goc'
4460
    long_name = 'The Awkward Yeti (from GoComics)'
4461
    url = 'http://www.gocomics.com/the-awkward-yeti'
4462
    _categories = ('YETI', )
4463
4464
4465
class BerkeleyMewsGoComics(GenericGoComic):
4466
    """Class to retrieve Berkeley Mews comics."""
4467
    # Also on http://mews.tumblr.com
4468
    # Also on http://www.berkeleymews.com
4469
    name = 'berkeley-goc'
4470
    long_name = 'Berkeley Mews (from GoComics)'
4471
    url = 'http://www.gocomics.com/berkeley-mews'
4472
    _categories = ('BERKELEY', )
4473
4474
4475
class SheldonGoComics(GenericGoComic):
4476
    """Class to retrieve Sheldon comics."""
4477
    # Also on http://www.sheldoncomics.com
4478
    name = 'sheldon-goc'
4479
    long_name = 'Sheldon Comics (from GoComics)'
4480
    url = 'http://www.gocomics.com/sheldon'
4481
4482
4483
class FowlLanguageGoComics(GenericGoComic):
4484
    """Class to retrieve Fowl Language comics."""
4485
    # Also on http://www.fowllanguagecomics.com
4486
    # Also on http://tapastic.com/series/Fowl-Language-Comics
4487
    # Also on http://fowllanguagecomics.tumblr.com
4488
    name = 'fowllanguage-goc'
4489
    long_name = 'Fowl Language Comics (from GoComics)'
4490
    url = 'http://www.gocomics.com/fowl-language'
4491
    _categories = ('FOWLLANGUAGE', )
4492
4493
4494
class NickAnderson(GenericGoComic):
4495
    """Class to retrieve Nick Anderson comics."""
4496
    name = 'nickanderson'
4497
    long_name = 'Nick Anderson'
4498
    url = 'http://www.gocomics.com/nickanderson'
4499
4500
4501
class GarfieldGoComics(GenericGoComic):
4502
    """Class to retrieve Garfield comics."""
4503
    # Also on http://garfield.com
4504
    name = 'garfield-goc'
4505
    long_name = 'Garfield (from GoComics)'
4506
    url = 'http://www.gocomics.com/garfield'
4507
    _categories = ('GARFIELD', )
4508
4509
4510
class DorrisMcGoComics(GenericGoComic):
4511
    """Class to retrieve Dorris Mc Comics"""
4512
    # Also on http://dorrismccomics.com
4513
    name = 'dorrismc-goc'
4514
    long_name = 'Dorris Mc (from GoComics)'
4515
    url = 'http://www.gocomics.com/dorris-mccomics'
4516
4517
4518
class FoxTrot(GenericGoComic):
4519
    """Class to retrieve FoxTrot comics."""
4520
    name = 'foxtrot'
4521
    long_name = 'FoxTrot'
4522
    url = 'http://www.gocomics.com/foxtrot'
4523
4524
4525
class FoxTrotClassics(GenericGoComic):
4526
    """Class to retrieve FoxTrot Classics comics."""
4527
    name = 'foxtrot-classics'
4528
    long_name = 'FoxTrot Classics'
4529
    url = 'http://www.gocomics.com/foxtrotclassics'
4530
4531
4532
class MisterAndMeGoComics(GenericEmptyComic, GenericGoComic):  # Removed ?
4533
    """Class to retrieve Mister & Me Comics."""
4534
    # Also on http://www.mister-and-me.com
4535
    # Also on https://tapastic.com/series/Mister-and-Me
4536
    name = 'mister-goc'
4537
    long_name = 'Mister & Me (from GoComics)'
4538
    url = 'http://www.gocomics.com/mister-and-me'
4539
4540
4541
class NonSequitur(GenericGoComic):
4542
    """Class to retrieve Non Sequitur (Wiley Miller) comics."""
4543
    name = 'nonsequitur'
4544
    long_name = 'Non Sequitur'
4545
    url = 'http://www.gocomics.com/nonsequitur'
4546
4547
4548
class GenericTapasticComic(GenericListableComic):
    """Generic class to handle the logic common to comics from tapastic.com."""
    _categories = ('TAPASTIC', )

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Get information about a particular comics.

        `soup` is the parsed episode page and `archive_elt` the episode
        description (a mapping with at least 'publishDate', 'title' and 'id').
        Returns the info dict, or None (after printing a notice) when the
        page does not contain any 'art-image' image yet.
        """
        # 'publishDate' is divided by 1000 before being used as a timestamp
        # (presumably epoch milliseconds - confirm against the site data).
        timestamp = int(archive_elt['publishDate']) / 1000.0
        # NOTE(review): fromtimestamp() converts to local time, so the date
        # may depend on the timezone of the machine running this.
        day = datetime.datetime.fromtimestamp(timestamp).date()
        imgs = soup.find_all('img', class_='art-image')
        if not imgs:
            print("Comic %s is being uploaded, retry later" % cls.get_url_from_archive_element(archive_elt))
            return None
        return {
            'day': day.day,
            'year': day.year,
            'month': day.month,
            'img': [i['src'] for i in imgs],
            'title': archive_elt['title'],
        }

    @classmethod
    def get_url_from_archive_element(cls, archive_elt):
        """Build the episode URL from the 'id' of the episode description."""
        return 'http://tapastic.com/episode/' + str(archive_elt['id'])

    @classmethod
    def get_archive_elements(cls):
        """Return the episode descriptions embedded in the series page.

        Information is stored in the javascript part of the page: the
        first line of the form 'episodeList : <json>,' is located and its
        JSON payload is decoded.

        Raises IndexError when no such line is found.
        """
        pref, suff = 'episodeList : ', ','
        # I don't know the clean way to get it so this is the ugly way.
        for raw_line in urlopen_wrapper(cls.url).readlines():
            line = raw_line.decode('utf-8').strip()
            if line.startswith(pref) and line.endswith(suff):
                # Drop the prefix and the trailing comma to keep the JSON only.
                return json.loads(line[len(pref):-len(suff)])
        # Same exception type as indexing an empty list of matches, but
        # with a message saying what was looked for.
        raise IndexError("no '%s' line found at %s" % (pref.strip(), cls.url))
4581
4582
4583
class VegetablesForDessert(GenericTapasticComic):
4584
    """Class to retrieve Vegetables For Dessert comics."""
4585
    # Also on http://vegetablesfordessert.tumblr.com
4586
    name = 'vegetables'
4587
    long_name = 'Vegetables For Dessert'
4588
    url = 'http://tapastic.com/series/vegetablesfordessert'
4589
4590
4591
class FowlLanguageTapa(GenericTapasticComic):
4592
    """Class to retrieve Fowl Language comics."""
4593
    # Also on http://www.fowllanguagecomics.com
4594
    # Also on http://fowllanguagecomics.tumblr.com
4595
    # Also on http://www.gocomics.com/fowl-language
4596
    name = 'fowllanguage-tapa'
4597
    long_name = 'Fowl Language Comics (from Tapastic)'
4598
    url = 'http://tapastic.com/series/Fowl-Language-Comics'
4599
    _categories = ('FOWLLANGUAGE', )
4600
4601
4602
class OscillatingProfundities(GenericTapasticComic):
4603
    """Class to retrieve Oscillating Profundities comics."""
4604
    name = 'oscillating'
4605
    long_name = 'Oscillating Profundities'
4606
    url = 'http://tapastic.com/series/oscillatingprofundities'
4607
4608
4609
class ZnoflatsComics(GenericTapasticComic):
4610
    """Class to retrieve Znoflats comics."""
4611
    name = 'znoflats'
4612
    long_name = 'Znoflats Comics'
4613
    url = 'http://tapastic.com/series/Znoflats-Comics'
4614
4615
4616
class SandersenTapastic(GenericTapasticComic):
4617
    """Class to retrieve Sarah Andersen comics."""
4618
    # Also on http://sarahcandersen.com
4619
    # Also on http://www.gocomics.com/sarahs-scribbles
4620
    name = 'sandersen-tapa'
4621
    long_name = 'Sarah Andersen (from Tapastic)'
4622
    url = 'http://tapastic.com/series/Doodle-Time'
4623
4624
4625
class TubeyToonsTapastic(GenericTapasticComic):
    """Retrieve TubeyToons comics from their Tapastic series page."""
    # Also published at:
    #  - http://tubeytoons.com
    #  - http://tubeytoons.tumblr.com
    name = "tubeytoons-tapa"
    long_name = "Tubey Toons (from Tapastic)"
    url = "http://tapastic.com/series/Tubey-Toons"
    # NOTE(review): the tag is spelled 'TUNEYTOONS' (not 'TUBEYTOONS'); kept
    # as is because it presumably has to match other classes - confirm.
    _categories = ("TUNEYTOONS",)
4633
4634
4635
class AnythingComicTapastic(GenericTapasticComic):
    """Retrieve Anything Comic strips from their Tapastic series page."""
    # Also published at http://www.anythingcomic.com
    name = "anythingcomic-tapa"
    long_name = "Anything Comic (from Tapastic)"
    url = "http://tapastic.com/series/anything"
4641
4642
4643
class UnearthedComicsTapastic(GenericTapasticComic):
    """Retrieve Unearthed Comics from their Tapastic series page."""
    # Also published at:
    #  - http://unearthedcomics.com
    #  - http://unearthedcomics.tumblr.com
    name = "unearthed-tapa"
    long_name = "Unearthed Comics (from Tapastic)"
    url = "http://tapastic.com/series/UnearthedComics"
    _categories = ("UNEARTHED",)
4651
4652
4653
class EverythingsStupidTapastic(GenericTapasticComic):
    """Retrieve Everything's Stupid comics from their Tapastic series page."""
    # Also published at:
    #  - http://www.webtoons.com/en/challenge/everythings-stupid/list?title_no=14591
    #  - http://everythingsstupid.net
    name = "stupid-tapa"
    long_name = "Everything's Stupid (from Tapastic)"
    url = "http://tapastic.com/series/EverythingsStupid"
4660
4661
4662
class JustSayEhTapastic(GenericTapasticComic):
    """Retrieve Just Say Eh comics from their Tapastic series page."""
    # Also published at http://www.justsayeh.com
    name = "justsayeh-tapa"
    long_name = "Just Say Eh (from Tapastic)"
    url = "http://tapastic.com/series/Just-Say-Eh"
4668
4669
4670
class ThorsThundershackTapastic(GenericTapasticComic):
    """Retrieve Thor's Thundershack comics from their Tapastic series page."""
    # Also published at http://www.thorsthundershack.com
    name = "thor-tapa"
    long_name = "Thor's Thundershack (from Tapastic)"
    # NOTE(review): the slug ends in 'Thundershac' (no final 'k'); presumably
    # that is the actual Tapastic slug - confirm before changing it.
    url = "http://tapastic.com/series/Thors-Thundershac"
    _categories = ("THOR",)
4677
4678
4679
class OwlTurdTapastic(GenericTapasticComic):
    """Retrieve Owl Turd comics from their Tapastic series page."""
    # Also published at http://owlturd.com
    name = "owlturd-tapa"
    long_name = "Owl Turd (from Tapastic)"
    url = "http://tapastic.com/series/Owl-Turd-Comix"
    _categories = ("OWLTURD",)
4686
4687
4688
class GoneIntoRaptureTapastic(GenericTapasticComic):
    """Retrieve Gone Into Rapture comics from their Tapastic series page."""
    # Also published at:
    #  - http://goneintorapture.tumblr.com
    #  - http://www.goneintorapture.com
    name = "rapture-tapa"
    long_name = "Gone Into Rapture (from Tapastic)"
    url = "http://tapastic.com/series/Goneintorapture"
4695
4696
4697
class HeckIfIKnowComicsTapa(GenericTapasticComic):
    """Retrieve Heck If I Know comics from their Tapastic series page."""
    # Also published at http://heckifiknowcomics.com
    name = "heck-tapa"
    long_name = "Heck if I Know comics (from Tapastic)"
    url = "http://tapastic.com/series/Regular"
4703
4704
4705
class CheerUpEmoKidTapa(GenericTapasticComic):
    """Retrieve Cheer Up Emo Kid comics from their Tapastic series page."""
    # Also published at:
    #  - http://www.cheerupemokid.com
    #  - http://enzocomics.tumblr.com
    name = "cuek-tapa"
    long_name = "Cheer Up Emo Kid (from Tapastic)"
    url = "http://tapastic.com/series/CUEK"
4712
4713
4714
class BigFootJusticeTapa(GenericTapasticComic):
    """Retrieve Big Foot Justice comics from their Tapastic series page."""
    # Also published at http://bigfootjustice.com
    name = "bigfoot-tapa"
    long_name = "Big Foot Justice (from Tapastic)"
    url = "http://tapastic.com/series/bigfoot-justice"
4720
4721
4722
class UpAndOutTapa(GenericTapasticComic):
    """Retrieve Up & Out comics from their Tapastic series page."""
    # Also published at http://upandoutcomic.tumblr.com
    name = "upandout-tapa"
    long_name = "Up And Out (from Tapastic)"
    url = "http://tapastic.com/series/UP-and-OUT"
4728
4729
4730
class ToonHoleTapa(GenericTapasticComic):
    """Retrieve Toon Hole comics from their Tapastic series page."""
    # Also published at http://www.toonhole.com
    name = "toonhole-tapa"
    long_name = "Toon Hole (from Tapastic)"
    url = "http://tapastic.com/series/TOONHOLE"
4736
4737
4738
class AngryAtNothingTapa(GenericTapasticComic):
    """Retrieve Angry At Nothing comics from their Tapastic series page."""
    # Also published at http://www.angryatnothing.net
    name = "angry-tapa"
    long_name = "Angry At Nothing (from Tapastic)"
    url = "http://tapastic.com/series/Comics-yeah-definitely-comics-"
4744
4745
4746
class LeleozTapa(GenericTapasticComic):
    """Retrieve Leleoz comics from their Tapastic series page."""
    # Also published at http://leleozcomics.tumblr.com
    name = "leleoz-tapa"
    long_name = "Leleoz (from Tapastic)"
    url = "https://tapastic.com/series/Leleoz"
4752
4753
4754
class TheAwkwardYetiTapa(GenericTapasticComic):
    """Retrieve The Awkward Yeti comics from their Tapastic series page."""
    # Also published at:
    #  - http://www.gocomics.com/the-awkward-yeti
    #  - http://theawkwardyeti.com
    #  - http://larstheyeti.tumblr.com
    name = "yeti-tapa"
    long_name = "The Awkward Yeti (from Tapastic)"
    url = "https://tapastic.com/series/TheAwkwardYeti"
    _categories = ("YETI",)
4763
4764
4765
class AsPerUsualTapa(GenericTapasticComic):
    """Class to retrieve As Per Usual comics from Tapastic."""
    # Also on http://as-per-usual.tumblr.com
    name = 'usual-tapa'
    long_name = 'As Per Usual (from Tapastic)'
    url = 'https://tapastic.com/series/AsPerUsual'
    # Spelled `_categories` (leading underscore) like the other comic
    # classes; the attribute was previously named `categories`.
    _categories = ('DAMILEE', )
4772
4773
4774
class HotComicsForCoolPeopleTapa(GenericTapasticComic):
    """Class to retrieve Hot Comics For Cool People from Tapastic."""
    # Also on http://hotcomicsforcoolpeople.tumblr.com
    # Also on http://hotcomics.biz (links to tumblr)
    # Also on http://hcfcp.com (links to tumblr)
    name = 'hotcomics-tapa'
    long_name = 'Hot Comics For Cool People (from Tapastic)'
    url = 'https://tapastic.com/series/Hot-Comics-For-Cool-People'
    # Spelled `_categories` (leading underscore) like the other comic
    # classes; the attribute was previously named `categories`.
    _categories = ('DAMILEE', )
4783
4784
4785
class OneOneOneOneComicTapa(GenericTapasticComic):
    """Retrieve 1111 Comics from their Tapastic series page."""
    # Also published at:
    #  - http://www.1111comics.me
    #  - http://comics1111.tumblr.com
    name = "1111-tapa"
    long_name = "1111 Comics (from Tapastic)"
    url = "https://tapastic.com/series/1111-Comics"
    _categories = ("ONEONEONEONE",)
4793
4794
4795
class TumbleDryTapa(GenericTapasticComic):
    """Class to retrieve Tumble Dry comics from Tapastic."""
    # Also on http://tumbledrycomics.com
    name = 'tumbledry-tapa'
    # Display name fixed: it used to read 'Tumblr Dry', which matches neither
    # the comic's name nor the `name`/`url` values of this class.
    long_name = 'Tumble Dry (from Tapastic)'
    url = 'https://tapastic.com/series/TumbleDryComics'
4801
4802
4803
class DeadlyPanelTapa(GenericTapasticComic):
    """Retrieve Deadly Panel comics from their Tapastic series page."""
    # Also published at http://www.deadlypanel.com
    name = "deadly-tapa"
    long_name = "Deadly Panel (from Tapastic)"
    url = "https://tapastic.com/series/deadlypanel"
4809
4810
4811
class ChrisHallbeckMaxiTapa(GenericTapasticComic):
    """Class to retrieve Chris Hallbeck's Maximumble comics from Tapastic."""
    # Also on http://chrishallbeck.tumblr.com
    # Also on http://maximumble.com
    name = 'hallbeckmaxi-tapa'
    # Author name fixed in the display string ('Hallback' -> 'Hallbeck').
    long_name = 'Chris Hallbeck - Maximumble (from Tapastic)'
    url = 'https://tapastic.com/series/Maximumble'
    # NOTE(review): the tag keeps the 'HALLBACK' spelling on purpose; it
    # presumably has to match the other Chris Hallbeck classes - confirm.
    _categories = ('HALLBACK', )
4819
4820
4821
class ChrisHallbeckMiniTapa(GenericTapasticComic):
    """Class to retrieve Chris Hallbeck's Minimumble comics from Tapastic."""
    # Also on http://chrishallbeck.tumblr.com
    # Also on http://minimumble.com
    name = 'hallbeckmini-tapa'
    # Author name fixed in the display string ('Hallback' -> 'Hallbeck').
    long_name = 'Chris Hallbeck - Minimumble (from Tapastic)'
    url = 'https://tapastic.com/series/Minimumble'
    # NOTE(review): the tag keeps the 'HALLBACK' spelling on purpose; it
    # presumably has to match the other Chris Hallbeck classes - confirm.
    _categories = ('HALLBACK', )
4829
4830
4831
class ChrisHallbeckBiffTapa(GenericTapasticComic):
    """Class to retrieve Chris Hallbeck's The Book of Biff comics from Tapastic."""
    # Also on http://chrishallbeck.tumblr.com
    # Also on http://thebookofbiff.com
    name = 'hallbeckbiff-tapa'
    # Author name fixed in the display string ('Hallback' -> 'Hallbeck').
    long_name = 'Chris Hallbeck - The Book of Biff (from Tapastic)'
    url = 'https://tapastic.com/series/Biff'
    # NOTE(review): the tag keeps the 'HALLBACK' spelling on purpose; it
    # presumably has to match the other Chris Hallbeck classes - confirm.
    _categories = ('HALLBACK', )
4839
4840
4841
class RandoWisTapa(GenericTapasticComic):
    """Retrieve RandoWis comics from their Tapastic series page."""
    # Also published at https://randowis.com
    name = "randowis-tapa"
    long_name = "RandoWis (from Tapastic)"
    url = "https://tapastic.com/series/RandoWis"
4847
4848
4849
class PigeonGazetteTapa(GenericTapasticComic):
    """Retrieve The Pigeon Gazette comics from their Tapastic series page."""
    # Also published at http://thepigeongazette.tumblr.com
    name = "pigeon-tapa"
    long_name = "The Pigeon Gazette (from Tapastic)"
    url = "https://tapastic.com/series/The-Pigeon-Gazette"
4855
4856
4857
class TheOdd1sOutTapa(GenericTapasticComic):
    """Retrieve The Odd 1s Out comics from their Tapastic series page."""
    # Also published at:
    #  - http://theodd1sout.com
    #  - http://theodd1sout.tumblr.com
    name = "theodd-tapa"
    long_name = "The Odd 1s Out (from Tapastic)"
    url = "https://tapastic.com/series/Theodd1sout"
4864
4865
4866
class TheWorldIsFlatTapa(GenericTapasticComic):
    """Retrieve The World Is Flat comics from their Tapastic series page."""
    # Also published at http://theworldisflatcomics.tumblr.com
    name = "flatworld-tapa"
    long_name = "The World Is Flat (from Tapastic)"
    url = "https://tapastic.com/series/The-World-is-Flat"
4872
4873
4874
class MisterAndMeTapa(GenericTapasticComic):
    """Retrieve Mister & Me comics from their Tapastic series page."""
    # Also published at:
    #  - http://www.mister-and-me.com
    #  - http://www.gocomics.com/mister-and-me
    name = "mister-tapa"
    long_name = "Mister & Me (from Tapastic)"
    url = "https://tapastic.com/series/Mister-and-Me"
4881
4882
4883
class TalesOfAbsurdityTapa(GenericTapasticComic):
    """Retrieve Tales Of Absurdity comics from their Tapastic series page."""
    # Also published at:
    #  - http://talesofabsurdity.com
    #  - http://talesofabsurdity.tumblr.com
    name = "absurdity-tapa"
    long_name = "Tales of Absurdity (from Tapastic)"
    url = "http://tapastic.com/series/Tales-Of-Absurdity"
    _categories = ("ABSURDITY",)
4891
4892
4893
class BFGFSTapa(GenericTapasticComic):
    """Retrieve BFGFS comics from their Tapastic series page."""
    # Also published at:
    #  - http://bfgfs.com
    #  - http://bfgfs.tumblr.com
    name = "bfgfs-tapa"
    long_name = "BFGFS (from Tapastic)"
    url = "https://tapastic.com/series/BFGFS"
4900
4901
4902
class DoodleForFoodTapa(GenericTapasticComic):
    """Retrieve Doodle For Food comics from their Tapastic series page."""
    # Also published at http://doodleforfood.com
    name = "doodle-tapa"
    long_name = "Doodle For Food (from Tapastic)"
    url = "https://tapastic.com/series/Doodle-for-Food"
4908
4909
4910
class MrLovensteinTapa(GenericTapasticComic):
    """Retrieve Mr. Lovenstein comics from their Tapastic series page."""
    # Also published at https://tapastic.com/series/MrLovenstein
    # NOTE(review): that address is identical to `url`; presumably the
    # comic's own site was meant here - confirm.
    name = "mrlovenstein-tapa"
    long_name = "Mr. Lovenstein (from Tapastic)"
    url = "https://tapastic.com/series/MrLovenstein"
4916
4917
4918
class CassandraCalinTapa(GenericTapasticComic):
    """Retrieve C. Cassandra comics from their Tapastic series page."""
    # Also published at:
    #  - http://cassandracalin.com
    #  - http://c-cassandra.tumblr.com
    name = "cassandra-tapa"
    long_name = "Cassandra Calin (from Tapastic)"
    url = "https://tapastic.com/series/C-Cassandra-comics"
4925
4926
4927
class WafflesAndPancakes(GenericTapasticComic):
    """Retrieve Waffles And Pancakes comics from their Tapastic series page."""
    # Also published at http://wandpcomic.com
    name = "waffles"
    long_name = "Waffles And Pancakes"
    url = "https://tapastic.com/series/Waffles-and-Pancakes"
4933
4934
4935
class YesterdaysPopcornTapastic(GenericTapasticComic):
    """Retrieve Yesterday's Popcorn comics from their Tapastic series page."""
    # Also published at:
    #  - http://www.yesterdayspopcorn.com
    #  - http://yesterdayspopcorn.tumblr.com
    name = "popcorn-tapa"
    long_name = "Yesterday's Popcorn (from Tapastic)"
    url = "https://tapastic.com/series/Yesterdays-Popcorn"
4942
4943
4944
class OurSuperAdventureTapastic(GenericTapasticComic):
    """Retrieve Our Super Adventure comics from their Tapastic series page."""
    # Also published at http://www.oursuperadventure.com
    # Related addresses listed alongside it:
    #  - http://sarahssketchbook.tumblr.com
    #  - http://sarahgraley.com
    name = "superadventure-tapastic"
    long_name = "Our Super Adventure (from Tapastic)"
    url = "https://tapastic.com/series/Our-Super-Adventure"
4952
4953
4954
class NamelessPCs(GenericTapasticComic):
    """Retrieve Nameless PCs comics from their Tapastic series page."""
    # Also published at http://namelesspcs.com
    name = "namelesspcs-tapa"
    long_name = "NamelessPCs (from Tapastic)"
    url = "https://tapastic.com/series/NamelessPC"
4960
4961
4962
class UbertoolTapa(GenericTapasticComic):
    """Retrieve Ubertool comics from their Tapastic series page."""
    # Also published at:
    #  - http://ubertoolcomic.com
    #  - http://ubertool.tumblr.com
    name = "ubertool-tapa"
    long_name = "Ubertool (from Tapastic)"
    url = "https://tapastic.com/series/ubertool"
    _categories = ("UBERTOOL",)
4970
4971
4972
class BarteNerdsTapa(GenericTapasticComic):
    """Retrieve BarteNerds comics from their Tapastic series page."""
    # Also published at http://www.bartenerds.com
    name = "bartenerds-tapa"
    long_name = "BarteNerds (from Tapastic)"
    url = "https://tapastic.com/series/BarteNERDS"
4978
4979
4980
class SmallBlueYonderTapa(GenericTapasticComic):
    """Retrieve Small Blue Yonder comics from their Tapastic series page."""
    # Also published at http://www.smallblueyonder.com
    name = "smallblue-tapa"
    long_name = "Small Blue Yonder (from Tapastic)"
    url = "https://tapastic.com/series/Small-Blue-Yonder"
4986
4987
4988
class TizzyStitchBirdTapa(GenericTapasticComic):
    """Retrieve Tizzy Stitch Bird comics from their Tapastic series page."""
    # Also published at:
    #  - http://tizzystitchbird.com
    #  - http://tizzystitchbird.tumblr.com
    #  - http://www.webtoons.com/en/challenge/tizzy-stitchbird/list?title_no=50082
    name = "tizzy-tapa"
    long_name = "Tizzy Stitch Bird (from Tapastic)"
    url = "https://tapastic.com/series/TizzyStitchbird"
4996
4997
4998
def get_subclasses(klass):
    """Return every class deriving from `klass`, directly or not.

    The direct subclasses come first, followed by the descendants of each
    of them in turn. A class reachable through several paths is listed
    once per path.
    """
    direct = klass.__subclasses__()
    indirect = [sub for child in direct for sub in get_subclasses(child)]
    return direct + indirect
5004
5005
5006
def remove_st_nd_rd_th_from_date(string):
    """Strip the ordinal suffix from day numbers so the date can be parsed.

    Turns "1st"/"2nd"/"3rd"/"4th" into "1"/"2"/"3"/"4". Only a lower-case
    suffix directly following a digit is removed, so words that merely
    contain those letters ("August", "Monday", "Sunday", ...) are left
    untouched.

    Returns the cleaned-up string.
    """
    # The look-behind ties the suffix to a digit and the trailing \b makes
    # sure the suffix is the end of the token (e.g. "1st," but not "1stuff").
    return re.sub(r'(?<=\d)(?:st|nd|rd|th)\b', '', string)
5014
5015
5016
def string_to_date(string, date_format, local=DEFAULT_LOCAL):
    """Convert a string to a date object.

    Wrapper around datetime.datetime.strptime which parses with the locale
    `local` and then puts the previous locale back.

    string: text to parse.
    date_format: strptime format, described in
        https://docs.python.org/3/library/datetime.html#strftime-and-strptime-behavior
    local: locale name to use while parsing.

    Returns a datetime.date. Raises ValueError (from strptime) when `string`
    does not match `date_format`, and locale.Error when `local` cannot be set.
    """
    # Called with a single argument, setlocale only queries the current value.
    prev_locale = locale.setlocale(locale.LC_ALL)
    must_switch = local != prev_locale
    if must_switch:
        locale.setlocale(locale.LC_ALL, local)
    try:
        return datetime.datetime.strptime(string, date_format).date()
    finally:
        # The locale is process-wide: restore it even when parsing fails so
        # that a bad date string does not leave it changed.
        if must_switch:
            locale.setlocale(locale.LC_ALL, prev_locale)
5027
5028
5029
# Every direct or indirect subclass of GenericComic, without duplicates.
COMICS = set(get_subclasses(GenericComic))
# Only the classes with a name are kept (presumably the unnamed ones are
# intermediate base classes - confirm).
VALID_COMICS = [comic for comic in COMICS if comic.name is not None]
# Lookup table from comic name to class; the check fails on duplicated names.
COMIC_NAMES = dict((comic.name, comic) for comic in VALID_COMICS)
assert len(VALID_COMICS) == len(COMIC_NAMES)
# Same uniqueness check for the Python class names.
CLASS_NAMES = set(comic.__name__ for comic in VALID_COMICS)
assert len(VALID_COMICS) == len(CLASS_NAMES)
5035