Completed
Push — master ( f3d2c0...39361d )
by De
01:11
created

comics.py (18 issues)

1
#! /usr/bin/python3
2
# vim: set expandtab tabstop=4 shiftwidth=4 :
3
"""Module to retrieve webcomics"""
4
5
from comic_abstract import GenericComic, get_date_for_comic
6
import re
7
from datetime import date, timedelta
8
import datetime
9
from urlfunctions import get_soup_at_url, urljoin_wrapper,\
10
    convert_iri_to_plain_ascii_uri, load_json_at_url, urlopen_wrapper
11
import json
12
import locale
13
import urllib
14
15
DEFAULT_LOCAL = 'en_GB.UTF-8'
16
17
18
class Xkcd(GenericComic):
    """Retriever for xkcd comics, based on the site's JSON endpoints."""
    name = 'xkcd'
    long_name = 'xkcd'
    url = 'http://xkcd.com'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Yield the comics published after `last_comic`, oldest first.

        Implementation of GenericComic's abstract method. `last_comic` is
        the last comic already retrieved (its 'num' entry is read) or a
        falsy value to start from number 1.
        """
        start = last_comic['num'] + 1 if last_comic else 1
        latest_info = load_json_at_url(urljoin_wrapper(cls.url, 'info.0.json'))
        stop = latest_info['num'] + 1

        for number in range(start, stop):
            if number == 404:
                # Number 404 is skipped on purpose - presumably there is no
                # comic to fetch at that number.
                continue
            json_url = urljoin_wrapper(cls.url, '%d/info.0.json' % number)
            comic = load_json_at_url(json_url)
            # Normalise the raw JSON: 'img' becomes a list, the date parts
            # become integers and a few bookkeeping entries are added.
            comic.update({
                'img': [comic['img']],
                'prefix': '%d-' % number,
                'json_url': json_url,
                'url': urljoin_wrapper(cls.url, str(number)),
                'day': int(comic['day']),
                'month': int(comic['month']),
                'year': int(comic['year']),
            })
            assert comic['num'] == number
            yield comic
44
45
46
# Helper functions corresponding to get_url_from_link/get_url_from_archive_element
47
48
49
@classmethod
def get_href(cls, link):
    """Return the 'href' entry of `link`, unchanged.

    Can be assigned in a class body as the implementation of
    get_url_from_link/get_url_from_archive_element.
    """
    href = link['href']
    return href
53
54
55
@classmethod
def join_cls_url_to_href(cls, link):
    """Return the 'href' entry of `link` joined onto `cls.url`.

    Can be assigned in a class body as the implementation of
    get_url_from_link/get_url_from_archive_element.
    """
    href = link['href']
    return urljoin_wrapper(cls.url, href)
59
60
61
class GenericNavigableComic(GenericComic):
    """Generic class for "navigable" comics : with first/next arrows.

    This class applies to comics where the previous and next comics can
    be accessed from a given comic. Once given a starting point (either
    the first comic or the last comic retrieved), it handles the
    navigation, the retrieval of the soup object and the setting of
    the 'url' attribute on retrieved comics. This greatly limits the
    amount of boilerplate code in the different implementation classes.

    The `get_next_comic` method is implemented in terms of new
    more specialized methods to be implemented/overridden:
        - get_first_comic_link
        - get_navi_link
        - get_comic_info
        - get_url_from_link
    """
    _categories = ('NAVIGABLE', )

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        Sometimes this can be retrieved from any comic page, sometimes from
        the archive page, sometimes it doesn't exist at all and one has
        to iterate backward to find it before hardcoding the result found.
        """
        raise NotImplementedError

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next (or previous - for dev purposes) comic.

        `next_` selects the direction: True for next, False for previous.
        """
        raise NotImplementedError

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returning None makes `get_next_comic` skip the page. The returned
        dict must not contain a 'url' entry: `get_next_comic` sets it.
        """
        raise NotImplementedError

    @classmethod
    def get_url_from_link(cls, link):
        """Get url corresponding to a link. Default implementation is similar to get_href."""
        return link['href']

    @classmethod
    def get_next_link(cls, last_soup):
        """Get link to next comic."""
        link = cls.get_navi_link(last_soup, True)
        cls.log("Next link is %s" % link)
        return link

    @classmethod
    def get_prev_link(cls, last_soup):
        """Get link to previous comic."""
        link = cls.get_navi_link(last_soup, False)
        cls.log("Prev link is %s" % link)
        return link

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generic implementation of get_next_comic for navigable comics.

        Starts after `last_comic` (its 'url' entry is read) or from the
        first comic link when `last_comic` is falsy, then follows the
        "next" links until there is none or until a link leads back to
        the page it was found on.
        """
        url = last_comic['url'] if last_comic else None
        cls.log("starting 'get_next_comic' from %s" % url)
        next_comic = \
            cls.get_next_link(get_soup_at_url(url)) \
            if url else \
            cls.get_first_comic_link()
        cls.log("next/first comic will be %s (url is %s)" % (str(next_comic), url))
        # Dev aid - uncomment to check navigation before retrieving:
        # cls.check_navigation(url)
        while next_comic:
            prev_url, url = url, cls.get_url_from_link(next_comic)
            if prev_url == url:
                # The "next" link points to the current page: stop here
                # instead of looping forever.
                cls.log("got same url %s" % url)
                break
            cls.log("about to get %s (%s)" % (url, str(next_comic)))
            soup = get_soup_at_url(url)
            comic = cls.get_comic_info(soup, next_comic)
            if comic is not None:
                assert 'url' not in comic
                comic['url'] = url
                yield comic
            next_comic = cls.get_next_link(soup)
            cls.log("next comic will be %s" % str(next_comic))

    @classmethod
    def check_first_link(cls):
        """Check that navigation to first comic seems to be working - for dev purposes.

        Returns True when a first link exists and its URL can be fetched
        without an HTTP error, False otherwise.
        """
        # `import urllib` alone does not load the `urllib.error` submodule,
        # so import it explicitly instead of relying on another module
        # having done so.
        import urllib.error

        cls.log("about to check first link")
        ok = True
        firstlink = cls.get_first_comic_link()
        if firstlink is None:
            print("From %s : no first link" % cls.url)
            ok = False
        else:
            firsturl = cls.get_url_from_link(firstlink)
            try:
                get_soup_at_url(firsturl)
            except urllib.error.HTTPError:
                print("From %s : invalid first url" % cls.url)
                ok = False
        cls.log("checked first link -> returned %d" % ok)
        return ok

    @classmethod
    def check_prev_next_links(cls, url):
        """Check that navigation to prev/next from a given URL seems to be working - for dev purposes.

        Going backward then forward (and forward then backward) from `url`
        is expected to lead back to `url`. Returns True when every check
        that could be performed succeeded, False otherwise.
        """
        cls.log("about to check prev/next from %s" % url)
        ok = True
        if url is None:
            prevlink, nextlink = None, None
        else:
            soup = get_soup_at_url(url)
            prevlink, nextlink = cls.get_prev_link(soup), cls.get_next_link(soup)
        if prevlink is None and nextlink is None:
            print("From %s : no previous nor next" % url)
            ok = False
        else:
            if prevlink:
                prevurl = cls.get_url_from_link(prevlink)
                prevsoup = get_soup_at_url(prevurl)
                prevnextlink = cls.get_next_link(prevsoup)
                prevnext = cls.get_url_from_link(prevnextlink) if prevnextlink is not None else "NO URL"
                if prevnext != url:
                    print("From %s, going backward then forward leads to %s" % (url, prevnext))
                    ok = False
            if nextlink:
                nexturl = cls.get_url_from_link(nextlink)
                # A "next" link pointing to the current page is not followed.
                if nexturl != url:
                    nextsoup = get_soup_at_url(nexturl)
                    nextprevlink = cls.get_prev_link(nextsoup)
                    nextprev = cls.get_url_from_link(nextprevlink) if nextprevlink is not None else "NO URL"
                    if nextprev != url:
                        print("From %s, going forward then backward leads to %s" % (url, nextprev))
                        ok = False
        cls.log("checked prev/next from %s -> returned %d" % (url, ok))
        return ok

    @classmethod
    def check_navigation(cls, url):
        """Check that navigation functions seem to be working - for dev purposes.

        Runs both `check_first_link` and `check_prev_next_links(url)` and
        returns True only when both succeed.
        """
        cls.log("about to check navigation from %s" % url)
        first = cls.check_first_link()
        prevnext = cls.check_prev_next_links(url)
        ok = first and prevnext
        cls.log("checked navigation from %s -> returned %d" % (url, ok))
        return ok
207
208
209
class GenericListableComic(GenericComic):
    """Generic class for "listable" comics : with a list of comics (aka 'archive')

    The `get_next_comic` method is implemented in terms of new
    more specialized methods to be implemented/overridden:
        - get_archive_elements
        - get_url_from_archive_element
        - get_comic_info
    """
    _categories = ('LISTABLE', )

    @classmethod
    def get_archive_elements(cls):
        """Return an iterable over the elements of the archive."""
        raise NotImplementedError

    @classmethod
    def get_url_from_archive_element(cls, archive_elt):
        """Return the url of the comic described by an archive element."""
        raise NotImplementedError

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Return the information about one comic (None to skip it)."""
        raise NotImplementedError

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generic implementation of get_next_comic for listable comics.

        Archive elements are walked in order. Elements are ignored until
        the url of `last_comic` has been seen; everything after it is
        retrieved. Without `last_comic`, every element is retrieved.
        """
        pending_url = last_comic['url'] if last_comic else None
        for element in cls.get_archive_elements():
            url = cls.get_url_from_archive_element(element)
            cls.log("considering %s" % url)
            if pending_url is not None:
                # Still looking for the last comic already retrieved.
                if pending_url == url:
                    pending_url = None
                continue
            cls.log("about to get %s (%s)" % (url, str(element)))
            page = get_soup_at_url(url)
            info = cls.get_comic_info(page, element)
            if info is None:
                continue
            assert 'url' not in info
            info['url'] = url
            yield info
        if pending_url is not None:
            print("Did not find %s : there might be a problem" % pending_url)
254
255
# Helper functions corresponding to get_first_comic_link/get_navi_link
256
257
258
@classmethod
def get_link_rel_next(cls, last_soup, next_):
    """Implementation of get_navi_link based on <link rel="next"/"prev">."""
    if next_:
        rel = 'next'
    else:
        rel = 'prev'
    return last_soup.find('link', rel=rel)
262
263
264
@classmethod
def get_a_rel_next(cls, last_soup, next_):
    """Implementation of get_navi_link based on <a rel="next"/"prev">."""
    if next_:
        rel = 'next'
    else:
        rel = 'prev'
    return last_soup.find('a', rel=rel)
268
269
270
@classmethod
def get_a_navi_navinext(cls, last_soup, next_):
    """Implementation of get_navi_link based on <a class="navi navi-next/prev">."""
    if next_:
        css_class = 'navi navi-next'
    else:
        css_class = 'navi navi-prev'
    return last_soup.find('a', class_=css_class)
274
275
276
@classmethod
def get_a_navi_comicnavnext_navinext(cls, last_soup, next_):
    """Implementation of get_navi_link based on the 'navi comic-nav-*' <a> classes."""
    if next_:
        css_class = 'navi comic-nav-next navi-next'
    else:
        css_class = 'navi comic-nav-previous navi-prev'
    return last_soup.find('a', class_=css_class)
280
281
282
@classmethod
def get_a_comicnavbase_comicnavnext(cls, last_soup, next_):
    """Implementation of get_navi_link based on the 'comic-nav-base' <a> classes."""
    if next_:
        css_class = 'comic-nav-base comic-nav-next'
    else:
        css_class = 'comic-nav-base comic-nav-previous'
    return last_soup.find('a', class_=css_class)
286
287
288
@classmethod
def get_a_navi_navifirst(cls):
    """Implementation of get_first_comic_link.

    Looks for <a class="navi navi-first"> on the page at `cls.url`.
    """
    home_page = get_soup_at_url(cls.url)
    return home_page.find('a', class_='navi navi-first')
292
293
294
@classmethod
def get_div_navfirst_a(cls):
    """Implementation of get_first_comic_link.

    Fetches the page at `cls.url` and returns the first <a> found inside
    its <div class="nav-first">, or None when there is no such div.
    """
    nav_first = get_soup_at_url(cls.url).find('div', class_="nav-first")
    if nav_first is None:
        # Report "no first link" with None, like the other helpers,
        # instead of failing with AttributeError on the chained find().
        return None
    return nav_first.find('a')
298
299
300
@classmethod
def get_a_comicnavbase_comicnavfirst(cls):
    """Implementation of get_first_comic_link.

    Looks for <a class="comic-nav-base comic-nav-first"> on the page at
    `cls.url`.
    """
    home_page = get_soup_at_url(cls.url)
    return home_page.find('a', class_='comic-nav-base comic-nav-first')
304
305
306
@classmethod
def simulate_first_link(cls):
    """Implementation of get_first_comic_link building a link-like dict
    from the `first_url` attribute provided by the class.

    Note: The first URL can easily be found using :
    `get_first_comic_link = navigate_to_first_comic`.
    """
    fake_link = {'href': cls.first_url}
    return fake_link
315
316
317
@classmethod
def navigate_to_first_comic(cls):
    """Implementation of get_first_comic_link walking backward from a URL
    typed by the user until the first comic is reached.

    Sometimes, the first comic cannot be reached directly so to start
    from the first comic one has to go to the previous comic until
    there is no previous comic. Once this URL is known, it is better
    to hardcode it but for development purposes, it is convenient to
    have an automatic way to find it.

    Then, the URL found can easily be used via `simulate_first_link`.
    """
    current_url = input("Get starting URL: ")
    while True:
        # Every visited URL is echoed so that the last one printed can
        # be copied by the developer.
        print(current_url)
        previous = cls.get_prev_link(get_soup_at_url(current_url))
        if not previous:
            break
        current_url = cls.get_url_from_link(previous)
    return {'href': current_url}
338
339
340
class GenericEmptyComic(GenericComic):
    """Generic class for comics where nothing is to be done.

    It can be used to temporarily deactivate a comic that does not work
    properly, by replacing `class MyComic(GenericWhateverComic)` with
    `class MyComic(GenericEmptyComic, GenericWhateverComic)`."""
    _categories = ('EMPTY', )

    @classmethod
    def get_next_comic(cls, last_comic):
        """Implementation of get_next_comic: log and return an empty list."""
        cls.log("comic is considered as empty - returning no comic")
        no_comics = []
        return no_comics
353
354
355 View Code Duplication
class ExtraFabulousComics(GenericNavigableComic):
    """Class to retrieve Extra Fabulous Comics."""
    name = 'efc'
    long_name = 'Extra Fabulous Comics'
    url = 'http://extrafabulouscomics.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with the title, the publication date (day, month,
        year), a file prefix derived from the title and the sources of the
        images whose 'src' starts with the site's wp-content/uploads path.
        """
        # The base URL is escaped so that its dots only match literal dots.
        img_src_re = re.compile('^%s/wp-content/uploads/' % re.escape(cls.url))
        imgs = soup.find_all('img', src=img_src_re)
        title = soup.find('meta', property='og:title')['content']
        # Only the leading YYYY-MM-DD part of the timestamp is used.
        date_str = soup.find('meta', property='article:published_time')['content'][:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        return {
            'title': title,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'prefix': title + '-'
        }
379
380
381 View Code Duplication
class GenericLeMondeBlog(GenericNavigableComic):
    """Shared base class for the comics hosted on Le Monde blogs.

    Subclasses are expected to provide `first_url` (on top of the usual
    name/long_name/url attributes).
    """
    _categories = ('LEMONDE', 'FRANCAIS')
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    # Placeholder read by simulate_first_link - to be overridden.
    first_url = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract short link, title, date and images from a post page."""
        shortlink_tag = soup.find('link', rel='shortlink')
        short_link = shortlink_tag['href']
        title_tag = soup.find('meta', property='og:title')
        post_title = title_tag['content']
        raw_date = soup.find("span", class_="entry-date").string
        # The third argument looks like a locale name (French month names).
        published = string_to_date(raw_date, "%d %B %Y", "fr_FR.utf8")
        image_urls = []
        for meta in soup.find_all('meta', property='og:image'):
            image_urls.append(convert_iri_to_plain_ascii_uri(meta['content']))
        return {
            'title': post_title,
            'url2': short_link,
            'img': image_urls,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
404
405
406
class ZepWorld(GenericLeMondeBlog):
    """Zep World comics (Le Monde blog)."""
    name = 'zep'
    long_name = 'Zep World'
    url = 'http://zepworld.blog.lemonde.fr'
    # Post used as the starting point of the navigation.
    first_url = 'http://zepworld.blog.lemonde.fr/2014/10/31/bientot-le-blog-de-zep/'
412
413
414
class Vidberg(GenericLeMondeBlog):
    """Vidberg comics (Le Monde blog)."""
    name = "vidberg"
    long_name = "Vidberg - l'actu en patates"
    url = 'http://vidberg.blog.lemonde.fr'
    # Starting point of the navigation. This is not the actual first post:
    # no efficient way to retrieve it was found.
    first_url = 'http://vidberg.blog.lemonde.fr/2012/02/09/revue-de-campagne-la-campagne-du-modem-semballe/'
421
422
423
class Plantu(GenericLeMondeBlog):
    """Plantu comics (Le Monde blog)."""
    name = "plantu"
    long_name = 'Plantu'
    url = 'http://plantu.blog.lemonde.fr'
    # Post used as the starting point of the navigation.
    first_url = 'http://plantu.blog.lemonde.fr/2014/10/28/stress-test-a-bruxelles/'
429
430
431
class XavierGorce(GenericLeMondeBlog):
    """Xavier Gorce comics (Le Monde blog)."""
    name = "gorce"
    long_name = 'Xavier Gorce'
    url = 'http://xaviergorce.blog.lemonde.fr'
    # Post used as the starting point of the navigation.
    first_url = 'http://xaviergorce.blog.lemonde.fr/2015/01/09/distinction/'
437
438
439
class CartooningForPeace(GenericLeMondeBlog):
    """Cartooning For Peace comics (Le Monde blog)."""
    name = "forpeace"
    long_name = 'Cartooning For Peace'
    url = 'http://cartooningforpeace.blog.lemonde.fr'
    # Post used as the starting point of the navigation.
    first_url = 'http://cartooningforpeace.blog.lemonde.fr/2014/12/15/bado/'
445
446
447
class Aurel(GenericLeMondeBlog):
    """Aurel comics (Le Monde blog)."""
    name = "aurel"
    long_name = 'Aurel'
    url = 'http://aurel.blog.lemonde.fr'
    # Post used as the starting point of the navigation.
    first_url = 'http://aurel.blog.lemonde.fr/2014/09/29/le-senat-repasse-a-droite/'
453
454
455
class LesCulottees(GenericLeMondeBlog):
    """Les Culottees comics (Le Monde blog)."""
    name = "culottees"
    long_name = "Les Culottees"
    url = 'http://lesculottees.blog.lemonde.fr'
    # Post used as the starting point of the navigation.
    first_url = 'http://lesculottees.blog.lemonde.fr/2016/01/11/clementine-delait-femme-a-barbe/'
461
462
463
class UneAnneeAuLycee(GenericLeMondeBlog):
    """Une Annee Au Lycee comics (Le Monde blog)."""
    name = "lycee"
    long_name = "Une Annee au Lycee"
    url = "http://uneanneeaulycee.blog.lemonde.fr"
    # Post used as the starting point of the navigation.
    first_url = 'http://uneanneeaulycee.blog.lemonde.fr/2016/06/13/la-semaine-du-bac-est-arrivee/'
469
470
471 View Code Duplication
class Rall(GenericNavigableComic):
    """Retriever for the Ted Rall comics."""
    # Also on http://www.gocomics.com/tedrall
    name = "rall"
    long_name = 'Ted Rall'
    url = 'http://rall.com/comic'
    _categories = ('RALL', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    # Not the first but I didn't find an efficient way to retrieve it
    first_url = 'http://rall.com/2014/01/30/los-angeles-times-cartoon-well-miss-those-california-flowers'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, author, date, description and images from a page."""
        comic_title = soup.find('meta', property='og:title')['content']
        author_span = soup.find("span", class_="author vcard")
        author_name = author_span.find("a").string
        raw_date = soup.find("span", class_="entry-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        description = soup.find('meta', property='og:description')['content']
        content_imgs = soup.find('div', class_='entry-content').find_all('img')
        # The last 7 images are dropped: they are social media buttons.
        comic_imgs = content_imgs[:-7]
        sources = [img['src'] for img in comic_imgs]
        return {
            'title': comic_title,
            'author': author_name,
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'description': description,
            'img': sources,
        }
502
503
504
class Dilem(GenericNavigableComic):
    """Retriever for the Ali Dilem comics."""
    name = "dilem"
    long_name = "Ali Dilem"
    url = "http://information.tv5monde.com/dilem"
    _categories = ('FRANCAIS', )
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://information.tv5monde.com/dilem/2004-06-26'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic (None when there is none)."""
        # The classes are swapped on purpose: prev is next / next is prev.
        css_class = 'prev' if next_ else 'next'
        item = last_soup.find('li', class_=css_class)
        if not item:
            return None
        return item.find('a')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract short url, title, images and date from a comic page."""
        shortlink = soup.find('link', rel='shortlink')['href']
        comic_title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        image_metas = soup.find_all('meta', property='og:image')
        # Only the leading YYYY-MM-DD part of the date is used.
        raw_date = soup.find('span', property='dc:date')['content'][:10]
        published = string_to_date(raw_date, "%Y-%m-%d")
        sources = [meta['content'] for meta in image_metas]
        return {
            'short_url': shortlink,
            'title': comic_title,
            'img': sources,
            'day': published.day,
            'month': published.month,
            'year': published.year,
        }
538
539
540
class SpaceAvalanche(GenericNavigableComic):
    """Retriever for the Space Avalanche comics."""
    name = "avalanche"
    long_name = "Space Avalanche"
    url = "http://www.spaceavalanche.com"
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return a hardcoded link-like dict for the first comic."""
        first_link = {
            'href': "http://www.spaceavalanche.com/2009/02/02/irish-sea/",
            'title': "Irish Sea",
        }
        return first_link

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, date and images; the date is parsed from the url."""
        date_in_url = re.compile('.*/([0-9]*)/([0-9]*)/([0-9]*)/.*$')
        comic_title = link['title']
        comic_url = cls.get_url_from_link(link)
        year, month, day = map(int, date_in_url.match(comic_url).groups())
        entry = soup.find("div", class_="entry")
        sources = [img['src'] for img in entry.find_all("img")]
        return {
            'title': comic_title,
            'day': day,
            'month': month,
            'year': year,
            'img': sources,
        }
568
569
570
class ZenPencils(GenericNavigableComic):
    """Retriever for the ZenPencils comics."""
    # Also on http://zenpencils.tumblr.com
    # Also on http://www.gocomics.com/zen-pencils
    name = "zenpencils"
    long_name = "Zen Pencils"
    url = "http://zenpencils.com"
    _categories = ('ZENPENCILS', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://zenpencils.com/comic/1-ralph-waldo-emerson-make-them-cry/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, description, author, date and images from a page."""
        comic_imgs = soup.find('div', id='comic').find_all('img')
        # Unused alternative: soup.find_all('meta', property='og:image')
        post = soup.find('div', class_='post-content')
        author_name = post.find("span", class_="post-author").find("a").string
        comic_title = soup.find('meta', property='og:title')['content']
        raw_date = post.find('span', class_='post-date').string
        published = string_to_date(raw_date, "%B %d, %Y")
        # Sanity checks on the page structure.
        assert comic_imgs
        assert all(img['alt'] == img['title'] for img in comic_imgs)
        assert all(img['alt'] in (comic_title, "") for img in comic_imgs)
        description = soup.find('meta', property='og:description')['content']
        sources = [urljoin_wrapper(cls.url, img['src']) for img in comic_imgs]
        return {
            'title': comic_title,
            'description': description,
            'author': author_name,
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': sources,
        }
605
606
607
class ItsTheTie(GenericNavigableComic):
    """Retriever for the It's the tie comics."""
    # Also on http://itsthetie.tumblr.com
    # Also on https://tapastic.com/series/itsthetie
    name = "tie"
    long_name = "It's the tie"
    url = 'http://itsthetie.com'
    _categories = ('TIE', )
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, date, images (bonus ones included) and tags."""
        comic_title = soup.find('h1', class_='comic-title').find('a').string
        meta_header = soup.find('header', class_='comic-meta entry-meta')
        raw_date = meta_header.find('a').string
        published = string_to_date(raw_date, "%B %d, %Y")
        # Bonus images may or may not be in meta og:image, so they are
        # appended only when not already listed there.
        main_sources = [
            meta['content']
            for meta in soup.find_all('meta', property='og:image')]
        bonus_sources = [
            img['data-oversrc']
            for img in soup.find_all('img', attrs={'data-oversrc': True})]
        extra_sources = [src for src in bonus_sources if src not in main_sources]
        # The generic "bonus panel" picture is filtered out.
        kept_sources = [
            src
            for src in main_sources + extra_sources
            if not src.endswith("/2016/01/bonus-panel.png")]
        tag_meta = soup.find('meta', property='article:tag')
        if tag_meta:
            tags = tag_meta['content']
        else:
            tags = ""
        return {
            'title': comic_title,
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': kept_sources,
            'tags': tags,
        }
641
642
643
class PenelopeBagieu(GenericNavigableComic):
    """Retriever for the comics from Penelope Bagieu's blog."""
    name = "bagieu"
    long_name = "Ma vie est tout a fait fascinante (Bagieu)"
    url = "http://www.penelope-jolicoeur.com"
    _categories = ('FRANCAIS', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = "http://www.penelope-jolicoeur.com/2007/02/ma-vie-mon-oeuv.html"

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, images and date from a blog entry page."""
        raw_date = soup.find('h2', class_='date-header').string
        # The third argument looks like a locale name (French day/month names).
        published = string_to_date(raw_date, "%A %d %B %Y", "fr_FR.utf8")
        body_imgs = soup.find('div', class_='entry-body').find_all('img')
        entry_title = soup.find('h3', class_='entry-header').string
        sources = [img['src'] for img in body_imgs]
        return {
            'title': entry_title,
            'img': sources,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
667
668
669 View Code Duplication
class OneOneOneOneComic(GenericNavigableComic):
    """Retriever for the 1111 Comics."""
    # Also on http://comics1111.tumblr.com
    # Also on https://tapastic.com/series/1111-Comics
    name = "1111"
    long_name = "1111 Comics"
    url = "http://www.1111comics.me"
    _categories = ('ONEONEONEONE', )
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, date and images from a comic page."""
        comic_title = soup.find('h1', class_='comic-title').find('a').string
        meta_header = soup.find('header', class_='comic-meta entry-meta')
        raw_date = meta_header.find('a').string
        published = string_to_date(raw_date, "%B %d, %Y")
        sources = [
            meta['content']
            for meta in soup.find_all('meta', property='og:image')]
        return {
            'title': comic_title,
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': sources,
        }
694
695
696 View Code Duplication
class AngryAtNothing(GenericNavigableComic):
    """Retriever for the Angry at Nothing comics."""
    # Also on http://tapastic.com/series/Comics-yeah-definitely-comics-
    name = "angry"
    long_name = "Angry At Nothing"
    url = "http://www.angryatnothing.net"
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, date and images from a comic page."""
        comic_title = soup.find('h1', class_='comic-title').find('a').string
        meta_header = soup.find('header', class_='comic-meta entry-meta')
        raw_date = meta_header.find('a').string
        published = string_to_date(raw_date, "%B %d, %Y")
        sources = [
            meta['content']
            for meta in soup.find_all('meta', property='og:image')]
        return {
            'title': comic_title,
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': sources,
        }
719
720
721
class NeDroid(GenericNavigableComic):
    """Class to retrieve NeDroid comics."""
    name = 'nedroid'
    long_name = 'NeDroid'
    url = 'http://nedroid.com'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The comic number is parsed from the short link and the date from
        the path of the image. Exactly one image is expected in the
        <div id="comic"> element.
        """
        # Literal parts of the patterns are escaped so that dots only
        # match dots.
        short_url_re = re.compile('^%s/\\?p=([0-9]*)' % re.escape(cls.url))
        comic_url_re = re.compile(r'//nedroid\.com/comics/([0-9]*)-([0-9]*)-([0-9]*).*')
        short_url = cls.get_url_from_link(soup.find('link', rel='shortlink'))
        num = int(short_url_re.match(short_url).groups()[0])
        imgs = soup.find('div', id='comic').find_all('img')
        # Checked before imgs[0] is used, so that an unexpected page layout
        # is reported by this assertion and not by an IndexError.
        assert len(imgs) == 1
        img = imgs[0]
        year, month, day = [int(s) for s in comic_url_re.match(img['src']).groups()]
        title = img['alt']
        title2 = img['title']
        return {
            'short_url': short_url,
            'title': title,
            'title2': title2,
            'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
            'day': day,
            'month': month,
            'year': year,
            'num': num,
        }
752
753
754
class Garfield(GenericNavigableComic):
    """Class to retrieve Garfield comics."""
    # Also on http://www.gocomics.com/garfield
    name = 'garfield'
    long_name = 'Garfield'
    url = 'https://garfield.com'
    _categories = ('GARFIELD', )
    get_first_comic_link = simulate_first_link
    first_url = 'https://garfield.com/comic/1978/06/19'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic."""
        return last_soup.find('a', class_='comic-arrow-right' if next_ else 'comic-arrow-left')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The date (year, month, day) is parsed from the url of the comic;
        images are the 'img-responsive' ones of the 'comic-display' div.
        """
        url = cls.get_url_from_link(link)
        # The base URL is escaped so that its dots only match literal dots.
        date_re = re.compile('^%s/comic/([0-9]*)/([0-9]*)/([0-9]*)' % re.escape(cls.url))
        year, month, day = [int(s) for s in date_re.match(url).groups()]
        imgs = soup.find('div', class_='comic-display').find_all('img', class_='img-responsive')
        return {
            'month': month,
            'year': year,
            'day': day,
            'img': [i['src'] for i in imgs],
        }
782
783
784
class Dilbert(GenericNavigableComic):
    """Retrieve Dilbert comics from dilbert.com.

    Every piece of comic information is read from the page's <meta> tags.
    """
    # Also on http://www.gocomics.com/dilbert-classics
    name = 'dilbert'
    long_name = 'Dilbert'
    url = 'http://dilbert.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://dilbert.com/strip/1989-04-16'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor inside the next/previous nav div, or None."""
        nav_class = 'nav-comic nav-right' if next_ else 'nav-comic nav-left'
        nav_div = last_soup.find('div', class_=nav_class)
        if not nav_div:
            return None
        return nav_div.find('a')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, description, images, author, tags and date of a comic."""
        def meta_content(prop):
            """Return the content of the first <meta> tag with that property."""
            return soup.find('meta', property=prop)['content']

        title = meta_content('og:title')
        image_metas = soup.find_all('meta', property='og:image')
        description = meta_content('og:description')
        published = string_to_date(meta_content('article:publish_date'), "%B %d, %Y")
        author = meta_content('article:author')
        tags = meta_content('article:tag')
        return {
            'title': title,
            'description': description,
            'img': [meta['content'] for meta in image_metas],
            'author': author,
            'tags': tags,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
820
821
822
class VictimsOfCircumsolar(GenericNavigableComic):
    """Retrieve Victims Of Circumsolar comics."""
    name = 'circumsolar'
    long_name = 'Victims Of Circumsolar'
    url = 'http://www.victimsofcircumsolar.com'
    get_navi_link = get_a_navi_comicnavnext_navinext
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.victimsofcircumsolar.com/comic/modern-addiction'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, description and image urls of the comic in `soup`.

        No date is returned: it is only available on the archive page.
        """
        # When several og: tags are present, the last one is the one used.
        title_metas = soup.find_all('meta', property='og:title')
        title = title_metas[-1]['content']
        desc_metas = soup.find_all('meta', property='og:description')
        description = desc_metas[-1]['content']
        pictures = soup.find('div', id='comic').find_all('img')
        assert all(pic['title'] == pic['alt'] == title for pic in pictures)
        return {
            'title': title,
            'description': description,
            'img': [pic['src'] for pic in pictures],
        }
844
845
846
class ThreeWordPhrase(GenericNavigableComic):
    """Class to retrieve Three Word Phrase comics.

    Navigation links are located through the button images they wrap
    (firstlink.gif / nextlink.gif / prevlink.gif).
    """
    # Also on http://www.threewordphrase.tumblr.com
    name = 'threeword'
    long_name = 'Three Word Phrase'
    url = 'http://threewordphrase.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        Returns the element wrapping the "first" button image, or None when
        the home page has no such image.
        """
        img = get_soup_at_url(cls.url).find('img', src='/firstlink.gif')
        # find() returns None when nothing matches: do not dereference it.
        return None if img is None else img.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns None when the button image is missing or when the element
        wrapping it carries no href.
        """
        img = last_soup.find('img', src='/nextlink.gif' if next_ else '/prevlink.gif')
        if img is None:
            return None
        link = img.parent
        return None if link.get('href') is None else link

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns the page title, the alt texts of the comic images joined
        together and the absolute image urls.
        """
        title = soup.find('title')
        # Every image of the page is kept except the ones whose file name
        # ends with one of these suffixes (buttons, ads, header...).
        ignored_suffixes = ('link.gif', '32.png', 'twpbookad.jpg',
                            'merchad.jpg', 'header.gif', 'tipjar.jpg')
        imgs = [img for img in soup.find_all('img')
                if not img['src'].endswith(ignored_suffixes)]
        return {
            'title': title.string if title else None,
            'title2': '  '.join(img.get('alt') for img in imgs if img.get('alt')),
            'img': [urljoin_wrapper(cls.url, img['src']) for img in imgs],
        }
878
879
880
class DeadlyPanel(GenericEmptyComic, GenericNavigableComic):
    """Retrieve Deadly Panel comics."""
    # Also on https://tapastic.com/series/deadlypanel
    name = 'deadly'
    long_name = 'Deadly Panel'
    url = 'http://www.deadlypanel.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the image urls found in the 'comic' div of the page."""
        pictures = soup.find('div', id='comic').find_all('img')
        # Sanity check: alt and title texts are expected to be the same.
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        return {
            'img': [pic['src'] for pic in pictures],
        }
897
898
899 View Code Duplication
class TheGentlemanArmchair(GenericNavigableComic):
    """Retrieve The Gentleman Armchair comics."""
    name = 'gentlemanarmchair'
    long_name = 'The Gentleman Armchair'
    url = 'http://thegentlemansarmchair.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return images, title, author and date of the comic in `soup`."""
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        posted = string_to_date(soup.find('span', class_='post-date').string, "%B %d, %Y")
        pictures = soup.find('div', id='comic').find_all('img')
        return {
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'author': author,
            'month': posted.month,
            'year': posted.year,
            'day': posted.day,
        }
923
924
925 View Code Duplication
class MyExtraLife(GenericNavigableComic):
    """Retrieve My Extra Life comics."""
    name = 'extralife'
    long_name = 'My Extra Life'
    url = 'http://www.myextralife.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'first comic' navigation anchor of the home page."""
        home = get_soup_at_url(cls.url)
        return home.find('a', class_='comic_nav_link first_comic_link')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, image urls and date of the comic in `soup`."""
        title = soup.find("h1", class_="comic_title").string
        posted = string_to_date(soup.find("span", class_="comic_date").string, "%B %d, %Y")
        pictures = soup.find_all("img", class_="comic")
        assert all(pic['alt'] == pic['title'] == title for pic in pictures)
        return {
            'title': title,
            # Images with an empty src are left out.
            'img': [pic['src'] for pic in pictures if pic["src"]],
            'day': posted.day,
            'month': posted.month,
            'year': posted.year
        }
952
953
954
class SaturdayMorningBreakfastCereal(GenericNavigableComic):
    """Retrieve Saturday Morning Breakfast Cereal comics.

    A comic is made of the main image and, when present, the image found in
    the 'aftercomic' div.
    """
    # Also on http://www.gocomics.com/saturday-morning-breakfast-cereal
    # Also on http://smbc-comics.tumblr.com
    name = 'smbc'
    long_name = 'Saturday Morning Breakfast Cereal'
    url = 'http://www.smbc-comics.com'
    _categories = ('SMBC', )
    get_navi_link = get_a_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the anchor with rel='start' from the home page."""
        home = get_soup_at_url(cls.url)
        return home.find('a', rel='start')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, absolute image urls and date of the comic in `soup`."""
        main_img = soup.find('img', id='cc-comic')
        img_urls = [main_img['src']]
        after_div = soup.find('div', id='aftercomic')
        if after_div:
            extra_url = after_div.find('img')['src']
            # An empty src is not worth keeping.
            if extra_url:
                img_urls.append(extra_url)
        publish_str = soup.find('div', class_='cc-publishtime').contents[0]
        published = string_to_date(publish_str, "%B %d, %Y")
        return {
            'title': main_img['title'],
            'img': [urljoin_wrapper(cls.url, img_url) for img_url in img_urls],
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
986
987
988
class PerryBibleFellowship(GenericListableComic):
    """Retrieve Perry Bible Fellowship comics."""
    name = 'pbf'
    long_name = 'Perry Bible Fellowship'
    url = 'http://pbfcomics.com'
    get_url_from_archive_element = join_cls_url_to_href

    @classmethod
    def get_archive_elements(cls):
        """Return the home page anchors with a '/<digits>/' href, in reverse page order."""
        numbered_links = get_soup_at_url(cls.url).find_all('a', href=re.compile('^/[0-9]*/$'))
        return reversed(numbered_links)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return number, name, image url and file prefix of a comic.

        `link` is the archive anchor: its text is the comic name and its
        'name' attribute is the comic number.
        """
        comic_url = cls.get_url_from_archive_element(link)
        picture_re = re.compile('^/archive_b/PBF.*')
        comic_name = link.string
        num = int(link['name'])
        # The href is expected to be the comic number between slashes.
        assert link['href'] == '/%d/' % num
        pictures = soup.find_all('img', src=picture_re)
        assert len(pictures) == 1
        assert pictures[0]['alt'] == comic_name
        return {
            'num': num,
            'name': comic_name,
            'img': [urljoin_wrapper(comic_url, pic['src']) for pic in pictures],
            'prefix': '%d-' % num,
        }
1018
1019
1020 View Code Duplication
class Mercworks(GenericNavigableComic):
    """Retrieve Mercworks comics."""
    # Also on http://mercworks.tumblr.com
    name = 'mercworks'
    long_name = 'Mercworks'
    url = 'http://mercworks.net'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return images, title, description and date read from <meta> tags."""
        title = soup.find('meta', property='og:title')['content']
        # The description tag is optional: fall back to an empty string.
        desc_meta = soup.find('meta', property='og:description')
        if desc_meta:
            description = desc_meta['content']
        else:
            description = ""
        # Only the first 10 characters (parsed as YYYY-MM-DD) are used.
        published_meta = soup.find('meta', property='article:published_time')
        published = string_to_date(published_meta['content'][:10], "%Y-%m-%d")
        image_metas = soup.find_all('meta', property='og:image')
        return {
            'img': [meta['content'] for meta in image_metas],
            'title': title,
            'desc': description,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
1046
1047
1048
class BerkeleyMews(GenericListableComic):
    """Retrieve Berkeley Mews comics.

    The comic number comes from the '?p=<num>' comic url and the date from
    the image url.
    """
    # Also on http://mews.tumblr.com
    # Also on http://www.gocomics.com/berkeley-mews
    name = 'berkeley'
    long_name = 'Berkeley Mews'
    url = 'http://www.berkeleymews.com'
    _categories = ('BERKELEY', )
    get_url_from_archive_element = get_href
    comic_num_re = re.compile('%s/\\?p=([0-9]*)$' % url)

    @classmethod
    def get_archive_elements(cls):
        """Return the comic anchors of the archive page, in reverse page order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, "?page_id=2"))
        return reversed(archive_soup.find_all('a', href=cls.comic_num_re))

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return number, titles, image url and date of a comic."""
        date_in_img_re = re.compile('.*/([0-9]*)-([0-9]*)-([0-9]*)-.*')
        comic_url = cls.get_url_from_archive_element(link)
        num = int(cls.comic_num_re.match(comic_url).groups()[0])
        picture = soup.find('div', id='comic').find('img')
        assert picture['alt'] == picture['title']
        hover_text = picture['title']
        picture_url = picture['src']
        year, month, day = (int(part) for part in date_in_img_re.match(picture_url).groups())
        return {
            'num': num,
            'title': link.string,
            'title2': hover_text,
            'img': [picture_url],
            'year': year,
            'month': month,
            'day': day,
        }
1084
1085
1086
class GenericBouletCorp(GenericNavigableComic):
    """Common base to retrieve BouletCorp comics, whatever the language.

    Subclasses provide `name`, `long_name` and `url`.
    """
    # Also on http://bouletcorp.tumblr.com
    _categories = ('BOULET', )
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the first anchor of the 'centered_nav' div of the home page."""
        nav_div = get_soup_at_url(cls.url).find('div', id='centered_nav')
        return nav_div.find_all('a')[0]

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return images, title, image texts and date of the comic in `soup`.

        The date is parsed from the comic url (<url>/<year>/<month>/<day>/).
        """
        comic_url = cls.get_url_from_link(link)
        date_match = re.compile('^%s/([0-9]*)/([0-9]*)/([0-9]*)/' % cls.url).match(comic_url)
        year, month, day = (int(part) for part in date_match.groups())
        story = soup.find('div', id='notes').find('div', class_='storycontent')
        pictures = story.find_all('img')
        hover_texts = [pic.get('title') for pic in pictures]
        texts = '  '.join(text for text in hover_texts if text)
        title = soup.find('title').string
        return {
            # Images without any src attribute are skipped.
            'img': [convert_iri_to_plain_ascii_uri(pic['src'])
                    for pic in pictures if pic.get('src') is not None],
            'title': title,
            'texts': texts,
            'year': year,
            'month': month,
            'day': day,
        }
1114
1115
1116
class BouletCorp(GenericBouletCorp):
    """Retrieve BouletCorp comics from www.bouletcorp.com ('FRANCAIS' category)."""
    name = 'boulet'
    long_name = 'Boulet Corp'
    url = 'http://www.bouletcorp.com'
    _categories = ('FRANCAIS', )
1122
1123
1124
class BouletCorpEn(GenericBouletCorp):
    """Retrieve BouletCorp comics from the english.bouletcorp.com site."""
    name = 'boulet_en'
    long_name = 'Boulet Corp English'
    url = 'http://english.bouletcorp.com'
1129
1130
1131 View Code Duplication
class AmazingSuperPowers(GenericNavigableComic):
    """Retrieve Amazing Super Powers comics."""
    name = 'asp'
    long_name = 'Amazing Super Powers'
    url = 'http://www.amazingsuperpowers.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, author, image urls and date of the comic in `soup`.

        The title is built from the title texts of the comic images.
        """
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        posted = string_to_date(soup.find('span', class_='post-date').string, "%B %d, %Y")
        pictures = soup.find('div', id='comic').find_all('img')
        title = ' '.join(pic['title'] for pic in pictures)
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        return {
            'title': title,
            'author': author,
            'img': [pic['src'] for pic in pictures],
            'day': posted.day,
            'month': posted.month,
            'year': posted.year
        }
1156
1157
1158
class ToonHole(GenericNavigableComic):
    """Retrieve Toon Hole comics."""
    # Also on http://tapastic.com/series/TOONHOLE
    name = 'toonhole'
    long_name = 'Toon Hole'
    url = 'http://www.toonhole.com'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return short url, title, date and image urls of the comic in `soup`.

        The title is the alt text of the first image, or an empty string
        when the page holds no comic image.
        """
        short_url = soup.find('link', rel='shortlink')['href']
        meta_text = soup.find('div', class_='entry-meta').contents[0].strip()
        posted = string_to_date(meta_text, "%B %d, %Y")
        pictures = soup.find('div', id='comic').find_all('img')
        title = ""
        if pictures:
            title = pictures[0]['alt']
            assert pictures[0]['title'] == title
        return {
            'short_url': short_url,
            'title': title,
            'month': posted.month,
            'year': posted.year,
            'day': posted.day,
            'img': [convert_iri_to_plain_ascii_uri(pic['src']) for pic in pictures],
        }
1188
1189
1190
class Channelate(GenericNavigableComic):
    """Retrieve Channelate comics.

    When the page has an extra panel button, the linked page is fetched as
    well and its extra panel images are added to the comic images.
    """
    name = 'channelate'
    long_name = 'Channelate'
    url = 'http://www.channelate.com'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return extra url, title, author, date and image urls of a comic."""
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        posted = string_to_date(soup.find('span', class_='post-date').string, '%Y/%m/%d')
        title = soup.find('meta', property='og:title')['content']
        comic_div = soup.find('div', id='comic')
        pictures = comic_div.find_all('img') if comic_div else []
        bonus_url = None
        bonus_div = soup.find('div', id='extrapanelbutton')
        if bonus_div:
            bonus_url = bonus_div.find('a')['href']
            bonus_soup = get_soup_at_url(bonus_url)
            pictures.extend(bonus_soup.find_all('img', class_='extrapanelimage'))
        return {
            'url_extra': bonus_url,
            'title': title,
            'author': author,
            'month': posted.month,
            'year': posted.year,
            'day': posted.day,
            'img': [urljoin_wrapper(cls.url, pic['src']) for pic in pictures],
        }
1224
1225
1226
class CyanideAndHappiness(GenericNavigableComic):
    """Class to retrieve Cyanide And Happiness comics."""
    name = 'cyanide'
    long_name = 'Cyanide and Happiness'
    url = 'http://explosm.net'
    _categories = ('NSFW', )
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics (the 'Oldest comic' anchor of the home page)."""
        return get_soup_at_url(cls.url).find('a', title='Oldest comic')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns None when the page has no such anchor or when the anchor
        carries no href.
        """
        # NOTE(review): the trailing space in 'previous-comic ' is kept as is;
        # presumably it mirrors the site markup - confirm before changing it.
        link = last_soup.find('a', class_='next-comic' if next_ else 'previous-comic ')
        # find() returns None when nothing matches: do not call .get() on it.
        if link is None or link.get('href') is None:
            return None
        return link

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns the comic number (second to last '/'-separated part of the
        og:url value), the author, the date, the file prefix and the
        absolute image urls.
        """
        url2 = soup.find('meta', property='og:url')['content']
        num = int(url2.split('/')[-2])
        date_str = soup.find('h3').find('a').string
        day = string_to_date(date_str, '%Y.%m.%d')
        author = soup.find('small', class_="author-credit-name").string
        # The credit text is expected to look like 'by <author>'.
        assert author.startswith('by ')
        author = author[3:]
        imgs = soup.find_all('img', id='main-comic')
        return {
            'num': num,
            'author': author,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'prefix': '%d-' % num,
            'img': [convert_iri_to_plain_ascii_uri(urljoin_wrapper(cls.url, i['src'])) for i in imgs]
        }
1265
1266
1267
class MrLovenstein(GenericComic):
    """Retrieve Mr Lovenstein comics.

    Comics are numbered: the range to fetch is derived from the comic links
    found on the home page.
    """
    # Also on https://tapastic.com/series/MrLovenstein
    name = 'mrlovenstein'
    long_name = 'Mr. Lovenstein'
    url = 'http://www.mrlovenstein.com'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Yield the comics following `last_comic` (all of them if it is falsy).

        Implementation of GenericComic's abstract method.
        """
        # TODO: more info from http://www.mrlovenstein.com/archive
        comic_num_re = re.compile('^/comic/([0-9]*)$')
        picture_re = re.compile('^/images/comics/')
        home_links = get_soup_at_url(cls.url).find_all('a', href=comic_num_re)
        nums = [int(comic_num_re.match(anchor['href']).groups()[0])
                for anchor in home_links]
        first, last = min(nums), max(nums)
        if last_comic:
            # Resume right after the last comic already retrieved.
            first = last_comic['num'] + 1
        for num in range(first, last + 1):
            comic_url = urljoin_wrapper(cls.url, '/comic/%d' % num)
            comic_soup = get_soup_at_url(comic_url)
            pictures = list(reversed(comic_soup.find_all('img', src=picture_re)))
            description = comic_soup.find('meta', attrs={'name': 'description'})['content']
            hover_texts = [pic.get('title') for pic in pictures]
            yield {
                'url': comic_url,
                'num': num,
                'texts': '  '.join(text for text in hover_texts if text),
                'img': [urljoin_wrapper(comic_url, pic['src']) for pic in pictures],
                'description': description,
            }
1297
1298
1299
class DinosaurComics(GenericListableComic):
    """Retrieve Dinosaur Comics comics.

    Number, date and text of a comic come from its archive entry; image and
    title come from the comic page.
    """
    name = 'dinosaur'
    long_name = 'Dinosaur Comics'
    url = 'http://www.qwantz.com'
    get_url_from_archive_element = get_href
    comic_link_re = re.compile('^%s/index.php\\?comic=([0-9]*)$' % url)

    @classmethod
    def get_archive_elements(cls):
        """Return the comic anchors of the archive page, in reverse page order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive.php'))
        comic_links = archive_soup.find_all('a', href=cls.comic_link_re)
        # first link is random -> skip it
        return reversed(comic_links[1:])

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return date, image url, title, text and number of a comic."""
        comic_url = cls.get_url_from_archive_element(link)
        num = int(cls.comic_link_re.match(comic_url).groups()[0])
        # The anchor text is the date; the text is read from the node after it.
        raw_date = link.string
        text = link.next_sibling.string
        published = string_to_date(remove_st_nd_rd_th_from_date(raw_date), "%B %d, %Y")
        picture = soup.find('img', src=re.compile('^%s/comics/' % cls.url))
        return {
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [picture.get('src')],
            'title': picture.get('title'),
            'text': text,
            'num': num,
        }
1332
1333
1334
class ButterSafe(GenericListableComic):
    """Retrieve Butter Safe comics.

    The date of a comic is parsed from its url
    (<url>/<year>/<month>/<day>/...).
    """
    name = 'butter'
    long_name = 'ButterSafe'
    url = 'http://buttersafe.com'
    get_url_from_archive_element = get_href
    comic_link_re = re.compile('^%s/([0-9]*)/([0-9]*)/([0-9]*)/.*' % url)

    @classmethod
    def get_archive_elements(cls):
        """Return the comic anchors of the archive page, in reverse page order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive/'))
        return reversed(archive_soup.find_all('a', href=cls.comic_link_re))

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, date and image url of a comic."""
        comic_url = cls.get_url_from_archive_element(link)
        title = link.string
        date_parts = cls.comic_link_re.match(comic_url).groups()
        year, month, day = (int(part) for part in date_parts)
        picture = soup.find('div', id='comic').find('img')
        # The archive text and the image alt are expected to be the same.
        assert picture['alt'] == title
        return {
            'title': title,
            'day': day,
            'month': month,
            'year': year,
            'img': [picture['src']],
        }
1362
1363
1364
class CalvinAndHobbes(GenericComic):
    """Class to retrieve Calvin and Hobbes comics.

    The index page links to one page per month ('<year>/<month>/' hrefs);
    each month page holds the images of that month, whose file names start
    with '<year><month><day>'.
    """
    # Also on http://www.gocomics.com/calvinandhobbes/
    name = 'calvin'
    long_name = 'Calvin and Hobbes'
    # This is not through any official webpage but eh...
    url = 'http://marcel-oehler.marcellosendos.ch/comics/ch/'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generator to get the next comic. Implementation of GenericComic's abstract method.

        Yields a dict (url, year, month, day, img) for every comic dated
        strictly after the date of `last_comic`, or after 1985-11-01 when
        `last_comic` is falsy.
        """
        last_date = get_date_for_comic(
            last_comic) if last_comic else date(1985, 11, 1)
        link_re = re.compile('^([0-9]*)/([0-9]*)/')
        for link in get_soup_at_url(cls.url).find_all('a', href=link_re):
            url = link['href']
            # year and month are kept as strings too: the exact href text is
            # reused below to build the image regexp and the image url.
            year, month = link_re.match(url).groups()
            year_num, month_num = int(year), int(month)
            # Skip the months that cannot hold anything newer than last_date
            # (first day of the month + 31 days is used as an upper bound).
            if date(year_num, month_num, 1) + timedelta(days=31) < last_date:
                continue
            img_re = re.compile('^%s%s([0-9]*)' % (year, month))
            month_url = urljoin_wrapper(cls.url, url)
            for img in get_soup_at_url(month_url).find_all('img', src=img_re):
                img_src = img['src']
                day = int(img_re.match(img_src).groups()[0])
                comic_date = date(year_num, month_num, day)
                if comic_date > last_date:
                    yield {
                        'url': month_url,
                        'year': year_num,
                        'month': month_num,
                        'day': day,
                        'img': ['%s%s/%s/%s' % (cls.url, year, month, img_src)],
                    }
                    last_date = comic_date
1398
1399
1400
class AbstruseGoose(GenericListableComic):
    """Retrieve Abstruse Goose comics."""
    name = 'abstruse'
    long_name = 'Abstruse Goose'
    url = 'http://abstrusegoose.com'
    get_url_from_archive_element = get_href
    comic_url_re = re.compile('^%s/([0-9]*)$' % url)
    comic_img_re = re.compile('^%s/strips/.*' % url)

    @classmethod
    def get_archive_elements(cls):
        """Return the comic anchors of the archive page, in page order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive'))
        return archive_soup.find_all('a', href=cls.comic_url_re)

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Return number, title and image url of a comic.

        The number comes from the comic url and the title from the text of
        the archive anchor.
        """
        comic_url = cls.get_url_from_archive_element(archive_elt)
        url_match = cls.comic_url_re.match(comic_url)
        return {
            'num': int(url_match.groups()[0]),
            'title': archive_elt.string,
            'img': [soup.find('img', src=cls.comic_img_re)['src']]
        }
1423
1424
1425
class PhDComics(GenericNavigableComic):
    """Retrieve PHD Comics.

    Navigation links are found through the button images they wrap.
    """
    name = 'phd'
    long_name = 'PhD Comics'
    url = 'http://phdcomics.com/comics/archive.php'

    @classmethod
    def get_first_comic_link(cls):
        """Return the element wrapping the 'first' button image, or None."""
        button = get_soup_at_url(cls.url).find(
            'img', src='http://phdcomics.com/comics/images/first_button.gif')
        if button is None:
            return None
        return button.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the element wrapping the next/prev button image, or None."""
        direction = 'next' if next_ else 'prev'
        button = last_soup.find(
            'img', src='http://phdcomics.com/comics/images/%s_button.gif' % direction)
        if button is None:
            return None
        return button.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return image urls and title read from the <meta> tags of the page."""
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        image_metas = soup.find_all('meta', property='og:image')
        return {
            'img': [meta['content'] for meta in image_metas],
            'title': title,
        }
1454
1455
1456 View Code Duplication
class Octopuns(GenericEmptyComic, GenericNavigableComic):
    """Class to retrieve Octopuns comics.

    Navigation links are located through the button images they wrap
    (First.png / Next.png / Back.png).
    """
    # Also on http://octopuns.tumblr.com
    name = 'octopuns'
    long_name = 'Octopuns'
    url = 'http://www.octopuns.net'

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        Returns the element wrapping the "First" button image, or None when
        the home page has no such image.
        """
        img = get_soup_at_url(cls.url).find('img', src=re.compile('.*/First.png'))
        # find() returns None when nothing matches: do not dereference it.
        return None if img is None else img.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns None when the button image is missing or when the element
        wrapping it carries no href.
        """
        img = last_soup.find('img', src=re.compile('.*/Next.png' if next_ else '.*/Back.png'))
        if img is None:
            return None
        link = img.parent
        return None if link.get('href') is None else link

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns the image urls, the post title and the date read from the
        date header of the page.
        """
        title = soup.find('h3', class_='post-title entry-title').string
        date_str = soup.find('h2', class_='date-header').string
        day = string_to_date(date_str, "%A, %B %d, %Y")
        imgs = soup.find_all('link', rel='image_src')
        return {
            'img': [i['href'] for i in imgs],
            'title': title,
            'day': day.day,
            'month': day.month,
            'year': day.year,
        }
1488
1489
1490
class Quarktees(GenericNavigableComic):
    """Retrieve the Quarktees comics."""
    name = 'quarktees'
    long_name = 'Quarktees'
    url = 'http://www.quarktees.com/blogs/news'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.quarktees.com/blogs/news/12486621-coming-soon'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the 'article-next' or 'article-prev' anchor of the page."""
        anchor_id = 'article-next' if next_ else 'article-prev'
        return last_soup.find('a', id=anchor_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the title and the absolute urls of the article images."""
        title = soup.find('meta', property='og:title')['content']
        pictures = soup.find('div', class_='single-article').find_all('img')
        return {
            'title': title,
            'img': [urljoin_wrapper(cls.url, pic['src']) for pic in pictures],
        }
1514
1515
1516
class OverCompensating(GenericNavigableComic):
    """Retriever for Over Compensating, whose comic urls end in 'comic=<num>'."""
    name = 'compensating'
    long_name = 'Over Compensating'
    url = 'http://www.overcompensating.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the home page link whose href ends with 'comic=1'."""
        home_soup = get_soup_at_url(cls.url)
        return home_soup.find('a', href=re.compile('comic=1$'))

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the navigation anchor, located through its title attribute."""
        link_title = 'next comic' if next_ else 'go back already'
        return last_soup.find('a', title=link_title)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the number, image url and title of the comic behind `link`."""
        comic_url = cls.get_url_from_link(link)
        # The comic number is the trailing 'comic=<digits>' part of the url.
        num_match = re.compile('.*comic=([0-9]*)$').match(comic_url)
        num = int(num_match.group(1))
        img = soup.find('img', src=re.compile('^/oc/comics/.*'))
        return {
            'num': num,
            'img': [urljoin_wrapper(comic_url, img['src'])],
            'title': img.get('title')
        }
1546
1547
1548
class Oglaf(GenericNavigableComic):
    """Retriever for Oglaf comics (flagged NSFW)."""
    name = 'oglaf'
    long_name = 'Oglaf [NSFW]'
    url = 'http://oglaf.com'
    _categories = ('NSFW', )
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the parent of the div with id 'st' on the home page."""
        start_div = get_soup_at_url(cls.url).find("div", id="st")
        return start_div.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the parent of the 'nx'/'pvs' div, or None if the div is absent."""
        nav_div = last_soup.find("div", id="nx" if next_ else "pvs")
        if not nav_div:
            return None
        return nav_div.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, images and description of one page.

        Exactly one title image (inside div 'tt') and one strip image
        (id 'strip') are expected; both are returned, title image first.
        """
        title = soup.find('title').string
        title_imgs = soup.find('div', id='tt').find_all('img')
        assert len(title_imgs) == 1
        strip_imgs = soup.find_all('img', id='strip')
        assert len(strip_imgs) == 1
        all_imgs = title_imgs + strip_imgs
        description = ' '.join(tag['title'] for tag in all_imgs)
        return {
            'title': title,
            'img': [tag['src'] for tag in all_imgs],
            'description': description,
        }
1582
1583
1584
class ScandinaviaAndTheWorld(GenericNavigableComic):
    """Retriever for Scandinavia And The World comics."""
    name = 'satw'
    long_name = 'Scandinavia And The World'
    url = 'http://satwcomic.com'
    get_first_comic_link = simulate_first_link
    first_url = 'http://satwcomic.com/sweden-denmark-and-norway'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor whose accesskey is 'n' (next) or 'p' (previous)."""
        key = 'n' if next_ else 'p'
        return last_soup.find('a', accesskey=key)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, description and image urls read from the page."""
        title_meta = soup.find('meta', attrs={'name': 'twitter:label1'})
        title = title_meta['content']
        description = soup.find('meta', property='og:description')['content']
        comic_imgs = soup.find_all('img', itemprop="image")
        return {
            'title': title,
            'description': description,
            'img': [tag['src'] for tag in comic_imgs],
        }
1608
1609
1610
class SomethingOfThatIlk(GenericEmptyComic):  # Does not exist anymore
    """Placeholder for the Something Of That Ilk comics.

    Only the identifying attributes are declared here.
    """
    name = 'somethingofthatilk'
    long_name = 'Something Of That Ilk'
    url = 'http://www.somethingofthatilk.com'
1615
1616
1617
class InfiniteMonkeyBusiness(GenericNavigableComic):
    """Retriever for Infinite Monkey Business comics."""
    name = 'monkey'
    long_name = 'Infinite Monkey Business'
    url = 'http://infinitemonkeybusiness.net'
    get_navi_link = get_a_navi_comicnavnext_navinext
    get_first_comic_link = simulate_first_link
    first_url = 'http://infinitemonkeybusiness.net/comic/pillory/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the og:title and the image urls found in the 'comic' div."""
        title = soup.find('meta', property='og:title')['content']
        comic_div = soup.find('div', id='comic')
        comic_imgs = comic_div.find_all('img')
        return {
            'title': title,
            'img': [tag['src'] for tag in comic_imgs],
        }
1635
1636
1637
class Wondermark(GenericListableComic):
    """Retriever for Wondermark, driven by the site's archive page."""
    name = 'wondermark'
    long_name = 'Wondermark'
    url = 'http://wondermark.com'
    get_url_from_archive_element = get_href

    @classmethod
    def get_archive_elements(cls):
        """Return the archive's rel='bookmark' links, in reversed page order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive/'))
        return reversed(archive_soup.find_all('a', rel='bookmark'))

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return date, image, title, alt text and tags of one post.

        When the page has no 'comic' div, the image list is empty and
        title/alt are empty strings.
        """
        postdate = soup.find('div', class_='postdate').find('em').string
        day = string_to_date(remove_st_nd_rd_th_from_date(postdate), "%B %d, %Y")
        # Defaults used when the page carries no comic block.
        img_src, alt, title = [], '', ''
        comic_div = soup.find('div', id='comic')
        if comic_div:
            img = comic_div.find('img')
            img_src = [img['src']]
            alt = img['alt']
            assert alt == img['title']
            title = soup.find('meta', property='og:title')['content']
        tag_links = soup.find('div', class_='postmeta').find_all('a', rel='tag')
        return {
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': img_src,
            'title': title,
            'alt': alt,
            'tags': ' '.join(t.string for t in tag_links),
        }
1674
1675
1676
class WarehouseComic(GenericNavigableComic):
    """Retriever for Warehouse Comic."""
    name = 'warehouse'
    long_name = 'Warehouse Comic'
    url = 'http://warehousecomic.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return images, title and publication date of one comic page."""
        title = soup.find('h2', class_='post-title').string
        post_date = soup.find('span', class_='post-date').string
        published = string_to_date(post_date, "%B %d, %Y")
        comic_imgs = soup.find('div', id='comic').find_all('img')
        return {
            'img': [tag['src'] for tag in comic_imgs],
            'title': title,
            'day': published.day,
            'month': published.month,
            'year': published.year,
        }
1698
1699
1700
class JustSayEh(GenericNavigableComic):
    """Retriever for Just Say Eh comics."""
    # Also on http://tapastic.com/series/Just-Say-Eh
    name = 'justsayeh'
    long_name = 'Just Say Eh'
    url = 'http://www.justsayeh.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return images, title and alt text of one comic page.

        Every image is expected to have identical 'alt' and 'title'
        attributes; the alt text of the first image is reported.
        """
        title = soup.find('h2', class_='post-title').string
        comic_imgs = soup.find("div", id="comic").find_all("img")
        assert all(tag['alt'] == tag['title'] for tag in comic_imgs)
        first_alt = comic_imgs[0]['alt']
        return {
            'img': [tag['src'] for tag in comic_imgs],
            'title': title,
            'alt': first_alt,
        }
1721
1722
1723 View Code Duplication
class MouseBearComedy(GenericNavigableComic):
    """Retriever for Mouse Bear Comedy comics."""
    # Also on http://mousebearcomedy.tumblr.com
    name = 'mousebear'
    long_name = 'Mouse Bear Comedy'
    url = 'http://www.mousebearcomedy.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return date, images, title and author of one comic page.

        Each image's 'alt' and 'title' are expected to equal the post title.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        post_date = soup.find("span", class_="post-date").string
        published = string_to_date(post_date, '%B %d, %Y')
        comic_imgs = soup.find("div", id="comic").find_all("img")
        assert all(tag['alt'] == tag['title'] == title for tag in comic_imgs)
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [tag['src'] for tag in comic_imgs],
            'title': title,
            'author': author,
        }
1749
1750
1751
class BigFootJustice(GenericNavigableComic):
    """Retriever for Big Foot Justice comics."""
    # Also on http://tapastic.com/series/bigfoot-justice
    name = 'bigfoot'
    long_name = 'Big Foot Justice'
    url = 'http://bigfootjustice.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the images of the page and a title built from their titles.

        The title is the space-joined 'title' attribute of every image in
        the 'comic' div; 'title' and 'alt' are expected to match per image.
        """
        comic_imgs = soup.find('div', id='comic').find_all('img')
        assert all(tag['title'] == tag['alt'] for tag in comic_imgs)
        joined_title = ' '.join(tag['title'] for tag in comic_imgs)
        return {
            'img': [tag['src'] for tag in comic_imgs],
            'title': joined_title,
        }
1770
1771
1772
class RespawnComic(GenericNavigableComic):
    """Class to retrieve Respawn Comic."""
    # Also on http://respawncomic.tumblr.com
    name = 'respawn'
    long_name = 'Respawn Comic'
    # No trailing whitespace: a space is not a valid character in a URL.
    url = 'http://respawncomic.com'
    _categories = ('RESPAWN', )
    get_navi_link = get_a_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://respawncomic.com/comic/c0001/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Title, author and publication date are read from <meta> tags; the
        images are the og:image entries minus two site-wide pictures.
        """
        title = soup.find('meta', property='og:title')['content']
        author = soup.find('meta', attrs={'name': 'shareaholic:article_author_name'})['content']
        date_str = soup.find('meta', attrs={'name': 'shareaholic:article_published_time'})['content']
        # Only the leading 'YYYY-MM-DD' part of the timestamp is parsed.
        date_str = date_str[:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        imgs = soup.find_all('meta', property='og:image')
        # og:image values that are filtered out of the comic's image list.
        skip_imgs = {
            'http://respawncomic.com/wp-content/uploads/2016/03/site/HAROLD2.png',
            'http://respawncomic.com/wp-content/uploads/2016/03/site/DEVA.png'
        }
        return {
            'title': title,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [i['content'] for i in imgs if i['content'] not in skip_imgs],
        }
1804
1805
1806 View Code Duplication
class SafelyEndangered(GenericNavigableComic):
    """Retriever for Safely Endangered comics."""
    # Also on http://tumblr.safelyendangered.com
    name = 'endangered'
    long_name = 'Safely Endangered'
    url = 'http://www.safelyendangered.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.safelyendangered.com/comic/ignored/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return date, images, title and alt text of one comic page.

        The alt text reported is that of the first image in the 'comic' div.
        """
        title = soup.find('h2', class_='post-title').string
        post_date = soup.find('span', class_='post-date').string
        published = string_to_date(post_date, '%B %d, %Y')
        comic_imgs = soup.find('div', id='comic').find_all('img')
        first_alt = comic_imgs[0]['alt']
        assert all(tag['alt'] == tag['title'] for tag in comic_imgs)
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [tag['src'] for tag in comic_imgs],
            'title': title,
            'alt': first_alt,
        }
1833
1834
1835 View Code Duplication
class PicturesInBoxes(GenericNavigableComic):
    """Retriever for Pictures In Boxes comics."""
    # Also on http://picturesinboxescomic.tumblr.com
    name = 'picturesinboxes'
    long_name = 'Pictures in Boxes'
    url = 'http://www.picturesinboxes.com'
    get_navi_link = get_a_navi_navinext
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.picturesinboxes.com/2013/10/26/tetris/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return date, images, title and author of one comic page.

        At least one image is expected in the 'comicpane' div, each with
        'title' and 'alt' equal to the post title.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        post_date = soup.find('span', class_='post-date').string
        published = string_to_date(post_date, '%B %d, %Y')
        pane_imgs = soup.find('div', class_='comicpane').find_all('img')
        assert pane_imgs
        assert all(tag['title'] == tag['alt'] == title for tag in pane_imgs)
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [tag['src'] for tag in pane_imgs],
            'title': title,
            'author': author,
        }
1863
1864
1865 View Code Duplication
class Penmen(GenericNavigableComic):
    """Retriever for Penmen comics."""
    name = 'penmen'
    long_name = 'Penmen'
    url = 'http://penmen.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://penmen.com/index.php/2016/09/12/penmen-announces-grin-big-brand-clothing/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, short url, images, tags and date of one post."""
        title = soup.find('title').string
        content_imgs = soup.find('div', class_='entry-content').find_all('img')
        short_url = soup.find('link', rel='shortlink')['href']
        tag_names = [t.string for t in soup.find_all('a', rel='tag')]
        tags = ' '.join(tag_names)
        # Only the leading 'YYYY-MM-DD' part of the datetime attribute is used.
        iso_date = soup.find('time')['datetime'][:10]
        published = string_to_date(iso_date, "%Y-%m-%d")
        return {
            'title': title,
            'short_url': short_url,
            'img': [tag['src'] for tag in content_imgs],
            'tags': tags,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
1892
1893
1894
class TheDoghouseDiaries(GenericNavigableComic):
    """Retriever for The Dog House Diaries comics."""
    name = 'doghouse'
    long_name = 'The Dog House Diaries'
    url = 'http://thedoghousediaries.com'

    @classmethod
    def get_first_comic_link(cls):
        """Return the anchor with id 'firstlink' from the home page."""
        home_soup = get_soup_at_url(cls.url)
        return home_soup.find('a', id='firstlink')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor with id 'nextlink' or 'previouslink'."""
        wanted_id = 'nextlink' if next_ else 'previouslink'
        return last_soup.find('a', id=wanted_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return titles, alt text, image url and number of one comic.

        The number is the last '/'-separated component of the comic url.
        """
        img = soup.find('img', src=re.compile('^dhdcomics/.*'))
        comic_url = cls.get_url_from_link(link)
        title = soup.find('h2', id='titleheader').string
        subtitle = soup.find('div', id='subtext').string
        return {
            'title': title,
            'title2': subtitle,
            'alt': img.get('title'),
            'img': [urljoin_wrapper(comic_url, img['src'].strip())],
            'num': int(comic_url.rsplit('/', 1)[-1]),
        }
1923
1924
1925
class InvisibleBread(GenericListableComic):
    """Class to retrieve Invisible Bread comics."""
    # Also on http://www.gocomics.com/invisible-bread
    name = 'invisiblebread'
    long_name = 'Invisible Bread'
    url = 'http://invisiblebread.com'

    @classmethod
    def get_archive_elements(cls):
        """Get the archive's 'archive-title' cells, in reversed page order."""
        archive_url = urljoin_wrapper(cls.url, 'archives/')
        return reversed(get_soup_at_url(archive_url).find_all('td', class_='archive-title'))

    @classmethod
    def get_url_from_archive_element(cls, td):
        """Get url corresponding to an archive element."""
        return td.find('a')['href']

    @classmethod
    def get_comic_info(cls, soup, td):
        """Get information about a particular comics.

        Month and day come from the cell preceding `td`; the year is the
        first numeric path component of the comic url.
        """
        url = cls.get_url_from_archive_element(td)
        title = td.find('a').string
        month_and_day = td.previous_sibling.string
        # cls.url is escaped so that its dots only match literal dots.
        link_re = re.compile('^%s/([0-9]+)/' % re.escape(cls.url))
        year = link_re.match(url).groups()[0]
        date_str = month_and_day + ' ' + year
        day = string_to_date(date_str, '%b %d %Y')
        imgs = [soup.find('div', id='comic').find('img')]
        assert len(imgs) == 1
        assert all(i['title'] == i['alt'] == title for i in imgs)
        return {
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
            'title': title,
        }
1961
1962
1963
class DiscoBleach(GenericEmptyComic):  # Does not work anymore
    """Placeholder for the Disco Bleach comics.

    Only the identifying attributes are declared here.
    """
    name = 'discobleach'
    long_name = 'Disco Bleach'
    url = 'http://discobleach.com'
1968
1969
1970
class TubeyToons(GenericEmptyComic):  # Does not work anymore
    """Placeholder for the TubeyToons comics.

    Only the identifying attributes are declared here.
    """
    # Also on http://tapastic.com/series/Tubey-Toons
    # Also on http://tubeytoons.tumblr.com
    name = 'tubeytoons'
    long_name = 'Tubey Toons'
    url = 'http://tubeytoons.com'
    # NOTE(review): category is spelled 'TUNEYTOONS' - confirm whether this is intended.
    _categories = ('TUNEYTOONS', )
1978
1979
1980 View Code Duplication
class CompletelySeriousComics(GenericNavigableComic):
    """Retriever for Completely Serious Comics."""
    name = 'completelyserious'
    long_name = 'Completely Serious Comics'
    url = 'http://completelyseriouscomics.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return date, images, title, alt text and author of one comic.

        The author is read from the second child of the 'post-author' span.
        At least one image is expected, all sharing the same 'title'/'alt'.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find('span', class_='post-author')
        author = author_span.contents[1].string
        post_date = soup.find('span', class_='post-date').string
        published = string_to_date(post_date, '%B %d, %Y')
        pane_imgs = soup.find('div', class_='comicpane').find_all('img')
        assert pane_imgs
        alt = pane_imgs[0]['title']
        assert all(tag['title'] == tag['alt'] == alt for tag in pane_imgs)
        return {
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [tag['src'] for tag in pane_imgs],
            'title': title,
            'alt': alt,
            'author': author,
        }
2008
2009
2010
class PoorlyDrawnLines(GenericListableComic):
    """Class to retrieve Poorly Drawn Lines comics."""
    # Also on http://pdlcomics.tumblr.com
    name = 'poorlydrawn'
    long_name = 'Poorly Drawn Lines'
    url = 'http://poorlydrawnlines.com'
    _categories = ('POORLYDRAWN', )
    get_url_from_archive_element = get_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        A post holds at most one image; without any image the image list
        is empty and the title is an empty string.
        """
        imgs = soup.find('div', class_='post').find_all('img')
        assert len(imgs) <= 1
        return {
            'img': [i['src'] for i in imgs],
            'title': imgs[0].get('title', "") if imgs else "",
        }

    @classmethod
    def get_archive_elements(cls):
        """Get the archive links pointing under '<url>/comic/', in reversed page order."""
        archive_url = urljoin_wrapper(cls.url, 'archive')
        # cls.url is escaped so that its dots only match literal dots; the
        # trailing '.' stays a wildcard requiring a character after 'comic/'.
        url_re = re.compile('^%s/comic/.' % re.escape(cls.url))
        return reversed(get_soup_at_url(archive_url).find_all('a', href=url_re))
2034
2035
2036 View Code Duplication
class LoadingComics(GenericNavigableComic):
    """Retriever for Loading Artist comics."""
    name = 'loadingartist'
    long_name = 'Loading Artist'
    url = 'http://www.loadingartist.com/latest'

    @classmethod
    def get_first_comic_link(cls):
        """Return the anchor titled "First" from the page at the class url."""
        latest_soup = get_soup_at_url(cls.url)
        return latest_soup.find('a', title="First")

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor titled 'Next' or 'Previous'."""
        link_title = 'Next' if next_ else 'Previous'
        return last_soup.find('a', title=link_title)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, images and date of one comic page.

        Only images of the 'comic' div with empty 'alt' and 'title'
        attributes are kept.
        """
        title = soup.find('h1').string
        date_text = soup.find('span', class_='date').string.strip()
        published = string_to_date(date_text, "%B %d, %Y")
        comic_div = soup.find('div', class_='comic')
        comic_imgs = comic_div.find_all('img', alt='', title='')
        return {
            'title': title,
            'img': [tag['src'] for tag in comic_imgs],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2066
2067
2068 View Code Duplication
class ChuckleADuck(GenericNavigableComic):
    """Retriever for Chuckle-A-Duck comics."""
    name = 'chuckleaduck'
    long_name = 'Chuckle-A-duck'
    url = 'http://chuckleaduck.com'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return date, images, title and author of one comic page.

        Pages without a 'comic' div yield an empty image list and an empty
        title; otherwise the title is the first image's 'title' attribute.
        """
        post_date = soup.find('span', class_='post-date').string
        published = string_to_date(remove_st_nd_rd_th_from_date(post_date), "%B %d, %Y")
        author = soup.find('span', class_='post-author').string
        comic_div = soup.find('div', id='comic')
        comic_imgs = []
        if comic_div:
            comic_imgs = comic_div.find_all('img')
        title = ""
        if comic_imgs:
            title = comic_imgs[0]['title']
        assert all(tag['title'] == tag['alt'] == title for tag in comic_imgs)
        return {
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [tag['src'] for tag in comic_imgs],
            'title': title,
            'author': author,
        }
2094
2095
2096
class DepressedAlien(GenericNavigableComic):
    """Retriever for Depressed Alien comics."""
    name = 'depressedalien'
    long_name = 'Depressed Alien'
    url = 'http://depressedalien.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the parent of the image named 'beginArrow' on the home page."""
        begin_arrow = get_soup_at_url(cls.url).find('img', attrs={'name': 'beginArrow'})
        return begin_arrow.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the parent of the image named 'rightArrow' or 'leftArrow'."""
        arrow_name = 'rightArrow' if next_ else 'leftArrow'
        arrow_img = last_soup.find('img', attrs={'name': arrow_name})
        return arrow_img.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the twitter:title and the og:image urls of the page."""
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        image_metas = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'img': [meta['content'] for meta in image_metas],
        }
2122
2123
2124
class ThingsInSquares(GenericListableComic):
    """Retriever for Things In Squares comics, driven by the archive table."""
    # This can be retrieved in other languages
    # Also on https://tapastic.com/series/Things-in-Squares
    name = 'squares'
    long_name = 'Things in squares'
    url = 'http://www.thingsinsquares.com'

    @classmethod
    def get_comic_info(cls, soup, tr):
        """Return the info dict for the comic described by archive row `tr`.

        The row must hold exactly three cells: the second carries the link
        (and title), the third the date in "%m.%d.%y" format.
        """
        _, link_cell, date_cell = tr.find_all('td')
        anchor = link_cell.find('a')
        published = string_to_date(date_cell.string, "%m.%d.%y")
        title = anchor.string
        title2 = soup.find('meta', property='og:title')['content']
        desc_meta = soup.find('meta', property='og:description')
        description = ''
        if desc_meta:
            description = desc_meta['content']
        tag_metas = soup.find_all('meta', property='article:tag')
        tags = ' '.join(meta['content'] for meta in tag_metas)
        content_imgs = soup.find('div', class_='entry-content').find_all('img')
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'title': title,
            'title2': title2,
            'description': description,
            'tags': tags,
            'img': [tag['src'] for tag in content_imgs],
            'alt': ' '.join(tag['alt'] for tag in content_imgs),
        }

    @classmethod
    def get_url_from_archive_element(cls, tr):
        """Return the href of the link in the second cell of a three-cell row."""
        _, link_cell, _ = tr.find_all('td')
        return link_cell.find('a')['href']

    @classmethod
    def get_archive_elements(cls):
        """Return the rows of the archive page's <tbody>, in reversed order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive-2'))
        rows = archive_soup.find('tbody').find_all('tr')
        return reversed(rows)
2166
2167
2168
class HappleTea(GenericNavigableComic):
    """Retriever for Happle Tea comics."""
    name = 'happletea'
    long_name = 'Happle Tea'
    url = 'http://www.happletea.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, images, alt text, date and author of one comic.

        The alt text is the concatenation (no separator) of every image's
        'alt' attribute, which is expected to equal its 'title'.
        """
        comic_imgs = soup.find('div', id='comic').find_all('img')
        post = soup.find('div', class_='post-content')
        title = post.find('h2', class_='post-title').string
        author = post.find('a', rel='author').string
        post_date = post.find('span', class_='post-date').string
        published = string_to_date(post_date, "%B %d, %Y")
        assert all(tag['alt'] == tag['title'] for tag in comic_imgs)
        return {
            'title': title,
            'img': [tag['src'] for tag in comic_imgs],
            'alt': ''.join(tag['alt'] for tag in comic_imgs),
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'author': author,
        }
2195
2196
2197
class FatAwesomeComics(GenericNavigableComic):
    """Retriever for Fat Awesome comics."""
    # Also on http://fatawesomecomedy.tumblr.com
    name = 'fatawesome'
    long_name = 'Fat Awesome'
    url = 'http://fatawesome.com/comics'
    get_navi_link = get_a_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://fatawesome.com/shortbus/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, description, tags, alt text, image and date.

        Exactly one image with data-recalc-dims="1" is expected; anything
        after the last '?' of its src is dropped from the stored url.
        """
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        description = soup.find('meta', attrs={'name': 'description'})['content']
        tag_meta = soup.find('meta', property='article:tag')
        tags = ""
        if tag_meta:
            tags = tag_meta['content']
        # Only the leading 'YYYY-MM-DD' part of the timestamp is parsed.
        iso_date = soup.find('meta', property='article:published_time')['content'][:10]
        published = string_to_date(iso_date, "%Y-%m-%d")
        comic_imgs = soup.find_all('img', attrs={'data-recalc-dims': "1"})
        assert len(comic_imgs) == 1
        return {
            'title': title,
            'description': description,
            'tags': tags,
            'alt': "".join(tag['alt'] for tag in comic_imgs),
            'img': [tag['src'].rsplit('?', 1)[0] for tag in comic_imgs],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2228
2229
2230
class AnythingComic(GenericListableComic):
    """Retriever for Anything Comic, driven by the archive's chapter table."""
    # Also on http://tapastic.com/series/anything
    name = 'anythingcomic'
    long_name = 'Anything Comic'
    url = 'http://www.anythingcomic.com'

    @classmethod
    def get_archive_elements(cls):
        """Return the rows of the 'chapter_table' table that describe comics."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive/'))
        rows = archive_soup.find('table', id='chapter_table').find_all('tr')
        # The first 2 <tr>'s do not correspond to comics
        return rows[2:]

    @classmethod
    def get_url_from_archive_element(cls, tr):
        """Return the comic url for a four-cell archive row.

        The href of the second cell's link is joined with the class url.
        """
        _, comic_cell, _, _ = tr.find_all('td')
        anchor = comic_cell.find('a')
        return urljoin_wrapper(cls.url, anchor['href'])

    @classmethod
    def get_comic_info(cls, soup, tr):
        """Return number, title, alt text, image and date of one comic.

        Number, title and date come from the archive row `tr`; the single
        image with id 'comic_image' comes from the comic page.
        """
        num_cell, comic_cell, date_cell, _ = tr.find_all('td')
        num = int(num_cell.string)
        title = comic_cell.find('a').string
        comic_imgs = soup.find_all('img', id='comic_image')
        cleaned_date = remove_st_nd_rd_th_from_date(date_cell.string)
        published = string_to_date(cleaned_date, "%B %d, %Y, %I:%M %p")
        assert len(comic_imgs) == 1
        assert all(tag.get('alt') == tag.get('title') for tag in comic_imgs)
        return {
            'num': num,
            'title': title,
            'alt': comic_imgs[0].get('alt', ''),
            'img': [tag['src'] for tag in comic_imgs],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2271
2272
2273
class LonnieMillsap(GenericNavigableComic):
    """Retrieve Lonnie Millsap's comics."""
    name = "millsap"
    long_name = "Lonnie Millsap"
    url = "http://www.lonniemillsap.com"
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = "http://www.lonniemillsap.com/?p=42"

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, author, image urls and date read from a comic page."""
        title = soup.find("h2", class_="post-title").string
        # Author, date and images are all looked up inside the post content.
        post = soup.find("div", class_="post-content")
        author = post.find("span", class_="post-author").find("a").string
        published = post.find("span", class_="post-date").string
        day = string_to_date(published, "%B %d, %Y")
        entry = post.find("div", class_="entry")
        img_urls = [img["src"] for img in entry.find_all("img")]
        return {
            "title": title,
            "author": author,
            "img": img_urls,
            "month": day.month,
            "year": day.year,
            "day": day.day,
        }
2299
2300
2301
class LinsEditions(GenericNavigableComic):
    """Retrieve L.I.N.S. Editions comics."""
    # Also on http://linscomics.tumblr.com
    # Now on https://warandpeas.com
    name = "lins"
    long_name = "L.I.N.S. Editions"
    url = "https://linsedition.com"
    _categories = ("LINS", )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = "https://linsedition.com/2011/09/07/l-i-n-s/"

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, image urls and date read from the page's meta tags."""
        title = soup.find("meta", property="og:title")["content"]
        image_metas = soup.find_all("meta", property="og:image")
        published = soup.find("meta", property="article:published_time")["content"]
        # Only the first 10 characters (the YYYY-MM-DD part) are parsed.
        day = string_to_date(published[:10], "%Y-%m-%d")
        return {
            "title": title,
            "img": [meta["content"] for meta in image_metas],
            "month": day.month,
            "year": day.year,
            "day": day.day,
        }
2327
2328
2329
class ThorsThundershack(GenericNavigableComic):
    """Retrieve Thor's Thundershack comics."""
    # Also on http://tapastic.com/series/Thors-Thundershac
    name = "thor"
    long_name = "Thor's Thundershack"
    url = "http://www.thorsthundershack.com"
    _categories = ("THOR", )
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'first' navigation link found on the home page."""
        home = get_soup_at_url(cls.url)
        return home.find("a", class_="first navlink")

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the next (or previous) comic link, or None if there is none."""
        rel = "next" if next_ else "prev"
        # Links whose href is '/comic' are skipped.
        candidates = (a for a in last_soup.find_all("a", rel=rel) if a["href"] != "/comic")
        return next(candidates, None)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return images, date, author, title, alt and description of a comic."""
        title = soup.find("meta", attrs={"name": "description"})["content"]
        description = soup.find("div", itemprop="articleBody").text
        author = soup.find("span", itemprop="author copyrightHolder").string
        imgs = soup.find_all("img", itemprop="image")
        assert all(img["title"] == img["alt"] for img in imgs)
        if imgs:
            alt = imgs[0]["alt"]
        else:
            alt = ""
        published = soup.find("time", itemprop="datePublished")["datetime"]
        day = string_to_date(published, "%Y-%m-%d %H:%M:%S")
        return {
            "img": [urljoin_wrapper(cls.url, img["src"]) for img in imgs],
            "month": day.month,
            "year": day.year,
            "day": day.day,
            "author": author,
            "title": title,
            "alt": alt,
            "description": description,
        }
2372
2373
2374 View Code Duplication
class GerbilWithAJetpack(GenericNavigableComic):
    """Retrieve Gerbil With A Jetpack comics."""
    name = "gerbil"
    long_name = "Gerbil With A Jetpack"
    url = "http://gerbilwithajetpack.com"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return images, title, alt text, author and date of a comic page."""
        title = soup.find("h2", class_="post-title").string
        author = soup.find("span", class_="post-author").find("a").string
        published = soup.find("span", class_="post-date").string
        day = string_to_date(published, "%B %d, %Y")
        imgs = soup.find("div", id="comic").find_all("img")
        # At least one image is required: the first one provides the alt text.
        alt = imgs[0]["alt"]
        assert all(img["alt"] == img["title"] == alt for img in imgs)
        img_urls = [img["src"] for img in imgs]
        return {
            "img": img_urls,
            "title": title,
            "alt": alt,
            "author": author,
            "day": day.day,
            "month": day.month,
            "year": day.year
        }
2401
2402
2403 View Code Duplication
class EveryDayBlues(GenericNavigableComic):
    """Retrieve Every Day Blues comics."""
    name = 'blues'
    long_name = 'Every Day Blues'
    url = 'http://everydayblues.net'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return images, title, author and date of a comic page."""
        title = soup.find('h2', class_='post-title').string
        author = soup.find('span', class_='post-author').find('a').string
        published = soup.find('span', class_='post-date').string
        # The date is parsed with a German locale.
        day = string_to_date(published, '%d. %B %Y', 'de_DE.utf8')
        imgs = soup.find('div', id='comic').find_all('img')
        assert all(img['alt'] == img['title'] == title for img in imgs)
        assert len(imgs) <= 1
        img_urls = [img['src'] for img in imgs]
        return {
            'img': img_urls,
            'title': title,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
2429
2430
2431 View Code Duplication
class BiterComics(GenericNavigableComic):
    """Retrieve Biter Comics."""
    name = 'biter'
    long_name = 'Biter Comics'
    url = 'http://www.bitercomics.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return image, title, alt text, author and date of a comic page."""
        title = soup.find('h1', class_='entry-title').string
        author = soup.find('span', class_='author vcard').find('a').string
        published = soup.find('span', class_='entry-date').string
        day = string_to_date(published, '%B %d, %Y')
        imgs = soup.find('div', id='comic').find_all('img')
        assert all(img['alt'] == img['title'] for img in imgs)
        # Exactly one image is expected per comic.
        assert len(imgs) == 1
        alt = imgs[0]['alt']
        img_urls = [img['src'] for img in imgs]
        return {
            'img': img_urls,
            'title': title,
            'alt': alt,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
2459
2460
2461 View Code Duplication
class TheAwkwardYeti(GenericNavigableComic):
    """Retrieve The Awkward Yeti comics."""
    # Also on http://www.gocomics.com/the-awkward-yeti
    # Also on http://larstheyeti.tumblr.com
    # Also on https://tapastic.com/series/TheAwkwardYeti
    name = "yeti"
    long_name = "The Awkward Yeti"
    url = "http://theawkwardyeti.com"
    _categories = ("YETI", )
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return images, title and date of a comic page."""
        title = soup.find("h2", class_="post-title").string
        published = soup.find("span", class_="post-date").string
        day = string_to_date(published, "%B %d, %Y")
        imgs = soup.find("div", id="comic").find_all("img")
        # Only the first image is checked for matching alt and title.
        assert not imgs or imgs[0]["alt"] == imgs[0]["title"]
        img_urls = [img["src"] for img in imgs]
        return {
            "img": img_urls,
            "title": title,
            "day": day.day,
            "month": day.month,
            "year": day.year
        }
2488
2489
2490
class PleasantThoughts(GenericNavigableComic):
    """Retrieve Pleasant Thoughts comics."""
    name = "pleasant"
    long_name = "Pleasant Thoughts"
    url = "http://pleasant-thoughts.com"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the title and image urls found in the post content."""
        post = soup.find("div", class_="post-content")
        title = post.find("h2", class_="post-title").string
        entry = post.find("div", class_="entry")
        img_urls = [img["src"] for img in entry.find_all("img")]
        return {
            "title": title,
            "img": img_urls,
        }
2508
2509
2510 View Code Duplication
class MisterAndMe(GenericNavigableComic):
    """Retrieve Mister & Me comics."""
    # Also on http://www.gocomics.com/mister-and-me
    # Also on https://tapastic.com/series/Mister-and-Me
    name = "mister"
    long_name = "Mister & Me"
    url = "http://www.mister-and-me.com"
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return image, title, alt text, author and date of a comic page."""
        title = soup.find("h2", class_="post-title").string
        author = soup.find("span", class_="post-author").find("a").string
        published = soup.find("span", class_="post-date").string
        day = string_to_date(published, "%B %d, %Y")
        imgs = soup.find("div", id="comic").find_all("img")
        assert all(img["alt"] == img["title"] for img in imgs)
        # A page may have no image at all, but never more than one.
        assert len(imgs) <= 1
        if imgs:
            alt = imgs[0]["alt"]
        else:
            alt = ""
        return {
            "img": [img["src"] for img in imgs],
            "title": title,
            "alt": alt,
            "author": author,
            "day": day.day,
            "month": day.month,
            "year": day.year
        }
2540
2541
2542 View Code Duplication
class LastPlaceComics(GenericNavigableComic):
    """Retrieve Last Place Comics."""
    name = "lastplace"
    long_name = "Last Place Comics"
    url = 'http://lastplacecomics.com'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return image, title, alt text, author and date of a comic page."""
        title = soup.find("h2", class_="post-title").string
        author = soup.find('span', class_='post-author').find('a').string
        published = soup.find('span', class_='post-date').string
        day = string_to_date(published, '%B %d, %Y')
        comic_div = soup.find('div', id='comic')
        imgs = comic_div.find_all('img')
        assert all(img["alt"] == img["title"] for img in imgs)
        # A page may have no image at all, but never more than one.
        assert len(imgs) <= 1
        alt = "" if not imgs else imgs[0]["alt"]
        return {
            "img": [img["src"] for img in imgs],
            "title": title,
            "alt": alt,
            "author": author,
            "day": day.day,
            "month": day.month,
            "year": day.year
        }
2570
2571
2572 View Code Duplication
class TalesOfAbsurdity(GenericNavigableComic):
    """Retrieve Tales Of Absurdity comics."""
    # Also on http://tapastic.com/series/Tales-Of-Absurdity
    # Also on http://talesofabsurdity.tumblr.com
    name = "absurdity"
    long_name = "Tales of Absurdity"
    url = "http://talesofabsurdity.com"
    _categories = ("ABSURDITY", )
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return images, title, alt text, author and date of a comic page."""
        title = soup.find("h2", class_="post-title").string
        author = soup.find('span', class_='post-author').find('a').string
        published = soup.find('span', class_='post-date').string
        day = string_to_date(published, '%B %d, %Y')
        imgs = soup.find('div', id='comic').find_all('img')
        assert all(img["alt"] == img["title"] for img in imgs)
        # The alt text comes from the first image; empty when there is none.
        if imgs:
            alt = imgs[0]["alt"]
        else:
            alt = ""
        return {
            "img": [img["src"] for img in imgs],
            "title": title,
            "alt": alt,
            "author": author,
            "day": day.day,
            "month": day.month,
            "year": day.year
        }
2602
2603
2604 View Code Duplication
class EndlessOrigami(GenericEmptyComic, GenericNavigableComic):
    """Retrieve Endless Origami comics."""
    name = 'origami'
    long_name = 'Endless Origami'
    url = 'http://endlessorigami.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return images, title, alt text, author and date of a comic page."""
        title = soup.find("h2", class_="post-title").string
        author_span = soup.find('span', class_='post-author')
        author = author_span.find('a').string
        published = soup.find('span', class_='post-date').string
        day = string_to_date(published, '%B %d, %Y')
        imgs = soup.find('div', id='comic').find_all('img')
        assert all(img["alt"] == img["title"] for img in imgs)
        # The alt text comes from the first image; empty when there is none.
        alt = "" if not imgs else imgs[0]["alt"]
        img_urls = [img["src"] for img in imgs]
        return {
            "img": img_urls,
            "title": title,
            "alt": alt,
            "author": author,
            "day": day.day,
            "month": day.month,
            "year": day.year
        }
2631
2632
2633
class PlanC(GenericNavigableComic):
    """Retrieve Plan C comics."""
    name = "planc"
    long_name = "Plan C"
    url = "http://www.plancomic.com"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, image urls and date of a comic page."""
        title = soup.find("h2", class_="post-title").string
        published = soup.find('span', class_='post-date').string
        day = string_to_date(published, '%B %d, %Y')
        comic_div = soup.find("div", id="comic")
        img_urls = [img["src"] for img in comic_div.find_all("img")]
        return {
            "title": title,
            "img": img_urls,
            "month": day.month,
            "year": day.year,
            "day": day.day,
        }
2655
2656
2657
class BuniComic(GenericNavigableComic):
    """Retrieve Buni comics."""
    name = "buni"
    long_name = "BuniComics"
    url = "http://www.bunicomic.com"
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the image url and title of a comic page.

        The title is taken from the image's `title` attribute.
        """
        comic_div = soup.find("div", id="comic")
        imgs = comic_div.find_all("img")
        assert all(img["alt"] == img["title"] for img in imgs)
        # Exactly one image is expected per comic.
        assert len(imgs) == 1
        img_urls = [img["src"] for img in imgs]
        return {
            "img": img_urls,
            "title": imgs[0]["title"],
        }
2675
2676
2677
class GenericCommitStrip(GenericNavigableComic):
    """Shared retrieval logic for the Commit Strip comics of each language."""
    get_navi_link = get_a_rel_next
    get_first_comic_link = simulate_first_link
    # To be provided by the language-specific subclasses.
    first_url = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, image titles, description and image urls of a comic."""
        description = soup.find("meta", property="og:description")["content"]
        title = soup.find("meta", property="og:title")["content"]
        imgs = soup.find("div", class_="entry-content").find_all("img")
        # Second title: the images' title attributes joined with spaces.
        title2 = " ".join(img.get("title", "") for img in imgs)
        img_urls = [
            urljoin_wrapper(cls.url, convert_iri_to_plain_ascii_uri(img["src"]))
            for img in imgs
        ]
        return {
            "title": title,
            "title2": title2,
            "description": description,
            "img": img_urls,
        }
2696
2697
2698
class CommitStripFr(GenericCommitStrip):
    """French edition of Commit Strip."""
    name = "commit_fr"
    long_name = "Commit Strip (Fr)"
    url = "http://www.commitstrip.com/fr"
    _categories = ("FRANCAIS", )
    first_url = "http://www.commitstrip.com/fr/2012/02/22/interview/"
2705
2706
2707
class CommitStripEn(GenericCommitStrip):
    """English edition of Commit Strip."""
    name = "commit_en"
    long_name = "Commit Strip (En)"
    url = "http://www.commitstrip.com/en"
    first_url = "http://www.commitstrip.com/en/2012/02/22/interview/"
2713
2714
2715 View Code Duplication
class GenericBoumerie(GenericNavigableComic):
    """Shared retrieval logic for the Boumeries comics of each language."""
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next
    # Date format and locale are to be provided by the subclasses.
    date_format = NotImplemented
    lang = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return short url, images, title, author and date of a comic page.

        The date is parsed with the subclass's `date_format` and `lang`.
        """
        title = soup.find("h2", class_="post-title").string
        short_url = soup.find("link", rel="shortlink")["href"]
        author = soup.find('span', class_='post-author').find('a').string
        published = soup.find("span", class_="post-date").string
        day = string_to_date(published, cls.date_format, cls.lang)
        imgs = soup.find("div", id="comic").find_all("img")
        assert all(img["alt"] == img["title"] for img in imgs)
        img_urls = [img["src"] for img in imgs]
        return {
            "short_url": short_url,
            "img": img_urls,
            "title": title,
            "author": author,
            "month": day.month,
            "year": day.year,
            "day": day.day,
        }
0 ignored issues
show
This code seems to be duplicated in your project.
Loading history...
2741
2742
2743
class BoumerieEn(GenericBoumerie):
    """English edition of Boumeries."""
    name = "boumeries_en"
    long_name = "Boumeries (En)"
    url = "http://comics.boumerie.com"
    date_format = '%B %d, %Y'
    lang = "en_GB.UTF-8"
2750
2751
2752
class BoumerieFr(GenericBoumerie):
    """French edition of Boumeries."""
    name = "boumeries_fr"
    long_name = "Boumeries (Fr)"
    url = "http://bd.boumerie.com"
    _categories = ("FRANCAIS", )
    date_format = '%A, %d %B %Y'
    lang = 'fr_FR.utf8'
2760
2761
2762
class UnearthedComics(GenericNavigableComic):
    """Class to retrieve Unearthed comics."""
    # Also on http://tapastic.com/series/UnearthedComics
    # Also on http://unearthedcomics.tumblr.com
    name = 'unearthed'
    long_name = 'Unearthed Comics'
    url = 'http://unearthedcomics.com'
    _categories = ('UNEARTHED', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://unearthedcomics.com/comics/world-with-turn-signals/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with the title ("" when the page has no h1/h2), the
        og:description text (None when the meta tag is missing), the short
        url, the image urls and the publication date.
        """
        short_url = soup.find('link', rel='shortlink')['href']
        title_elt = soup.find('h1') or soup.find('h2')
        title = title_elt.string if title_elt else ""
        # Keep the meta's content string rather than the Tag object itself,
        # so that 'description' is plain text.
        desc_meta = soup.find('meta', property='og:description')
        desc = desc_meta.get('content') if desc_meta else None
        date_str = soup.find('time', class_='published updated hidden')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        post = soup.find('div', class_="entry content entry-content type-portfolio")
        imgs = post.find_all('img')
        return {
            'title': title,
            'description': desc,
            'url2': short_url,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
2794
2795
2796 View Code Duplication
class Optipess(GenericNavigableComic):
    """Retrieve Optipess comics."""
    name = "optipess"
    long_name = "Optipess"
    url = "http://www.optipess.com"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, alt text, author, image urls and date of a comic page."""
        title = soup.find("h2", class_="post-title").string
        author = soup.find('span', class_='post-author').find('a').string
        # Pages without a comic div yield an empty image list.
        comic_div = soup.find("div", id="comic")
        if comic_div:
            imgs = comic_div.find_all("img")
        else:
            imgs = []
        alt = "" if not imgs else imgs[0]["title"]
        assert all(img["alt"] == img["title"] == alt for img in imgs)
        published = soup.find("span", class_="post-date").string
        day = string_to_date(published, '%B %d, %Y')
        return {
            "title": title,
            "alt": alt,
            "author": author,
            "img": [img["src"] for img in imgs],
            "month": day.month,
            "year": day.year,
            "day": day.day,
        }
2824
2825
2826
class PainTrainComic(GenericNavigableComic):
    """Class to retrieve Pain Train Comics."""
    name = 'paintrain'
    long_name = 'Pain Train Comics'
    url = 'http://paintraincomic.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with the short url, the comic number extracted from
        that short url, the image urls, the date, the alt text and the title.
        """
        title = soup.find('h2', class_='post-title').string
        short_url = soup.find('link', rel='shortlink')['href']
        # The site url is escaped so that its dots are matched literally
        # instead of acting as regex wildcards.
        short_url_re = re.compile(r'^%s/\?p=([0-9]*)' % re.escape(cls.url))
        num = int(short_url_re.match(short_url).groups()[0])
        imgs = soup.find('div', id='comic').find_all('img')
        # The first image provides the alt text shared by all images.
        alt = imgs[0]['title']
        assert all(i['alt'] == i['title'] == alt for i in imgs)
        date_str = soup.find('span', class_='post-date').string
        day = string_to_date(date_str, "%d/%m/%Y")
        return {
            'short_url': short_url,
            'num': num,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'alt': alt,
            'title': title,
        }
2856
2857
2858
class MoonBeard(GenericNavigableComic):
    """Class to retrieve MoonBeard comics."""
    # Also on http://blog.squiresjam.es/moonbeard
    # Also on http://www.webtoons.com/en/comedy/moon-beard/list?title_no=471
    name = 'moonbeard'
    long_name = 'Moon Beard'
    url = 'http://moonbeard.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with the short url, the comic number extracted from
        that short url, the image urls, the date, the title, the
        space-joined article tags, the alt text and the author.
        """
        title = soup.find('h2', class_='post-title').string
        short_url = soup.find('link', rel='shortlink')['href']
        # The site url is escaped so that its dots are matched literally
        # instead of acting as regex wildcards.
        short_url_re = re.compile(r'^%s/\?p=([0-9]*)' % re.escape(cls.url))
        num = int(short_url_re.match(short_url).groups()[0])
        imgs = soup.find('div', id='comic').find_all('img')
        # The first image provides the alt text shared by all images.
        alt = imgs[0]['title']
        assert all(i['alt'] == i['title'] == alt for i in imgs)
        date_str = soup.find('span', class_='post-date').string
        day = string_to_date(date_str, "%B %d, %Y")
        tags = ' '.join(t['content'] for t in soup.find_all('meta', property='article:tag'))
        author = soup.find('span', class_='post-author').string
        return {
            'short_url': short_url,
            'num': num,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'title': title,
            'tags': tags,
            'alt': alt,
            'author': author,
        }
2894
2895
2896
class AHamADay(GenericNavigableComic):
    """Class to retrieve class A Ham A Day comics."""
    name = 'ham'
    long_name = 'A Ham A Day'
    url = 'http://www.ahammaday.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.ahammaday.com/today/3/6/french'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns None when the page has no matching navigation item.
        """
        # prev is next / next is prev
        li = last_soup.find('li', class_='previous' if next_ else 'next')
        # Guard against a missing <li> instead of failing on None.find().
        return li.find('a') if li else None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with the image urls, the title, the author and the
        publication date.
        """
        date_str = soup.find('time', class_='published')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        author = soup.find('span', class_='blog-author').find('a').string
        title = soup.find('meta', property='og:title')['content']
        imgs = soup.find_all('meta', itemprop='image')
        return {
            'img': [i['content'] for i in imgs],
            'title': title,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
        }
2927
2928
2929
class LittleLifeLines(GenericNavigableComic):
    """Retrieve Little Life Lines comics."""
    # Also on https://little-life-lines.tumblr.com
    name = "life"
    long_name = "Little Life Lines"
    url = "http://www.littlelifelines.com"
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = "http://www.littlelifelines.com/comics/well-done"

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the next (or previous) comic link, or None if there is none."""
        # prev is next / next is prev
        wanted_class = "prev" if next_ else "next"
        item = last_soup.find("li", class_=wanted_class)
        if not item:
            return None
        return item.find("a")

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, alt, description, author, date and images of a comic."""
        title = soup.find("meta", property="og:title")["content"]
        description = soup.find("meta", property="og:description")["content"]
        published = soup.find("time", class_="published")["datetime"]
        day = string_to_date(published, '%Y-%m-%d')
        author = soup.find("a", rel="author").string
        content = soup.find("div", class_='body entry-content')
        # Images without a src attribute are ignored.
        imgs = [img for img in content.find_all("img") if img.get("src") is not None]
        alt = imgs[0]["alt"]
        return {
            "title": title,
            "alt": alt,
            "description": description,
            "author": author,
            "day": day.day,
            "month": day.month,
            "year": day.year,
            "img": [img["src"] for img in imgs],
        }
2968
2969
2970
class GenericWordPressInkblot(GenericNavigableComic):
    """Shared retrieval logic for comics using WordPress with Inkblot."""
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'first webcomic' link found on the home page."""
        first_link_class = "webcomic-link webcomic1-link first-webcomic-link first-webcomic1-link"
        home = get_soup_at_url(cls.url)
        return home.find("a", class_=first_link_class)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, date and image urls of a comic page."""
        title = soup.find("meta", property="og:title")["content"]
        imgs = soup.find("div", class_="webcomic-image").find_all("img")
        published = soup.find("meta", property="article:published_time")["content"]
        # Only the first 10 characters (the YYYY-MM-DD part) are parsed.
        day = string_to_date(published[:10], '%Y-%m-%d')
        return {
            "title": title,
            "day": day.day,
            "month": day.month,
            "year": day.year,
            "img": [img["src"] for img in imgs],
        }
2993
2994
2995
class EverythingsStupid(GenericWordPressInkblot):
    """Retrieve Everything's Stupid comics."""
    # Also on http://tapastic.com/series/EverythingsStupid
    # Also on http://www.webtoons.com/en/challenge/everythings-stupid/list?title_no=14591
    # Also on http://everythingsstupidcomics.tumblr.com
    name = "stupid"
    long_name = 'Everything\'s Stupid'
    url = "http://everythingsstupid.net"
3003
3004
3005
class TheIsmComics(GenericWordPressInkblot):
    """Retrieve The Ism comics."""
    # Also on https://tapastic.com/series/TheIsm (?)
    name = "theism"
    long_name = 'The Ism'
    url = "http://www.theism-comics.com"
3011
3012
3013
class WoodenPlankStudios(GenericWordPressInkblot):
    """Retrieve Wooden Plank Studios comics through GenericWordPressInkblot."""
    name = 'woodenplank'
    long_name = 'Wooden Plank Studios'
    url = 'http://woodenplankstudios.com'
3018
3019
3020
class ElectricBunnyComic(GenericNavigableComic):
    """Scraper for Electric Bunny Comics.

    Navigation is located through <img> elements identified by their alt
    text ('First', 'Next', 'Back'); the parent of the matching image is
    returned as the link.
    """
    # Also on http://electricbunnycomics.tumblr.com
    name = 'bunny'
    long_name = 'Electric Bunny Comic'
    url = 'http://www.electricbunnycomics.com/View/Comic/153/Welcome+to+Hell'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the parent of the 'First' image on the page at cls.url."""
        first_img = get_soup_at_url(cls.url).find('img', alt='First')
        return first_img.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the parent of the 'Next'/'Back' image, or None if absent."""
        wanted_alt = 'Next' if next_ else 'Back'
        nav_img = last_soup.find('img', alt=wanted_alt)
        if not nav_img:
            return None
        return nav_img.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic dictionary from the page's Open Graph meta tags."""
        og_title = soup.find('meta', property='og:title')['content']
        og_images = soup.find_all('meta', property='og:image')
        return {
            'title': og_title,
            'img': [meta['content'] for meta in og_images],
        }
3048
3049
3050
class SheldonComics(GenericNavigableComic):
    """Scraper for the Sheldon comic strip."""
    # Also on http://www.gocomics.com/sheldon
    name = 'sheldon'
    long_name = 'Sheldon Comics'
    url = 'http://www.sheldoncomics.com'

    @classmethod
    def get_first_comic_link(cls):
        """Return the anchor with id 'nav-first' from the page at cls.url."""
        home_soup = get_soup_at_url(cls.url)
        return home_soup.find("a", id="nav-first")

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the first next/previous anchor not pointing at the home page.

        Anchors whose href is exactly the site root are ignored; None is
        returned when no other candidate exists.
        """
        wanted_id = "nav-next" if next_ else "nav-prev"
        candidates = (
            anchor for anchor in last_soup.find_all("a", id=wanted_id)
            if anchor['href'] != 'http://www.sheldoncomics.com'
        )
        return next(candidates, None)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic dictionary from the image in the 'comic-foot' div."""
        strip_imgs = soup.find("div", id="comic-foot").find_all("img")
        # Sanity checks: alt and title must agree, and exactly one image is expected.
        assert all(img['alt'] == img['title'] for img in strip_imgs)
        assert len(strip_imgs) == 1
        return {
            'title': strip_imgs[0]['title'],
            'img': [img['src'] for img in strip_imgs],
        }
3081
3082
3083
class Ubertool(GenericNavigableComic):
    """Scraper for the Ubertool webcomic.

    First-comic and navigation lookups are delegated to the module-level
    get_a_comicnavbase_comicnavfirst / get_a_comicnavbase_comicnavnext helpers.
    """
    # Also on http://ubertool.tumblr.com
    # Also on https://tapastic.com/series/ubertool
    name = 'ubertool'
    long_name = 'Ubertool'
    url = 'http://ubertoolcomic.com'
    _categories = ('UBERTOOL', )
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_a_comicnavbase_comicnavnext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic dictionary (images, title, date) from a page soup."""
        post_title = soup.find('h2', class_='post-title').string
        post_date = soup.find('span', class_='post-date').string
        # The date text is parsed with the "%B %d, %Y" format.
        parsed = string_to_date(post_date, "%B %d, %Y")
        comic_imgs = soup.find('div', id='comic').find_all('img')
        return {
            'img': [img['src'] for img in comic_imgs],
            'title': post_title,
            'month': parsed.month,
            'year': parsed.year,
            'day': parsed.day,
        }
3108
3109
3110
class EarthExplodes(GenericNavigableComic):
    """Scraper for The Earth Explodes.

    The first comic is not looked up on the site: it is simulated from
    first_url through the module-level simulate_first_link helper.
    """
    name = 'earthexplodes'
    long_name = 'The Earth Explodes'
    url = 'http://www.earthexplodes.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.earthexplodes.com/comics/000/'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor with id 'next' or 'prev' (None if missing)."""
        anchor_id = 'next' if next_ else 'prev'
        return last_soup.find('a', id=anchor_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic dictionary (images, title, alt text) from a page soup."""
        page_title = soup.find('title').string
        comic_imgs = soup.find('div', id='image').find_all('img')
        # The alt text is the 'title' attribute of the first image, '' if unset.
        hover_text = comic_imgs[0].get('title', '')
        # Image sources are joined with cls.url to build the stored urls.
        image_urls = [urljoin_wrapper(cls.url, img['src']) for img in comic_imgs]
        return {
            'img': image_urls,
            'title': page_title,
            'alt': hover_text,
        }
3135
3136
3137
class CubeDrone(GenericNavigableComic):
    """Class to retrieve Cube Drone comics.

    Navigation links are found through glyphicon <span> elements; the parent
    of the matching span is returned as the link.
    """
    name = 'cubedrone'
    long_name = 'Cube Drone'
    url = 'http://cube-drone.com/comics'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics."""
        return get_soup_at_url(cls.url).find('span', class_='glyphicon glyphicon-backward').parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns None when the corresponding chevron span is not on the page
        instead of failing on a missing element.
        """
        class_ = 'glyphicon glyphicon-chevron-' + ('right' if next_ else 'left')
        span = last_soup.find('span', class_=class_)
        return span.parent if span is not None else None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics."""
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        url2 = soup.find('meta', attrs={'name': 'twitter:url'})['content']
        # A date is not extracted; the original kept this disabled attempt:
        # date_str = soup.find('h2', class_='comic_title').find('small').string
        # day = string_to_date(date_str, "%B %d, %Y, %I:%M %p")
        imgs = soup.find_all('img', class_='comic img-responsive')
        # 'title2' and 'alt' come from the first image only.
        title2 = imgs[0]['title']
        alt = imgs[0]['alt']
        return {
            'url2': url2,
            'title': title,
            'title2': title2,
            'alt': alt,
            'img': [i['src'] for i in imgs],
        }
3172
3173
3174
class MakeItStoopid(GenericNavigableComic):
    """Scraper for Make It Stoopid.

    All navigation goes through get_nav, which reads the elements carrying
    the 'cnav' class.
    """
    name = 'stoopid'
    long_name = 'Make it stoopid'
    url = 'http://makeitstoopid.com/comic.php'

    @classmethod
    def get_nav(cls, soup):
        """Return the first five 'cnav' elements, with None for those lacking an href.

        The elements after the fifth are asserted to be equal to the first five.
        """
        all_nav = soup.find_all(class_='cnav')
        first_bar = all_nav[:5]
        second_bar = all_nav[5:]
        assert first_bar == second_bar
        # Positions are: begin, prev, archive, next_, end
        return [elt if elt.get('href') is not None else None for elt in first_bar]

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'begin' navigation element from the page at cls.url."""
        nav = cls.get_nav(get_soup_at_url(cls.url))
        return nav[0]

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the 'next' (index 3) or 'prev' (index 1) navigation element."""
        position = 3 if next_ else 1
        return cls.get_nav(last_soup)[position]

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic dictionary; the title comes from the link itself."""
        link_title = link['title']
        comic_imgs = soup.find_all('img', id='comicimg')
        return {
            'title': link_title,
            'img': [img['src'] for img in comic_imgs],
        }
3208
3209
3210
class MarketoonistComics(GenericNavigableComic):
    """Scraper for Marketoonist cartoons.

    The first comic is simulated from first_url and navigation is delegated
    to the module-level get_link_rel_next helper.
    """
    name = 'marketoonist'
    long_name = 'Marketoonist'
    url = 'https://marketoonist.com/cartoons'
    get_first_comic_link = simulate_first_link
    get_navi_link = get_link_rel_next
    first_url = 'https://marketoonist.com/2002/10/the-8-types-of-brand-managers-2.html'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic dictionary from the page's meta tags."""
        og_images = soup.find_all('meta', property='og:image')
        published = soup.find('meta', property='article:published_time')['content']
        # Only the first 10 characters are parsed, using the "%Y-%m-%d" format.
        pub_date = string_to_date(published[:10], "%Y-%m-%d")
        og_title = soup.find('meta', property='og:title')['content']
        return {
            'img': [meta['content'] for meta in og_images],
            'day': pub_date.day,
            'month': pub_date.month,
            'year': pub_date.year,
            'title': og_title,
        }
3233
3234
3235
class ConsoliaComics(GenericNavigableComic):
    """Scraper for the Consolia webcomic."""
    name = 'consolia'
    long_name = 'consolia'
    url = 'https://consolia-comic.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the anchor with class 'first' from the page at cls.url."""
        home_soup = get_soup_at_url(cls.url)
        return home_soup.find('a', class_='first')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor with class 'next' or 'prev' (None if missing)."""
        anchor_class = 'next' if next_ else 'prev'
        return last_soup.find('a', class_=anchor_class)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic dictionary (title, images, date) from a page soup."""
        og_title = soup.find('meta', property='og:title')['content']
        # The date is read from the 'datetime' attribute of the first <time> tag.
        iso_date = soup.find('time')["datetime"]
        pub_date = string_to_date(iso_date, "%Y-%m-%d")
        og_images = soup.find_all('meta', property='og:image')
        return {
            'title': og_title,
            'img': [meta['content'] for meta in og_images],
            'day': pub_date.day,
            'month': pub_date.month,
            'year': pub_date.year,
        }
3266
3267
3268
class TuMourrasMoinsBete(GenericNavigableComic):
    """Scraper for the Tu Mourras Moins Bete blog.

    The first comic is simulated from first_url; navigation uses the blog
    pager anchors ('newer' / 'older').
    """
    name = 'mourrasmoinsbete'
    long_name = 'Tu Mourras Moins Bete'
    url = 'http://tumourrasmoinsbete.blogspot.fr'
    _categories = ('FRANCAIS', )
    get_first_comic_link = simulate_first_link
    first_url = 'http://tumourrasmoinsbete.blogspot.fr/2008/06/essai.html'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the 'newer' (next) or 'older' (previous) pager anchor."""
        if next_:
            pager_id = 'Blog1_blog-pager-newer-link'
        else:
            pager_id = 'Blog1_blog-pager-older-link'
        return last_soup.find('a', id=pager_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic dictionary (images, author, title) from a page soup."""
        page_title = soup.find('title').string
        body_imgs = soup.find('div', itemprop='description articleBody').find_all('img')
        post_author = soup.find('span', itemprop='author').string
        return {
            'img': [img['src'] for img in body_imgs],
            'author': post_author,
            'title': page_title,
        }
3293
3294
3295
class GeekAndPoke(GenericNavigableComic):
    """Class to retrieve Geek And Poke comics.

    The first comic is simulated from first_url through the module-level
    simulate_first_link helper.
    """
    name = 'geek'
    long_name = 'Geek And Poke'
    url = 'http://geek-and-poke.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://geek-and-poke.com/geekandpoke/2006/8/27/a-new-place-for-a-not-so-old-blog.html'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic."""
        # NOTE(review): 'prev-item' is used to go forward and 'next-item' to go
        # back; presumably the site labels its pager newest-first - confirm.
        return last_soup.find('a', class_='prev-item' if next_ else 'next-item')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dictionary with the title, alt text, description, author,
        publication date and image urls. 'alt' is always a string: the alt
        text of the first image, or "" when the post has no usable image.
        """
        title = soup.find('meta', property='og:title')['content']
        desc = soup.find('meta', property='og:description')['content']
        date_str = soup.find('time', class_='published')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        author = soup.find('a', rel='author').string
        # Two page layouts are handled: regular posts and "special" content.
        div_content = (soup.find('div', class_="body entry-content") or
                       soup.find('div', class_="special-content"))
        imgs = div_content.find_all('img')
        # Images without a 'src' attribute cannot be downloaded: drop them.
        imgs = [i for i in imgs if i.get('src') is not None]
        # NOTE(review): on a BeautifulSoup Tag, `'title' not in i` presumably
        # tests the tag's children rather than its attributes, which would make
        # this assertion always pass - confirm before tightening it.
        assert all('title' not in i or i['alt'] == i['title'] for i in imgs)
        # Fall back to an empty string (not a list) so 'alt' keeps one type.
        alt = imgs[0].get('alt', "") if imgs else ""
        return {
            'title': title,
            'alt': alt,
            'description': desc,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
        }
3333
3334
3335
class GloryOwlComix(GenericNavigableComic):
    """Scraper for the Glory Owl blog.

    The first comic is simulated from first_url; navigation uses the blog
    pager anchors ('newer' / 'older').
    """
    name = 'gloryowl'
    long_name = 'Glory Owl'
    url = 'http://gloryowlcomix.blogspot.fr'
    _categories = ('NSFW', 'FRANCAIS')
    get_first_comic_link = simulate_first_link
    first_url = 'http://gloryowlcomix.blogspot.fr/2013/02/1_7.html'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the 'newer' (next) or 'older' (previous) pager anchor."""
        if next_:
            pager_id = 'Blog1_blog-pager-newer-link'
        else:
            pager_id = 'Blog1_blog-pager-older-link'
        return last_soup.find('a', id=pager_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic dictionary (images, author, title) from a page soup."""
        page_title = soup.find('title').string
        image_links = soup.find_all('link', rel='image_src')
        post_author = soup.find('a', rel='author').string
        return {
            'img': [tag['href'] for tag in image_links],
            'author': post_author,
            'title': page_title,
        }
3360
3361
3362
class GenericTumblrV1(GenericComic):
    """Shared scraper for comics published on Tumblr, using the V1 API.

    Subclasses provide name, long_name and url; posts are read from the
    '/api/read/' endpoint of that url.
    """
    _categories = ('TUMBLR', )

    @classmethod
    def get_next_comic(cls, last_comic):
        """Yield the comics found after last_comic, skipping unusable posts."""
        for post in cls.get_posts(last_comic):
            info = cls.get_comic_info(post)
            if info is None:
                continue
            yield info

    @classmethod
    def get_url_from_post(cls, post):
        """Return the value of the 'url' attribute of a post."""
        return post['url']

    @classmethod
    def get_api_url(cls):
        """Return the '/api/read/' url joined with cls.url."""
        return urljoin_wrapper(cls.url, '/api/read/')

    @classmethod
    def get_comic_info(cls, post):
        """Build the comic dictionary for a post, or None if it is not a photo post."""
        if post['type'] != 'photo':
            return None
        tumblr_id = int(post['id'])
        api_url = cls.get_api_url() + '?id=%d' % (tumblr_id)
        timestamp = int(post['unix-timestamp'])
        day = datetime.datetime.fromtimestamp(timestamp).date()
        caption = post.find('photo-caption')
        title = caption.string if caption else ""
        tags = ' '.join(tag.string for tag in post.find_all('tag'))
        # Photos may appear in 'photo' tags and/or straight in the post
        photo_holders = post.find_all('photo') or [post]
        # Images are in multiple resolutions - taking the first one
        photo_urls = [holder.find('photo-url') for holder in photo_holders]
        return {
            'url': cls.get_url_from_post(post),
            'url2': post['url-with-slug'],
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'title': title,
            'tags': tags,
            'img': [photo_url.string for photo_url in photo_urls],
            'tumblr-id': tumblr_id,
            'api_url': api_url,
        }

    @classmethod
    def get_posts(cls, last_comic, nb_post_per_call=10):
        """Get posts using API. nb_post_per_call is max 50.

        Posts are retrieved from newer to older as per the tumblr v1 api
        but are returned in chronological order. Retrieval stops at the post
        whose url matches the url stored in last_comic, when there is one.
        """
        waiting_for_url = last_comic['url'] if last_comic else None
        collected = []
        if last_comic is not None:
            # Sometimes, tumblr posts are deleted. When previous post is deleted, we
            # might end up spending a lot of time looking for something that
            # doesn't exist. Failing early and clearly might be a better option.
            last_api_url = last_comic['api_url']
            try:
                get_soup_at_url(last_api_url)
            except urllib.error.HTTPError:
                # Check the main url too, to pick the right message.
                try:
                    get_soup_at_url(cls.url)
                except urllib.error.HTTPError:
                    print("Did not find previous post nor main url %s" % cls.url)
                else:
                    print("Did not find previous post %s : it might have been deleted" % last_api_url)
                return reversed(collected)
        api_url = cls.get_api_url()
        # First call: only used to read the total number of posts.
        summary = get_soup_at_url(api_url).find('posts')
        start = int(summary['start'])
        total = int(summary['total'])
        assert start == 0
        for starting_num in range(0, total, nb_post_per_call):
            page_url = api_url + '?start=%d&num=%d' % (starting_num, nb_post_per_call)
            page = get_soup_at_url(page_url).find('posts')
            page_start = int(page['start'])
            page_total = int(page['total'])
            assert starting_num == page_start, "%d != %d" % (starting_num, page_start)
            # This may happen and should be handled in the future
            assert total == page_total, "%d != %d" % (total, page_total)
            for post in page.find_all('post'):
                if waiting_for_url and waiting_for_url == cls.get_url_from_post(post):
                    # Reached the last known post: everything newer is collected.
                    return reversed(collected)
                collected.append(post)
        if waiting_for_url is not None:
            print("Did not find %s : there might be a problem" % waiting_for_url)
            return []
        return reversed(collected)
3455
3456
3457
class SaturdayMorningBreakfastCerealTumblr(GenericTumblrV1):
    """Retrieve Saturday Morning Breakfast Cereal comics from the Tumblr blog."""
    # Also on http://www.gocomics.com/saturday-morning-breakfast-cereal
    # Also on http://www.smbc-comics.com
    name = 'smbc-tumblr'
    long_name = 'Saturday Morning Breakfast Cereal (from Tumblr)'
    url = 'http://smbc-comics.tumblr.com'
    _categories = ('SMBC', )
3465
3466
3467
class IrwinCardozo(GenericTumblrV1):
    """Retrieve Irwin Cardozo comics from the Tumblr blog."""
    name = 'irwinc'
    long_name = 'Irwin Cardozo'
    url = 'http://irwincardozocomics.tumblr.com'
3472
3473
3474
class AccordingToDevin(GenericTumblrV1):
    """Retrieve According To Devin comics from the Tumblr blog."""
    name = 'devin'
    long_name = 'According To Devin'
    url = 'http://accordingtodevin.tumblr.com'
3479
3480
3481
class ItsTheTieTumblr(GenericTumblrV1):
    """Retrieve It's The Tie comics from the Tumblr blog."""
    # Also on http://itsthetie.com
    # Also on https://tapastic.com/series/itsthetie
    name = 'tie-tumblr'
    long_name = "It's the tie (from Tumblr)"
    url = "http://itsthetie.tumblr.com"
    _categories = ('TIE', )
3489
3490
3491
class OctopunsTumblr(GenericTumblrV1):
    """Retrieve Octopuns comics from the Tumblr blog."""
    # Also on http://www.octopuns.net
    name = 'octopuns-tumblr'
    long_name = 'Octopuns (from Tumblr)'
    url = 'http://octopuns.tumblr.com'
3497
3498
3499
class PicturesInBoxesTumblr(GenericTumblrV1):
    """Retrieve Pictures In Boxes comics from the Tumblr blog."""
    # Also on http://www.picturesinboxes.com
    name = 'picturesinboxes-tumblr'
    long_name = 'Pictures in Boxes (from Tumblr)'
    url = 'http://picturesinboxescomic.tumblr.com'
3505
3506
3507
class TubeyToonsTumblr(GenericTumblrV1):
    """Retrieve Tubey Toons comics from the Tumblr blog."""
    # Also on http://tapastic.com/series/Tubey-Toons
    # Also on http://tubeytoons.com
    name = 'tubeytoons-tumblr'
    long_name = 'Tubey Toons (from Tumblr)'
    url = 'http://tubeytoons.tumblr.com'
    # NOTE(review): 'TUNEYTOONS' looks like a typo for 'TUBEYTOONS'; left as is
    # because other classes may share the same tag - confirm before renaming.
    _categories = ('TUNEYTOONS', )
3515
3516
3517
class UnearthedComicsTumblr(GenericTumblrV1):
    """Retrieve Unearthed Comics from the Tumblr blog."""
    # Also on http://tapastic.com/series/UnearthedComics
    # Also on http://unearthedcomics.com
    name = 'unearthed-tumblr'
    long_name = 'Unearthed Comics (from Tumblr)'
    url = 'http://unearthedcomics.tumblr.com'
    _categories = ('UNEARTHED', )
3525
3526
3527
class PieComic(GenericTumblrV1):
    """Retrieve Pie Comic comics from the Tumblr blog."""
    name = 'pie'
    long_name = 'Pie Comic'
    url = "http://piecomic.tumblr.com"
3532
3533
3534
class MrEthanDiamond(GenericTumblrV1):
    """Retrieve Mr Ethan Diamond comics from the Tumblr blog."""
    name = 'diamond'
    long_name = 'Mr Ethan Diamond'
    url = 'http://mrethandiamond.tumblr.com'
3539
3540
3541
class Flocci(GenericTumblrV1):
    """Retrieve floccinaucinihilipilification comics from the Tumblr blog."""
    name = 'flocci'
    long_name = 'floccinaucinihilipilification'
    url = "http://floccinaucinihilipilificationa.tumblr.com"
3546
3547
3548
class UpAndOut(GenericTumblrV1):
    """Retrieve Up & Out comics from the Tumblr blog."""
    # Also on http://tapastic.com/series/UP-and-OUT
    name = 'upandout'
    long_name = 'Up And Out (from Tumblr)'
    url = 'http://upandoutcomic.tumblr.com'
3554
3555
3556
class Pundemonium(GenericTumblrV1):
    """Retrieve Pundemonium comics from the Tumblr blog."""
    name = 'pundemonium'
    long_name = 'Pundemonium'
    url = 'http://monstika.tumblr.com'
3561
3562
3563
class PoorlyDrawnLinesTumblr(GenericTumblrV1):
    """Retrieve Poorly Drawn Lines comics from the Tumblr blog."""
    # Also on http://poorlydrawnlines.com
    name = 'poorlydrawn-tumblr'
    long_name = 'Poorly Drawn Lines (from Tumblr)'
    url = 'http://pdlcomics.tumblr.com'
    _categories = ('POORLYDRAWN', )
3570
3571
3572
class PearShapedComics(GenericTumblrV1):
    """Retrieve Pear-Shaped Comics through the generic Tumblr V1 scraper."""
    name = 'pearshaped'
    long_name = 'Pear-Shaped Comics'
    url = 'http://pearshapedcomics.com'
3577
3578
3579
class PondScumComics(GenericTumblrV1):
    """Retrieve Pond Scum comics from the Tumblr blog."""
    name = 'pond'
    long_name = 'Pond Scum'
    url = 'http://pondscumcomic.tumblr.com'
3584
3585
3586
class MercworksTumblr(GenericTumblrV1):
    """Retrieve Mercworks comics from the Tumblr blog."""
    # Also on http://mercworks.net
    name = 'mercworks-tumblr'
    long_name = 'Mercworks (from Tumblr)'
    url = 'http://mercworks.tumblr.com'
3592
3593
3594
class OwlTurdTumblr(GenericTumblrV1):
    """Retrieve Owl Turd comics through the generic Tumblr V1 scraper."""
    # Also on http://tapastic.com/series/Owl-Turd-Comix
    name = 'owlturd-tumblr'
    long_name = 'Owl Turd (from Tumblr)'
    url = 'http://owlturd.com'
    _categories = ('OWLTURD', )
3601
3602
3603
class VectorBelly(GenericTumblrV1):
    """Retrieve Vector Belly comics from the Tumblr blog."""
    # Also on http://vectorbelly.com
    name = 'vector'
    long_name = 'Vector Belly'
    url = 'http://vectorbelly.tumblr.com'
3609
3610
3611
class GoneIntoRapture(GenericTumblrV1):
    """Retrieve Gone Into Rapture comics through the generic Tumblr V1 scraper."""
    # Also on http://goneintorapture.tumblr.com
    # Also on http://tapastic.com/series/Goneintorapture
    name = 'rapture'
    long_name = 'Gone Into Rapture'
    url = 'http://www.goneintorapture.com'
3618
3619
3620
class TheOatmealTumblr(GenericTumblrV1):
    """Retrieve The Oatmeal comics from the Tumblr blog."""
    # Also on http://theoatmeal.com
    name = 'oatmeal-tumblr'
    long_name = 'The Oatmeal (from Tumblr)'
    url = 'http://oatmeal.tumblr.com'
3626
3627
3628
class HeckIfIKnowComicsTumblr(GenericTumblrV1):
    """Retrieve Heck If I Know comics through the generic Tumblr V1 scraper."""
    # Also on http://tapastic.com/series/Regular
    name = 'heck-tumblr'
    long_name = 'Heck if I Know comics (from Tumblr)'
    url = 'http://heckifiknowcomics.com'
3634
3635
3636
class MyJetPack(GenericTumblrV1):
    """Retrieve My Jet Pack comics from the Tumblr blog."""
    name = 'jetpack'
    long_name = 'My Jet Pack'
    url = 'http://myjetpack.tumblr.com'
3641
3642
3643
class CheerUpEmoKidTumblr(GenericTumblrV1):
    """Retrieve Cheer Up Emo Kid comics from the Tumblr blog."""
    # Also on http://www.cheerupemokid.com
    # Also on http://tapastic.com/series/CUEK
    name = 'cuek-tumblr'
    long_name = 'Cheer Up Emo Kid (from Tumblr)'
    url = 'http://enzocomics.tumblr.com'
3650
3651
3652
class ForLackOfABetterComic(GenericTumblrV1):
    """Retrieve For Lack Of A Better Comic from the Tumblr blog."""
    # Also on http://forlackofabettercomic.com
    name = 'lack'
    long_name = 'For Lack Of A Better Comic'
    url = 'http://forlackofabettercomic.tumblr.com'
3658
3659
3660
class ZenPencilsTumblr(GenericTumblrV1):
    """Retrieve Zen Pencils comics from the Tumblr blog."""
    # Also on http://zenpencils.com
    # Also on http://www.gocomics.com/zen-pencils
    name = 'zenpencils-tumblr'
    long_name = 'Zen Pencils (from Tumblr)'
    url = 'http://zenpencils.tumblr.com'
    _categories = ('ZENPENCILS', )
3668
3669
3670
class ThreeWordPhraseTumblr(GenericTumblrV1):
    """Retrieve Three Word Phrase comics from the Tumblr blog."""
    # Also on http://threewordphrase.com
    name = 'threeword-tumblr'
    long_name = 'Three Word Phrase (from Tumblr)'
    url = 'http://www.threewordphrase.tumblr.com'
3676
3677
3678
class TimeTrabbleTumblr(GenericTumblrV1):
    """Retrieve Time Trabble comics from the Tumblr blog."""
    # Also on http://timetrabble.com
    name = 'timetrabble-tumblr'
    long_name = 'Time Trabble (from Tumblr)'
    url = 'http://timetrabble.tumblr.com'
3684
3685
3686
class SafelyEndangeredTumblr(GenericTumblrV1):
    """Retrieve Safely Endangered comics through the generic Tumblr V1 scraper."""
    # Also on http://www.safelyendangered.com
    name = 'endangered-tumblr'
    long_name = 'Safely Endangered (from Tumblr)'
    url = 'http://tumblr.safelyendangered.com'
3692
3693
3694
class MouseBearComedyTumblr(GenericTumblrV1):
    """Retrieve Mouse Bear Comedy comics from the Tumblr blog."""
    # Also on http://www.mousebearcomedy.com
    name = 'mousebear-tumblr'
    long_name = 'Mouse Bear Comedy (from Tumblr)'
    url = 'http://mousebearcomedy.tumblr.com'
3700
3701
3702
class BouletCorpTumblr(GenericTumblrV1):
    """Retrieve Boulet Corp comics from the Tumblr blog."""
    # Also on http://www.bouletcorp.com
    name = 'boulet-tumblr'
    long_name = 'Boulet Corp (from Tumblr)'
    url = 'http://bouletcorp.tumblr.com'
    _categories = ('BOULET', )
3709
3710
3711
class TheAwkwardYetiTumblr(GenericTumblrV1):
    """Retrieve The Awkward Yeti comics from the Tumblr blog."""
    # Also on http://www.gocomics.com/the-awkward-yeti
    # Also on http://theawkwardyeti.com
    # Also on https://tapastic.com/series/TheAwkwardYeti
    name = 'yeti-tumblr'
    long_name = 'The Awkward Yeti (from Tumblr)'
    url = 'http://larstheyeti.tumblr.com'
    _categories = ('YETI', )
3720
3721
3722
class NellucNhoj(GenericTumblrV1):
    """Retrieve Nelluc Nhoj comics through the generic Tumblr V1 scraper."""
    name = 'nhoj'
    long_name = 'Nelluc Nhoj'
    url = 'http://nellucnhoj.com'
3727
3728
3729
class DownTheUpwardSpiralTumblr(GenericTumblrV1):
    """Retrieve Down The Upward Spiral comics from the Tumblr blog."""
    # Also on http://www.downtheupwardspiral.com
    name = 'spiral-tumblr'
    long_name = 'Down the Upward Spiral (from Tumblr)'
    url = 'http://downtheupwardspiral.tumblr.com'
3735
3736
3737
class AsPerUsualTumblr(GenericTumblrV1):
    """Class to retrieve As Per Usual comics."""
    # Also on https://tapastic.com/series/AsPerUsual
    name = 'usual-tumblr'
    long_name = 'As Per Usual (from Tumblr)'
    url = 'http://as-per-usual.tumblr.com'
    # Category tags are declared as '_categories' by the other comic classes;
    # the attribute was previously spelled 'categories' here.
    _categories = ('DAMILEE', )
3744
3745
3746
class HotComicsForCoolPeopleTumblr(GenericTumblrV1):
    """Class to retrieve Hot Comics For Cool People."""
    # Also on https://tapastic.com/series/Hot-Comics-For-Cool-People
    # Also on http://hotcomics.biz (links to tumblr)
    # Also on http://hcfcp.com (links to tumblr)
    name = 'hotcomics-tumblr'
    long_name = 'Hot Comics For Cool People (from Tumblr)'
    url = 'http://hotcomicsforcoolpeople.tumblr.com'
    # Category tags are declared as '_categories' by the other comic classes;
    # the attribute was previously spelled 'categories' here.
    _categories = ('DAMILEE', )
3755
3756
3757
class OneOneOneOneComicTumblr(GenericTumblrV1):
    """Retrieve 1111 Comics from the Tumblr blog."""
    # Also on http://www.1111comics.me
    # Also on https://tapastic.com/series/1111-Comics
    name = '1111-tumblr'
    long_name = '1111 Comics (from Tumblr)'
    url = 'http://comics1111.tumblr.com'
    _categories = ('ONEONEONEONE', )
3765
3766
3767
class JhallComicsTumblr(GenericTumblrV1):
    """Retrieve Jhall Comics from the Tumblr blog."""
    # Also on http://jhallcomics.com
    name = 'jhall-tumblr'
    long_name = 'Jhall Comics (from Tumblr)'
    url = 'http://jhallcomics.tumblr.com'
3773
3774
3775
class BerkeleyMewsTumblr(GenericTumblrV1):
    """Retrieve Berkeley Mews comics from the Tumblr blog."""
    # Also on http://www.gocomics.com/berkeley-mews
    # Also on http://www.berkeleymews.com
    name = 'berkeley-tumblr'
    long_name = 'Berkeley Mews (from Tumblr)'
    url = 'http://mews.tumblr.com'
    _categories = ('BERKELEY', )
3783
3784
3785
class JoanCornellaTumblr(GenericTumblrV1):
    """Retrieve Joan Cornella comics from the Tumblr blog."""
    # Also on http://joancornella.net
    name = 'cornella-tumblr'
    long_name = 'Joan Cornella (from Tumblr)'
    url = 'http://cornellajoan.tumblr.com'
3791
3792
3793
class RespawnComicTumblr(GenericTumblrV1):
    """Retrieve Respawn Comic from the Tumblr blog."""
    # Also on http://respawncomic.com
    name = 'respawn-tumblr'
    long_name = 'Respawn Comic (from Tumblr)'
    url = 'http://respawncomic.tumblr.com'
3799
3800
3801
class ChrisHallbeckTumblr(GenericTumblrV1):
    """Class to retrieve Chris Hallbeck comics."""
    # Also on https://tapastic.com/ChrisHallbeck
    # Also on http://maximumble.com
    # Also on http://minimumble.com
    # Also on http://thebookofbiff.com
    name = 'hallbeck-tumblr'
    # Spelling fixed ("Hallbeck"), matching the class name and the blog url.
    long_name = 'Chris Hallbeck (from Tumblr)'
    url = 'http://chrishallbeck.tumblr.com'
    # NOTE(review): 'HALLBACK' is presumably shared with other classes of the
    # file, so the tag is kept unchanged - confirm before renaming everywhere.
    _categories = ('HALLBACK', )
3811
3812
3813
class ComicNuggets(GenericTumblrV1):
    """Retrieve Comic Nuggets through the generic Tumblr V1 scraper."""
    name = 'nuggets'
    long_name = 'Comic Nuggets'
    url = 'http://comicnuggets.com'
3818
3819
3820
class PigeonGazetteTumblr(GenericTumblrV1):
    """Retrieve The Pigeon Gazette comics using the GenericTumblrV1 logic."""
    # Also on https://tapastic.com/series/The-Pigeon-Gazette
    name = 'pigeon-tumblr'
    long_name = 'The Pigeon Gazette (from Tumblr)'
    url = 'http://thepigeongazette.tumblr.com'
3826
3827
3828
class CancerOwl(GenericTumblrV1):
    """Retrieve Cancer Owl comics using the GenericTumblrV1 logic."""
    # Also on http://cancerowl.com
    name = 'cancerowl-tumblr'
    long_name = 'Cancer Owl (from Tumblr)'
    url = 'http://cancerowl.tumblr.com'
3834
3835
3836
class FowlLanguageTumblr(GenericTumblrV1):
    """Retrieve Fowl Language comics using the GenericTumblrV1 logic."""
    # Also on http://www.fowllanguagecomics.com
    # Also on http://tapastic.com/series/Fowl-Language-Comics
    # Also on http://www.gocomics.com/fowl-language
    name = 'fowllanguage-tumblr'
    long_name = 'Fowl Language Comics (from Tumblr)'
    url = 'http://fowllanguagecomics.tumblr.com'
    _categories = ('FOWLLANGUAGE', )
3845
3846
3847
class TheOdd1sOutTumblr(GenericTumblrV1):
    """Retrieve The Odd 1s Out comics using the GenericTumblrV1 logic."""
    # Also on http://theodd1sout.com
    # Also on https://tapastic.com/series/Theodd1sout
    name = 'theodd-tumblr'
    long_name = 'The Odd 1s Out (from Tumblr)'
    url = 'http://theodd1sout.tumblr.com'
3854
3855
3856
class TheUnderfoldTumblr(GenericTumblrV1):
    """Retrieve The Underfold comics using the GenericTumblrV1 logic."""
    # Also on http://theunderfold.com
    name = 'underfold-tumblr'
    long_name = 'The Underfold (from Tumblr)'
    url = 'http://theunderfold.tumblr.com'
3862
3863
3864
class LolNeinTumblr(GenericTumblrV1):
    """Retrieve Lol Nein comics using the GenericTumblrV1 logic."""
    # Also on http://lolnein.com
    name = 'lolnein-tumblr'
    long_name = 'Lol Nein (from Tumblr)'
    url = 'http://lolneincom.tumblr.com'
3870
3871
3872
class FatAwesomeComicsTumblr(GenericTumblrV1):
    """Retrieve Fat Awesome Comics using the GenericTumblrV1 logic."""
    # Also on http://fatawesome.com/comics
    name = 'fatawesome-tumblr'
    long_name = 'Fat Awesome (from Tumblr)'
    url = 'http://fatawesomecomedy.tumblr.com'
3878
3879
3880
class TheWorldIsFlatTumblr(GenericTumblrV1):
    """Retrieve The World Is Flat comics using the GenericTumblrV1 logic."""
    # Also on https://tapastic.com/series/The-World-is-Flat
    name = 'flatworld-tumblr'
    long_name = 'The World Is Flat (from Tumblr)'
    url = 'http://theworldisflatcomics.tumblr.com'
3886
3887
3888
class DorrisMc(GenericTumblrV1):
    """Retrieve Dorris Mc comics using the GenericTumblrV1 logic."""
    # Also on http://www.gocomics.com/dorris-mccomics
    name = 'dorrismc'
    long_name = 'Dorris Mc'
    url = 'http://dorrismccomics.com'
3894
3895
3896
class LeleozTumblr(GenericEmptyComic, GenericTumblrV1):
    """Retrieve Leleoz comics (declared on top of GenericTumblrV1)."""
    # NOTE(review): GenericEmptyComic comes first in the bases; presumably it
    # disables the actual retrieval - confirm against its definition.
    # Also on https://tapastic.com/series/Leleoz
    name = 'leleoz-tumblr'
    long_name = 'Leleoz (from Tumblr)'
    url = 'http://leleozcomics.tumblr.com'
3902
3903
3904
class MoonBeardTumblr(GenericTumblrV1):
    """Retrieve MoonBeard comics using the GenericTumblrV1 logic."""
    # Also on http://moonbeard.com
    # Also on http://www.webtoons.com/en/comedy/moon-beard/list?title_no=471
    name = 'moonbeard-tumblr'
    long_name = 'Moon Beard (from Tumblr)'
    url = 'http://blog.squiresjam.es/moonbeard'
3911
3912
3913
class AComik(GenericTumblrV1):
    """Retrieve A Comik strips using the GenericTumblrV1 logic."""
    name = 'comik'
    long_name = 'A Comik'
    url = 'http://acomik.com'
3918
3919
3920
class ClassicRandy(GenericTumblrV1):
    """Retrieve Classic Randy comics using the GenericTumblrV1 logic."""
    name = 'randy'
    long_name = 'Classic Randy'
    url = 'http://classicrandy.tumblr.com'
3925
3926
3927
class DagssonTumblr(GenericTumblrV1):
    """Retrieve Dagsson comics using the GenericTumblrV1 logic."""
    # Also on http://www.dagsson.com
    name = 'dagsson-tumblr'
    long_name = 'Dagsson Hugleikur (from Tumblr)'
    url = 'http://hugleikurdagsson.tumblr.com'
3933
3934
3935
class LinsEditionsTumblr(GenericTumblrV1):
    """Retrieve L.I.N.S. Editions comics using the GenericTumblrV1 logic."""
    # Also on https://linsedition.com
    # Now on http://warandpeas.tumblr.com
    name = 'lins-tumblr'
    long_name = 'L.I.N.S. Editions (from Tumblr)'
    url = 'http://linscomics.tumblr.com'
    _categories = ('LINS', )
3943
3944
3945
class WarAndPeasTumblr(GenericTumblrV1):
    """Retrieve War And Peas comics using the GenericTumblrV1 logic."""
    # Was on http://linscomics.tumblr.com
    name = 'warandpeas-tumblr'
    long_name = 'War And Peas (from Tumblr)'
    url = 'http://warandpeas.tumblr.com'
    _categories = ('WARANDPEAS', )
3952
3953
3954
class OrigamiHotDish(GenericTumblrV1):
    """Retrieve Origami Hot Dish comics using the GenericTumblrV1 logic."""
    name = 'origamihotdish'
    long_name = 'Origami Hot Dish'
    url = 'http://origamihotdish.com'
3959
3960
3961
class HitAndMissComicsTumblr(GenericTumblrV1):
    """Retrieve Hit and Miss Comics using the GenericTumblrV1 logic."""
    name = 'hitandmiss'
    long_name = 'Hit and Miss Comics'
    url = 'http://hitandmisscomics.tumblr.com'
3966
3967
3968
class HMBlanc(GenericTumblrV1):
    """Retrieve HM Blanc comics using the GenericTumblrV1 logic."""
    name = 'hmblanc'
    long_name = 'HM Blanc'
    url = 'http://hmblanc.tumblr.com'
3973
3974
3975
class TalesOfAbsurdityTumblr(GenericTumblrV1):
    """Retrieve Tales Of Absurdity comics using the GenericTumblrV1 logic."""
    # Also on http://talesofabsurdity.com
    # Also on http://tapastic.com/series/Tales-Of-Absurdity
    name = 'absurdity-tumblr'
    long_name = 'Tales of Absurdity (from Tumblr)'
    url = 'http://talesofabsurdity.tumblr.com'
    _categories = ('ABSURDITY', )
3983
3984
3985
class RobbieAndBobby(GenericTumblrV1):
    """Retrieve Robbie And Bobby comics using the GenericTumblrV1 logic."""
    # Also on http://robbieandbobby.com
    name = 'robbie-tumblr'
    long_name = 'Robbie And Bobby (from Tumblr)'
    url = 'http://robbieandbobby.tumblr.com'
3991
3992
3993
class ElectricBunnyComicTumblr(GenericTumblrV1):
    """Retrieve Electric Bunny Comics using the GenericTumblrV1 logic."""
    # Also on http://www.electricbunnycomics.com/View/Comic/153/Welcome+to+Hell
    name = 'bunny-tumblr'
    long_name = 'Electric Bunny Comic (from Tumblr)'
    url = 'http://electricbunnycomics.tumblr.com'
3999
4000
4001
class Hoomph(GenericTumblrV1):
    """Retrieve Hoomph comics using the GenericTumblrV1 logic."""
    name = 'hoomph'
    long_name = 'Hoomph'
    url = 'http://hoom.ph'
4006
4007
4008
class BFGFSTumblr(GenericTumblrV1):
    """Retrieve BFGFS comics using the GenericTumblrV1 logic."""
    # Also on https://tapastic.com/series/BFGFS
    # Also on http://bfgfs.com
    name = 'bfgfs-tumblr'
    long_name = 'BFGFS (from Tumblr)'
    url = 'http://bfgfs.tumblr.com'
4015
4016
4017
class DoodleForFood(GenericTumblrV1):
    """Retrieve Doodle For Food comics using the GenericTumblrV1 logic."""
    # Also on http://doodleforfood.com
    name = 'doodle'
    long_name = 'Doodle For Food'
    url = 'http://doodleforfood.com'
4023
4024
4025
class CassandraCalinTumblr(GenericTumblrV1):
    """Retrieve C. Cassandra comics using the GenericTumblrV1 logic."""
    # Also on http://cassandracalin.com
    # Also on https://tapastic.com/series/C-Cassandra-comics
    name = 'cassandra-tumblr'
    long_name = 'Cassandra Calin (from Tumblr)'
    url = 'http://c-cassandra.tumblr.com'
4032
4033
4034
class DougWasTaken(GenericTumblrV1):
    """Retrieve Doug Was Taken comics using the GenericTumblrV1 logic."""
    name = 'doug'
    long_name = 'Doug Was Taken'
    url = 'http://dougwastaken.tumblr.com'
4039
4040
4041
class MandatoryRollerCoaster(GenericTumblrV1):
    """Retrieve Mandatory Roller Coaster comics using the GenericTumblrV1 logic."""
    name = 'rollercoaster'
    long_name = 'Mandatory Roller Coaster'
    url = 'http://mandatoryrollercoaster.com'
4046
4047
4048
class CEstPasEnRegardantSesPompes(GenericTumblrV1):
    """Retrieve C'Est Pas En Regardant Ses Pompes (...) comics using the GenericTumblrV1 logic."""
    name = 'cperspqccltt'
    long_name = 'C Est Pas En Regardant Ses Pompes (...)'
    url = 'http://cperspqccltt.tumblr.com'
4053
4054
4055
class TheGrohlTroll(GenericTumblrV1):
    """Retrieve The Grohl Troll comics using the GenericTumblrV1 logic."""
    name = 'grohltroll'
    long_name = 'The Grohl Troll'
    url = 'http://thegrohltroll.com'
4060
4061
4062
class WebcomicName(GenericTumblrV1):
    """Retrieve Webcomic Name comics using the GenericTumblrV1 logic."""
    name = 'webcomicname'
    long_name = 'Webcomic Name'
    url = 'http://webcomicname.com'
4067
4068
4069
class BooksOfAdam(GenericTumblrV1):
    """Retrieve Books of Adam comics using the GenericTumblrV1 logic."""
    # Also on http://www.booksofadam.com
    name = 'booksofadam'
    long_name = 'Books of Adam'
    url = 'http://booksofadam.tumblr.com'
4075
4076
4077
class HarkAVagrant(GenericTumblrV1):
    """Retrieve Hark A Vagrant comics using the GenericTumblrV1 logic."""
    # Also on http://www.harkavagrant.com
    name = 'hark-tumblr'
    long_name = 'Hark A Vagrant (from Tumblr)'
    url = 'http://beatonna.tumblr.com'
4083
4084
4085
class OurSuperAdventureTumblr(GenericTumblrV1):
    """Retrieve Our Super Adventure comics using the GenericTumblrV1 logic."""
    # Also on https://tapastic.com/series/Our-Super-Adventure
    # Also on http://www.oursuperadventure.com
    # http://sarahgraley.com
    name = 'superadventure-tumblr'
    long_name = 'Our Super Adventure (from Tumblr)'
    url = 'http://sarahssketchbook.tumblr.com'
4093
4094
4095
class JakeLikesOnions(GenericTumblrV1):
    """Retrieve Jake Likes Onions comics using the GenericTumblrV1 logic."""
    name = 'jake'
    long_name = 'Jake Likes Onions'
    url = 'http://jakelikesonions.com'
4100
4101
4102
class InYourFaceCake(GenericTumblrV1):
    """Retrieve In Your Face Cake comics using the GenericTumblrV1 logic."""
    name = 'inyourfacecake-tumblr'
    long_name = 'In Your Face Cake (from Tumblr)'
    url = 'http://in-your-face-cake.tumblr.com'
4107
4108
4109
class Robospunk(GenericTumblrV1):
    """Retrieve Robospunk comics using the GenericTumblrV1 logic."""
    name = 'robospunk'
    long_name = 'Robospunk'
    url = 'http://robospunk.com'
4114
4115
4116
class BananaTwinky(GenericTumblrV1):
    """Retrieve Banana Twinky comics using the GenericTumblrV1 logic."""
    name = 'banana'
    long_name = 'Banana Twinky'
    url = 'http://bananatwinky.tumblr.com'
4121
4122
4123
class YesterdaysPopcornTumblr(GenericTumblrV1):
    """Retrieve Yesterday's Popcorn comics using the GenericTumblrV1 logic."""
    # Also on http://www.yesterdayspopcorn.com
    # Also on https://tapastic.com/series/Yesterdays-Popcorn
    name = 'popcorn-tumblr'
    long_name = "Yesterday's Popcorn (from Tumblr)"
    url = 'http://yesterdayspopcorn.tumblr.com'
4130
4131
4132
class TwistedDoodles(GenericTumblrV1):
    """Retrieve Twisted Doodles comics using the GenericTumblrV1 logic."""
    name = 'twisted'
    long_name = 'Twisted Doodles'
    url = 'http://www.twisteddoodles.com'
4137
4138
4139
class UbertoolTumblr(GenericTumblrV1):
    """Retrieve Ubertool comics using the GenericTumblrV1 logic."""
    # Also on http://ubertoolcomic.com
    # Also on https://tapastic.com/series/ubertool
    name = 'ubertool-tumblr'
    long_name = 'Ubertool (from Tumblr)'
    url = 'http://ubertool.tumblr.com'
    _categories = ('UBERTOOL', )
4147
4148
4149
class LittleLifeLinesTumblr(GenericTumblrV1):
    """Retrieve Little Life Lines comics using the GenericTumblrV1 logic."""
    # Also on http://www.littlelifelines.com
    name = 'life-tumblr'
    long_name = 'Little Life Lines (from Tumblr)'
    url = 'https://little-life-lines.tumblr.com'
4155
4156
4157
class TheyCanTalk(GenericTumblrV1):
    """Retrieve They Can Talk comics using the GenericTumblrV1 logic."""
    name = 'theycantalk'
    long_name = 'They Can Talk'
    url = 'http://theycantalk.com'
4162
4163
4164
class Will5NeverCome(GenericTumblrV1):
    """Retrieve Will 5:00 Never Come comics using the GenericTumblrV1 logic."""
    name = 'will5'
    long_name = 'Will 5:00 Never Come ?'
    url = 'http://will5nevercome.com'
4169
4170
4171
class Sephko(GenericTumblrV1):
    """Retrieve Sephko comics using the GenericTumblrV1 logic."""
    # Also on http://www.sephko.com
    name = 'sephko'
    long_name = 'Sephko'
    url = 'http://sephko.tumblr.com'
4177
4178
4179
class BlazersAtDawn(GenericTumblrV1):
    """Retrieve Blazers At Dawn comics using the GenericTumblrV1 logic."""
    name = 'blazers'
    long_name = 'Blazers At Dawn'
    url = 'http://blazersatdawn.tumblr.com'
4184
4185
4186
class ArtByMoga(GenericEmptyComic, GenericTumblrV1):  # Deactivated because it downloads too many things
    """Retrieve Art By Moga comics (declared on top of GenericTumblrV1)."""
    name = 'moga'
    long_name = 'Art By Moga'
    url = 'http://artbymoga.tumblr.com'
4191
4192
4193
class VerbalVomitTumblr(GenericTumblrV1):
    """Retrieve Verbal Vomit comics using the GenericTumblrV1 logic."""
    # Also on http://www.verbal-vomit.com
    name = 'vomit-tumblr'
    long_name = 'Verbal Vomit (from Tumblr)'
    url = 'http://verbalvomits.tumblr.com'
4199
4200
4201
class LibraryComic(GenericTumblrV1):
    """Retrieve LibraryComic strips using the GenericTumblrV1 logic."""
    # Also on http://librarycomic.com
    name = 'library-tumblr'
    long_name = 'LibraryComic (from Tumblr)'
    url = 'http://librarycomic.tumblr.com'
4207
4208
4209
class TizzyStitchBirdTumblr(GenericTumblrV1):
    """Retrieve Tizzy Stitch Bird comics using the GenericTumblrV1 logic."""
    # Also on http://tizzystitchbird.com
    # Also on https://tapastic.com/series/TizzyStitchbird
    # Also on http://www.webtoons.com/en/challenge/tizzy-stitchbird/list?title_no=50082
    name = 'tizzy-tumblr'
    long_name = 'Tizzy Stitch Bird (from Tumblr)'
    url = 'http://tizzystitchbird.tumblr.com'
4217
4218
4219
class HorovitzComics(GenericListableComic):
    """Shared implementation for the comics hosted on horovitzcomics.com.

    Concrete subclasses set ``link_re`` to a compiled pattern that matches the
    archive hrefs of their section and captures the comic number in group 1.
    """
    url = 'http://www.horovitzcomics.com'
    _categories = ('HOROVITZ', )
    # Captures the three numeric path components after 'comics/' in the image
    # URL; get_comic_info reads them as year, month and day.
    img_re = re.compile('.*comics/([0-9]*)/([0-9]*)/([0-9]*)/.*$')
    link_re = NotImplemented  # placeholder, replaced in each subclass
    get_url_from_archive_element = join_cls_url_to_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary of the comic behind an archive link.

        ``link`` is the archive anchor (its href gives the number, its string
        the title) and ``soup`` the parsed comic page. Exactly one
        ``<img id="comic">`` is expected on the page.
        """
        number_match = cls.link_re.match(link['href'])
        num = int(number_match.group(1))
        title = link.string
        comic_images = soup.find_all('img', id='comic')
        assert len(comic_images) == 1
        date_match = cls.img_re.match(comic_images[0]['src'])
        year, month, day = map(int, date_match.groups())
        return {
            'title': title,
            'day': day,
            'month': month,
            'year': year,
            'img': [image['src'] for image in comic_images],
            'num': num,
        }

    @classmethod
    def get_archive_elements(cls):
        """Return the archive links matching ``link_re``, in reverse page order."""
        archive_soup = get_soup_at_url('http://www.horovitzcomics.com/comics/archive/')
        matching_links = archive_soup.find_all('a', href=cls.link_re)
        return reversed(matching_links)
4250
4251
4252
class HorovitzNew(HorovitzComics):
    """Retrieve the 'new' Horovitz comics (archive links under /comics/new/)."""
    name = 'horovitznew'
    long_name = 'Horovitz New'
    # Group 1 is the comic number.
    link_re = re.compile('^/comics/new/([0-9]+)$')
4257
4258
4259
class HorovitzClassic(HorovitzComics):
    """Retrieve the 'classic' Horovitz comics (archive links under /comics/classic/)."""
    name = 'horovitzclassic'
    long_name = 'Horovitz Classic'
    # Group 1 is the comic number.
    link_re = re.compile('^/comics/classic/([0-9]+)$')
4264
4265
4266
class GenericGoComic(GenericNavigableComic):
    """Shared implementation for the comics hosted on gocomics.com."""
    _categories = ('GOCOMIC', )

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'backward' navigation anchor found on the comic's main page."""
        first_button_class = 'fa btn btn-outline-default btn-circle fa-backward sm '
        main_page = get_soup_at_url(cls.url)
        return main_page.find('a', class_=first_button_class)

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the next (``next_`` truthy) or previous navigation anchor."""
        if next_:
            button_class = 'fa btn btn-outline-default btn-circle fa-caret-right sm '
        else:
            button_class = 'fa btn btn-outline-default btn-circle fa-caret-left sm '
        return last_soup.find('a', class_=button_class)

    @classmethod
    def get_url_from_link(cls, link):
        """Return the link's href joined to the gocomics.com root."""
        return urljoin_wrapper('http://www.gocomics.com', link['href'])

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary (date, images, author, tags) of a comic page."""
        def meta_content(prop):
            # Value of the 'content' attribute of the <meta property=...> tag.
            return soup.find('meta', property=prop)['content']

        published = string_to_date(meta_content('article:published_time'), "%Y-%m-%d")
        picture = soup.find('picture', class_='img-fluid item-comic-image')
        images = picture.find_all('img')
        author = meta_content('article:author')
        tags = meta_content('article:tag')
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [image['src'] for image in images],
            'author': author,
            'tags': tags,
        }
4303
4304
4305
class PearlsBeforeSwine(GenericGoComic):
    """Retrieve Pearls Before Swine comics using the GenericGoComic logic."""
    name = 'pearls'
    long_name = 'Pearls Before Swine'
    url = 'http://www.gocomics.com/pearlsbeforeswine'
4310
4311
4312
class Peanuts(GenericGoComic):
    """Retrieve Peanuts comics using the GenericGoComic logic."""
    name = 'peanuts'
    long_name = 'Peanuts'
    url = 'http://www.gocomics.com/peanuts'
4317
4318
4319
class MattWuerker(GenericGoComic):
    """Retrieve Matt Wuerker comics using the GenericGoComic logic."""
    name = 'wuerker'
    long_name = 'Matt Wuerker'
    url = 'http://www.gocomics.com/mattwuerker'
4324
4325
4326
class TomToles(GenericGoComic):
    """Retrieve Tom Toles comics using the GenericGoComic logic."""
    name = 'toles'
    long_name = 'Tom Toles'
    url = 'http://www.gocomics.com/tomtoles'
4331
4332
4333
class BreakOfDay(GenericGoComic):
    """Retrieve Break Of Day comics using the GenericGoComic logic."""
    name = 'breakofday'
    long_name = 'Break Of Day'
    url = 'http://www.gocomics.com/break-of-day'
4338
4339
4340
class Brevity(GenericGoComic):
    """Retrieve Brevity comics using the GenericGoComic logic."""
    name = 'brevity'
    long_name = 'Brevity'
    url = 'http://www.gocomics.com/brevitypanel'
4345
4346
4347
class MichaelRamirez(GenericGoComic):
    """Retrieve Michael Ramirez comics using the GenericGoComic logic."""
    name = 'ramirez'
    long_name = 'Michael Ramirez'
    url = 'http://www.gocomics.com/michaelramirez'
4352
4353
4354
class MikeLuckovich(GenericGoComic):
    """Retrieve Mike Luckovich comics using the GenericGoComic logic."""
    name = 'luckovich'
    long_name = 'Mike Luckovich'
    url = 'http://www.gocomics.com/mikeluckovich'
4359
4360
4361
class JimBenton(GenericGoComic):
    """Retrieve Jim Benton comics using the GenericGoComic logic."""
    # Also on http://jimbenton.tumblr.com
    name = 'benton'
    long_name = 'Jim Benton'
    url = 'http://www.gocomics.com/jim-benton-cartoons'
4367
4368
4369
class TheArgyleSweater(GenericGoComic):
    """Retrieve The Argyle Sweater comics using the GenericGoComic logic."""
    name = 'argyle'
    long_name = 'Argyle Sweater'
    url = 'http://www.gocomics.com/theargylesweater'
4374
4375
4376
class SunnyStreet(GenericGoComic):
    """Retrieve Sunny Street comics using the GenericGoComic logic."""
    # Also on http://www.sunnystreetcomics.com
    name = 'sunny'
    long_name = 'Sunny Street'
    url = 'http://www.gocomics.com/sunny-street'
4382
4383
4384
class OffTheMark(GenericGoComic):
    """Retrieve Off The Mark comics using the GenericGoComic logic."""
    # Also on https://www.offthemark.com
    name = 'offthemark'
    long_name = 'Off The Mark'
    url = 'http://www.gocomics.com/offthemark'
4390
4391
4392
class WuMo(GenericGoComic):
    """Retrieve WuMo comics using the GenericGoComic logic."""
    # Also on http://wumo.com
    name = 'wumo'
    long_name = 'WuMo'
    url = 'http://www.gocomics.com/wumo'
4398
4399
4400
class LunarBaboon(GenericGoComic):
    """Retrieve Lunar Baboon comics using the GenericGoComic logic."""
    # Also on http://www.lunarbaboon.com
    # Also on https://tapastic.com/series/Lunarbaboon
    name = 'lunarbaboon'
    long_name = 'Lunar Baboon'
    url = 'http://www.gocomics.com/lunarbaboon'
4407
4408
4409
class SandersenGocomic(GenericGoComic):
    """Retrieve Sarah Andersen comics using the GenericGoComic logic."""
    # Also on http://sarahcandersen.com
    # Also on http://tapastic.com/series/Doodle-Time
    name = 'sandersen-goc'
    long_name = 'Sarah Andersen (from GoComics)'
    url = 'http://www.gocomics.com/sarahs-scribbles'
4416
4417
4418
class SaturdayMorningBreakfastCerealGoComic(GenericGoComic):
    """Retrieve Saturday Morning Breakfast Cereal comics using the GenericGoComic logic."""
    # Also on http://smbc-comics.tumblr.com
    # Also on http://www.smbc-comics.com
    name = 'smbc-goc'
    long_name = 'Saturday Morning Breakfast Cereal (from GoComics)'
    url = 'http://www.gocomics.com/saturday-morning-breakfast-cereal'
    _categories = ('SMBC', )
4426
4427
4428
class CalvinAndHobbesGoComic(GenericGoComic):
    """Retrieve Calvin and Hobbes comics using the GenericGoComic logic."""
    # From gocomics, not http://marcel-oehler.marcellosendos.ch/comics/ch/
    name = 'calvin-goc'
    long_name = 'Calvin and Hobbes (from GoComics)'
    url = 'http://www.gocomics.com/calvinandhobbes'
4434
4435
4436
class RallGoComic(GenericGoComic):
    """Retrieve Ted Rall comics using the GenericGoComic logic."""
    # Also on http://rall.com/comic
    name = 'rall-goc'
    long_name = 'Ted Rall (from GoComics)'
    url = 'http://www.gocomics.com/ted-rall'
    _categories = ('RALL', )
4443
4444
4445
class TheAwkwardYetiGoComic(GenericGoComic):
    """Retrieve The Awkward Yeti comics using the GenericGoComic logic."""
    # Also on http://larstheyeti.tumblr.com
    # Also on http://theawkwardyeti.com
    # Also on https://tapastic.com/series/TheAwkwardYeti
    name = 'yeti-goc'
    long_name = 'The Awkward Yeti (from GoComics)'
    url = 'http://www.gocomics.com/the-awkward-yeti'
    _categories = ('YETI', )
4454
4455
4456
class BerkeleyMewsGoComics(GenericGoComic):
    """Retrieve Berkeley Mews comics using the GenericGoComic logic."""
    # Also on http://mews.tumblr.com
    # Also on http://www.berkeleymews.com
    name = 'berkeley-goc'
    long_name = 'Berkeley Mews (from GoComics)'
    url = 'http://www.gocomics.com/berkeley-mews'
    _categories = ('BERKELEY', )
4464
4465
4466
class SheldonGoComics(GenericGoComic):
    """Retrieve Sheldon comics using the GenericGoComic logic."""
    # Also on http://www.sheldoncomics.com
    name = 'sheldon-goc'
    long_name = 'Sheldon Comics (from GoComics)'
    url = 'http://www.gocomics.com/sheldon'
4472
4473
4474
class FowlLanguageGoComics(GenericGoComic):
    """Retrieve Fowl Language comics using the GenericGoComic logic."""
    # Also on http://www.fowllanguagecomics.com
    # Also on http://tapastic.com/series/Fowl-Language-Comics
    # Also on http://fowllanguagecomics.tumblr.com
    name = 'fowllanguage-goc'
    long_name = 'Fowl Language Comics (from GoComics)'
    url = 'http://www.gocomics.com/fowl-language'
    _categories = ('FOWLLANGUAGE', )
4483
4484
4485
class NickAnderson(GenericGoComic):
    """Retrieve Nick Anderson comics using the GenericGoComic logic."""
    name = 'nickanderson'
    long_name = 'Nick Anderson'
    url = 'http://www.gocomics.com/nickanderson'
4490
4491
4492
class GarfieldGoComics(GenericGoComic):
    """Retrieve Garfield comics using the GenericGoComic logic."""
    # Also on http://garfield.com
    name = 'garfield-goc'
    long_name = 'Garfield (from GoComics)'
    url = 'http://www.gocomics.com/garfield'
    _categories = ('GARFIELD', )
4499
4500
4501
class DorrisMcGoComics(GenericGoComic):
    """Retrieve Dorris Mc comics using the GenericGoComic logic."""
    # Also on http://dorrismccomics.com
    name = 'dorrismc-goc'
    long_name = 'Dorris Mc (from GoComics)'
    url = 'http://www.gocomics.com/dorris-mccomics'
4507
4508
4509
class FoxTrot(GenericGoComic):
    """Retrieve FoxTrot comics using the GenericGoComic logic."""
    name = 'foxtrot'
    long_name = 'FoxTrot'
    url = 'http://www.gocomics.com/foxtrot'
4514
4515
4516
class FoxTrotClassics(GenericGoComic):
    """Retrieve FoxTrot Classics comics using the GenericGoComic logic."""
    name = 'foxtrot-classics'
    long_name = 'FoxTrot Classics'
    url = 'http://www.gocomics.com/foxtrotclassics'
4521
4522
4523
class MisterAndMeGoComics(GenericEmptyComic, GenericGoComic):  # Removed ?
    """Retrieve Mister & Me comics (declared on top of GenericGoComic)."""
    # Also on http://www.mister-and-me.com
    # Also on https://tapastic.com/series/Mister-and-Me
    name = 'mister-goc'
    long_name = 'Mister & Me (from GoComics)'
    url = 'http://www.gocomics.com/mister-and-me'
4530
4531
4532
class NonSequitur(GenericGoComic):
    """Retrieve Non Sequitur (Wiley Miller) comics using the GenericGoComic logic."""
    name = 'nonsequitur'
    long_name = 'Non Sequitur'
    url = 'http://www.gocomics.com/nonsequitur'
4537
4538
4539
class GenericTapasticComic(GenericListableComic):
    """Shared implementation for the comics hosted on tapastic.com."""
    _categories = ('TAPASTIC', )

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Build the info dictionary of one episode.

        ``archive_elt`` is one entry of the episode list (its 'publishDate',
        'title' and 'id' keys are read) and ``soup`` the parsed episode page.
        Returns None, after printing a notice, when the page has no
        'art-image' image yet.
        """
        # 'publishDate' is divided by 1000 to get a timestamp in seconds.
        # NOTE: fromtimestamp() without tz converts to local time, so the
        # resulting date depends on the machine's timezone.
        seconds = int(archive_elt['publishDate']) / 1000.0
        published = datetime.datetime.fromtimestamp(seconds).date()
        images = soup.find_all('img', class_='art-image')
        if images:
            return {
                'day': published.day,
                'year': published.year,
                'month': published.month,
                'img': [image['src'] for image in images],
                'title': archive_elt['title'],
            }
        print("Comic %s is being uploaded, retry later" % cls.get_url_from_archive_element(archive_elt))
        return None

    @classmethod
    def get_url_from_archive_element(cls, archive_elt):
        """Return the episode URL built from the element's 'id'."""
        episode_id = str(archive_elt['id'])
        return 'http://tapastic.com/episode/' + episode_id

    @classmethod
    def get_archive_elements(cls):
        """Return the episode list embedded in the series page's javascript.

        The first stripped line that starts with 'episodeList : ' and ends with
        ',' is cut down to the part in between and parsed as JSON. An
        IndexError is raised when no such line exists.
        """
        prefix, suffix = 'episodeList : ', ','
        # The data lives in inline javascript, hence the line-based scraping.
        candidates = []
        for raw_line in urlopen_wrapper(cls.url).readlines():
            line = raw_line.decode('utf-8').strip()
            if line.startswith(prefix) and line.endswith(suffix):
                candidates.append(line[len(prefix):-len(suffix)])
        return json.loads(candidates[0])
4572
4573
4574
class VegetablesForDessert(GenericTapasticComic):
    """Retrieve Vegetables For Dessert comics using the GenericTapasticComic logic."""
    # Also on http://vegetablesfordessert.tumblr.com
    name = 'vegetables'
    long_name = 'Vegetables For Dessert'
    url = 'http://tapastic.com/series/vegetablesfordessert'
4580
4581
4582
class FowlLanguageTapa(GenericTapasticComic):
    """Retrieve Fowl Language comics using the GenericTapasticComic logic."""
    # Also on http://www.fowllanguagecomics.com
    # Also on http://fowllanguagecomics.tumblr.com
    # Also on http://www.gocomics.com/fowl-language
    name = 'fowllanguage-tapa'
    long_name = 'Fowl Language Comics (from Tapastic)'
    url = 'http://tapastic.com/series/Fowl-Language-Comics'
    _categories = ('FOWLLANGUAGE', )
4591
4592
4593
class OscillatingProfundities(GenericTapasticComic):
    """Retrieve Oscillating Profundities comics using the GenericTapasticComic logic."""
    name = 'oscillating'
    long_name = 'Oscillating Profundities'
    url = 'http://tapastic.com/series/oscillatingprofundities'
4598
4599
4600
class ZnoflatsComics(GenericTapasticComic):
    """Retrieve Znoflats comics using the GenericTapasticComic logic."""
    name = 'znoflats'
    long_name = 'Znoflats Comics'
    url = 'http://tapastic.com/series/Znoflats-Comics'
4605
4606
4607
class SandersenTapastic(GenericTapasticComic):
    """Retrieve Sarah Andersen comics using the GenericTapasticComic logic."""
    # Also on http://sarahcandersen.com
    # Also on http://www.gocomics.com/sarahs-scribbles
    name = 'sandersen-tapa'
    long_name = 'Sarah Andersen (from Tapastic)'
    url = 'http://tapastic.com/series/Doodle-Time'
4614
4615
4616
class TubeyToonsTapastic(GenericTapasticComic):
    """Retrieve TubeyToons comics using the GenericTapasticComic logic."""
    # Also on http://tubeytoons.com
    # Also on http://tubeytoons.tumblr.com
    name = 'tubeytoons-tapa'
    long_name = 'Tubey Toons (from Tapastic)'
    url = 'http://tapastic.com/series/Tubey-Toons'
    # NOTE(review): key is spelled 'TUNEYTOONS' (not 'TUBEYTOONS'); presumably
    # shared with other Tubey Toons classes - confirm before renaming.
    _categories = ('TUNEYTOONS', )
4624
4625
4626
class AnythingComicTapastic(GenericTapasticComic):
    """Retrieve Anything Comic strips using the GenericTapasticComic logic."""
    # Also on http://www.anythingcomic.com
    name = 'anythingcomic-tapa'
    long_name = 'Anything Comic (from Tapastic)'
    url = 'http://tapastic.com/series/anything'
4632
4633
4634
class UnearthedComicsTapastic(GenericTapasticComic):
    """Retrieve Unearthed comics using the GenericTapasticComic logic."""
    # Also on http://unearthedcomics.com
    # Also on http://unearthedcomics.tumblr.com
    name = 'unearthed-tapa'
    long_name = 'Unearthed Comics (from Tapastic)'
    url = 'http://tapastic.com/series/UnearthedComics'
    _categories = ('UNEARTHED', )
4642
4643
4644
class EverythingsStupidTapastic(GenericTapasticComic):
    """Retrieve Everything's Stupid comics using the GenericTapasticComic logic."""
    # Also on http://www.webtoons.com/en/challenge/everythings-stupid/list?title_no=14591
    # Also on http://everythingsstupid.net
    name = 'stupid-tapa'
    long_name = "Everything's Stupid (from Tapastic)"
    url = 'http://tapastic.com/series/EverythingsStupid'
4651
4652
4653
class JustSayEhTapastic(GenericTapasticComic):
    """Retrieve Just Say Eh comics using the GenericTapasticComic logic."""
    # Also on http://www.justsayeh.com
    name = 'justsayeh-tapa'
    long_name = 'Just Say Eh (from Tapastic)'
    url = 'http://tapastic.com/series/Just-Say-Eh'
4659
4660
4661
class ThorsThundershackTapastic(GenericTapasticComic):
    """Retrieve Thor's Thundershack comics using the GenericTapasticComic logic."""
    # Also on http://www.thorsthundershack.com
    name = 'thor-tapa'
    long_name = "Thor's Thundershack (from Tapastic)"
    url = 'http://tapastic.com/series/Thors-Thundershac'
    _categories = ('THOR', )
4668
4669
4670
class OwlTurdTapastic(GenericTapasticComic):
    """Retrieve Owl Turd comics using the GenericTapasticComic logic."""
    # Also on http://owlturd.com
    name = 'owlturd-tapa'
    long_name = 'Owl Turd (from Tapastic)'
    url = 'http://tapastic.com/series/Owl-Turd-Comix'
    _categories = ('OWLTURD', )
4677
4678
4679
class GoneIntoRaptureTapastic(GenericTapasticComic):
    """Retriever for Gone Into Rapture comics as published on Tapastic."""
    # Also available at:
    #   http://goneintorapture.tumblr.com
    #   http://www.goneintorapture.com
    name = "rapture-tapa"
    long_name = "Gone Into Rapture (from Tapastic)"
    url = "http://tapastic.com/series/Goneintorapture"
4686
4687
4688
class HeckIfIKnowComicsTapa(GenericTapasticComic):
    """Retriever for Heck If I Know Comics as published on Tapastic."""
    # Also available at http://heckifiknowcomics.com
    name = "heck-tapa"
    long_name = "Heck if I Know comics (from Tapastic)"
    url = "http://tapastic.com/series/Regular"
4694
4695
4696
class CheerUpEmoKidTapa(GenericTapasticComic):
    """Retriever for Cheer Up Emo Kid comics as published on Tapastic."""
    # Also available at:
    #   http://www.cheerupemokid.com
    #   http://enzocomics.tumblr.com
    name = "cuek-tapa"
    long_name = "Cheer Up Emo Kid (from Tapastic)"
    url = "http://tapastic.com/series/CUEK"
4703
4704
4705
class BigFootJusticeTapa(GenericTapasticComic):
    """Retriever for Big Foot Justice comics as published on Tapastic."""
    # Also available at http://bigfootjustice.com
    name = "bigfoot-tapa"
    long_name = "Big Foot Justice (from Tapastic)"
    url = "http://tapastic.com/series/bigfoot-justice"
4711
4712
4713
class UpAndOutTapa(GenericTapasticComic):
    """Retriever for Up & Out comics as published on Tapastic."""
    # Also available at http://upandoutcomic.tumblr.com
    name = "upandout-tapa"
    long_name = "Up And Out (from Tapastic)"
    url = "http://tapastic.com/series/UP-and-OUT"
4719
4720
4721
class ToonHoleTapa(GenericTapasticComic):
    """Retriever for Toon Hole comics as published on Tapastic."""
    # Also available at http://www.toonhole.com
    name = "toonhole-tapa"
    long_name = "Toon Hole (from Tapastic)"
    url = "http://tapastic.com/series/TOONHOLE"
4727
4728
4729
class AngryAtNothingTapa(GenericTapasticComic):
    """Retriever for Angry At Nothing comics as published on Tapastic."""
    # Also available at http://www.angryatnothing.net
    name = "angry-tapa"
    long_name = "Angry At Nothing (from Tapastic)"
    url = "http://tapastic.com/series/Comics-yeah-definitely-comics-"
4735
4736
4737
class LeleozTapa(GenericTapasticComic):
    """Retriever for Leleoz comics as published on Tapastic."""
    # Also available at http://leleozcomics.tumblr.com
    name = "leleoz-tapa"
    long_name = "Leleoz (from Tapastic)"
    url = "https://tapastic.com/series/Leleoz"
4743
4744
4745
class TheAwkwardYetiTapa(GenericTapasticComic):
    """Retriever for The Awkward Yeti comics as published on Tapastic."""
    # Also available at:
    #   http://www.gocomics.com/the-awkward-yeti
    #   http://theawkwardyeti.com
    #   http://larstheyeti.tumblr.com
    name = "yeti-tapa"
    long_name = "The Awkward Yeti (from Tapastic)"
    url = "https://tapastic.com/series/TheAwkwardYeti"
    _categories = ("YETI",)
4754
4755
4756
class AsPerUsualTapa(GenericTapasticComic):
    """Class to retrieve As Per Usual comics from Tapastic."""
    # Also on http://as-per-usual.tumblr.com
    name = 'usual-tapa'
    long_name = 'As Per Usual (from Tapastic)'
    url = 'https://tapastic.com/series/AsPerUsual'
    # Spelled `_categories` (leading underscore) like every other comic class
    # in this module; the previous `categories` spelling did not match that
    # convention, so the category was presumably never picked up.
    _categories = ('DAMILEE', )
4763
4764
4765
class HotComicsForCoolPeopleTapa(GenericTapasticComic):
    """Class to retrieve Hot Comics For Cool People from Tapastic."""
    # Also on http://hotcomicsforcoolpeople.tumblr.com
    # Also on http://hotcomics.biz (links to tumblr)
    # Also on http://hcfcp.com (links to tumblr)
    name = 'hotcomics-tapa'
    long_name = 'Hot Comics For Cool People (from Tapastic)'
    url = 'https://tapastic.com/series/Hot-Comics-For-Cool-People'
    # Spelled `_categories` (leading underscore) like every other comic class
    # in this module; the previous `categories` spelling did not match that
    # convention, so the category was presumably never picked up.
    _categories = ('DAMILEE', )
4774
4775
4776
class OneOneOneOneComicTapa(GenericTapasticComic):
    """Retriever for 1111 Comics as published on Tapastic."""
    # Also available at:
    #   http://www.1111comics.me
    #   http://comics1111.tumblr.com
    name = "1111-tapa"
    long_name = "1111 Comics (from Tapastic)"
    url = "https://tapastic.com/series/1111-Comics"
    _categories = ("ONEONEONEONE",)
4784
4785
4786
class TumbleDryTapa(GenericTapasticComic):
    """Class to retrieve Tumble Dry comics from Tapastic."""
    # Also on http://tumbledrycomics.com
    name = 'tumbledry-tapa'
    # Display name fixed: the comic is "Tumble Dry", not "Tumblr Dry".
    long_name = 'Tumble Dry (from Tapastic)'
    url = 'https://tapastic.com/series/TumbleDryComics'
4792
4793
4794
class DeadlyPanelTapa(GenericTapasticComic):
    """Retriever for Deadly Panel comics as published on Tapastic."""
    # Also available at http://www.deadlypanel.com
    name = "deadly-tapa"
    long_name = "Deadly Panel (from Tapastic)"
    url = "https://tapastic.com/series/deadlypanel"
4800
4801
4802
class ChrisHallbeckMaxiTapa(GenericTapasticComic):
    """Class to retrieve Chris Hallbeck's Maximumble comics from Tapastic."""
    # Also on http://chrishallbeck.tumblr.com
    # Also on http://maximumble.com
    name = 'hallbeckmaxi-tapa'
    # Author's name is spelled "Hallbeck" (see class name and URLs above).
    long_name = 'Chris Hallbeck - Maximumble (from Tapastic)'
    url = 'https://tapastic.com/series/Maximumble'
    # NOTE(review): category key keeps its historical spelling because it is
    # presumably shared with other Chris Hallbeck classes - confirm before
    # renaming it everywhere at once.
    _categories = ('HALLBACK', )
4810
4811
4812
class ChrisHallbeckMiniTapa(GenericTapasticComic):
    """Class to retrieve Chris Hallbeck's Minimumble comics from Tapastic."""
    # Also on http://chrishallbeck.tumblr.com
    # Also on http://minimumble.com
    name = 'hallbeckmini-tapa'
    # Author's name is spelled "Hallbeck" (see class name and URLs above).
    long_name = 'Chris Hallbeck - Minimumble (from Tapastic)'
    url = 'https://tapastic.com/series/Minimumble'
    # NOTE(review): category key keeps its historical spelling because it is
    # presumably shared with other Chris Hallbeck classes - confirm before
    # renaming it everywhere at once.
    _categories = ('HALLBACK', )
4820
4821
4822
class ChrisHallbeckBiffTapa(GenericTapasticComic):
    """Class to retrieve Chris Hallbeck's The Book of Biff comics from Tapastic."""
    # Also on http://chrishallbeck.tumblr.com
    # Also on http://thebookofbiff.com
    name = 'hallbeckbiff-tapa'
    # Author's name is spelled "Hallbeck" (see class name and URLs above).
    long_name = 'Chris Hallbeck - The Book of Biff (from Tapastic)'
    url = 'https://tapastic.com/series/Biff'
    # NOTE(review): category key keeps its historical spelling because it is
    # presumably shared with other Chris Hallbeck classes - confirm before
    # renaming it everywhere at once.
    _categories = ('HALLBACK', )
4830
4831
4832
class RandoWisTapa(GenericTapasticComic):
    """Retriever for RandoWis comics as published on Tapastic."""
    # Also available at https://randowis.com
    name = "randowis-tapa"
    long_name = "RandoWis (from Tapastic)"
    url = "https://tapastic.com/series/RandoWis"
4838
4839
4840
class PigeonGazetteTapa(GenericTapasticComic):
    """Retriever for The Pigeon Gazette comics as published on Tapastic."""
    # Also available at http://thepigeongazette.tumblr.com
    name = "pigeon-tapa"
    long_name = "The Pigeon Gazette (from Tapastic)"
    url = "https://tapastic.com/series/The-Pigeon-Gazette"
4846
4847
4848
class TheOdd1sOutTapa(GenericTapasticComic):
    """Retriever for The Odd 1s Out comics as published on Tapastic."""
    # Also available at:
    #   http://theodd1sout.com
    #   http://theodd1sout.tumblr.com
    name = "theodd-tapa"
    long_name = "The Odd 1s Out (from Tapastic)"
    url = "https://tapastic.com/series/Theodd1sout"
4855
4856
4857
class TheWorldIsFlatTapa(GenericTapasticComic):
    """Retriever for The World Is Flat comics as published on Tapastic."""
    # Also available at http://theworldisflatcomics.tumblr.com
    name = "flatworld-tapa"
    long_name = "The World Is Flat (from Tapastic)"
    url = "https://tapastic.com/series/The-World-is-Flat"
4863
4864
4865
class MisterAndMeTapa(GenericTapasticComic):
    """Retriever for Mister & Me comics as published on Tapastic."""
    # Also available at:
    #   http://www.mister-and-me.com
    #   http://www.gocomics.com/mister-and-me
    name = "mister-tapa"
    long_name = "Mister & Me (from Tapastic)"
    url = "https://tapastic.com/series/Mister-and-Me"
4872
4873
4874
class TalesOfAbsurdityTapa(GenericTapasticComic):
    """Retriever for Tales Of Absurdity comics as published on Tapastic."""
    # Also available at:
    #   http://talesofabsurdity.com
    #   http://talesofabsurdity.tumblr.com
    name = "absurdity-tapa"
    long_name = "Tales of Absurdity (from Tapastic)"
    url = "http://tapastic.com/series/Tales-Of-Absurdity"
    _categories = ("ABSURDITY",)
4882
4883
4884
class BFGFSTapa(GenericTapasticComic):
    """Retriever for BFGFS comics as published on Tapastic."""
    # Also available at:
    #   http://bfgfs.com
    #   http://bfgfs.tumblr.com
    name = "bfgfs-tapa"
    long_name = "BFGFS (from Tapastic)"
    url = "https://tapastic.com/series/BFGFS"
4891
4892
4893
class DoodleForFoodTapa(GenericTapasticComic):
    """Retriever for Doodle For Food comics as published on Tapastic."""
    # Also available at http://doodleforfood.com
    name = "doodle-tapa"
    long_name = "Doodle For Food (from Tapastic)"
    url = "https://tapastic.com/series/Doodle-for-Food"
4899
4900
4901
class MrLovensteinTapa(GenericTapasticComic):
    """Retriever for Mr. Lovenstein comics as published on Tapastic."""
    # NOTE(review): the previous "Also on" note repeated this class's own
    # Tapastic URL; the comic's main site is presumably
    # http://www.mrlovenstein.com - confirm.
    name = "mrlovenstein-tapa"
    long_name = "Mr. Lovenstein (from Tapastic)"
    url = "https://tapastic.com/series/MrLovenstein"
4907
4908
4909
class CassandraCalinTapa(GenericTapasticComic):
    """Retriever for C. Cassandra comics as published on Tapastic."""
    # Also available at:
    #   http://cassandracalin.com
    #   http://c-cassandra.tumblr.com
    name = "cassandra-tapa"
    long_name = "Cassandra Calin (from Tapastic)"
    url = "https://tapastic.com/series/C-Cassandra-comics"
4916
4917
4918
class WafflesAndPancakes(GenericTapasticComic):
    """Retriever for Waffles And Pancakes comics as published on Tapastic."""
    # Also available at http://wandpcomic.com
    name = "waffles"
    long_name = "Waffles And Pancakes"
    url = "https://tapastic.com/series/Waffles-and-Pancakes"
4924
4925
4926
class YesterdaysPopcornTapastic(GenericTapasticComic):
    """Retriever for Yesterday's Popcorn comics as published on Tapastic."""
    # Also available at:
    #   http://www.yesterdayspopcorn.com
    #   http://yesterdayspopcorn.tumblr.com
    name = "popcorn-tapa"
    long_name = "Yesterday's Popcorn (from Tapastic)"
    url = "https://tapastic.com/series/Yesterdays-Popcorn"
4933
4934
4935
class OurSuperAdventureTapastic(GenericTapasticComic):
    """Retriever for Our Super Adventure comics as published on Tapastic."""
    # Also available at http://www.oursuperadventure.com
    # Related sites listed alongside it:
    #   http://sarahssketchbook.tumblr.com
    #   http://sarahgraley.com
    name = "superadventure-tapastic"
    long_name = "Our Super Adventure (from Tapastic)"
    url = "https://tapastic.com/series/Our-Super-Adventure"
4943
4944
4945
class NamelessPCs(GenericTapasticComic):
    """Retriever for Nameless PCs comics as published on Tapastic."""
    # Also available at http://namelesspcs.com
    name = "namelesspcs-tapa"
    long_name = "NamelessPCs (from Tapastic)"
    url = "https://tapastic.com/series/NamelessPC"
4951
4952
4953
class UbertoolTapa(GenericTapasticComic):
    """Retriever for Ubertool comics as published on Tapastic."""
    # Also available at:
    #   http://ubertoolcomic.com
    #   http://ubertool.tumblr.com
    name = "ubertool-tapa"
    long_name = "Ubertool (from Tapastic)"
    url = "https://tapastic.com/series/ubertool"
    _categories = ("UBERTOOL",)
4961
4962
4963
class BarteNerdsTapa(GenericTapasticComic):
    """Retriever for BarteNerds comics as published on Tapastic."""
    # Also available at http://www.bartenerds.com
    name = "bartenerds-tapa"
    long_name = "BarteNerds (from Tapastic)"
    url = "https://tapastic.com/series/BarteNERDS"
4969
4970
4971
class SmallBlueYonderTapa(GenericTapasticComic):
    """Retriever for Small Blue Yonder comics as published on Tapastic."""
    # Also available at http://www.smallblueyonder.com
    name = "smallblue-tapa"
    long_name = "Small Blue Yonder (from Tapastic)"
    url = "https://tapastic.com/series/Small-Blue-Yonder"
4977
4978
4979
class TizzyStitchBirdTapa(GenericTapasticComic):
    """Retriever for Tizzy Stitch Bird comics as published on Tapastic."""
    # Also available at:
    #   http://tizzystitchbird.com
    #   http://tizzystitchbird.tumblr.com
    #   http://www.webtoons.com/en/challenge/tizzy-stitchbird/list?title_no=50082
    name = "tizzy-tapa"
    long_name = "Tizzy Stitch Bird (from Tapastic)"
    url = "https://tapastic.com/series/TizzyStitchbird"
4987
4988
4989
def get_subclasses(klass):
    """Return every direct and indirect subclass of `klass` as a list.

    Direct subclasses come first, followed by the descendants of each
    direct subclass in turn. A class reachable through several
    inheritance paths appears once per path.
    """
    children = klass.__subclasses__()
    # Recurse into each direct subclass to collect the deeper descendants.
    descendants = [sub for child in children for sub in get_subclasses(child)]
    return children + descendants
4995
4996
4997
def remove_st_nd_rd_th_from_date(string):
    """Strip ordinal suffixes (1st/2nd/3rd/4th) so the date can be parsed.

    Only a suffix that directly follows a digit is removed. The previous
    implementation removed every "st"/"nd"/"rd"/"th" substring, which
    mangled day and month names ("Monday" -> "Moay", "Saturday" ->
    "Satuay") and needed a special case to repair "August".

    Args:
        string: text containing a date, e.g. "August 1st, 2015".

    Returns:
        The same text with the ordinal suffixes removed,
        e.g. "August 1, 2015".
    """
    # Keep the digit (group 1) and drop the two-letter suffix after it.
    return re.sub(r'(\d)(?:st|nd|rd|th)', r'\1', string)
5005
5006
5007
def string_to_date(string, date_format, local=DEFAULT_LOCAL):
    """Convert a string to a date object.

    Wrapper around datetime.datetime.strptime which temporarily switches
    the process-wide locale to `local` while parsing, then restores the
    locale that was active before the call.

    Args:
        string: text to parse.
        date_format: strptime format string.
        local: locale name to parse with.

    Returns:
        The parsed datetime.date.

    Raises:
        ValueError: if `string` does not match `date_format`.
        locale.Error: if `local` cannot be set on this system.
    """
    # format described in https://docs.python.org/3/library/datetime.html#strftime-and-strptime-behavior
    prev_locale = locale.setlocale(locale.LC_ALL)
    if local == prev_locale:
        # Already in the requested locale: nothing to switch or restore.
        return datetime.datetime.strptime(string, date_format).date()
    locale.setlocale(locale.LC_ALL, local)
    try:
        return datetime.datetime.strptime(string, date_format).date()
    finally:
        # Restore the caller's locale even when strptime raises, so a
        # single unparsable date does not leave the process in `local`.
        locale.setlocale(locale.LC_ALL, prev_locale)
5018
5019
5020
# Registry of comic classes, built once at import time.
# COMICS: every direct or indirect subclass of GenericComic.
COMICS = set(get_subclasses(GenericComic))
# VALID_COMICS: only the classes that define a name (presumably the
# intermediate generic base classes leave it as None).
VALID_COMICS = [comic for comic in COMICS if comic.name is not None]
# COMIC_NAMES: lookup from comic name to class; a duplicated name would
# collapse two classes into one entry, which the assertion catches.
COMIC_NAMES = {comic.name: comic for comic in VALID_COMICS}
assert len(VALID_COMICS) == len(COMIC_NAMES)
# CLASS_NAMES: same uniqueness check on the Python class names.
CLASS_NAMES = {comic.__name__ for comic in VALID_COMICS}
assert len(VALID_COMICS) == len(CLASS_NAMES)
5026