Completed
Push — master ( 931cb7...555114 )
by De
01:11
created

comics.py (12 issues)

1
#! /usr/bin/python3
2
# vim: set expandtab tabstop=4 shiftwidth=4 :
3
"""Module to retrieve webcomics"""
4
5
from comic_abstract import GenericComic, get_date_for_comic
6
import re
7
from datetime import date, timedelta
8
import datetime
9
from urlfunctions import get_soup_at_url, urljoin_wrapper,\
10
    convert_iri_to_plain_ascii_uri, load_json_at_url, urlopen_wrapper
11
import json
12
import locale
13
import urllib
14
15
DEFAULT_LOCAL = 'en_GB.UTF-8'
16
17
18
class Xkcd(GenericComic):
    """Retrieve xkcd comics from the JSON documents published by the site."""
    name = 'xkcd'
    long_name = 'xkcd'
    url = 'http://xkcd.com'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Yield the comics following `last_comic` (every comic when it is falsy).

        Implementation of GenericComic's abstract method. The number of the
        most recent comic is read from the site-wide 'info.0.json' document,
        then each comic is loaded from its own JSON document.
        """
        start = last_comic['num'] + 1 if last_comic else 1
        latest = load_json_at_url(urljoin_wrapper(cls.url, 'info.0.json'))
        stop = latest['num'] + 1

        for num in range(start, stop):
            if num == 404:
                # Number 404 is deliberately never requested.
                continue
            json_url = urljoin_wrapper(cls.url, '%d/info.0.json' % num)
            comic = load_json_at_url(json_url)
            comic['img'] = [comic['img']]
            comic['prefix'] = '%d-' % num
            comic['json_url'] = json_url
            comic['url'] = urljoin_wrapper(cls.url, str(num))
            # The date components are converted to integers in place.
            for field in ('day', 'month', 'year'):
                comic[field] = int(comic[field])
            assert comic['num'] == num
            yield comic
44
45
46
# Helper functions corresponding to get_url_from_link/get_url_from_archive_element
47
48
49
@classmethod
def get_href(cls, link):
    """Return the 'href' of `link` as is (for get_url_from_link/get_url_from_archive_element)."""
    href = link['href']
    return href
53
54
55
@classmethod
def join_cls_url_to_href(cls, link):
    """Return the 'href' of `link` joined to the class url (for get_url_from_link/get_url_from_archive_element)."""
    base_url = cls.url
    return urljoin_wrapper(base_url, link['href'])
59
60
61
class GenericNavigableComic(GenericComic):
    """Generic class for "navigable" comics: those with first/next arrows.

    This class applies to comics where the previous and next comics can be
    accessed from a given comic. Once given a starting point (either
    the first comic or the last comic retrieved), it handles the
    navigation, the retrieval of the soup object and the setting of
    the 'url' attribute on retrieved comics. This greatly limits the
    amount of boilerplate code in the different implementation classes.

    The `get_next_comic` method is implemented in terms of new,
    more specialized methods to be implemented/overridden:
        - get_first_comic_link
        - get_navi_link
        - get_comic_info
        - get_url_from_link
    """
    _categories = ('NAVIGABLE', )

    @classmethod
    def get_first_comic_link(cls):
        """Get link to the first comic.

        Sometimes this can be retrieved from any comic page, sometimes from
        the archive page, sometimes it doesn't exist at all and one has
        to iterate backward to find it before hardcoding the result found.
        """
        raise NotImplementedError

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next (or previous - for dev purposes) comic."""
        raise NotImplementedError

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic."""
        raise NotImplementedError

    @classmethod
    def get_url_from_link(cls, link):
        """Get url corresponding to a link. Default implementation is similar to get_href."""
        return link['href']

    @classmethod
    def get_next_link(cls, last_soup):
        """Get link to next comic."""
        link = cls.get_navi_link(last_soup, True)
        cls.log("Next link is %s" % link)
        return link

    @classmethod
    def get_prev_link(cls, last_soup):
        """Get link to previous comic."""
        link = cls.get_navi_link(last_soup, False)
        cls.log("Prev link is %s" % link)
        return link

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generic implementation of get_next_comic for navigable comics.

        Starts from the link following `last_comic` (or from the first comic
        link when `last_comic` is falsy) and yields each comic for which
        `get_comic_info` returns something, with its 'url' key set.
        """
        url = last_comic['url'] if last_comic else None
        cls.log("starting 'get_next_comic' from %s" % url)
        next_comic = \
            cls.get_next_link(get_soup_at_url(url)) \
            if url else \
            cls.get_first_comic_link()
        cls.log("next/first comic will be %s (url is %s)" % (str(next_comic), url))
        # cls.check_navigation(url)
        while next_comic:
            prev_url, url = url, cls.get_url_from_link(next_comic)
            if prev_url == url:
                # A "next" link pointing to the current page ends the walk
                cls.log("got same url %s" % url)
                break
            cls.log("about to get %s (%s)" % (url, str(next_comic)))
            soup = get_soup_at_url(url)
            comic = cls.get_comic_info(soup, next_comic)
            if comic is not None:
                assert 'url' not in comic
                comic['url'] = url
                yield comic
            next_comic = cls.get_next_link(soup)
            cls.log("next comic will be %s" % str(next_comic))

    @classmethod
    def check_first_link(cls):
        """Check that navigation to first comic seems to be working - for dev purposes.

        Returns True when a first link exists and its url can be fetched
        without an HTTP error, False otherwise.
        """
        # `import urllib` alone does not load the `urllib.error` submodule:
        # import it explicitly so that the `except` clause below cannot fail
        # with an AttributeError.
        import urllib.error
        cls.log("about to check first link")
        ok = True
        firstlink = cls.get_first_comic_link()
        if firstlink is None:
            print("From %s : no first link" % cls.url)
            ok = False
        else:
            firsturl = cls.get_url_from_link(firstlink)
            try:
                get_soup_at_url(firsturl)
            except urllib.error.HTTPError:
                print("From %s : invalid first url" % cls.url)
                ok = False
        cls.log("checked first link -> returned %d" % ok)
        return ok

    @classmethod
    def check_prev_next_links(cls, url):
        """Check that navigation to prev/next from a given URL seems to be working - for dev purposes.

        Going backward then forward (and forward then backward) from `url`
        is expected to lead back to `url`. Returns True when the checks
        pass, False otherwise.
        """
        cls.log("about to check prev/next from %s" % url)
        ok = True
        if url is None:
            prevlink, nextlink = None, None
        else:
            soup = get_soup_at_url(url)
            prevlink, nextlink = cls.get_prev_link(soup), cls.get_next_link(soup)
        if prevlink is None and nextlink is None:
            print("From %s : no previous nor next" % url)
            ok = False
        else:
            if prevlink:
                prevurl = cls.get_url_from_link(prevlink)
                prevsoup = get_soup_at_url(prevurl)
                prevnextlink = cls.get_next_link(prevsoup)
                prevnext = cls.get_url_from_link(prevnextlink) if prevnextlink is not None else "NO URL"
                if prevnext != url:
                    print("From %s, going backward then forward leads to %s" % (url, prevnext))
                    ok = False
            if nextlink:
                nexturl = cls.get_url_from_link(nextlink)
                # A next link pointing to the page itself is not followed
                if nexturl != url:
                    nextsoup = get_soup_at_url(nexturl)
                    nextprevlink = cls.get_prev_link(nextsoup)
                    nextprev = cls.get_url_from_link(nextprevlink) if nextprevlink is not None else "NO URL"
                    if nextprev != url:
                        print("From %s, going forward then backward leads to %s" % (url, nextprev))
                        ok = False
        cls.log("checked prev/next from %s -> returned %d" % (url, ok))
        return ok

    @classmethod
    def check_navigation(cls, url):
        """Check that navigation functions seem to be working - for dev purposes."""
        cls.log("about to check navigation from %s" % url)
        first = cls.check_first_link()
        prevnext = cls.check_prev_next_links(url)
        ok = first and prevnext
        cls.log("checked navigation from %s -> returned %d" % (url, ok))
        return ok
207
208
209
class GenericListableComic(GenericComic):
    """Generic class for "listable" comics: those exposing a list of comics (aka 'archive').

    `get_next_comic` is built on top of more specialized methods that
    are to be implemented/overridden:
        - get_archive_elements
        - get_url_from_archive_element
        - get_comic_info
    """
    _categories = ('LISTABLE', )

    @classmethod
    def get_archive_elements(cls):
        """Return an iterable over the archive elements."""
        raise NotImplementedError

    @classmethod
    def get_url_from_archive_element(cls, archive_elt):
        """Return the url matching an archive element."""
        raise NotImplementedError

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Return the information about one particular comic."""
        raise NotImplementedError

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generic get_next_comic for listable comics.

        Archive elements are skipped up to and including the one whose url
        is the url of `last_comic`; the following ones are fetched and
        yielded. Without `last_comic`, every element is fetched.
        """
        pending_url = last_comic['url'] if last_comic else None
        for archive_elt in cls.get_archive_elements():
            url = cls.get_url_from_archive_element(archive_elt)
            cls.log("considering %s" % url)
            if pending_url is not None:
                # Still looking for the last retrieved comic: nothing to fetch
                if pending_url == url:
                    pending_url = None
                continue
            cls.log("about to get %s (%s)" % (url, str(archive_elt)))
            page = get_soup_at_url(url)
            comic = cls.get_comic_info(page, archive_elt)
            if comic is None:
                continue
            assert 'url' not in comic
            comic['url'] = url
            yield comic
        if pending_url is not None:
            print("Did not find %s : there might be a problem" % pending_url)
254
255
# Helper functions corresponding to get_first_comic_link/get_navi_link
256
257
258
@classmethod
def get_link_rel_next(cls, last_soup, next_):
    """get_navi_link implementation looking for a 'link' tag with rel 'next' or 'prev'."""
    rel = 'next' if next_ else 'prev'
    return last_soup.find('link', rel=rel)
262
263
264
@classmethod
def get_a_rel_next(cls, last_soup, next_):
    """get_navi_link implementation looking for an 'a' tag with rel 'next' or 'prev'."""
    if next_:
        rel = 'next'
    else:
        rel = 'prev'
    return last_soup.find('a', rel=rel)
268
269
270
@classmethod
def get_a_navi_navinext(cls, last_soup, next_):
    """get_navi_link implementation looking for an 'a' tag by its 'navi navi-next'/'navi navi-prev' class."""
    css_class = 'navi navi-next' if next_ else 'navi navi-prev'
    return last_soup.find('a', class_=css_class)
274
275
276
@classmethod
def get_a_navi_comicnavnext_navinext(cls, last_soup, next_):
    """get_navi_link implementation looking for an 'a' tag by its 'navi comic-nav-* navi-*' class."""
    if next_:
        css_class = 'navi comic-nav-next navi-next'
    else:
        css_class = 'navi comic-nav-previous navi-prev'
    return last_soup.find('a', class_=css_class)
280
281
282
@classmethod
def get_a_comicnavbase_comicnavnext(cls, last_soup, next_):
    """get_navi_link implementation looking for an 'a' tag by its 'comic-nav-base comic-nav-*' class."""
    direction = 'next' if next_ else 'previous'
    return last_soup.find('a', class_='comic-nav-base comic-nav-' + direction)
286
287
288
@classmethod
def get_a_navi_navifirst(cls):
    """get_first_comic_link implementation: 'a' tag of class 'navi navi-first' on the page at the class url."""
    home_soup = get_soup_at_url(cls.url)
    return home_soup.find('a', class_='navi navi-first')
292
293
294
@classmethod
def get_div_navfirst_a(cls):
    """get_first_comic_link implementation: 'a' tag inside the 'nav-first' div on the page at the class url."""
    home_soup = get_soup_at_url(cls.url)
    nav_first = home_soup.find('div', class_="nav-first")
    return nav_first.find('a')
298
299
300
@classmethod
def get_a_comicnavbase_comicnavfirst(cls):
    """get_first_comic_link implementation: 'a' tag of class 'comic-nav-base comic-nav-first' on the page at the class url."""
    home_soup = get_soup_at_url(cls.url)
    return home_soup.find('a', class_='comic-nav-base comic-nav-first')
304
305
306
@classmethod
def simulate_first_link(cls):
    """Implementation of get_first_comic_link building a link-like object
    from the `first_url` attribute provided by the class.

    Note: the first URL can easily be found by temporarily using
    `get_first_comic_link = navigate_to_first_comic`.
    """
    return dict(href=cls.first_url)
315
316
317
@classmethod
def navigate_to_first_comic(cls):
    """Implementation of get_first_comic_link walking backward from a URL
    typed in by the user until no previous comic is found.

    Sometimes the first comic cannot be reached directly, so one has to
    follow the "previous" links until there are none left. Once the URL
    is known it is better to hardcode it, but for development purposes
    it is convenient to have an automatic way to find it.

    The URL found can then easily be used via `simulate_first_link`.
    """
    url = input("Get starting URL: ")
    while True:
        # Each visited URL is printed, the last one being the first comic
        print(url)
        prev_link = cls.get_prev_link(get_soup_at_url(url))
        if not prev_link:
            return {'href': url}
        url = cls.get_url_from_link(prev_link)
338
339
340
class GenericEmptyComic(GenericComic):
    """Generic class for comics where nothing is to be done.

    It can be useful to temporarily deactivate comics that do not work
    properly by replacing `class MyComic(GenericWhateverComic)` with
    `class MyComic(GenericEmptyComic, GenericWhateverComic)`."""
    _categories = ('EMPTY', )

    @classmethod
    def get_next_comic(cls, last_comic):
        """get_next_comic implementation that logs and returns no comic at all."""
        cls.log("comic is considered as empty - returning no comic")
        no_comics = []
        return no_comics
353
354
355
class ExtraFabulousComics(GenericNavigableComic):
    """Class to retrieve Extra Fabulous Comics."""
    name = 'efc'
    long_name = 'Extra Fabulous Comics'
    url = 'http://extrafabulouscomics.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        The images kept are the 'img' tags whose source starts with the
        site's '/wp-content/uploads/' location; title and date come from
        the 'og:title' and 'article:published_time' meta tags. `link` is
        not used.
        """
        # The site url is matched literally: without escaping, its '.'
        # would match any character.
        img_src_re = re.compile('^%s/wp-content/uploads/' % re.escape(cls.url))
        imgs = soup.find_all('img', src=img_src_re)
        title = soup.find('meta', property='og:title')['content']
        # Only the leading 'YYYY-MM-DD' part of the timestamp is parsed
        date_str = soup.find('meta', property='article:published_time')['content'][:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        return {
            'title': title,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'prefix': title + '-'
        }
379
380
381 View Code Duplication
class GenericLeMondeBlog(GenericNavigableComic):
    """Base class for the comics published on Le Monde blogs.

    Subclasses are expected to provide `first_url`.
    """
    _categories = ('LEMONDE', 'FRANCAIS')
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return short url, title, date and images of the comic in `soup`."""
        shortlink = soup.find('link', rel='shortlink')
        short_url = shortlink['href']
        title = soup.find('meta', property='og:title')['content']
        # The date is written in French on the page
        entry_date = soup.find("span", class_="entry-date").string
        published = string_to_date(entry_date, "%d %B %Y", "fr_FR.utf8")
        image_metas = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'url2': short_url,
            'img': [convert_iri_to_plain_ascii_uri(meta['content']) for meta in image_metas],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
404
405
406
class ZepWorld(GenericLeMondeBlog):
    """Zep World comics (Le Monde blog)."""
    name = 'zep'
    long_name = 'Zep World'
    url = 'http://zepworld.blog.lemonde.fr'
    first_url = 'http://zepworld.blog.lemonde.fr/2014/10/31/bientot-le-blog-de-zep/'
412
413
414
class Vidberg(GenericLeMondeBlog):
    """Vidberg comics (Le Monde blog)."""
    name = "vidberg"
    long_name = "Vidberg - l'actu en patates"
    url = 'http://vidberg.blog.lemonde.fr'
    # Not the first but I didn't find an efficient way to retrieve it
    first_url = 'http://vidberg.blog.lemonde.fr/2012/02/09/revue-de-campagne-la-campagne-du-modem-semballe/'
421
422
423
class Plantu(GenericLeMondeBlog):
    """Plantu comics (Le Monde blog)."""
    name = "plantu"
    long_name = 'Plantu'
    url = 'http://plantu.blog.lemonde.fr'
    first_url = 'http://plantu.blog.lemonde.fr/2014/10/28/stress-test-a-bruxelles/'
429
430
431
class XavierGorce(GenericLeMondeBlog):
    """Xavier Gorce comics (Le Monde blog)."""
    name = "gorce"
    long_name = 'Xavier Gorce'
    url = 'http://xaviergorce.blog.lemonde.fr'
    first_url = 'http://xaviergorce.blog.lemonde.fr/2015/01/09/distinction/'
437
438
439
class CartooningForPeace(GenericLeMondeBlog):
    """Cartooning For Peace comics (Le Monde blog)."""
    name = "forpeace"
    long_name = 'Cartooning For Peace'
    url = 'http://cartooningforpeace.blog.lemonde.fr'
    first_url = 'http://cartooningforpeace.blog.lemonde.fr/2014/12/15/bado/'
445
446
447
class Aurel(GenericLeMondeBlog):
    """Aurel comics (Le Monde blog)."""
    name = "aurel"
    long_name = 'Aurel'
    url = 'http://aurel.blog.lemonde.fr'
    first_url = 'http://aurel.blog.lemonde.fr/2014/09/29/le-senat-repasse-a-droite/'
453
454
455
class LesCulottees(GenericLeMondeBlog):
    """Les Culottees comics (Le Monde blog)."""
    name = "culottees"
    long_name = "Les Culottees"
    url = 'http://lesculottees.blog.lemonde.fr'
    first_url = 'http://lesculottees.blog.lemonde.fr/2016/01/11/clementine-delait-femme-a-barbe/'
461
462
463
class UneAnneeAuLycee(GenericLeMondeBlog):
    """Une Annee Au Lycee comics (Le Monde blog)."""
    name = "lycee"
    long_name = "Une Annee au Lycee"
    url = "http://uneanneeaulycee.blog.lemonde.fr"
    first_url = 'http://uneanneeaulycee.blog.lemonde.fr/2016/06/13/la-semaine-du-bac-est-arrivee/'
469
470
471 View Code Duplication
class Rall(GenericNavigableComic):
    """Retrieve the comics of Ted Rall."""
    # Also on http://www.gocomics.com/tedrall
    name = "rall"
    long_name = 'Ted Rall'
    url = 'http://rall.com/comic'
    _categories = ('RALL', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    # Not the first but I didn't find an efficient way to retrieve it
    first_url = 'http://rall.com/2014/01/30/los-angeles-times-cartoon-well-miss-those-california-flowers'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, author, date, description and images of the comic in `soup`."""
        title = soup.find('meta', property='og:title')['content']
        author_span = soup.find("span", class_="author vcard")
        author = author_span.find("a").string
        entry_date = soup.find("span", class_="entry-date").string
        published = string_to_date(entry_date, "%B %d, %Y")
        description = soup.find('meta', property='og:description')['content']
        content_imgs = soup.find('div', class_='entry-content').find_all('img')
        # The last 7 images are dropped (social media buttons)
        comic_imgs = content_imgs[:-7]
        return {
            'title': title,
            'author': author,
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'description': description,
            'img': [img['src'] for img in comic_imgs],
        }
502
503
504
class Dilem(GenericNavigableComic):
    """Retrieve the comics of Ali Dilem."""
    name = "dilem"
    long_name = "Ali Dilem"
    url = "http://information.tv5monde.com/dilem"
    _categories = ('FRANCAIS', )
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://information.tv5monde.com/dilem/2004-06-26'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the link to the next or previous comic, None when there is none."""
        # On this site the 'prev' item holds the next comic and vice versa
        item = last_soup.find('li', class_='prev' if next_ else 'next')
        if not item:
            return None
        return item.find('a')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return short url, title, images and date of the comic in `soup`."""
        short_url = soup.find('link', rel='shortlink')['href']
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        image_metas = soup.find_all('meta', property='og:image')
        # Only the leading 'YYYY-MM-DD' part of the date is parsed
        iso_date = soup.find('span', property='dc:date')['content'][:10]
        published = string_to_date(iso_date, "%Y-%m-%d")
        return {
            'short_url': short_url,
            'title': title,
            'img': [meta['content'] for meta in image_metas],
            'day': published.day,
            'month': published.month,
            'year': published.year,
        }
538
539
540
class SpaceAvalanche(GenericNavigableComic):
    """Retrieve the Space Avalanche comics."""
    name = "avalanche"
    long_name = "Space Avalanche"
    url = "http://www.spaceavalanche.com"
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return a hardcoded link-like dict for the first comic."""
        first_link = {
            'href': "http://www.spaceavalanche.com/2009/02/02/irish-sea/",
            'title': "Irish Sea",
        }
        return first_link

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, date and images of a comic.

        The title comes from `link` and the date from the comic url.
        """
        url_date_re = re.compile('.*/([0-9]*)/([0-9]*)/([0-9]*)/.*$')
        title = link['title']
        comic_url = cls.get_url_from_link(link)
        date_parts = url_date_re.match(comic_url).groups()
        year, month, day = map(int, date_parts)
        entry_imgs = soup.find("div", class_="entry").find_all("img")
        return {
            'title': title,
            'day': day,
            'month': month,
            'year': year,
            'img': [img['src'] for img in entry_imgs],
        }
568
569
570
class ZenPencils(GenericNavigableComic):
    """Retrieve the ZenPencils comics."""
    # Also on http://zenpencils.tumblr.com
    # Also on http://www.gocomics.com/zen-pencils
    name = "zenpencils"
    long_name = "Zen Pencils"
    url = "http://zenpencils.com"
    _categories = ('ZENPENCILS', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://zenpencils.com/comic/1-ralph-waldo-emerson-make-them-cry/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, description, author, date and images of the comic in `soup`."""
        comic_imgs = soup.find('div', id='comic').find_all('img')
        # imgs2 = soup.find_all('meta', property='og:image')
        post = soup.find('div', class_='post-content')
        author_span = post.find("span", class_="post-author")
        author = author_span.find("a").string
        title = soup.find('meta', property='og:title')['content']
        post_date = post.find('span', class_='post-date').string
        published = string_to_date(post_date, "%B %d, %Y")
        # Sanity checks on what the page is expected to contain
        assert comic_imgs
        assert all(img['alt'] == img['title'] for img in comic_imgs)
        assert all(img['alt'] in (title, "") for img in comic_imgs)
        description = soup.find('meta', property='og:description')['content']
        return {
            'title': title,
            'description': description,
            'author': author,
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [urljoin_wrapper(cls.url, img['src']) for img in comic_imgs],
        }
605
606
607
class ItsTheTie(GenericNavigableComic):
    """Retrieve the It's the tie comics."""
    # Also on http://itsthetie.tumblr.com
    # Also on https://tapastic.com/series/itsthetie
    name = "tie"
    long_name = "It's the tie"
    url = 'http://itsthetie.com'
    _categories = ('TIE', )
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, date, images (bonus ones included) and tags of the comic in `soup`."""
        title_tag = soup.find('h1', class_='comic-title')
        title = title_tag.find('a').string
        meta_header = soup.find('header', class_='comic-meta entry-meta')
        published = string_to_date(meta_header.find('a').string, "%B %d, %Y")
        # Bonus images may or may not be in meta og:image.
        imgs_src = [meta['content'] for meta in soup.find_all('meta', property='og:image')]
        bonus_src = [img['data-oversrc'] for img in soup.find_all('img', attrs={'data-oversrc': True})]
        candidates = list(imgs_src)
        # Bonus images already listed in the meta tags are not added twice
        candidates.extend(src for src in bonus_src if src not in imgs_src)
        all_imgs_src = [src for src in candidates if not src.endswith("/2016/01/bonus-panel.png")]
        tag_meta = soup.find('meta', property='article:tag')
        if tag_meta:
            tags = tag_meta['content']
        else:
            tags = ""
        return {
            'title': title,
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': all_imgs_src,
            'tags': tags,
        }
641
642
643
class PenelopeBagieu(GenericNavigableComic):
    """Retrieve the comics from Penelope Bagieu's blog."""
    name = "bagieu"
    long_name = "Ma vie est tout a fait fascinante (Bagieu)"
    url = "http://www.penelope-jolicoeur.com"
    _categories = ('FRANCAIS', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = "http://www.penelope-jolicoeur.com/2007/02/ma-vie-mon-oeuv.html"

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, images and date of the comic in `soup`."""
        # The date is written in French on the page
        header_date = soup.find('h2', class_='date-header').string
        published = string_to_date(header_date, "%A %d %B %Y", "fr_FR.utf8")
        body_imgs = soup.find('div', class_='entry-body').find_all('img')
        title = soup.find('h3', class_='entry-header').string
        return {
            'title': title,
            'img': [img['src'] for img in body_imgs],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
667
668
669 View Code Duplication
class OneOneOneOneComic(GenericNavigableComic):
    """Retrieve the 1111 Comics."""
    # Also on http://comics1111.tumblr.com
    # Also on https://tapastic.com/series/1111-Comics
    name = "1111"
    long_name = "1111 Comics"
    url = "http://www.1111comics.me"
    _categories = ('ONEONEONEONE', )
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, date and images of the comic in `soup`."""
        title_tag = soup.find('h1', class_='comic-title')
        title = title_tag.find('a').string
        meta_header = soup.find('header', class_='comic-meta entry-meta')
        published = string_to_date(meta_header.find('a').string, "%B %d, %Y")
        image_metas = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [meta['content'] for meta in image_metas],
        }
694
695
696 View Code Duplication
class AngryAtNothing(GenericNavigableComic):
    """Retrieve the Angry at Nothing comics."""
    # Also on http://tapastic.com/series/Comics-yeah-definitely-comics-
    name = "angry"
    long_name = "Angry At Nothing"
    url = "http://www.angryatnothing.net"
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, date and images of the comic in `soup`."""
        title_link = soup.find('h1', class_='comic-title').find('a')
        title = title_link.string
        date_link = soup.find('header', class_='comic-meta entry-meta').find('a')
        published = string_to_date(date_link.string, "%B %d, %Y")
        image_urls = [meta['content'] for meta in soup.find_all('meta', property='og:image')]
        return {
            'title': title,
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': image_urls,
        }
719
720
721
class NeDroid(GenericNavigableComic):
    """Class to retrieve NeDroid comics."""
    name = 'nedroid'
    long_name = 'NeDroid'
    url = 'http://nedroid.com'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        The comic number is read from the shortlink ('?p=<num>') and the
        date from the image source ('.../comics/<year>-<month>-<day>...').
        Exactly one image is expected in the 'comic' div. `link` is not
        used.
        """
        # The host name is matched literally: without escaping, its '.'
        # would match any character.
        short_url_re = re.compile(r'^%s/\?p=([0-9]*)' % re.escape(cls.url))
        comic_url_re = re.compile(r'//nedroid\.com/comics/([0-9]*)-([0-9]*)-([0-9]*).*')
        short_url = cls.get_url_from_link(soup.find('link', rel='shortlink'))
        num = int(short_url_re.match(short_url).groups()[0])
        imgs = soup.find('div', id='comic').find_all('img')
        # Checked before the image is used, so that an unexpected page fails
        # on this sanity check rather than on an IndexError.
        assert len(imgs) == 1
        img = imgs[0]
        year, month, day = [int(s) for s in comic_url_re.match(img['src']).groups()]
        title = img['alt']
        title2 = img['title']
        return {
            'short_url': short_url,
            'title': title,
            'title2': title2,
            'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
            'day': day,
            'month': month,
            'year': year,
            'num': num,
        }
752
753
754
class Garfield(GenericNavigableComic):
    """Class to retrieve Garfield comics."""
    # Also on http://www.gocomics.com/garfield
    name = 'garfield'
    long_name = 'Garfield'
    url = 'https://garfield.com'
    _categories = ('GARFIELD', )
    get_first_comic_link = simulate_first_link
    first_url = 'https://garfield.com/comic/1978/06/19'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic."""
        return last_soup.find('a', class_='comic-arrow-right' if next_ else 'comic-arrow-left')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        The date is read from the url of `link`
        ('<site url>/comic/<year>/<month>/<day>') and the images from the
        'comic-display' div of `soup`.
        """
        url = cls.get_url_from_link(link)
        # The site url is matched literally: without escaping, its '.'
        # would match any character.
        date_re = re.compile('^%s/comic/([0-9]*)/([0-9]*)/([0-9]*)' % re.escape(cls.url))
        year, month, day = [int(s) for s in date_re.match(url).groups()]
        imgs = soup.find('div', class_='comic-display').find_all('img', class_='img-responsive')
        return {
            'month': month,
            'year': year,
            'day': day,
            'img': [i['src'] for i in imgs],
        }
782
783
784
class Dilbert(GenericNavigableComic):
    """Retrieve Dilbert strips from dilbert.com."""
    # Also on http://www.gocomics.com/dilbert-classics
    name = 'dilbert'
    long_name = 'Dilbert'
    url = 'http://dilbert.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://dilbert.com/strip/1989-04-16'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor inside the next/previous navigation div, if any."""
        nav_class = 'nav-comic nav-right' if next_ else 'nav-comic nav-left'
        nav_div = last_soup.find('div', class_=nav_class)
        if not nav_div:
            return None
        return nav_div.find('a')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the info dict of a comic, built from the page <meta> tags."""
        def meta_content(prop):
            """Return the content of the first <meta> with the given property."""
            return soup.find('meta', property=prop)['content']

        title = meta_content('og:title')
        image_metas = soup.find_all('meta', property='og:image')
        desc = meta_content('og:description')
        published = string_to_date(meta_content('article:publish_date'), "%B %d, %Y")
        author = meta_content('article:author')
        tags = meta_content('article:tag')
        return {
            'title': title,
            'description': desc,
            'img': [meta['content'] for meta in image_metas],
            'author': author,
            'tags': tags,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
820
821
822
class VictimsOfCircumsolar(GenericNavigableComic):
    """Retrieve Victims Of Circumsolar comics."""
    name = 'circumsolar'
    long_name = 'Victims Of Circumsolar'
    url = 'http://www.victimsofcircumsolar.com'
    get_navi_link = get_a_navi_comicnavnext_navinext
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.victimsofcircumsolar.com/comic/modern-addiction'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, description and image URLs of a comic page."""
        # Date is on the archive page
        title_metas = soup.find_all('meta', property='og:title')
        title = title_metas[-1]['content']
        desc_metas = soup.find_all('meta', property='og:description')
        desc = desc_metas[-1]['content']
        comic_div = soup.find('div', id='comic')
        imgs = comic_div.find_all('img')
        # Every image is expected to repeat the page title in both attributes
        assert all(img['title'] == img['alt'] == title for img in imgs)
        return {
            'title': title,
            'description': desc,
            'img': [img['src'] for img in imgs],
        }
844
845
846
class ThreeWordPhrase(GenericNavigableComic):
    """Retrieve Three Word Phrase comics."""
    # Also on http://www.threewordphrase.tumblr.com
    name = 'threeword'
    long_name = 'Three Word Phrase'
    url = 'http://threewordphrase.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the element wrapping the 'first' button on the home page."""
        home = get_soup_at_url(cls.url)
        first_button = home.find('img', src='/firstlink.gif')
        return first_button.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the element wrapping the next/previous button, or None.

        None is returned when that wrapping element carries no href.
        """
        button_src = '/nextlink.gif' if next_ else '/prevlink.gif'
        wrapper = last_soup.find('img', src=button_src).parent
        if wrapper.get('href') is None:
            return None
        return wrapper

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, alt texts and image URLs of a comic page."""
        # Images whose source ends with one of these are site decoration
        ignored_suffixes = ('link.gif', '32.png', 'twpbookad.jpg',
                            'merchad.jpg', 'header.gif', 'tipjar.jpg')
        title = soup.find('title')
        imgs = [img for img in soup.find_all('img')
                if not img['src'].endswith(ignored_suffixes)]
        alts = (img.get('alt') for img in imgs)
        return {
            'title': title.string if title else None,
            'title2': '  '.join(alt for alt in alts if alt),
            'img': [urljoin_wrapper(cls.url, img['src']) for img in imgs],
        }
878
879
880
class DeadlyPanel(GenericEmptyComic, GenericNavigableComic):
    """Retrieve Deadly Panel comics."""
    # Also on https://tapastic.com/series/deadlypanel
    name = 'deadly'
    long_name = 'Deadly Panel'
    url = 'http://www.deadlypanel.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the image URLs found in the 'comic' div of the page."""
        comic_div = soup.find('div', id='comic')
        imgs = comic_div.find_all('img')
        assert all(img['alt'] == img['title'] for img in imgs)
        return {
            'img': [img['src'] for img in imgs],
        }
897
898
899 View Code Duplication
class TheGentlemanArmchair(GenericNavigableComic):
    """Retrieve The Gentleman Armchair comics."""
    name = 'gentlemanarmchair'
    long_name = 'The Gentleman Armchair'
    url = 'http://thegentlemansarmchair.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return images, title, author and date of a comic page."""
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        posted = string_to_date(soup.find('span', class_='post-date').string, "%B %d, %Y")
        comic_div = soup.find('div', id='comic')
        imgs = comic_div.find_all('img')
        return {
            'img': [img['src'] for img in imgs],
            'title': title,
            'author': author,
            'month': posted.month,
            'year': posted.year,
            'day': posted.day,
        }
923
924
925 View Code Duplication
class MyExtraLife(GenericNavigableComic):
    """Retrieve My Extra Life comics."""
    name = 'extralife'
    long_name = 'My Extra Life'
    url = 'http://www.myextralife.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'first comic' navigation anchor of the home page."""
        home = get_soup_at_url(cls.url)
        return home.find('a', class_='comic_nav_link first_comic_link')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, image URLs and date of a comic page."""
        title = soup.find("h1", class_="comic_title").string
        posted = string_to_date(soup.find("span", class_="comic_date").string, "%B %d, %Y")
        imgs = soup.find_all("img", class_="comic")
        assert all(img['alt'] == img['title'] == title for img in imgs)
        return {
            'title': title,
            # Images with an empty src are left out
            'img': [img['src'] for img in imgs if img["src"]],
            'day': posted.day,
            'month': posted.month,
            'year': posted.year
        }
952
953
954
class SaturdayMorningBreakfastCereal(GenericNavigableComic):
    """Retrieve Saturday Morning Breakfast Cereal comics."""
    # Also on http://www.gocomics.com/saturday-morning-breakfast-cereal
    # Also on http://smbc-comics.tumblr.com
    name = 'smbc'
    long_name = 'Saturday Morning Breakfast Cereal'
    url = 'http://www.smbc-comics.com'
    _categories = ('SMBC', )
    get_navi_link = get_a_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the rel='start' anchor of the home page."""
        home = get_soup_at_url(cls.url)
        return home.find('a', rel='start')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, image URLs and date of a comic page.

        The main image comes first; the image of the 'aftercomic' div is
        appended when that div exists and its image has a non-empty src.
        """
        main_img = soup.find('img', id='cc-comic')
        img_srcs = [main_img['src']]
        after_div = soup.find('div', id='aftercomic')
        after_src = after_div.find('img')['src'] if after_div else ''
        if after_src:
            img_srcs.append(after_src)
        publish_div = soup.find('div', class_='cc-publishtime')
        posted = string_to_date(publish_div.contents[0], "%B %d, %Y")
        return {
            'title': main_img['title'],
            'img': [urljoin_wrapper(cls.url, src) for src in img_srcs],
            'day': posted.day,
            'month': posted.month,
            'year': posted.year
        }
986
987
988
class PerryBibleFellowship(GenericListableComic):
    """Retrieve Perry Bible Fellowship comics."""
    name = 'pbf'
    long_name = 'Perry Bible Fellowship'
    url = 'http://pbfcomics.com'
    get_url_from_archive_element = join_cls_url_to_href

    @classmethod
    def get_archive_elements(cls):
        """Return the home page anchors with a '/<digits>/' href, in reverse page order."""
        comic_link_re = re.compile('^/[0-9]*/$')
        anchors = get_soup_at_url(cls.url).find_all('a', href=comic_link_re)
        return reversed(anchors)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return number, name and image URL of the comic behind `link`."""
        comic_url = cls.get_url_from_archive_element(link)
        comic_img_re = re.compile('^/archive_b/PBF.*')
        name = link.string
        num = int(link['name'])
        href = link['href']
        # The archive anchor's name attribute and its href must agree
        assert href == '/%d/' % num
        imgs = soup.find_all('img', src=comic_img_re)
        assert len(imgs) == 1
        assert imgs[0]['alt'] == name
        return {
            'num': num,
            'name': name,
            'img': [urljoin_wrapper(comic_url, img['src']) for img in imgs],
            'prefix': '%d-' % num,
        }
1018
1019
1020
class Mercworks(GenericNavigableComic):
    """Retrieve Mercworks comics."""
    # Also on http://mercworks.tumblr.com
    name = 'mercworks'
    long_name = 'Mercworks'
    url = 'http://mercworks.net'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the info dict of a comic, built from the page <meta> tags."""
        title = soup.find('meta', property='og:title')['content']
        desc_meta = soup.find('meta', property='og:description')
        # The description tag may be absent
        desc = desc_meta['content'] if desc_meta else ""
        author_meta = soup.find('meta', attrs={'name': 'shareaholic:article_author_name'})
        author = author_meta['content']
        published_meta = soup.find('meta', attrs={'name': 'shareaholic:article_published_time'})
        # Only the first 10 characters are parsed, as a %Y-%m-%d date
        posted = string_to_date(published_meta['content'][:10], "%Y-%m-%d")
        image_metas = soup.find_all('meta', property='og:image')
        return {
            'img': [meta['content'] for meta in image_metas],
            'title': title,
            'author': author,
            'desc': desc,
            'day': posted.day,
            'month': posted.month,
            'year': posted.year
        }
1049
1050
1051
class BerkeleyMews(GenericListableComic):
    """Retrieve Berkeley Mews comics."""
    # Also on http://mews.tumblr.com
    # Also on http://www.gocomics.com/berkeley-mews
    name = 'berkeley'
    long_name = 'Berkeley Mews'
    url = 'http://www.berkeleymews.com'
    _categories = ('BERKELEY', )
    get_url_from_archive_element = get_href
    comic_num_re = re.compile('%s/\\?p=([0-9]*)$' % url)

    @classmethod
    def get_archive_elements(cls):
        """Return the archive page anchors matching comic_num_re, in reverse page order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, "?page_id=2"))
        return reversed(archive_soup.find_all('a', href=cls.comic_num_re))

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return number, titles, image URL and date of the comic behind `link`.

        The date is parsed from the image URL (.../<year>-<month>-<day>-...).
        """
        comic_date_re = re.compile('.*/([0-9]*)-([0-9]*)-([0-9]*)-.*')
        comic_url = cls.get_url_from_archive_element(link)
        num = int(cls.comic_num_re.match(comic_url).group(1))
        img = soup.find('div', id='comic').find('img')
        assert img['alt'] == img['title']
        title2 = img['title']
        img_url = img['src']
        year, month, day = map(int, comic_date_re.match(img_url).groups())
        return {
            'num': num,
            'title': link.string,
            'title2': title2,
            'img': [img_url],
            'year': year,
            'month': month,
            'day': day,
        }
1087
1088
1089
class GenericBouletCorp(GenericNavigableComic):
    """Shared logic to retrieve BouletCorp comics, whatever the language."""
    # Also on http://bouletcorp.tumblr.com
    _categories = ('BOULET', )
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the first anchor of the 'centered_nav' div of the home page."""
        nav_div = get_soup_at_url(cls.url).find('div', id='centered_nav')
        return nav_div.find_all('a')[0]

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return images, title, image titles and date of a comic page.

        The date is parsed from the comic URL (<url>/<year>/<month>/<day>/).
        """
        comic_url = cls.get_url_from_link(link)
        date_match = re.match('^%s/([0-9]*)/([0-9]*)/([0-9]*)/' % cls.url, comic_url)
        year, month, day = map(int, date_match.groups())
        story = soup.find('div', id='notes').find('div', class_='storycontent')
        imgs = story.find_all('img')
        img_titles = (img.get('title') for img in imgs)
        texts = '  '.join(text for text in img_titles if text)
        title = soup.find('title').string
        return {
            # Images without a src attribute are left out
            'img': [convert_iri_to_plain_ascii_uri(img['src']) for img in imgs if img.get('src') is not None],
            'title': title,
            'texts': texts,
            'year': year,
            'month': month,
            'day': day,
        }
1117
1118
1119
class BouletCorp(GenericBouletCorp):
    """Retrieve BouletCorp comics from www.bouletcorp.com."""
    name = 'boulet'
    long_name = 'Boulet Corp'
    url = 'http://www.bouletcorp.com'
    _categories = ('FRANCAIS', )
1125
1126
1127
class BouletCorpEn(GenericBouletCorp):
    """Retrieve BouletCorp comics from english.bouletcorp.com."""
    name = 'boulet_en'
    long_name = 'Boulet Corp English'
    url = 'http://english.bouletcorp.com'
1132
1133
1134 View Code Duplication
class AmazingSuperPowers(GenericNavigableComic):
    """Retrieve Amazing Super Powers comics."""
    name = 'asp'
    long_name = 'Amazing Super Powers'
    url = 'http://www.amazingsuperpowers.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, author, image URLs and date of a comic page.

        The title is the space-joined titles of the comic images.
        """
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        posted = string_to_date(soup.find('span', class_='post-date').string, "%B %d, %Y")
        comic_div = soup.find('div', id='comic')
        imgs = comic_div.find_all('img')
        title = ' '.join(img['title'] for img in imgs)
        assert all(img['alt'] == img['title'] for img in imgs)
        return {
            'title': title,
            'author': author,
            'img': [img['src'] for img in imgs],
            'day': posted.day,
            'month': posted.month,
            'year': posted.year
        }
1159
1160
1161
class ToonHole(GenericNavigableComic):
    """Retrieve Toon Hole comics."""
    # Also on http://tapastic.com/series/TOONHOLE
    name = 'toonhole'
    long_name = 'Toon Hole'
    url = 'http://www.toonhole.com'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return short URL, title, date and image URLs of a comic page.

        The title is the alt text of the first comic image, or an empty
        string when the page has no comic image.
        """
        short_url = soup.find('link', rel='shortlink')['href']
        meta_div = soup.find('div', class_='entry-meta')
        posted = string_to_date(meta_div.contents[0].strip(), "%B %d, %Y")
        imgs = soup.find('div', id='comic').find_all('img')
        title = ""
        if imgs:
            first_img = imgs[0]
            title = first_img['alt']
            assert first_img['title'] == title
        return {
            'short_url': short_url,
            'title': title,
            'month': posted.month,
            'year': posted.year,
            'day': posted.day,
            'img': [convert_iri_to_plain_ascii_uri(img['src']) for img in imgs],
        }
1191
1192
1193
class Channelate(GenericNavigableComic):
    """Retrieve Channelate comics."""
    name = 'channelate'
    long_name = 'Channelate'
    url = 'http://www.channelate.com'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, author, date and image URLs of a comic page.

        When the page has an 'extrapanelbutton' div, the page it links to is
        fetched as well and its 'extrapanelimage' images are appended.
        """
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        posted = string_to_date(soup.find('span', class_='post-date').string, '%Y/%m/%d')
        title = soup.find('meta', property='og:title')['content']
        comic_div = soup.find('div', id='comic')
        imgs = []
        if comic_div:
            imgs = comic_div.find_all('img')
        extra_url = None
        extra_button = soup.find('div', id='extrapanelbutton')
        if extra_button:
            extra_url = extra_button.find('a')['href']
            extra_page = get_soup_at_url(extra_url)
            imgs += extra_page.find_all('img', class_='extrapanelimage')
        return {
            'url_extra': extra_url,
            'title': title,
            'author': author,
            'month': posted.month,
            'year': posted.year,
            'day': posted.day,
            'img': [urljoin_wrapper(cls.url, img['src']) for img in imgs],
        }
1227
1228
1229
class CyanideAndHappiness(GenericNavigableComic):
    """Class to retrieve Cyanide And Happiness comics."""
    name = 'cyanide'
    long_name = 'Cyanide and Happiness'
    url = 'http://explosm.net'
    _categories = ('NSFW', )
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics."""
        return get_soup_at_url(cls.url).find('a', title='Oldest comic')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns None when the navigation anchor is missing from the page or
        when it carries no href.
        """
        # NOTE(review): the trailing space in 'previous-comic ' looks
        # accidental but is kept as is - confirm against the site's markup.
        link = last_soup.find('a', class_='next-comic' if next_ else 'previous-comic ')
        # find() returns None when nothing matches: guard before using .get()
        if link is None or link.get('href') is None:
            return None
        return link

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The comic number is the second-to-last '/'-separated component of
        the og:url meta content; the author is the credit text without its
        leading 'by '.
        """
        url2 = soup.find('meta', property='og:url')['content']
        num = int(url2.split('/')[-2])
        date_str = soup.find('h3').find('a').string
        day = string_to_date(date_str, '%Y.%m.%d')
        author = soup.find('small', class_="author-credit-name").string
        assert author.startswith('by ')
        author = author[3:]
        imgs = soup.find_all('img', id='main-comic')
        return {
            'num': num,
            'author': author,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'prefix': '%d-' % num,
            'img': [convert_iri_to_plain_ascii_uri(urljoin_wrapper(cls.url, i['src'])) for i in imgs]
        }
1268
1269
1270
class MrLovenstein(GenericComic):
    """Retrieve Mr Lovenstein comics."""
    # Also on https://tapastic.com/series/MrLovenstein
    name = 'mrlovenstein'
    long_name = 'Mr. Lovenstein'
    url = 'http://www.mrlovenstein.com'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Yield the comics following last_comic. Implements GenericComic's abstract method.

        The range of comic numbers is taken from the '/comic/<num>' links of
        the home page; each comic page in that range is then fetched.
        """
        # TODO: more info from http://www.mrlovenstein.com/archive
        comic_num_re = re.compile('^/comic/([0-9]*)$')
        home_links = get_soup_at_url(cls.url).find_all('a', href=comic_num_re)
        nums = [int(comic_num_re.match(anchor['href']).group(1)) for anchor in home_links]
        oldest, newest = min(nums), max(nums)
        start = last_comic['num'] + 1 if last_comic else oldest
        for num in range(start, newest + 1):
            comic_url = urljoin_wrapper(cls.url, '/comic/%d' % num)
            page = get_soup_at_url(comic_url)
            found = page.find_all('img', src=re.compile('^/images/comics/'))
            # Images are kept in the reverse of their order on the page
            imgs = found[::-1]
            description = page.find('meta', attrs={'name': 'description'})['content']
            img_titles = (img.get('title') for img in imgs)
            yield {
                'url': comic_url,
                'num': num,
                'texts': '  '.join(text for text in img_titles if text),
                'img': [urljoin_wrapper(comic_url, img['src']) for img in imgs],
                'description': description,
            }
1300
1301
1302
class DinosaurComics(GenericListableComic):
    """Retrieve Dinosaur Comics comics."""
    name = 'dinosaur'
    long_name = 'Dinosaur Comics'
    url = 'http://www.qwantz.com'
    get_url_from_archive_element = get_href
    comic_link_re = re.compile('^%s/index.php\\?comic=([0-9]*)$' % url)

    @classmethod
    def get_archive_elements(cls):
        """Return the archive page anchors matching comic_link_re, in reverse page order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive.php'))
        anchors = archive_soup.find_all('a', href=cls.comic_link_re)
        # first link is random -> skip it
        return reversed(anchors[1:])

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return date, image URL, title, text and number of the comic behind `link`.

        The date is the text of the archive anchor; the text is the string
        of the node following that anchor.
        """
        comic_url = cls.get_url_from_archive_element(link)
        num = int(cls.comic_link_re.match(comic_url).group(1))
        date_str = link.string
        text = link.next_sibling.string
        posted = string_to_date(remove_st_nd_rd_th_from_date(date_str), "%B %d, %Y")
        img = soup.find('img', src=re.compile('^%s/comics/' % cls.url))
        return {
            'month': posted.month,
            'year': posted.year,
            'day': posted.day,
            'img': [img.get('src')],
            'title': img.get('title'),
            'text': text,
            'num': num,
        }
1335
1336
1337
class ButterSafe(GenericListableComic):
    """Retrieve Butter Safe comics."""
    name = 'butter'
    long_name = 'ButterSafe'
    url = 'http://buttersafe.com'
    get_url_from_archive_element = get_href
    comic_link_re = re.compile('^%s/([0-9]*)/([0-9]*)/([0-9]*)/.*' % url)

    @classmethod
    def get_archive_elements(cls):
        """Return the archive page anchors matching comic_link_re, in reverse page order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive/'))
        return reversed(archive_soup.find_all('a', href=cls.comic_link_re))

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, date and image URL of the comic behind `link`.

        The date is parsed from the comic URL (<url>/<year>/<month>/<day>/...).
        """
        comic_url = cls.get_url_from_archive_element(link)
        title = link.string
        year, month, day = map(int, cls.comic_link_re.match(comic_url).groups())
        comic_div = soup.find('div', id='comic')
        img = comic_div.find('img')
        assert img['alt'] == title
        return {
            'title': title,
            'day': day,
            'month': month,
            'year': year,
            'img': [img['src']],
        }
1365
1366
1367
class CalvinAndHobbes(GenericComic):
    """Class to retrieve Calvin and Hobbes comics."""
    # Also on http://www.gocomics.com/calvinandhobbes/
    name = 'calvin'
    long_name = 'Calvin and Hobbes'
    # This is not through any official webpage but eh...
    url = 'http://marcel-oehler.marcellosendos.ch/comics/ch/'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generator to get the next comic. Implementation of GenericComic's abstract method.

        The index page links to one page per month ('<year>/<month>/');
        each month page holds images whose src starts with
        '<year><month><day>'. Only comics strictly newer than the last
        retrieved one (or than 1985-11-01 when there is none) are yielded.
        """
        last_date = get_date_for_comic(
            last_comic) if last_comic else date(1985, 11, 1)
        # '+' rather than '*': an href such as '//host/' would otherwise
        # match with two empty groups and crash on int('')
        link_re = re.compile('^([0-9]+)/([0-9]+)/')
        for link in get_soup_at_url(cls.url).find_all('a', href=link_re):
            href = link['href']
            # The strings are kept as found to rebuild URLs; ints are for dates
            year_str, month_str = link_re.match(href).groups()
            year, month = int(year_str), int(month_str)
            # Coarse filter: skip months lying entirely before last_date
            if date(year, month, 1) + timedelta(days=31) < last_date:
                continue
            img_re = re.compile('^%s%s([0-9]+)' % (year_str, month_str))
            month_url = urljoin_wrapper(cls.url, href)
            for img in get_soup_at_url(month_url).find_all('img', src=img_re):
                img_src = img['src']
                day = int(img_re.match(img_src).group(1))
                comic_date = date(year, month, day)
                if comic_date > last_date:
                    yield {
                        'url': month_url,
                        'year': year,
                        'month': month,
                        'day': day,
                        'img': ['%s%s/%s/%s' % (cls.url, year_str, month_str, img_src)],
                    }
                    last_date = comic_date
1401
1402
1403
class AbstruseGoose(GenericListableComic):
    """Retrieve Abstruse Goose comics."""
    name = 'abstruse'
    long_name = 'Abstruse Goose'
    url = 'http://abstrusegoose.com'
    get_url_from_archive_element = get_href
    comic_url_re = re.compile('^%s/([0-9]*)$' % url)
    comic_img_re = re.compile('^%s/strips/.*' % url)

    @classmethod
    def get_archive_elements(cls):
        """Return the archive page anchors matching comic_url_re, in page order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive'))
        return archive_soup.find_all('a', href=cls.comic_url_re)

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Return number, title and image URL of the comic behind `archive_elt`."""
        comic_url = cls.get_url_from_archive_element(archive_elt)
        num = int(cls.comic_url_re.match(comic_url).group(1))
        return {
            'num': num,
            'title': archive_elt.string,
            'img': [soup.find('img', src=cls.comic_img_re)['src']]
        }
1426
1427
1428
class PhDComics(GenericNavigableComic):
    """Retrieve PhD Comics."""
    name = 'phd'
    long_name = 'PhD Comics'
    url = 'http://phdcomics.com/comics/archive.php'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the element wrapping the 'first' button on the archive page."""
        archive_soup = get_soup_at_url(cls.url)
        first_button = archive_soup.find('img', src='images/first_button.gif')
        return first_button.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the element wrapping the next/previous button, or None if absent."""
        button_src = 'images/next_button.gif' if next_ else 'images/prev_button.gif'
        button = last_soup.find('img', src=button_src)
        if button is None:
            return None
        return button.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return date, image URL and title of a comic page.

        When the date shown on the page cannot be parsed, a message is
        printed and today's date is used instead.
        """
        date_font = soup.find('font', face='Arial,Helvetica,Geneva,Swiss,SunSans-Regular', color='white')
        date_str = date_font.string.strip()
        try:
            posted = string_to_date(date_str, '%m/%d/%Y')
        except ValueError:
            print("Invalid date %s" % date_str)
            posted = date.today()
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        return {
            'year': posted.year,
            'month': posted.month,
            'day': posted.day,
            'img': [soup.find('img', id='comic')['src']],
            'title': title,
        }
1463
1464
1465 View Code Duplication
class Octopuns(GenericEmptyComic, GenericNavigableComic):
    """Retrieve Octopuns comics."""
    # Also on http://octopuns.tumblr.com
    name = 'octopuns'
    long_name = 'Octopuns'
    url = 'http://www.octopuns.net'

    @classmethod
    def get_first_comic_link(cls):
        """Return the element wrapping the 'First.png' button on the home page."""
        home = get_soup_at_url(cls.url)
        first_button = home.find('img', src=re.compile('.*/First.png'))
        return first_button.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the element wrapping the Next/Back button, or None.

        None is returned when that wrapping element carries no href.
        """
        button_re = re.compile('.*/Next.png' if next_ else '.*/Back.png')
        wrapper = last_soup.find('img', src=button_re).parent
        if wrapper.get('href') is None:
            return None
        return wrapper

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return image URLs, title and date of a comic page."""
        title = soup.find('h3', class_='post-title entry-title').string
        header_text = soup.find('h2', class_='date-header').string
        posted = string_to_date(header_text, "%A, %B %d, %Y")
        image_links = soup.find_all('link', rel='image_src')
        return {
            'img': [image_link['href'] for image_link in image_links],
            'title': title,
            'day': posted.day,
            'month': posted.month,
            'year': posted.year,
        }
1497
1498
1499
class Quarktees(GenericNavigableComic):
    """Retrieve the Quarktees comics."""
    name = 'quarktees'
    long_name = 'Quarktees'
    url = 'http://www.quarktees.com/blogs/news'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.quarktees.com/blogs/news/12486621-coming-soon'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the 'article-next' (or 'article-prev') anchor of the page."""
        anchor_id = 'article-next' if next_ else 'article-prev'
        return last_soup.find('a', id=anchor_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the title and image URLs of an article page."""
        title = soup.find('meta', property='og:title')['content']
        article_div = soup.find('div', class_='single-article')
        article_imgs = article_div.find_all('img')
        return {
            'title': title,
            'img': [urljoin_wrapper(cls.url, img['src']) for img in article_imgs],
        }
1523
1524
1525
class OverCompensating(GenericNavigableComic):
    """Class to retrieve the Over Compensating comics."""
    name = 'compensating'
    long_name = 'Over Compensating'
    url = 'http://www.overcompensating.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the home page anchor whose href ends with 'comic=1'."""
        home = get_soup_at_url(cls.url)
        return home.find('a', href=re.compile('comic=1$'))

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor leading to the next or previous comic, if any."""
        link_title = 'next comic' if next_ else 'go back already'
        return last_soup.find('a', title=link_title)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return number, image url and title of the comic in `soup`.

        The number is parsed from the trailing 'comic=<digits>' part of
        the comic url; the image source is joined to that url.
        """
        page_url = cls.get_url_from_link(link)
        found = re.compile('.*comic=([0-9]*)$').match(page_url)
        number = int(found.group(1))
        picture = soup.find('img', src=re.compile('^/oc/comics/.*'))
        return {
            'num': number,
            'img': [urljoin_wrapper(page_url, picture['src'])],
            'title': picture.get('title')
        }
1555
1556
1557
class Oglaf(GenericNavigableComic):
    """Class to retrieve Oglaf comics."""
    name = 'oglaf'
    long_name = 'Oglaf [NSFW]'
    url = 'http://oglaf.com'
    _categories = ('NSFW', )
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the parent of the div with id 'st' on the home page."""
        start_div = get_soup_at_url(cls.url).find("div", id="st")
        return start_div.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the parent of the 'nx'/'pvs' div, or None when absent."""
        marker = last_soup.find("div", id="nx" if next_ else "pvs")
        if not marker:
            return None
        return marker.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, image urls and description of the comic in `soup`.

        Exactly one title image (inside the 'tt' div) and one strip image
        are expected; the description joins their 'title' attributes.
        """
        page_title = soup.find('title').string
        header_imgs = soup.find('div', id='tt').find_all('img')
        assert len(header_imgs) == 1
        strip = soup.find_all('img', id='strip')
        assert len(strip) == 1
        pictures = header_imgs + strip
        description = ' '.join(pic['title'] for pic in pictures)
        return {
            'title': page_title,
            'img': [pic['src'] for pic in pictures],
            'description': description,
        }
1591
1592
1593
class ScandinaviaAndTheWorld(GenericNavigableComic):
    """Class to retrieve Scandinavia And The World comics."""
    name = 'satw'
    long_name = 'Scandinavia And The World'
    url = 'http://satwcomic.com'
    first_url = 'http://satwcomic.com/sweden-denmark-and-norway'
    get_first_comic_link = simulate_first_link

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor with accesskey 'n' (next) or 'p' (previous)."""
        key = 'n' if next_ else 'p'
        return last_soup.find('a', accesskey=key)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, description and image urls read from the page."""
        label = soup.find('meta', attrs={'name': 'twitter:label1'})
        title = label['content']
        description = soup.find('meta', property='og:description')['content']
        pictures = soup.find_all('img', itemprop="image")
        return {
            'title': title,
            'description': description,
            'img': [pic['src'] for pic in pictures],
        }
1617
1618
1619
class SomethingOfThatIlk(GenericEmptyComic):  # Does not exist anymore
    """Placeholder class for the Something Of That Ilk comics."""
    name = 'somethingofthatilk'
    long_name = 'Something Of That Ilk'
    url = 'http://www.somethingofthatilk.com'
1624
1625
1626
class InfiniteMonkeyBusiness(GenericNavigableComic):
    """Generic class to retrieve InfiniteMonkeyBusiness comics."""
    name = 'monkey'
    long_name = 'Infinite Monkey Business'
    url = 'http://infinitemonkeybusiness.net'
    first_url = 'http://infinitemonkeybusiness.net/comic/pillory/'
    get_first_comic_link = simulate_first_link
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the title and the image urls found in the 'comic' div."""
        og_title = soup.find('meta', property='og:title')
        title = og_title['content']
        pictures = soup.find('div', id='comic').find_all('img')
        return {
            'title': title,
            'img': [pic['src'] for pic in pictures],
        }
1644
1645
1646
class Wondermark(GenericListableComic):
    """Class to retrieve the Wondermark comics."""
    name = 'wondermark'
    long_name = 'Wondermark'
    url = 'http://wondermark.com'
    get_url_from_archive_element = get_href

    @classmethod
    def get_archive_elements(cls):
        """Return the 'bookmark' anchors of the archive page, reversed."""
        archive = get_soup_at_url(urljoin_wrapper(cls.url, 'archive/'))
        return reversed(archive.find_all('a', rel='bookmark'))

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return date, image, title, alt text and tags of a comic page.

        Pages without a 'comic' div yield an empty image list and empty
        title/alt strings; the date and the tags are read in both cases.
        """
        raw_date = soup.find('div', class_='postdate').find('em').string
        published = string_to_date(remove_st_nd_rd_th_from_date(raw_date), "%B %d, %Y")
        comic_div = soup.find('div', id='comic')
        if not comic_div:
            img_src, alt, title = [], '', ''
        else:
            picture = comic_div.find('img')
            img_src = [picture['src']]
            alt = picture['alt']
            assert alt == picture['title']
            title = soup.find('meta', property='og:title')['content']
        tag_links = soup.find('div', class_='postmeta').find_all('a', rel='tag')
        return {
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': img_src,
            'title': title,
            'alt': alt,
            'tags': ' '.join(tag.string for tag in tag_links),
        }
1683
1684
1685
class WarehouseComic(GenericNavigableComic):
    """Class to retrieve Warehouse Comic comics."""
    name = 'warehouse'
    long_name = 'Warehouse Comic'
    url = 'http://warehousecomic.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = get_a_navi_navifirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return image urls, title and publication date of a comic page."""
        post_title = soup.find('h2', class_='post-title').string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find('div', id='comic').find_all('img')
        info = {
            'img': [pic['src'] for pic in pictures],
            'title': post_title,
            'day': published.day,
            'month': published.month,
            'year': published.year,
        }
        return info
1707
1708
1709
class JustSayEh(GenericNavigableComic):
    """Class to retrieve Just Say Eh comics."""
    # Also on http://tapastic.com/series/Just-Say-Eh
    name = 'justsayeh'
    long_name = 'Just Say Eh'
    url = 'http://www.justsayeh.com'
    get_navi_link = get_a_navi_comicnavnext_navinext
    get_first_comic_link = get_a_navi_navifirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return image urls, title and alt text of a comic page.

        Every image must have identical 'alt' and 'title' attributes; the
        alt text of the first image is the one reported.
        """
        post_title = soup.find('h2', class_='post-title').string
        pictures = soup.find("div", id="comic").find_all("img")
        for pic in pictures:
            assert pic['alt'] == pic['title']
        first_alt = pictures[0]['alt']
        return {
            'img': [pic['src'] for pic in pictures],
            'title': post_title,
            'alt': first_alt,
        }
1730
1731
1732 View Code Duplication
class MouseBearComedy(GenericNavigableComic):
    """Class to retrieve Mouse Bear Comedy comics."""
    # Also on http://mousebearcomedy.tumblr.com
    name = 'mousebear'
    long_name = 'Mouse Bear Comedy'
    url = 'http://www.mousebearcomedy.com'
    get_navi_link = get_a_navi_comicnavnext_navinext
    get_first_comic_link = get_a_navi_navifirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return date, image urls, title and author of a comic page.

        Every image is expected to use the post title as both its 'alt'
        and its 'title' attribute.
        """
        post_title = soup.find('h2', class_='post-title').string
        author_name = soup.find("span", class_="post-author").find("a").string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, '%B %d, %Y')
        pictures = soup.find("div", id="comic").find_all("img")
        for pic in pictures:
            assert pic['alt'] == pic['title'] == post_title
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [pic['src'] for pic in pictures],
            'title': post_title,
            'author': author_name,
        }
1758
1759
1760
class BigFootJustice(GenericNavigableComic):
    """Class to retrieve Big Foot Justice comics."""
    # Also on http://tapastic.com/series/bigfoot-justice
    name = 'bigfoot'
    long_name = 'Big Foot Justice'
    url = 'http://bigfootjustice.com'
    get_navi_link = get_a_navi_comicnavnext_navinext
    get_first_comic_link = get_a_navi_navifirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return image urls and title of a comic page.

        The title is the space-joined 'title' attribute of every image in
        the 'comic' div; each image must have matching 'title' and 'alt'.
        """
        pictures = soup.find('div', id='comic').find_all('img')
        for pic in pictures:
            assert pic['title'] == pic['alt']
        joined_title = ' '.join(pic['title'] for pic in pictures)
        return {
            'img': [pic['src'] for pic in pictures],
            'title': joined_title,
        }
1779
1780
1781
class RespawnComic(GenericNavigableComic):
    """Class to retrieve Respawn Comic."""
    # Also on http://respawncomic.tumblr.com
    name = 'respawn'
    long_name = 'Respawn Comic'
    # The url must not carry trailing whitespace (it used to end with a space).
    url = 'http://respawncomic.com'
    _categories = ('RESPAWN', )
    get_navi_link = get_a_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://respawncomic.com/comic/c0001/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Title, author and publication time are read from <meta> tags; only
        the first 10 characters of the publication time (the "%Y-%m-%d"
        part) are parsed. Images come from the 'og:image' metas, except
        the two urls listed in `skip_imgs`.
        """
        title = soup.find('meta', property='og:title')['content']
        author = soup.find('meta', attrs={'name': 'shareaholic:article_author_name'})['content']
        date_str = soup.find('meta', attrs={'name': 'shareaholic:article_published_time'})['content']
        date_str = date_str[:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        imgs = soup.find_all('meta', property='og:image')
        # Image urls that are never reported as part of a comic.
        skip_imgs = {
            'http://respawncomic.com/wp-content/uploads/2016/03/site/HAROLD2.png',
            'http://respawncomic.com/wp-content/uploads/2016/03/site/DEVA.png'
        }
        return {
            'title': title,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [i['content'] for i in imgs if i['content'] not in skip_imgs],
        }
1813
1814
1815
class SafelyEndangered(GenericNavigableComic):
    """Class to retrieve Safely Endangered comics."""
    # Also on http://tumblr.safelyendangered.com
    name = 'endangered'
    long_name = 'Safely Endangered'
    url = 'http://www.safelyendangered.com'
    first_url = 'http://www.safelyendangered.com/comic/ignored/'
    get_first_comic_link = simulate_first_link
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return date, image urls, title and alt text of a comic page.

        The alt text is taken from the first image; every image must have
        identical 'alt' and 'title' attributes.
        """
        post_title = soup.find('h2', class_='post-title').string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, '%B %d, %Y')
        pictures = soup.find('div', id='comic').find_all('img')
        first_alt = pictures[0]['alt']
        for pic in pictures:
            assert pic['alt'] == pic['title']
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [pic['src'] for pic in pictures],
            'title': post_title,
            'alt': first_alt,
        }
1842
1843
1844 View Code Duplication
class PicturesInBoxes(GenericNavigableComic):
    """Class to retrieve Pictures In Boxes comics."""
    # Also on http://picturesinboxescomic.tumblr.com
    name = 'picturesinboxes'
    long_name = 'Pictures in Boxes'
    url = 'http://www.picturesinboxes.com'
    first_url = 'http://www.picturesinboxes.com/2013/10/26/tetris/'
    get_first_comic_link = simulate_first_link
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return date, image urls, title and author of a comic page.

        At least one image is required in the 'comicpane' div, and each
        one must use the post title as both 'title' and 'alt'.
        """
        post_title = soup.find('h2', class_='post-title').string
        author_name = soup.find("span", class_="post-author").find("a").string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, '%B %d, %Y')
        pictures = soup.find('div', class_='comicpane').find_all('img')
        assert pictures
        for pic in pictures:
            assert pic['title'] == pic['alt'] == post_title
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [pic['src'] for pic in pictures],
            'title': post_title,
            'author': author_name,
        }
1872
1873
1874
class Penmen(GenericEmptyComic):
    """Placeholder class for the Penmen comics."""
    name = 'penmen'
    long_name = 'Penmen'
    url = 'http://penmen.com'
1879
1880
1881
class TheDoghouseDiaries(GenericNavigableComic):
    """Class to retrieve The Dog House Diaries comics."""
    name = 'doghouse'
    long_name = 'The Dog House Diaries'
    url = 'http://thedoghousediaries.com'

    @classmethod
    def get_first_comic_link(cls):
        """Return the anchor with id 'firstlink' from the home page."""
        home = get_soup_at_url(cls.url)
        return home.find('a', id='firstlink')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the 'nextlink' or 'previouslink' anchor, if any."""
        wanted_id = 'nextlink' if next_ else 'previouslink'
        return last_soup.find('a', id=wanted_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return titles, alt text, image url and number of a comic page.

        The number is the last '/'-separated component of the comic url;
        the stripped image source is joined to that url.
        """
        picture = soup.find('img', src=re.compile('^dhdcomics/.*'))
        page_url = cls.get_url_from_link(link)
        info = {
            'title': soup.find('h2', id='titleheader').string,
            'title2': soup.find('div', id='subtext').string,
            'alt': picture.get('title'),
            'img': [urljoin_wrapper(page_url, picture['src'].strip())],
            'num': int(page_url.split('/')[-1]),
        }
        return info
1910
1911
1912
class InvisibleBread(GenericListableComic):
    """Class to retrieve Invisible Bread comics."""
    # Also on http://www.gocomics.com/invisible-bread
    name = 'invisiblebread'
    long_name = 'Invisible Bread'
    url = 'http://invisiblebread.com'

    @classmethod
    def get_archive_elements(cls):
        """Return the 'archive-title' cells of the archive page, reversed."""
        archive_url = urljoin_wrapper(cls.url, 'archives/')
        return reversed(get_soup_at_url(archive_url).find_all('td', class_='archive-title'))

    @classmethod
    def get_url_from_archive_element(cls, td):
        """Get url corresponding to an archive element."""
        return td.find('a')['href']

    @classmethod
    def get_comic_info(cls, soup, td):
        """Get information about a particular comics.

        The title comes from the archive cell, month and day from the
        cell's previous sibling, and the year from the first numeric path
        component of the comic url. The image of the 'comic' div must use
        the title as both its 'title' and 'alt' attribute.
        """
        url = cls.get_url_from_archive_element(td)
        title = td.find('a').string
        month_and_day = td.previous_sibling.string
        # Escape the base url so that its dots only match literal dots.
        link_re = re.compile('^%s/([0-9]+)/' % re.escape(cls.url))
        year = link_re.match(url).group(1)
        date_str = month_and_day + ' ' + year
        day = string_to_date(date_str, '%b %d %Y')
        imgs = [soup.find('div', id='comic').find('img')]
        assert len(imgs) == 1
        assert all(i['title'] == i['alt'] == title for i in imgs)
        return {
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
            'title': title,
        }
1948
1949
1950
class DiscoBleach(GenericEmptyComic):  # Does not work anymore
    """Placeholder class for the Disco Bleach comics."""
    name = 'discobleach'
    long_name = 'Disco Bleach'
    url = 'http://discobleach.com'
1955
1956
1957
class TubeyToons(GenericEmptyComic):  # Does not work anymore
    """Placeholder class for the TubeyToons comics."""
    # Also on http://tapastic.com/series/Tubey-Toons
    # Also on http://tubeytoons.tumblr.com
    name = 'tubeytoons'
    long_name = 'Tubey Toons'
    url = 'http://tubeytoons.com'
    # NOTE(review): 'TUNEYTOONS' looks like a typo for 'TUBEYTOONS' - confirm
    # how category names are used before renaming it.
    _categories = ('TUNEYTOONS', )
1965
1966
1967
class CompletelySeriousComics(GenericNavigableComic):
    """Class to retrieve Completely Serious comics."""
    name = 'completelyserious'
    long_name = 'Completely Serious Comics'
    url = 'http://completelyseriouscomics.com'
    get_navi_link = get_a_navi_navinext
    get_first_comic_link = get_a_navi_navifirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return date, image urls, title, alt text and author of a page.

        The author is the second child of the 'post-author' span. At least
        one image is required, and all images must share the same
        'title'/'alt' text, which is reported as the alt text.
        """
        post_title = soup.find('h2', class_='post-title').string
        author_name = soup.find('span', class_='post-author').contents[1].string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, '%B %d, %Y')
        pictures = soup.find('div', class_='comicpane').find_all('img')
        assert pictures
        shared_alt = pictures[0]['title']
        for pic in pictures:
            assert pic['title'] == pic['alt'] == shared_alt
        return {
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [pic['src'] for pic in pictures],
            'title': post_title,
            'alt': shared_alt,
            'author': author_name,
        }
1995
1996
1997
class PoorlyDrawnLines(GenericListableComic):
    """Class to retrieve Poorly Drawn Lines comics."""
    # Also on http://pdlcomics.tumblr.com
    name = 'poorlydrawn'
    long_name = 'Poorly Drawn Lines'
    url = 'http://poorlydrawnlines.com'
    _categories = ('POORLYDRAWN', )
    get_url_from_archive_element = get_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        At most one image is expected in the 'post' div; its 'title'
        attribute (or an empty string) is used as the comic title.
        """
        imgs = soup.find('div', class_='post').find_all('img')
        assert len(imgs) <= 1
        return {
            'img': [i['src'] for i in imgs],
            'title': imgs[0].get('title', "") if imgs else "",
        }

    @classmethod
    def get_archive_elements(cls):
        """Return the archive anchors pointing to '<url>/comic/...', reversed."""
        archive_url = urljoin_wrapper(cls.url, 'archive')
        # Escape the base url so that its dots only match literal dots; the
        # trailing '.' still requires at least one character after 'comic/'.
        url_re = re.compile('^%s/comic/.' % re.escape(cls.url))
        return reversed(get_soup_at_url(archive_url).find_all('a', href=url_re))
2021
2022
2023 View Code Duplication
class LoadingComics(GenericNavigableComic):
    """Class to retrieve Loading Artist comics."""
    name = 'loadingartist'
    long_name = 'Loading Artist'
    url = 'http://www.loadingartist.com/latest'

    @classmethod
    def get_first_comic_link(cls):
        """Return the anchor titled "First" from the page at the class url."""
        latest = get_soup_at_url(cls.url)
        return latest.find('a', title="First")

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor titled 'Next' or 'Previous', if any."""
        link_title = 'Next' if next_ else 'Previous'
        return last_soup.find('a', title=link_title)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, image urls and publication date of a comic page.

        Only images of the 'comic' div with empty 'alt' and 'title'
        attributes are kept.
        """
        heading = soup.find('h1').string
        raw_date = soup.find('span', class_='date').string.strip()
        published = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find('div', class_='comic').find_all('img', alt='', title='')
        info = {
            'title': heading,
            'img': [pic['src'] for pic in pictures],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
        return info
2053
2054
2055
class ChuckleADuck(GenericNavigableComic):
    """Class to retrieve Chuckle-A-Duck comics."""
    name = 'chuckleaduck'
    long_name = 'Chuckle-A-duck'
    url = 'http://chuckleaduck.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = get_div_navfirst_a

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return date, image urls, title and author of a comic page.

        A page without a 'comic' div yields no image and an empty title;
        otherwise every image must share the first image's 'title' as both
        its 'title' and 'alt' attribute.
        """
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(remove_st_nd_rd_th_from_date(raw_date), "%B %d, %Y")
        author_name = soup.find('span', class_='post-author').string
        comic_div = soup.find('div', id='comic')
        if comic_div:
            pictures = comic_div.find_all('img')
        else:
            pictures = []
        if pictures:
            title = pictures[0]['title']
        else:
            title = ""
        for pic in pictures:
            assert pic['title'] == pic['alt'] == title
        return {
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'author': author_name,
        }
2081
2082
2083
class DepressedAlien(GenericNavigableComic):
    """Class to retrieve Depressed Alien Comics."""
    name = 'depressedalien'
    long_name = 'Depressed Alien'
    url = 'http://depressedalien.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        The link is the parent of the image named 'beginArrow' on the
        page at the class url.
        """
        return get_soup_at_url(cls.url).find('img', attrs={'name': 'beginArrow'}).parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns the parent of the image named 'rightArrow' (next) or
        'leftArrow' (previous), or None when that image is not on the page.
        """
        arrow_name = 'rightArrow' if next_ else 'leftArrow'
        arrow = last_soup.find('img', attrs={'name': arrow_name})
        # A missing arrow means "no link" instead of an AttributeError on
        # None.parent.
        return None if arrow is None else arrow.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The title is read from the 'twitter:title' meta and the images
        from the 'og:image' metas.
        """
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        imgs = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'img': [i['content'] for i in imgs],
        }
2109
2110
2111
class ThingsInSquares(GenericListableComic):
    """Class to retrieve Things In Squares comics."""
    # This can be retrieved in other languages
    # Also on https://tapastic.com/series/Things-in-Squares
    name = 'squares'
    long_name = 'Things in squares'
    url = 'http://www.thingsinsquares.com'

    @classmethod
    def get_comic_info(cls, soup, tr):
        """Return the information gathered about one comic.

        `tr` is an archive row with exactly three cells: the second one
        holds the link (and title), the third one the date in "%m.%d.%y"
        format. Everything else is read from the comic page `soup`.
        """
        _, link_cell, date_cell = tr.find_all('td')
        anchor = link_cell.find('a')
        published = string_to_date(date_cell.string, "%m.%d.%y")
        title = anchor.string
        og_title = soup.find('meta', property='og:title')['content']
        og_desc = soup.find('meta', property='og:description')
        if og_desc:
            description = og_desc['content']
        else:
            description = ''
        tag_metas = soup.find_all('meta', property='article:tag')
        tags = ' '.join(meta['content'] for meta in tag_metas)
        pictures = soup.find('div', class_='entry-content').find_all('img')
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'title': title,
            'title2': og_title,
            'description': description,
            'tags': tags,
            'img': [pic['src'] for pic in pictures],
            'alt': ' '.join(pic['alt'] for pic in pictures),
        }

    @classmethod
    def get_url_from_archive_element(cls, tr):
        """Return the href of the anchor in the second cell of the row."""
        _, link_cell, _unused = tr.find_all('td')
        return link_cell.find('a')['href']

    @classmethod
    def get_archive_elements(cls):
        """Return the rows of the archive table body, in reversed order."""
        archive = get_soup_at_url(urljoin_wrapper(cls.url, 'archive-2'))
        rows = archive.find('tbody').find_all('tr')
        return reversed(rows)
2153
2154
2155
class HappleTea(GenericNavigableComic):
    """Class to retrieve Happle Tea Comics."""
    name = 'happletea'
    long_name = 'Happle Tea'
    url = 'http://www.happletea.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = get_a_navi_navifirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, image urls, alt text, date and author of a page.

        Title, author and date are read from the 'post-content' div; the
        alt text concatenates (without separator) the 'alt' of every image,
        each of which must equal the image's 'title'.
        """
        pictures = soup.find('div', id='comic').find_all('img')
        content = soup.find('div', class_='post-content')
        post_title = content.find('h2', class_='post-title').string
        author_name = content.find('a', rel='author').string
        raw_date = content.find('span', class_='post-date').string
        published = string_to_date(raw_date, "%B %d, %Y")
        for pic in pictures:
            assert pic['alt'] == pic['title']
        return {
            'title': post_title,
            'img': [pic['src'] for pic in pictures],
            'alt': ''.join(pic['alt'] for pic in pictures),
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'author': author_name,
        }
2182
2183
2184
class FatAwesomeComics(GenericNavigableComic):
    """Class to retrieve Fat Awesome Comics."""
    # Also on http://fatawesomecomedy.tumblr.com
    name = 'fatawesome'
    long_name = 'Fat Awesome'
    url = 'http://fatawesome.com/comics'
    first_url = 'http://fatawesome.com/shortbus/'
    get_first_comic_link = simulate_first_link
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, description, tags, alt, image url and date.

        Exactly one image with data-recalc-dims="1" is expected; its source
        is reported without the part following the last '?'. The date is
        the first 10 characters of the 'article:published_time' meta.
        """
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        description = soup.find('meta', attrs={'name': 'description'})['content']
        tag_meta = soup.find('meta', property='article:tag')
        if tag_meta:
            tags = tag_meta['content']
        else:
            tags = ""
        published_time = soup.find('meta', property='article:published_time')['content']
        published = string_to_date(published_time[:10], "%Y-%m-%d")
        pictures = soup.find_all('img', attrs={'data-recalc-dims': "1"})
        assert len(pictures) == 1
        return {
            'title': title,
            'description': description,
            'tags': tags,
            'alt': "".join(pic['alt'] for pic in pictures),
            'img': [pic['src'].rsplit('?', 1)[0] for pic in pictures],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2215
2216
2217
class AnythingComic(GenericListableComic):
    """Class to retrieve Anything Comics."""
    # Also on http://tapastic.com/series/anything
    name = 'anythingcomic'
    long_name = 'Anything Comic'
    url = 'http://www.anythingcomic.com'

    @classmethod
    def get_archive_elements(cls):
        """Return the rows of the 'chapter_table' table of the archive page."""
        archive = get_soup_at_url(urljoin_wrapper(cls.url, 'archive/'))
        rows = archive.find('table', id='chapter_table').find_all('tr')
        # The first 2 <tr>'s do not correspond to comics
        return rows[2:]

    @classmethod
    def get_url_from_archive_element(cls, tr):
        """Return the comic url built from the link of an archive row.

        The row must contain exactly four cells; the link sits in the
        second one and its href is joined to the class url.
        """
        _, comic_cell, _, _ = tr.find_all('td')
        anchor = comic_cell.find('a')
        return urljoin_wrapper(cls.url, anchor['href'])

    @classmethod
    def get_comic_info(cls, soup, tr):
        """Return number, title, alt text, image url and date of a comic.

        Number, title and date come from the four-cell archive row `tr`;
        the single 'comic_image' image comes from the comic page `soup`
        and must have equal 'alt' and 'title' attributes.
        """
        num_cell, comic_cell, date_cell, _ = tr.find_all('td')
        number = int(num_cell.string)
        title = comic_cell.find('a').string
        pictures = soup.find_all('img', id='comic_image')
        raw_date = date_cell.string
        published = string_to_date(remove_st_nd_rd_th_from_date(raw_date), "%B %d, %Y, %I:%M %p")
        assert len(pictures) == 1
        for pic in pictures:
            assert pic.get('alt') == pic.get('title')
        return {
            'num': number,
            'title': title,
            'alt': pictures[0].get('alt', ''),
            'img': [pic['src'] for pic in pictures],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2258
2259
2260
class LonnieMillsap(GenericNavigableComic):
    """Class to retrieve Lonnie Millsap's comics."""
    name = 'millsap'
    long_name = 'Lonnie Millsap'
    url = 'http://www.lonniemillsap.com'
    first_url = 'http://www.lonniemillsap.com/?p=42'
    get_first_comic_link = simulate_first_link
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, author, image urls and date of a comic page.

        Author, date and images are looked up inside the 'post-content'
        div; images are those of its 'entry' div.
        """
        post_title = soup.find('h2', class_='post-title').string
        content = soup.find('div', class_='post-content')
        author_name = content.find("span", class_="post-author").find("a").string
        raw_date = content.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        pictures = content.find("div", class_="entry").find_all("img")
        info = {
            'title': post_title,
            'author': author_name,
            'img': [pic['src'] for pic in pictures],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
        return info
2286
2287
2288
class LinsEditions(GenericNavigableComic):
    """Retrieve L.I.N.S. Editions comics."""
    # Also on http://linscomics.tumblr.com
    # Now on https://warandpeas.com
    name = 'lins'
    long_name = 'L.I.N.S. Editions'
    url = 'https://linsedition.com'
    _categories = ('LINS', )
    first_url = 'https://linsedition.com/2011/09/07/l-i-n-s/'
    get_first_comic_link = simulate_first_link
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, images and date from the page's meta tags."""
        title = soup.find('meta', property='og:title')['content']
        image_metas = soup.find_all('meta', property='og:image')
        published_meta = soup.find('meta', property='article:published_time')
        # Only the first 10 characters (the date part) are parsed.
        published = string_to_date(published_meta['content'][:10], "%Y-%m-%d")
        info = {
            'title': title,
            'img': [meta['content'] for meta in image_metas],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
        return info
2314
2315
2316
class ThorsThundershack(GenericNavigableComic):
    """Retrieve Thor's Thundershack comics."""
    # Also on http://tapastic.com/series/Thors-Thundershac
    name = 'thor'
    long_name = 'Thor\'s Thundershack'
    url = 'http://www.thorsthundershack.com'
    _categories = ('THOR', )
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'first' navigation anchor found on the home page."""
        home = get_soup_at_url(cls.url)
        return home.find('a', class_='first navlink')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the link to the next or previous comic, or None.

        Anchors whose href is '/comic' are skipped.
        """
        rel = 'next' if next_ else 'prev'
        candidates = (
            anchor for anchor in last_soup.find_all('a', rel=rel)
            if anchor['href'] != '/comic')
        return next(candidates, None)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract images, date, author, title, alt text and description."""
        title = soup.find('meta', attrs={'name': 'description'})["content"]
        description = soup.find('div', itemprop='articleBody').text
        author = soup.find('span', itemprop='author copyrightHolder').string
        pictures = soup.find_all('img', itemprop='image')
        assert all(pic['title'] == pic['alt'] for pic in pictures)
        if pictures:
            alt = pictures[0]['alt']
        else:
            alt = ""
        raw_date = soup.find('time', itemprop='datePublished')["datetime"]
        published = string_to_date(raw_date, "%Y-%m-%d %H:%M:%S")
        # Image sources are joined to the site URL.
        info = {
            'img': [urljoin_wrapper(cls.url, pic['src']) for pic in pictures],
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'author': author,
            'title': title,
            'alt': alt,
            'description': description,
        }
        return info
2359
2360
2361 View Code Duplication
class GerbilWithAJetpack(GenericNavigableComic):
    """Class to retrieve GerbilWithAJetpack comics."""
    name = 'gerbil'
    long_name = 'Gerbil With A Jetpack'
    url = 'http://gerbilwithajetpack.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        Returns a dict with the image sources, title, alt text, author and
        publication date found in the page soup.
        """
        title = soup.find('h2', class_='post-title').string
        author = soup.find("span", class_="post-author").find("a").string
        date_str = soup.find("span", class_="post-date").string
        day = string_to_date(date_str, "%B %d, %Y")
        imgs = soup.find("div", id="comic").find_all("img")
        # A page without any image yields an empty alt text instead of
        # raising IndexError, as other classes in this file already do.
        alt = imgs[0]['alt'] if imgs else ""
        assert all(i['alt'] == i['title'] == alt for i in imgs)
        return {
            'img': [i['src'] for i in imgs],
            'title': title,
            'alt': alt,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
2388
2389
2390 View Code Duplication
class EveryDayBlues(GenericNavigableComic):
    """Retrieve Every Day Blues comics."""
    name = "blues"
    long_name = "Every Day Blues"
    url = "http://everydayblues.net"
    get_navi_link = get_link_rel_next
    get_first_comic_link = get_a_navi_navifirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract images, title, author and date from a comic page soup."""
        title = soup.find("h2", class_="post-title").string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        raw_date = soup.find("span", class_="post-date").string
        # The date is parsed with the German locale.
        published = string_to_date(raw_date, "%d. %B %Y", "de_DE.utf8")
        pictures = soup.find("div", id="comic").find_all("img")
        assert all(pic['alt'] == pic['title'] == title for pic in pictures)
        assert len(pictures) <= 1
        info = {
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'author': author,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
        return info
2416
2417
2418
class BiterComics(GenericNavigableComic):
    """Retrieve Biter Comics."""
    name = "biter"
    long_name = "Biter Comics"
    url = "http://www.bitercomics.com"
    get_navi_link = get_link_rel_next
    get_first_comic_link = get_a_navi_navifirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract image, title, alt text, author and date from a page soup."""
        title = soup.find("h1", class_="entry-title").string
        author_span = soup.find("span", class_="author vcard")
        author = author_span.find("a").string
        raw_date = soup.find("span", class_="entry-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find("div", id="comic").find_all("img")
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        # Exactly one image is expected per page.
        assert len(pictures) == 1
        alt = pictures[0]['alt']
        info = {
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'alt': alt,
            'author': author,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
        return info
2446
2447
2448 View Code Duplication
class TheAwkwardYeti(GenericNavigableComic):
    """Retrieve The Awkward Yeti comics."""
    # Also on http://www.gocomics.com/the-awkward-yeti
    # Also on http://larstheyeti.tumblr.com
    # Also on https://tapastic.com/series/TheAwkwardYeti
    name = 'yeti'
    long_name = 'The Awkward Yeti'
    url = 'http://theawkwardyeti.com'
    _categories = ('YETI', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = get_a_navi_navifirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract images, title and date from a comic page soup."""
        title = soup.find('h2', class_='post-title').string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find("div", id="comic").find_all("img")
        # Only the first image is required to have matching alt and title.
        assert not pictures or pictures[0]['alt'] == pictures[0]['title']
        info = {
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
        return info
2475
2476
2477
class PleasantThoughts(GenericNavigableComic):
    """Retrieve Pleasant Thoughts comics."""
    name = 'pleasant'
    long_name = 'Pleasant Thoughts'
    url = 'http://pleasant-thoughts.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = get_a_navi_navifirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract the title and the image sources from the post content."""
        content = soup.find('div', class_='post-content')
        title = content.find('h2', class_='post-title').string
        entry = content.find("div", class_="entry")
        pictures = entry.find_all("img")
        info = {
            'title': title,
            'img': [pic['src'] for pic in pictures],
        }
        return info
2495
2496
2497
class MisterAndMe(GenericNavigableComic):
    """Retrieve Mister & Me comics."""
    # Also on http://www.gocomics.com/mister-and-me
    # Also on https://tapastic.com/series/Mister-and-Me
    name = 'mister'
    long_name = 'Mister & Me'
    url = 'http://www.mister-and-me.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = get_a_comicnavbase_comicnavfirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract image, title, alt text, author and date from a page soup."""
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find("div", id="comic").find_all("img")
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        # At most one image is expected; a page may have none.
        assert len(pictures) <= 1
        if pictures:
            alt = pictures[0]['alt']
        else:
            alt = ""
        info = {
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'alt': alt,
            'author': author,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
        return info
2527
2528
2529
class LastPlaceComics(GenericNavigableComic):
    """Retrieve Last Place Comics."""
    name = 'lastplace'
    long_name = 'Last Place Comics'
    url = "http://lastplacecomics.com"
    get_navi_link = get_link_rel_next
    get_first_comic_link = get_a_comicnavbase_comicnavfirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract image, title, alt text, author and date from a page soup."""
        title = soup.find('h2', class_='post-title').string
        author = soup.find("span", class_="post-author").find("a").string
        published = string_to_date(
            soup.find("span", class_="post-date").string, "%B %d, %Y")
        comic_div = soup.find("div", id="comic")
        pictures = comic_div.find_all("img")
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        # At most one image is expected; a page may have none.
        assert len(pictures) <= 1
        alt = pictures[0]['alt'] if pictures else ""
        sources = [pic['src'] for pic in pictures]
        return {
            'img': sources,
            'title': title,
            'alt': alt,
            'author': author,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2557
2558
2559
class TalesOfAbsurdity(GenericNavigableComic):
    """Retrieve Tales Of Absurdity comics."""
    # Also on http://tapastic.com/series/Tales-Of-Absurdity
    # Also on http://talesofabsurdity.tumblr.com
    name = 'absurdity'
    long_name = 'Tales of Absurdity'
    url = 'http://talesofabsurdity.com'
    _categories = ('ABSURDITY', )
    get_navi_link = get_a_navi_comicnavnext_navinext
    get_first_comic_link = get_a_navi_navifirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract images, title, alt text, author and date from a page soup."""
        title = soup.find('h2', class_='post-title').string
        author_link = soup.find("span", class_="post-author").find("a")
        author = author_link.string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find("div", id="comic").find_all("img")
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        # The alt text comes from the first image, when there is one.
        if pictures:
            alt = pictures[0]['alt']
        else:
            alt = ""
        info = {
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'alt': alt,
            'author': author,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
        return info
2589
2590
2591
class EndlessOrigami(GenericEmptyComic, GenericNavigableComic):
    """Retrieve Endless Origami comics."""
    name = "origami"
    long_name = "Endless Origami"
    url = "http://endlessorigami.com"
    get_navi_link = get_link_rel_next
    get_first_comic_link = get_a_navi_navifirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract images, title, alt text, author and date from a page soup."""
        title = soup.find('h2', class_='post-title').string
        author = soup.find("span", class_="post-author").find("a").string
        date_span = soup.find("span", class_="post-date")
        published = string_to_date(date_span.string, "%B %d, %Y")
        pictures = soup.find("div", id="comic").find_all("img")
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        # The alt text comes from the first image, when there is one.
        alt = pictures[0]['alt'] if pictures else ""
        sources = [pic['src'] for pic in pictures]
        return {
            'img': sources,
            'title': title,
            'alt': alt,
            'author': author,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2618
2619
2620
class PlanC(GenericNavigableComic):
    """Retrieve Plan C comics."""
    name = 'planc'
    long_name = 'Plan C'
    url = 'http://www.plancomic.com'
    get_navi_link = get_a_navi_comicnavnext_navinext
    get_first_comic_link = get_a_navi_navifirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, images and date from a comic page soup."""
        title = soup.find('h2', class_='post-title').string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        comic_div = soup.find('div', id='comic')
        pictures = comic_div.find_all('img')
        info = {
            'title': title,
            'img': [pic['src'] for pic in pictures],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
        return info
2642
2643
2644
class BuniComic(GenericNavigableComic):
    """Retrieve Buni comics."""
    name = 'buni'
    long_name = 'BuniComics'
    url = 'http://www.bunicomic.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = get_a_comicnavbase_comicnavfirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract the image source and its title from a comic page soup."""
        comic_div = soup.find('div', id='comic')
        pictures = comic_div.find_all('img')
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        # Exactly one image is expected; its title is the comic title.
        assert len(pictures) == 1
        info = {
            'img': [pic['src'] for pic in pictures],
            'title': pictures[0]['title'],
        }
        return info
2662
2663
2664
class GenericCommitStrip(GenericNavigableComic):
    """Base class for the Commit Strip comics in the different languages."""
    first_url = NotImplemented
    get_first_comic_link = simulate_first_link
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract titles, description and image URLs from a page soup."""
        description = soup.find('meta', property='og:description')['content']
        title = soup.find('meta', property='og:title')['content']
        pictures = soup.find('div', class_='entry-content').find_all('img')
        # Secondary title: the image titles (empty when missing), space-joined.
        image_titles = [pic.get('title', '') for pic in pictures]
        title2 = ' '.join(image_titles)
        # Sources are converted to plain ASCII URIs, then joined to the site URL.
        image_urls = [
            urljoin_wrapper(cls.url, convert_iri_to_plain_ascii_uri(pic['src']))
            for pic in pictures]
        return {
            'title': title,
            'title2': title2,
            'description': description,
            'img': image_urls,
        }
2683
2684
2685
class CommitStripFr(GenericCommitStrip):
    """Retrieve the French edition of Commit Strip."""
    name = 'commit_fr'
    long_name = 'Commit Strip (Fr)'
    _categories = ('FRANCAIS', )
    url = 'http://www.commitstrip.com/fr'
    first_url = 'http://www.commitstrip.com/fr/2012/02/22/interview/'
2692
2693
2694
class CommitStripEn(GenericCommitStrip):
    """Retrieve the English edition of Commit Strip."""
    long_name = 'Commit Strip (En)'
    name = 'commit_en'
    url = 'http://www.commitstrip.com/en'
    first_url = 'http://www.commitstrip.com/en/2012/02/22/interview/'
2700
2701
2702 View Code Duplication
class GenericBoumerie(GenericNavigableComic):
    """Base class for the Boumeries comics in the different languages."""
    # Subclasses provide the date format and the locale used to parse dates.
    date_format = NotImplemented
    lang = NotImplemented
    get_navi_link = get_link_rel_next
    get_first_comic_link = get_a_navi_navifirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract short URL, images, title, author and date from a page soup."""
        title = soup.find('h2', class_='post-title').string
        short_url = soup.find('link', rel='shortlink')['href']
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, cls.date_format, cls.lang)
        pictures = soup.find('div', id='comic').find_all('img')
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        info = {
            'short_url': short_url,
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'author': author,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
        return info
2728
2729
2730
class BoumerieEn(GenericBoumerie):
    """Retrieve the English edition of Boumeries."""
    name = 'boumeries_en'
    long_name = 'Boumeries (En)'
    url = 'http://comics.boumerie.com'
    lang = 'en_GB.UTF-8'
    date_format = "%B %d, %Y"
2737
2738
2739
class BoumerieFr(GenericBoumerie):
    """Retrieve the French edition of Boumeries."""
    name = 'boumeries_fr'
    long_name = 'Boumeries (Fr)'
    url = 'http://bd.boumerie.com'
    _categories = ('FRANCAIS', )
    lang = "fr_FR.utf8"
    date_format = "%A, %d %B %Y"
2747
2748
2749 View Code Duplication
class UnearthedComics(GenericNavigableComic):
    """Class to retrieve Unearthed comics."""
    # Also on http://tapastic.com/series/UnearthedComics
    # Also on http://unearthedcomics.tumblr.com
    name = 'unearthed'
    long_name = 'Unearthed Comics'
    url = 'http://unearthedcomics.com'
    _categories = ('UNEARTHED', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://unearthedcomics.com/comics/world-with-turn-signals/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        Returns a dict with the title, description, short URL, image sources
        and publication date found in the page soup.
        """
        short_url = soup.find('link', rel='shortlink')['href']
        title_elt = soup.find('h1') or soup.find('h2')
        title = title_elt.string if title_elt else ""
        # Store the text of the description, not the <meta> element itself;
        # a page without that tag yields an empty description.
        desc_elt = soup.find('meta', property='og:description')
        desc = desc_elt['content'] if desc_elt is not None else ""
        date_str = soup.find('time', class_='published updated hidden')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        post = soup.find('div', class_="entry content entry-content type-portfolio")
        imgs = post.find_all('img')
        return {
            'title': title,
            'description': desc,
            'url2': short_url,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
2781
2782
2783
class Optipess(GenericNavigableComic):
    """Retrieve Optipess comics."""
    name = 'optipess'
    long_name = 'Optipess'
    url = 'http://www.optipess.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = get_a_navi_navifirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, alt text, author, images and date from a page soup."""
        title = soup.find('h2', class_='post-title').string
        author = soup.find("span", class_="post-author").find("a").string
        # A page may have no comic div at all, hence no image.
        comic_div = soup.find('div', id='comic')
        if comic_div:
            pictures = comic_div.find_all('img')
        else:
            pictures = []
        alt = pictures[0]['title'] if pictures else ""
        assert all(pic['alt'] == pic['title'] == alt for pic in pictures)
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, "%B %d, %Y")
        info = {
            'title': title,
            'alt': alt,
            'author': author,
            'img': [pic['src'] for pic in pictures],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
        return info
2811
2812
2813
class PainTrainComic(GenericNavigableComic):
    """Class to retrieve Pain Train Comics."""
    name = 'paintrain'
    long_name = 'Pain Train Comics'
    url = 'http://paintraincomic.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        Returns a dict with the short URL, the comic number parsed from it,
        the image sources, the publication date, the alt text and the title.
        """
        title = soup.find('h2', class_='post-title').string
        short_url = soup.find('link', rel='shortlink')['href']
        # The site URL is escaped so that its dots are matched literally.
        short_url_re = re.compile('^%s/\\?p=([0-9]*)' % re.escape(cls.url))
        num = int(short_url_re.match(short_url).group(1))
        imgs = soup.find('div', id='comic').find_all('img')
        # A page without any image yields an empty alt text instead of
        # raising IndexError, as other classes in this file already do.
        alt = imgs[0]['title'] if imgs else ""
        assert all(i['alt'] == i['title'] == alt for i in imgs)
        date_str = soup.find('span', class_='post-date').string
        day = string_to_date(date_str, "%d/%m/%Y")
        return {
            'short_url': short_url,
            'num': num,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'alt': alt,
            'title': title,
        }
2843
2844
2845
class MoonBeard(GenericNavigableComic):
    """Class to retrieve MoonBeard comics."""
    # Also on http://blog.squiresjam.es/moonbeard
    # Also on http://www.webtoons.com/en/comedy/moon-beard/list?title_no=471
    name = 'moonbeard'
    long_name = 'Moon Beard'
    url = 'http://moonbeard.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        Returns a dict with the short URL, the comic number parsed from it,
        the image sources, the publication date, the title, the tags, the
        alt text and the author.
        """
        title = soup.find('h2', class_='post-title').string
        short_url = soup.find('link', rel='shortlink')['href']
        # The site URL is escaped so that its dots are matched literally.
        short_url_re = re.compile('^%s/\\?p=([0-9]*)' % re.escape(cls.url))
        num = int(short_url_re.match(short_url).group(1))
        imgs = soup.find('div', id='comic').find_all('img')
        # A page without any image yields an empty alt text instead of
        # raising IndexError, as other classes in this file already do.
        alt = imgs[0]['title'] if imgs else ""
        assert all(i['alt'] == i['title'] == alt for i in imgs)
        date_str = soup.find('span', class_='post-date').string
        day = string_to_date(date_str, "%B %d, %Y")
        # Tags are the contents of the article:tag meta elements, space-joined.
        tags = ' '.join(t['content'] for t in soup.find_all('meta', property='article:tag'))
        author = soup.find('span', class_='post-author').string
        return {
            'short_url': short_url,
            'num': num,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'title': title,
            'tags': tags,
            'alt': alt,
            'author': author,
        }
2881
2882
2883
class AHamADay(GenericNavigableComic):
    """Class to retrieve class A Ham A Day comics."""
    name = 'ham'
    long_name = 'A Ham A Day'
    url = 'http://www.ahammaday.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.ahammaday.com/today/3/6/french'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns None when the page has no navigation item in the requested
        direction, instead of raising AttributeError.
        """
        # prev is next / next is prev
        li = last_soup.find('li', class_='previous' if next_ else 'next')
        return li.find('a') if li else None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        Returns a dict with the image URLs, title, author and publication
        date found in the page soup.
        """
        date_str = soup.find('time', class_='published')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        author = soup.find('span', class_='blog-author').find('a').string
        title = soup.find('meta', property='og:title')['content']
        imgs = soup.find_all('meta', itemprop='image')
        return {
            'img': [i['content'] for i in imgs],
            'title': title,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
        }
2914
2915
2916
class LittleLifeLines(GenericNavigableComic):
    """Class to retrieve Little Life Lines comics."""
    # Also on https://little-life-lines.tumblr.com
    name = 'life'
    long_name = 'Little Life Lines'
    url = 'http://www.littlelifelines.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.littlelifelines.com/comics/well-done'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic (None when there is none)."""
        # prev is next / next is prev
        li = last_soup.find('li', class_='prev' if next_ else 'next')
        return li.find('a') if li else None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        Returns a dict with the title, alt text, description, author,
        publication date and image sources found in the page soup.
        """
        title = soup.find('meta', property='og:title')['content']
        desc = soup.find('meta', property='og:description')['content']
        date_str = soup.find('time', class_='published')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        author = soup.find('a', rel='author').string
        div_content = soup.find('div', class_="body entry-content")
        # Images without a src attribute are ignored.
        imgs = [i for i in div_content.find_all('img') if i.get('src') is not None]
        # A page left with no usable image yields an empty alt text instead
        # of raising IndexError.
        alt = imgs[0]['alt'] if imgs else ""
        return {
            'title': title,
            'alt': alt,
            'description': desc,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [i['src'] for i in imgs],
        }
2955
2956
2957
class GenericWordPressInkblot(GenericNavigableComic):
    """Base class for comics published with WordPress and Inkblot."""
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'first webcomic' anchor found on the home page."""
        home = get_soup_at_url(cls.url)
        return home.find('a', class_='webcomic-link webcomic1-link first-webcomic-link first-webcomic1-link')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, date and image sources from a comic page soup."""
        title = soup.find('meta', property='og:title')['content']
        pictures = soup.find('div', class_='webcomic-image').find_all('img')
        published_meta = soup.find('meta', property='article:published_time')
        # Only the first 10 characters (the date part) are parsed.
        published = string_to_date(published_meta['content'][:10], "%Y-%m-%d")
        info = {
            'title': title,
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [pic['src'] for pic in pictures],
        }
        return info
2980
2981
2982
class EverythingsStupid(GenericWordPressInkblot):
    """Retrieve Everything's Stupid comics."""
    # Also on http://tapastic.com/series/EverythingsStupid
    # Also on http://www.webtoons.com/en/challenge/everythings-stupid/list?title_no=14591
    # Also on http://everythingsstupidcomics.tumblr.com
    url = 'http://everythingsstupid.net'
    name = 'stupid'
    long_name = "Everything's Stupid"
2990
2991
2992
class TheIsmComics(GenericWordPressInkblot):
    """Retrieve The Ism comics."""
    # Also on https://tapastic.com/series/TheIsm (?)
    url = 'http://www.theism-comics.com'
    name = 'theism'
    long_name = "The Ism"
2998
2999
3000
class WoodenPlankStudios(GenericWordPressInkblot):
    """Retrieve Wooden Plank Studios comics."""
    url = 'http://woodenplankstudios.com'
    name = 'woodenplank'
    long_name = 'Wooden Plank Studios'
3005
3006
3007
class ElectricBunnyComic(GenericNavigableComic):
    """Retrieve Electric Bunny comics."""
    # Also on http://electricbunnycomics.tumblr.com
    name = 'bunny'
    long_name = 'Electric Bunny Comic'
    url = 'http://www.electricbunnycomics.com/View/Comic/153/Welcome+to+Hell'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the parent element of the image whose alt text is 'First'."""
        first_img = get_soup_at_url(cls.url).find('img', alt='First')
        return first_img.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the parent of the 'Next' or 'Back' image, or None."""
        wanted_alt = 'Next' if next_ else 'Back'
        nav_img = last_soup.find('img', alt=wanted_alt)
        if nav_img:
            return nav_img.parent
        return None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract the title and the image URLs from the page's meta tags."""
        title = soup.find('meta', property='og:title')['content']
        image_metas = soup.find_all('meta', property='og:image')
        info = {
            'title': title,
            'img': [meta['content'] for meta in image_metas],
        }
        return info
3035
3036
3037
class SheldonComics(GenericNavigableComic):
    """Retrieve Sheldon comics by walking the nav-first/nav-next/nav-prev anchors."""
    # Also published at http://www.gocomics.com/sheldon
    name = 'sheldon'
    long_name = 'Sheldon Comics'
    url = 'http://www.sheldoncomics.com'

    @classmethod
    def get_first_comic_link(cls):
        """Return the anchor with id 'nav-first' found on the page at cls.url."""
        home_soup = get_soup_at_url(cls.url)
        return home_soup.find("a", id="nav-first")

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the first next/previous anchor not pointing at the bare site address.

        Anchors whose href is exactly 'http://www.sheldoncomics.com' are skipped;
        None is returned when no other candidate exists.
        """
        anchor_id = "nav-next" if next_ else "nav-prev"
        candidates = (
            anchor for anchor in last_soup.find_all("a", id=anchor_id)
            if anchor['href'] != 'http://www.sheldoncomics.com')
        return next(candidates, None)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title and image urls taken from the images in the 'comic-foot' div."""
        foot_imgs = soup.find("div", id="comic-foot").find_all("img")
        # Sanity checks on the page layout: alt mirrors title, and one image only.
        assert all(img['alt'] == img['title'] for img in foot_imgs)
        assert len(foot_imgs) == 1
        return {
            'title': foot_imgs[0]['title'],
            'img': [img['src'] for img in foot_imgs],
        }
3068
3069
3070
class Ubertool(GenericNavigableComic):
    """Retrieve Ubertool comics using the shared comicnavbase navigation helpers."""
    # Also published at http://ubertool.tumblr.com
    # Also published at https://tapastic.com/series/ubertool
    name = 'ubertool'
    long_name = 'Ubertool'
    url = 'http://ubertoolcomic.com'
    _categories = ('UBERTOOL', )
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_a_comicnavbase_comicnavnext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return images, title and publication date read from the post markup."""
        post_title = soup.find('h2', class_='post-title').string
        post_date = string_to_date(
            soup.find('span', class_='post-date').string, "%B %d, %Y")
        comic_imgs = soup.find('div', id='comic').find_all('img')
        info = {
            'img': [img['src'] for img in comic_imgs],
            'title': post_title,
        }
        info.update(month=post_date.month, year=post_date.year, day=post_date.day)
        return info
3095
3096
3097
class EarthExplodes(GenericNavigableComic):
    """Retrieve The Earth Explodes comics, starting from a fixed first url."""
    name = 'earthexplodes'
    long_name = 'The Earth Explodes'
    url = 'http://www.earthexplodes.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.earthexplodes.com/comics/000/'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor whose id is 'next' (or 'prev' when going backwards)."""
        anchor_id = 'next' if next_ else 'prev'
        return last_soup.find('a', id=anchor_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return image urls (joined onto cls.url), page title and hover text."""
        page_title = soup.find('title').string
        image_tags = soup.find('div', id='image').find_all('img')
        # The hover text comes from the first image; it is optional.
        hover_text = image_tags[0].get('title', '')
        absolute_urls = [urljoin_wrapper(cls.url, tag['src']) for tag in image_tags]
        return {
            'img': absolute_urls,
            'title': page_title,
            'alt': hover_text,
        }
3122
3123
3124
class CubeDrone(GenericNavigableComic):
    """Retrieve Cube Drone comics by following the glyphicon navigation spans."""
    name = 'cubedrone'
    long_name = 'Cube Drone'
    url = 'http://cube-drone.com/comics'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the parent element of the 'glyphicon-backward' span."""
        backward_icon = get_soup_at_url(cls.url).find(
            'span', class_='glyphicon glyphicon-backward')
        return backward_icon.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the parent element of the right (next) or left (previous) chevron span."""
        direction = 'right' if next_ else 'left'
        chevron = last_soup.find(
            'span', class_='glyphicon glyphicon-chevron-' + direction)
        return chevron.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return titles, alt text, canonical url and image urls for the comic page."""
        def twitter_meta(field):
            """Read the content of the <meta name=field> tag."""
            return soup.find('meta', attrs={'name': field})['content']

        title = twitter_meta('twitter:title')
        url2 = twitter_meta('twitter:url')
        # The publication date is not extracted here (a small tag in the
        # 'comic_title' h2 was previously considered as a source for it).
        comic_imgs = soup.find_all('img', class_='comic img-responsive')
        first_img = comic_imgs[0]
        title2 = first_img['title']
        alt = first_img['alt']
        return {
            'url2': url2,
            'title': title,
            'title2': title2,
            'alt': alt,
            'img': [img['src'] for img in comic_imgs],
        }
3159
3160
3161
class MakeItStoopid(GenericNavigableComic):
    """Retrieve Make It Stoopid comics using the page's 'cnav' navigation elements."""
    name = 'stoopid'
    long_name = 'Make it stoopid'
    url = 'http://makeitstoopid.com/comic.php'

    @classmethod
    def get_nav(cls, soup):
        """Return the first five 'cnav' elements, with None for those lacking an href.

        The page is expected to hold the same five elements twice; the order
        is: begin, prev, archive, next, end.
        """
        nav_elts = soup.find_all(class_='cnav')
        first_bar = nav_elts[:5]
        second_bar = nav_elts[5:]
        assert first_bar == second_bar
        links = []
        for elt in first_bar:
            links.append(elt if elt.get('href') is not None else None)
        return links

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'begin' navigation element of the page at cls.url."""
        nav = cls.get_nav(get_soup_at_url(cls.url))
        return nav[0]

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the 'next' (index 3) or 'prev' (index 1) navigation element."""
        position = 3 if next_ else 1
        return cls.get_nav(last_soup)[position]

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the title carried by the link and the 'comicimg' image urls."""
        link_title = link['title']
        comic_imgs = soup.find_all('img', id='comicimg')
        return {
            'title': link_title,
            'img': [img['src'] for img in comic_imgs],
        }
3195
3196
3197
class MarketoonistComics(GenericNavigableComic):
    """Retrieve Marketoonist comics, starting from a fixed first url."""
    name = 'marketoonist'
    long_name = 'Marketoonist'
    url = 'https://marketoonist.com/cartoons'
    get_first_comic_link = simulate_first_link
    get_navi_link = get_link_rel_next
    first_url = 'https://marketoonist.com/2002/10/the-8-types-of-brand-managers-2.html'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return images, date and title read from the page's meta tags."""
        og_images = soup.find_all('meta', property='og:image')
        published = soup.find('meta', property='article:published_time')['content']
        # Only the leading 'YYYY-MM-DD' part of the timestamp is parsed.
        day = string_to_date(published[:10], "%Y-%m-%d")
        og_title = soup.find('meta', property='og:title')['content']
        info = {'img': [meta['content'] for meta in og_images]}
        info.update(day=day.day, month=day.month, year=day.year, title=og_title)
        return info
3220
3221
3222
class ConsoliaComics(GenericNavigableComic):
    """Retrieve Consolia comics by following the first/next/prev navigation spans."""
    name = 'consolia'
    long_name = 'consolia'
    url = 'https://consolia-comic.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the anchor inside the span of class 'first'."""
        first_span = get_soup_at_url(cls.url).find('span', class_='first')
        return first_span.find('a')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor inside the 'next' (or 'prev') span."""
        span_class = 'next' if next_ else 'prev'
        nav_span = last_soup.find('span', class_=span_class)
        return nav_span.find('a')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, hover text, image urls and date for the comic page."""
        og_title = soup.find('meta', property='og:title')['content']
        day = string_to_date(soup.find('time')["datetime"], "%Y-%m-%d")
        comic_imgs = soup.find('div', id='comic').find_all('img')
        hover_text = comic_imgs[0]['title']
        # The article text (div id 'blag') is not collected.
        return {
            'title': og_title,
            'alt': hover_text,
            'img': [img['src'] for img in comic_imgs],
            'day': day.day,
            'month': day.month,
            'year': day.year,
        }
3258
3259
3260
class TuMourrasMoinsBete(GenericNavigableComic):
    """Retrieve Tu Mourras Moins Bete comics from its blogspot pages."""
    name = 'mourrasmoinsbete'
    long_name = 'Tu Mourras Moins Bete'
    url = 'http://tumourrasmoinsbete.blogspot.fr'
    _categories = ('FRANCAIS', )
    get_first_comic_link = simulate_first_link
    first_url = 'http://tumourrasmoinsbete.blogspot.fr/2008/06/essai.html'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the blog pager anchor: 'newer' when going forward, 'older' otherwise."""
        if next_:
            pager_id = 'Blog1_blog-pager-newer-link'
        else:
            pager_id = 'Blog1_blog-pager-older-link'
        return last_soup.find('a', id=pager_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return image urls, author and page title for the blog post."""
        page_title = soup.find('title').string
        body = soup.find('div', itemprop='description articleBody')
        post_imgs = body.find_all('img')
        author = soup.find('span', itemprop='author').string
        return {
            'img': [img['src'] for img in post_imgs],
            'author': author,
            'title': page_title,
        }
3285
3286
3287
class GeekAndPoke(GenericNavigableComic):
    """Class to retrieve Geek And Poke comics."""
    name = 'geek'
    long_name = 'Geek And Poke'
    url = 'http://geek-and-poke.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://geek-and-poke.com/geekandpoke/2006/8/27/a-new-place-for-a-not-so-old-blog.html'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns the anchor of class 'prev-item' when moving forward and
        'next-item' when moving backward.
        """
        # NOTE(review): the class names look swapped with respect to next_;
        # presumably the site labels the newer post 'prev-item' - confirm.
        return last_soup.find('a', class_='prev-item' if next_ else 'next-item')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with title, alt text, description, author, date and
        image urls (joined onto cls.url). 'alt' is the alt text of the first
        image that has a src, or an empty string when there is none.
        """
        title = soup.find('meta', property='og:title')['content']
        desc = soup.find('meta', property='og:description')['content']
        date_str = soup.find('time', class_='published')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        author = soup.find('a', rel='author').string
        # Posts use one of two content containers.
        div_content = (soup.find('div', class_="body entry-content") or
                       soup.find('div', class_="special-content"))
        # Images without a src attribute cannot be downloaded: ignore them.
        imgs = [i for i in div_content.find_all('img') if i.get('src') is not None]
        # NOTE(review): on a BeautifulSoup Tag, `in` presumably tests child nodes
        # rather than attributes, so this check may always pass - confirm before
        # tightening it.
        assert all('title' not in i or i['alt'] == i['title'] for i in imgs)
        # Keep 'alt' a string in every case (it used to be a list when no
        # image was found).
        alt = imgs[0].get('alt', "") if imgs else ""
        return {
            'title': title,
            'alt': alt,
            'description': desc,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
        }
3325
3326
3327
class GloryOwlComix(GenericNavigableComic):
    """Retrieve Glory Owl comics from its blogspot pages."""
    name = 'gloryowl'
    long_name = 'Glory Owl'
    url = 'http://gloryowlcomix.blogspot.fr'
    _categories = ('NSFW', 'FRANCAIS')
    get_first_comic_link = simulate_first_link
    first_url = 'http://gloryowlcomix.blogspot.fr/2013/02/1_7.html'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the blog pager anchor: 'newer' when going forward, 'older' otherwise."""
        if next_:
            pager_id = 'Blog1_blog-pager-newer-link'
        else:
            pager_id = 'Blog1_blog-pager-older-link'
        return last_soup.find('a', id=pager_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return image urls (from the image_src links), author and page title."""
        page_title = soup.find('title').string
        image_links = soup.find_all('link', rel='image_src')
        author = soup.find('a', rel='author').string
        return {
            'img': [lnk['href'] for lnk in image_links],
            'author': author,
            'title': page_title,
        }
3352
3353
3354
class GenericTumblrV1(GenericComic):
    """Generic class to retrieve comics from Tumblr using the V1 API."""
    _categories = ('TUMBLR', )

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generic implementation of get_next_comic for Tumblr comics.

        Yields the comic description of every post returned by get_posts for
        which get_comic_info does not return None.
        """
        for p in cls.get_posts(last_comic):
            comic = cls.get_comic_info(p)
            if comic is not None:
                yield comic

    @classmethod
    def get_url_from_post(cls, post):
        """Get the url of a post: the value of its 'url' attribute."""
        return post['url']

    @classmethod
    def get_api_url(cls):
        """Get the API url: '/api/read/' joined onto cls.url."""
        return urljoin_wrapper(cls.url, '/api/read/')

    @classmethod
    def get_comic_info(cls, post):
        """Get information about a particular comics.

        Returns None for posts whose type is not 'photo', otherwise a dict
        describing the comic (urls, date, title, tags, images, tumblr id).
        """
        type_ = post['type']
        if type_ != 'photo':
            return None
        tumblr_id = int(post['id'])
        api_url = cls.get_api_url() + '?id=%d' % (tumblr_id)
        # fromtimestamp is called without a timezone: the date is expressed
        # in the local time of the machine running the code.
        day = datetime.datetime.fromtimestamp(int(post['unix-timestamp'])).date()
        caption = post.find('photo-caption')
        title = caption.string if caption else ""
        tags = ' '.join(t.string for t in post.find_all('tag'))
        # Photos may appear in 'photo' tags and/or straight in the post
        photo_tags = post.find_all('photo')
        if not photo_tags:
            photo_tags = [post]
        # Images are in multiple resolutions - taking the first one
        imgs = [photo.find('photo-url') for photo in photo_tags]
        return {
            'url': cls.get_url_from_post(post),
            'url2': post['url-with-slug'],
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'title': title,
            'tags': tags,
            'img': [i.string for i in imgs],
            'tumblr-id': tumblr_id,
            'api_url': api_url,
        }

    @classmethod
    def get_posts(cls, last_comic, nb_post_per_call=10):
        """Get posts using API. nb_post_per_call is max 50.

        Posts are retrieved from newer to older as per the tumblr v1 api
        but are returned in chronological order.

        last_comic is the description of the last comic already retrieved
        (or None): only posts newer than it are returned. An empty iterable
        is returned, after printing a message, when the previous post cannot
        be fetched or cannot be found among the posts.
        """
        # Imported here because a bare 'import urllib' does not guarantee
        # that the 'urllib.error' submodule is loaded.
        from urllib.error import HTTPError

        waiting_for_url = last_comic['url'] if last_comic else None
        posts_acc = []
        if last_comic is not None:
            # Sometimes, tumblr posts are deleted. When previous post is deleted, we
            # might end up spending a lot of time looking for something that
            # doesn't exist. Failing early and clearly might be a better option.
            last_api_url = last_comic['api_url']
            try:
                get_soup_at_url(last_api_url)
            except HTTPError:
                # Tell apart a missing post from an unreachable blog.
                try:
                    get_soup_at_url(cls.url)
                except HTTPError:
                    print("Did not find previous post nor main url %s" % cls.url)
                else:
                    print("Did not find previous post %s : it might have been deleted" % last_api_url)
                return reversed(posts_acc)
        api_url = cls.get_api_url()
        posts = get_soup_at_url(api_url).find('posts')
        start, total = int(posts['start']), int(posts['total'])
        assert start == 0
        for starting_num in range(0, total, nb_post_per_call):
            api_url2 = api_url + '?start=%d&num=%d' % (starting_num, nb_post_per_call)
            posts2 = get_soup_at_url(api_url2).find('posts')
            start2, total2 = int(posts2['start']), int(posts2['total'])
            assert starting_num == start2, "%d != %d" % (starting_num, start2)
            # This may happen and should be handled in the future
            assert total == total2, "%d != %d" % (total, total2)
            for p in posts2.find_all('post'):
                # Stop as soon as the last known post is reached.
                if waiting_for_url and waiting_for_url == cls.get_url_from_post(p):
                    return reversed(posts_acc)
                posts_acc.append(p)
        if waiting_for_url is None:
            return reversed(posts_acc)
        print("Did not find %s : there might be a problem" % waiting_for_url)
        return []
3447
3448
3449
class SaturdayMorningBreakfastCerealTumblr(GenericTumblrV1):
    """Retrieve Saturday Morning Breakfast Cereal comics through the Tumblr V1 API."""
    # Also published at http://www.gocomics.com/saturday-morning-breakfast-cereal
    # Also published at http://www.smbc-comics.com
    name = 'smbc-tumblr'
    long_name = 'Saturday Morning Breakfast Cereal (from Tumblr)'
    url = 'http://smbc-comics.tumblr.com'
    _categories = ('SMBC', )
3457
3458
3459
class IrwinCardozo(GenericTumblrV1):
    """Retrieve Irwin Cardozo comics through the Tumblr V1 API."""
    name = 'irwinc'
    long_name = 'Irwin Cardozo'
    url = 'http://irwincardozocomics.tumblr.com'
3464
3465
3466
class AccordingToDevin(GenericTumblrV1):
    """Retrieve According To Devin comics through the Tumblr V1 API."""
    name = 'devin'
    long_name = 'According To Devin'
    url = 'http://accordingtodevin.tumblr.com'
3471
3472
3473
class ItsTheTieTumblr(GenericTumblrV1):
    """Retrieve It's the tie comics through the Tumblr V1 API."""
    # Also published at http://itsthetie.com
    # Also published at https://tapastic.com/series/itsthetie
    name = 'tie-tumblr'
    long_name = "It's the tie (from Tumblr)"
    url = "http://itsthetie.tumblr.com"
    _categories = ('TIE', )
3481
3482
3483
class OctopunsTumblr(GenericTumblrV1):
    """Retrieve Octopuns comics through the Tumblr V1 API."""
    # Also published at http://www.octopuns.net
    name = 'octopuns-tumblr'
    long_name = 'Octopuns (from Tumblr)'
    url = 'http://octopuns.tumblr.com'
3489
3490
3491
class PicturesInBoxesTumblr(GenericTumblrV1):
    """Retrieve Pictures In Boxes comics through the Tumblr V1 API."""
    # Also published at http://www.picturesinboxes.com
    name = 'picturesinboxes-tumblr'
    long_name = 'Pictures in Boxes (from Tumblr)'
    url = 'http://picturesinboxescomic.tumblr.com'
3497
3498
3499
class TubeyToonsTumblr(GenericTumblrV1):
    """Retrieve TubeyToons comics through the Tumblr V1 API."""
    # Also published at http://tapastic.com/series/Tubey-Toons
    # Also published at http://tubeytoons.com
    name = 'tubeytoons-tumblr'
    long_name = 'Tubey Toons (from Tumblr)'
    url = 'http://tubeytoons.tumblr.com'
    # NOTE(review): the category is spelled 'TUNEYTOONS' (not 'TUBEYTOONS');
    # presumably it must match other classes of the same comic - check before renaming.
    _categories = ('TUNEYTOONS', )
3507
3508
3509
class UnearthedComicsTumblr(GenericTumblrV1):
    """Retrieve Unearthed comics through the Tumblr V1 API."""
    # Also published at http://tapastic.com/series/UnearthedComics
    # Also published at http://unearthedcomics.com
    name = 'unearthed-tumblr'
    long_name = 'Unearthed Comics (from Tumblr)'
    url = 'http://unearthedcomics.tumblr.com'
    _categories = ('UNEARTHED', )
3517
3518
3519
class PieComic(GenericTumblrV1):
    """Retrieve Pie Comic comics through the Tumblr V1 API."""
    name = 'pie'
    long_name = 'Pie Comic'
    url = "http://piecomic.tumblr.com"
3524
3525
3526
class MrEthanDiamond(GenericTumblrV1):
    """Retrieve Mr Ethan Diamond comics through the Tumblr V1 API."""
    name = 'diamond'
    long_name = 'Mr Ethan Diamond'
    url = 'http://mrethandiamond.tumblr.com'
3531
3532
3533
class Flocci(GenericTumblrV1):
    """Retrieve floccinaucinihilipilification comics through the Tumblr V1 API."""
    name = 'flocci'
    long_name = 'floccinaucinihilipilification'
    url = "http://floccinaucinihilipilificationa.tumblr.com"
3538
3539
3540
class UpAndOut(GenericTumblrV1):
    """Retrieve Up & Out comics through the Tumblr V1 API."""
    # Also published at http://tapastic.com/series/UP-and-OUT
    name = 'upandout'
    long_name = 'Up And Out (from Tumblr)'
    url = 'http://upandoutcomic.tumblr.com'
3546
3547
3548
class Pundemonium(GenericTumblrV1):
    """Retrieve Pundemonium comics through the Tumblr V1 API."""
    name = 'pundemonium'
    long_name = 'Pundemonium'
    url = 'http://monstika.tumblr.com'
3553
3554
3555
class PoorlyDrawnLinesTumblr(GenericTumblrV1):
    """Retrieve Poorly Drawn Lines comics through the Tumblr V1 API."""
    # Also published at http://poorlydrawnlines.com
    name = 'poorlydrawn-tumblr'
    long_name = 'Poorly Drawn Lines (from Tumblr)'
    url = 'http://pdlcomics.tumblr.com'
    _categories = ('POORLYDRAWN', )
3562
3563
3564
class PearShapedComics(GenericTumblrV1):
    """Retrieve Pear Shaped Comics through the Tumblr V1 API."""
    name = 'pearshaped'
    long_name = 'Pear-Shaped Comics'
    url = 'http://pearshapedcomics.com'
3569
3570
3571
class PondScumComics(GenericTumblrV1):
    """Retrieve Pond Scum comics through the Tumblr V1 API."""
    name = 'pond'
    long_name = 'Pond Scum'
    url = 'http://pondscumcomic.tumblr.com'
3576
3577
3578
class MercworksTumblr(GenericTumblrV1):
    """Retrieve Mercworks comics through the Tumblr V1 API."""
    # Also published at http://mercworks.net
    name = 'mercworks-tumblr'
    long_name = 'Mercworks (from Tumblr)'
    url = 'http://mercworks.tumblr.com'
3584
3585
3586
class OwlTurdTumblr(GenericTumblrV1):
    """Retrieve Owl Turd comics through the Tumblr V1 API."""
    # Also published at http://tapastic.com/series/Owl-Turd-Comix
    name = 'owlturd-tumblr'
    long_name = 'Owl Turd (from Tumblr)'
    url = 'http://owlturd.com'
    _categories = ('OWLTURD', )
3593
3594
3595
class VectorBelly(GenericTumblrV1):
    """Retrieve Vector Belly comics through the Tumblr V1 API."""
    # Also published at http://vectorbelly.com
    name = 'vector'
    long_name = 'Vector Belly'
    url = 'http://vectorbelly.tumblr.com'
3601
3602
3603
class GoneIntoRapture(GenericTumblrV1):
    """Retrieve Gone Into Rapture comics through the Tumblr V1 API."""
    # Also published at http://goneintorapture.tumblr.com
    # Also published at http://tapastic.com/series/Goneintorapture
    name = 'rapture'
    long_name = 'Gone Into Rapture'
    url = 'http://www.goneintorapture.com'
3610
3611
3612
class TheOatmealTumblr(GenericTumblrV1):
    """Retrieve The Oatmeal comics through the Tumblr V1 API."""
    # Also published at http://theoatmeal.com
    name = 'oatmeal-tumblr'
    long_name = 'The Oatmeal (from Tumblr)'
    url = 'http://oatmeal.tumblr.com'
3618
3619
3620
class HeckIfIKnowComicsTumblr(GenericTumblrV1):
    """Retrieve Heck If I Know comics through the Tumblr V1 API."""
    # Also published at http://tapastic.com/series/Regular
    name = 'heck-tumblr'
    long_name = 'Heck if I Know comics (from Tumblr)'
    url = 'http://heckifiknowcomics.com'
3626
3627
3628
class MyJetPack(GenericTumblrV1):
    """Retrieve My Jet Pack comics through the Tumblr V1 API."""
    name = 'jetpack'
    long_name = 'My Jet Pack'
    url = 'http://myjetpack.tumblr.com'
3633
3634
3635
class CheerUpEmoKidTumblr(GenericTumblrV1):
    """Retrieve CheerUpEmoKid comics through the Tumblr V1 API."""
    # Also published at http://www.cheerupemokid.com
    # Also published at http://tapastic.com/series/CUEK
    name = 'cuek-tumblr'
    long_name = 'Cheer Up Emo Kid (from Tumblr)'
    url = 'http://enzocomics.tumblr.com'
3642
3643
3644
class ForLackOfABetterComic(GenericTumblrV1):
    """Retrieve For Lack Of A Better Comic through the Tumblr V1 API."""
    # Also published at http://forlackofabettercomic.com
    name = 'lack'
    long_name = 'For Lack Of A Better Comic'
    url = 'http://forlackofabettercomic.tumblr.com'
3650
3651
3652
class ZenPencilsTumblr(GenericTumblrV1):
    """Retrieve ZenPencils comics through the Tumblr V1 API."""
    # Also published at http://zenpencils.com
    # Also published at http://www.gocomics.com/zen-pencils
    name = 'zenpencils-tumblr'
    long_name = 'Zen Pencils (from Tumblr)'
    url = 'http://zenpencils.tumblr.com'
    _categories = ('ZENPENCILS', )
3660
3661
3662
class ThreeWordPhraseTumblr(GenericTumblrV1):
    """Retrieve Three Word Phrase comics through the Tumblr V1 API."""
    # Also published at http://threewordphrase.com
    name = 'threeword-tumblr'
    long_name = 'Three Word Phrase (from Tumblr)'
    url = 'http://www.threewordphrase.tumblr.com'
3668
3669
3670
class TimeTrabbleTumblr(GenericTumblrV1):
    """Retrieve Time Trabble comics through the Tumblr V1 API."""
    # Also published at http://timetrabble.com
    name = 'timetrabble-tumblr'
    long_name = 'Time Trabble (from Tumblr)'
    url = 'http://timetrabble.tumblr.com'
3676
3677
3678
class SafelyEndangeredTumblr(GenericTumblrV1):
    """Retrieve Safely Endangered comics through the Tumblr V1 API."""
    # Also published at http://www.safelyendangered.com
    name = 'endangered-tumblr'
    long_name = 'Safely Endangered (from Tumblr)'
    url = 'http://tumblr.safelyendangered.com'
3684
3685
3686
class MouseBearComedyTumblr(GenericTumblrV1):
    """Retrieve Mouse Bear Comedy comics through the Tumblr V1 API."""
    # Also published at http://www.mousebearcomedy.com
    name = 'mousebear-tumblr'
    long_name = 'Mouse Bear Comedy (from Tumblr)'
    url = 'http://mousebearcomedy.tumblr.com'
3692
3693
3694
class BouletCorpTumblr(GenericTumblrV1):
    """Retrieve BouletCorp comics through the Tumblr V1 API."""
    # Also published at http://www.bouletcorp.com
    name = 'boulet-tumblr'
    long_name = 'Boulet Corp (from Tumblr)'
    url = 'http://bouletcorp.tumblr.com'
    _categories = ('BOULET', )
3701
3702
3703
class TheAwkwardYetiTumblr(GenericTumblrV1):
    """Retrieve The Awkward Yeti comics through the Tumblr V1 API."""
    # Also published at http://www.gocomics.com/the-awkward-yeti
    # Also published at http://theawkwardyeti.com
    # Also published at https://tapastic.com/series/TheAwkwardYeti
    name = 'yeti-tumblr'
    long_name = 'The Awkward Yeti (from Tumblr)'
    url = 'http://larstheyeti.tumblr.com'
    _categories = ('YETI', )
3712
3713
3714
class NellucNhoj(GenericTumblrV1):
    """Retrieve NellucNhoj comics through the Tumblr V1 API."""
    name = 'nhoj'
    long_name = 'Nelluc Nhoj'
    url = 'http://nellucnhoj.com'
3719
3720
3721
class DownTheUpwardSpiralTumblr(GenericTumblrV1):
    """Retrieve Down The Upward Spiral comics through the Tumblr V1 API."""
    # Also published at http://www.downtheupwardspiral.com
    name = 'spiral-tumblr'
    long_name = 'Down the Upward Spiral (from Tumblr)'
    url = 'http://downtheupwardspiral.tumblr.com'
3727
3728
3729
class AsPerUsualTumblr(GenericTumblrV1):
    """Class to retrieve As Per Usual comics."""
    # Also on https://tapastic.com/series/AsPerUsual
    name = 'usual-tumblr'
    long_name = 'As Per Usual (from Tumblr)'
    url = 'http://as-per-usual.tumblr.com'
    # Declared as '_categories' like the other comic classes (it used to be
    # spelled 'categories', without the leading underscore).
    _categories = ('DAMILEE', )
3736
3737
3738
class HotComicsForCoolPeopleTumblr(GenericTumblrV1):
    """Class to retrieve Hot Comics For Cool People."""
    # Also on https://tapastic.com/series/Hot-Comics-For-Cool-People
    # Also on http://hotcomics.biz (links to tumblr)
    # Also on http://hcfcp.com (links to tumblr)
    name = 'hotcomics-tumblr'
    long_name = 'Hot Comics For Cool People (from Tumblr)'
    url = 'http://hotcomicsforcoolpeople.tumblr.com'
    # Declared as '_categories' like the other comic classes (it used to be
    # spelled 'categories', without the leading underscore).
    _categories = ('DAMILEE', )
3747
3748
3749
class OneOneOneOneComicTumblr(GenericTumblrV1):
    """Retrieve 1111 Comics through the Tumblr V1 API."""
    # Also published at http://www.1111comics.me
    # Also published at https://tapastic.com/series/1111-Comics
    name = '1111-tumblr'
    long_name = '1111 Comics (from Tumblr)'
    url = 'http://comics1111.tumblr.com'
    _categories = ('ONEONEONEONE', )
3757
3758
3759
class JhallComicsTumblr(GenericTumblrV1):
    """Retrieve Jhall Comics through the Tumblr V1 API."""
    # Also published at http://jhallcomics.com
    name = 'jhall-tumblr'
    long_name = 'Jhall Comics (from Tumblr)'
    url = 'http://jhallcomics.tumblr.com'
3765
3766
3767
class BerkeleyMewsTumblr(GenericTumblrV1):
    """Retrieve Berkeley Mews comics through the Tumblr V1 API."""
    # Also published at http://www.gocomics.com/berkeley-mews
    # Also published at http://www.berkeleymews.com
    name = 'berkeley-tumblr'
    long_name = 'Berkeley Mews (from Tumblr)'
    url = 'http://mews.tumblr.com'
    _categories = ('BERKELEY', )
3775
3776
3777
class JoanCornellaTumblr(GenericTumblrV1):
    """Retrieve Joan Cornella comics through the Tumblr V1 API."""
    # Also published at http://joancornella.net
    name = 'cornella-tumblr'
    long_name = 'Joan Cornella (from Tumblr)'
    url = 'http://cornellajoan.tumblr.com'
3783
3784
3785
class RespawnComicTumblr(GenericTumblrV1):
    """Retrieve Respawn Comic through the Tumblr V1 API."""
    # Also published at http://respawncomic.com
    name = 'respawn-tumblr'
    long_name = 'Respawn Comic (from Tumblr)'
    url = 'http://respawncomic.tumblr.com'
3791
3792
3793
class ChrisHallbeckTumblr(GenericTumblrV1):
    """Class to retrieve Chris Hallbeck comics."""
    # Also on https://tapastic.com/ChrisHallbeck
    # Also on http://maximumble.com
    # Also on http://minimumble.com
    # Also on http://thebookofbiff.com
    name = 'hallbeck-tumblr'
    # Display name: spelling fixed to match the author's name used in the url.
    long_name = 'Chris Hallbeck (from Tumblr)'
    url = 'http://chrishallbeck.tumblr.com'
    # NOTE(review): the category key keeps its 'HALLBACK' spelling; presumably
    # other classes for the same author use the same key - rename them together.
    _categories = ('HALLBACK', )
3803
3804
3805
class ComicNuggets(GenericTumblrV1):
    """Retrieve Comic Nuggets through the Tumblr V1 API."""
    name = 'nuggets'
    long_name = 'Comic Nuggets'
    url = 'http://comicnuggets.com'
3810
3811
3812
class PigeonGazetteTumblr(GenericTumblrV1):
    """Retrieve The Pigeon Gazette comics through the Tumblr V1 API."""
    # Also published at https://tapastic.com/series/The-Pigeon-Gazette
    name = 'pigeon-tumblr'
    long_name = 'The Pigeon Gazette (from Tumblr)'
    url = 'http://thepigeongazette.tumblr.com'
3818
3819
3820
class CancerOwl(GenericTumblrV1):
    """Retrieve Cancer Owl comics through the Tumblr V1 API."""
    # Also published at http://cancerowl.com
    name = 'cancerowl-tumblr'
    long_name = 'Cancer Owl (from Tumblr)'
    url = 'http://cancerowl.tumblr.com'
3826
3827
3828
class FowlLanguageTumblr(GenericTumblrV1):
    """Retrieve Fowl Language comics through the Tumblr V1 API."""
    # Also published at http://www.fowllanguagecomics.com
    # Also published at http://tapastic.com/series/Fowl-Language-Comics
    # Also published at http://www.gocomics.com/fowl-language
    name = 'fowllanguage-tumblr'
    long_name = 'Fowl Language Comics (from Tumblr)'
    url = 'http://fowllanguagecomics.tumblr.com'
    _categories = ('FOWLLANGUAGE', )
3837
3838
3839
class TheOdd1sOutTumblr(GenericTumblrV1):
    """The Odd 1s Out, retrieved from its Tumblr blog."""
    # Also published at:
    #   - http://theodd1sout.com
    #   - https://tapastic.com/series/Theodd1sout
    name = 'theodd-tumblr'
    long_name = 'The Odd 1s Out (from Tumblr)'
    url = 'http://theodd1sout.tumblr.com'
3846
3847
3848
class TheUnderfoldTumblr(GenericTumblrV1):
    """The Underfold, retrieved from its Tumblr blog."""
    # Also published at http://theunderfold.com
    name = 'underfold-tumblr'
    long_name = 'The Underfold (from Tumblr)'
    url = 'http://theunderfold.tumblr.com'
3854
3855
3856
class LolNeinTumblr(GenericTumblrV1):
    """Lol Nein, retrieved from its Tumblr blog."""
    # Also published at http://lolnein.com
    name = 'lolnein-tumblr'
    long_name = 'Lol Nein (from Tumblr)'
    url = 'http://lolneincom.tumblr.com'
3862
3863
3864
class FatAwesomeComicsTumblr(GenericTumblrV1):
    """Fat Awesome comics, retrieved from their Tumblr blog."""
    # Also published at http://fatawesome.com/comics
    name = 'fatawesome-tumblr'
    long_name = 'Fat Awesome (from Tumblr)'
    url = 'http://fatawesomecomedy.tumblr.com'
3870
3871
3872
class TheWorldIsFlatTumblr(GenericTumblrV1):
    """The World Is Flat, retrieved from its Tumblr blog."""
    # Also published at https://tapastic.com/series/The-World-is-Flat
    name = 'flatworld-tumblr'
    long_name = 'The World Is Flat (from Tumblr)'
    url = 'http://theworldisflatcomics.tumblr.com'
3878
3879
3880
class DorrisMc(GenericTumblrV1):
    """Dorris Mc comics, retrieved with the generic Tumblr logic."""
    # Also published at http://www.gocomics.com/dorris-mccomics
    name = 'dorrismc'
    long_name = 'Dorris Mc'
    url = 'http://dorrismccomics.com'
3886
3887
3888
class LeleozTumblr(GenericEmptyComic, GenericTumblrV1):
    """Leleoz comics, declared against their Tumblr blog."""
    # NOTE(review): GenericEmptyComic comes first in the bases - presumably
    # this comic is deactivated; the reason is not recorded here.
    # Also published at https://tapastic.com/series/Leleoz
    name = 'leleoz-tumblr'
    long_name = 'Leleoz (from Tumblr)'
    url = 'http://leleozcomics.tumblr.com'
3894
3895
3896
class MoonBeardTumblr(GenericTumblrV1):
    """Moon Beard, retrieved with the generic Tumblr logic."""
    # Also published at:
    #   - http://moonbeard.com
    #   - http://www.webtoons.com/en/comedy/moon-beard/list?title_no=471
    name = 'moonbeard-tumblr'
    long_name = 'Moon Beard (from Tumblr)'
    url = 'http://blog.squiresjam.es/moonbeard'
3903
3904
3905
class AComik(GenericTumblrV1):
    """A Comik, retrieved with the generic Tumblr logic."""
    name = 'comik'
    long_name = 'A Comik'
    url = 'http://acomik.com'
3910
3911
3912
class ClassicRandy(GenericTumblrV1):
    """Classic Randy, retrieved from its Tumblr blog."""
    name = 'randy'
    long_name = 'Classic Randy'
    url = 'http://classicrandy.tumblr.com'
3917
3918
3919
class DagssonTumblr(GenericTumblrV1):
    """Dagsson comics, retrieved from their Tumblr blog."""
    # Also published at http://www.dagsson.com
    name = 'dagsson-tumblr'
    long_name = 'Dagsson Hugleikur (from Tumblr)'
    url = 'http://hugleikurdagsson.tumblr.com'
3925
3926
3927
class LinsEditionsTumblr(GenericTumblrV1):
    """L.I.N.S. Editions, retrieved from its Tumblr blog."""
    # Also published at https://linsedition.com
    # The comic is now on http://warandpeas.tumblr.com
    name = 'lins-tumblr'
    long_name = 'L.I.N.S. Editions (from Tumblr)'
    url = 'http://linscomics.tumblr.com'
    _categories = ('LINS', )
3935
3936
3937
class WarAndPeasTumblr(GenericTumblrV1):
    """War And Peas, retrieved from its Tumblr blog."""
    # Previously published at http://linscomics.tumblr.com
    name = 'warandpeas-tumblr'
    long_name = 'War And Peas (from Tumblr)'
    url = 'http://warandpeas.tumblr.com'
    _categories = ('WARANDPEAS', )
3944
3945
3946
class OrigamiHotDish(GenericTumblrV1):
    """Origami Hot Dish, retrieved with the generic Tumblr logic."""
    name = 'origamihotdish'
    long_name = 'Origami Hot Dish'
    url = 'http://origamihotdish.com'
3951
3952
3953
class HitAndMissComicsTumblr(GenericTumblrV1):
    """Hit and Miss Comics, retrieved from their Tumblr blog."""
    name = 'hitandmiss'
    long_name = 'Hit and Miss Comics'
    url = 'http://hitandmisscomics.tumblr.com'
3958
3959
3960
class HMBlanc(GenericTumblrV1):
    """HM Blanc, retrieved from its Tumblr blog."""
    name = 'hmblanc'
    long_name = 'HM Blanc'
    url = 'http://hmblanc.tumblr.com'
3965
3966
3967
class TalesOfAbsurdityTumblr(GenericTumblrV1):
    """Tales of Absurdity, retrieved from its Tumblr blog."""
    # Also published at:
    #   - http://talesofabsurdity.com
    #   - http://tapastic.com/series/Tales-Of-Absurdity
    name = 'absurdity-tumblr'
    long_name = 'Tales of Absurdity (from Tumblr)'
    url = 'http://talesofabsurdity.tumblr.com'
    _categories = ('ABSURDITY', )
3975
3976
3977
class RobbieAndBobby(GenericTumblrV1):
    """Robbie And Bobby, retrieved from its Tumblr blog."""
    # Also published at http://robbieandbobby.com
    name = 'robbie-tumblr'
    long_name = 'Robbie And Bobby (from Tumblr)'
    url = 'http://robbieandbobby.tumblr.com'
3983
3984
3985
class ElectricBunnyComicTumblr(GenericTumblrV1):
    """Electric Bunny Comic, retrieved from its Tumblr blog."""
    # Also published at http://www.electricbunnycomics.com/View/Comic/153/Welcome+to+Hell
    name = 'bunny-tumblr'
    long_name = 'Electric Bunny Comic (from Tumblr)'
    url = 'http://electricbunnycomics.tumblr.com'
3991
3992
3993
class Hoomph(GenericTumblrV1):
    """Hoomph, retrieved with the generic Tumblr logic."""
    name = 'hoomph'
    long_name = 'Hoomph'
    url = 'http://hoom.ph'
3998
3999
4000
class BFGFSTumblr(GenericTumblrV1):
    """BFGFS, retrieved from its Tumblr blog."""
    # Also published at:
    #   - https://tapastic.com/series/BFGFS
    #   - http://bfgfs.com
    name = 'bfgfs-tumblr'
    long_name = 'BFGFS (from Tumblr)'
    url = 'http://bfgfs.tumblr.com'
4007
4008
4009
class DoodleForFood(GenericTumblrV1):
    """Doodle For Food, retrieved with the generic Tumblr logic."""
    # Listed as also on http://doodleforfood.com (the same address as `url`)
    name = 'doodle'
    long_name = 'Doodle For Food'
    url = 'http://doodleforfood.com'
4015
4016
4017
class CassandraCalinTumblr(GenericTumblrV1):
    """C. Cassandra comics, retrieved from their Tumblr blog."""
    # Also published at:
    #   - http://cassandracalin.com
    #   - https://tapastic.com/series/C-Cassandra-comics
    name = 'cassandra-tumblr'
    long_name = 'Cassandra Calin (from Tumblr)'
    url = 'http://c-cassandra.tumblr.com'
4024
4025
4026
class DougWasTaken(GenericTumblrV1):
    """Doug Was Taken, retrieved from its Tumblr blog."""
    name = 'doug'
    long_name = 'Doug Was Taken'
    url = 'http://dougwastaken.tumblr.com'
4031
4032
4033
class MandatoryRollerCoaster(GenericTumblrV1):
    """Mandatory Roller Coaster, retrieved with the generic Tumblr logic."""
    name = 'rollercoaster'
    long_name = 'Mandatory Roller Coaster'
    url = 'http://mandatoryrollercoaster.com'
4038
4039
4040
class CEstPasEnRegardantSesPompes(GenericTumblrV1):
    """C'Est Pas En Regardant Ses Pompes (...), retrieved from its Tumblr blog."""
    name = 'cperspqccltt'
    long_name = 'C Est Pas En Regardant Ses Pompes (...)'
    url = 'http://cperspqccltt.tumblr.com'
4045
4046
4047
class TheGrohlTroll(GenericTumblrV1):
    """The Grohl Troll, retrieved with the generic Tumblr logic."""
    name = 'grohltroll'
    long_name = 'The Grohl Troll'
    url = 'http://thegrohltroll.com'
4052
4053
4054
class WebcomicName(GenericTumblrV1):
    """Webcomic Name, retrieved with the generic Tumblr logic."""
    name = 'webcomicname'
    long_name = 'Webcomic Name'
    url = 'http://webcomicname.com'
4059
4060
4061
class BooksOfAdam(GenericTumblrV1):
    """Books of Adam, retrieved from its Tumblr blog."""
    # Also published at http://www.booksofadam.com
    name = 'booksofadam'
    long_name = 'Books of Adam'
    url = 'http://booksofadam.tumblr.com'
4067
4068
4069
class HarkAVagrant(GenericTumblrV1):
    """Hark A Vagrant, retrieved from a Tumblr blog."""
    # Also published at http://www.harkavagrant.com
    name = 'hark-tumblr'
    long_name = 'Hark A Vagrant (from Tumblr)'
    url = 'http://beatonna.tumblr.com'
4075
4076
4077
class OurSuperAdventureTumblr(GenericTumblrV1):
    """Our Super Adventure, retrieved from a Tumblr blog."""
    # Also published at:
    #   - https://tapastic.com/series/Our-Super-Adventure
    #   - http://www.oursuperadventure.com
    # Related address: http://sarahgraley.com
    name = 'superadventure-tumblr'
    long_name = 'Our Super Adventure (from Tumblr)'
    url = 'http://sarahssketchbook.tumblr.com'
4085
4086
4087
class JakeLikesOnions(GenericTumblrV1):
    """Jake Likes Onions, retrieved with the generic Tumblr logic."""
    name = 'jake'
    long_name = 'Jake Likes Onions'
    url = 'http://jakelikesonions.com'
4092
4093
4094
class InYourFaceCake(GenericTumblrV1):
    """In Your Face Cake, retrieved from its Tumblr blog."""
    name = 'inyourfacecake-tumblr'
    long_name = 'In Your Face Cake (from Tumblr)'
    url = 'http://in-your-face-cake.tumblr.com'
4099
4100
4101
class Robospunk(GenericTumblrV1):
    """Robospunk, retrieved with the generic Tumblr logic."""
    name = 'robospunk'
    long_name = 'Robospunk'
    url = 'http://robospunk.com'
4106
4107
4108
class BananaTwinky(GenericTumblrV1):
    """Banana Twinky, retrieved from its Tumblr blog."""
    name = 'banana'
    long_name = 'Banana Twinky'
    url = 'http://bananatwinky.tumblr.com'
4113
4114
4115
class YesterdaysPopcornTumblr(GenericTumblrV1):
    """Yesterday's Popcorn, retrieved from its Tumblr blog."""
    # Also published at:
    #   - http://www.yesterdayspopcorn.com
    #   - https://tapastic.com/series/Yesterdays-Popcorn
    name = 'popcorn-tumblr'
    long_name = "Yesterday's Popcorn (from Tumblr)"
    url = 'http://yesterdayspopcorn.tumblr.com'
4122
4123
4124
class TwistedDoodles(GenericTumblrV1):
    """Twisted Doodles, retrieved with the generic Tumblr logic."""
    name = 'twisted'
    long_name = 'Twisted Doodles'
    url = 'http://www.twisteddoodles.com'
4129
4130
4131
class UbertoolTumblr(GenericTumblrV1):
    """Ubertool, retrieved from its Tumblr blog."""
    # Also published at:
    #   - http://ubertoolcomic.com
    #   - https://tapastic.com/series/ubertool
    name = 'ubertool-tumblr'
    long_name = 'Ubertool (from Tumblr)'
    url = 'http://ubertool.tumblr.com'
    _categories = ('UBERTOOL', )
4139
4140
4141
class LittleLifeLinesTumblr(GenericTumblrV1):
    """Little Life Lines, retrieved from its Tumblr blog."""
    # Also published at http://www.littlelifelines.com
    name = 'life-tumblr'
    long_name = 'Little Life Lines (from Tumblr)'
    url = 'https://little-life-lines.tumblr.com'
4147
4148
4149
class TheyCanTalk(GenericTumblrV1):
    """They Can Talk, retrieved with the generic Tumblr logic."""
    name = 'theycantalk'
    long_name = 'They Can Talk'
    url = 'http://theycantalk.com'
4154
4155
4156
class Will5NeverCome(GenericTumblrV1):
    """Will 5:00 Never Come, retrieved with the generic Tumblr logic."""
    name = 'will5'
    long_name = 'Will 5:00 Never Come ?'
    url = 'http://will5nevercome.com'
4161
4162
4163
class Sephko(GenericTumblrV1):
    """Sephko comics, retrieved from their Tumblr blog."""
    # Also published at http://www.sephko.com
    name = 'sephko'
    long_name = 'Sephko'
    url = 'http://sephko.tumblr.com'
4169
4170
4171
class BlazersAtDawn(GenericTumblrV1):
    """Blazers At Dawn, retrieved from its Tumblr blog."""
    name = 'blazers'
    long_name = 'Blazers At Dawn'
    url = 'http://blazersatdawn.tumblr.com'
4176
4177
4178
class ArtByMoga(GenericEmptyComic, GenericTumblrV1):
    """Art By Moga, declared against its Tumblr blog."""
    # Deactivated because it downloads too many things.
    name = 'moga'
    long_name = 'Art By Moga'
    url = 'http://artbymoga.tumblr.com'
4183
4184 View Code Duplication
4185
class VerbalVomitTumblr(GenericTumblrV1):
    """Verbal Vomit, retrieved from its Tumblr blog."""
    # Also published at http://www.verbal-vomit.com
    name = 'vomit-tumblr'
    long_name = 'Verbal Vomit (from Tumblr)'
    url = 'http://verbalvomits.tumblr.com'
4191
4192
4193
class LibraryComic(GenericTumblrV1):
    """LibraryComic, retrieved from its Tumblr blog."""
    # Also published at http://librarycomic.com
    name = 'library-tumblr'
    long_name = 'LibraryComic (from Tumblr)'
    url = 'http://librarycomic.tumblr.com'
4199
4200
4201
class TizzyStitchBirdTumblr(GenericTumblrV1):
    """Tizzy Stitch Bird, retrieved from its Tumblr blog."""
    # Also published at:
    #   - http://tizzystitchbird.com
    #   - https://tapastic.com/series/TizzyStitchbird
    #   - http://www.webtoons.com/en/challenge/tizzy-stitchbird/list?title_no=50082
    name = 'tizzy-tumblr'
    long_name = 'Tizzy Stitch Bird (from Tumblr)'
    url = 'http://tizzystitchbird.tumblr.com'
4209
4210
4211
class HorovitzComics(GenericListableComic):
    """Shared retrieval logic for the comics hosted on horovitzcomics.com.

    Subclasses are expected to set `link_re` to a compiled pattern whose
    first group captures the comic number in an archive link's href.
    """
    url = 'http://www.horovitzcomics.com'
    _categories = ('HOROVITZ', )
    # The three numeric groups of the image source are read as year, month, day.
    img_re = re.compile('.*comics/([0-9]*)/([0-9]*)/([0-9]*)/.*$')
    # Placeholder only: each concrete subclass overrides it.
    link_re = NotImplemented
    get_url_from_archive_element = join_cls_url_to_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for the comic page `soup` reached from `link`."""
        # The comic number comes from the first group of link_re on the href.
        number = int(cls.link_re.match(link['href']).group(1))
        comic_title = link.string
        comic_imgs = soup.find_all('img', id='comic')
        assert len(comic_imgs) == 1
        # The publication date is taken from the path of the image source.
        date_match = cls.img_re.match(comic_imgs[0]['src'])
        year, month, day = map(int, date_match.groups())
        return {
            'title': comic_title,
            'day': day,
            'month': month,
            'year': year,
            'img': [img['src'] for img in comic_imgs],
            'num': number,
        }

    @classmethod
    def get_archive_elements(cls):
        """Return the archive page links matching `link_re`, in reversed page order."""
        archive_soup = get_soup_at_url('http://www.horovitzcomics.com/comics/archive/')
        matching_links = archive_soup.find_all('a', href=cls.link_re)
        return reversed(matching_links)
4242
4243
4244
class HorovitzNew(HorovitzComics):
    """Horovitz comics linked under /comics/new/ in the archive."""
    name = 'horovitznew'
    long_name = 'Horovitz New'
    # The captured group is the comic number.
    link_re = re.compile('^/comics/new/([0-9]+)$')
4249
4250
4251
class HorovitzClassic(HorovitzComics):
    """Horovitz comics linked under /comics/classic/ in the archive."""
    name = 'horovitzclassic'
    long_name = 'Horovitz Classic'
    # The captured group is the comic number.
    link_re = re.compile('^/comics/classic/([0-9]+)$')
4256
4257
4258
class GenericGoComic(GenericNavigableComic):
    """Base class gathering what the gocomics.com comics have in common."""
    _categories = ('GOCOMIC', )

    @classmethod
    def get_first_comic_link(cls):
        """Return the link to the first comic, looked up on the page at `cls.url`."""
        main_page = get_soup_at_url(cls.url)
        return main_page.find('a', class_='fa btn btn-outline-default btn-circle fa-backward sm ')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the link to the next comic if `next_` is true, else to the previous one."""
        # The class strings are matched verbatim, trailing space included.
        if next_:
            css_class = 'fa btn btn-outline-default btn-circle fa-caret-right sm '
        else:
            css_class = 'fa btn btn-outline-default btn-circle fa-caret-left sm '
        return last_soup.find('a', class_=css_class)

    @classmethod
    def get_url_from_link(cls, link):
        """Return the href of `link` joined to 'http://www.gocomics.com'."""
        return urljoin_wrapper('http://www.gocomics.com', link['href'])

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (date, images, author, tags) for the page `soup`."""
        def meta_content(prop):
            """Return the content of the first <meta> tag with this property."""
            return soup.find('meta', property=prop)['content']

        published = string_to_date(meta_content('article:published_time'), "%Y-%m-%d")
        picture = soup.find('picture', class_='img-fluid item-comic-image')
        images = picture.find_all('img')
        author = meta_content('article:author')
        tags = meta_content('article:tag')
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [img['src'] for img in images],
            'author': author,
            'tags': tags,
        }
4295
4296
4297
class PearlsBeforeSwine(GenericGoComic):
    """Pearls Before Swine, retrieved from gocomics.com."""
    name = 'pearls'
    long_name = 'Pearls Before Swine'
    url = 'http://www.gocomics.com/pearlsbeforeswine'
4302
4303
4304
class Peanuts(GenericGoComic):
    """Peanuts, retrieved from gocomics.com."""
    name = 'peanuts'
    long_name = 'Peanuts'
    url = 'http://www.gocomics.com/peanuts'
4309
4310
4311
class MattWuerker(GenericGoComic):
    """Matt Wuerker comics, retrieved from gocomics.com."""
    name = 'wuerker'
    long_name = 'Matt Wuerker'
    url = 'http://www.gocomics.com/mattwuerker'
4316
4317
4318
class TomToles(GenericGoComic):
    """Tom Toles comics, retrieved from gocomics.com."""
    name = 'toles'
    long_name = 'Tom Toles'
    url = 'http://www.gocomics.com/tomtoles'
4323
4324
4325
class BreakOfDay(GenericGoComic):
    """Break Of Day, retrieved from gocomics.com."""
    name = 'breakofday'
    long_name = 'Break Of Day'
    url = 'http://www.gocomics.com/break-of-day'
4330
4331
4332
class Brevity(GenericGoComic):
    """Brevity, retrieved from gocomics.com."""
    name = 'brevity'
    long_name = 'Brevity'
    url = 'http://www.gocomics.com/brevitypanel'
4337
4338
4339
class MichaelRamirez(GenericGoComic):
    """Michael Ramirez comics, retrieved from gocomics.com."""
    name = 'ramirez'
    long_name = 'Michael Ramirez'
    url = 'http://www.gocomics.com/michaelramirez'
4344
4345
4346
class MikeLuckovich(GenericGoComic):
    """Mike Luckovich comics, retrieved from gocomics.com."""
    name = 'luckovich'
    long_name = 'Mike Luckovich'
    url = 'http://www.gocomics.com/mikeluckovich'
4351
4352
4353
class JimBenton(GenericGoComic):
    """Jim Benton comics, retrieved from gocomics.com."""
    # Also published at http://jimbenton.tumblr.com
    name = 'benton'
    long_name = 'Jim Benton'
    url = 'http://www.gocomics.com/jim-benton-cartoons'
4359
4360
4361
class TheArgyleSweater(GenericGoComic):
    """The Argyle Sweater, retrieved from gocomics.com."""
    name = 'argyle'
    long_name = 'Argyle Sweater'
    url = 'http://www.gocomics.com/theargylesweater'
4366
4367
4368
class SunnyStreet(GenericGoComic):
    """Sunny Street, retrieved from gocomics.com."""
    # Also published at http://www.sunnystreetcomics.com
    name = 'sunny'
    long_name = 'Sunny Street'
    url = 'http://www.gocomics.com/sunny-street'
4374
4375
4376
class OffTheMark(GenericGoComic):
    """Off The Mark, retrieved from gocomics.com."""
    # Also published at https://www.offthemark.com
    name = 'offthemark'
    long_name = 'Off The Mark'
    url = 'http://www.gocomics.com/offthemark'
4382
4383
4384
class WuMo(GenericGoComic):
    """WuMo, retrieved from gocomics.com."""
    # Also published at http://wumo.com
    name = 'wumo'
    long_name = 'WuMo'
    url = 'http://www.gocomics.com/wumo'
4390
4391
4392
class LunarBaboon(GenericGoComic):
    """Lunar Baboon, retrieved from gocomics.com."""
    # Also published at:
    #   - http://www.lunarbaboon.com
    #   - https://tapastic.com/series/Lunarbaboon
    name = 'lunarbaboon'
    long_name = 'Lunar Baboon'
    url = 'http://www.gocomics.com/lunarbaboon'
4399
4400
4401
class SandersenGocomic(GenericGoComic):
    """Sarah Andersen comics, retrieved from gocomics.com."""
    # Also published at:
    #   - http://sarahcandersen.com
    #   - http://tapastic.com/series/Doodle-Time
    name = 'sandersen-goc'
    long_name = 'Sarah Andersen (from GoComics)'
    url = 'http://www.gocomics.com/sarahs-scribbles'
4408
4409
4410
class SaturdayMorningBreakfastCerealGoComic(GenericGoComic):
    """Saturday Morning Breakfast Cereal, retrieved from gocomics.com."""
    # Also published at:
    #   - http://smbc-comics.tumblr.com
    #   - http://www.smbc-comics.com
    name = 'smbc-goc'
    long_name = 'Saturday Morning Breakfast Cereal (from GoComics)'
    url = 'http://www.gocomics.com/saturday-morning-breakfast-cereal'
    _categories = ('SMBC', )
4418
4419
4420
class CalvinAndHobbesGoComic(GenericGoComic):
    """Calvin and Hobbes, retrieved from gocomics.com."""
    # Source is gocomics, not http://marcel-oehler.marcellosendos.ch/comics/ch/
    name = 'calvin-goc'
    long_name = 'Calvin and Hobbes (from GoComics)'
    url = 'http://www.gocomics.com/calvinandhobbes'
4426
4427
4428
class RallGoComic(GenericGoComic):
    """Ted Rall comics, retrieved from gocomics.com."""
    # Also published at http://rall.com/comic
    name = 'rall-goc'
    long_name = 'Ted Rall (from GoComics)'
    url = 'http://www.gocomics.com/ted-rall'
    _categories = ('RALL', )
4435
4436
4437
class TheAwkwardYetiGoComic(GenericGoComic):
    """The Awkward Yeti, retrieved from gocomics.com."""
    # Also published at:
    #   - http://larstheyeti.tumblr.com
    #   - http://theawkwardyeti.com
    #   - https://tapastic.com/series/TheAwkwardYeti
    name = 'yeti-goc'
    long_name = 'The Awkward Yeti (from GoComics)'
    url = 'http://www.gocomics.com/the-awkward-yeti'
    _categories = ('YETI', )
4446
4447
4448
class BerkeleyMewsGoComics(GenericGoComic):
    """Berkeley Mews, retrieved from gocomics.com."""
    # Also published at:
    #   - http://mews.tumblr.com
    #   - http://www.berkeleymews.com
    name = 'berkeley-goc'
    long_name = 'Berkeley Mews (from GoComics)'
    url = 'http://www.gocomics.com/berkeley-mews'
    _categories = ('BERKELEY', )
4456
4457
4458
class SheldonGoComics(GenericGoComic):
    """Sheldon, retrieved from gocomics.com."""
    # Also published at http://www.sheldoncomics.com
    name = 'sheldon-goc'
    long_name = 'Sheldon Comics (from GoComics)'
    url = 'http://www.gocomics.com/sheldon'
4464
4465
4466
class FowlLanguageGoComics(GenericGoComic):
    """Fowl Language Comics, retrieved from gocomics.com."""
    # Also published at:
    #   - http://www.fowllanguagecomics.com
    #   - http://tapastic.com/series/Fowl-Language-Comics
    #   - http://fowllanguagecomics.tumblr.com
    name = 'fowllanguage-goc'
    long_name = 'Fowl Language Comics (from GoComics)'
    url = 'http://www.gocomics.com/fowl-language'
    _categories = ('FOWLLANGUAGE', )
4475
4476
4477
class NickAnderson(GenericGoComic):
    """Nick Anderson comics, retrieved from gocomics.com."""
    name = 'nickanderson'
    long_name = 'Nick Anderson'
    url = 'http://www.gocomics.com/nickanderson'
4482
4483
4484
class GarfieldGoComics(GenericGoComic):
    """Garfield, retrieved from gocomics.com."""
    # Also published at http://garfield.com
    name = 'garfield-goc'
    long_name = 'Garfield (from GoComics)'
    url = 'http://www.gocomics.com/garfield'
    _categories = ('GARFIELD', )
4491
4492
4493
class DorrisMcGoComics(GenericGoComic):
    """Dorris Mc comics, retrieved from gocomics.com."""
    # Also published at http://dorrismccomics.com
    name = 'dorrismc-goc'
    long_name = 'Dorris Mc (from GoComics)'
    url = 'http://www.gocomics.com/dorris-mccomics'
4499
4500
4501
class FoxTrot(GenericGoComic):
    """FoxTrot, retrieved from gocomics.com."""
    name = 'foxtrot'
    long_name = 'FoxTrot'
    url = 'http://www.gocomics.com/foxtrot'
4506
4507
4508
class FoxTrotClassics(GenericGoComic):
    """FoxTrot Classics, retrieved from gocomics.com."""
    name = 'foxtrot-classics'
    long_name = 'FoxTrot Classics'
    url = 'http://www.gocomics.com/foxtrotclassics'
4513
4514
4515
class MisterAndMeGoComics(GenericGoComic):
    """Mister & Me, retrieved from gocomics.com."""
    # Also published at:
    #   - http://www.mister-and-me.com
    #   - https://tapastic.com/series/Mister-and-Me
    name = 'mister-goc'
    long_name = 'Mister & Me (from GoComics)'
    url = 'http://www.gocomics.com/mister-and-me'
4522
4523
4524
class NonSequitur(GenericGoComic):
    """Non Sequitur (Wiley Miller), retrieved from gocomics.com."""
    name = 'nonsequitur'
    long_name = 'Non Sequitur'
    url = 'http://www.gocomics.com/nonsequitur'
4529
4530
4531
class GenericTapasticComic(GenericListableComic):
    """Generic class to handle the logic common to comics from tapastic.com."""
    _categories = ('TAPASTIC', )

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Get information about a particular comics.

        `soup` is the parsed episode page and `archive_elt` the matching entry
        of the episode list; its 'publishDate' and 'title' values are used.
        Return None (after printing a notice) when the page contains no
        'art-image' image.
        """
        # 'publishDate' is divided by 1000 before being used as a timestamp
        # (presumably it is expressed in milliseconds). The date is computed
        # in the local timezone.
        timestamp = int(archive_elt['publishDate']) / 1000.0
        day = datetime.datetime.fromtimestamp(timestamp).date()
        imgs = soup.find_all('img', class_='art-image')
        if not imgs:
            print("Comic %s is being uploaded, retry later" % cls.get_url_from_archive_element(archive_elt))
            return None
        return {
            'day': day.day,
            'year': day.year,
            'month': day.month,
            'img': [i['src'] for i in imgs],
            'title': archive_elt['title'],
        }

    @classmethod
    def get_url_from_archive_element(cls, archive_elt):
        """Return the episode URL built from the 'id' of `archive_elt`."""
        return 'http://tapastic.com/episode/' + str(archive_elt['id'])

    @classmethod
    def get_archive_elements(cls):
        """Return the episode list embedded in the javascript of the series page.

        The first line of the page at `cls.url` which, once stripped, starts
        with 'episodeList : ' and ends with ',' is parsed as JSON.
        Raise IndexError when no such line is found.
        """
        pref, suff = 'episodeList : ', ','
        # Information is stored in the javascript part
        # I don't know the clean way to get it so this is the ugly way.
        for raw_line in urlopen_wrapper(cls.url).readlines():
            line = raw_line.decode('utf-8').strip()
            if line.startswith(pref) and line.endswith(suff):
                return json.loads(line[len(pref):-len(suff)])
        # Same exception type as indexing an empty list of matches, but with
        # a message telling which page was at fault.
        raise IndexError('No episode list found at %s' % cls.url)
4564
4565
4566
class VegetablesForDessert(GenericTapasticComic):
    """Vegetables For Dessert, retrieved from tapastic.com."""
    # Also published at http://vegetablesfordessert.tumblr.com
    name = 'vegetables'
    long_name = 'Vegetables For Dessert'
    url = 'http://tapastic.com/series/vegetablesfordessert'
4572
4573
4574
class FowlLanguageTapa(GenericTapasticComic):
    """Fowl Language Comics, retrieved from tapastic.com."""
    # Also published at:
    #   - http://www.fowllanguagecomics.com
    #   - http://fowllanguagecomics.tumblr.com
    #   - http://www.gocomics.com/fowl-language
    name = 'fowllanguage-tapa'
    long_name = 'Fowl Language Comics (from Tapastic)'
    url = 'http://tapastic.com/series/Fowl-Language-Comics'
    _categories = ('FOWLLANGUAGE', )
4583
4584
4585
class OscillatingProfundities(GenericTapasticComic):
    """Oscillating Profundities, retrieved from tapastic.com."""
    name = 'oscillating'
    long_name = 'Oscillating Profundities'
    url = 'http://tapastic.com/series/oscillatingprofundities'
4590
4591
4592
class ZnoflatsComics(GenericTapasticComic):
    """Znoflats Comics, retrieved from tapastic.com."""
    name = 'znoflats'
    long_name = 'Znoflats Comics'
    url = 'http://tapastic.com/series/Znoflats-Comics'
4597
4598
4599
class SandersenTapastic(GenericTapasticComic):
    """Sarah Andersen comics, retrieved from tapastic.com."""
    # Also published at:
    #   - http://sarahcandersen.com
    #   - http://www.gocomics.com/sarahs-scribbles
    name = 'sandersen-tapa'
    long_name = 'Sarah Andersen (from Tapastic)'
    url = 'http://tapastic.com/series/Doodle-Time'
4606
4607
4608
class TubeyToonsTapastic(GenericTapasticComic):
    """Tubey Toons, retrieved from tapastic.com."""
    # Also published at:
    #   - http://tubeytoons.com
    #   - http://tubeytoons.tumblr.com
    name = 'tubeytoons-tapa'
    long_name = 'Tubey Toons (from Tapastic)'
    url = 'http://tapastic.com/series/Tubey-Toons'
    # NOTE(review): the category reads 'TUNEYTOONS' (with an N) - presumably
    # shared with other Tubey Toons classes; confirm before renaming it.
    _categories = ('TUNEYTOONS', )
4616
4617
4618
class AnythingComicTapastic(GenericTapasticComic):
    """Anything Comic, retrieved from tapastic.com."""
    # Also published at http://www.anythingcomic.com
    name = 'anythingcomic-tapa'
    long_name = 'Anything Comic (from Tapastic)'
    url = 'http://tapastic.com/series/anything'
4624
4625
4626
class UnearthedComicsTapastic(GenericTapasticComic):
    """Unearthed Comics, retrieved from tapastic.com."""
    # Also published at:
    #   - http://unearthedcomics.com
    #   - http://unearthedcomics.tumblr.com
    name = 'unearthed-tapa'
    long_name = 'Unearthed Comics (from Tapastic)'
    url = 'http://tapastic.com/series/UnearthedComics'
    _categories = ('UNEARTHED', )
4634
4635
4636
class EverythingsStupidTapastic(GenericTapasticComic):
    """Everything's Stupid, retrieved from tapastic.com."""
    # Also published at:
    #   - http://www.webtoons.com/en/challenge/everythings-stupid/list?title_no=14591
    #   - http://everythingsstupid.net
    name = 'stupid-tapa'
    long_name = "Everything's Stupid (from Tapastic)"
    url = 'http://tapastic.com/series/EverythingsStupid'
4643
4644
4645
class JustSayEhTapastic(GenericTapasticComic):
    """Just Say Eh, retrieved from tapastic.com."""
    # Also published at http://www.justsayeh.com
    name = 'justsayeh-tapa'
    long_name = 'Just Say Eh (from Tapastic)'
    url = 'http://tapastic.com/series/Just-Say-Eh'
4651
4652
4653
class ThorsThundershackTapastic(GenericTapasticComic):
    """Thor's Thundershack, retrieved from tapastic.com."""
    # Also published at http://www.thorsthundershack.com
    name = 'thor-tapa'
    long_name = "Thor's Thundershack (from Tapastic)"
    # NOTE(review): the slug ends in 'Thundershac' - it looks truncated but is
    # presumably the actual series address; confirm before "fixing" it.
    url = 'http://tapastic.com/series/Thors-Thundershac'
    _categories = ('THOR', )
4660
4661
4662
class OwlTurdTapastic(GenericTapasticComic):
    """Owl Turd, retrieved from tapastic.com."""
    # Also published at http://owlturd.com
    name = 'owlturd-tapa'
    long_name = 'Owl Turd (from Tapastic)'
    url = 'http://tapastic.com/series/Owl-Turd-Comix'
    _categories = ('OWLTURD', )
4669
4670
4671
class GoneIntoRaptureTapastic(GenericTapasticComic):
    """Gone Into Rapture, retrieved from tapastic.com."""
    # Also published at:
    #   - http://goneintorapture.tumblr.com
    #   - http://www.goneintorapture.com
    name = 'rapture-tapa'
    long_name = 'Gone Into Rapture (from Tapastic)'
    url = 'http://tapastic.com/series/Goneintorapture'
4678
4679
4680
class HeckIfIKnowComicsTapa(GenericTapasticComic):
    """Heck If I Know Comics, retrieved from tapastic.com."""
    # Also published at http://heckifiknowcomics.com
    name = 'heck-tapa'
    long_name = 'Heck if I Know comics (from Tapastic)'
    url = 'http://tapastic.com/series/Regular'
4686
4687
4688
class CheerUpEmoKidTapa(GenericTapasticComic):
    """Cheer Up Emo Kid, retrieved from tapastic.com."""
    # Also published at:
    #   - http://www.cheerupemokid.com
    #   - http://enzocomics.tumblr.com
    name = 'cuek-tapa'
    long_name = 'Cheer Up Emo Kid (from Tapastic)'
    url = 'http://tapastic.com/series/CUEK'
4695
4696
4697
class BigFootJusticeTapa(GenericTapasticComic):
    """Retriever for the Big Foot Justice series hosted on Tapastic."""
    # Also available at http://bigfootjustice.com
    name = "bigfoot-tapa"
    long_name = "Big Foot Justice (from Tapastic)"
    url = "http://tapastic.com/series/bigfoot-justice"
4703
4704
4705
class UpAndOutTapa(GenericTapasticComic):
    """Retriever for the Up & Out series hosted on Tapastic."""
    # Also available at http://upandoutcomic.tumblr.com
    name = "upandout-tapa"
    long_name = "Up And Out (from Tapastic)"
    url = "http://tapastic.com/series/UP-and-OUT"
4711
4712
4713
class ToonHoleTapa(GenericTapasticComic):
    """Retriever for the Toon Hole series hosted on Tapastic."""
    # Also available at http://www.toonhole.com
    name = "toonhole-tapa"
    long_name = "Toon Hole (from Tapastic)"
    url = "http://tapastic.com/series/TOONHOLE"
4719
4720
4721
class AngryAtNothingTapa(GenericTapasticComic):
    """Retriever for the Angry at Nothing series hosted on Tapastic."""
    # Also available at http://www.angryatnothing.net
    name = "angry-tapa"
    long_name = "Angry At Nothing (from Tapastic)"
    url = "http://tapastic.com/series/Comics-yeah-definitely-comics-"
4727
4728
4729
class LeleozTapa(GenericTapasticComic):
    """Retriever for the Leleoz series hosted on Tapastic."""
    # Also available at http://leleozcomics.tumblr.com
    name = "leleoz-tapa"
    long_name = "Leleoz (from Tapastic)"
    url = "https://tapastic.com/series/Leleoz"
4735
4736
4737
class TheAwkwardYetiTapa(GenericTapasticComic):
    """Retriever for The Awkward Yeti series hosted on Tapastic."""
    # Also available at:
    #  - http://www.gocomics.com/the-awkward-yeti
    #  - http://theawkwardyeti.com
    #  - http://larstheyeti.tumblr.com
    name = "yeti-tapa"
    long_name = "The Awkward Yeti (from Tapastic)"
    url = "https://tapastic.com/series/TheAwkwardYeti"
    _categories = ("YETI",)
4746
4747
4748
class AsPerUsualTapa(GenericTapasticComic):
    """Class to retrieve As Per Usual comics from Tapastic."""
    # Also on http://as-per-usual.tumblr.com
    name = 'usual-tapa'
    long_name = 'As Per Usual (from Tapastic)'
    url = 'https://tapastic.com/series/AsPerUsual'
    # Spelled `_categories` (leading underscore) like every other comic class
    # in this module; the former `categories` spelling matched none of them.
    _categories = ('DAMILEE', )
4755
4756
4757
class HotComicsForCoolPeopleTapa(GenericTapasticComic):
    """Class to retrieve Hot Comics For Cool People from Tapastic."""
    # Also on http://hotcomicsforcoolpeople.tumblr.com
    # Also on http://hotcomics.biz (links to tumblr)
    # Also on http://hcfcp.com (links to tumblr)
    name = 'hotcomics-tapa'
    long_name = 'Hot Comics For Cool People (from Tapastic)'
    url = 'https://tapastic.com/series/Hot-Comics-For-Cool-People'
    # Spelled `_categories` (leading underscore) like every other comic class
    # in this module; the former `categories` spelling matched none of them.
    _categories = ('DAMILEE', )
4766
4767
4768
class OneOneOneOneComicTapa(GenericTapasticComic):
    """Retriever for the 1111 Comics series hosted on Tapastic."""
    # Also available at:
    #  - http://www.1111comics.me
    #  - http://comics1111.tumblr.com
    name = "1111-tapa"
    long_name = "1111 Comics (from Tapastic)"
    url = "https://tapastic.com/series/1111-Comics"
    _categories = ("ONEONEONEONE",)
4776
4777
4778
class TumbleDryTapa(GenericTapasticComic):
    """Class to retrieve Tumble Dry comics from Tapastic."""
    # Also on http://tumbledrycomics.com
    name = 'tumbledry-tapa'
    # Display name fixed: the comic is "Tumble Dry", not "Tumblr Dry".
    long_name = 'Tumble Dry (from Tapastic)'
    url = 'https://tapastic.com/series/TumbleDryComics'
4784
4785
4786
class DeadlyPanelTapa(GenericTapasticComic):
    """Retriever for the Deadly Panel series hosted on Tapastic."""
    # Also available at http://www.deadlypanel.com
    name = "deadly-tapa"
    long_name = "Deadly Panel (from Tapastic)"
    url = "https://tapastic.com/series/deadlypanel"
4792
4793
4794
class ChrisHallbeckMaxiTapa(GenericTapasticComic):
    """Class to retrieve Chris Hallbeck's Maximumble comics from Tapastic."""
    # Also on http://chrishallbeck.tumblr.com
    # Also on http://maximumble.com
    name = 'hallbeckmaxi-tapa'
    # Author name spelled "Hallbeck", as in the class name and the URLs above.
    long_name = 'Chris Hallbeck - Maximumble (from Tapastic)'
    url = 'https://tapastic.com/series/Maximumble'
    # NOTE(review): category key keeps its historical "HALLBACK" spelling;
    # other classes may share it, so rename all of them together if at all.
    _categories = ('HALLBACK', )
4802
4803
4804
class ChrisHallbeckMiniTapa(GenericTapasticComic):
    """Class to retrieve Chris Hallbeck's Minimumble comics from Tapastic."""
    # Also on http://chrishallbeck.tumblr.com
    # Also on http://minimumble.com
    name = 'hallbeckmini-tapa'
    # Author name spelled "Hallbeck", as in the class name and the URLs above.
    long_name = 'Chris Hallbeck - Minimumble (from Tapastic)'
    url = 'https://tapastic.com/series/Minimumble'
    # NOTE(review): category key keeps its historical "HALLBACK" spelling;
    # other classes may share it, so rename all of them together if at all.
    _categories = ('HALLBACK', )
4812
4813
4814
class ChrisHallbeckBiffTapa(GenericTapasticComic):
    """Class to retrieve Chris Hallbeck's The Book of Biff comics from Tapastic."""
    # Also on http://chrishallbeck.tumblr.com
    # Also on http://thebookofbiff.com
    name = 'hallbeckbiff-tapa'
    # Author name spelled "Hallbeck", as in the class name and the URLs above.
    long_name = 'Chris Hallbeck - The Book of Biff (from Tapastic)'
    url = 'https://tapastic.com/series/Biff'
    # NOTE(review): category key keeps its historical "HALLBACK" spelling;
    # other classes may share it, so rename all of them together if at all.
    _categories = ('HALLBACK', )
4822
4823
4824
class RandoWisTapa(GenericTapasticComic):
    """Retriever for the RandoWis series hosted on Tapastic."""
    # Also available at https://randowis.com
    name = "randowis-tapa"
    long_name = "RandoWis (from Tapastic)"
    url = "https://tapastic.com/series/RandoWis"
4830
4831
4832
class PigeonGazetteTapa(GenericTapasticComic):
    """Retriever for The Pigeon Gazette series hosted on Tapastic."""
    # Also available at http://thepigeongazette.tumblr.com
    name = "pigeon-tapa"
    long_name = "The Pigeon Gazette (from Tapastic)"
    url = "https://tapastic.com/series/The-Pigeon-Gazette"
4838
4839
4840
class TheOdd1sOutTapa(GenericTapasticComic):
    """Retriever for The Odd 1s Out series hosted on Tapastic."""
    # Also available at:
    #  - http://theodd1sout.com
    #  - http://theodd1sout.tumblr.com
    name = "theodd-tapa"
    long_name = "The Odd 1s Out (from Tapastic)"
    url = "https://tapastic.com/series/Theodd1sout"
4847
4848
4849
class TheWorldIsFlatTapa(GenericTapasticComic):
    """Retriever for The World Is Flat series hosted on Tapastic."""
    # Also available at http://theworldisflatcomics.tumblr.com
    name = "flatworld-tapa"
    long_name = "The World Is Flat (from Tapastic)"
    url = "https://tapastic.com/series/The-World-is-Flat"
4855
4856
4857
class MisterAndMeTapa(GenericTapasticComic):
    """Retriever for the Mister & Me series hosted on Tapastic."""
    # Also available at:
    #  - http://www.mister-and-me.com
    #  - http://www.gocomics.com/mister-and-me
    name = "mister-tapa"
    long_name = "Mister & Me (from Tapastic)"
    url = "https://tapastic.com/series/Mister-and-Me"
4864
4865
4866
class TalesOfAbsurdityTapa(GenericTapasticComic):
    """Retriever for the Tales Of Absurdity series hosted on Tapastic."""
    # Also available at:
    #  - http://talesofabsurdity.com
    #  - http://talesofabsurdity.tumblr.com
    name = "absurdity-tapa"
    long_name = "Tales of Absurdity (from Tapastic)"
    url = "http://tapastic.com/series/Tales-Of-Absurdity"
    _categories = ("ABSURDITY",)
4874
4875
4876
class BFGFSTapa(GenericTapasticComic):
    """Retriever for the BFGFS series hosted on Tapastic."""
    # Also available at:
    #  - http://bfgfs.com
    #  - http://bfgfs.tumblr.com
    name = "bfgfs-tapa"
    long_name = "BFGFS (from Tapastic)"
    url = "https://tapastic.com/series/BFGFS"
4883
4884
4885
class DoodleForFoodTapa(GenericTapasticComic):
    """Retriever for the Doodle For Food series hosted on Tapastic."""
    # Also available at http://doodleforfood.com
    name = "doodle-tapa"
    long_name = "Doodle For Food (from Tapastic)"
    url = "https://tapastic.com/series/Doodle-for-Food"
4891
4892
4893
class MrLovensteinTapa(GenericTapasticComic):
    """Retriever for the Mr Lovenstein series hosted on Tapastic."""
    # NOTE(review): the previous "Also on" comment pointed at
    # https://tapastic.com/series/MrLovenstein, i.e. this very series; the
    # comic's own website was presumably intended - confirm and update.
    name = "mrlovenstein-tapa"
    long_name = "Mr. Lovenstein (from Tapastic)"
    url = "https://tapastic.com/series/MrLovenstein"
4899
4900
4901
class CassandraCalinTapa(GenericTapasticComic):
    """Retriever for the C. Cassandra series hosted on Tapastic."""
    # Also available at:
    #  - http://cassandracalin.com
    #  - http://c-cassandra.tumblr.com
    name = "cassandra-tapa"
    long_name = "Cassandra Calin (from Tapastic)"
    url = "https://tapastic.com/series/C-Cassandra-comics"
4908
4909
4910
class WafflesAndPancakes(GenericTapasticComic):
    """Retriever for the Waffles And Pancakes series hosted on Tapastic."""
    # Also available at http://wandpcomic.com
    name = "waffles"
    long_name = "Waffles And Pancakes"
    url = "https://tapastic.com/series/Waffles-and-Pancakes"
4916
4917
4918
class YesterdaysPopcornTapastic(GenericTapasticComic):
    """Retriever for the Yesterday's Popcorn series hosted on Tapastic."""
    # Also available at:
    #  - http://www.yesterdayspopcorn.com
    #  - http://yesterdayspopcorn.tumblr.com
    name = "popcorn-tapa"
    long_name = "Yesterday's Popcorn (from Tapastic)"
    url = "https://tapastic.com/series/Yesterdays-Popcorn"
4925
4926
4927
class OurSuperAdventureTapastic(GenericTapasticComic):
    """Retriever for the Our Super Adventure series hosted on Tapastic."""
    # Also available at http://www.oursuperadventure.com
    # Related links:
    #  - http://sarahssketchbook.tumblr.com
    #  - http://sarahgraley.com
    name = "superadventure-tapastic"
    long_name = "Our Super Adventure (from Tapastic)"
    url = "https://tapastic.com/series/Our-Super-Adventure"
4935
4936
4937
class NamelessPCs(GenericTapasticComic):
    """Retriever for the Nameless PCs series hosted on Tapastic."""
    # Also available at http://namelesspcs.com
    name = "namelesspcs-tapa"
    long_name = "NamelessPCs (from Tapastic)"
    url = "https://tapastic.com/series/NamelessPC"
4943
4944
4945
class UbertoolTapa(GenericTapasticComic):
    """Retriever for the Ubertool series hosted on Tapastic."""
    # Also available at:
    #  - http://ubertoolcomic.com
    #  - http://ubertool.tumblr.com
    name = "ubertool-tapa"
    long_name = "Ubertool (from Tapastic)"
    url = "https://tapastic.com/series/ubertool"
    _categories = ("UBERTOOL",)
4953
4954
4955
class SmallBlueYonderTapa(GenericTapasticComic):
    """Retriever for the Small Blue Yonder series hosted on Tapastic."""
    # Also available at http://www.smallblueyonder.com
    name = "smallblue-tapa"
    long_name = "Small Blue Yonder (from Tapastic)"
    url = "https://tapastic.com/series/Small-Blue-Yonder"
4961
4962
4963
class TizzyStitchBirdTapa(GenericTapasticComic):
    """Retriever for the Tizzy Stitch Bird series hosted on Tapastic."""
    # Also available at:
    #  - http://tizzystitchbird.com
    #  - http://tizzystitchbird.tumblr.com
    #  - http://www.webtoons.com/en/challenge/tizzy-stitchbird/list?title_no=50082
    name = "tizzy-tapa"
    long_name = "Tizzy Stitch Bird (from Tapastic)"
    url = "https://tapastic.com/series/TizzyStitchbird"
4971
4972
4973
def get_subclasses(klass):
    """Return every class deriving from klass, directly or transitively.

    The direct subclasses come first, followed by the descendants of each
    of them in turn. A class reachable through several bases is listed
    once per path.
    """
    children = klass.__subclasses__()
    found = list(children)
    for child in children:
        found += get_subclasses(child)
    return found
4979
4980
4981
def remove_st_nd_rd_th_from_date(string):
    """Function to transform 1st/2nd/3rd/4th in a parsable date format.

    Only an ordinal suffix that directly follows a digit is removed, so
    words that merely contain those letters ("August", "Monday",
    "Saturday"...) are returned untouched.

    Returns the string without the "st"/"nd"/"rd"/"th" ordinal suffixes.
    """
    # Lookbehind keeps the digit; \b avoids eating the start of a longer word.
    return re.sub(r'(?<=\d)(?:st|nd|rd|th)\b', '', string)
4989
4990
4991
def string_to_date(string, date_format, local=DEFAULT_LOCAL):
    """Function to convert string to date object.

    Wrapper around datetime.datetime.strptime which temporarily switches the
    process locale (LC_ALL) to `local` while parsing, then restores the
    locale that was active before the call, even when parsing fails.

    Raises ValueError (from strptime) when `string` does not match
    `date_format`, and locale.Error when `local` cannot be set.
    """
    # format described in https://docs.python.org/2/library/datetime.html#strftime-and-strptime-behavior
    # Calling setlocale without a new value only queries the current setting.
    prev_locale = locale.setlocale(locale.LC_ALL)
    if local == prev_locale:
        return datetime.datetime.strptime(string, date_format).date()
    locale.setlocale(locale.LC_ALL, local)
    try:
        return datetime.datetime.strptime(string, date_format).date()
    finally:
        # Always put the previous locale back: the setting is process-wide.
        locale.setlocale(locale.LC_ALL, prev_locale)
5002
5003
5004
# Every direct or indirect subclass of GenericComic defined so far.
COMICS = set(get_subclasses(GenericComic))
# Classes whose name is None are left out of the usable comics.
VALID_COMICS = [comic for comic in COMICS if comic.name is not None]
# Lookup table from comic name to class; the check fails on duplicated names.
COMIC_NAMES = {comic.name: comic for comic in VALID_COMICS}
assert len(COMIC_NAMES) == len(VALID_COMICS)
# Same uniqueness check for the Python class names.
CLASS_NAMES = {comic.__name__ for comic in VALID_COMICS}
assert len(CLASS_NAMES) == len(VALID_COMICS)
5010