Completed
Push — master ( b54672...5b477e )
by De
01:11
created

comics.py (26 issues)

1
#! /usr/bin/python3
2
# vim: set expandtab tabstop=4 shiftwidth=4 :
3
"""Module to retrieve webcomics"""
4
5
from comic_abstract import GenericComic, get_date_for_comic
6
import re
7
from datetime import date, timedelta
8
import datetime
9
from urlfunctions import get_soup_at_url, urljoin_wrapper,\
10
    convert_iri_to_plain_ascii_uri, load_json_at_url, urlopen_wrapper
11
import json
12
import locale
13
import urllib
14
15
DEFAULT_LOCAL = 'en_GB.UTF-8'
16
17
18
class Xkcd(GenericComic):
    """Class to retrieve Xkcd comics."""
    name = 'xkcd'
    long_name = 'xkcd'
    url = 'http://xkcd.com'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generator to get the next comic. Implementation of GenericComic's abstract method.

        Comics are numbered; every number after the one stored in
        `last_comic` (or from 1 when `last_comic` is falsy) up to the
        number reported by the site's 'info.0.json' is fetched and yielded.
        """
        first_num = last_comic['num'] if last_comic else 0
        last_num = load_json_at_url(
            urljoin_wrapper(cls.url, 'info.0.json'))['num']

        for num in range(first_num + 1, last_num + 1):
            if num == 404:
                # Number 404 is deliberately skipped (presumably there is no
                # such comic on the site - to be confirmed).
                continue
            json_url = urljoin_wrapper(cls.url, '%d/info.0.json' % num)
            comic = load_json_at_url(json_url)
            # Sanity check on the payload before it gets enriched.
            assert comic['num'] == num
            comic['img'] = [comic['img']]
            comic['prefix'] = '%d-' % num
            comic['json_url'] = json_url
            comic['url'] = urljoin_wrapper(cls.url, str(num))
            # Date fields are converted to integers.
            for field in ('day', 'month', 'year'):
                comic[field] = int(comic[field])
            yield comic
44
45
46
# Helper functions corresponding to get_url_from_link/get_url_from_archive_element
47
48
49
@classmethod
def get_href(cls, link):
    """Implementation of get_url_from_link/get_url_from_archive_element.

    Return the value stored under the 'href' key of `link`, unchanged.
    """
    return link['href']
53
54
55
@classmethod
def join_cls_url_to_href(cls, link):
    """Implementation of get_url_from_link/get_url_from_archive_element.

    Return the 'href' of `link` joined to the class `url` attribute via
    `urljoin_wrapper`.
    """
    href = link['href']
    return urljoin_wrapper(cls.url, href)
59
60
61
class GenericNavigableComic(GenericComic):
    """Generic class for "navigable" comics : with first/next arrows.

    This class applies to comics where previous and next comics can be
    accessed from a given comic. Once given a starting point (either
    the first comic or the last comic retrieved), it handles the
    navigation, the retrieval of the soup object and the setting of
    the 'url' attribute on retrieved comics. This greatly limits the
    amount of boilerplate code in the different implementation classes.

    The `get_next_comic` method is implemented in terms of more
    specialized methods to be implemented/overridden:
        - get_first_comic_link
        - get_navi_link
        - get_comic_info
        - get_url_from_link
    """
    _categories = ('NAVIGABLE', )

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        Sometimes this can be retrieved from any comic page, sometimes from
        the archive page, sometimes it doesn't exist at all and one has
        to iterate backward to find it before hardcoding the result found.
        """
        raise NotImplementedError

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next (or previous - for dev purposes) comic."""
        raise NotImplementedError

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics."""
        raise NotImplementedError

    @classmethod
    def get_url_from_link(cls, link):
        """Get url corresponding to a link. Default implementation is similar to get_href."""
        return link['href']

    @classmethod
    def get_next_link(cls, last_soup):
        """Get link to next comic."""
        link = cls.get_navi_link(last_soup, True)
        cls.log("Next link is %s" % link)
        return link

    @classmethod
    def get_prev_link(cls, last_soup):
        """Get link to previous comic."""
        link = cls.get_navi_link(last_soup, False)
        cls.log("Prev link is %s" % link)
        return link

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generic implementation of get_next_comic for navigable comics.

        Starts after `last_comic` when provided, otherwise from the first
        comic link, and follows "next" links until there is none or until
        a link leads back to the URL just visited.
        """
        url = last_comic['url'] if last_comic else None
        cls.log("starting 'get_next_comic' from %s" % url)
        if url:
            next_comic = cls.get_next_link(get_soup_at_url(url))
        else:
            next_comic = cls.get_first_comic_link()
        cls.log("next/first comic will be %s (url is %s)" % (str(next_comic), url))
        # cls.check_navigation(url)
        while next_comic:
            prev_url, url = url, cls.get_url_from_link(next_comic)
            if prev_url == url:
                # A "next" link pointing at the current page would loop forever.
                cls.log("got same url %s" % url)
                break
            cls.log("about to get %s (%s)" % (url, str(next_comic)))
            soup = get_soup_at_url(url)
            comic = cls.get_comic_info(soup, next_comic)
            if comic is not None:
                # The 'url' key is set here, not by get_comic_info.
                assert 'url' not in comic
                comic['url'] = url
                yield comic
            next_comic = cls.get_next_link(soup)
            cls.log("next comic will be %s" % str(next_comic))

    @classmethod
    def check_first_link(cls):
        """Check that navigation to first comic seems to be working - for dev purposes."""
        # Imported explicitly: a bare `import urllib` does not guarantee that
        # the `urllib.error` submodule has been loaded.
        import urllib.error
        cls.log("about to check first link")
        ok = True
        firstlink = cls.get_first_comic_link()
        if firstlink is None:
            print("From %s : no first link" % cls.url)
            ok = False
        else:
            firsturl = cls.get_url_from_link(firstlink)
            try:
                get_soup_at_url(firsturl)
            except urllib.error.HTTPError:
                print("From %s : invalid first url" % cls.url)
                ok = False
        cls.log("checked first link -> returned %d" % ok)
        return ok

    @classmethod
    def check_prev_next_links(cls, url):
        """Check that navigation to prev/next from a given URL seems to be working - for dev purposes."""
        cls.log("about to check prev/next from %s" % url)
        ok = True
        if url is None:
            prevlink, nextlink = None, None
        else:
            soup = get_soup_at_url(url)
            prevlink, nextlink = cls.get_prev_link(soup), cls.get_next_link(soup)
        if prevlink is None and nextlink is None:
            print("From %s : no previous nor next" % url)
            ok = False
        else:
            if prevlink:
                # Going back one comic then forward should lead here again.
                prevurl = cls.get_url_from_link(prevlink)
                prevsoup = get_soup_at_url(prevurl)
                prevnextlink = cls.get_next_link(prevsoup)
                prevnext = cls.get_url_from_link(prevnextlink) if prevnextlink is not None else "NO URL"
                if prevnext != url:
                    print("From %s, going backward then forward leads to %s" % (url, prevnext))
                    ok = False
            if nextlink:
                # Going forward one comic then back should lead here again
                # (skipped when the "next" link points at the current page).
                nexturl = cls.get_url_from_link(nextlink)
                if nexturl != url:
                    nextsoup = get_soup_at_url(nexturl)
                    nextprevlink = cls.get_prev_link(nextsoup)
                    nextprev = cls.get_url_from_link(nextprevlink) if nextprevlink is not None else "NO URL"
                    if nextprev != url:
                        print("From %s, going forward then backward leads to %s" % (url, nextprev))
                        ok = False
        cls.log("checked prev/next from %s -> returned %d" % (url, ok))
        return ok

    @classmethod
    def check_navigation(cls, url):
        """Check that navigation functions seem to be working - for dev purposes."""
        cls.log("about to check navigation from %s" % url)
        # Both checks are always run so that every problem gets reported.
        first = cls.check_first_link()
        prevnext = cls.check_prev_next_links(url)
        ok = first and prevnext
        cls.log("checked navigation from %s -> returned %d" % (url, ok))
        return ok
207
208
209
class GenericListableComic(GenericComic):
    """Generic class for "listable" comics : with a list of comics (aka 'archive')

    The `get_next_comic` method is implemented in terms of more
    specialized methods to be implemented/overridden:
        - get_archive_elements
        - get_url_from_archive_element
        - get_comic_info
    """
    _categories = ('LISTABLE', )

    @classmethod
    def get_archive_elements(cls):
        """Get the archive elements (iterable)."""
        raise NotImplementedError

    @classmethod
    def get_url_from_archive_element(cls, archive_elt):
        """Get url corresponding to an archive element."""
        raise NotImplementedError

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Get information about a particular comics."""
        raise NotImplementedError

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generic implementation of get_next_comic for listable comics.

        Archive elements are walked in order. When `last_comic` is given,
        elements are skipped until its URL has been seen; every element
        after that point is fetched and yielded.
        """
        waiting_for_url = last_comic['url'] if last_comic else None
        for archive_elt in cls.get_archive_elements():
            url = cls.get_url_from_archive_element(archive_elt)
            cls.log("considering %s" % url)
            if waiting_for_url is None:
                cls.log("about to get %s (%s)" % (url, str(archive_elt)))
                soup = get_soup_at_url(url)
                comic = cls.get_comic_info(soup, archive_elt)
                if comic is not None:
                    # The 'url' key is set here, not by get_comic_info.
                    assert 'url' not in comic
                    comic['url'] = url
                    yield comic
            elif waiting_for_url == url:
                # Last retrieved comic found: start yielding from the next one.
                waiting_for_url = None
        if waiting_for_url is not None:
            print("Did not find %s : there might be a problem" % waiting_for_url)
254
255
# Helper functions corresponding to get_first_comic_link/get_navi_link
256
257
258
@classmethod
def get_link_rel_next(cls, last_soup, next_):
    """Implementation of get_navi_link.

    Look up the 'link' tag whose rel is 'next' (when `next_` is truthy)
    or 'prev' (otherwise) in `last_soup`.
    """
    rel = 'next' if next_ else 'prev'
    return last_soup.find('link', rel=rel)
262
263
264
@classmethod
def get_a_rel_next(cls, last_soup, next_):
    """Implementation of get_navi_link.

    Look up the 'a' tag whose rel is 'next' (when `next_` is truthy)
    or 'prev' (otherwise) in `last_soup`.
    """
    rel = 'next' if next_ else 'prev'
    return last_soup.find('a', rel=rel)
268
269
270
@classmethod
def get_a_navi_navinext(cls, last_soup, next_):
    """Implementation of get_navi_link.

    Look up the 'a' tag with class 'navi navi-next' (when `next_` is
    truthy) or 'navi navi-prev' (otherwise) in `last_soup`.
    """
    css_class = 'navi navi-next' if next_ else 'navi navi-prev'
    return last_soup.find('a', class_=css_class)
274
275
276
@classmethod
def get_a_navi_comicnavnext_navinext(cls, last_soup, next_):
    """Implementation of get_navi_link.

    Look up the 'a' tag with class 'navi comic-nav-next navi-next' (when
    `next_` is truthy) or 'navi comic-nav-previous navi-prev' (otherwise)
    in `last_soup`.
    """
    if next_:
        css_class = 'navi comic-nav-next navi-next'
    else:
        css_class = 'navi comic-nav-previous navi-prev'
    return last_soup.find('a', class_=css_class)
280
281
282
@classmethod
def get_a_comicnavbase_comicnavnext(cls, last_soup, next_):
    """Implementation of get_navi_link.

    Look up the 'a' tag with class 'comic-nav-base comic-nav-next' (when
    `next_` is truthy) or 'comic-nav-base comic-nav-previous' (otherwise)
    in `last_soup`.
    """
    if next_:
        css_class = 'comic-nav-base comic-nav-next'
    else:
        css_class = 'comic-nav-base comic-nav-previous'
    return last_soup.find('a', class_=css_class)
286
287
288
@classmethod
def get_a_navi_navifirst(cls):
    """Implementation of get_first_comic_link.

    Fetch the page at the class `url` and look up the 'a' tag with
    class 'navi navi-first'.
    """
    soup = get_soup_at_url(cls.url)
    return soup.find('a', class_='navi navi-first')
292
293
294
@classmethod
def get_div_navfirst_a(cls):
    """Implementation of get_first_comic_link.

    Fetch the page at the class `url` and look up the 'a' tag inside the
    'div' with class "nav-first". Returns None when that div is absent,
    like the other first-link helpers do when nothing is found.
    """
    div = get_soup_at_url(cls.url).find('div', class_="nav-first")
    return div.find('a') if div else None
298
299
300
@classmethod
def get_a_comicnavbase_comicnavfirst(cls):
    """Implementation of get_first_comic_link.

    Fetch the page at the class `url` and look up the 'a' tag with
    class 'comic-nav-base comic-nav-first'.
    """
    soup = get_soup_at_url(cls.url)
    return soup.find('a', class_='comic-nav-base comic-nav-first')
304
305
306
@classmethod
def simulate_first_link(cls):
    """Implementation of get_first_comic_link creating a link-like object from
    a URL provided by the class.

    The class must define a `first_url` attribute; the returned dict
    exposes it under the 'href' key.

    Note: The first URL can easily be found using :
    `get_first_comic_link = navigate_to_first_comic`.
    """
    return {'href': cls.first_url}
315
316
317
@classmethod
def navigate_to_first_comic(cls):
    """Implementation of get_first_comic_link navigating from a user provided
    URL to the first comic.

    Sometimes, the first comic cannot be reached directly so to start
    from the first comic one has to go to the previous comic until
    there is no previous comic. Once this URL is reached, it
    is better to hardcode it but for development purposes, it
    is convenient to have an automatic way to find it.

    Then, the URL found can easily be used via `simulate_first_link`.
    """
    url = input("Get starting URL: ")
    while True:
        # Every visited URL is printed so that the last one can be hardcoded.
        print(url)
        comic = cls.get_prev_link(get_soup_at_url(url))
        if not comic:
            break
        url = cls.get_url_from_link(comic)
    return {'href': url}
338
339
340
class GenericEmptyComic(GenericComic):
    """Generic class for comics where nothing is to be done.

    It can be useful to deactivate temporarily comics that do not work
    properly by replacing `class MyComic(GenericWhateverComic)` with
    `class MyComic(GenericEmptyComic, GenericWhateverComic)`."""
    _categories = ('EMPTY', )

    @classmethod
    def get_next_comic(cls, last_comic):
        """Implementation of get_next_comic returning no comics."""
        cls.log("comic is considered as empty - returning no comic")
        return []
353
354
355 View Code Duplication
class ExtraFabulousComics(GenericNavigableComic):
    """Class to retrieve Extra Fabulous Comics."""
    name = 'efc'
    long_name = 'Extra Fabulous Comics'
    url = 'http://extrafabulouscomics.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics."""
        # The site URL is escaped so that its dots are matched literally.
        img_src_re = re.compile('^%s/wp-content/uploads/' % re.escape(cls.url))
        imgs = soup.find_all('img', src=img_src_re)
        title = soup.find('meta', property='og:title')['content']
        # Only the first 10 characters (the "%Y-%m-%d" part) are parsed.
        date_str = soup.find('meta', property='article:published_time')['content'][:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        return {
            'title': title,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'prefix': title + '-'
        }
379
380
381 View Code Duplication
class GenericLeMondeBlog(GenericNavigableComic):
    """Generic class to retrieve comics from Le Monde blogs."""
    _categories = ('LEMONDE', 'FRANCAIS')
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    # Placeholder read by simulate_first_link: subclasses provide the real URL.
    first_url = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics."""
        url2 = soup.find('link', rel='shortlink')['href']
        title = soup.find('meta', property='og:title')['content']
        date_str = soup.find("span", class_="entry-date").string
        # Dates are parsed with the French locale (e.g. month names).
        day = string_to_date(date_str, "%d %B %Y", "fr_FR.utf8")
        imgs = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'url2': url2,
            'img': [convert_iri_to_plain_ascii_uri(i['content']) for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
404
405
406
class ZepWorld(GenericLeMondeBlog):
    """Class to retrieve Zep World comics."""
    name = "zep"
    long_name = "Zep World"
    url = "http://zepworld.blog.lemonde.fr"
    first_url = "http://zepworld.blog.lemonde.fr/2014/10/31/bientot-le-blog-de-zep/"
412
413
414
class Vidberg(GenericLeMondeBlog):
    """Class to retrieve Vidberg comics."""
    name = 'vidberg'
    long_name = "Vidberg - l'actu en patates"
    url = "http://vidberg.blog.lemonde.fr"
    # Not the first but I didn't find an efficient way to retrieve it
    first_url = "http://vidberg.blog.lemonde.fr/2012/02/09/revue-de-campagne-la-campagne-du-modem-semballe/"
421
422
423
class Plantu(GenericLeMondeBlog):
    """Class to retrieve Plantu comics."""
    name = 'plantu'
    long_name = "Plantu"
    url = "http://plantu.blog.lemonde.fr"
    first_url = "http://plantu.blog.lemonde.fr/2014/10/28/stress-test-a-bruxelles/"
429
430
431
class XavierGorce(GenericLeMondeBlog):
    """Class to retrieve Xavier Gorce comics."""
    name = 'gorce'
    long_name = "Xavier Gorce"
    url = "http://xaviergorce.blog.lemonde.fr"
    first_url = "http://xaviergorce.blog.lemonde.fr/2015/01/09/distinction/"
437
438
439
class CartooningForPeace(GenericLeMondeBlog):
    """Class to retrieve Cartooning For Peace comics."""
    name = 'forpeace'
    long_name = "Cartooning For Peace"
    url = "http://cartooningforpeace.blog.lemonde.fr"
    first_url = "http://cartooningforpeace.blog.lemonde.fr/2014/12/15/bado/"
445
446
447
class Aurel(GenericLeMondeBlog):
    """Class to retrieve Aurel comics."""
    name = 'aurel'
    long_name = "Aurel"
    url = "http://aurel.blog.lemonde.fr"
    first_url = "http://aurel.blog.lemonde.fr/2014/09/29/le-senat-repasse-a-droite/"
453
454
455
class LesCulottees(GenericLeMondeBlog):
    """Class to retrieve Les Culottees comics."""
    name = 'culottees'
    long_name = 'Les Culottees'
    url = "http://lesculottees.blog.lemonde.fr"
    first_url = "http://lesculottees.blog.lemonde.fr/2016/01/11/clementine-delait-femme-a-barbe/"
461
462
463
class UneAnneeAuLycee(GenericLeMondeBlog):
    """Class to retrieve Une Annee Au Lycee comics."""
    name = 'lycee'
    long_name = 'Une Annee au Lycee'
    url = 'http://uneanneeaulycee.blog.lemonde.fr'
    first_url = "http://uneanneeaulycee.blog.lemonde.fr/2016/06/13/la-semaine-du-bac-est-arrivee/"
469
470
471 View Code Duplication
class Rall(GenericNavigableComic):
    """Class to retrieve Ted Rall comics."""
    # Also on http://www.gocomics.com/tedrall
    name = 'rall'
    long_name = "Ted Rall"
    url = "http://rall.com/comic"
    _categories = ('RALL', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    # Not the first but I didn't find an efficient way to retrieve it
    first_url = "http://rall.com/2014/01/30/los-angeles-times-cartoon-well-miss-those-california-flowers"

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics."""
        title = soup.find('meta', property='og:title')['content']
        author = soup.find("span", class_="author vcard").find("a").string
        date_str = soup.find("span", class_="entry-date").string
        day = string_to_date(date_str, "%B %d, %Y")
        desc = soup.find('meta', property='og:description')['content']
        imgs = soup.find('div', class_='entry-content').find_all('img')
        imgs = imgs[:-7]  # remove social media buttons
        return {
            'title': title,
            'author': author,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'description': desc,
            'img': [i['src'] for i in imgs],
        }
502
503
504
class Dilem(GenericNavigableComic):
    """Class to retrieve Ali Dilem comics."""
    name = 'dilem'
    long_name = 'Ali Dilem'
    url = 'http://information.tv5monde.com/dilem'
    _categories = ('FRANCAIS', )
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = "http://information.tv5monde.com/dilem/2004-06-26"

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic."""
        # prev is next / next is prev
        li = last_soup.find('li', class_='prev' if next_ else 'next')
        return li.find('a') if li else None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics."""
        short_url = soup.find('link', rel='shortlink')['href']
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        imgs = soup.find_all('meta', property='og:image')
        # Only the first 10 characters (the "%Y-%m-%d" part) are parsed.
        date_str = soup.find('span', property='dc:date')['content'][:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        return {
            'short_url': short_url,
            'title': title,
            'img': [i['content'] for i in imgs],
            'day': day.day,
            'month': day.month,
            'year': day.year,
        }
538
539
540
class SpaceAvalanche(GenericNavigableComic):
    """Class to retrieve Space Avalanche comics."""
    name = 'avalanche'
    long_name = 'Space Avalanche'
    url = 'http://www.spaceavalanche.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics."""
        # Hardcoded link-like dict; 'title' is read by get_comic_info.
        return {'href': "http://www.spaceavalanche.com/2009/02/02/irish-sea/", 'title': "Irish Sea"}

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics."""
        # The date is taken from the year/month/day segments of the URL.
        url_date_re = re.compile(r'.*/([0-9]*)/([0-9]*)/([0-9]*)/.*$')
        title = link['title']
        url = cls.get_url_from_link(link)
        year, month, day = [int(s)
                            for s in url_date_re.match(url).groups()]
        imgs = soup.find("div", class_="entry").find_all("img")
        return {
            'title': title,
            'day': day,
            'month': month,
            'year': year,
            'img': [i['src'] for i in imgs],
        }
568
569
570
class ZenPencils(GenericNavigableComic):
    """Class to retrieve ZenPencils comics."""
    # Also on http://zenpencils.tumblr.com
    # Also on http://www.gocomics.com/zen-pencils
    name = 'zenpencils'
    long_name = 'Zen Pencils'
    url = 'http://zenpencils.com'
    _categories = ('ZENPENCILS', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = "http://zenpencils.com/comic/1-ralph-waldo-emerson-make-them-cry/"

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics."""
        imgs = soup.find('div', id='comic').find_all('img')
        post = soup.find('div', class_='post-content')
        author = post.find("span", class_="post-author").find("a").string
        title = soup.find('meta', property='og:title')['content']
        date_str = post.find('span', class_='post-date').string
        day = string_to_date(date_str, "%B %d, %Y")
        # Sanity checks on the page structure: at least one image, whose
        # alt/title attributes agree and are either empty or the comic title.
        assert imgs
        assert all(i['alt'] == i['title'] for i in imgs)
        assert all(i['alt'] in (title, "") for i in imgs)
        desc = soup.find('meta', property='og:description')['content']
        return {
            'title': title,
            'description': desc,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
        }
605
606
607
class ItsTheTie(GenericNavigableComic):
    """Class to retrieve It's the tie comics."""
    # Also on http://itsthetie.tumblr.com
    # Also on https://tapastic.com/series/itsthetie
    name = 'tie'
    long_name = "It's the tie"
    url = "http://itsthetie.com"
    _categories = ('TIE', )
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics."""
        title = soup.find('h1', class_='comic-title').find('a').string
        date_str = soup.find('header', class_='comic-meta entry-meta').find('a').string
        day = string_to_date(date_str, "%B %d, %Y")
        # Bonus images may or may not be in meta og:image.
        imgs = soup.find_all('meta', property='og:image')
        imgs_src = [i['content'] for i in imgs]
        bonus = soup.find_all('img', attrs={'data-oversrc': True})
        bonus_src = [b['data-oversrc'] for b in bonus]
        # Bonus images are appended only when not already listed.
        all_imgs_src = imgs_src + [s for s in bonus_src if s not in imgs_src]
        # The '/2016/01/bonus-panel.png' image is filtered out (presumably a
        # generic placeholder - to be confirmed).
        all_imgs_src = [s for s in all_imgs_src if not s.endswith("/2016/01/bonus-panel.png")]
        tag_meta = soup.find('meta', property='article:tag')
        tags = tag_meta['content'] if tag_meta else ""
        return {
            'title': title,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': all_imgs_src,
            'tags': tags,
        }
641
642
643
class PenelopeBagieu(GenericNavigableComic):
    """Class to retrieve comics from Penelope Bagieu's blog."""
    name = 'bagieu'
    long_name = 'Ma vie est tout a fait fascinante (Bagieu)'
    url = 'http://www.penelope-jolicoeur.com'
    _categories = ('FRANCAIS', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.penelope-jolicoeur.com/2007/02/ma-vie-mon-oeuv.html'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics."""
        date_str = soup.find('h2', class_='date-header').string
        # Dates are parsed with the French locale (day and month names).
        day = string_to_date(date_str, "%A %d %B %Y", "fr_FR.utf8")
        imgs = soup.find('div', class_='entry-body').find_all('img')
        title = soup.find('h3', class_='entry-header').string
        return {
            'title': title,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
667
668
669 View Code Duplication
class OneOneOneOneComic(GenericNavigableComic):
    """Class to retrieve 1111 Comics."""
    # Also on http://comics1111.tumblr.com
    # Also on https://tapastic.com/series/1111-Comics
    name = '1111'
    long_name = '1111 Comics'
    url = 'http://www.1111comics.me'
    _categories = ('ONEONEONEONE', )
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics."""
        title = soup.find('h1', class_='comic-title').find('a').string
        date_str = soup.find('header', class_='comic-meta entry-meta').find('a').string
        day = string_to_date(date_str, "%B %d, %Y")
        imgs = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [i['content'] for i in imgs],
        }
694
695
696 View Code Duplication
class AngryAtNothing(GenericNavigableComic):
    """Class to retrieve Angry at Nothing comics."""
    # Also on http://tapastic.com/series/Comics-yeah-definitely-comics-
    name = 'angry'
    long_name = 'Angry At Nothing'
    url = 'http://www.angryatnothing.net'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics."""
        title = soup.find('h1', class_='comic-title').find('a').string
        date_str = soup.find('header', class_='comic-meta entry-meta').find('a').string
        day = string_to_date(date_str, "%B %d, %Y")
        imgs = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [i['content'] for i in imgs],
        }
719
720
721
class NeDroid(GenericNavigableComic):
    """Class to retrieve NeDroid comics."""
    name = 'nedroid'
    long_name = 'NeDroid'
    url = 'http://nedroid.com'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics."""
        # The site URL and the dots are escaped so they are matched literally.
        short_url_re = re.compile(r'^%s/\?p=([0-9]*)' % re.escape(cls.url))
        comic_url_re = re.compile(r'//nedroid\.com/comics/([0-9]*)-([0-9]*)-([0-9]*).*')
        short_url = cls.get_url_from_link(soup.find('link', rel='shortlink'))
        # The comic number is the 'p' parameter of the short URL.
        num = int(short_url_re.match(short_url).groups()[0])
        imgs = soup.find('div', id='comic').find_all('img')
        # The date is taken from the file name of the first image.
        year, month, day = [int(s) for s in comic_url_re.match(imgs[0]['src']).groups()]
        assert len(imgs) == 1
        title = imgs[0]['alt']
        title2 = imgs[0]['title']
        return {
            'short_url': short_url,
            'title': title,
            'title2': title2,
            'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
            'day': day,
            'month': month,
            'year': year,
            'num': num,
        }
752
753
754
class Garfield(GenericNavigableComic):
    """Class to retrieve Garfield comics."""
    # Also on http://www.gocomics.com/garfield
    name = 'garfield'
    long_name = 'Garfield'
    url = 'https://garfield.com'
    _categories = ('GARFIELD', )
    get_first_comic_link = simulate_first_link
    first_url = 'https://garfield.com/comic/1978/06/19'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic."""
        arrow_class = 'comic-arrow-right' if next_ else 'comic-arrow-left'
        return last_soup.find('a', class_=arrow_class)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The date is parsed from the comic URL ('<url>/comic/<y>/<m>/<d>').
        """
        page_url = cls.get_url_from_link(link)
        date_pattern = re.compile('^%s/comic/([0-9]*)/([0-9]*)/([0-9]*)' % cls.url)
        year, month, day = map(int, date_pattern.match(page_url).groups())
        display = soup.find('div', class_='comic-display')
        strips = display.find_all('img', class_='img-responsive')
        return {
            'month': month,
            'year': year,
            'day': day,
            'img': [strip['src'] for strip in strips],
        }
782
783
784
class Dilbert(GenericNavigableComic):
    """Class to retrieve Dilbert comics."""
    # Also on http://www.gocomics.com/dilbert-classics
    name = 'dilbert'
    long_name = 'Dilbert'
    url = 'http://dilbert.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://dilbert.com/strip/1989-04-16'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic."""
        nav_class = 'nav-comic nav-right' if next_ else 'nav-comic nav-left'
        nav_div = last_soup.find('div', class_=nav_class)
        if not nav_div:
            return None
        return nav_div.find('a')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Everything is read from the page's <meta property=...> tags.
        """
        def meta_content(prop):
            """Return the content of the first meta tag with that property."""
            return soup.find('meta', property=prop)['content']

        title = meta_content('og:title')
        og_images = soup.find_all('meta', property='og:image')
        desc = meta_content('og:description')
        published = string_to_date(meta_content('article:publish_date'), "%B %d, %Y")
        author = meta_content('article:author')
        tags = meta_content('article:tag')
        return {
            'title': title,
            'description': desc,
            'img': [meta['content'] for meta in og_images],
            'author': author,
            'tags': tags,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
820
821
822
class VictimsOfCircumsolar(GenericNavigableComic):
    """Class to retrieve VictimsOfCircumsolar comics."""
    # Also on http://victimsofcomics.tumblr.com
    name = 'circumsolar'
    long_name = 'Victims Of Circumsolar'
    url = 'http://www.victimsofcircumsolar.com'
    get_navi_link = get_a_navi_comicnavnext_navinext
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.victimsofcircumsolar.com/comic/modern-addiction'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        No date is returned: it is only available on the archive page.
        """
        # The last og:title / og:description tag of the page is the one used.
        og_titles = soup.find_all('meta', property='og:title')
        title = og_titles[-1]['content']
        og_descriptions = soup.find_all('meta', property='og:description')
        desc = og_descriptions[-1]['content']
        comic_imgs = soup.find('div', id='comic').find_all('img')
        assert all(tag['title'] == tag['alt'] == title for tag in comic_imgs)
        return {
            'title': title,
            'description': desc,
            'img': [tag['src'] for tag in comic_imgs],
        }
845
846
847
class ThreeWordPhrase(GenericNavigableComic):
    """Class to retrieve Three Word Phrase comics."""
    # Also on http://www.threewordphrase.tumblr.com
    name = 'threeword'
    long_name = 'Three Word Phrase'
    url = 'http://threewordphrase.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        The link is the parent element of the '/firstlink.gif' button image.
        """
        return get_soup_at_url(cls.url).find('img', src='/firstlink.gif').parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns the parent element of the next/previous button image, or
        None when the button image is absent or its parent has no 'href'.
        """
        button = last_soup.find('img', src='/nextlink.gif' if next_ else '/prevlink.gif')
        if button is None:
            # No navigation button on this page: report "no link" instead of
            # failing with an AttributeError on None.
            return None
        link = button.parent
        return None if link.get('href') is None else link

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Every image of the page is kept except those whose source ends with
        one of the known navigation/advert file names.
        """
        ignored_suffixes = (
            'link.gif', '32.png', 'twpbookad.jpg',
            'merchad.jpg', 'header.gif', 'tipjar.jpg')
        title = soup.find('title')
        imgs = [img for img in soup.find_all('img')
                if not img['src'].endswith(ignored_suffixes)]
        return {
            'title': title.string if title else None,
            'title2': '  '.join(img.get('alt') for img in imgs if img.get('alt')),
            'img': [urljoin_wrapper(cls.url, img['src']) for img in imgs],
        }
879
880
881
class DeadlyPanel(GenericEmptyComic, GenericNavigableComic):
    """Class to retrieve Deadly Panel comics."""
    # Also on https://tapastic.com/series/deadlypanel
    name = 'deadly'
    long_name = 'Deadly Panel'
    url = 'http://www.deadlypanel.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Only the image sources found in the 'comic' div are returned.
        """
        comic_imgs = soup.find('div', id='comic').find_all('img')
        assert all(tag['alt'] == tag['title'] for tag in comic_imgs)
        sources = [tag['src'] for tag in comic_imgs]
        return {
            'img': sources,
        }
898
899
900 View Code Duplication
class TheGentlemanArmchair(GenericNavigableComic):
    """Class to retrieve The Gentleman Armchair comics."""
    name = 'gentlemanarmchair'
    long_name = 'The Gentleman Armchair'
    url = 'http://thegentlemansarmchair.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Title, author and date come from the post header elements, the
        images from the 'comic' div.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        date_span = soup.find('span', class_='post-date')
        published = string_to_date(date_span.string, "%B %d, %Y")
        comic_imgs = soup.find('div', id='comic').find_all('img')
        return {
            'img': [tag['src'] for tag in comic_imgs],
            'title': title,
            'author': author,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
924
925
926
class MyExtraLife(GenericNavigableComic):
    """Class to retrieve My Extra Life comics."""
    name = 'extralife'
    long_name = 'My Extra Life'
    url = 'http://www.myextralife.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics."""
        home_soup = get_soup_at_url(cls.url)
        return home_soup.find('a', class_='comic_nav_link first_comic_link')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Images with an empty 'src' are left out of the returned list.
        """
        title = soup.find("h1", class_="comic_title").string
        date_span = soup.find("span", class_="comic_date")
        published = string_to_date(date_span.string, "%B %d, %Y")
        comic_imgs = soup.find_all("img", class_="comic")
        assert all(tag['alt'] == tag['title'] == title for tag in comic_imgs)
        return {
            'title': title,
            'img': [tag['src'] for tag in comic_imgs if tag["src"]],
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
953
954
955
class SaturdayMorningBreakfastCereal(GenericNavigableComic):
    """Class to retrieve Saturday Morning Breakfast Cereal comics."""
    # Also on http://www.gocomics.com/saturday-morning-breakfast-cereal
    # Also on http://smbc-comics.tumblr.com
    name = 'smbc'
    long_name = 'Saturday Morning Breakfast Cereal'
    url = 'http://www.smbc-comics.com'
    _categories = ('SMBC', )
    get_navi_link = get_a_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics."""
        home_soup = get_soup_at_url(cls.url)
        return home_soup.find('a', rel='start')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The main image is the 'cc-comic' one; a second image is appended
        when the 'aftercomic' div exists and its image has a non-empty source.
        """
        main_img = soup.find('img', id='cc-comic')
        img_urls = [main_img['src']]
        aftercomic = soup.find('div', id='aftercomic')
        if aftercomic:
            bonus_url = aftercomic.find('img')['src']
            if bonus_url:
                img_urls.append(bonus_url)
        publish_div = soup.find('div', class_='cc-publishtime')
        published = string_to_date(publish_div.contents[0], "%B %d, %Y")
        return {
            'title': main_img['title'],
            'img': [urljoin_wrapper(cls.url, img_url) for img_url in img_urls],
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
987
988
989
class PerryBibleFellowship(GenericListableComic):
    """Class to retrieve Perry Bible Fellowship comics."""
    name = 'pbf'
    long_name = 'Perry Bible Fellowship'
    url = 'http://pbfcomics.com'
    get_url_from_archive_element = join_cls_url_to_href

    @classmethod
    def get_archive_elements(cls):
        """Get the comic links ('/<num>/') of the home page, in reverse page order."""
        comic_link_re = re.compile('^/[0-9]*/$')
        home_soup = get_soup_at_url(cls.url)
        return reversed(home_soup.find_all('a', href=comic_link_re))

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The comic number is the link's 'name' attribute; it must agree with
        the link's href, and the single comic image must carry the link text
        as its 'alt'.
        """
        comic_url = cls.get_url_from_archive_element(link)
        comic_img_re = re.compile('^/archive_b/PBF.*')
        name = link.string
        num = int(link['name'])
        assert link['href'] == '/%d/' % num
        comic_imgs = soup.find_all('img', src=comic_img_re)
        assert len(comic_imgs) == 1
        assert comic_imgs[0]['alt'] == name
        return {
            'num': num,
            'name': name,
            'img': [urljoin_wrapper(comic_url, tag['src']) for tag in comic_imgs],
            'prefix': '%d-' % num,
        }
1019
1020
1021 View Code Duplication
class Mercworks(GenericNavigableComic):
    """Class to retrieve Mercworks comics."""
    # Also on http://mercworks.tumblr.com
    name = 'mercworks'
    long_name = 'Mercworks'
    url = 'http://mercworks.net'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        All values come from meta tags; the description defaults to an empty
        string when the og:description tag is missing.
        """
        title = soup.find('meta', property='og:title')['content']
        desc_meta = soup.find('meta', property='og:description')
        if desc_meta:
            desc = desc_meta['content']
        else:
            desc = ""
        published_time = soup.find('meta', property='article:published_time')['content']
        # Only the first 10 characters (the "%Y-%m-%d" part) are parsed.
        published = string_to_date(published_time[:10], "%Y-%m-%d")
        og_images = soup.find_all('meta', property='og:image')
        return {
            'img': [meta['content'] for meta in og_images],
            'title': title,
            'desc': desc,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
1047
1048
1049
class BerkeleyMews(GenericListableComic):
    """Class to retrieve Berkeley Mews comics."""
    # Also on http://mews.tumblr.com
    # Also on http://www.gocomics.com/berkeley-mews
    name = 'berkeley'
    long_name = 'Berkeley Mews'
    url = 'http://www.berkeleymews.com'
    _categories = ('BERKELEY', )
    get_url_from_archive_element = get_href
    comic_num_re = re.compile('%s/\\?p=([0-9]*)$' % url)

    @classmethod
    def get_archive_elements(cls):
        """Get the comic links of the archive page ('?page_id=2'), in reverse page order."""
        archive_url = urljoin_wrapper(cls.url, "?page_id=2")
        archive_soup = get_soup_at_url(archive_url)
        return reversed(archive_soup.find_all('a', href=cls.comic_num_re))

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The number comes from the comic URL ('?p=<num>'), the date from the
        three dash-separated numbers in the image file name.
        """
        comic_date_re = re.compile('.*/([0-9]*)-([0-9]*)-([0-9]*)-.*')
        comic_url = cls.get_url_from_archive_element(link)
        num = int(cls.comic_num_re.match(comic_url).group(1))
        comic_img = soup.find('div', id='comic').find('img')
        assert comic_img['alt'] == comic_img['title']
        title2 = comic_img['title']
        img_url = comic_img['src']
        year, month, day = map(int, comic_date_re.match(img_url).groups())
        return {
            'num': num,
            'title': link.string,
            'title2': title2,
            'img': [img_url],
            'year': year,
            'month': month,
            'day': day,
        }
1085
1086
1087
class GenericBouletCorp(GenericNavigableComic):
    """Generic class to retrieve BouletCorp comics in different languages."""
    # Also on http://bouletcorp.tumblr.com
    _categories = ('BOULET', )
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        It is the first link found in the 'centered_nav' div of the home page.
        """
        nav_div = get_soup_at_url(cls.url).find('div', id='centered_nav')
        return nav_div.find_all('a')[0]

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The date is parsed from the comic URL ('<url>/<y>/<m>/<d>/'); images
        without a 'src' attribute are ignored.
        """
        comic_url = cls.get_url_from_link(link)
        date_pattern = re.compile('^%s/([0-9]*)/([0-9]*)/([0-9]*)/' % cls.url)
        year, month, day = map(int, date_pattern.match(comic_url).groups())
        story = soup.find('div', id='notes').find('div', class_='storycontent')
        story_imgs = story.find_all('img')
        img_titles = [tag.get('title') for tag in story_imgs]
        texts = '  '.join(text for text in img_titles if text)
        title = soup.find('title').string
        return {
            'img': [convert_iri_to_plain_ascii_uri(tag['src'])
                    for tag in story_imgs if tag.get('src') is not None],
            'title': title,
            'texts': texts,
            'year': year,
            'month': month,
            'day': day,
        }
1115
1116
1117
class BouletCorp(GenericBouletCorp):
    """Class to retrieve BouletCorp comics (www.bouletcorp.com)."""
    # All the retrieval logic is inherited; only the identification changes.
    name = 'boulet'
    long_name = 'Boulet Corp'
    url = 'http://www.bouletcorp.com'
    _categories = ('FRANCAIS', )
1123
1124
1125
class BouletCorpEn(GenericBouletCorp):
    """Class to retrieve EnglishBouletCorp comics (english.bouletcorp.com)."""
    # All the retrieval logic is inherited; only the identification changes.
    name = 'boulet_en'
    long_name = 'Boulet Corp English'
    url = 'http://english.bouletcorp.com'
1130
1131
1132 View Code Duplication
class AmazingSuperPowers(GenericNavigableComic):
    """Class to retrieve Amazing Super Powers comics."""
    name = 'asp'
    long_name = 'Amazing Super Powers'
    url = 'http://www.amazingsuperpowers.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The title is the space-joined 'title' attributes of the comic images.
        """
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        date_span = soup.find('span', class_='post-date')
        published = string_to_date(date_span.string, "%B %d, %Y")
        comic_imgs = soup.find('div', id='comic').find_all('img')
        title = ' '.join(tag['title'] for tag in comic_imgs)
        assert all(tag['alt'] == tag['title'] for tag in comic_imgs)
        return {
            'title': title,
            'author': author,
            'img': [tag['src'] for tag in comic_imgs],
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
1157
1158
1159
class ToonHole(GenericNavigableComic):
    """Class to retrieve Toon Holes comics."""
    # Also on http://tapastic.com/series/TOONHOLE
    name = 'toonhole'
    long_name = 'Toon Hole'
    url = 'http://www.toonhole.com'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The title is the 'alt' of the first comic image, or an empty string
        when the 'comic' div holds no image.
        """
        short_url = soup.find('link', rel='shortlink')['href']
        entry_meta = soup.find('div', class_='entry-meta')
        published = string_to_date(entry_meta.contents[0].strip(), "%B %d, %Y")
        comic_imgs = soup.find('div', id='comic').find_all('img')
        title = ""
        if comic_imgs:
            first_img = comic_imgs[0]
            title = first_img['alt']
            assert first_img['title'] == title
        return {
            'short_url': short_url,
            'title': title,
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [convert_iri_to_plain_ascii_uri(tag['src']) for tag in comic_imgs],
        }
1189
1190
1191
class Channelate(GenericNavigableComic):
    """Class to retrieve Channelate comics."""
    name = 'channelate'
    long_name = 'Channelate'
    url = 'http://www.channelate.com'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        When the page has an 'extrapanelbutton' div, the page it links to is
        fetched as well and its 'extrapanelimage' images are appended.
        """
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        date_span = soup.find('span', class_='post-date')
        published = string_to_date(date_span.string, '%Y/%m/%d')
        title = soup.find('meta', property='og:title')['content']
        comic_div = soup.find('div', id='comic')
        if comic_div:
            all_imgs = comic_div.find_all('img')
        else:
            all_imgs = []
        extra_url = None
        extra_button = soup.find('div', id='extrapanelbutton')
        if extra_button:
            extra_url = extra_button.find('a')['href']
            extra_page = get_soup_at_url(extra_url)
            all_imgs.extend(extra_page.find_all('img', class_='extrapanelimage'))
        return {
            'url_extra': extra_url,
            'title': title,
            'author': author,
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [urljoin_wrapper(cls.url, tag['src']) for tag in all_imgs],
        }
1225
1226
1227
class CyanideAndHappiness(GenericNavigableComic):
    """Class to retrieve Cyanide And Happiness comics."""
    name = 'cyanide'
    long_name = 'Cyanide and Happiness'
    url = 'http://explosm.net'
    _categories = ('NSFW', )
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics."""
        return get_soup_at_url(cls.url).find('a', title='Oldest comic')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns None when the navigation link is absent or has no 'href'.
        """
        # The class value must not carry surrounding whitespace: Beautiful
        # Soup splits the class attribute on whitespace, so a filter such as
        # 'previous-comic ' (trailing space) can never match.
        link = last_soup.find('a', class_='next-comic' if next_ else 'previous-comic')
        if link is None or link.get('href') is None:
            return None
        return link

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The number is the second-to-last '/'-separated part of the og:url
        value; the author credit is expected to start with 'by ', which is
        stripped.
        """
        url2 = soup.find('meta', property='og:url')['content']
        num = int(url2.split('/')[-2])
        date_str = soup.find('h3').find('a').string
        day = string_to_date(date_str, '%Y.%m.%d')
        author = soup.find('small', class_="author-credit-name").string
        assert author.startswith('by ')
        author = author[3:]
        imgs = soup.find_all('img', id='main-comic')
        return {
            'num': num,
            'author': author,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'prefix': '%d-' % num,
            'img': [convert_iri_to_plain_ascii_uri(urljoin_wrapper(cls.url, i['src'])) for i in imgs]
        }
1266
1267
1268
class MrLovenstein(GenericComic):
    """Class to retrieve Mr Lovenstein comics."""
    # Also on https://tapastic.com/series/MrLovenstein
    name = 'mrlovenstein'
    long_name = 'Mr. Lovenstein'
    url = 'http://www.mrlovenstein.com'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generator to get the next comic. Implementation of GenericComic's abstract method.

        The range of comic numbers is derived from the '/comic/<num>' links
        of the home page; each comic page in the range is then fetched.
        """
        # TODO: more info from http://www.mrlovenstein.com/archive
        comic_num_re = re.compile('^/comic/([0-9]*)$')
        home_links = get_soup_at_url(cls.url).find_all('a', href=comic_num_re)
        nums = [int(comic_num_re.match(anchor['href']).group(1)) for anchor in home_links]
        first, last = min(nums), max(nums)
        if last_comic:
            # Resume right after the last comic already retrieved.
            first = last_comic['num'] + 1
        comic_img_re = re.compile('^/images/comics/')
        for num in range(first, last + 1):
            url = urljoin_wrapper(cls.url, '/comic/%d' % num)
            soup = get_soup_at_url(url)
            found_imgs = soup.find_all('img', src=comic_img_re)
            imgs = list(reversed(found_imgs))
            description = soup.find('meta', attrs={'name': 'description'})['content']
            img_titles = [tag.get('title') for tag in imgs]
            yield {
                'url': url,
                'num': num,
                'texts': '  '.join(text for text in img_titles if text),
                'img': [urljoin_wrapper(url, tag['src']) for tag in imgs],
                'description': description,
            }
1298
1299
1300
class DinosaurComics(GenericListableComic):
    """Class to retrieve Dinosaur Comics comics."""
    name = 'dinosaur'
    long_name = 'Dinosaur Comics'
    url = 'http://www.qwantz.com'
    get_url_from_archive_element = get_href
    comic_link_re = re.compile('^%s/index.php\\?comic=([0-9]*)$' % url)

    @classmethod
    def get_archive_elements(cls):
        """Get the comic links of the archive page, in reverse page order."""
        archive_url = urljoin_wrapper(cls.url, 'archive.php')
        comic_links = get_soup_at_url(archive_url).find_all('a', href=cls.comic_link_re)
        # first link is random -> skip it
        return reversed(comic_links[1:])

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The date is the archive link text and the 'text' value is the string
        of the node following that link.
        """
        comic_url = cls.get_url_from_archive_element(link)
        num = int(cls.comic_link_re.match(comic_url).group(1))
        date_str = link.string
        text = link.next_sibling.string
        published = string_to_date(remove_st_nd_rd_th_from_date(date_str), "%B %d, %Y")
        comic_img_re = re.compile('^%s/comics/' % cls.url)
        comic_img = soup.find('img', src=comic_img_re)
        return {
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [comic_img.get('src')],
            'title': comic_img.get('title'),
            'text': text,
            'num': num,
        }
1333
1334
1335
class ButterSafe(GenericListableComic):
    """Class to retrieve Butter Safe comics."""
    name = 'butter'
    long_name = 'ButterSafe'
    url = 'http://buttersafe.com'
    get_url_from_archive_element = get_href
    comic_link_re = re.compile('^%s/([0-9]*)/([0-9]*)/([0-9]*)/.*' % url)

    @classmethod
    def get_archive_elements(cls):
        """Get the comic links of the archive page, in reverse page order."""
        archive_url = urljoin_wrapper(cls.url, 'archive/')
        archive_soup = get_soup_at_url(archive_url)
        return reversed(archive_soup.find_all('a', href=cls.comic_link_re))

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The date is parsed from the comic URL; the title is the archive link
        text, which must equal the 'alt' of the comic image.
        """
        comic_url = cls.get_url_from_archive_element(link)
        title = link.string
        year, month, day = map(int, cls.comic_link_re.match(comic_url).groups())
        comic_img = soup.find('div', id='comic').find('img')
        assert comic_img['alt'] == title
        return {
            'title': title,
            'day': day,
            'month': month,
            'year': year,
            'img': [comic_img['src']],
        }
1363
1364
1365
class CalvinAndHobbes(GenericComic):
    """Class to retrieve Calvin and Hobbes comics."""
    # Also on http://www.gocomics.com/calvinandhobbes/
    name = 'calvin'
    long_name = 'Calvin and Hobbes'
    # This is not through any official webpage but eh...
    url = 'http://marcel-oehler.marcellosendos.ch/comics/ch/'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generator to get the next comic. Implementation of GenericComic's abstract method.

        Walks the '<year>/<month>/' links of the index page and, for each
        month that may contain comics newer than the last retrieved one,
        yields one dictionary per image dated strictly after that comic.
        Without a previous comic, the reference date is 1985-11-01.
        """
        last_date = get_date_for_comic(
            last_comic) if last_comic else date(1985, 11, 1)
        link_re = re.compile('^([0-9]*)/([0-9]*)/')
        for link in get_soup_at_url(cls.url).find_all('a', href=link_re):
            url = link['href']
            # The matched strings are kept as-is for building patterns and
            # URLs; the integer forms are used for date arithmetic.
            year_str, month_str = link_re.match(url).groups()
            year, month = int(year_str), int(month_str)
            # First of the month + 31 days is on or after the end of the
            # month: anything earlier than last_date cannot hold new comics.
            if date(year, month, 1) + timedelta(days=31) < last_date:
                continue
            img_re = re.compile('^%s%s([0-9]*)' % (year_str, month_str))
            month_url = urljoin_wrapper(cls.url, url)
            for img in get_soup_at_url(month_url).find_all('img', src=img_re):
                img_src = img['src']
                day = int(img_re.match(img_src).groups()[0])
                comic_date = date(year, month, day)
                if comic_date > last_date:
                    yield {
                        'url': month_url,
                        'year': year,
                        'month': month,
                        'day': day,
                        'img': ['%s%s/%s/%s' % (cls.url, year_str, month_str, img_src)],
                    }
                    last_date = comic_date
1399
1400
1401
class AbstruseGoose(GenericListableComic):
    """Class to retrieve AbstruseGoose Comics."""
    name = 'abstruse'
    long_name = 'Abstruse Goose'
    url = 'http://abstrusegoose.com'
    get_url_from_archive_element = get_href
    comic_url_re = re.compile('^%s/([0-9]*)$' % url)
    comic_img_re = re.compile('^%s/strips/.*' % url)

    @classmethod
    def get_archive_elements(cls):
        """Get the comic links of the archive page, in page order."""
        archive_url = urljoin_wrapper(cls.url, 'archive')
        archive_soup = get_soup_at_url(archive_url)
        return archive_soup.find_all('a', href=cls.comic_url_re)

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Get information about a particular comics.

        The number comes from the comic URL, the title from the archive link
        text and the image is the first one whose source is under '/strips/'.
        """
        comic_url = cls.get_url_from_archive_element(archive_elt)
        num = int(cls.comic_url_re.match(comic_url).group(1))
        return {
            'num': num,
            'title': archive_elt.string,
            'img': [soup.find('img', src=cls.comic_img_re)['src']]
        }
1424
1425
1426
class PhDComics(GenericNavigableComic):
    """Class to retrieve PHD Comics."""
    name = 'phd'
    long_name = 'PhD Comics'
    url = 'http://phdcomics.com/comics/archive.php'

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        It is the parent of the 'first' button image, or None without it.
        """
        button = get_soup_at_url(cls.url).find(
            'img', src='http://phdcomics.com/comics/images/first_button.gif')
        if button is None:
            return None
        return button.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        It is the parent of the 'next'/'prev' button image, or None without it.
        """
        direction = 'next' if next_ else 'prev'
        button_src = 'http://phdcomics.com/comics/images/%s_button.gif' % direction
        button = last_soup.find('img', src=button_src)
        if button is None:
            return None
        return button.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics."""
        title_meta = soup.find('meta', attrs={'name': 'twitter:title'})
        title = title_meta['content']
        og_images = soup.find_all('meta', property='og:image')
        return {
            'img': [meta['content'] for meta in og_images],
            'title': title,
        }
1455
1456 View Code Duplication
1457
class Octopuns(GenericEmptyComic, GenericNavigableComic):
    """Class to retrieve Octopuns comics."""
    # Also on http://octopuns.tumblr.com
    name = 'octopuns'
    long_name = 'Octopuns'
    url = 'http://www.octopuns.net'

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        The link is the parent element of the 'First.png' button image.
        """
        return get_soup_at_url(cls.url).find('img', src=re.compile('.*/First.png')).parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns the parent element of the 'Next.png'/'Back.png' button image,
        or None when the button image is absent or its parent has no 'href'.
        """
        button = last_soup.find('img', src=re.compile('.*/Next.png' if next_ else '.*/Back.png'))
        if button is None:
            # No navigation button on this page: report "no link" instead of
            # failing with an AttributeError on None.
            return None
        link = button.parent
        return None if link.get('href') is None else link

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The date header is parsed with the format "%A, %B %d, %Y" and the
        images are the targets of the <link rel="image_src"> tags.
        """
        title = soup.find('h3', class_='post-title entry-title').string
        date_str = soup.find('h2', class_='date-header').string
        day = string_to_date(date_str, "%A, %B %d, %Y")
        imgs = soup.find_all('link', rel='image_src')
        return {
            'img': [i['href'] for i in imgs],
            'title': title,
            'day': day.day,
            'month': day.month,
            'year': day.year,
        }
1489
1490
1491
class Quarktees(GenericNavigableComic):
    """Retriever for the Quarktees comics."""
    name = 'quarktees'
    long_name = 'Quarktees'
    url = 'http://www.quarktees.com/blogs/news'
    first_url = 'http://www.quarktees.com/blogs/news/12486621-coming-soon'
    get_first_comic_link = simulate_first_link
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the <a> whose id is 'article-next' or 'article-prev'."""
        wanted_id = 'article-next' if next_ else 'article-prev'
        return last_soup.find('a', id=wanted_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the title and image URLs of the comic parsed in `soup`.

        The title is the 'og:title' meta content; images are every <img> of
        the 'single-article' div, joined with `cls.url`. `link` is unused.
        """
        og_title = soup.find('meta', property='og:title')['content']
        container = soup.find('div', class_='single-article')
        sources = []
        for tag in container.find_all('img'):
            sources.append(urljoin_wrapper(cls.url, tag['src']))
        return {'title': og_title, 'img': sources}
1515
1516
1517
class OverCompensating(GenericNavigableComic):
    """Retriever for the Over Compensating comics."""
    name = 'compensating'
    long_name = 'Over Compensating'
    url = 'http://www.overcompensating.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the <a> of the home page whose href matches 'comic=1$'."""
        home = get_soup_at_url(cls.url)
        first_re = re.compile('comic=1$')
        return home.find('a', href=first_re)

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the <a> titled 'next comic' or 'go back already'."""
        wanted_title = 'next comic' if next_ else 'go back already'
        return last_soup.find('a', title=wanted_title)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return number, image URL and title of the comic behind `link`.

        The number is parsed from the 'comic=<digits>' suffix of the comic
        URL; the image is the first <img> whose src starts with '/oc/comics/'.
        """
        src_pattern = re.compile('^/oc/comics/.*')
        num_pattern = re.compile('.*comic=([0-9]*)$')
        page_url = cls.get_url_from_link(link)
        number = int(num_pattern.match(page_url).group(1))
        picture = soup.find('img', src=src_pattern)
        info = {'num': number}
        info['img'] = [urljoin_wrapper(page_url, picture['src'])]
        info['title'] = picture.get('title')
        return info
1547
1548
1549
class Oglaf(GenericNavigableComic):
    """Retriever for the Oglaf comics."""
    name = 'oglaf'
    long_name = 'Oglaf [NSFW]'
    url = 'http://oglaf.com'
    _categories = ('NSFW', )
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the parent of the div with id 'st' on the home page."""
        home = get_soup_at_url(cls.url)
        return home.find("div", id="st").parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the parent of the 'nx' (next) or 'pvs' (previous) div.

        Returns None when no such div is found.
        """
        marker = last_soup.find("div", id="nx" if next_ else "pvs")
        if marker:
            return marker.parent
        return None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, image sources and description of a comic page.

        Exactly one image is expected inside the 'tt' div and exactly one
        image with id 'strip'; the description joins their titles.
        """
        page_title = soup.find('title').string
        header_imgs = soup.find('div', id='tt').find_all('img')
        assert len(header_imgs) == 1
        strips = soup.find_all('img', id='strip')
        assert len(strips) == 1
        pictures = list(header_imgs) + list(strips)
        description = ' '.join(pic['title'] for pic in pictures)
        sources = [pic['src'] for pic in pictures]
        return {
            'title': page_title,
            'img': sources,
            'description': description,
        }
1583
1584
1585
class ScandinaviaAndTheWorld(GenericNavigableComic):
    """Retriever for the Scandinavia And The World comics."""
    name = 'satw'
    long_name = 'Scandinavia And The World'
    url = 'http://satwcomic.com'
    first_url = 'http://satwcomic.com/sweden-denmark-and-norway'
    get_first_comic_link = simulate_first_link

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the <a> whose accesskey is 'n' (next) or 'p' (previous)."""
        key = 'n' if next_ else 'p'
        return last_soup.find('a', accesskey=key)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, description and image sources of a comic page.

        Title comes from the 'twitter:label1' meta tag, description from
        'og:description', images from every <img itemprop="image">.
        """
        label = soup.find('meta', attrs={'name': 'twitter:label1'})['content']
        og_desc = soup.find('meta', property='og:description')['content']
        pictures = soup.find_all('img', itemprop="image")
        return {
            'title': label,
            'description': og_desc,
            'img': [pic['src'] for pic in pictures],
        }
1609
1610
1611
class SomethingOfThatIlk(GenericEmptyComic):
    """Placeholder for the Something Of That Ilk comics.

    The site does not exist anymore, hence the GenericEmptyComic base.
    """
    url = 'http://www.somethingofthatilk.com'
    name = 'somethingofthatilk'
    long_name = 'Something Of That Ilk'
1616
1617
1618
class InfiniteMonkeyBusiness(GenericNavigableComic):
    """Retriever for the Infinite Monkey Business comics."""
    name = 'monkey'
    long_name = 'Infinite Monkey Business'
    url = 'http://infinitemonkeybusiness.net'
    first_url = 'http://infinitemonkeybusiness.net/comic/pillory/'
    get_first_comic_link = simulate_first_link
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the 'og:title' and the images of the 'comic' div."""
        og_title = soup.find('meta', property='og:title')['content']
        comic_div = soup.find('div', id='comic')
        sources = []
        for picture in comic_div.find_all('img'):
            sources.append(picture['src'])
        return {'title': og_title, 'img': sources}
1636
1637
1638
class Wondermark(GenericListableComic):
    """Retriever for the Wondermark comics."""
    name = 'wondermark'
    long_name = 'Wondermark'
    url = 'http://wondermark.com'
    get_url_from_archive_element = get_href

    @classmethod
    def get_archive_elements(cls):
        """Return the archive page's rel='bookmark' links, reversed."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive/'))
        bookmarks = archive_soup.find_all('a', rel='bookmark')
        return reversed(bookmarks)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return date, image, title, alt text and tags of a post.

        Posts without a 'comic' div yield an empty image list and empty
        title/alt strings; date and tags are always extracted.
        """
        raw_date = soup.find('div', class_='postdate').find('em').string
        day = string_to_date(remove_st_nd_rd_th_from_date(raw_date), "%B %d, %Y")
        comic_div = soup.find('div', id='comic')
        # Defaults used for posts that have no 'comic' div.
        img_src, alt, title = [], '', ''
        if comic_div:
            picture = comic_div.find('img')
            img_src = [picture['src']]
            alt = picture['alt']
            assert alt == picture['title']
            title = soup.find('meta', property='og:title')['content']
        tag_links = soup.find('div', class_='postmeta').find_all('a', rel='tag')
        return {
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': img_src,
            'title': title,
            'alt': alt,
            'tags': ' '.join(tag.string for tag in tag_links),
        }
1675
1676
1677
class WarehouseComic(GenericNavigableComic):
    """Retriever for the Warehouse Comic comics."""
    name = 'warehouse'
    long_name = 'Warehouse Comic'
    url = 'http://warehousecomic.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = get_a_navi_navifirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return images, title and publication date of a comic page.

        The date is read from the 'post-date' span and parsed with
        "%B %d, %Y"; images are those of the 'comic' div.
        """
        post_title = soup.find('h2', class_='post-title').string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find('div', id='comic').find_all('img')
        sources = [pic['src'] for pic in pictures]
        return dict(
            img=sources,
            title=post_title,
            day=published.day,
            month=published.month,
            year=published.year,
        )
1699
1700
1701
class JustSayEh(GenericNavigableComic):
    """Retriever for the Just Say Eh comics."""
    # Also on http://tapastic.com/series/Just-Say-Eh
    name = 'justsayeh'
    long_name = 'Just Say Eh'
    url = 'http://www.justsayeh.com'
    get_navi_link = get_a_navi_comicnavnext_navinext
    get_first_comic_link = get_a_navi_navifirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return images, title and alt text of a comic page.

        Every image of the 'comic' div must have identical 'alt' and 'title'
        attributes; the alt text of the first image is reported.
        """
        post_title = soup.find('h2', class_='post-title').string
        pictures = soup.find("div", id="comic").find_all("img")
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        first_alt = pictures[0]['alt']
        sources = [pic['src'] for pic in pictures]
        return {'img': sources, 'title': post_title, 'alt': first_alt}
1722
1723
1724 View Code Duplication
class MouseBearComedy(GenericNavigableComic):
    """Retriever for the Mouse Bear Comedy comics."""
    # Also on http://mousebearcomedy.tumblr.com
    name = 'mousebear'
    long_name = 'Mouse Bear Comedy'
    url = 'http://www.mousebearcomedy.com'
    get_navi_link = get_a_navi_comicnavnext_navinext
    get_first_comic_link = get_a_navi_navifirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return date, images, title and author of a comic page.

        Every image of the 'comic' div must carry 'alt' and 'title'
        attributes equal to the post title.
        """
        post_title = soup.find('h2', class_='post-title').string
        writer = soup.find("span", class_="post-author").find("a").string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, '%B %d, %Y')
        pictures = soup.find("div", id="comic").find_all("img")
        assert all(pic['alt'] == pic['title'] == post_title for pic in pictures)
        return dict(
            day=published.day,
            month=published.month,
            year=published.year,
            img=[pic['src'] for pic in pictures],
            title=post_title,
            author=writer,
        )
1750
1751
1752 View Code Duplication
class BigFootJustice(GenericNavigableComic):
    """Retriever for the Big Foot Justice comics."""
    # Also on http://tapastic.com/series/bigfoot-justice
    name = 'bigfoot'
    long_name = 'Big Foot Justice'
    url = 'http://bigfootjustice.com'
    get_navi_link = get_a_navi_comicnavnext_navinext
    get_first_comic_link = get_a_navi_navifirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the images of the 'comic' div and their joined titles.

        Each image must have identical 'title' and 'alt' attributes.
        """
        pictures = soup.find('div', id='comic').find_all('img')
        assert all(pic['title'] == pic['alt'] for pic in pictures)
        joined_title = ' '.join(pic['title'] for pic in pictures)
        sources = [pic['src'] for pic in pictures]
        return {'img': sources, 'title': joined_title}
1771
1772
1773
class RespawnComic(GenericNavigableComic):
    """Class to retrieve Respawn Comic."""
    # Also on http://respawncomic.tumblr.com
    name = 'respawn'
    long_name = 'Respawn Comic'
    # The original literal ended with a stray space, which is not valid
    # inside a URL; it has been removed.
    url = 'http://respawncomic.com'
    _categories = ('RESPAWN', )
    get_navi_link = get_a_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://respawncomic.com/comic/c0001/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Title, author and publication date come from meta tags ('og:title'
        and the two 'shareaholic:article_*' tags); only the first ten
        characters of the timestamp are parsed, as "%Y-%m-%d". Images are
        the 'og:image' meta contents, minus the two URLs in `skip_imgs`.
        `link` is unused.
        """
        title = soup.find('meta', property='og:title')['content']
        author = soup.find('meta', attrs={'name': 'shareaholic:article_author_name'})['content']
        date_str = soup.find('meta', attrs={'name': 'shareaholic:article_published_time'})['content']
        date_str = date_str[:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        imgs = soup.find_all('meta', property='og:image')
        # 'og:image' URLs that are filtered out of the result.
        skip_imgs = {
            'http://respawncomic.com/wp-content/uploads/2016/03/site/HAROLD2.png',
            'http://respawncomic.com/wp-content/uploads/2016/03/site/DEVA.png'
        }
        return {
            'title': title,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [i['content'] for i in imgs if i['content'] not in skip_imgs],
        }
1805
1806
1807 View Code Duplication
class SafelyEndangered(GenericNavigableComic):
    """Retriever for the Safely Endangered comics."""
    # Also on http://tumblr.safelyendangered.com
    name = 'endangered'
    long_name = 'Safely Endangered'
    url = 'http://www.safelyendangered.com'
    first_url = 'http://www.safelyendangered.com/comic/ignored/'
    get_first_comic_link = simulate_first_link
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return date, images, title and alt text of a comic page.

        The alt text is taken from the first image of the 'comic' div; all
        images must have identical 'alt' and 'title' attributes.
        """
        post_title = soup.find('h2', class_='post-title').string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, '%B %d, %Y')
        pictures = soup.find('div', id='comic').find_all('img')
        first_alt = pictures[0]['alt']
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        return dict(
            day=published.day,
            month=published.month,
            year=published.year,
            img=[pic['src'] for pic in pictures],
            title=post_title,
            alt=first_alt,
        )
1834
1835
1836 View Code Duplication
class PicturesInBoxes(GenericNavigableComic):
    """Retriever for the Pictures In Boxes comics."""
    # Also on http://picturesinboxescomic.tumblr.com
    name = 'picturesinboxes'
    long_name = 'Pictures in Boxes'
    url = 'http://www.picturesinboxes.com'
    first_url = 'http://www.picturesinboxes.com/2013/10/26/tetris/'
    get_first_comic_link = simulate_first_link
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return date, images, title and author of a comic page.

        At least one image is required in the 'comicpane' div, and each
        must have 'title' and 'alt' attributes equal to the post title.
        """
        post_title = soup.find('h2', class_='post-title').string
        writer = soup.find("span", class_="post-author").find("a").string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, '%B %d, %Y')
        pictures = soup.find('div', class_='comicpane').find_all('img')
        assert pictures
        assert all(pic['title'] == pic['alt'] == post_title for pic in pictures)
        return dict(
            day=published.day,
            month=published.month,
            year=published.year,
            img=[pic['src'] for pic in pictures],
            title=post_title,
            author=writer,
        )
1864
1865
1866 View Code Duplication
class Penmen(GenericNavigableComic):
    """Retriever for the Penmen comics."""
    name = 'penmen'
    long_name = 'Penmen'
    url = 'http://penmen.com'
    first_url = 'http://penmen.com/index.php/2016/09/12/penmen-announces-grin-big-brand-clothing/'
    get_first_comic_link = simulate_first_link
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, short URL, images, tags and date of a post.

        The date is the first ten characters of the <time> element's
        'datetime' attribute, parsed with "%Y-%m-%d".
        """
        page_title = soup.find('title').string
        pictures = soup.find('div', class_='entry-content').find_all('img')
        shortlink = soup.find('link', rel='shortlink')['href']
        tag_text = ' '.join(anchor.string for anchor in soup.find_all('a', rel='tag'))
        iso_date = soup.find('time')['datetime'][:10]
        published = string_to_date(iso_date, "%Y-%m-%d")
        return dict(
            title=page_title,
            short_url=shortlink,
            img=[pic['src'] for pic in pictures],
            tags=tag_text,
            month=published.month,
            year=published.year,
            day=published.day,
        )
1893
1894
1895
class TheDoghouseDiaries(GenericNavigableComic):
    """Retriever for The Dog House Diaries comics."""
    name = 'doghouse'
    long_name = 'The Dog House Diaries'
    url = 'http://thedoghousediaries.com'

    @classmethod
    def get_first_comic_link(cls):
        """Return the <a> with id 'firstlink' from the home page."""
        home = get_soup_at_url(cls.url)
        return home.find('a', id='firstlink')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the <a> with id 'nextlink' or 'previouslink'."""
        wanted_id = 'nextlink' if next_ else 'previouslink'
        return last_soup.find('a', id=wanted_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return titles, alt text, image URL and number of a comic.

        The number is the last '/'-separated component of the comic URL;
        the image is the first <img> whose src starts with 'dhdcomics/'.
        """
        picture = soup.find('img', src=re.compile('^dhdcomics/.*'))
        page_url = cls.get_url_from_link(link)
        main_title = soup.find('h2', id='titleheader').string
        sub_title = soup.find('div', id='subtext').string
        hover_text = picture.get('title')
        # The src attribute is stripped of surrounding whitespace before joining.
        image_url = urljoin_wrapper(page_url, picture['src'].strip())
        number = int(page_url.split('/')[-1])
        return dict(
            title=main_title,
            title2=sub_title,
            alt=hover_text,
            img=[image_url],
            num=number,
        )
1924
1925
1926
class InvisibleBread(GenericListableComic):
    """Retriever for the Invisible Bread comics."""
    # Also on http://www.gocomics.com/invisible-bread
    name = 'invisiblebread'
    long_name = 'Invisible Bread'
    url = 'http://invisiblebread.com'

    @classmethod
    def get_archive_elements(cls):
        """Return the archive's 'archive-title' cells, reversed."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archives/'))
        cells = archive_soup.find_all('td', class_='archive-title')
        return reversed(cells)

    @classmethod
    def get_url_from_archive_element(cls, td):
        """Return the href of the first <a> inside the archive cell."""
        anchor = td.find('a')
        return anchor['href']

    @classmethod
    def get_comic_info(cls, soup, td):
        """Return date, image URL and title of the comic listed in `td`.

        Month and day come from the cell preceding `td`; the year is the
        first numeric path component of the comic URL. The single comic
        image must have 'title' and 'alt' equal to the archive title.
        """
        comic_url = cls.get_url_from_archive_element(td)
        archive_title = td.find('a').string
        month_and_day = td.previous_sibling.string
        year_pattern = re.compile('^%s/([0-9]+)/' % cls.url)
        year_text = year_pattern.match(comic_url).group(1)
        published = string_to_date(month_and_day + ' ' + year_text, '%b %d %Y')
        pictures = [soup.find('div', id='comic').find('img')]
        assert len(pictures) == 1
        assert all(pic['title'] == pic['alt'] == archive_title for pic in pictures)
        return dict(
            month=published.month,
            year=published.year,
            day=published.day,
            img=[urljoin_wrapper(cls.url, pic['src']) for pic in pictures],
            title=archive_title,
        )
1962
1963
1964
class DiscoBleach(GenericEmptyComic):
    """Placeholder for the Disco Bleach comics.

    Retrieval does not work anymore, hence the GenericEmptyComic base.
    """
    url = 'http://discobleach.com'
    name = 'discobleach'
    long_name = 'Disco Bleach'
1969
1970
1971
class TubeyToons(GenericEmptyComic):
    """Placeholder for the TubeyToons comics.

    Retrieval does not work anymore, hence the GenericEmptyComic base.
    """
    # Also on http://tapastic.com/series/Tubey-Toons
    # Also on http://tubeytoons.tumblr.com
    url = 'http://tubeytoons.com'
    name = 'tubeytoons'
    long_name = 'Tubey Toons'
    _categories = ('TUNEYTOONS', )
1979
1980
1981
class CompletelySeriousComics(GenericNavigableComic):
    """Retriever for the Completely Serious comics."""
    name = 'completelyserious'
    long_name = 'Completely Serious Comics'
    url = 'http://completelyseriouscomics.com'
    get_navi_link = get_a_navi_navinext
    get_first_comic_link = get_a_navi_navifirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return date, images, title, alt text and author of a comic.

        The author is the second child of the 'post-author' span. At least
        one image is required in the 'comicpane' div, and all images must
        share the same 'title' and 'alt' text as the first one.
        """
        post_title = soup.find('h2', class_='post-title').string
        writer = soup.find('span', class_='post-author').contents[1].string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, '%B %d, %Y')
        pictures = soup.find('div', class_='comicpane').find_all('img')
        assert pictures
        hover_text = pictures[0]['title']
        assert all(pic['title'] == pic['alt'] == hover_text for pic in pictures)
        return dict(
            month=published.month,
            year=published.year,
            day=published.day,
            img=[pic['src'] for pic in pictures],
            title=post_title,
            alt=hover_text,
            author=writer,
        )
2009
2010
2011
class PoorlyDrawnLines(GenericListableComic):
    """Retriever for the Poorly Drawn Lines comics."""
    # Also on http://pdlcomics.tumblr.com
    name = 'poorlydrawn'
    long_name = 'Poorly Drawn Lines'
    url = 'http://poorlydrawnlines.com'
    _categories = ('POORLYDRAWN', )
    get_url_from_archive_element = get_href

    @classmethod
    def get_archive_elements(cls):
        """Return the archive links pointing under '<url>/comic/', reversed."""
        archive_url = urljoin_wrapper(cls.url, 'archive')
        comic_href = re.compile('^%s/comic/.' % cls.url)
        anchors = get_soup_at_url(archive_url).find_all('a', href=comic_href)
        return reversed(anchors)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the image (at most one) of the 'post' div and its title.

        The title is the image's 'title' attribute, or an empty string when
        the attribute or the image itself is missing.
        """
        pictures = soup.find('div', class_='post').find_all('img')
        assert len(pictures) <= 1
        sources = [pic['src'] for pic in pictures]
        if pictures:
            hover_text = pictures[0].get('title', "")
        else:
            hover_text = ""
        return {'img': sources, 'title': hover_text}
2035
2036 View Code Duplication
2037
class LoadingComics(GenericNavigableComic):
    """Retriever for the Loading Artist comics."""
    name = 'loadingartist'
    long_name = 'Loading Artist'
    url = 'http://www.loadingartist.com/latest'

    @classmethod
    def get_first_comic_link(cls):
        """Return the <a> titled "First" from the page at `cls.url`."""
        latest = get_soup_at_url(cls.url)
        return latest.find('a', title="First")

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the <a> titled 'Next' or 'Previous'."""
        wanted_title = 'Next' if next_ else 'Previous'
        return last_soup.find('a', title=wanted_title)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, images and date of a comic page.

        Only images of the 'comic' div with empty 'alt' and 'title'
        attributes are kept; the date is the stripped text of the 'date'
        span, parsed with "%B %d, %Y".
        """
        heading = soup.find('h1').string
        raw_date = soup.find('span', class_='date').string.strip()
        published = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find('div', class_='comic').find_all('img', alt='', title='')
        return dict(
            title=heading,
            img=[pic['src'] for pic in pictures],
            month=published.month,
            year=published.year,
            day=published.day,
        )
2067
2068
2069
class ChuckleADuck(GenericNavigableComic):
    """Retriever for the Chuckle-A-Duck comics."""
    name = 'chuckleaduck'
    long_name = 'Chuckle-A-duck'
    url = 'http://chuckleaduck.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = get_div_navfirst_a

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return date, images, title and author of a comic page.

        Pages without a 'comic' div yield no image and an empty title.
        Otherwise the title is the first image's 'title' attribute, which
        every image must share in both 'title' and 'alt'.
        """
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(remove_st_nd_rd_th_from_date(raw_date), "%B %d, %Y")
        writer = soup.find('span', class_='post-author').string
        comic_div = soup.find('div', id='comic')
        if comic_div:
            pictures = comic_div.find_all('img')
        else:
            pictures = []
        if pictures:
            hover_text = pictures[0]['title']
        else:
            hover_text = ""
        assert all(pic['title'] == pic['alt'] == hover_text for pic in pictures)
        return dict(
            month=published.month,
            year=published.year,
            day=published.day,
            img=[pic['src'] for pic in pictures],
            title=hover_text,
            author=writer,
        )
2095
2096
2097
class DepressedAlien(GenericNavigableComic):
    """Class to retrieve Depressed Alien Comics."""
    name = 'depressedalien'
    long_name = 'Depressed Alien'
    url = 'http://depressedalien.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        Returns the parent element of the image named 'beginArrow' on the
        page at `cls.url`.
        """
        return get_soup_at_url(cls.url).find('img', attrs={'name': 'beginArrow'}).parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns the parent element of the image named 'rightArrow' (or
        'leftArrow' when `next_` is false), or None when `last_soup`
        contains no such image.
        """
        arrow = last_soup.find('img', attrs={'name': 'rightArrow' if next_ else 'leftArrow'})
        # find() yields None when nothing matches; report "no link" rather
        # than failing with AttributeError on None.parent.
        return None if arrow is None else arrow.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The title is the 'twitter:title' meta content and the images are the
        contents of every 'og:image' meta tag. `link` is unused.
        """
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        imgs = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'img': [i['content'] for i in imgs],
        }
2123
2124
2125
class ThingsInSquares(GenericListableComic):
    """Retriever for the Things In Squares comics."""
    # This can be retrieved in other languages
    # Also on https://tapastic.com/series/Things-in-Squares
    name = 'squares'
    long_name = 'Things in squares'
    url = 'http://www.thingsinsquares.com'

    @classmethod
    def get_archive_elements(cls):
        """Return the rows of the archive page's <tbody>, reversed."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive-2'))
        rows = archive_soup.find('tbody').find_all('tr')
        return reversed(rows)

    @classmethod
    def get_url_from_archive_element(cls, tr):
        """Return the href of the link in the second cell of the row.

        The row must contain exactly three cells.
        """
        _, link_cell, _ = tr.find_all('td')
        return link_cell.find('a')['href']

    @classmethod
    def get_comic_info(cls, soup, tr):
        """Return date, titles, description, tags, images and alt text.

        Title and date ("%m.%d.%y") come from the second and third cells of
        the archive row `tr`; the rest is read from the comic page `soup`.
        A missing 'og:description' meta tag yields an empty description.
        """
        _, link_cell, date_cell = tr.find_all('td')
        anchor = link_cell.find('a')
        published = string_to_date(date_cell.string, "%m.%d.%y")
        archive_title = anchor.string
        og_title = soup.find('meta', property='og:title')['content']
        desc_meta = soup.find('meta', property='og:description')
        if desc_meta:
            description = desc_meta['content']
        else:
            description = ''
        tag_text = ' '.join(
            meta['content'] for meta in soup.find_all('meta', property='article:tag'))
        pictures = soup.find('div', class_='entry-content').find_all('img')
        return dict(
            day=published.day,
            month=published.month,
            year=published.year,
            title=archive_title,
            title2=og_title,
            description=description,
            tags=tag_text,
            img=[pic['src'] for pic in pictures],
            alt=' '.join(pic['alt'] for pic in pictures),
        )
2167
2168
2169
class HappleTea(GenericNavigableComic):
    """Retriever for the Happle Tea comics."""
    name = 'happletea'
    long_name = 'Happle Tea'
    url = 'http://www.happletea.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = get_a_navi_navifirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, images, alt text, date and author of a comic.

        Title, author and date are read from the 'post-content' div; the
        alt text concatenates (without separator) the 'alt' of every image
        of the 'comic' div, each of which must equal the image's 'title'.
        """
        pictures = soup.find('div', id='comic').find_all('img')
        post_div = soup.find('div', class_='post-content')
        post_title = post_div.find('h2', class_='post-title').string
        writer = post_div.find('a', rel='author').string
        raw_date = post_div.find('span', class_='post-date').string
        published = string_to_date(raw_date, "%B %d, %Y")
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        return dict(
            title=post_title,
            img=[pic['src'] for pic in pictures],
            alt=''.join(pic['alt'] for pic in pictures),
            month=published.month,
            year=published.year,
            day=published.day,
            author=writer,
        )
2196
2197
2198
class RockPaperScissors(GenericNavigableComic):
    """Retriever for the Rock Paper Scissors comics."""
    name = 'rps'
    long_name = 'Rock Paper Scissors'
    url = 'http://rps-comics.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = get_a_navi_navifirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, transcript, short URL and images of a comic.

        Images are the 'og:image' meta contents; the transcript is the
        string of the 'transcript-content' div.
        """
        page_title = soup.find('title').string
        og_images = soup.find_all('meta', property='og:image')
        shortlink = soup.find('link', rel='shortlink')['href']
        transcript_text = soup.find('div', id='transcript-content').string
        return dict(
            title=page_title,
            transcript=transcript_text,
            short_url=shortlink,
            img=[meta['content'] for meta in og_images],
        )
2219
2220
2221
class FatAwesomeComics(GenericNavigableComic):
    """Retriever for the Fat Awesome comics."""
    # Also on http://fatawesomecomedy.tumblr.com
    name = 'fatawesome'
    long_name = 'Fat Awesome'
    url = 'http://fatawesome.com/comics'
    first_url = 'http://fatawesome.com/shortbus/'
    get_first_comic_link = simulate_first_link
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, description, tags, alt text, image and date.

        Exactly one <img data-recalc-dims="1"> is expected; its src is cut
        at the last '?'. A missing 'article:tag' meta tag yields empty tags.
        The date is the first ten characters of 'article:published_time'.
        """
        twitter_title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        meta_desc = soup.find('meta', attrs={'name': 'description'})['content']
        tag_meta = soup.find('meta', property='article:tag')
        if tag_meta:
            tag_text = tag_meta['content']
        else:
            tag_text = ""
        iso_date = soup.find('meta', property='article:published_time')['content'][:10]
        published = string_to_date(iso_date, "%Y-%m-%d")
        pictures = soup.find_all('img', attrs={'data-recalc-dims': "1"})
        assert len(pictures) == 1
        return dict(
            title=twitter_title,
            description=meta_desc,
            tags=tag_text,
            alt="".join(pic['alt'] for pic in pictures),
            img=[pic['src'].rsplit('?', 1)[0] for pic in pictures],
            month=published.month,
            year=published.year,
            day=published.day,
        )
2252
2253
2254
class AnythingComic(GenericListableComic):
    """Retriever for Anything Comic, driven by the site's archive table."""
    # Mirror: http://tapastic.com/series/anything
    name = 'anythingcomic'
    long_name = 'Anything Comic'
    url = 'http://www.anythingcomic.com'

    @classmethod
    def get_archive_elements(cls):
        """Return the <tr> rows of the archive table, minus the first two."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive/'))
        rows = archive_soup.find('table', id='chapter_table').find_all('tr')
        # The first 2 <tr>'s do not correspond to comics
        return rows[2:]

    @classmethod
    def get_url_from_archive_element(cls, tr):
        """Return the absolute url of the comic listed in archive row `tr`."""
        # The unpacking requires the row to hold exactly four <td> cells.
        _, comic_cell, _, _ = tr.find_all('td')
        anchor = comic_cell.find('a')
        return urljoin_wrapper(cls.url, anchor['href'])

    @classmethod
    def get_comic_info(cls, soup, tr):
        """Build the info dictionary from the comic page and its archive row.

        Number, title and date come from the cells of `tr`; the image comes
        from the page parsed in `soup`.
        """
        num_cell, comic_cell, date_cell, _ = tr.find_all('td')
        num = int(num_cell.string)
        title = comic_cell.find('a').string
        pictures = soup.find_all('img', id='comic_image')
        cleaned_date = remove_st_nd_rd_th_from_date(date_cell.string)
        day = string_to_date(cleaned_date, "%B %d, %Y, %I:%M %p")
        assert len(pictures) == 1
        for pic in pictures:
            assert pic.get('alt') == pic.get('title')
        return {
            'num': num,
            'title': title,
            'alt': pictures[0].get('alt', ''),
            'img': [pic['src'] for pic in pictures],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
2295
2296
2297
class LonnieMillsap(GenericNavigableComic):
    """Retriever for Lonnie Millsap's comics."""
    name = 'millsap'
    long_name = 'Lonnie Millsap'
    url = 'http://www.lonniemillsap.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.lonniemillsap.com/?p=42'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect title, author, images and date from a comic page.

        Everything except the title is looked up inside the 'post-content'
        div. `link` is not used.
        """
        title = soup.find('h2', class_='post-title').string
        content = soup.find('div', class_='post-content')
        author_span = content.find("span", class_="post-author")
        author = author_span.find("a").string
        date_span = content.find("span", class_="post-date")
        day = string_to_date(date_span.string, "%B %d, %Y")
        entry = content.find("div", class_="entry")
        sources = [picture['src'] for picture in entry.find_all("img")]
        return {
            'title': title,
            'author': author,
            'img': sources,
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
2323
2324
2325 View Code Duplication
class LinsEditions(GenericNavigableComic):
    """Retriever for the L.I.N.S. Editions comics."""
    # Mirror: http://linscomics.tumblr.com
    # Now published on https://warandpeas.com
    name = 'lins'
    long_name = 'L.I.N.S. Editions'
    url = 'https://linsedition.com'
    _categories = ('LINS', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'https://linsedition.com/2011/09/07/l-i-n-s/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Read title, images and publication date from the page's <meta> tags.

        `link` is not used.
        """
        title = soup.find('meta', property='og:title')['content']
        image_metas = soup.find_all('meta', property='og:image')
        published = soup.find('meta', property='article:published_time')['content']
        # Only the leading YYYY-MM-DD part of the timestamp is parsed.
        day = string_to_date(published[:10], "%Y-%m-%d")
        return dict(
            title=title,
            img=[meta['content'] for meta in image_metas],
            month=day.month,
            year=day.year,
            day=day.day,
        )
2351
2352
2353
class ThorsThundershack(GenericNavigableComic):
    """Retriever for the Thor's Thundershack comics."""
    # Mirror: http://tapastic.com/series/Thors-Thundershac
    name = 'thor'
    long_name = 'Thor\'s Thundershack'
    url = 'http://www.thorsthundershack.com'
    _categories = ('THOR', )
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'first navlink' anchor found on the home page."""
        home = get_soup_at_url(cls.url)
        return home.find('a', class_='first navlink')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the next (or previous) comic anchor, or None if there is none.

        Anchors whose href is exactly '/comic' are skipped.
        """
        wanted_rel = 'next' if next_ else 'prev'
        candidates = last_soup.find_all('a', rel=wanted_rel)
        return next((a for a in candidates if a['href'] != '/comic'), None)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary for the comic page parsed in `soup`.

        `link` is not used. Image urls are joined with the site url.
        """
        title = soup.find('meta', attrs={'name': 'description'})["content"]
        description = soup.find('div', itemprop='articleBody').text
        author = soup.find('span', itemprop='author copyrightHolder').string
        pictures = soup.find_all('img', itemprop='image')
        for pic in pictures:
            assert pic['title'] == pic['alt']
        if pictures:
            alt = pictures[0]['alt']
        else:
            alt = ""
        published = soup.find('time', itemprop='datePublished')["datetime"]
        day = string_to_date(published, "%Y-%m-%d %H:%M:%S")
        return {
            'img': [urljoin_wrapper(cls.url, pic['src']) for pic in pictures],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'author': author,
            'title': title,
            'alt': alt,
            'description': description,
        }
2396
2397
2398 View Code Duplication
class GerbilWithAJetpack(GenericNavigableComic):
    """Retriever for the Gerbil With A Jetpack comics."""
    name = 'gerbil'
    long_name = 'Gerbil With A Jetpack'
    url = 'http://gerbilwithajetpack.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary for the comic page parsed in `soup`.

        All images in the 'comic' div are expected to share the same
        alt/title text, which is taken from the first image. `link` is not
        used.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        date_span = soup.find("span", class_="post-date")
        day = string_to_date(date_span.string, "%B %d, %Y")
        comic_div = soup.find("div", id="comic")
        pictures = comic_div.find_all("img")
        alt = pictures[0]['alt']
        for pic in pictures:
            assert pic['alt'] == pic['title'] == alt
        return {
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'alt': alt,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
2425
2426
2427 View Code Duplication
class EveryDayBlues(GenericNavigableComic):
    """Retriever for the Every Day Blues comics."""
    name = "blues"
    long_name = "Every Day Blues"
    url = "http://everydayblues.net"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary for the comic page parsed in `soup`.

        The post date is parsed with the "de_DE.utf8" locale. At most one
        image is expected, whose alt and title must equal the post title.
        `link` is not used.
        """
        title = soup.find("h2", class_="post-title").string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        date_span = soup.find("span", class_="post-date")
        day = string_to_date(date_span.string, "%d. %B %Y", "de_DE.utf8")
        comic_div = soup.find("div", id="comic")
        pictures = comic_div.find_all("img")
        for pic in pictures:
            assert pic['alt'] == pic['title'] == title
        assert len(pictures) <= 1
        return {
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
2453
2454
2455
class BiterComics(GenericNavigableComic):
    """Retriever for the Biter Comics."""
    name = "biter"
    long_name = "Biter Comics"
    url = "http://www.bitercomics.com"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary for the comic page parsed in `soup`.

        Exactly one image is expected in the 'comic' div, with matching
        alt and title attributes. `link` is not used.
        """
        title = soup.find("h1", class_="entry-title").string
        author_span = soup.find("span", class_="author vcard")
        author = author_span.find("a").string
        date_span = soup.find("span", class_="entry-date")
        day = string_to_date(date_span.string, "%B %d, %Y")
        comic_div = soup.find("div", id="comic")
        pictures = comic_div.find_all("img")
        for pic in pictures:
            assert pic['alt'] == pic['title']
        assert len(pictures) == 1
        alt = pictures[0]['alt']
        return {
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'alt': alt,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
2483
2484
2485 View Code Duplication
class TheAwkwardYeti(GenericNavigableComic):
    """Retriever for The Awkward Yeti comics."""
    # Mirror: http://www.gocomics.com/the-awkward-yeti
    # Mirror: http://larstheyeti.tumblr.com
    # Mirror: https://tapastic.com/series/TheAwkwardYeti
    name = 'yeti'
    long_name = 'The Awkward Yeti'
    url = 'http://theawkwardyeti.com'
    _categories = ('YETI', )
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary for the comic page parsed in `soup`.

        Only the first image of the 'comic' div is checked for matching
        alt and title attributes. `link` is not used.
        """
        title = soup.find('h2', class_='post-title').string
        date_span = soup.find("span", class_="post-date")
        day = string_to_date(date_span.string, "%B %d, %Y")
        comic_div = soup.find("div", id="comic")
        pictures = comic_div.find_all("img")
        # Images after the first one are exempt from the alt/title check.
        assert not pictures or pictures[0]['alt'] == pictures[0]['title']
        return {
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
2512
2513
2514
class PleasantThoughts(GenericNavigableComic):
    """Retriever for the Pleasant Thoughts comics."""
    name = 'pleasant'
    long_name = 'Pleasant Thoughts'
    url = 'http://pleasant-thoughts.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the title and image urls found in the 'post-content' div.

        `link` is not used.
        """
        content = soup.find('div', class_='post-content')
        title = content.find('h2', class_='post-title').string
        entry = content.find("div", class_="entry")
        pictures = entry.find_all("img")
        return dict(
            title=title,
            img=[pic['src'] for pic in pictures],
        )
2532
2533
2534
class MisterAndMe(GenericNavigableComic):
    """Retriever for the Mister & Me comics."""
    # Mirror: http://www.gocomics.com/mister-and-me
    # Mirror: https://tapastic.com/series/Mister-and-Me
    name = 'mister'
    long_name = 'Mister & Me'
    url = 'http://www.mister-and-me.com'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary for the comic page parsed in `soup`.

        At most one image is expected in the 'comic' div; 'alt' is empty
        when there is none. `link` is not used.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        date_span = soup.find("span", class_="post-date")
        day = string_to_date(date_span.string, "%B %d, %Y")
        comic_div = soup.find("div", id="comic")
        pictures = comic_div.find_all("img")
        for pic in pictures:
            assert pic['alt'] == pic['title']
        assert len(pictures) <= 1
        if pictures:
            alt = pictures[0]['alt']
        else:
            alt = ""
        return {
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'alt': alt,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
2564
2565
2566
class LastPlaceComics(GenericNavigableComic):
    """Retriever for the Last Place Comics."""
    name = 'lastplace'
    long_name = 'Last Place Comics'
    url = "http://lastplacecomics.com"
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary for the comic page parsed in `soup`.

        At most one image is expected in the 'comic' div; 'alt' is empty
        when there is none. `link` is not used.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        date_span = soup.find("span", class_="post-date")
        day = string_to_date(date_span.string, "%B %d, %Y")
        comic_div = soup.find("div", id="comic")
        pictures = comic_div.find_all("img")
        for pic in pictures:
            assert pic['alt'] == pic['title']
        assert len(pictures) <= 1
        if pictures:
            alt = pictures[0]['alt']
        else:
            alt = ""
        return {
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'alt': alt,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
2594
2595
2596
class TalesOfAbsurdity(GenericNavigableComic):
    """Retriever for the Tales Of Absurdity comics."""
    # Mirror: http://tapastic.com/series/Tales-Of-Absurdity
    # Mirror: http://talesofabsurdity.tumblr.com
    name = 'absurdity'
    long_name = 'Tales of Absurdity'
    url = 'http://talesofabsurdity.com'
    _categories = ('ABSURDITY', )
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary for the comic page parsed in `soup`.

        'alt' is taken from the first image of the 'comic' div, or is
        empty when the div holds no image. `link` is not used.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        date_span = soup.find("span", class_="post-date")
        day = string_to_date(date_span.string, "%B %d, %Y")
        comic_div = soup.find("div", id="comic")
        pictures = comic_div.find_all("img")
        for pic in pictures:
            assert pic['alt'] == pic['title']
        if pictures:
            alt = pictures[0]['alt']
        else:
            alt = ""
        return {
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'alt': alt,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
2626
2627
2628
class EndlessOrigami(GenericEmptyComic, GenericNavigableComic):
    """Retriever for the Endless Origami comics."""
    name = "origami"
    long_name = "Endless Origami"
    url = "http://endlessorigami.com"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary for the comic page parsed in `soup`.

        'alt' is taken from the first image of the 'comic' div, or is
        empty when the div holds no image. `link` is not used.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        date_span = soup.find("span", class_="post-date")
        day = string_to_date(date_span.string, "%B %d, %Y")
        comic_div = soup.find("div", id="comic")
        pictures = comic_div.find_all("img")
        for pic in pictures:
            assert pic['alt'] == pic['title']
        if pictures:
            alt = pictures[0]['alt']
        else:
            alt = ""
        return {
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'alt': alt,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
2655
2656
2657
class PlanC(GenericNavigableComic):
    """Retriever for the Plan C comics."""
    name = 'planc'
    long_name = 'Plan C'
    url = 'http://www.plancomic.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, image urls and post date of the comic page.

        `link` is not used.
        """
        title = soup.find('h2', class_='post-title').string
        date_span = soup.find("span", class_="post-date")
        day = string_to_date(date_span.string, "%B %d, %Y")
        comic_div = soup.find('div', id='comic')
        pictures = comic_div.find_all('img')
        return dict(
            title=title,
            img=[pic['src'] for pic in pictures],
            month=day.month,
            year=day.year,
            day=day.day,
        )
2679
2680
2681 View Code Duplication
class BuniComic(GenericNavigableComic):
    """Retriever for the Buni comics."""
    name = 'buni'
    long_name = 'BuniComics'
    url = 'http://www.bunicomic.com'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the image url and title of the comic page.

        Exactly one image is expected in the 'comic' div; its 'title'
        attribute is used as the comic title. `link` is not used.
        """
        comic_div = soup.find('div', id='comic')
        pictures = comic_div.find_all('img')
        for pic in pictures:
            assert pic['alt'] == pic['title']
        assert len(pictures) == 1
        return dict(
            img=[pic['src'] for pic in pictures],
            title=pictures[0]['title'],
        )
2699
2700
2701
class GenericCommitStrip(GenericNavigableComic):
    """Base retriever shared by the Commit Strip language editions."""
    get_navi_link = get_a_rel_next
    get_first_comic_link = simulate_first_link
    # Placeholder: each concrete edition assigns its own first_url.
    first_url = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary for the comic page parsed in `soup`.

        'title2' joins, with spaces, the 'title' attributes of the images
        in the 'entry-content' div (empty string for images without one).
        `link` is not used.
        """
        desc = soup.find('meta', property='og:description')['content']
        title = soup.find('meta', property='og:title')['content']
        entry = soup.find('div', class_='entry-content')
        pictures = entry.find_all('img')
        title2 = ' '.join(pic.get('title', '') for pic in pictures)
        sources = []
        for pic in pictures:
            ascii_src = convert_iri_to_plain_ascii_uri(pic['src'])
            sources.append(urljoin_wrapper(cls.url, ascii_src))
        return {
            'title': title,
            'title2': title2,
            'description': desc,
            'img': sources,
        }
2720
2721
2722
class CommitStripFr(GenericCommitStrip):
    """Retriever for the French edition of Commit Strip."""
    name = 'commit_fr'
    long_name = 'Commit Strip (Fr)'
    _categories = ('FRANCAIS', )
    url = 'http://www.commitstrip.com/fr'
    first_url = 'http://www.commitstrip.com/fr/2012/02/22/interview/'
2729
2730
2731
class CommitStripEn(GenericCommitStrip):
    """Retriever for the English edition of Commit Strip."""
    name = 'commit_en'
    long_name = 'Commit Strip (En)'
    url = 'http://www.commitstrip.com/en'
    first_url = 'http://www.commitstrip.com/en/2012/02/22/interview/'
2737
2738
2739 View Code Duplication
class GenericBoumerie(GenericNavigableComic):
    """Base retriever shared by the Boumeries language editions."""
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next
    # Placeholders: each concrete edition assigns its own values.
    date_format = NotImplemented
    lang = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary for the comic page parsed in `soup`.

        The post date is parsed with the class attributes `date_format`
        and `lang`. `link` is not used.
        """
        title = soup.find('h2', class_='post-title').string
        short_url = soup.find('link', rel='shortlink')['href']
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        date_span = soup.find('span', class_='post-date')
        day = string_to_date(date_span.string, cls.date_format, cls.lang)
        comic_div = soup.find('div', id='comic')
        pictures = comic_div.find_all('img')
        for pic in pictures:
            assert pic['alt'] == pic['title']
        return dict(
            short_url=short_url,
            img=[pic['src'] for pic in pictures],
            title=title,
            author=author,
            month=day.month,
            year=day.year,
            day=day.day,
        )
2765
2766
2767
class BoumerieEn(GenericBoumerie):
    """Retriever for the English edition of Boumeries."""
    name = 'boumeries_en'
    long_name = 'Boumeries (En)'
    url = 'http://comics.boumerie.com'
    lang = 'en_GB.UTF-8'
    date_format = "%B %d, %Y"
2774
2775
2776
class BoumerieFr(GenericBoumerie):
    """Retriever for the French edition of Boumeries."""
    name = 'boumeries_fr'
    long_name = 'Boumeries (Fr)'
    url = 'http://bd.boumerie.com'
    _categories = ('FRANCAIS', )
    lang = "fr_FR.utf8"
    date_format = "%A, %d %B %Y"
2784
2785
2786 View Code Duplication
class UnearthedComics(GenericNavigableComic):
    """Class to retrieve Unearthed comics."""
    # Also on http://tapastic.com/series/UnearthedComics
    # Also on http://unearthedcomics.tumblr.com
    name = 'unearthed'
    long_name = 'Unearthed Comics'
    url = 'http://unearthedcomics.com'
    _categories = ('UNEARTHED', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://unearthedcomics.com/comics/world-with-turn-signals/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The title comes from the first <h1> (or else <h2>) and is empty
        when neither exists. The description is the 'content' attribute of
        the 'og:description' <meta> tag, or None when that tag is missing.
        `link` is not used.
        """
        short_url = soup.find('link', rel='shortlink')['href']
        title_elt = soup.find('h1') or soup.find('h2')
        title = title_elt.string if title_elt else ""
        # Keep the text of the meta tag, not the Tag object itself, so that
        # 'description' is a plain string like in the other comic classes.
        desc_elt = soup.find('meta', property='og:description')
        desc = desc_elt.get('content') if desc_elt else None
        date_str = soup.find('time', class_='published updated hidden')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        post = soup.find('div', class_="entry content entry-content type-portfolio")
        imgs = post.find_all('img')
        return {
            'title': title,
            'description': desc,
            'url2': short_url,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
2818
2819
2820
class Optipess(GenericNavigableComic):
    """Retriever for the Optipess comics."""
    name = 'optipess'
    long_name = 'Optipess'
    url = 'http://www.optipess.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary for the comic page parsed in `soup`.

        A page without a 'comic' div yields an empty image list and an
        empty 'alt'. `link` is not used.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        comic_div = soup.find('div', id='comic')
        if comic_div:
            pictures = comic_div.find_all('img')
        else:
            pictures = []
        if pictures:
            alt = pictures[0]['title']
        else:
            alt = ""
        for pic in pictures:
            assert pic['alt'] == pic['title'] == alt
        date_span = soup.find('span', class_='post-date')
        day = string_to_date(date_span.string, "%B %d, %Y")
        return dict(
            title=title,
            alt=alt,
            author=author,
            img=[pic['src'] for pic in pictures],
            month=day.month,
            year=day.year,
            day=day.day,
        )
2848
2849
2850
class PainTrainComic(GenericNavigableComic):
    """Class to retrieve Pain Train Comics."""
    name = 'paintrain'
    long_name = 'Pain Train Comics'
    url = 'http://paintraincomic.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The comic number is parsed from the shortlink, which is expected
        to look like '<cls.url>/?p=<digits>'. All images of the 'comic'
        div must share the same alt/title text. `link` is not used.
        """
        title = soup.find('h2', class_='post-title').string
        short_url = soup.find('link', rel='shortlink')['href']
        # Escape the url so that its '.' only matches a literal dot.
        short_url_re = re.compile('^%s/\\?p=([0-9]*)' % re.escape(cls.url))
        num = int(short_url_re.match(short_url).groups()[0])
        imgs = soup.find('div', id='comic').find_all('img')
        alt = imgs[0]['title']
        assert all(i['alt'] == i['title'] == alt for i in imgs)
        date_str = soup.find('span', class_='post-date').string
        day = string_to_date(date_str, "%d/%m/%Y")
        return {
            'short_url': short_url,
            'num': num,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'alt': alt,
            'title': title,
        }
2880
2881
2882
class MoonBeard(GenericNavigableComic):
    """Class to retrieve MoonBeard comics."""
    # Also on http://blog.squiresjam.es/moonbeard
    # Also on http://www.webtoons.com/en/comedy/moon-beard/list?title_no=471
    name = 'moonbeard'
    long_name = 'Moon Beard'
    url = 'http://moonbeard.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The comic number is parsed from the shortlink, which is expected
        to look like '<cls.url>/?p=<digits>'. All images of the 'comic'
        div must share the same alt/title text. Tags are the space-joined
        contents of the 'article:tag' <meta> tags. `link` is not used.
        """
        title = soup.find('h2', class_='post-title').string
        short_url = soup.find('link', rel='shortlink')['href']
        # Escape the url so that its '.' only matches a literal dot.
        short_url_re = re.compile('^%s/\\?p=([0-9]*)' % re.escape(cls.url))
        num = int(short_url_re.match(short_url).groups()[0])
        imgs = soup.find('div', id='comic').find_all('img')
        alt = imgs[0]['title']
        assert all(i['alt'] == i['title'] == alt for i in imgs)
        date_str = soup.find('span', class_='post-date').string
        day = string_to_date(date_str, "%B %d, %Y")
        tags = ' '.join(t['content'] for t in soup.find_all('meta', property='article:tag'))
        author = soup.find('span', class_='post-author').string
        return {
            'short_url': short_url,
            'num': num,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'title': title,
            'tags': tags,
            'alt': alt,
            'author': author,
        }
2918
2919
2920
class AHamADay(GenericNavigableComic):
    """Class to retrieve class A Ham A Day comics."""
    name = 'ham'
    long_name = 'A Ham A Day'
    url = 'http://www.ahammaday.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.ahammaday.com/today/3/6/french'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns None when the page has no matching <li> element, instead
        of failing with an AttributeError.
        """
        # prev is next / next is prev
        li = last_soup.find('li', class_='previous' if next_ else 'next')
        return li.find('a') if li else None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Images are read from the <meta itemprop="image"> tags. `link` is
        not used.
        """
        date_str = soup.find('time', class_='published')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        author = soup.find('span', class_='blog-author').find('a').string
        title = soup.find('meta', property='og:title')['content']
        imgs = soup.find_all('meta', itemprop='image')
        return {
            'img': [i['content'] for i in imgs],
            'title': title,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
        }
2951
2952
2953
class LittleLifeLines(GenericNavigableComic):
    """Retriever for the Little Life Lines comics."""
    # Mirror: https://little-life-lines.tumblr.com
    name = 'life'
    long_name = 'Little Life Lines'
    url = 'http://www.littlelifelines.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.littlelifelines.com/comics/well-done'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor to the next (or previous) comic, or None.

        The site's labels are inverted: its 'prev' item is used as our
        next comic and its 'next' item as our previous one.
        """
        wanted_class = 'prev' if next_ else 'next'
        item = last_soup.find('li', class_=wanted_class)
        if not item:
            return None
        return item.find('a')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary for the comic page parsed in `soup`.

        Only images that carry a 'src' attribute are kept; 'alt' comes
        from the first of them. `link` is not used.
        """
        title = soup.find('meta', property='og:title')['content']
        desc = soup.find('meta', property='og:description')['content']
        published = soup.find('time', class_='published')['datetime']
        day = string_to_date(published, "%Y-%m-%d")
        author = soup.find('a', rel='author').string
        body = soup.find('div', class_="body entry-content")
        pictures = [pic for pic in body.find_all('img')
                    if pic.get('src') is not None]
        alt = pictures[0]['alt']
        return {
            'title': title,
            'alt': alt,
            'description': desc,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [pic['src'] for pic in pictures],
        }
2992
2993
2994
class GenericWordPressInkblot(GenericNavigableComic):
    """Base class for comics published with WordPress and Inkblot."""
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'first comic' anchor found on the main page."""
        first_link_class = 'webcomic-link webcomic1-link first-webcomic-link first-webcomic1-link'
        main_page = get_soup_at_url(cls.url)
        return main_page.find('a', class_=first_link_class)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the title, publication date and image urls of a comic page."""
        og_title = soup.find('meta', property='og:title')['content']
        image_tags = soup.find('div', class_='webcomic-image').find_all('img')
        published = soup.find('meta', property='article:published_time')['content']
        # Only the leading 'YYYY-MM-DD' part of the timestamp is used
        pub_date = string_to_date(published[:10], "%Y-%m-%d")
        image_urls = [tag['src'] for tag in image_tags]
        return {
            'title': og_title,
            'day': pub_date.day,
            'month': pub_date.month,
            'year': pub_date.year,
            'img': image_urls,
        }
3017
3018
3019
class EverythingsStupid(GenericWordPressInkblot):
    """Retrieve Everything's Stupid comics (WordPress/Inkblot site)."""
    # Also published on http://tapastic.com/series/EverythingsStupid
    # Also published on http://www.webtoons.com/en/challenge/everythings-stupid/list?title_no=14591
    # Also published on http://everythingsstupidcomics.tumblr.com
    name = 'stupid'
    long_name = "Everything's Stupid"
    url = 'http://everythingsstupid.net'
3027
3028
3029
class TheIsmComics(GenericWordPressInkblot):
    """Retrieve The Ism comics (WordPress/Inkblot site)."""
    # Possibly also published on https://tapastic.com/series/TheIsm
    name = 'theism'
    long_name = "The Ism"
    url = 'http://www.theism-comics.com'
3035
3036
3037
class WoodenPlankStudios(GenericWordPressInkblot):
    """Retrieve Wooden Plank Studios comics (WordPress/Inkblot site)."""
    name = 'woodenplank'
    long_name = 'Wooden Plank Studios'
    url = 'http://woodenplankstudios.com'
3042
3043
3044
class ElectricBunnyComic(GenericNavigableComic):
    """Retrieve Electric Bunny comics."""
    # Also published on http://electricbunnycomics.tumblr.com
    name = 'bunny'
    long_name = 'Electric Bunny Comic'
    url = 'http://www.electricbunnycomics.com/View/Comic/153/Welcome+to+Hell'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the element wrapping the 'First' navigation image."""
        first_img = get_soup_at_url(cls.url).find('img', alt='First')
        return first_img.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the element wrapping the 'Next'/'Back' image, or None."""
        wanted_alt = 'Next' if next_ else 'Back'
        nav_img = last_soup.find('img', alt=wanted_alt)
        if not nav_img:
            return None
        return nav_img.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the title and image urls read from the page meta tags."""
        og_title = soup.find('meta', property='og:title')['content']
        og_images = soup.find_all('meta', property='og:image')
        image_urls = [meta['content'] for meta in og_images]
        return {
            'title': og_title,
            'img': image_urls,
        }
3072
3073
3074
class SheldonComics(GenericNavigableComic):
    """Class to retrieve Sheldon comics."""
    # Also on http://www.gocomics.com/sheldon
    name = 'sheldon'
    long_name = 'Sheldon Comics'
    url = 'http://www.sheldoncomics.com'

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics."""
        return get_soup_at_url(cls.url).find("a", id="nav-first")

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns the first navigation link which does not merely point to
        the home page, or None when there is no such link.
        """
        for link in last_soup.find_all("a", id="nav-next" if next_ else "nav-prev"):
            # Links to the home page are not links to a comic: skip them.
            # Compare against cls.url rather than a duplicated literal.
            if link['href'] != cls.url:
                return link
        return None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dictionary with the title and the list of image urls.
        The assertions check that the page has the expected layout: a
        single image whose 'alt' and 'title' attributes are identical.
        """
        imgs = soup.find("div", id="comic-foot").find_all("img")
        assert all(i['alt'] == i['title'] for i in imgs)
        assert len(imgs) == 1
        title = imgs[0]['title']
        return {
            'title': title,
            'img': [i['src'] for i in imgs],
        }
3105
3106
3107
class Ubertool(GenericNavigableComic):
    """Retrieve Ubertool comics."""
    # Also published on http://ubertool.tumblr.com
    # Also published on https://tapastic.com/series/ubertool
    name = 'ubertool'
    long_name = 'Ubertool'
    url = 'http://ubertoolcomic.com'
    _categories = ('UBERTOOL', )
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_a_comicnavbase_comicnavnext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the image urls, title and publication date of a comic page."""
        post_title = soup.find('h2', class_='post-title').string
        post_date = soup.find('span', class_='post-date').string
        published = string_to_date(post_date, "%B %d, %Y")
        comic_div = soup.find('div', id='comic')
        image_urls = [tag['src'] for tag in comic_div.find_all('img')]
        return {
            'img': image_urls,
            'title': post_title,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
3132
3133
3134 View Code Duplication
class EarthExplodes(GenericNavigableComic):
    """Retrieve The Earth Explodes comics."""
    name = 'earthexplodes'
    long_name = 'The Earth Explodes'
    url = 'http://www.earthexplodes.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.earthexplodes.com/comics/000/'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor whose id is 'next' or 'prev', if any."""
        anchor_id = 'next' if next_ else 'prev'
        return last_soup.find('a', id=anchor_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the image urls, page title and hover text of a comic page."""
        page_title = soup.find('title').string
        image_tags = soup.find('div', id='image').find_all('img')
        # The 'title' attribute of the first image is used as alt text
        hover_text = image_tags[0].get('title', '')
        image_urls = [urljoin_wrapper(cls.url, tag['src']) for tag in image_tags]
        return {
            'img': image_urls,
            'title': page_title,
            'alt': hover_text,
        }
3159
3160
3161 View Code Duplication
class PomComics(GenericNavigableComic):
    """Retrieve Pom Comics / Piece of Me comics."""
    name = 'pom'
    long_name = 'Pom Comics / Piece of Me'
    url = 'http://www.pomcomic.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'btn_first' anchor found on the main page."""
        main_page = get_soup_at_url(cls.url)
        return main_page.find('a', class_='btn_first')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the 'btn_next' or 'btn_prev' anchor, if any."""
        button_class = 'btn_next' if next_ else 'btn_prev'
        return last_soup.find('a', class_=button_class)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the title, description, keywords and image urls of a page."""
        comic_name = soup.find('h1', id="comic-name").string
        og_desc = soup.find('meta', property='og:description')['content']
        keywords = soup.find('meta', attrs={'name': 'keywords'})['content']
        image_tags = soup.find('div', class_='comic').find_all('img')
        image_urls = [urljoin_wrapper(cls.url, tag['src']) for tag in image_tags]
        return {
            'title': comic_name,
            'desc': og_desc,
            'tags': keywords,
            'img': image_urls,
        }
3191
3192
3193
class CubeDrone(GenericNavigableComic):
    """Class to retrieve Cube Drone comics."""
    name = 'cubedrone'
    long_name = 'Cube Drone'
    url = 'http://cube-drone.com/comics'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics."""
        return get_soup_at_url(cls.url).find('span', class_='glyphicon glyphicon-backward').parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns the parent element of the chevron icon, or None when the
        page has no such icon.
        """
        class_ = 'glyphicon glyphicon-chevron-' + ('right' if next_ else 'left')
        span = last_soup.find('span', class_=class_)
        # find() returns None when the icon is missing: do not fail on
        # '.parent' and report the absence of link instead
        return span.parent if span is not None else None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dictionary with the url and title from the twitter meta
        tags, the title and alt text of the first image and the list of
        image urls.
        """
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        url2 = soup.find('meta', attrs={'name': 'twitter:url'})['content']
        # Date retrieval is currently disabled:
        # date_str = soup.find('h2', class_='comic_title').find('small').string
        # day = string_to_date(date_str, "%B %d, %Y, %I:%M %p")
        imgs = soup.find_all('img', class_='comic img-responsive')
        title2 = imgs[0]['title']
        alt = imgs[0]['alt']
        return {
            'url2': url2,
            'title': title,
            'title2': title2,
            'alt': alt,
            'img': [i['src'] for i in imgs],
        }
3228
3229
3230
class MakeItStoopid(GenericNavigableComic):
    """Retrieve Make It Stoopid comics."""
    name = 'stoopid'
    long_name = 'Make it stoopid'
    url = 'http://makeitstoopid.com/comic.php'

    @classmethod
    def get_nav(cls, soup):
        """Return the navigation elements of a page.

        Elements without 'href' attribute are replaced by None.
        """
        nav_elements = soup.find_all(class_='cnav')
        # The first five elements are expected to be repeated afterwards
        first_bar = nav_elements[:5]
        second_bar = nav_elements[5:]
        assert first_bar == second_bar
        # Positions are: begin, prev, archive, next_, end
        return [elt if elt.get('href') is not None else None for elt in first_bar]

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'begin' navigation element of the main page."""
        main_page = get_soup_at_url(cls.url)
        return cls.get_nav(main_page)[0]

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the 'next' or 'prev' navigation element of the page."""
        position = 3 if next_ else 1
        return cls.get_nav(last_soup)[position]

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the title (taken from the link) and the image urls."""
        link_title = link['title']
        image_tags = soup.find_all('img', id='comicimg')
        image_urls = [tag['src'] for tag in image_tags]
        return {
            'title': link_title,
            'img': image_urls,
        }
3264
3265
3266 View Code Duplication
class MarketoonistComics(GenericNavigableComic):
    """Retrieve Marketoonist comics."""
    name = 'marketoonist'
    long_name = 'Marketoonist'
    url = 'https://marketoonist.com/cartoons'
    get_first_comic_link = simulate_first_link
    get_navi_link = get_link_rel_next
    first_url = 'https://marketoonist.com/2002/10/the-8-types-of-brand-managers-2.html'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the image urls, publication date and title of a page."""
        og_images = soup.find_all('meta', property='og:image')
        published = soup.find('meta', property='article:published_time')['content']
        # Only the leading 'YYYY-MM-DD' part of the timestamp is used
        pub_date = string_to_date(published[:10], "%Y-%m-%d")
        og_title = soup.find('meta', property='og:title')['content']
        image_urls = [meta['content'] for meta in og_images]
        return {
            'img': image_urls,
            'day': pub_date.day,
            'month': pub_date.month,
            'year': pub_date.year,
            'title': og_title,
        }
3289
3290
3291 View Code Duplication
class ConsoliaComics(GenericNavigableComic):
    """Retrieve Consolia comics."""
    name = 'consolia'
    long_name = 'consolia'
    url = 'https://consolia-comic.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'first' anchor found on the main page."""
        main_page = get_soup_at_url(cls.url)
        return main_page.find('a', class_='first')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the 'next' or 'prev' anchor, if any."""
        anchor_class = 'next' if next_ else 'prev'
        return last_soup.find('a', class_=anchor_class)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the title, image urls and publication date of a page."""
        og_title = soup.find('meta', property='og:title')['content']
        time_value = soup.find('time')["datetime"]
        pub_date = string_to_date(time_value, "%Y-%m-%d")
        og_images = soup.find_all('meta', property='og:image')
        image_urls = [meta['content'] for meta in og_images]
        return {
            'title': og_title,
            'img': image_urls,
            'day': pub_date.day,
            'month': pub_date.month,
            'year': pub_date.year,
        }
3322
3323
3324 View Code Duplication
class TuMourrasMoinsBete(GenericNavigableComic):
    """Retrieve Tu Mourras Moins Bete comics."""
    name = 'mourrasmoinsbete'
    long_name = 'Tu Mourras Moins Bete'
    url = 'http://tumourrasmoinsbete.blogspot.fr'
    _categories = ('FRANCAIS', )
    get_first_comic_link = simulate_first_link
    first_url = 'http://tumourrasmoinsbete.blogspot.fr/2008/06/essai.html'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the 'newer' or 'older' pager anchor of the blog, if any."""
        if next_:
            pager_id = 'Blog1_blog-pager-newer-link'
        else:
            pager_id = 'Blog1_blog-pager-older-link'
        return last_soup.find('a', id=pager_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the image urls, author and page title of a post."""
        page_title = soup.find('title').string
        post_body = soup.find('div', itemprop='description articleBody')
        image_tags = post_body.find_all('img')
        post_author = soup.find('span', itemprop='author').string
        image_urls = [tag['src'] for tag in image_tags]
        return {
            'img': image_urls,
            'author': post_author,
            'title': page_title,
        }
3349
3350
3351
class GeekAndPoke(GenericNavigableComic):
    """Class to retrieve Geek And Poke comics."""
    name = 'geek'
    long_name = 'Geek And Poke'
    url = 'http://geek-and-poke.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://geek-and-poke.com/geekandpoke/2006/8/27/a-new-place-for-a-not-so-old-blog.html'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic."""
        # The class names are swapped on purpose: 'prev-item' is looked up
        # for the next comic and 'next-item' for the previous one
        return last_soup.find('a', class_='prev-item' if next_ else 'next-item')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dictionary with the title, alt text, description, author,
        publication date (day/month/year) and the list of image urls. The
        alt text is an empty string when the page has no usable image.
        """
        title = soup.find('meta', property='og:title')['content']
        desc = soup.find('meta', property='og:description')['content']
        date_str = soup.find('time', class_='published')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        author = soup.find('a', rel='author').string
        # The content is in one of two possible containers
        div_content = (soup.find('div', class_="body entry-content") or
                       soup.find('div', class_="special-content"))
        # Only keep the images which actually have a source
        imgs = [i for i in div_content.find_all('img') if i.get('src') is not None]
        # NOTE(review): on a bs4 Tag, 'in' presumably tests the children of
        # the tag rather than its attributes, which would make this check
        # always pass - confirm; i.has_attr('title') may have been intended.
        assert all('title' not in i or i['alt'] == i['title'] for i in imgs)
        # Always a string: the fallback used to be an empty list
        alt = imgs[0].get('alt', "") if imgs else ""
        return {
            'title': title,
            'alt': alt,
            'description': desc,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
        }
3389
3390
3391 View Code Duplication
class GloryOwlComix(GenericNavigableComic):
    """Retrieve Glory Owl comics."""
    name = 'gloryowl'
    long_name = 'Glory Owl'
    url = 'http://gloryowlcomix.blogspot.fr'
    _categories = ('NSFW', 'FRANCAIS')
    get_first_comic_link = simulate_first_link
    first_url = 'http://gloryowlcomix.blogspot.fr/2013/02/1_7.html'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the 'newer' or 'older' pager anchor of the blog, if any."""
        if next_:
            pager_id = 'Blog1_blog-pager-newer-link'
        else:
            pager_id = 'Blog1_blog-pager-older-link'
        return last_soup.find('a', id=pager_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the image urls, author and page title of a post."""
        page_title = soup.find('title').string
        image_links = soup.find_all('link', rel='image_src')
        post_author = soup.find('a', rel='author').string
        image_urls = [tag['href'] for tag in image_links]
        return {
            'img': image_urls,
            'author': post_author,
            'title': page_title,
        }
3416
3417
3418
class GenericTumblrV1(GenericComic):
    """Generic class to retrieve comics from Tumblr using the V1 API."""
    _categories = ('TUMBLR', )

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generic implementation of get_next_comic for Tumblr comics.

        Yields one comic dictionary per post returned by get_posts, skipping
        the posts for which get_comic_info returns None.
        """
        for p in cls.get_posts(last_comic):
            comic = cls.get_comic_info(p)
            if comic is not None:
                yield comic

    @classmethod
    def get_url_from_post(cls, post):
        """Get the url used to identify a post (its 'url' attribute)."""
        return post['url']

    @classmethod
    def get_api_url(cls):
        """Get the url of the '/api/read/' endpoint for this comic."""
        return urljoin_wrapper(cls.url, '/api/read/')

    @classmethod
    def get_comic_info(cls, post):
        """Get information about a particular comics.

        Returns None when the post type is not 'photo', otherwise a
        dictionary describing the comic (urls, date, title, tags, images,
        tumblr id and the api url to retrieve that single post).
        """
        type_ = post['type']
        if type_ != 'photo':
            return None
        tumblr_id = int(post['id'])
        api_url = cls.get_api_url() + '?id=%d' % tumblr_id
        day = datetime.datetime.fromtimestamp(int(post['unix-timestamp'])).date()
        caption = post.find('photo-caption')
        title = caption.string if caption else ""
        tags = ' '.join(t.string for t in post.find_all('tag'))
        # Photos may appear in 'photo' tags and/or straight in the post
        photo_tags = post.find_all('photo')
        if not photo_tags:
            photo_tags = [post]
        # Images are in multiple resolutions - taking the first one
        imgs = [photo.find('photo-url') for photo in photo_tags]
        return {
            'url': cls.get_url_from_post(post),
            'url2': post['url-with-slug'],
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'title': title,
            'tags': tags,
            'img': [i.string for i in imgs],
            'tumblr-id': tumblr_id,
            'api_url': api_url,
        }

    @classmethod
    def get_posts(cls, last_comic, nb_post_per_call=10):
        """Get posts using API. nb_post_per_call is max 50.

        Posts are retrieved from newer to older as per the tumblr v1 api
        but are returned in chronological order.

        When last_comic is provided, only the posts retrieved before
        reaching the post with the same url are returned. An empty result
        is returned (and a message is printed) when the previous post
        cannot be fetched or is not found among the posts.
        """
        # A plain 'import urllib' does not guarantee that the 'urllib.error'
        # submodule is loaded: import it explicitly before using it
        import urllib.error
        waiting_for_url = last_comic['url'] if last_comic else None
        posts_acc = []
        if last_comic:
            # Sometimes, tumblr posts are deleted. When previous post is deleted, we
            # might end up spending a lot of time looking for something that
            # doesn't exist. Failing early and clearly might be a better option.
            last_api_url = last_comic['api_url']
            try:
                get_soup_at_url(last_api_url)
            except urllib.error.HTTPError:
                try:
                    get_soup_at_url(cls.url)
                except urllib.error.HTTPError:
                    print("Did not find previous post nor main url %s" % cls.url)
                else:
                    print("Did not find previous post %s : it might have been deleted" % last_api_url)
                return []
        api_url = cls.get_api_url()
        # First call is only used to know the total number of posts
        posts = get_soup_at_url(api_url).find('posts')
        start, total = int(posts['start']), int(posts['total'])
        assert start == 0
        for starting_num in range(0, total, nb_post_per_call):
            api_url2 = api_url + '?start=%d&num=%d' % (starting_num, nb_post_per_call)
            posts2 = get_soup_at_url(api_url2).find('posts')
            start2, total2 = int(posts2['start']), int(posts2['total'])
            assert starting_num == start2, "%d != %d" % (starting_num, start2)
            # This may happen and should be handled in the future
            assert total == total2, "%d != %d" % (total, total2)
            for p in posts2.find_all('post'):
                # Stop as soon as the last known post is reached: everything
                # accumulated so far is newer than it
                if waiting_for_url and waiting_for_url == cls.get_url_from_post(p):
                    return reversed(posts_acc)
                posts_acc.append(p)
        if waiting_for_url is None:
            return reversed(posts_acc)
        print("Did not find %s : there might be a problem" % waiting_for_url)
        return []
3511
3512
3513
class SaturdayMorningBreakfastCerealTumblr(GenericTumblrV1):
    """Retrieve Saturday Morning Breakfast Cereal comics through the Tumblr V1 API."""
    # Also published on http://www.gocomics.com/saturday-morning-breakfast-cereal
    # Also published on http://www.smbc-comics.com
    name = 'smbc-tumblr'
    long_name = 'Saturday Morning Breakfast Cereal (from Tumblr)'
    url = 'http://smbc-comics.tumblr.com'
    _categories = ('SMBC', )
3521
3522
3523
class IrwinCardozo(GenericTumblrV1):
    """Retrieve Irwin Cardozo comics through the Tumblr V1 API."""
    name = 'irwinc'
    long_name = 'Irwin Cardozo'
    url = 'http://irwincardozocomics.tumblr.com'
3528
3529
3530
class AccordingToDevin(GenericTumblrV1):
    """Retrieve According To Devin comics through the Tumblr V1 API."""
    name = 'devin'
    long_name = 'According To Devin'
    url = 'http://accordingtodevin.tumblr.com'
3535
3536
3537
class ItsTheTieTumblr(GenericTumblrV1):
    """Retrieve It's the tie comics through the Tumblr V1 API."""
    # Also published on http://itsthetie.com
    # Also published on https://tapastic.com/series/itsthetie
    name = 'tie-tumblr'
    long_name = "It's the tie (from Tumblr)"
    url = "http://itsthetie.tumblr.com"
    _categories = ('TIE', )
3545
3546
3547
class OctopunsTumblr(GenericTumblrV1):
    """Retrieve Octopuns comics through the Tumblr V1 API."""
    # Also published on http://www.octopuns.net
    name = 'octopuns-tumblr'
    long_name = 'Octopuns (from Tumblr)'
    url = 'http://octopuns.tumblr.com'
3553
3554
3555
class PicturesInBoxesTumblr(GenericTumblrV1):
    """Retrieve Pictures In Boxes comics through the Tumblr V1 API."""
    # Also published on http://www.picturesinboxes.com
    name = 'picturesinboxes-tumblr'
    long_name = 'Pictures in Boxes (from Tumblr)'
    url = 'http://picturesinboxescomic.tumblr.com'
3561
3562
3563
class TubeyToonsTumblr(GenericTumblrV1):
    """Retrieve Tubey Toons comics through the Tumblr V1 API."""
    # Also published on http://tapastic.com/series/Tubey-Toons
    # Also published on http://tubeytoons.com
    name = 'tubeytoons-tumblr'
    long_name = 'Tubey Toons (from Tumblr)'
    url = 'http://tubeytoons.tumblr.com'
    # NOTE(review): 'TUNEYTOONS' looks like a typo for 'TUBEYTOONS'; kept
    # as-is because other classes may share this exact category - confirm.
    _categories = ('TUNEYTOONS', )
3571
3572
3573
class UnearthedComicsTumblr(GenericTumblrV1):
    """Retrieve Unearthed Comics through the Tumblr V1 API."""
    # Also published on http://tapastic.com/series/UnearthedComics
    # Also published on http://unearthedcomics.com
    name = 'unearthed-tumblr'
    long_name = 'Unearthed Comics (from Tumblr)'
    url = 'http://unearthedcomics.tumblr.com'
    _categories = ('UNEARTHED', )
3581
3582
3583
class PieComic(GenericTumblrV1):
    """Retrieve Pie Comic comics through the Tumblr V1 API."""
    name = 'pie'
    long_name = 'Pie Comic'
    url = "http://piecomic.tumblr.com"
3588
3589
3590
class MrEthanDiamond(GenericTumblrV1):
    """Retrieve Mr Ethan Diamond comics through the Tumblr V1 API."""
    name = 'diamond'
    long_name = 'Mr Ethan Diamond'
    url = 'http://mrethandiamond.tumblr.com'
3595
3596
3597
class Flocci(GenericTumblrV1):
    """Retrieve floccinaucinihilipilification comics through the Tumblr V1 API."""
    name = 'flocci'
    long_name = 'floccinaucinihilipilification'
    url = "http://floccinaucinihilipilificationa.tumblr.com"
3602
3603
3604
class UpAndOut(GenericTumblrV1):
    """Retrieve Up & Out comics through the Tumblr V1 API."""
    # Also published on http://tapastic.com/series/UP-and-OUT
    name = 'upandout'
    long_name = 'Up And Out (from Tumblr)'
    url = 'http://upandoutcomic.tumblr.com'
3610
3611
3612
class Pundemonium(GenericTumblrV1):
    """Retrieve Pundemonium comics through the Tumblr V1 API."""
    name = 'pundemonium'
    long_name = 'Pundemonium'
    url = 'http://monstika.tumblr.com'
3617
3618
3619
class PoorlyDrawnLinesTumblr(GenericTumblrV1):
    """Retrieve Poorly Drawn Lines comics through the Tumblr V1 API."""
    # Also published on http://poorlydrawnlines.com
    name = 'poorlydrawn-tumblr'
    long_name = 'Poorly Drawn Lines (from Tumblr)'
    url = 'http://pdlcomics.tumblr.com'
    _categories = ('POORLYDRAWN', )
3626
3627
3628
class PearShapedComics(GenericTumblrV1):
    """Retrieve Pear-Shaped Comics through the Tumblr V1 API."""
    name = 'pearshaped'
    long_name = 'Pear-Shaped Comics'
    url = 'http://pearshapedcomics.com'
3633
3634
3635
class PondScumComics(GenericTumblrV1):
    """Retrieve Pond Scum comics through the Tumblr V1 API."""
    name = 'pond'
    long_name = 'Pond Scum'
    url = 'http://pondscumcomic.tumblr.com'
3640
3641
3642
class MercworksTumblr(GenericTumblrV1):
    """Retrieve Mercworks comics through the Tumblr V1 API."""
    # Also published on http://mercworks.net
    name = 'mercworks-tumblr'
    long_name = 'Mercworks (from Tumblr)'
    url = 'http://mercworks.tumblr.com'
3648
3649
3650
class OwlTurdTumblr(GenericTumblrV1):
    """Retrieve Owl Turd comics through the Tumblr V1 API."""
    # Also published on http://tapastic.com/series/Owl-Turd-Comix
    name = 'owlturd-tumblr'
    long_name = 'Owl Turd (from Tumblr)'
    url = 'http://owlturd.com'
    _categories = ('OWLTURD', )
3657
3658
3659
class VectorBelly(GenericTumblrV1):
    """Retrieve Vector Belly comics through the Tumblr V1 API."""
    # Also published on http://vectorbelly.com
    name = 'vector'
    long_name = 'Vector Belly'
    url = 'http://vectorbelly.tumblr.com'
3665
3666
3667
class GoneIntoRapture(GenericTumblrV1):
    """Retrieve Gone Into Rapture comics through the Tumblr V1 API."""
    # Also published on http://goneintorapture.tumblr.com
    # Also published on http://tapastic.com/series/Goneintorapture
    name = 'rapture'
    long_name = 'Gone Into Rapture'
    url = 'http://www.goneintorapture.com'
3674
3675
3676
class TheOatmealTumblr(GenericTumblrV1):
    """Retrieve The Oatmeal comics through the Tumblr V1 API."""
    # Also published on http://theoatmeal.com
    name = 'oatmeal-tumblr'
    long_name = 'The Oatmeal (from Tumblr)'
    url = 'http://oatmeal.tumblr.com'
3682
3683
3684
class HeckIfIKnowComicsTumblr(GenericTumblrV1):
    """Retrieve Heck If I Know comics through the Tumblr V1 API."""
    # Also published on http://tapastic.com/series/Regular
    name = 'heck-tumblr'
    long_name = 'Heck if I Know comics (from Tumblr)'
    url = 'http://heckifiknowcomics.com'
3690
3691
3692
class MyJetPack(GenericTumblrV1):
    """Retrieve My Jet Pack comics through the Tumblr V1 API."""
    name = 'jetpack'
    long_name = 'My Jet Pack'
    url = 'http://myjetpack.tumblr.com'
3697
3698
3699
class CheerUpEmoKidTumblr(GenericTumblrV1):
    """Retrieve Cheer Up Emo Kid comics through the Tumblr V1 API."""
    # Also published on http://www.cheerupemokid.com
    # Also published on http://tapastic.com/series/CUEK
    name = 'cuek-tumblr'
    long_name = 'Cheer Up Emo Kid (from Tumblr)'
    url = 'http://enzocomics.tumblr.com'
3706
3707
3708
class ForLackOfABetterComic(GenericTumblrV1):
    """Retrieve For Lack Of A Better Comic comics through the Tumblr V1 API."""
    # Also published on http://forlackofabettercomic.com
    name = 'lack'
    long_name = 'For Lack Of A Better Comic'
    url = 'http://forlackofabettercomic.tumblr.com'
3714
3715
3716
class ZenPencilsTumblr(GenericTumblrV1):
    """Retrieve Zen Pencils comics through the Tumblr V1 API."""
    # Also published on http://zenpencils.com
    # Also published on http://www.gocomics.com/zen-pencils
    name = 'zenpencils-tumblr'
    long_name = 'Zen Pencils (from Tumblr)'
    url = 'http://zenpencils.tumblr.com'
    _categories = ('ZENPENCILS', )
3724
3725
3726
class ThreeWordPhraseTumblr(GenericTumblrV1):
    """Retrieve Three Word Phrase comics through the Tumblr V1 API."""
    # Also published on http://threewordphrase.com
    name = 'threeword-tumblr'
    long_name = 'Three Word Phrase (from Tumblr)'
    url = 'http://www.threewordphrase.tumblr.com'
3732
3733
3734
class TimeTrabbleTumblr(GenericTumblrV1):
    """Retrieve Time Trabble comics through the Tumblr V1 API."""
    # Also published on http://timetrabble.com
    name = 'timetrabble-tumblr'
    long_name = 'Time Trabble (from Tumblr)'
    url = 'http://timetrabble.tumblr.com'
3740
3741
3742
class SafelyEndangeredTumblr(GenericTumblrV1):
    """Retrieve Safely Endangered comics through the Tumblr V1 API."""
    # Also published on http://www.safelyendangered.com
    name = 'endangered-tumblr'
    long_name = 'Safely Endangered (from Tumblr)'
    url = 'http://tumblr.safelyendangered.com'
3748
3749
3750
class MouseBearComedyTumblr(GenericTumblrV1):
    """Retrieve Mouse Bear Comedy comics through the Tumblr V1 API."""
    # Also published on http://www.mousebearcomedy.com
    name = 'mousebear-tumblr'
    long_name = 'Mouse Bear Comedy (from Tumblr)'
    url = 'http://mousebearcomedy.tumblr.com'
3756
3757
3758
class BouletCorpTumblr(GenericTumblrV1):
    """Retrieve Boulet Corp comics through the Tumblr V1 API."""
    # Also published on http://www.bouletcorp.com
    name = 'boulet-tumblr'
    long_name = 'Boulet Corp (from Tumblr)'
    url = 'http://bouletcorp.tumblr.com'
    _categories = ('BOULET', )
3765
3766
3767
class TheAwkwardYetiTumblr(GenericTumblrV1):
    """Retrieve The Awkward Yeti comics through the Tumblr V1 API."""
    # Also published on http://www.gocomics.com/the-awkward-yeti
    # Also published on http://theawkwardyeti.com
    # Also published on https://tapastic.com/series/TheAwkwardYeti
    name = 'yeti-tumblr'
    long_name = 'The Awkward Yeti (from Tumblr)'
    url = 'http://larstheyeti.tumblr.com'
    _categories = ('YETI', )
3776
3777
3778
class NellucNhoj(GenericTumblrV1):
    """Retrieve Nelluc Nhoj comics through the Tumblr V1 API."""
    name = 'nhoj'
    long_name = 'Nelluc Nhoj'
    url = 'http://nellucnhoj.com'
3783
3784
3785
class DownTheUpwardSpiralTumblr(GenericTumblrV1):
    """Retrieve Down The Upward Spiral comics using the GenericTumblrV1 logic.

    Also published on:
    - http://www.downtheupwardspiral.com
    """
    name = 'spiral-tumblr'
    long_name = 'Down the Upward Spiral (from Tumblr)'
    url = 'http://downtheupwardspiral.tumblr.com'
3791
3792
3793
class AsPerUsualTumblr(GenericTumblrV1):
    """Retrieve As Per Usual comics using the GenericTumblrV1 logic.

    Also published on:
    - https://tapastic.com/series/AsPerUsual
    """
    name = 'usual-tumblr'
    long_name = 'As Per Usual (from Tumblr)'
    url = 'http://as-per-usual.tumblr.com'
    # The other comic classes in this file declare their tags under
    # `_categories`; this class used `categories` instead.
    _categories = ('DAMILEE', )
    # Previous attribute name, kept so any existing reader still works.
    categories = _categories
3800
3801
3802
class HotComicsForCoolPeopleTumblr(GenericTumblrV1):
    """Retrieve Hot Comics For Cool People using the GenericTumblrV1 logic.

    Also published on:
    - https://tapastic.com/series/Hot-Comics-For-Cool-People
    - http://hotcomics.biz (links to tumblr)
    - http://hcfcp.com (links to tumblr)
    """
    name = 'hotcomics-tumblr'
    long_name = 'Hot Comics For Cool People (from Tumblr)'
    url = 'http://hotcomicsforcoolpeople.tumblr.com'
    # The other comic classes in this file declare their tags under
    # `_categories`; this class used `categories` instead.
    _categories = ('DAMILEE', )
    # Previous attribute name, kept so any existing reader still works.
    categories = _categories
3811
3812
3813
class OneOneOneOneComicTumblr(GenericTumblrV1):
    """Retrieve 1111 Comics using the GenericTumblrV1 logic.

    Also published on:
    - http://www.1111comics.me
    - https://tapastic.com/series/1111-Comics
    """
    name = '1111-tumblr'
    long_name = '1111 Comics (from Tumblr)'
    url = 'http://comics1111.tumblr.com'
    _categories = ('ONEONEONEONE', )
3821
3822
3823
class JhallComicsTumblr(GenericTumblrV1):
    """Retrieve Jhall Comics using the GenericTumblrV1 logic.

    Also published on:
    - http://jhallcomics.com
    """
    name = 'jhall-tumblr'
    long_name = 'Jhall Comics (from Tumblr)'
    url = 'http://jhallcomics.tumblr.com'
3829
3830
3831
class BerkeleyMewsTumblr(GenericTumblrV1):
    """Retrieve Berkeley Mews comics using the GenericTumblrV1 logic.

    Also published on:
    - http://www.gocomics.com/berkeley-mews
    - http://www.berkeleymews.com
    """
    name = 'berkeley-tumblr'
    long_name = 'Berkeley Mews (from Tumblr)'
    url = 'http://mews.tumblr.com'
    _categories = ('BERKELEY', )
3839
3840
3841
class JoanCornellaTumblr(GenericTumblrV1):
    """Retrieve Joan Cornella comics using the GenericTumblrV1 logic.

    Also published on:
    - http://joancornella.net
    """
    name = 'cornella-tumblr'
    long_name = 'Joan Cornella (from Tumblr)'
    url = 'http://cornellajoan.tumblr.com'
3847
3848
3849
class RespawnComicTumblr(GenericTumblrV1):
    """Retrieve Respawn Comic using the GenericTumblrV1 logic.

    Also published on:
    - http://respawncomic.com
    """
    name = 'respawn-tumblr'
    long_name = 'Respawn Comic (from Tumblr)'
    url = 'http://respawncomic.tumblr.com'
3855
3856
3857
class ChrisHallbeckTumblr(GenericTumblrV1):
    """Retrieve Chris Hallbeck comics using the GenericTumblrV1 logic.

    Also published on:
    - https://tapastic.com/ChrisHallbeck
    - http://maximumble.com
    - http://minimumble.com
    - http://thebookofbiff.com
    """
    name = 'hallbeck-tumblr'
    # Display name previously read "Hallback"; the class name, docstring and
    # URL all spell the author "Hallbeck".
    long_name = 'Chris Hallbeck (from Tumblr)'
    url = 'http://chrishallbeck.tumblr.com'
    # NOTE(review): the tag keeps its existing spelling because it is an
    # identifier other classes may share - confirm before renaming.
    _categories = ('HALLBACK', )
3867
3868
3869
class ComicNuggets(GenericTumblrV1):
    """Retrieve Comic Nuggets using the GenericTumblrV1 logic."""
    name = 'nuggets'
    long_name = 'Comic Nuggets'
    url = 'http://comicnuggets.com'
3874
3875
3876
class PigeonGazetteTumblr(GenericTumblrV1):
    """Retrieve The Pigeon Gazette comics using the GenericTumblrV1 logic.

    Also published on:
    - https://tapastic.com/series/The-Pigeon-Gazette
    """
    name = 'pigeon-tumblr'
    long_name = 'The Pigeon Gazette (from Tumblr)'
    url = 'http://thepigeongazette.tumblr.com'
3882
3883
3884
class CancerOwl(GenericTumblrV1):
    """Retrieve Cancer Owl comics using the GenericTumblrV1 logic.

    Also published on:
    - http://cancerowl.com
    """
    name = 'cancerowl-tumblr'
    long_name = 'Cancer Owl (from Tumblr)'
    url = 'http://cancerowl.tumblr.com'
3890
3891
3892
class FowlLanguageTumblr(GenericTumblrV1):
    """Retrieve Fowl Language comics using the GenericTumblrV1 logic.

    Also published on:
    - http://www.fowllanguagecomics.com
    - http://tapastic.com/series/Fowl-Language-Comics
    - http://www.gocomics.com/fowl-language
    """
    name = 'fowllanguage-tumblr'
    long_name = 'Fowl Language Comics (from Tumblr)'
    url = 'http://fowllanguagecomics.tumblr.com'
    _categories = ('FOWLLANGUAGE', )
3901
3902
3903
class TheOdd1sOutTumblr(GenericTumblrV1):
    """Retrieve The Odd 1s Out comics using the GenericTumblrV1 logic.

    Also published on:
    - http://theodd1sout.com
    - https://tapastic.com/series/Theodd1sout
    """
    name = 'theodd-tumblr'
    long_name = 'The Odd 1s Out (from Tumblr)'
    url = 'http://theodd1sout.tumblr.com'
3910
3911
3912
class TheUnderfoldTumblr(GenericTumblrV1):
    """Retrieve The Underfold comics using the GenericTumblrV1 logic.

    Also published on:
    - http://theunderfold.com
    """
    name = 'underfold-tumblr'
    long_name = 'The Underfold (from Tumblr)'
    url = 'http://theunderfold.tumblr.com'
3918
3919
3920
class LolNeinTumblr(GenericTumblrV1):
    """Retrieve Lol Nein comics using the GenericTumblrV1 logic.

    Also published on:
    - http://lolnein.com
    """
    name = 'lolnein-tumblr'
    long_name = 'Lol Nein (from Tumblr)'
    url = 'http://lolneincom.tumblr.com'
3926
3927
3928
class FatAwesomeComicsTumblr(GenericTumblrV1):
    """Retrieve Fat Awesome Comics using the GenericTumblrV1 logic.

    Also published on:
    - http://fatawesome.com/comics
    """
    name = 'fatawesome-tumblr'
    long_name = 'Fat Awesome (from Tumblr)'
    url = 'http://fatawesomecomedy.tumblr.com'
3934
3935
3936
class TheWorldIsFlatTumblr(GenericTumblrV1):
    """Retrieve The World Is Flat Comics using the GenericTumblrV1 logic.

    Also published on:
    - https://tapastic.com/series/The-World-is-Flat
    """
    name = 'flatworld-tumblr'
    long_name = 'The World Is Flat (from Tumblr)'
    url = 'http://theworldisflatcomics.tumblr.com'
3942
3943
3944
class DorrisMc(GenericTumblrV1):
    """Retrieve Dorris Mc Comics using the GenericTumblrV1 logic.

    Also published on:
    - http://www.gocomics.com/dorris-mccomics
    """
    name = 'dorrismc'
    long_name = 'Dorris Mc'
    url = 'http://dorrismccomics.com'
3950
3951
3952
class LeleozTumblr(GenericEmptyComic, GenericTumblrV1):
    """Retrieve Leleoz comics using the GenericTumblrV1 logic.

    GenericEmptyComic is listed first among the bases - presumably this
    deactivates retrieval; the reason is not recorded here.

    Also published on:
    - https://tapastic.com/series/Leleoz
    """
    name = 'leleoz-tumblr'
    long_name = 'Leleoz (from Tumblr)'
    url = 'http://leleozcomics.tumblr.com'
3958
3959
3960
class MoonBeardTumblr(GenericTumblrV1):
    """Retrieve MoonBeard comics using the GenericTumblrV1 logic.

    Also published on:
    - http://moonbeard.com
    - http://www.webtoons.com/en/comedy/moon-beard/list?title_no=471
    """
    name = 'moonbeard-tumblr'
    long_name = 'Moon Beard (from Tumblr)'
    url = 'http://blog.squiresjam.es/moonbeard'
3967
3968
3969
class AComik(GenericTumblrV1):
    """Retrieve A Comik using the GenericTumblrV1 logic."""
    name = 'comik'
    long_name = 'A Comik'
    url = 'http://acomik.com'
3974
3975
3976
class ClassicRandy(GenericTumblrV1):
    """Retrieve Classic Randy comics using the GenericTumblrV1 logic."""
    name = 'randy'
    long_name = 'Classic Randy'
    url = 'http://classicrandy.tumblr.com'
3981
3982
3983
class DagssonTumblr(GenericTumblrV1):
    """Retrieve Dagsson comics using the GenericTumblrV1 logic.

    Also published on:
    - http://www.dagsson.com
    """
    name = 'dagsson-tumblr'
    long_name = 'Dagsson Hugleikur (from Tumblr)'
    url = 'http://hugleikurdagsson.tumblr.com'
3989
3990
3991
class LinsEditionsTumblr(GenericTumblrV1):
    """Retrieve L.I.N.S. Editions comics using the GenericTumblrV1 logic.

    Also published on:
    - https://linsedition.com
    Now on:
    - http://warandpeas.tumblr.com
    """
    name = 'lins-tumblr'
    long_name = 'L.I.N.S. Editions (from Tumblr)'
    url = 'http://linscomics.tumblr.com'
    _categories = ('LINS', )
3999
4000
4001
class WarAndPeasTumblr(GenericTumblrV1):
    """Retrieve War And Peas comics using the GenericTumblrV1 logic.

    Was previously on:
    - http://linscomics.tumblr.com
    """
    name = 'warandpeas-tumblr'
    long_name = 'War And Peas (from Tumblr)'
    url = 'http://warandpeas.tumblr.com'
    _categories = ('WARANDPEAS', )
4008
4009
4010
class OrigamiHotDish(GenericTumblrV1):
    """Retrieve Origami Hot Dish comics using the GenericTumblrV1 logic."""
    name = 'origamihotdish'
    long_name = 'Origami Hot Dish'
    url = 'http://origamihotdish.com'
4015
4016
4017
class HitAndMissComicsTumblr(GenericTumblrV1):
    """Retrieve Hit and Miss Comics using the GenericTumblrV1 logic."""
    name = 'hitandmiss'
    long_name = 'Hit and Miss Comics'
    url = 'http://hitandmisscomics.tumblr.com'
4022
4023
4024
class HMBlanc(GenericTumblrV1):
    """Retrieve HM Blanc comics using the GenericTumblrV1 logic."""
    name = 'hmblanc'
    long_name = 'HM Blanc'
    url = 'http://hmblanc.tumblr.com'
4029
4030
4031
class TalesOfAbsurdityTumblr(GenericTumblrV1):
    """Retrieve Tales Of Absurdity comics using the GenericTumblrV1 logic.

    Also published on:
    - http://talesofabsurdity.com
    - http://tapastic.com/series/Tales-Of-Absurdity
    """
    name = 'absurdity-tumblr'
    long_name = 'Tales of Absurdity (from Tumblr)'
    url = 'http://talesofabsurdity.tumblr.com'
    _categories = ('ABSURDITY', )
4039
4040
4041
class RobbieAndBobby(GenericTumblrV1):
    """Retrieve Robbie And Bobby comics using the GenericTumblrV1 logic.

    Also published on:
    - http://robbieandbobby.com
    """
    name = 'robbie-tumblr'
    long_name = 'Robbie And Bobby (from Tumblr)'
    url = 'http://robbieandbobby.tumblr.com'
4047
4048
4049
class ElectricBunnyComicTumblr(GenericTumblrV1):
    """Retrieve Electric Bunny Comics using the GenericTumblrV1 logic.

    Also published on:
    - http://www.electricbunnycomics.com/View/Comic/153/Welcome+to+Hell
    """
    name = 'bunny-tumblr'
    long_name = 'Electric Bunny Comic (from Tumblr)'
    url = 'http://electricbunnycomics.tumblr.com'
4055
4056
4057
class Hoomph(GenericTumblrV1):
    """Retrieve Hoomph comics using the GenericTumblrV1 logic."""
    name = 'hoomph'
    long_name = 'Hoomph'
    url = 'http://hoom.ph'
4062
4063
4064
class BFGFSTumblr(GenericTumblrV1):
    """Retrieve BFGFS comics using the GenericTumblrV1 logic.

    Also published on:
    - https://tapastic.com/series/BFGFS
    - http://bfgfs.com
    """
    name = 'bfgfs-tumblr'
    long_name = 'BFGFS (from Tumblr)'
    url = 'http://bfgfs.tumblr.com'
4071
4072
4073
class DoodleForFood(GenericTumblrV1):
    """Retrieve Doodle For Food comics using the GenericTumblrV1 logic.

    Also published on:
    - http://doodleforfood.com
    """
    name = 'doodle'
    long_name = 'Doodle For Food'
    url = 'http://doodleforfood.com'
4079
4080
4081
class CassandraCalinTumblr(GenericTumblrV1):
    """Retrieve C. Cassandra comics using the GenericTumblrV1 logic.

    Also published on:
    - http://cassandracalin.com
    - https://tapastic.com/series/C-Cassandra-comics
    """
    name = 'cassandra-tumblr'
    long_name = 'Cassandra Calin (from Tumblr)'
    url = 'http://c-cassandra.tumblr.com'
4088
4089
4090
class DougWasTaken(GenericTumblrV1):
    """Retrieve Doug Was Taken comics using the GenericTumblrV1 logic."""
    name = 'doug'
    long_name = 'Doug Was Taken'
    url = 'http://dougwastaken.tumblr.com'
4095
4096
4097
class MandatoryRollerCoaster(GenericTumblrV1):
    """Retrieve Mandatory Roller Coaster comics using the GenericTumblrV1 logic."""
    name = 'rollercoaster'
    long_name = 'Mandatory Roller Coaster'
    url = 'http://mandatoryrollercoaster.com'
4102
4103
4104
class CEstPasEnRegardantSesPompes(GenericTumblrV1):
    """Retrieve C'Est Pas En Regardant Ses Pompes (...) comics.

    Relies on the GenericTumblrV1 logic.
    """
    name = 'cperspqccltt'
    long_name = 'C Est Pas En Regardant Ses Pompes (...)'
    url = 'http://cperspqccltt.tumblr.com'
4109
4110
4111
class TheGrohlTroll(GenericTumblrV1):
    """Retrieve The Grohl Troll comics using the GenericTumblrV1 logic."""
    name = 'grohltroll'
    long_name = 'The Grohl Troll'
    url = 'http://thegrohltroll.com'
4116
4117
4118
class WebcomicName(GenericTumblrV1):
    """Retrieve Webcomic Name comics using the GenericTumblrV1 logic."""
    name = 'webcomicname'
    long_name = 'Webcomic Name'
    url = 'http://webcomicname.com'
4123
4124
4125
class BooksOfAdam(GenericTumblrV1):
    """Retrieve Books of Adam comics using the GenericTumblrV1 logic.

    Also published on:
    - http://www.booksofadam.com
    """
    name = 'booksofadam'
    long_name = 'Books of Adam'
    url = 'http://booksofadam.tumblr.com'
4131
4132
4133
class HarkAVagrant(GenericTumblrV1):
    """Retrieve Hark A Vagrant comics using the GenericTumblrV1 logic.

    Also published on:
    - http://www.harkavagrant.com
    """
    name = 'hark-tumblr'
    long_name = 'Hark A Vagrant (from Tumblr)'
    url = 'http://beatonna.tumblr.com'
4139
4140
4141
class OurSuperAdventureTumblr(GenericTumblrV1):
    """Retrieve Our Super Adventure comics using the GenericTumblrV1 logic.

    Also published on:
    - https://tapastic.com/series/Our-Super-Adventure
    - http://www.oursuperadventure.com
    Related site listed without further detail:
    - http://sarahgraley.com
    """
    name = 'superadventure-tumblr'
    long_name = 'Our Super Adventure (from Tumblr)'
    url = 'http://sarahssketchbook.tumblr.com'
4149
4150
4151
class JakeLikesOnions(GenericTumblrV1):
    """Retrieve Jake Likes Onions comics using the GenericTumblrV1 logic."""
    name = 'jake'
    long_name = 'Jake Likes Onions'
    url = 'http://jakelikesonions.com'
4156
4157
4158
class InYourFaceCake(GenericTumblrV1):
    """Retrieve In Your Face Cake comics using the GenericTumblrV1 logic."""
    name = 'inyourfacecake-tumblr'
    long_name = 'In Your Face Cake (from Tumblr)'
    url = 'http://in-your-face-cake.tumblr.com'
4163
4164
4165
class Robospunk(GenericTumblrV1):
    """Retrieve Robospunk comics using the GenericTumblrV1 logic."""
    name = 'robospunk'
    long_name = 'Robospunk'
    url = 'http://robospunk.com'
4170
4171
4172
class BananaTwinky(GenericTumblrV1):
    """Retrieve Banana Twinky comics using the GenericTumblrV1 logic."""
    name = 'banana'
    long_name = 'Banana Twinky'
    url = 'http://bananatwinky.tumblr.com'
4177
4178
4179
class YesterdaysPopcornTumblr(GenericTumblrV1):
    """Retrieve Yesterday's Popcorn comics using the GenericTumblrV1 logic.

    Also published on:
    - http://www.yesterdayspopcorn.com
    - https://tapastic.com/series/Yesterdays-Popcorn
    """
    name = 'popcorn-tumblr'
    long_name = 'Yesterday\'s Popcorn (from Tumblr)'
    url = 'http://yesterdayspopcorn.tumblr.com'
4186
4187
4188
class TwistedDoodles(GenericTumblrV1):
    """Retrieve Twisted Doodles comics using the GenericTumblrV1 logic."""
    name = 'twisted'
    long_name = 'Twisted Doodles'
    url = 'http://www.twisteddoodles.com'
4193
4194
4195
class UbertoolTumblr(GenericTumblrV1):
    """Retrieve Ubertool comics using the GenericTumblrV1 logic.

    Also published on:
    - http://ubertoolcomic.com
    - https://tapastic.com/series/ubertool
    """
    name = 'ubertool-tumblr'
    long_name = 'Ubertool (from Tumblr)'
    url = 'http://ubertool.tumblr.com'
    _categories = ('UBERTOOL', )
4203
4204
4205
class LittleLifeLinesTumblr(GenericTumblrV1):
    """Retrieve Little Life Lines comics using the GenericTumblrV1 logic.

    Also published on:
    - http://www.littlelifelines.com
    """
    name = 'life-tumblr'
    long_name = 'Little Life Lines (from Tumblr)'
    url = 'https://little-life-lines.tumblr.com'
4211
4212
4213
class TheyCanTalk(GenericTumblrV1):
    """Retrieve They Can Talk comics using the GenericTumblrV1 logic."""
    name = 'theycantalk'
    long_name = 'They Can Talk'
    url = 'http://theycantalk.com'
4218
4219
4220
class Will5NeverCome(GenericTumblrV1):
    """Retrieve Will 5:00 Never Come comics using the GenericTumblrV1 logic."""
    name = 'will5'
    long_name = 'Will 5:00 Never Come ?'
    url = 'http://will5nevercome.com'
4225
4226
4227
class Sephko(GenericTumblrV1):
    """Retrieve Sephko Comics using the GenericTumblrV1 logic.

    Also published on:
    - http://www.sephko.com
    """
    name = 'sephko'
    long_name = 'Sephko'
    url = 'http://sephko.tumblr.com'
4233
4234
4235
class BlazersAtDawn(GenericTumblrV1):
    """Retrieve Blazers At Dawn Comics using the GenericTumblrV1 logic."""
    name = 'blazers'
    long_name = 'Blazers At Dawn'
    url = 'http://blazersatdawn.tumblr.com'
4240
4241
4242
class ArtByMoga(GenericEmptyComic, GenericTumblrV1):
    """Retrieve Art By Moga Comics using the GenericTumblrV1 logic.

    Deactivated (GenericEmptyComic is listed first among the bases) because
    it downloads too many things.
    """
    name = 'moga'
    long_name = 'Art By Moga'
    url = 'http://artbymoga.tumblr.com'
4247
4248
4249
class VerbalVomitTumblr(GenericTumblrV1):
    """Retrieve Verbal Vomit comics using the GenericTumblrV1 logic.

    Also published on:
    - http://www.verbal-vomit.com
    """
    name = 'vomit-tumblr'
    long_name = 'Verbal Vomit (from Tumblr)'
    url = 'http://verbalvomits.tumblr.com'
4255
4256
4257
class LibraryComic(GenericTumblrV1):
    """Retrieve LibraryComic using the GenericTumblrV1 logic.

    Also published on:
    - http://librarycomic.com
    """
    name = 'library-tumblr'
    long_name = 'LibraryComic (from Tumblr)'
    url = 'http://librarycomic.tumblr.com'
4263
4264
4265
class TizzyStitchBirdTumblr(GenericTumblrV1):
    """Retrieve Tizzy Stitch Bird comics using the GenericTumblrV1 logic.

    Also published on:
    - http://tizzystitchbird.com
    - https://tapastic.com/series/TizzyStitchbird
    - http://www.webtoons.com/en/challenge/tizzy-stitchbird/list?title_no=50082
    """
    name = 'tizzy-tumblr'
    long_name = 'Tizzy Stitch Bird (from Tumblr)'
    url = 'http://tizzystitchbird.tumblr.com'
4273
4274
4275
class VictimsOfCircumsolarTumblr(GenericTumblrV1):
    """Retrieve VictimsOfCircumsolar comics using the GenericTumblrV1 logic.

    Also published on:
    - http://www.victimsofcircumsolar.com
    """
    name = 'circumsolar-tumblr'
    long_name = 'Victims Of Circumsolar (from Tumblr)'
    url = 'http://victimsofcomics.tumblr.com'
4281
4282
4283
class RockPaperCynicTumblr(GenericTumblrV1):
    """Retrieve RockPaperCynic comics using the GenericTumblrV1 logic.

    Also published on:
    - http://www.rockpapercynic.com
    - https://tapastic.com/series/rockpapercynic
    """
    name = 'rpc-tumblr'
    long_name = 'Rock Paper Cynic (from Tumblr)'
    url = 'http://rockpapercynic.tumblr.com'
4290
4291
4292
class CatanaComics(GenericTumblrV1):
    """Retrieve Catana comics using the GenericTumblrV1 logic."""
    name = 'catana'
    long_name = 'Catana'
    url = 'http://www.catanacomics.com'
4297
4298
4299
class OffTheLeashDogTumblr(GenericTumblrV1):
    """Retrieve Off The Leash Dog comics using the GenericTumblrV1 logic.

    Also published on:
    - http://offtheleashdogcartoons.com
    - http://www.rupertfawcettcartoons.com
    """
    name = 'offtheleash-tumblr'
    long_name = 'Off The Leash Dog (from Tumblr)'
    url = 'http://rupertfawcettsdoggyblog.tumblr.com'
    _categories = ('FAWCETT', )
4307
4308
4309
class HorovitzComics(GenericListableComic):
    """Base class sharing the scraping logic of the Horovitz comics.

    `link_re` is a placeholder here: it is used both to filter the archive
    links and to extract the comic number (first group) from their href, so
    concrete subclasses are expected to replace it with a compiled pattern.
    """
    url = 'http://www.horovitzcomics.com'
    _categories = ('HOROVITZ', )
    # Groups are read as year, month, day (in that order) by get_comic_info.
    img_re = re.compile('.*comics/([0-9]*)/([0-9]*)/([0-9]*)/.*$')
    link_re = NotImplemented
    get_url_from_archive_element = join_cls_url_to_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Describe one comic from its page and its archive link.

        `soup` is searched for the single <img id="comic"> element; `link`
        is the archive anchor providing the href (comic number) and the
        title. Returns a dict with the keys 'title', 'day', 'month', 'year',
        'img' and 'num'.
        """
        comic_number = int(cls.link_re.match(link['href']).group(1))
        comic_title = link.string
        comic_imgs = soup.find_all('img', id='comic')
        assert len(comic_imgs) == 1
        # The publication date is encoded in the image path.
        date_match = cls.img_re.match(comic_imgs[0]['src'])
        year, month, day = (int(part) for part in date_match.groups())
        return {
            'title': comic_title,
            'day': day,
            'month': month,
            'year': year,
            'img': [img['src'] for img in comic_imgs],
            'num': comic_number,
        }

    @classmethod
    def get_archive_elements(cls):
        """Return the archive links matching `link_re`, in reversed page order."""
        archive_soup = get_soup_at_url('http://www.horovitzcomics.com/comics/archive/')
        matching_links = archive_soup.find_all('a', href=cls.link_re)
        return reversed(matching_links)
4340
4341
4342
class HorovitzNew(HorovitzComics):
    """Retrieve the "new" Horovitz comics (archive links under /comics/new/)."""
    name = 'horovitznew'
    long_name = 'Horovitz New'
    # The single group captures the comic number.
    link_re = re.compile('^/comics/new/([0-9]+)$')
4347
4348
4349
class HorovitzClassic(HorovitzComics):
    """Retrieve the "classic" Horovitz comics (archive links under /comics/classic/)."""
    name = 'horovitzclassic'
    long_name = 'Horovitz Classic'
    # The single group captures the comic number.
    link_re = re.compile('^/comics/classic/([0-9]+)$')
4354
4355
4356
class GenericGoComic(GenericNavigableComic):
    """Base class holding the scraping logic shared by gocomics.com comics."""
    _categories = ('GOCOMIC', )

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'backward' button found on the comic's main page."""
        main_page = get_soup_at_url(cls.url)
        return main_page.find('a', class_='fa btn btn-outline-default btn-circle fa-backward sm ')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the navigation anchor of `last_soup`.

        The right caret is looked up when `next_` is truthy, the left caret
        otherwise.
        """
        if next_:
            css_class = 'fa btn btn-outline-default btn-circle fa-caret-right sm '
        else:
            css_class = 'fa btn btn-outline-default btn-circle fa-caret-left sm '
        return last_soup.find('a', class_=css_class)

    @classmethod
    def get_url_from_link(cls, link):
        """Return the link's href joined onto the gocomics.com root."""
        return urljoin_wrapper('http://www.gocomics.com', link['href'])

    @classmethod
    def get_comic_info(cls, soup, link):
        """Describe one comic from the <meta> tags and picture of its page.

        Returns a dict with the keys 'day', 'month', 'year', 'img', 'author'
        and 'tags'. `link` is not used.
        """
        def meta_content(prop):
            """Return the content of the first <meta> with this property."""
            return soup.find('meta', property=prop)['content']

        published = string_to_date(meta_content('article:published_time'), "%Y-%m-%d")
        picture = soup.find('picture', class_='img-fluid item-comic-image')
        images = picture.find_all('img')
        author = meta_content('article:author')
        tags = meta_content('article:tag')
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [img['src'] for img in images],
            'author': author,
            'tags': tags,
        }
4393
4394
4395
class PearlsBeforeSwine(GenericGoComic):
    """Retrieve Pearls Before Swine comics from gocomics.com."""
    name = 'pearls'
    long_name = 'Pearls Before Swine'
    url = 'http://www.gocomics.com/pearlsbeforeswine'
4400
4401
4402
class Peanuts(GenericGoComic):
    """Retrieve Peanuts comics from gocomics.com."""
    name = 'peanuts'
    long_name = 'Peanuts'
    url = 'http://www.gocomics.com/peanuts'
4407
4408
4409
class MattWuerker(GenericGoComic):
    """Retrieve Matt Wuerker comics from gocomics.com."""
    name = 'wuerker'
    long_name = 'Matt Wuerker'
    url = 'http://www.gocomics.com/mattwuerker'
4414
4415
4416
class TomToles(GenericGoComic):
    """Retrieve Tom Toles comics from gocomics.com."""
    name = 'toles'
    long_name = 'Tom Toles'
    url = 'http://www.gocomics.com/tomtoles'
4421
4422
4423
class BreakOfDay(GenericGoComic):
    """Retrieve Break Of Day comics from gocomics.com."""
    name = 'breakofday'
    long_name = 'Break Of Day'
    url = 'http://www.gocomics.com/break-of-day'
4428
4429
4430
class Brevity(GenericGoComic):
    """Retrieve Brevity comics from gocomics.com."""
    name = 'brevity'
    long_name = 'Brevity'
    url = 'http://www.gocomics.com/brevitypanel'
4435
4436
4437
class MichaelRamirez(GenericGoComic):
    """Retrieve Michael Ramirez comics from gocomics.com."""
    name = 'ramirez'
    long_name = 'Michael Ramirez'
    url = 'http://www.gocomics.com/michaelramirez'
4442
4443
4444
class MikeLuckovich(GenericGoComic):
    """Retrieve Mike Luckovich comics from gocomics.com."""
    name = 'luckovich'
    long_name = 'Mike Luckovich'
    url = 'http://www.gocomics.com/mikeluckovich'
4449
4450
4451
class JimBenton(GenericGoComic):
    """Retrieve Jim Benton comics from gocomics.com.

    Also published on:
    - http://jimbenton.tumblr.com
    """
    name = 'benton'
    long_name = 'Jim Benton'
    url = 'http://www.gocomics.com/jim-benton-cartoons'
4457
4458
4459
class TheArgyleSweater(GenericGoComic):
    """Retrieve The Argyle Sweater comics from gocomics.com."""
    name = 'argyle'
    long_name = 'Argyle Sweater'
    url = 'http://www.gocomics.com/theargylesweater'
4464
4465
4466
class SunnyStreet(GenericGoComic):
    """Retrieve Sunny Street comics from gocomics.com.

    Also published on:
    - http://www.sunnystreetcomics.com
    """
    name = 'sunny'
    long_name = 'Sunny Street'
    url = 'http://www.gocomics.com/sunny-street'
4472
4473
4474
class OffTheMark(GenericGoComic):
    """Retrieve Off The Mark comics from gocomics.com.

    Also published on:
    - https://www.offthemark.com
    """
    name = 'offthemark'
    long_name = 'Off The Mark'
    url = 'http://www.gocomics.com/offthemark'
4480
4481
4482
class WuMo(GenericGoComic):
    """Retrieve WuMo comics from gocomics.com.

    Also published on:
    - http://wumo.com
    """
    name = 'wumo'
    long_name = 'WuMo'
    url = 'http://www.gocomics.com/wumo'
4488
4489
4490
class LunarBaboon(GenericGoComic):
    """Retrieve Lunar Baboon comics from gocomics.com.

    Also published on:
    - http://www.lunarbaboon.com
    - https://tapastic.com/series/Lunarbaboon
    """
    name = 'lunarbaboon'
    long_name = 'Lunar Baboon'
    url = 'http://www.gocomics.com/lunarbaboon'
4497
4498
4499
class SandersenGocomic(GenericGoComic):
    """Retrieve Sarah Andersen comics from gocomics.com.

    Also published on:
    - http://sarahcandersen.com
    - http://tapastic.com/series/Doodle-Time
    """
    name = 'sandersen-goc'
    long_name = 'Sarah Andersen (from GoComics)'
    url = 'http://www.gocomics.com/sarahs-scribbles'
4506
4507
4508
class SaturdayMorningBreakfastCerealGoComic(GenericGoComic):
    """Retrieve Saturday Morning Breakfast Cereal comics from gocomics.com.

    Also published on:
    - http://smbc-comics.tumblr.com
    - http://www.smbc-comics.com
    """
    name = 'smbc-goc'
    long_name = 'Saturday Morning Breakfast Cereal (from GoComics)'
    url = 'http://www.gocomics.com/saturday-morning-breakfast-cereal'
    _categories = ('SMBC', )
4516
4517
4518
class CalvinAndHobbesGoComic(GenericGoComic):
    """Retrieve Calvin and Hobbes comics from gocomics.com.

    This is the GoComics source, not
    http://marcel-oehler.marcellosendos.ch/comics/ch/
    """
    name = 'calvin-goc'
    long_name = 'Calvin and Hobbes (from GoComics)'
    url = 'http://www.gocomics.com/calvinandhobbes'
4524
4525
4526
class RallGoComic(GenericGoComic):
    """Retrieve Ted Rall comics from gocomics.com.

    Also published on:
    - http://rall.com/comic
    """
    name = 'rall-goc'
    long_name = 'Ted Rall (from GoComics)'
    url = 'http://www.gocomics.com/ted-rall'
    _categories = ('RALL', )
4533
4534
4535
class TheAwkwardYetiGoComic(GenericGoComic):
    """Retrieve The Awkward Yeti comics from gocomics.com.

    Also published on:
    - http://larstheyeti.tumblr.com
    - http://theawkwardyeti.com
    - https://tapastic.com/series/TheAwkwardYeti
    """
    name = 'yeti-goc'
    long_name = 'The Awkward Yeti (from GoComics)'
    url = 'http://www.gocomics.com/the-awkward-yeti'
    _categories = ('YETI', )
4544
4545
4546
class BerkeleyMewsGoComics(GenericGoComic):
    """Retrieve Berkeley Mews comics from gocomics.com.

    Also published on:
    - http://mews.tumblr.com
    - http://www.berkeleymews.com
    """
    name = 'berkeley-goc'
    long_name = 'Berkeley Mews (from GoComics)'
    url = 'http://www.gocomics.com/berkeley-mews'
    _categories = ('BERKELEY', )
4554
4555
4556
class SheldonGoComics(GenericGoComic):
    """Retrieve Sheldon comics from gocomics.com.

    Also published on:
    - http://www.sheldoncomics.com
    """
    name = 'sheldon-goc'
    long_name = 'Sheldon Comics (from GoComics)'
    url = 'http://www.gocomics.com/sheldon'
4562
4563
4564
class FowlLanguageGoComics(GenericGoComic):
    """Retrieve Fowl Language comics from gocomics.com.

    Also published on:
    - http://www.fowllanguagecomics.com
    - http://tapastic.com/series/Fowl-Language-Comics
    - http://fowllanguagecomics.tumblr.com
    """
    name = 'fowllanguage-goc'
    long_name = 'Fowl Language Comics (from GoComics)'
    url = 'http://www.gocomics.com/fowl-language'
    _categories = ('FOWLLANGUAGE', )
4573
4574
4575
class NickAnderson(GenericGoComic):
    """Retrieve Nick Anderson comics from gocomics.com."""
    name = 'nickanderson'
    long_name = 'Nick Anderson'
    url = 'http://www.gocomics.com/nickanderson'
4580
4581
4582
class GarfieldGoComics(GenericGoComic):
    """Retrieve Garfield comics from gocomics.com.

    Also published on:
    - http://garfield.com
    """
    name = 'garfield-goc'
    long_name = 'Garfield (from GoComics)'
    url = 'http://www.gocomics.com/garfield'
    _categories = ('GARFIELD', )
4589
4590
4591
class DorrisMcGoComics(GenericGoComic):
    """Retrieve Dorris Mc Comics from gocomics.com.

    Also published on:
    - http://dorrismccomics.com
    """
    name = 'dorrismc-goc'
    long_name = 'Dorris Mc (from GoComics)'
    url = 'http://www.gocomics.com/dorris-mccomics'
4597
4598
4599
class FoxTrot(GenericGoComic):
    """Retrieve FoxTrot comics from gocomics.com."""
    name = 'foxtrot'
    long_name = 'FoxTrot'
    url = 'http://www.gocomics.com/foxtrot'
4604
4605
4606
class FoxTrotClassics(GenericGoComic):
    """Retrieve FoxTrot Classics comics from gocomics.com."""
    name = 'foxtrot-classics'
    long_name = 'FoxTrot Classics'
    url = 'http://www.gocomics.com/foxtrotclassics'
4611
4612
4613
class MisterAndMeGoComics(GenericEmptyComic, GenericGoComic):
    """Retrieve Mister & Me Comics from gocomics.com.

    GenericEmptyComic is listed first among the bases; the original note on
    this class reads "Removed ?".

    Also published on:
    - http://www.mister-and-me.com
    - https://tapastic.com/series/Mister-and-Me
    """
    name = 'mister-goc'
    long_name = 'Mister & Me (from GoComics)'
    url = 'http://www.gocomics.com/mister-and-me'
4620
4621
4622
class NonSequitur(GenericGoComic):
    """Retrieve Non Sequitur (Wiley Miller) comics from gocomics.com."""
    name = 'nonsequitur'
    long_name = 'Non Sequitur'
    url = 'http://www.gocomics.com/nonsequitur'
4627
4628
4629
class GenericTapasticComic(GenericListableComic):
    """Generic class to handle the logic common to comics from tapastic.com."""
    _categories = ('TAPASTIC', )

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Get information about a particular comics.

        `soup` is the parsed episode page and `archive_elt` the matching
        entry of the episode list (read for 'publishDate', 'title' and,
        through get_url_from_archive_element, 'id').

        Returns a dict with the keys 'day', 'year', 'month', 'img' and
        'title', or None (after printing a notice) when the page has no
        'art-image' picture.
        """
        # 'publishDate' is divided by 1000 before being used as a timestamp -
        # presumably it is expressed in milliseconds; confirm against the site.
        timestamp = int(archive_elt['publishDate']) / 1000.0
        day = datetime.datetime.fromtimestamp(timestamp).date()
        imgs = soup.find_all('img', class_='art-image')
        if not imgs:
            print("Comic %s is being uploaded, retry later" % cls.get_url_from_archive_element(archive_elt))
            return None
        return {
            'day': day.day,
            'year': day.year,
            'month': day.month,
            'img': [i['src'] for i in imgs],
            'title': archive_elt['title'],
        }

    @classmethod
    def get_url_from_archive_element(cls, archive_elt):
        """Return the episode URL built from the entry's 'id'."""
        return 'http://tapastic.com/episode/' + str(archive_elt['id'])

    @classmethod
    def get_archive_elements(cls):
        """Return the episode list embedded in the series page.

        The list is stored in the javascript part of the page, on a line
        starting with 'episodeList : ' and ending with ','. There is no known
        clean way to get it, so the page is scanned line by line and the
        first matching line is parsed as JSON.

        Raises IndexError when no line of the page matches.
        """
        pref, suff = 'episodeList : ', ','
        lines = [raw.decode('utf-8').strip()
                 for raw in urlopen_wrapper(cls.url).readlines()]
        candidates = [line[len(pref):-len(suff)]
                      for line in lines
                      if line.startswith(pref) and line.endswith(suff)]
        if not candidates:
            # Same exception type as the bare `[0]` lookup, with a message.
            raise IndexError("No line starting with %r found at %s" % (pref, cls.url))
        return json.loads(candidates[0])
4662
4663
4664
class VegetablesForDessert(GenericTapasticComic):
4665
    """Class to retrieve Vegetables For Dessert comics."""
    # Attribute-only subclass: behaviour is inherited from GenericTapasticComic,
    # whose get_archive_elements downloads `url` (the series page).
4666
    # Also on http://vegetablesfordessert.tumblr.com
4667
    name = 'vegetables'
4668
    long_name = 'Vegetables For Dessert'
4669
    url = 'http://tapastic.com/series/vegetablesfordessert'
4670
4671
4672
class FowlLanguageTapa(GenericTapasticComic):
4673
    """Class to retrieve Fowl Language comics."""
    # Attribute-only subclass: behaviour is inherited from GenericTapasticComic,
    # whose get_archive_elements downloads `url` (the series page).
4674
    # Also on http://www.fowllanguagecomics.com
4675
    # Also on http://fowllanguagecomics.tumblr.com
4676
    # Also on http://www.gocomics.com/fowl-language
4677
    name = 'fowllanguage-tapa'
4678
    long_name = 'Fowl Language Comics (from Tapastic)'
4679
    url = 'http://tapastic.com/series/Fowl-Language-Comics'
4680
    _categories = ('FOWLLANGUAGE', )
4681
4682
4683
class OscillatingProfundities(GenericTapasticComic):
4684
    """Class to retrieve Oscillating Profundities comics."""
    # Attribute-only subclass: behaviour is inherited from GenericTapasticComic,
    # whose get_archive_elements downloads `url` (the series page).
4685
    name = 'oscillating'
4686
    long_name = 'Oscillating Profundities'
4687
    url = 'http://tapastic.com/series/oscillatingprofundities'
4688
4689
4690
class ZnoflatsComics(GenericTapasticComic):
4691
    """Class to retrieve Znoflats comics."""
    # Attribute-only subclass: behaviour is inherited from GenericTapasticComic,
    # whose get_archive_elements downloads `url` (the series page).
4692
    name = 'znoflats'
4693
    long_name = 'Znoflats Comics'
4694
    url = 'http://tapastic.com/series/Znoflats-Comics'
4695
4696
4697
class SandersenTapastic(GenericTapasticComic):
4698
    """Class to retrieve Sarah Andersen comics."""
    # Attribute-only subclass: behaviour is inherited from GenericTapasticComic,
    # whose get_archive_elements downloads `url` (the series page).
4699
    # Also on http://sarahcandersen.com
4700
    # Also on http://www.gocomics.com/sarahs-scribbles
4701
    name = 'sandersen-tapa'
4702
    long_name = 'Sarah Andersen (from Tapastic)'
4703
    url = 'http://tapastic.com/series/Doodle-Time'
4704
4705
4706
class TubeyToonsTapastic(GenericTapasticComic):
4707
    """Class to retrieve TubeyToons comics."""
    # Attribute-only subclass: behaviour is inherited from GenericTapasticComic,
    # whose get_archive_elements downloads `url` (the series page).
4708
    # Also on http://tubeytoons.com
4709
    # Also on http://tubeytoons.tumblr.com
4710
    name = 'tubeytoons-tapa'
4711
    long_name = 'Tubey Toons (from Tapastic)'
4712
    url = 'http://tapastic.com/series/Tubey-Toons'
4713
    # NOTE(review): 'TUNEYTOONS' looks like a typo of 'TUBEYTOONS'; confirm
    # against the sibling TubeyToons classes before renaming the tag.
    _categories = ('TUNEYTOONS', )
4714
4715
4716
class AnythingComicTapastic(GenericTapasticComic):
4717
    """Class to retrieve Anything Comics."""
    # Attribute-only subclass: behaviour is inherited from GenericTapasticComic,
    # whose get_archive_elements downloads `url` (the series page).
4718
    # Also on http://www.anythingcomic.com
4719
    name = 'anythingcomic-tapa'
4720
    long_name = 'Anything Comic (from Tapastic)'
4721
    url = 'http://tapastic.com/series/anything'
4722
4723
4724
class UnearthedComicsTapastic(GenericTapasticComic):
4725
    """Class to retrieve Unearthed comics."""
    # Attribute-only subclass: behaviour is inherited from GenericTapasticComic,
    # whose get_archive_elements downloads `url` (the series page).
4726
    # Also on http://unearthedcomics.com
4727
    # Also on http://unearthedcomics.tumblr.com
4728
    name = 'unearthed-tapa'
4729
    long_name = 'Unearthed Comics (from Tapastic)'
4730
    url = 'http://tapastic.com/series/UnearthedComics'
4731
    _categories = ('UNEARTHED', )
4732
4733
4734
class EverythingsStupidTapastic(GenericTapasticComic):
4735
    """Class to retrieve Everything's stupid Comics."""
    # Attribute-only subclass: behaviour is inherited from GenericTapasticComic,
    # whose get_archive_elements downloads `url` (the series page).
4736
    # Also on http://www.webtoons.com/en/challenge/everythings-stupid/list?title_no=14591
4737
    # Also on http://everythingsstupid.net
4738
    name = 'stupid-tapa'
4739
    long_name = "Everything's Stupid (from Tapastic)"
4740
    url = 'http://tapastic.com/series/EverythingsStupid'
4741
4742
4743
class JustSayEhTapastic(GenericTapasticComic):
4744
    """Class to retrieve Just Say Eh comics."""
    # Attribute-only subclass: behaviour is inherited from GenericTapasticComic,
    # whose get_archive_elements downloads `url` (the series page).
4745
    # Also on http://www.justsayeh.com
4746
    name = 'justsayeh-tapa'
4747
    long_name = 'Just Say Eh (from Tapastic)'
4748
    url = 'http://tapastic.com/series/Just-Say-Eh'
4749
4750
4751
class ThorsThundershackTapastic(GenericTapasticComic):
4752
    """Class to retrieve Thor's Thundershack comics."""
    # Attribute-only subclass: behaviour is inherited from GenericTapasticComic,
    # whose get_archive_elements downloads `url` (the series page).
4753
    # Also on http://www.thorsthundershack.com
4754
    name = 'thor-tapa'
4755
    long_name = 'Thor\'s Thundershack (from Tapastic)'
4756
    # NOTE(review): the slug ends in 'Thundershac' (no 'k'); possibly truncated
    # by the site itself - confirm the URL resolves before "fixing" it.
    url = 'http://tapastic.com/series/Thors-Thundershac'
4757
    _categories = ('THOR', )
4758
4759
4760
class OwlTurdTapastic(GenericTapasticComic):
4761
    """Class to retrieve Owl Turd comics."""
    # Attribute-only subclass: behaviour is inherited from GenericTapasticComic,
    # whose get_archive_elements downloads `url` (the series page).
4762
    # Also on http://owlturd.com
4763
    name = 'owlturd-tapa'
4764
    long_name = 'Owl Turd (from Tapastic)'
4765
    url = 'http://tapastic.com/series/Owl-Turd-Comix'
4766
    _categories = ('OWLTURD', )
4767
4768
4769
class GoneIntoRaptureTapastic(GenericTapasticComic):
4770
    """Class to retrieve Gone Into Rapture comics."""
    # Attribute-only subclass: behaviour is inherited from GenericTapasticComic,
    # whose get_archive_elements downloads `url` (the series page).
4771
    # Also on http://goneintorapture.tumblr.com
4772
    # Also on http://www.goneintorapture.com
4773
    name = 'rapture-tapa'
4774
    long_name = 'Gone Into Rapture (from Tapastic)'
4775
    url = 'http://tapastic.com/series/Goneintorapture'
4776
4777
4778
class HeckIfIKnowComicsTapa(GenericTapasticComic):
4779
    """Class to retrieve Heck If I Know Comics."""
    # Attribute-only subclass: behaviour is inherited from GenericTapasticComic,
    # whose get_archive_elements downloads `url` (the series page).
4780
    # Also on http://heckifiknowcomics.com
4781
    name = 'heck-tapa'
4782
    long_name = 'Heck if I Know comics (from Tapastic)'
4783
    url = 'http://tapastic.com/series/Regular'
4784
4785
4786
class CheerUpEmoKidTapa(GenericTapasticComic):
4787
    """Class to retrieve CheerUpEmoKid comics."""
    # Attribute-only subclass: behaviour is inherited from GenericTapasticComic,
    # whose get_archive_elements downloads `url` (the series page).
4788
    # Also on http://www.cheerupemokid.com
4789
    # Also on http://enzocomics.tumblr.com
4790
    name = 'cuek-tapa'
4791
    long_name = 'Cheer Up Emo Kid (from Tapastic)'
4792
    url = 'http://tapastic.com/series/CUEK'
4793
4794
4795
class BigFootJusticeTapa(GenericTapasticComic):
4796
    """Class to retrieve Big Foot Justice comics."""
    # Attribute-only subclass: behaviour is inherited from GenericTapasticComic,
    # whose get_archive_elements downloads `url` (the series page).
4797
    # Also on http://bigfootjustice.com
4798
    name = 'bigfoot-tapa'
4799
    long_name = 'Big Foot Justice (from Tapastic)'
4800
    url = 'http://tapastic.com/series/bigfoot-justice'
4801
4802
4803
class UpAndOutTapa(GenericTapasticComic):
4804
    """Class to retrieve Up & Out comics."""
    # Attribute-only subclass: behaviour is inherited from GenericTapasticComic,
    # whose get_archive_elements downloads `url` (the series page).
4805
    # Also on http://upandoutcomic.tumblr.com
4806
    name = 'upandout-tapa'
4807
    long_name = 'Up And Out (from Tapastic)'
4808
    url = 'http://tapastic.com/series/UP-and-OUT'
4809
4810
4811
class ToonHoleTapa(GenericTapasticComic):
4812
    """Class to retrieve Toon Holes comics."""
    # Attribute-only subclass: behaviour is inherited from GenericTapasticComic,
    # whose get_archive_elements downloads `url` (the series page).
4813
    # Also on http://www.toonhole.com
4814
    name = 'toonhole-tapa'
4815
    long_name = 'Toon Hole (from Tapastic)'
4816
    url = 'http://tapastic.com/series/TOONHOLE'
4817
4818
4819
class AngryAtNothingTapa(GenericTapasticComic):
4820
    """Class to retrieve Angry at Nothing comics."""
    # Attribute-only subclass: behaviour is inherited from GenericTapasticComic,
    # whose get_archive_elements downloads `url` (the series page).
4821
    # Also on http://www.angryatnothing.net
4822
    name = 'angry-tapa'
4823
    long_name = 'Angry At Nothing (from Tapastic)'
4824
    url = 'http://tapastic.com/series/Comics-yeah-definitely-comics-'
4825
4826
4827
class LeleozTapa(GenericTapasticComic):
4828
    """Class to retrieve Leleoz comics."""
    # Attribute-only subclass: behaviour is inherited from GenericTapasticComic,
    # whose get_archive_elements downloads `url` (the series page).
4829
    # Also on http://leleozcomics.tumblr.com
4830
    name = 'leleoz-tapa'
4831
    long_name = 'Leleoz (from Tapastic)'
4832
    url = 'https://tapastic.com/series/Leleoz'
4833
4834
4835
class TheAwkwardYetiTapa(GenericTapasticComic):
4836
    """Class to retrieve The Awkward Yeti comics."""
    # Attribute-only subclass: behaviour is inherited from GenericTapasticComic,
    # whose get_archive_elements downloads `url` (the series page).
4837
    # Also on http://www.gocomics.com/the-awkward-yeti
4838
    # Also on http://theawkwardyeti.com
4839
    # Also on http://larstheyeti.tumblr.com
4840
    name = 'yeti-tapa'
4841
    long_name = 'The Awkward Yeti (from Tapastic)'
4842
    url = 'https://tapastic.com/series/TheAwkwardYeti'
4843
    _categories = ('YETI', )
4844
4845
4846
class AsPerUsualTapa(GenericTapasticComic):
    """Class to retrieve As Per Usual comics."""
    # Attribute-only subclass: behaviour is inherited from GenericTapasticComic,
    # whose get_archive_elements downloads `url` (the series page).
    # Also on http://as-per-usual.tumblr.com
    name = 'usual-tapa'
    long_name = 'As Per Usual (from Tapastic)'
    url = 'https://tapastic.com/series/AsPerUsual'
    # Spelled `_categories` (leading underscore) like GenericTapasticComic and
    # the other comic classes; the former bare `categories` attribute matched
    # no sibling and presumably was never picked up - verify against the base class.
    _categories = ('DAMILEE', )
4853
4854
4855
class HotComicsForCoolPeopleTapa(GenericTapasticComic):
    """Class to retrieve Hot Comics For Cool People."""
    # Attribute-only subclass: behaviour is inherited from GenericTapasticComic,
    # whose get_archive_elements downloads `url` (the series page).
    # Also on http://hotcomicsforcoolpeople.tumblr.com
    # Also on http://hotcomics.biz (links to tumblr)
    # Also on http://hcfcp.com (links to tumblr)
    name = 'hotcomics-tapa'
    long_name = 'Hot Comics For Cool People (from Tapastic)'
    url = 'https://tapastic.com/series/Hot-Comics-For-Cool-People'
    # Spelled `_categories` (leading underscore) like GenericTapasticComic and
    # the other comic classes; the former bare `categories` attribute matched
    # no sibling and presumably was never picked up - verify against the base class.
    _categories = ('DAMILEE', )
4864
4865
4866
class OneOneOneOneComicTapa(GenericTapasticComic):
4867
    """Class to retrieve 1111 Comics."""
    # Attribute-only subclass: behaviour is inherited from GenericTapasticComic,
    # whose get_archive_elements downloads `url` (the series page).
4868
    # Also on http://www.1111comics.me
4869
    # Also on http://comics1111.tumblr.com
4870
    name = '1111-tapa'
4871
    long_name = '1111 Comics (from Tapastic)'
4872
    url = 'https://tapastic.com/series/1111-Comics'
4873
    _categories = ('ONEONEONEONE', )
4874
4875
4876
class TumbleDryTapa(GenericTapasticComic):
    """Class to retrieve Tumble Dry comics."""
    # Attribute-only subclass: behaviour is inherited from GenericTapasticComic,
    # whose get_archive_elements downloads `url` (the series page).
    # Also on http://tumbledrycomics.com
    name = 'tumbledry-tapa'
    # Display name spelled 'Tumble Dry' to agree with the docstring and the URL
    # (it previously read 'Tumblr Dry').
    long_name = 'Tumble Dry (from Tapastic)'
    url = 'https://tapastic.com/series/TumbleDryComics'
4882
4883
4884
class DeadlyPanelTapa(GenericTapasticComic):
4885
    """Class to retrieve Deadly Panel comics."""
    # Attribute-only subclass: behaviour is inherited from GenericTapasticComic,
    # whose get_archive_elements downloads `url` (the series page).
4886
    # Also on http://www.deadlypanel.com
4887
    name = 'deadly-tapa'
4888
    long_name = 'Deadly Panel (from Tapastic)'
4889
    url = 'https://tapastic.com/series/deadlypanel'
4890
4891
4892
class ChrisHallbeckMaxiTapa(GenericTapasticComic):
4893
    """Class to retrieve Chris Hallbeck comics."""
    # Attribute-only subclass: behaviour is inherited from GenericTapasticComic,
    # whose get_archive_elements downloads `url` (the series page).
    # NOTE(review): 'Hallback' in long_name/_categories disagrees with
    # 'Hallbeck' in the docstring and URLs; confirm before renaming, since the
    # category tag may be shared with classes outside this section.
4894
    # Also on http://chrishallbeck.tumblr.com
4895
    # Also on http://maximumble.com
4896
    name = 'hallbeckmaxi-tapa'
4897
    long_name = 'Chris Hallback - Maximumble (from Tapastic)'
4898
    url = 'https://tapastic.com/series/Maximumble'
4899
    _categories = ('HALLBACK', )
4900
4901
4902
class ChrisHallbeckMiniTapa(GenericTapasticComic):
4903
    """Class to retrieve Chris Hallbeck comics."""
    # Attribute-only subclass: behaviour is inherited from GenericTapasticComic,
    # whose get_archive_elements downloads `url` (the series page).
    # NOTE(review): 'Hallback' in long_name/_categories disagrees with
    # 'Hallbeck' in the docstring and URLs; confirm before renaming, since the
    # category tag may be shared with classes outside this section.
4904
    # Also on http://chrishallbeck.tumblr.com
4905
    # Also on http://minimumble.com
4906
    name = 'hallbeckmini-tapa'
4907
    long_name = 'Chris Hallback - Minimumble (from Tapastic)'
4908
    url = 'https://tapastic.com/series/Minimumble'
4909
    _categories = ('HALLBACK', )
4910
4911
4912
class ChrisHallbeckBiffTapa(GenericTapasticComic):
4913
    """Class to retrieve Chris Hallbeck comics."""
    # Attribute-only subclass: behaviour is inherited from GenericTapasticComic,
    # whose get_archive_elements downloads `url` (the series page).
    # NOTE(review): 'Hallback' in long_name/_categories disagrees with
    # 'Hallbeck' in the docstring and URLs; confirm before renaming, since the
    # category tag may be shared with classes outside this section.
4914
    # Also on http://chrishallbeck.tumblr.com
4915
    # Also on http://thebookofbiff.com
4916
    name = 'hallbeckbiff-tapa'
4917
    long_name = 'Chris Hallback - The Book of Biff (from Tapastic)'
4918
    url = 'https://tapastic.com/series/Biff'
4919
    _categories = ('HALLBACK', )
4920
4921
4922
class RandoWisTapa(GenericTapasticComic):
4923
    """Class to retrieve RandoWis comics."""
    # Attribute-only subclass: behaviour is inherited from GenericTapasticComic,
    # whose get_archive_elements downloads `url` (the series page).
4924
    # Also on https://randowis.com
4925
    name = 'randowis-tapa'
4926
    long_name = 'RandoWis (from Tapastic)'
4927
    url = 'https://tapastic.com/series/RandoWis'
4928
4929
4930
class PigeonGazetteTapa(GenericTapasticComic):
4931
    """Class to retrieve The Pigeon Gazette comics."""
    # Attribute-only subclass: behaviour is inherited from GenericTapasticComic,
    # whose get_archive_elements downloads `url` (the series page).
4932
    # Also on http://thepigeongazette.tumblr.com
4933
    name = 'pigeon-tapa'
4934
    long_name = 'The Pigeon Gazette (from Tapastic)'
4935
    url = 'https://tapastic.com/series/The-Pigeon-Gazette'
4936
4937
4938
class TheOdd1sOutTapa(GenericTapasticComic):
4939
    """Class to retrieve The Odd 1s Out comics."""
    # Attribute-only subclass: behaviour is inherited from GenericTapasticComic,
    # whose get_archive_elements downloads `url` (the series page).
4940
    # Also on http://theodd1sout.com
4941
    # Also on http://theodd1sout.tumblr.com
4942
    name = 'theodd-tapa'
4943
    long_name = 'The Odd 1s Out (from Tapastic)'
4944
    url = 'https://tapastic.com/series/Theodd1sout'
4945
4946
4947
class TheWorldIsFlatTapa(GenericTapasticComic):
4948
    """Class to retrieve The World Is Flat Comics."""
    # Attribute-only subclass: behaviour is inherited from GenericTapasticComic,
    # whose get_archive_elements downloads `url` (the series page).
4949
    # Also on http://theworldisflatcomics.tumblr.com
4950
    name = 'flatworld-tapa'
4951
    long_name = 'The World Is Flat (from Tapastic)'
4952
    url = 'https://tapastic.com/series/The-World-is-Flat'
4953
4954
4955
class MisterAndMeTapa(GenericTapasticComic):
4956
    """Class to retrieve Mister & Me Comics."""
    # Attribute-only subclass: behaviour is inherited from GenericTapasticComic,
    # whose get_archive_elements downloads `url` (the series page).
4957
    # Also on http://www.mister-and-me.com
4958
    # Also on http://www.gocomics.com/mister-and-me
4959
    name = 'mister-tapa'
4960
    long_name = 'Mister & Me (from Tapastic)'
4961
    url = 'https://tapastic.com/series/Mister-and-Me'
4962
4963
4964
class TalesOfAbsurdityTapa(GenericTapasticComic):
4965
    """Class to retrieve Tales Of Absurdity comics."""
    # Attribute-only subclass: behaviour is inherited from GenericTapasticComic,
    # whose get_archive_elements downloads `url` (the series page).
4966
    # Also on http://talesofabsurdity.com
4967
    # Also on http://talesofabsurdity.tumblr.com
4968
    name = 'absurdity-tapa'
4969
    long_name = 'Tales of Absurdity (from Tapastic)'
4970
    url = 'http://tapastic.com/series/Tales-Of-Absurdity'
4971
    _categories = ('ABSURDITY', )
4972
4973
4974
class BFGFSTapa(GenericTapasticComic):
4975
    """Class to retrieve BFGFS comics."""
    # Attribute-only subclass: behaviour is inherited from GenericTapasticComic,
    # whose get_archive_elements downloads `url` (the series page).
4976
    # Also on http://bfgfs.com
4977
    # Also on http://bfgfs.tumblr.com
4978
    name = 'bfgfs-tapa'
4979
    long_name = 'BFGFS (from Tapastic)'
4980
    url = 'https://tapastic.com/series/BFGFS'
4981
4982
4983
class DoodleForFoodTapa(GenericTapasticComic):
4984
    """Class to retrieve Doodle For Food comics."""
    # Attribute-only subclass: behaviour is inherited from GenericTapasticComic,
    # whose get_archive_elements downloads `url` (the series page).
4985
    # Also on http://doodleforfood.com
4986
    name = 'doodle-tapa'
4987
    long_name = 'Doodle For Food (from Tapastic)'
4988
    url = 'https://tapastic.com/series/Doodle-for-Food'
4989
4990
4991
class MrLovensteinTapa(GenericTapasticComic):
4992
    """Class to retrieve Mr Lovenstein comics."""
    # Attribute-only subclass: behaviour is inherited from GenericTapasticComic,
    # whose get_archive_elements downloads `url` (the series page).
4993
    # NOTE(review): the "Also on" comment used to point at this very Tapastic
    # URL; presumably the comic's own site (http://www.mrlovenstein.com) was meant - confirm.
4994
    name = 'mrlovenstein-tapa'
4995
    long_name = 'Mr. Lovenstein (from Tapastic)'
4996
    url = 'https://tapastic.com/series/MrLovenstein'
4997
4998
4999
class CassandraCalinTapa(GenericTapasticComic):
5000
    """Class to retrieve C. Cassandra comics."""
    # Attribute-only subclass: behaviour is inherited from GenericTapasticComic,
    # whose get_archive_elements downloads `url` (the series page).
5001
    # Also on http://cassandracalin.com
5002
    # Also on http://c-cassandra.tumblr.com
5003
    name = 'cassandra-tapa'
5004
    long_name = 'Cassandra Calin (from Tapastic)'
5005
    url = 'https://tapastic.com/series/C-Cassandra-comics'
5006
5007
5008
class WafflesAndPancakes(GenericTapasticComic):
5009
    """Class to retrieve Waffles And Pancakes comics."""
    # Attribute-only subclass: behaviour is inherited from GenericTapasticComic,
    # whose get_archive_elements downloads `url` (the series page).
5010
    # Also on http://wandpcomic.com
5011
    name = 'waffles'
5012
    long_name = 'Waffles And Pancakes'
5013
    url = 'https://tapastic.com/series/Waffles-and-Pancakes'
5014
5015
5016
class YesterdaysPopcornTapastic(GenericTapasticComic):
5017
    """Class to retrieve Yesterday's Popcorn comics."""
    # Attribute-only subclass: behaviour is inherited from GenericTapasticComic,
    # whose get_archive_elements downloads `url` (the series page).
5018
    # Also on http://www.yesterdayspopcorn.com
5019
    # Also on http://yesterdayspopcorn.tumblr.com
5020
    name = 'popcorn-tapa'
5021
    long_name = 'Yesterday\'s Popcorn (from Tapastic)'
5022
    url = 'https://tapastic.com/series/Yesterdays-Popcorn'
5023
5024
5025
class OurSuperAdventureTapastic(GenericTapasticComic):
5026
    """Class to retrieve Our Super Adventure comics."""
    # Attribute-only subclass: behaviour is inherited from GenericTapasticComic,
    # whose get_archive_elements downloads `url` (the series page).
5027
    # Also on http://www.oursuperadventure.com
5028
    # Also on http://sarahssketchbook.tumblr.com
5029
    # Also on http://sarahgraley.com
5030
    name = 'superadventure-tapastic'
5031
    long_name = 'Our Super Adventure (from Tapastic)'
5032
    url = 'https://tapastic.com/series/Our-Super-Adventure'
5033
5034
5035
class NamelessPCs(GenericTapasticComic):
5036
    """Class to retrieve Nameless PCs comics."""
    # Attribute-only subclass: behaviour is inherited from GenericTapasticComic,
    # whose get_archive_elements downloads `url` (the series page).
5037
    # Also on http://namelesspcs.com
5038
    name = 'namelesspcs-tapa'
5039
    long_name = 'NamelessPCs (from Tapastic)'
5040
    url = 'https://tapastic.com/series/NamelessPC'
5041
5042
5043
class UbertoolTapa(GenericTapasticComic):
5044
    """Class to retrieve Ubertool comics."""
    # Attribute-only subclass: behaviour is inherited from GenericTapasticComic,
    # whose get_archive_elements downloads `url` (the series page).
5045
    # Also on http://ubertoolcomic.com
5046
    # Also on http://ubertool.tumblr.com
5047
    name = 'ubertool-tapa'
5048
    long_name = 'Ubertool (from Tapastic)'
5049
    url = 'https://tapastic.com/series/ubertool'
5050
    _categories = ('UBERTOOL', )
5051
5052
5053
class BarteNerdsTapa(GenericTapasticComic):
5054
    """Class to retrieve BarteNerds comics."""
    # Attribute-only subclass: behaviour is inherited from GenericTapasticComic,
    # whose get_archive_elements downloads `url` (the series page).
5055
    # Also on http://www.bartenerds.com
5056
    name = 'bartenerds-tapa'
5057
    long_name = 'BarteNerds (from Tapastic)'
5058
    url = 'https://tapastic.com/series/BarteNERDS'
5059
5060
5061
class SmallBlueYonderTapa(GenericTapasticComic):
5062
    """Class to retrieve Small Blue Yonder comics."""
    # Attribute-only subclass: behaviour is inherited from GenericTapasticComic,
    # whose get_archive_elements downloads `url` (the series page).
5063
    # Also on http://www.smallblueyonder.com
5064
    name = 'smallblue-tapa'
5065
    long_name = 'Small Blue Yonder (from Tapastic)'
5066
    url = 'https://tapastic.com/series/Small-Blue-Yonder'
5067
5068
5069
class TizzyStitchBirdTapa(GenericTapasticComic):
5070
    """Class to retrieve Tizzy Stitch Bird comics."""
    # Attribute-only subclass: behaviour is inherited from GenericTapasticComic,
    # whose get_archive_elements downloads `url` (the series page).
5071
    # Also on http://tizzystitchbird.com
5072
    # Also on http://tizzystitchbird.tumblr.com
5073
    # Also on http://www.webtoons.com/en/challenge/tizzy-stitchbird/list?title_no=50082
5074
    name = 'tizzy-tapa'
5075
    long_name = 'Tizzy Stitch Bird (from Tapastic)'
5076
    url = 'https://tapastic.com/series/TizzyStitchbird'
5077
5078
5079
class RockPaperCynicTapa(GenericTapasticComic):
5080
    """Class to retrieve RockPaperCynic comics."""
    # Attribute-only subclass: behaviour is inherited from GenericTapasticComic,
    # whose get_archive_elements downloads `url` (the series page).
5081
    # Also on http://www.rockpapercynic.com
5082
    # Also on http://rockpapercynic.tumblr.com
5083
    name = 'rpc-tapa'
5084
    long_name = 'Rock Paper Cynic (from Tapastic)'
5085
    url = 'https://tapastic.com/series/rockpapercynic'
5086
5087
5088
def get_subclasses(klass):
    """Gets the list of direct/indirect subclasses of a class.

    Direct subclasses come first, followed by the descendants of each of
    them. A class reachable through several bases (multiple inheritance) is
    listed only once, at the position of its first occurrence.
    """
    direct = klass.__subclasses__()
    candidates = list(direct)
    for derived in direct:
        candidates.extend(get_subclasses(derived))
    # Drop repeated entries while keeping the first-seen order.
    seen = set()
    unique = []
    for sub in candidates:
        if sub not in seen:
            seen.add(sub)
            unique.append(sub)
    return unique
5094
5095
5096
def remove_st_nd_rd_th_from_date(string):
    """Function to transform 1st/2nd/3rd/4th in a parsable date format.

    Only a 'st', 'nd', 'rd' or 'th' that directly follows a digit is removed,
    so words such as 'August', 'Monday' or 'Saturday' are left untouched.
    Returns the cleaned string.
    """
    # The look-behind keeps the digit and restricts the removal to ordinals.
    return re.sub(r'(?<=\d)(?:st|nd|rd|th)', '', string)
5104
5105
5106
def string_to_date(string, date_format, local=DEFAULT_LOCAL):
    """Function to convert string to date object.

    Wrapper around datetime.datetime.strptime which parses `string` with
    `date_format` while the locale `local` is active, then puts the previous
    locale back (even when parsing fails).

    Returns a datetime.date. Raises ValueError (from strptime) when the
    string does not match the format and locale.Error when `local` is not
    available on the system.
    """
    # format described in https://docs.python.org/2/library/datetime.html#strftime-and-strptime-behavior
    prev_locale = locale.setlocale(locale.LC_ALL)  # query only, nothing is changed
    if local == prev_locale:
        return datetime.datetime.strptime(string, date_format).date()
    locale.setlocale(locale.LC_ALL, local)
    try:
        return datetime.datetime.strptime(string, date_format).date()
    finally:
        # Restore the caller's locale whether or not parsing succeeded.
        locale.setlocale(locale.LC_ALL, prev_locale)
5117
5118
5119
# Registry built at import time from every direct/indirect subclass of
# GenericComic; the set() collapses any repeated entries.
COMICS = set(get_subclasses(GenericComic))
5120
# Classes whose `name` is None are left out of the usable comics.
VALID_COMICS = [c for c in COMICS if c.name is not None]
5121
# Lookup table from `name` to class.
COMIC_NAMES = {c.name: c for c in VALID_COMICS}
5122
# Fails if two classes share the same `name` (a dict key would have been overwritten).
# Note that assert statements are skipped when Python runs with -O.
assert len(VALID_COMICS) == len(COMIC_NAMES)
5123
CLASS_NAMES = {c.__name__ for c in VALID_COMICS}
5124
# Fails if two classes share the same Python class name.
assert len(VALID_COMICS) == len(CLASS_NAMES)
5125