Completed
Push — master ( 77a11c...60575d )
by De
01:15
created

comics.py (10 issues)

1
#! /usr/bin/python3
2
# vim: set expandtab tabstop=4 shiftwidth=4 :
3
"""Module to retrieve webcomics"""
4
5
from comic_abstract import GenericComic, get_date_for_comic
6
import re
7
from datetime import date, timedelta
8
import datetime
9
from urlfunctions import get_soup_at_url, urljoin_wrapper,\
10
    convert_iri_to_plain_ascii_uri, load_json_at_url, urlopen_wrapper
11
import json
12
import locale
13
import urllib
14
15
DEFAULT_LOCAL = 'en_GB.UTF-8'
16
17
18
class Xkcd(GenericComic):
    """Class to retrieve Xkcd comics from the JSON data exposed by the site."""
    name = 'xkcd'
    long_name = 'xkcd'
    url = 'http://xkcd.com'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generate the comics following `last_comic`.

        Implementation of GenericComic's abstract method.

        `last_comic` is the previously retrieved comic (a mapping with a
        'num' key) or a falsy value to start from number 1. Each yielded
        comic is the JSON data loaded for that number, with 'img' wrapped
        in a list, 'day'/'month'/'year' converted to integers and the
        'prefix', 'json_url' and 'url' keys added.
        """
        start_after = last_comic['num'] if last_comic else 0
        latest = load_json_at_url(urljoin_wrapper(cls.url, 'info.0.json'))
        newest_num = latest['num']

        for num in range(start_after + 1, newest_num + 1):
            if num == 404:
                # Number 404 is never requested - presumably no such comic exists.
                continue
            json_url = urljoin_wrapper(cls.url, '%d/info.0.json' % num)
            comic = load_json_at_url(json_url)
            comic.update({
                'img': [comic['img']],
                'prefix': '%d-' % num,
                'json_url': json_url,
                'url': urljoin_wrapper(cls.url, str(num)),
                'day': int(comic['day']),
                'month': int(comic['month']),
                'year': int(comic['year']),
            })
            assert comic['num'] == num
            yield comic
44
45
46
# Helper functions corresponding to get_url_from_link/get_url_from_archive_element
47
48
49
@classmethod
def get_href(cls, link):
    """Return the 'href' entry of `link`.

    Usable as implementation of get_url_from_link/get_url_from_archive_element.
    """
    href = link['href']
    return href
53
54
55
@classmethod
def join_cls_url_to_href(cls, link):
    """Return the 'href' entry of `link` joined to the class url.

    Usable as implementation of get_url_from_link/get_url_from_archive_element.
    """
    href = link['href']
    return urljoin_wrapper(cls.url, href)
59
60
61
class GenericNavigableComic(GenericComic):
    """Generic class for "navigable" comics : with first/next arrows.

    This class applies to comic where previous and next comics can be
    accessed from a given comic. Once given a starting point (either
    the first comic or the last comic retrieved), it will handle the
    navigation, the retrieval of the soup object and the setting of
    the 'url' attribute on retrieved comics. This limits a lot the
    amount of boilerplate code in the different implementation classes.

    The method `get_next_comic` is implemented in terms of new
    more specialized methods to be implemented/overridden:
        - get_first_comic_link
        - get_navi_link
        - get_comic_info
        - get_url_from_link
    """
    _categories = ('NAVIGABLE', )

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        Sometimes this can be retrieved of any comic page, sometimes on
        the archive page, sometimes it doesn't exist at all and one has
        to iterate backward to find it before hardcoding the result found.
        """
        raise NotImplementedError

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next (if `next_` is true) or previous comic.

        The previous direction is only used for dev purposes.
        """
        raise NotImplementedError

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        `soup` is the soup retrieved for the comic url and `link` the link
        followed to reach it. The result is either None (nothing is
        yielded for that page) or a dict which must not contain a 'url'
        key: `get_next_comic` sets it.
        """
        raise NotImplementedError

    @classmethod
    def get_url_from_link(cls, link):
        """Get url corresponding to a link. Default implementation is similar to get_href."""
        return link['href']

    @classmethod
    def get_next_link(cls, last_soup):
        """Get link to next comic."""
        return cls.get_navi_link(last_soup, True)

    @classmethod
    def get_prev_link(cls, last_soup):
        """Get link to previous comic."""
        return cls.get_navi_link(last_soup, False)

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generic implementation of get_next_comic for navigable comics.

        Starts from the link following `last_comic['url']` or, when
        `last_comic` is falsy, from the first comic link. It then follows
        the "next" links until there is none or until a link leads to the
        url just visited.
        """
        url = last_comic['url'] if last_comic else None
        cls.log("starting 'get_next_comic' from %s" % url)
        next_comic = \
            cls.get_next_link(get_soup_at_url(url)) \
            if url else \
            cls.get_first_comic_link()
        cls.log("next/first comic will be %s (url is %s)" % (str(next_comic), url))
        # cls.check_navigation(url)
        while next_comic:
            prev_url, url = url, cls.get_url_from_link(next_comic)
            if prev_url == url:
                # A "next" link pointing to the current page: stop here
                # instead of looping forever.
                cls.log("got same url %s" % url)
                break
            cls.log("about to get %s (%s)" % (url, str(next_comic)))
            soup = get_soup_at_url(url)
            comic = cls.get_comic_info(soup, next_comic)
            if comic is not None:
                assert 'url' not in comic
                comic['url'] = url
                yield comic
            next_comic = cls.get_next_link(soup)
            cls.log("next comic will be %s" % str(next_comic))

    @classmethod
    def check_first_link(cls):
        """Check that navigation to first comic seems to be working - for dev purposes.

        Returns True if a first link is found and its url can be
        retrieved, False otherwise (a message is printed in that case).
        """
        # Imported explicitly: a plain `import urllib` does not guarantee
        # that the `urllib.error` submodule is loaded.
        import urllib.error

        cls.log("about to check first link")
        ok = True
        firstlink = cls.get_first_comic_link()
        if firstlink is None:
            print("From %s : no first link" % cls.url)
            ok = False
        else:
            firsturl = cls.get_url_from_link(firstlink)
            try:
                get_soup_at_url(firsturl)
            except urllib.error.HTTPError:
                print("From %s : invalid first url" % cls.url)
                ok = False
        cls.log("checked first link -> returned %d" % ok)
        return ok

    @classmethod
    def check_prev_next_links(cls, url):
        """Check that navigation to prev/next from a given URL seems to be working - for dev purposes.

        Going backward then forward (and forward then backward) from `url`
        is expected to lead back to `url`. Returns True if the checks pass,
        False otherwise (a message is printed for each problem found).
        """
        cls.log("about to check prev/next from %s" % url)
        ok = True
        if url is None:
            prevlink, nextlink = None, None
        else:
            soup = get_soup_at_url(url)
            prevlink, nextlink = cls.get_prev_link(soup), cls.get_next_link(soup)
        if prevlink is None and nextlink is None:
            print("From %s : no previous nor next" % url)
            ok = False
        else:
            if prevlink:
                prevurl = cls.get_url_from_link(prevlink)
                prevsoup = get_soup_at_url(prevurl)
                forward_link = cls.get_next_link(prevsoup)
                # A missing link is reported as a failed check, not a crash.
                prevnext = cls.get_url_from_link(forward_link) if forward_link else None
                if prevnext != url:
                    print("From %s, going backward then forward leads to %s" % (url, prevnext))
                    ok = False
            if nextlink:
                nexturl = cls.get_url_from_link(nextlink)
                # A "next" link pointing to the page itself is not followed.
                if nexturl != url:
                    nextsoup = get_soup_at_url(nexturl)
                    backward_link = cls.get_prev_link(nextsoup)
                    nextprev = cls.get_url_from_link(backward_link) if backward_link else None
                    if nextprev != url:
                        print("From %s, going forward then backward leads to %s" % (url, nextprev))
                        ok = False
        cls.log("checked prev/next from %s -> returned %d" % (url, ok))
        return ok

    @classmethod
    def check_navigation(cls, url):
        """Check that navigation functions seem to be working - for dev purposes.

        Runs both `check_first_link` and `check_prev_next_links(url)` and
        returns True only if both succeed.
        """
        cls.log("about to check navigation from %s" % url)
        first = cls.check_first_link()
        prevnext = cls.check_prev_next_links(url)
        ok = first and prevnext
        cls.log("checked navigation from %s -> returned %d" % (url, ok))
        return ok
201
202
203
class GenericListableComic(GenericComic):
    """Generic class for "listable" comics : with a list of comics (aka 'archive')

    The method `get_next_comic` is implemented in terms of new
    more specialized methods to be implemented/overridden:
        - get_archive_elements
        - get_url_from_archive_element
        - get_comic_info
    """
    _categories = ('LISTABLE', )

    @classmethod
    def get_archive_elements(cls):
        """Return an iterable over the elements of the archive."""
        raise NotImplementedError

    @classmethod
    def get_url_from_archive_element(cls, archive_elt):
        """Return the url matching an element of the archive."""
        raise NotImplementedError

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Return the information about one comic (or None to skip it)."""
        raise NotImplementedError

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generic implementation of get_next_comic for listable comics.

        Archive elements are skipped up to (and including) the one whose
        url is `last_comic['url']`; every following element is retrieved.
        When `last_comic` is falsy, all elements are retrieved.
        """
        waiting_for_url = last_comic['url'] if last_comic else None
        for archive_elt in cls.get_archive_elements():
            url = cls.get_url_from_archive_element(archive_elt)
            cls.log("considering %s" % url)
            if waiting_for_url is not None:
                # Still looking for the last comic retrieved.
                if waiting_for_url == url:
                    waiting_for_url = None
                continue
            cls.log("about to get %s (%s)" % (url, str(archive_elt)))
            info = cls.get_comic_info(get_soup_at_url(url), archive_elt)
            if info is None:
                continue
            assert 'url' not in info
            info['url'] = url
            yield info
        if waiting_for_url is not None:
            print("Did not find %s : there might be a problem" % waiting_for_url)
248
249
# Helper functions corresponding to get_first_comic_link/get_navi_link
250
251
252
@classmethod
def get_link_rel_next(cls, last_soup, next_):
    """Implementation of get_navi_link based on <link rel="next"/"prev">."""
    rel_value = 'next' if next_ else 'prev'
    return last_soup.find('link', rel=rel_value)
256
257
258
@classmethod
def get_a_rel_next(cls, last_soup, next_):
    """Implementation of get_navi_link based on <a rel="next"/"prev">."""
    rel_value = 'next' if next_ else 'prev'
    return last_soup.find('a', rel=rel_value)
262
263
264
@classmethod
def get_a_navi_navinext(cls, last_soup, next_):
    """Implementation of get_navi_link based on <a class="navi navi-next"/"navi navi-prev">."""
    if next_:
        css_class = 'navi navi-next'
    else:
        css_class = 'navi navi-prev'
    return last_soup.find('a', class_=css_class)
268
269
270
@classmethod
def get_a_navi_comicnavnext_navinext(cls, last_soup, next_):
    """Implementation of get_navi_link based on the 'navi comic-nav-* navi-*' classes of <a>."""
    if next_:
        css_class = 'navi comic-nav-next navi-next'
    else:
        css_class = 'navi comic-nav-previous navi-prev'
    return last_soup.find('a', class_=css_class)
274
275
276
@classmethod
def get_a_comicnavbase_comicnavnext(cls, last_soup, next_):
    """Implementation of get_navi_link based on the 'comic-nav-base comic-nav-*' classes of <a>."""
    if next_:
        css_class = 'comic-nav-base comic-nav-next'
    else:
        css_class = 'comic-nav-base comic-nav-previous'
    return last_soup.find('a', class_=css_class)
280
281
282
@classmethod
def get_a_navi_navifirst(cls):
    """Implementation of get_first_comic_link based on <a class="navi navi-first">.

    The link is looked up in the soup retrieved for the class url.
    """
    home_soup = get_soup_at_url(cls.url)
    return home_soup.find('a', class_='navi navi-first')
286
287
288
@classmethod
def get_div_navfirst_a(cls):
    """Implementation of get_first_comic_link based on <div class="nav-first">.

    Returns the first <a> found in that div in the soup retrieved for
    the class url, or None when the div (or the link) is not found.
    """
    div = get_soup_at_url(cls.url).find('div', class_="nav-first")
    # Report a missing div as "no link" (None) like a missing <a> would be,
    # instead of failing with an AttributeError on None.
    return div.find('a') if div is not None else None
292
293
294
@classmethod
def get_a_comicnavbase_comicnavfirst(cls):
    """Implementation of get_first_comic_link based on <a class="comic-nav-base comic-nav-first">.

    The link is looked up in the soup retrieved for the class url.
    """
    home_soup = get_soup_at_url(cls.url)
    return home_soup.find('a', class_='comic-nav-base comic-nav-first')
298
299
300
@classmethod
def simulate_first_link(cls):
    """Implementation of get_first_comic_link building a link-like object
    (a dict with an 'href' key) from the `first_url` provided by the class.

    Note: The first URL can easily be found using :
    `get_first_comic_link = navigate_to_first_comic`.
    """
    fake_link = {'href': cls.first_url}
    return fake_link
309
310
311
@classmethod
def navigate_to_first_comic(cls):
    """Implementation of get_first_comic_link navigating from a user provided
    URL to the first comic.

    Sometimes, the first comic cannot be reached directly so to start
    from the first comic one has to go to the previous comic until
    there is no previous comics. Once this URL is reached, it
    is better to hardcode it but for development purposes, it
    is convenient to have an automatic way to find it.

    Every URL visited is printed. The URL found can then easily be
    used via `simulate_first_link`.
    """
    url = input("Get starting URL: ")
    print(url)
    while True:
        prev_link = cls.get_prev_link(get_soup_at_url(url))
        if not prev_link:
            break
        url = cls.get_url_from_link(prev_link)
        print(url)
    return {'href': url}
332
333
334
class GenericEmptyComic(GenericComic):
    """Generic class for comics where nothing is to be done.

    It can be useful to deactivate temporarily comics that do not work
    properly by replacing `class MyComic(GenericWhateverComic)` with
    `class MyComic(GenericEmptyComic, GenericWhateverComic)`.
    """
    _categories = ('EMPTY', )

    @classmethod
    def get_next_comic(cls, last_comic):
        """Implementation of get_next_comic returning no comics (an empty list)."""
        cls.log("comic is considered as empty - returning no comic")
        no_comics = []
        return no_comics
347
348
349 View Code Duplication
class ExtraFabulousComics(GenericNavigableComic):
    """Class to retrieve Extra Fabulous Comics."""
    name = 'efc'
    long_name = 'Extra Fabulous Comics'
    url = 'http://extrafabulouscomics.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The images are the <img> whose 'src' starts with the
        '/wp-content/uploads/' folder of the site; title and date come
        from the 'og:title' and 'article:published_time' meta tags.
        Returns a dict with the 'title', 'img', 'month', 'year', 'day'
        and 'prefix' keys.
        """
        # The url is escaped so that its dots only match literal dots.
        img_src_re = re.compile('^%s/wp-content/uploads/' % re.escape(cls.url))
        imgs = soup.find_all('img', src=img_src_re)
        title = soup.find('meta', property='og:title')['content']
        # Only the leading "YYYY-MM-DD" part of the timestamp is parsed.
        date_str = soup.find('meta', property='article:published_time')['content'][:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        return {
            'title': title,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'prefix': title + '-'
        }
373
374
375 View Code Duplication
class GenericLeMondeBlog(GenericNavigableComic):
    """Generic class to retrieve comics from Le Monde blogs.

    Navigation starts from `first_url`, which is left as NotImplemented
    here and is set by the subclasses.
    """
    _categories = ('LEMONDE', 'FRANCAIS')
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, short url ('url2'), images and date of a comic."""
        short_link = soup.find('link', rel='shortlink')['href']
        og_title = soup.find('meta', property='og:title')['content']
        raw_date = soup.find("span", class_="entry-date").string
        # Third argument looks like a locale name for the month names - TODO confirm.
        published = string_to_date(raw_date, "%d %B %Y", "fr_FR.utf8")
        img_urls = [
            convert_iri_to_plain_ascii_uri(meta['content'])
            for meta in soup.find_all('meta', property='og:image')
        ]
        return {
            'title': og_title,
            'url2': short_link,
            'img': img_urls,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
398
399
400
class ZepWorld(GenericLeMondeBlog):
    """Retriever for the Zep World comics."""
    long_name = "Zep World"
    name = "zep"
    # Navigation starts from first_url.
    first_url = "http://zepworld.blog.lemonde.fr/2014/10/31/bientot-le-blog-de-zep/"
    url = "http://zepworld.blog.lemonde.fr"
406
407
408
class Vidberg(GenericLeMondeBlog):
    """Retriever for the Vidberg comics."""
    long_name = "Vidberg - l'actu en patates"
    name = 'vidberg'
    # Not the first but I didn't find an efficient way to retrieve it
    first_url = "http://vidberg.blog.lemonde.fr/2012/02/09/revue-de-campagne-la-campagne-du-modem-semballe/"
    url = "http://vidberg.blog.lemonde.fr"
415
416
417
class Plantu(GenericLeMondeBlog):
    """Retriever for the Plantu comics."""
    long_name = "Plantu"
    name = 'plantu'
    # Navigation starts from first_url.
    first_url = "http://plantu.blog.lemonde.fr/2014/10/28/stress-test-a-bruxelles/"
    url = "http://plantu.blog.lemonde.fr"
423
424
425
class XavierGorce(GenericLeMondeBlog):
    """Retriever for the Xavier Gorce comics."""
    long_name = "Xavier Gorce"
    name = 'gorce'
    # Navigation starts from first_url.
    first_url = "http://xaviergorce.blog.lemonde.fr/2015/01/09/distinction/"
    url = "http://xaviergorce.blog.lemonde.fr"
431
432
433
class CartooningForPeace(GenericLeMondeBlog):
    """Retriever for the Cartooning For Peace comics."""
    long_name = "Cartooning For Peace"
    name = 'forpeace'
    # Navigation starts from first_url.
    first_url = "http://cartooningforpeace.blog.lemonde.fr/2014/12/15/bado/"
    url = "http://cartooningforpeace.blog.lemonde.fr"
439
440
441
class Aurel(GenericLeMondeBlog):
    """Retriever for the Aurel comics."""
    long_name = "Aurel"
    name = 'aurel'
    # Navigation starts from first_url.
    first_url = "http://aurel.blog.lemonde.fr/2014/09/29/le-senat-repasse-a-droite/"
    url = "http://aurel.blog.lemonde.fr"
447
448
449
class LesCulottees(GenericLeMondeBlog):
    """Retriever for the Les Culottees comics."""
    long_name = 'Les Culottees'
    name = 'culottees'
    # Navigation starts from first_url.
    first_url = "http://lesculottees.blog.lemonde.fr/2016/01/11/clementine-delait-femme-a-barbe/"
    url = "http://lesculottees.blog.lemonde.fr"
455
456
457
class UneAnneeAuLycee(GenericLeMondeBlog):
    """Retriever for the Une Annee Au Lycee comics."""
    long_name = 'Une Annee au Lycee'
    name = 'lycee'
    # Navigation starts from first_url.
    first_url = "http://uneanneeaulycee.blog.lemonde.fr/2016/06/13/la-semaine-du-bac-est-arrivee/"
    url = 'http://uneanneeaulycee.blog.lemonde.fr'
463
464
465
class Rall(GenericNavigableComic):
    """Retriever for the Ted Rall comics."""
    # Also on http://www.gocomics.com/tedrall
    name = 'rall'
    long_name = "Ted Rall"
    url = "http://rall.com/comic"
    _categories = ('RALL', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    # Not the first but I didn't find an efficient way to retrieve it
    first_url = "http://rall.com/2014/01/30/los-angeles-times-cartoon-well-miss-those-california-flowers"

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, author, date, description and images of a comic."""
        og_title = soup.find('meta', property='og:title')['content']
        author_anchor = soup.find("span", class_="author vcard").find("a")
        author_name = author_anchor.string
        raw_date = soup.find("span", class_="entry-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        og_desc = soup.find('meta', property='og:description')['content']
        content_imgs = soup.find('div', class_='entry-content').find_all('img')
        # remove social media buttons (the last 7 images)
        comic_imgs = content_imgs[:-7]
        return {
            'title': og_title,
            'author': author_name,
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'description': og_desc,
            'img': [img['src'] for img in comic_imgs],
        }
496
497
498
class Dilem(GenericNavigableComic):
    """Retriever for the Ali Dilem comics."""
    name = 'dilem'
    long_name = 'Ali Dilem'
    url = 'http://information.tv5monde.com/dilem'
    _categories = ('FRANCAIS', )
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = "http://information.tv5monde.com/dilem/2004-06-26"

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the link to the next or previous comic (None if not found)."""
        # prev is next / next is prev
        css_class = 'prev' if next_ else 'next'
        item = last_soup.find('li', class_=css_class)
        if not item:
            return None
        return item.find('a')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return short url, title, images and date of a comic."""
        short_link = soup.find('link', rel='shortlink')['href']
        twitter_title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        image_metas = soup.find_all('meta', property='og:image')
        # Only the leading "YYYY-MM-DD" part of the date is parsed.
        raw_date = soup.find('span', property='dc:date')['content'][:10]
        published = string_to_date(raw_date, "%Y-%m-%d")
        return {
            'short_url': short_link,
            'title': twitter_title,
            'img': [meta['content'] for meta in image_metas],
            'day': published.day,
            'month': published.month,
            'year': published.year,
        }
532
533
534
class SpaceAvalanche(GenericNavigableComic):
    """Retriever for the Space Avalanche comics."""
    name = 'avalanche'
    long_name = 'Space Avalanche'
    url = 'http://www.spaceavalanche.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return a hardcoded link-like dict for the first comic."""
        first_link = {
            'href': "http://www.spaceavalanche.com/2009/02/02/irish-sea/",
            'title': "Irish Sea",
        }
        return first_link

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, date and images of a comic.

        The title comes from the link and the date from the three numeric
        components (year/month/day) found in the comic url.
        """
        url_date_re = re.compile('.*/([0-9]*)/([0-9]*)/([0-9]*)/.*$')
        title = link['title']
        comic_url = cls.get_url_from_link(link)
        date_match = url_date_re.match(comic_url)
        year, month, day = map(int, date_match.groups())
        entry_imgs = soup.find("div", class_="entry").find_all("img")
        return {
            'title': title,
            'day': day,
            'month': month,
            'year': year,
            'img': [img['src'] for img in entry_imgs],
        }
562
563
564
class ZenPencils(GenericNavigableComic):
    """Retriever for the ZenPencils comics."""
    # Also on http://zenpencils.tumblr.com
    # Also on http://www.gocomics.com/zen-pencils
    name = 'zenpencils'
    long_name = 'Zen Pencils'
    url = 'http://zenpencils.com'
    _categories = ('ZENPENCILS', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = "http://zenpencils.com/comic/1-ralph-waldo-emerson-make-them-cry/"

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, description, author, date and images of a comic."""
        comic_imgs = soup.find('div', id='comic').find_all('img')
        # imgs2 = soup.find_all('meta', property='og:image')
        post = soup.find('div', class_='post-content')
        author_anchor = post.find("span", class_="post-author").find("a")
        author_name = author_anchor.string
        og_title = soup.find('meta', property='og:title')['content']
        raw_date = post.find('span', class_='post-date').string
        published = string_to_date(raw_date, "%B %d, %Y")
        # Sanity checks on the images: at least one, with consistent
        # 'alt'/'title' attributes matching the title (or empty).
        assert comic_imgs
        assert all(img['alt'] == img['title'] for img in comic_imgs)
        assert all(img['alt'] in (og_title, "") for img in comic_imgs)
        og_desc = soup.find('meta', property='og:description')['content']
        return {
            'title': og_title,
            'description': og_desc,
            'author': author_name,
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [urljoin_wrapper(cls.url, img['src']) for img in comic_imgs],
        }
599
600
601
class ItsTheTie(GenericNavigableComic):
    """Retriever for the It's the tie comics."""
    # Also on http://itsthetie.tumblr.com
    # Also on https://tapastic.com/series/itsthetie
    name = 'tie'
    long_name = "It's the tie"
    url = "http://itsthetie.com"
    _categories = ('TIE', )
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, date, images and tags of a comic."""
        title = soup.find('h1', class_='comic-title').find('a').string
        raw_date = soup.find('header', class_='comic-meta entry-meta').find('a').string
        published = string_to_date(raw_date, "%B %d, %Y")
        # Bonus images may or may not be in meta og:image.
        main_srcs = [meta['content'] for meta in soup.find_all('meta', property='og:image')]
        bonus_srcs = [
            img['data-oversrc']
            for img in soup.find_all('img', attrs={'data-oversrc': True})
        ]
        candidates = list(main_srcs)
        candidates.extend(src for src in bonus_srcs if src not in main_srcs)
        # The generic "bonus panel" picture is not a comic image.
        kept_srcs = [
            src for src in candidates
            if not src.endswith("/2016/01/bonus-panel.png")
        ]
        tag_meta = soup.find('meta', property='article:tag')
        if tag_meta:
            tags = tag_meta['content']
        else:
            tags = ""
        return {
            'title': title,
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': kept_srcs,
            'tags': tags,
        }
635
636
637
class PenelopeBagieu(GenericNavigableComic):
    """Retriever for the comics from Penelope Bagieu's blog."""
    name = 'bagieu'
    long_name = 'Ma vie est tout a fait fascinante (Bagieu)'
    url = 'http://www.penelope-jolicoeur.com'
    _categories = ('FRANCAIS', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.penelope-jolicoeur.com/2007/02/ma-vie-mon-oeuv.html'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, images and date of a comic."""
        raw_date = soup.find('h2', class_='date-header').string
        # Third argument looks like a locale name for day/month names - TODO confirm.
        published = string_to_date(raw_date, "%A %d %B %Y", "fr_FR.utf8")
        body_imgs = soup.find('div', class_='entry-body').find_all('img')
        entry_title = soup.find('h3', class_='entry-header').string
        return {
            'title': entry_title,
            'img': [img['src'] for img in body_imgs],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
661
662
663
class OneOneOneOneComic(GenericNavigableComic):
    """Retriever for the 1111 Comics."""
    # Also on http://comics1111.tumblr.com
    # Also on https://tapastic.com/series/1111-Comics
    name = '1111'
    long_name = '1111 Comics'
    url = 'http://www.1111comics.me'
    _categories = ('ONEONEONEONE', )
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, date and images of a comic."""
        title_anchor = soup.find('h1', class_='comic-title').find('a')
        title = title_anchor.string
        date_anchor = soup.find('header', class_='comic-meta entry-meta').find('a')
        published = string_to_date(date_anchor.string, "%B %d, %Y")
        image_metas = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [meta['content'] for meta in image_metas],
        }
688
689
690
class AngryAtNothing(GenericNavigableComic):
    """Retriever for the Angry at Nothing comics."""
    # Also on http://tapastic.com/series/Comics-yeah-definitely-comics-
    name = 'angry'
    long_name = 'Angry At Nothing'
    url = 'http://www.angryatnothing.net'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, date and images of a comic."""
        title_anchor = soup.find('h1', class_='comic-title').find('a')
        title = title_anchor.string
        date_anchor = soup.find('header', class_='comic-meta entry-meta').find('a')
        published = string_to_date(date_anchor.string, "%B %d, %Y")
        image_metas = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [meta['content'] for meta in image_metas],
        }
713
714
715
class NeDroid(GenericNavigableComic):
    """Class to retrieve NeDroid comics."""
    name = 'nedroid'
    long_name = 'NeDroid'
    url = 'http://nedroid.com'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The comic number is read from the short link ('?p=<num>') and the
        date from the name of the (single) image found in the 'comic' div.
        Returns a dict with the 'short_url', 'title', 'title2', 'img',
        'day', 'month', 'year' and 'num' keys.
        """
        # Raw strings with escaped dots: the url and the host name are
        # matched literally instead of letting '.' match any character.
        short_url_re = re.compile(r'^%s/\?p=([0-9]*)' % re.escape(cls.url))
        comic_url_re = re.compile(r'//nedroid\.com/comics/([0-9]*)-([0-9]*)-([0-9]*).*')
        short_url = cls.get_url_from_link(soup.find('link', rel='shortlink'))
        num = int(short_url_re.match(short_url).groups()[0])
        imgs = soup.find('div', id='comic').find_all('img')
        year, month, day = [int(s) for s in comic_url_re.match(imgs[0]['src']).groups()]
        assert len(imgs) == 1
        title = imgs[0]['alt']
        title2 = imgs[0]['title']
        return {
            'short_url': short_url,
            'title': title,
            'title2': title2,
            'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
            'day': day,
            'month': month,
            'year': year,
            'num': num,
        }
746
747
748
class Garfield(GenericNavigableComic):
    """Class to retrieve Garfield comics."""
    # Also on http://www.gocomics.com/garfield
    name = 'garfield'
    long_name = 'Garfield'
    url = 'https://garfield.com'
    _categories = ('GARFIELD', )
    get_first_comic_link = simulate_first_link
    first_url = 'https://garfield.com/comic/1978/06/19'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic."""
        return last_soup.find('a', class_='comic-arrow-right' if next_ else 'comic-arrow-left')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The date is read from the '/comic/<year>/<month>/<day>' part of
        the comic url. Returns a dict with the 'month', 'year', 'day' and
        'img' keys.
        """
        url = cls.get_url_from_link(link)
        # The url is escaped so that its dots only match literal dots.
        date_re = re.compile('^%s/comic/([0-9]*)/([0-9]*)/([0-9]*)' % re.escape(cls.url))
        year, month, day = [int(s) for s in date_re.match(url).groups()]
        imgs = soup.find('div', class_='comic-display').find_all('img', class_='img-responsive')
        return {
            'month': month,
            'year': year,
            'day': day,
            'img': [i['src'] for i in imgs],
        }
776
777
778
class Dilbert(GenericNavigableComic):
    """Retrieve Dilbert comics by following the site's navigation links."""
    # Also on http://www.gocomics.com/dilbert-classics
    name = 'dilbert'
    long_name = 'Dilbert'
    url = 'http://dilbert.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://dilbert.com/strip/1989-04-16'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor to the next (or previous) comic, or None if absent."""
        nav_class = 'nav-comic nav-right' if next_ else 'nav-comic nav-left'
        nav_div = last_soup.find('div', class_=nav_class)
        if not nav_div:
            return None
        return nav_div.find('a')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return a dict describing one comic, read from the page's meta tags."""
        def meta_content(prop):
            """Return the 'content' of the first meta tag with this property."""
            return soup.find('meta', property=prop)['content']

        title = meta_content('og:title')
        image_metas = soup.find_all('meta', property='og:image')
        description = meta_content('og:description')
        published = string_to_date(meta_content('article:publish_date'), "%B %d, %Y")
        author = meta_content('article:author')
        tags = meta_content('article:tag')
        return {
            'title': title,
            'description': description,
            'img': [meta['content'] for meta in image_metas],
            'author': author,
            'tags': tags,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
814
815
816
class VictimsOfCircumsolar(GenericNavigableComic):
    """Retrieve Victims Of Circumsolar comics."""
    name = 'circumsolar'
    long_name = 'Victims Of Circumsolar'
    url = 'http://www.victimsofcircumsolar.com'
    get_navi_link = get_a_navi_comicnavnext_navinext
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.victimsofcircumsolar.com/comic/modern-addiction'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return a dict with the title, description and image URLs of one comic."""
        # Date is on the archive page
        # The last og:title / og:description meta tag on the page is the one used.
        title_metas = soup.find_all('meta', property='og:title')
        title = title_metas[-1]['content']
        desc_metas = soup.find_all('meta', property='og:description')
        description = desc_metas[-1]['content']
        img_tags = soup.find('div', id='comic').find_all('img')
        # Every image is expected to carry the comic title as both title and alt.
        assert all(tag['title'] == tag['alt'] == title for tag in img_tags)
        return {
            'title': title,
            'description': description,
            'img': [tag['src'] for tag in img_tags],
        }
838
839
840
class ThreeWordPhrase(GenericNavigableComic):
    """Retrieve Three Word Phrase comics."""
    # Also on http://www.threewordphrase.tumblr.com
    name = 'threeword'
    long_name = 'Three Word Phrase'
    url = 'http://threewordphrase.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the element wrapping the 'first' button image on the home page."""
        home = get_soup_at_url(cls.url)
        return home.find('img', src='/firstlink.gif').parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the next (or previous) link, or None when it has no href."""
        button_src = '/nextlink.gif' if next_ else '/prevlink.gif'
        anchor = last_soup.find('img', src=button_src).parent
        if anchor.get('href') is None:
            return None
        return anchor

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return a dict with the title, alt texts and image URLs of one comic."""
        # Images whose source ends with one of these are not treated as comics.
        skipped_suffixes = (
            'link.gif', '32.png', 'twpbookad.jpg',
            'merchad.jpg', 'header.gif', 'tipjar.jpg')
        title_tag = soup.find('title')
        comic_imgs = []
        for img in soup.find_all('img'):
            if not img['src'].endswith(skipped_suffixes):
                comic_imgs.append(img)
        alts = [img.get('alt') for img in comic_imgs]
        return {
            'title': title_tag.string if title_tag else None,
            'title2': '  '.join(alt for alt in alts if alt),
            'img': [urljoin_wrapper(cls.url, img['src']) for img in comic_imgs],
        }
872
873
874
class DeadlyPanel(GenericEmptyComic, GenericNavigableComic):
    """Retrieve Deadly Panel comics."""
    # Also on https://tapastic.com/series/deadlypanel
    name = 'deadly'
    long_name = 'Deadly Panel'
    url = 'http://www.deadlypanel.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return a dict holding the image URLs found in the 'comic' div."""
        comic_div = soup.find('div', id='comic')
        img_tags = comic_div.find_all('img')
        # Each image is expected to have identical alt and title attributes.
        assert all(tag['alt'] == tag['title'] for tag in img_tags)
        return {
            'img': [tag['src'] for tag in img_tags],
        }
891
892
893
class TheGentlemanArmchair(GenericNavigableComic):
    """Retrieve The Gentleman Armchair comics."""
    name = 'gentlemanarmchair'
    long_name = 'The Gentleman Armchair'
    url = 'http://thegentlemansarmchair.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return a dict with the title, author, date and image URLs of one comic."""
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        posted = string_to_date(
            soup.find('span', class_='post-date').string, "%B %d, %Y")
        img_tags = soup.find('div', id='comic').find_all('img')
        return {
            'img': [tag['src'] for tag in img_tags],
            'title': title,
            'author': author,
            'month': posted.month,
            'year': posted.year,
            'day': posted.day,
        }
917
918
919 View Code Duplication
class MyExtraLife(GenericNavigableComic):
    """Retrieve My Extra Life comics."""
    name = 'extralife'
    long_name = 'My Extra Life'
    url = 'http://www.myextralife.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'first comic' anchor found on the home page."""
        home = get_soup_at_url(cls.url)
        return home.find('a', class_='comic_nav_link first_comic_link')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return a dict with the title, date and image URLs of one comic."""
        title = soup.find("h1", class_="comic_title").string
        posted = string_to_date(
            soup.find("span", class_="comic_date").string, "%B %d, %Y")
        img_tags = soup.find_all("img", class_="comic")
        # Every comic image is expected to use the title as both alt and title.
        assert all(tag['alt'] == tag['title'] == title for tag in img_tags)
        return {
            'title': title,
            # Images with an empty src are left out.
            'img': [tag['src'] for tag in img_tags if tag["src"]],
            'day': posted.day,
            'month': posted.month,
            'year': posted.year
        }
946
947
948
class SaturdayMorningBreakfastCereal(GenericNavigableComic):
    """Retrieve Saturday Morning Breakfast Cereal comics."""
    # Also on http://www.gocomics.com/saturday-morning-breakfast-cereal
    # Also on http://smbc-comics.tumblr.com
    name = 'smbc'
    long_name = 'Saturday Morning Breakfast Cereal'
    url = 'http://www.smbc-comics.com'
    _categories = ('SMBC', )
    get_navi_link = get_a_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the anchor with rel='start' found on the home page."""
        home = get_soup_at_url(cls.url)
        return home.find('a', rel='start')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return a dict with the title, date and image URLs of one comic.

        The main image comes first, followed by the image from the
        'aftercomic' div when that div exists and its image has a source.
        """
        main_img = soup.find('img', id='cc-comic')
        img_srcs = [main_img['src']]
        after_div = soup.find('div', id='aftercomic')
        if after_div:
            after_src = after_div.find('img')['src']
            if after_src:
                img_srcs.append(after_src)
        publish_text = soup.find('div', class_='cc-publishtime').contents[0]
        posted = string_to_date(publish_text, "%B %d, %Y")
        return {
            'title': main_img['title'],
            'img': [urljoin_wrapper(cls.url, src) for src in img_srcs],
            'day': posted.day,
            'month': posted.month,
            'year': posted.year
        }
980
981
982
class PerryBibleFellowship(GenericListableComic):
    """Retrieve Perry Bible Fellowship comics from the links on the home page."""
    name = 'pbf'
    long_name = 'Perry Bible Fellowship'
    url = 'http://pbfcomics.com'
    get_url_from_archive_element = join_cls_url_to_href

    @classmethod
    def get_archive_elements(cls):
        """Return the home page anchors with a '/<digits>/' href, in reverse page order."""
        numbered_href_re = re.compile('^/[0-9]*/$')
        anchors = get_soup_at_url(cls.url).find_all('a', href=numbered_href_re)
        return reversed(anchors)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return a dict with the number, name and image URL of one comic."""
        comic_url = cls.get_url_from_archive_element(link)
        img_src_re = re.compile('^/archive_b/PBF.*')
        name = link.string
        num = int(link['name'])
        # The link's href is expected to be the comic number between slashes.
        assert link['href'] == '/%d/' % num
        img_tags = soup.find_all('img', src=img_src_re)
        assert len(img_tags) == 1
        assert img_tags[0]['alt'] == name
        return {
            'num': num,
            'name': name,
            'img': [urljoin_wrapper(comic_url, tag['src']) for tag in img_tags],
            'prefix': '%d-' % num,
        }
1012
1013
1014
class Mercworks(GenericNavigableComic):
    """Retrieve Mercworks comics."""
    # Also on http://mercworks.tumblr.com
    name = 'mercworks'
    long_name = 'Mercworks'
    url = 'http://mercworks.net'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return a dict describing one comic, read from the page's meta tags."""
        title = soup.find('meta', property='og:title')['content']
        # The description meta tag is optional; fall back to an empty string.
        desc_meta = soup.find('meta', property='og:description')
        if desc_meta:
            description = desc_meta['content']
        else:
            description = ""
        author_meta = soup.find('meta', attrs={'name': 'shareaholic:article_author_name'})
        author = author_meta['content']
        time_meta = soup.find('meta', attrs={'name': 'shareaholic:article_published_time'})
        # Only the first 10 characters are parsed, as a YYYY-MM-DD date.
        published = string_to_date(time_meta['content'][:10], "%Y-%m-%d")
        image_metas = soup.find_all('meta', property='og:image')
        return {
            'img': [meta['content'] for meta in image_metas],
            'title': title,
            'author': author,
            'desc': description,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
1043
1044
1045
class BerkeleyMews(GenericListableComic):
    """Retrieve Berkeley Mews comics from the archive page."""
    # Also on http://mews.tumblr.com
    # Also on http://www.gocomics.com/berkeley-mews
    name = 'berkeley'
    long_name = 'Berkeley Mews'
    url = 'http://www.berkeleymews.com'
    _categories = ('BERKELEY', )
    get_url_from_archive_element = get_href
    # Matches comic URLs and captures the comic number.
    comic_num_re = re.compile('%s/\\?p=([0-9]*)$' % url)

    @classmethod
    def get_archive_elements(cls):
        """Return the archive page anchors matching comic_num_re, in reverse page order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, "?page_id=2"))
        return reversed(archive_soup.find_all('a', href=cls.comic_num_re))

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return a dict with the number, titles, date and image URL of one comic.

        The date is read from the image URL, which is expected to contain
        '<year>-<month>-<day>-' in its last path component.
        """
        img_date_re = re.compile('.*/([0-9]*)-([0-9]*)-([0-9]*)-.*')
        comic_url = cls.get_url_from_archive_element(link)
        num = int(cls.comic_num_re.match(comic_url).group(1))
        img_tag = soup.find('div', id='comic').find('img')
        assert img_tag['alt'] == img_tag['title']
        hover_text = img_tag['title']
        img_src = img_tag['src']
        year, month, day = map(int, img_date_re.match(img_src).groups())
        return {
            'num': num,
            'title': link.string,
            'title2': hover_text,
            'img': [img_src],
            'year': year,
            'month': month,
            'day': day,
        }
1081
1082
1083
class GenericBouletCorp(GenericNavigableComic):
    """Shared implementation for the BouletCorp comics in different languages.

    Subclasses provide ``name``, ``long_name`` and ``url``.
    """
    # Also on http://bouletcorp.tumblr.com
    _categories = ('BOULET', )
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the first anchor of the 'centered_nav' div on the home page."""
        nav_div = get_soup_at_url(cls.url).find('div', id='centered_nav')
        return nav_div.find_all('a')[0]

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return a dict with the title, image texts, date and image URLs of one comic.

        The date is read from the comic URL, which has the form
        ``<cls.url>/<year>/<month>/<day>/...``.
        """
        comic_url = cls.get_url_from_link(link)
        url_date_re = re.compile('^%s/([0-9]*)/([0-9]*)/([0-9]*)/' % cls.url)
        year, month, day = map(int, url_date_re.match(comic_url).groups())
        story = soup.find('div', id='notes').find('div', class_='storycontent')
        img_tags = story.find_all('img')
        img_titles = [tag.get('title') for tag in img_tags]
        texts = '  '.join(text for text in img_titles if text)
        title = soup.find('title').string
        return {
            # Images without a src attribute are left out.
            'img': [convert_iri_to_plain_ascii_uri(tag['src'])
                    for tag in img_tags if tag.get('src') is not None],
            'title': title,
            'texts': texts,
            'year': year,
            'month': month,
            'day': day,
        }
1111
1112
1113
class BouletCorp(GenericBouletCorp):
    """Retrieve BouletCorp comics from the www.bouletcorp.com site."""
    name = 'boulet'
    long_name = 'Boulet Corp'
    url = 'http://www.bouletcorp.com'
    _categories = ('FRANCAIS',)
1119
1120
1121
class BouletCorpEn(GenericBouletCorp):
    """Retrieve BouletCorp comics from the english.bouletcorp.com site."""
    name = 'boulet_en'
    long_name = 'Boulet Corp English'
    url = 'http://english.bouletcorp.com'
1126
1127
1128
class AmazingSuperPowers(GenericNavigableComic):
    """Retrieve Amazing Super Powers comics."""
    name = 'asp'
    long_name = 'Amazing Super Powers'
    url = 'http://www.amazingsuperpowers.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return a dict with the title, author, date and image URLs of one comic."""
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        posted = string_to_date(
            soup.find('span', class_='post-date').string, "%B %d, %Y")
        img_tags = soup.find('div', id='comic').find_all('img')
        # The title is the image titles joined with single spaces.
        title = ' '.join(tag['title'] for tag in img_tags)
        assert all(tag['alt'] == tag['title'] for tag in img_tags)
        return {
            'title': title,
            'author': author,
            'img': [tag['src'] for tag in img_tags],
            'day': posted.day,
            'month': posted.month,
            'year': posted.year
        }
1153
1154
1155
class ToonHole(GenericNavigableComic):
    """Retrieve Toon Hole comics."""
    # Also on http://tapastic.com/series/TOONHOLE
    name = 'toonhole'
    long_name = 'Toon Hole'
    url = 'http://www.toonhole.com'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return a dict with the short URL, title, date and image URLs of one comic.

        The title is the alt text of the first image, or an empty string
        when the 'comic' div holds no image.
        """
        short_url = soup.find('link', rel='shortlink')['href']
        meta_text = soup.find('div', class_='entry-meta').contents[0].strip()
        posted = string_to_date(meta_text, "%B %d, %Y")
        img_tags = soup.find('div', id='comic').find_all('img')
        title = ""
        if img_tags:
            first_img = img_tags[0]
            title = first_img['alt']
            assert first_img['title'] == title
        return {
            'short_url': short_url,
            'title': title,
            'month': posted.month,
            'year': posted.year,
            'day': posted.day,
            'img': [convert_iri_to_plain_ascii_uri(tag['src']) for tag in img_tags],
        }
1185
1186
1187
class Channelate(GenericNavigableComic):
    """Retrieve Channelate comics, including the extra panel when one is linked."""
    name = 'channelate'
    long_name = 'Channelate'
    url = 'http://www.channelate.com'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return a dict with the title, author, date and image URLs of one comic.

        When the page has an 'extrapanelbutton' div, the page it links to
        is fetched and its 'extrapanelimage' images are appended.
        """
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        posted = string_to_date(
            soup.find('span', class_='post-date').string, '%Y/%m/%d')
        title = soup.find('meta', property='og:title')['content']
        comic_div = soup.find('div', id='comic')
        img_tags = list(comic_div.find_all('img')) if comic_div else []
        extra_url = None
        extra_button = soup.find('div', id='extrapanelbutton')
        if extra_button:
            extra_url = extra_button.find('a')['href']
            extra_page = get_soup_at_url(extra_url)
            img_tags += extra_page.find_all('img', class_='extrapanelimage')
        return {
            'url_extra': extra_url,
            'title': title,
            'author': author,
            'month': posted.month,
            'year': posted.year,
            'day': posted.day,
            'img': [urljoin_wrapper(cls.url, tag['src']) for tag in img_tags],
        }
1221
1222
1223
class CyanideAndHappiness(GenericNavigableComic):
    """Class to retrieve Cyanide And Happiness comics."""
    name = 'cyanide'
    long_name = 'Cyanide and Happiness'
    url = 'http://explosm.net'
    _categories = ('NSFW', )
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics."""
        return get_soup_at_url(cls.url).find('a', title='Oldest comic')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns None when no matching anchor exists on the page or when the
        anchor has no href.
        """
        # NOTE(review): the trailing space in 'previous-comic ' is kept exactly
        # as it was; confirm it matches the site's markup.
        link = last_soup.find('a', class_='next-comic' if next_ else 'previous-comic ')
        # find() returns None when nothing matches: guard before reading href.
        if link is None or link.get('href') is None:
            return None
        return link

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with the comic number, author, date, file prefix and
        image URLs.
        """
        # The comic number is the second-to-last '/'-separated part of og:url.
        url2 = soup.find('meta', property='og:url')['content']
        num = int(url2.split('/')[-2])
        date_str = soup.find('h3').find('a').string
        day = string_to_date(date_str, '%Y.%m.%d')
        author = soup.find('small', class_="author-credit-name").string
        # The credit is expected to read 'by <author>'; drop the prefix.
        assert author.startswith('by ')
        author = author[3:]
        imgs = soup.find_all('img', id='main-comic')
        return {
            'num': num,
            'author': author,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'prefix': '%d-' % num,
            'img': [convert_iri_to_plain_ascii_uri(urljoin_wrapper(cls.url, i['src'])) for i in imgs]
        }
1262
1263
1264
class MrLovenstein(GenericComic):
    """Retrieve Mr. Lovenstein comics by iterating over comic numbers."""
    # Also on https://tapastic.com/series/MrLovenstein
    name = 'mrlovenstein'
    long_name = 'Mr. Lovenstein'
    url = 'http://www.mrlovenstein.com'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Yield a dict for each comic after last_comic (or from the lowest number).

        Implementation of GenericComic's abstract method. The range of
        comic numbers is derived from the '/comic/<num>' links on the home
        page.
        """
        # TODO: more info from http://www.mrlovenstein.com/archive
        href_num_re = re.compile('^/comic/([0-9]*)$')
        img_src_re = re.compile('^/images/comics/')
        home_links = get_soup_at_url(cls.url).find_all('a', href=href_num_re)
        nums = [int(href_num_re.match(anchor['href']).group(1))
                for anchor in home_links]
        last = max(nums)
        first = last_comic['num'] + 1 if last_comic else min(nums)
        for num in range(first, last + 1):
            comic_url = urljoin_wrapper(cls.url, '/comic/%d' % num)
            page = get_soup_at_url(comic_url)
            # Images are listed in the reverse of their order in the page.
            img_tags = list(page.find_all('img', src=img_src_re))
            img_tags.reverse()
            description = page.find('meta', attrs={'name': 'description'})['content']
            img_titles = [tag.get('title') for tag in img_tags]
            yield {
                'url': comic_url,
                'num': num,
                'texts': '  '.join(text for text in img_titles if text),
                'img': [urljoin_wrapper(comic_url, tag['src']) for tag in img_tags],
                'description': description,
            }
1294
1295
1296
class DinosaurComics(GenericListableComic):
    """Retrieve Dinosaur Comics from the archive page."""
    name = 'dinosaur'
    long_name = 'Dinosaur Comics'
    url = 'http://www.qwantz.com'
    get_url_from_archive_element = get_href
    # Matches comic URLs and captures the comic number.
    comic_link_re = re.compile('^%s/index.php\\?comic=([0-9]*)$' % url)

    @classmethod
    def get_archive_elements(cls):
        """Return the archive page comic anchors, in reverse page order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive.php'))
        anchors = archive_soup.find_all('a', href=cls.comic_link_re)
        # first link is random -> skip it
        return reversed(anchors[1:])

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return a dict with the number, date, texts and image URL of one comic.

        The date is the archive link's text; the 'text' entry comes from
        the node that follows the link in the archive page.
        """
        comic_url = cls.get_url_from_archive_element(link)
        num = int(cls.comic_link_re.match(comic_url).group(1))
        link_text = link.string
        following_text = link.next_sibling.string
        posted = string_to_date(remove_st_nd_rd_th_from_date(link_text), "%B %d, %Y")
        img_src_re = re.compile('^%s/comics/' % cls.url)
        img_tag = soup.find('img', src=img_src_re)
        return {
            'month': posted.month,
            'year': posted.year,
            'day': posted.day,
            'img': [img_tag.get('src')],
            'title': img_tag.get('title'),
            'text': following_text,
            'num': num,
        }
1329
1330
1331
class ButterSafe(GenericListableComic):
    """Retrieve ButterSafe comics from the archive page."""
    name = 'butter'
    long_name = 'ButterSafe'
    url = 'http://buttersafe.com'
    get_url_from_archive_element = get_href
    # Matches comic URLs and captures three numeric path components.
    comic_link_re = re.compile('^%s/([0-9]*)/([0-9]*)/([0-9]*)/.*' % url)

    @classmethod
    def get_archive_elements(cls):
        """Return the archive page comic anchors, in reverse page order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive/'))
        return reversed(archive_soup.find_all('a', href=cls.comic_link_re))

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return a dict with the title, date and image URL of one comic.

        The three numbers captured from the comic URL are used as year,
        month and day.
        """
        comic_url = cls.get_url_from_archive_element(link)
        title = link.string
        year, month, day = map(int, cls.comic_link_re.match(comic_url).groups())
        img_tag = soup.find('div', id='comic').find('img')
        # The image alt text is expected to equal the archive link text.
        assert img_tag['alt'] == title
        return {
            'title': title,
            'day': day,
            'month': month,
            'year': year,
            'img': [img_tag['src']],
        }
1359
1360
1361
class CalvinAndHobbes(GenericComic):
    """Class to retrieve Calvin and Hobbes comics."""
    # Also on http://www.gocomics.com/calvinandhobbes/
    name = 'calvin'
    long_name = 'Calvin and Hobbes'
    # This is not through any official webpage but eh...
    url = 'http://marcel-oehler.marcellosendos.ch/comics/ch/'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generator to get the next comic. Implementation of GenericComic's abstract method.

        Walks the '<year>/<month>/' links of the index page and, for each
        month that may hold comics newer than the last retrieved one,
        yields a dict per image dated after it. Starts after 1985-11-01
        when last_comic is empty.
        """
        last_date = get_date_for_comic(
            last_comic) if last_comic else date(1985, 11, 1)
        link_re = re.compile('^([0-9]*)/([0-9]*)/')
        for link in get_soup_at_url(cls.url).find_all('a', href=link_re):
            url = link['href']
            # Keep the strings as found: they are reused to build URLs below.
            year, month = link_re.match(url).groups()
            year_num, month_num = int(year), int(month)
            # Skip months that end before the last retrieved comic; adding 31
            # days to the 1st always reaches the end of the month.
            if date(year_num, month_num, 1) + timedelta(days=31) < last_date:
                continue
            # Image names start with year and month followed by the day.
            img_re = re.compile('^%s%s([0-9]*)' % (year, month))
            month_url = urljoin_wrapper(cls.url, url)
            for img in get_soup_at_url(month_url).find_all('img', src=img_re):
                img_src = img['src']
                day = int(img_re.match(img_src).groups()[0])
                comic_date = date(year_num, month_num, day)
                if comic_date > last_date:
                    yield {
                        'url': month_url,
                        'year': year_num,
                        'month': month_num,
                        'day': day,
                        'img': ['%s%s/%s/%s' % (cls.url, year, month, img_src)],
                    }
                    last_date = comic_date
1395
1396
1397
class AbstruseGoose(GenericListableComic):
    """Retrieve Abstruse Goose comics from the archive page."""
    name = 'abstruse'
    long_name = 'Abstruse Goose'
    url = 'http://abstrusegoose.com'
    get_url_from_archive_element = get_href
    # Matches comic URLs and captures the comic number.
    comic_url_re = re.compile('^%s/([0-9]*)$' % url)
    # Matches the source of comic images.
    comic_img_re = re.compile('^%s/strips/.*' % url)

    @classmethod
    def get_archive_elements(cls):
        """Return the archive page anchors whose href matches comic_url_re."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive'))
        return archive_soup.find_all('a', href=cls.comic_url_re)

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Return a dict with the number, title and image URL of one comic."""
        comic_url = cls.get_url_from_archive_element(archive_elt)
        num = int(cls.comic_url_re.match(comic_url).group(1))
        return {
            'num': num,
            'title': archive_elt.string,
            'img': [soup.find('img', src=cls.comic_img_re)['src']]
        }
1420
1421
1422
class PhDComics(GenericNavigableComic):
    """Retrieve PhD Comics by following the button images of the archive pages."""
    name = 'phd'
    long_name = 'PhD Comics'
    url = 'http://phdcomics.com/comics/archive.php'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the element wrapping the 'first' button image."""
        page = get_soup_at_url(cls.url)
        return page.find('img', src='images/first_button.gif').parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the element wrapping the next (or previous) button, or None."""
        button_src = 'images/next_button.gif' if next_ else 'images/prev_button.gif'
        button = last_soup.find('img', src=button_src)
        if button is None:
            return None
        return button.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return a dict with the date, image URL and title of one comic.

        When the date shown on the page cannot be parsed, a message is
        printed and today's date is used instead.
        """
        date_font = soup.find(
            'font', face='Arial,Helvetica,Geneva,Swiss,SunSans-Regular', color='white')
        date_text = date_font.string.strip()
        try:
            posted = string_to_date(date_text, '%m/%d/%Y')
        except ValueError:
            print("Invalid date %s" % date_text)
            posted = date.today()
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        return {
            'year': posted.year,
            'month': posted.month,
            'day': posted.day,
            'img': [soup.find('img', id='comic')['src']],
            'title': title,
        }
1457
1458
1459 View Code Duplication
class Octopuns(GenericEmptyComic, GenericNavigableComic):
    """Retrieve Octopuns comics."""
    # Also on http://octopuns.tumblr.com
    name = 'octopuns'
    long_name = 'Octopuns'
    url = 'http://www.octopuns.net'

    @classmethod
    def get_first_comic_link(cls):
        """Return the element wrapping the 'First.png' button image."""
        home = get_soup_at_url(cls.url)
        return home.find('img', src=re.compile('.*/First.png')).parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the next (or previous) link, or None when it has no href."""
        button_pattern = '.*/Next.png' if next_ else '.*/Back.png'
        anchor = last_soup.find('img', src=re.compile(button_pattern)).parent
        if anchor.get('href') is None:
            return None
        return anchor

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return a dict with the title, date and image URLs of one comic."""
        title = soup.find('h3', class_='post-title entry-title').string
        header_text = soup.find('h2', class_='date-header').string
        posted = string_to_date(header_text, "%A, %B %d, %Y")
        image_links = soup.find_all('link', rel='image_src')
        return {
            'img': [tag['href'] for tag in image_links],
            'title': title,
            'day': posted.day,
            'month': posted.month,
            'year': posted.year,
        }
1491
1492
1493
class Quarktees(GenericNavigableComic):
    """Retrieve the Quarktees comics."""
    name = 'quarktees'
    long_name = 'Quarktees'
    url = 'http://www.quarktees.com/blogs/news'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.quarktees.com/blogs/news/12486621-coming-soon'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor to the next (or previous) article."""
        anchor_id = 'article-next' if next_ else 'article-prev'
        return last_soup.find('a', id=anchor_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return a dict with the title and the image URLs of one article."""
        title = soup.find('meta', property='og:title')['content']
        article_div = soup.find('div', class_='single-article')
        img_tags = article_div.find_all('img')
        return {
            'title': title,
            'img': [urljoin_wrapper(cls.url, tag['src']) for tag in img_tags],
        }
1517
1518
1519
class OverCompensating(GenericNavigableComic):
    """Retriever for the Over Compensating comics."""
    name = 'compensating'
    long_name = 'Over Compensating'
    url = 'http://www.overcompensating.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the home page <a> whose href ends with 'comic=1'."""
        home_soup = get_soup_at_url(cls.url)
        first_href_re = re.compile('comic=1$')
        return home_soup.find('a', href=first_href_re)

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the <a> titled 'next comic' (next_) or 'go back already'."""
        if next_:
            wanted_title = 'next comic'
        else:
            wanted_title = 'go back already'
        return last_soup.find('a', title=wanted_title)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the number, image url and title of a comic.

        The number is parsed from the trailing 'comic=<digits>' of the comic
        url; the image is the first <img> whose src starts with
        '/oc/comics/'.
        """
        src_pattern = re.compile('^/oc/comics/.*')
        num_pattern = re.compile('.*comic=([0-9]*)$')
        page_url = cls.get_url_from_link(link)
        number = int(num_pattern.match(page_url).group(1))
        strip = soup.find('img', src=src_pattern)
        return {
            'num': number,
            'img': [urljoin_wrapper(page_url, strip['src'])],
            'title': strip.get('title'),
        }
1549
1550
1551
class Oglaf(GenericNavigableComic):
    """Retriever for the Oglaf comics."""
    name = 'oglaf'
    long_name = 'Oglaf [NSFW]'
    url = 'http://oglaf.com'
    _categories = ('NSFW', )
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the parent element of the <div id="st"> on the home page."""
        start_div = get_soup_at_url(cls.url).find("div", id="st")
        return start_div.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the parent of <div id="nx"> (next_) or <div id="pv">.

        Returns None when the div is not found.
        """
        arrow = last_soup.find("div", id="nx" if next_ else "pv")
        if not arrow:
            return None
        return arrow.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, image urls and description of a comic page.

        Exactly one image is expected inside <div id="tt"> and exactly one
        <img id="strip">; the description joins their title attributes.
        """
        title = soup.find('title').string
        header_imgs = soup.find('div', id='tt').find_all('img')
        assert len(header_imgs) == 1
        comic_imgs = soup.find_all('img', id='strip')
        assert len(comic_imgs) == 1
        all_imgs = header_imgs + comic_imgs
        description = ' '.join(tag['title'] for tag in all_imgs)
        return {
            'title': title,
            'img': [tag['src'] for tag in all_imgs],
            'description': description,
        }
1585
1586
1587
class ScandinaviaAndTheWorld(GenericNavigableComic):
    """Retriever for the Scandinavia And The World comics."""
    name = 'satw'
    long_name = 'Scandinavia And The World'
    url = 'http://satwcomic.com'
    first_url = 'http://satwcomic.com/sweden-denmark-and-norway'
    get_first_comic_link = simulate_first_link

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the <a> with accesskey 'n' (next_) or 'p'."""
        if next_:
            key = 'n'
        else:
            key = 'p'
        return last_soup.find('a', accesskey=key)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, description and image urls of a comic page.

        Title and description come from the twitter:label1 and
        og:description meta tags; images are the <img itemprop="image">.
        """
        label_meta = soup.find('meta', attrs={'name': 'twitter:label1'})
        title = label_meta['content']
        desc_meta = soup.find('meta', property='og:description')
        description = desc_meta['content']
        pictures = soup.find_all('img', itemprop="image")
        return {
            'title': title,
            'description': description,
            'img': [tag['src'] for tag in pictures],
        }
1611
1612
1613
class SomethingOfThatIlk(GenericEmptyComic):
    """Placeholder for the Something Of That Ilk comics."""
    # Does not exist anymore
    url = 'http://www.somethingofthatilk.com'
    long_name = 'Something Of That Ilk'
    name = 'somethingofthatilk'
1618
1619
1620
class InfiniteMonkeyBusiness(GenericNavigableComic):
    """Retriever for the Infinite Monkey Business comics."""
    name = 'monkey'
    long_name = 'Infinite Monkey Business'
    url = 'http://infinitemonkeybusiness.net'
    first_url = 'http://infinitemonkeybusiness.net/comic/pillory/'
    get_first_comic_link = simulate_first_link
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the og:title and the image urls inside <div id="comic">."""
        og_title = soup.find('meta', property='og:title')
        title = og_title['content']
        comic_div = soup.find('div', id='comic')
        pictures = comic_div.find_all('img')
        return {
            'title': title,
            'img': [tag['src'] for tag in pictures],
        }
1638
1639
1640
class Wondermark(GenericListableComic):
    """Retriever for the Wondermark comics."""
    name = 'wondermark'
    long_name = 'Wondermark'
    url = 'http://wondermark.com'
    get_url_from_archive_element = get_href

    @classmethod
    def get_archive_elements(cls):
        """Return the <a rel="bookmark"> of the archive page, in reverse order."""
        archive_url = urljoin_wrapper(cls.url, 'archive/')
        bookmarks = get_soup_at_url(archive_url).find_all('a', rel='bookmark')
        return reversed(bookmarks)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return date, image, title, alt text and tags of a comic page.

        When the page has no <div id="comic">, the image list is empty and
        title/alt are empty strings.
        """
        postdate = soup.find('div', class_='postdate')
        raw_date = postdate.find('em').string
        published = string_to_date(remove_st_nd_rd_th_from_date(raw_date), "%B %d, %Y")
        comic_div = soup.find('div', id='comic')
        if not comic_div:
            img_src = []
            alt = ''
            title = ''
        else:
            strip = comic_div.find('img')
            img_src = [strip['src']]
            alt = strip['alt']
            assert alt == strip['title']
            title = soup.find('meta', property='og:title')['content']
        tag_links = soup.find('div', class_='postmeta').find_all('a', rel='tag')
        return {
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': img_src,
            'title': title,
            'alt': alt,
            'tags': ' '.join(anchor.string for anchor in tag_links),
        }
1677
1678
1679
class WarehouseComic(GenericNavigableComic):
    """Retriever for the Warehouse Comic comics."""
    name = 'warehouse'
    long_name = 'Warehouse Comic'
    url = 'http://warehousecomic.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = get_a_navi_navifirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return image urls, title and publication date of a comic page."""
        title = soup.find('h2', class_='post-title').string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, "%B %d, %Y")
        comic_div = soup.find('div', id='comic')
        pictures = comic_div.find_all('img')
        return {
            'img': [tag['src'] for tag in pictures],
            'title': title,
            'day': published.day,
            'month': published.month,
            'year': published.year,
        }
1701
1702
1703
class JustSayEh(GenericNavigableComic):
    """Retriever for the Just Say Eh comics."""
    # Also on http://tapastic.com/series/Just-Say-Eh
    name = 'justsayeh'
    long_name = 'Just Say Eh'
    url = 'http://www.justsayeh.com'
    get_navi_link = get_a_navi_comicnavnext_navinext
    get_first_comic_link = get_a_navi_navifirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return image urls, title and alt text of a comic page.

        Every image in <div id="comic"> is expected to have identical alt
        and title attributes; the alt text of the first one is reported.
        """
        title = soup.find('h2', class_='post-title').string
        comic_div = soup.find("div", id="comic")
        pictures = comic_div.find_all("img")
        assert all(tag['alt'] == tag['title'] for tag in pictures)
        first_alt = pictures[0]['alt']
        return {
            'img': [tag['src'] for tag in pictures],
            'title': title,
            'alt': first_alt,
        }
1724
1725
1726
class MouseBearComedy(GenericNavigableComic):
    """Retriever for the Mouse Bear Comedy comics."""
    # Also on http://mousebearcomedy.tumblr.com
    name = 'mousebear'
    long_name = 'Mouse Bear Comedy'
    url = 'http://www.mousebearcomedy.com'
    get_navi_link = get_a_navi_comicnavnext_navinext
    get_first_comic_link = get_a_navi_navifirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return date, image urls, title and author of a comic page.

        Each image in <div id="comic"> is expected to carry the post title
        as both its alt and title attributes.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, '%B %d, %Y')
        pictures = soup.find("div", id="comic").find_all("img")
        assert all(tag['alt'] == tag['title'] == title for tag in pictures)
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [tag['src'] for tag in pictures],
            'title': title,
            'author': author,
        }
1752
1753
1754
class BigFootJustice(GenericNavigableComic):
    """Retriever for the Big Foot Justice comics."""
    # Also on http://tapastic.com/series/bigfoot-justice
    name = 'bigfoot'
    long_name = 'Big Foot Justice'
    url = 'http://bigfootjustice.com'
    get_navi_link = get_a_navi_comicnavnext_navinext
    get_first_comic_link = get_a_navi_navifirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return image urls and title of a comic page.

        The title is the space-joined title attributes of the images in
        <div id="comic">, each expected to equal the image's alt text.
        """
        comic_div = soup.find('div', id='comic')
        pictures = comic_div.find_all('img')
        assert all(tag['title'] == tag['alt'] for tag in pictures)
        joined_title = ' '.join(tag['title'] for tag in pictures)
        return {
            'img': [tag['src'] for tag in pictures],
            'title': joined_title,
        }
1773
1774
1775
class RespawnComic(GenericNavigableComic):
    """Class to retrieve Respawn Comic."""
    # Also on http://respawncomic.tumblr.com
    name = 'respawn'
    long_name = 'Respawn Comic'
    # The url must not carry trailing whitespace (it is not part of a valid URL).
    url = 'http://respawncomic.com'
    _categories = ('RESPAWN', )
    get_navi_link = get_a_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://respawncomic.com/comic/c0001/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Title, author and publication time are read from meta tags (og:title
        and the shareaholic:article_* tags); only the first 10 characters of
        the publication time (the "%Y-%m-%d" part) are used. Images are the
        og:image meta tags, minus two site-wide pictures listed in skip_imgs.
        """
        title = soup.find('meta', property='og:title')['content']
        author = soup.find('meta', attrs={'name': 'shareaholic:article_author_name'})['content']
        date_str = soup.find('meta', attrs={'name': 'shareaholic:article_published_time'})['content']
        # Keep only the leading date part of the timestamp.
        date_str = date_str[:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        imgs = soup.find_all('meta', property='og:image')
        # og:image urls that are filtered out of the result.
        skip_imgs = {
            'http://respawncomic.com/wp-content/uploads/2016/03/site/HAROLD2.png',
            'http://respawncomic.com/wp-content/uploads/2016/03/site/DEVA.png'
        }
        return {
            'title': title,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [i['content'] for i in imgs if i['content'] not in skip_imgs],
        }
1807
1808
1809 View Code Duplication
class SafelyEndangered(GenericNavigableComic):
    """Retriever for the Safely Endangered comics."""
    # Also on http://tumblr.safelyendangered.com
    name = 'endangered'
    long_name = 'Safely Endangered'
    url = 'http://www.safelyendangered.com'
    first_url = 'http://www.safelyendangered.com/comic/ignored/'
    get_first_comic_link = simulate_first_link
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return date, image urls, title and alt text of a comic page.

        The alt text is taken from the first image of <div id="comic">;
        every image is expected to have identical alt and title attributes.
        """
        title = soup.find('h2', class_='post-title').string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, '%B %d, %Y')
        pictures = soup.find('div', id='comic').find_all('img')
        first_alt = pictures[0]['alt']
        assert all(tag['alt'] == tag['title'] for tag in pictures)
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [tag['src'] for tag in pictures],
            'title': title,
            'alt': first_alt,
        }
1836
1837
1838
class PicturesInBoxes(GenericNavigableComic):
    """Retriever for the Pictures In Boxes comics."""
    # Also on http://picturesinboxescomic.tumblr.com
    name = 'picturesinboxes'
    long_name = 'Pictures in Boxes'
    url = 'http://www.picturesinboxes.com'
    first_url = 'http://www.picturesinboxes.com/2013/10/26/tetris/'
    get_first_comic_link = simulate_first_link
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return date, image urls, title and author of a comic page.

        At least one image is expected in <div class="comicpane">, each with
        the post title as both its title and alt attributes.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, '%B %d, %Y')
        pictures = soup.find('div', class_='comicpane').find_all('img')
        assert pictures
        assert all(tag['title'] == tag['alt'] == title for tag in pictures)
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [tag['src'] for tag in pictures],
            'title': title,
            'author': author,
        }
1866
1867
1868
class Penmen(GenericEmptyComic):
    """Placeholder for the Penmen comics."""
    url = 'http://penmen.com'
    long_name = 'Penmen'
    name = 'penmen'
1873
1874
1875
class TheDoghouseDiaries(GenericNavigableComic):
    """Retriever for The Dog House Diaries comics."""
    name = 'doghouse'
    long_name = 'The Dog House Diaries'
    url = 'http://thedoghousediaries.com'

    @classmethod
    def get_first_comic_link(cls):
        """Return the <a id="firstlink"> found on the home page."""
        home_soup = get_soup_at_url(cls.url)
        return home_soup.find('a', id='firstlink')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the <a> with id 'nextlink' (next_) or 'previouslink'."""
        if next_:
            anchor_id = 'nextlink'
        else:
            anchor_id = 'previouslink'
        return last_soup.find('a', id=anchor_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return titles, alt text, image url and number of a comic.

        The image is the first <img> whose src starts with 'dhdcomics/';
        the number is the last '/'-separated component of the comic url.
        """
        src_pattern = re.compile('^dhdcomics/.*')
        strip = soup.find('img', src=src_pattern)
        page_url = cls.get_url_from_link(link)
        title = soup.find('h2', id='titleheader').string
        subtitle = soup.find('div', id='subtext').string
        alt = strip.get('title')
        # The src attribute is stripped of surrounding whitespace before joining.
        img_url = urljoin_wrapper(page_url, strip['src'].strip())
        number = int(page_url.split('/')[-1])
        return {
            'title': title,
            'title2': subtitle,
            'alt': alt,
            'img': [img_url],
            'num': number,
        }
1904
1905
1906
class InvisibleBread(GenericListableComic):
    """Class to retrieve Invisible Bread comics."""
    # Also on http://www.gocomics.com/invisible-bread
    name = 'invisiblebread'
    long_name = 'Invisible Bread'
    url = 'http://invisiblebread.com'

    @classmethod
    def get_archive_elements(cls):
        """Return the <td class="archive-title"> of the archive page, reversed."""
        archive_url = urljoin_wrapper(cls.url, 'archives/')
        return reversed(get_soup_at_url(archive_url).find_all('td', class_='archive-title'))

    @classmethod
    def get_url_from_archive_element(cls, td):
        """Get url corresponding to an archive element (href of its first <a>)."""
        return td.find('a')['href']

    @classmethod
    def get_comic_info(cls, soup, td):
        """Get information about a particular comics.

        The title comes from the archive cell's link, the month and day from
        the cell's previous sibling and the year from the first numeric path
        component of the comic url. The single image of <div id="comic"> is
        expected to have the title as both its title and alt attributes.
        """
        url = cls.get_url_from_archive_element(td)
        title = td.find('a').string
        month_and_day = td.previous_sibling.string
        # Escape the base url so that its '.' only match literal dots.
        link_re = re.compile('^%s/([0-9]+)/' % re.escape(cls.url))
        year = link_re.match(url).groups()[0]
        date_str = month_and_day + ' ' + year
        day = string_to_date(date_str, '%b %d %Y')
        imgs = [soup.find('div', id='comic').find('img')]
        assert len(imgs) == 1
        assert all(i['title'] == i['alt'] == title for i in imgs)
        return {
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
            'title': title,
        }
1942
1943
1944
class DiscoBleach(GenericEmptyComic):
    """Placeholder for the Disco Bleach comics."""
    # Does not work anymore
    url = 'http://discobleach.com'
    long_name = 'Disco Bleach'
    name = 'discobleach'
1949
1950
1951
class TubeyToons(GenericEmptyComic):
    """Placeholder for the TubeyToons comics."""
    # Does not work anymore
    # Also on http://tapastic.com/series/Tubey-Toons
    # Also on http://tubeytoons.tumblr.com
    # NOTE(review): the category is spelled 'TUNEYTOONS' (not 'TUBEYTOONS');
    # confirm how other code refers to it before changing the spelling.
    _categories = ('TUNEYTOONS', )
    url = 'http://tubeytoons.com'
    long_name = 'Tubey Toons'
    name = 'tubeytoons'
1959
1960
1961
class CompletelySeriousComics(GenericNavigableComic):
    """Retriever for the Completely Serious comics."""
    name = 'completelyserious'
    long_name = 'Completely Serious Comics'
    url = 'http://completelyseriouscomics.com'
    get_navi_link = get_a_navi_navinext
    get_first_comic_link = get_a_navi_navifirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return date, image urls, title, alt text and author of a comic.

        The author is the second child of <span class="post-author">. At
        least one image is expected in <div class="comicpane"> and all
        images must share the same title/alt text.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find('span', class_='post-author')
        author = author_span.contents[1].string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, '%B %d, %Y')
        pictures = soup.find('div', class_='comicpane').find_all('img')
        assert pictures
        alt = pictures[0]['title']
        assert all(tag['title'] == tag['alt'] == alt for tag in pictures)
        return {
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [tag['src'] for tag in pictures],
            'title': title,
            'alt': alt,
            'author': author,
        }
1989
1990
1991
class PoorlyDrawnLines(GenericListableComic):
    """Class to retrieve Poorly Drawn Lines comics."""
    # Also on http://pdlcomics.tumblr.com
    name = 'poorlydrawn'
    long_name = 'Poorly Drawn Lines'
    url = 'http://poorlydrawnlines.com'
    _categories = ('POORLYDRAWN', )
    get_url_from_archive_element = get_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        At most one image is expected inside <div class="post">; the title
        is that image's title attribute, or "" when it is missing or when
        there is no image.
        """
        imgs = soup.find('div', class_='post').find_all('img')
        assert len(imgs) <= 1
        return {
            'img': [i['src'] for i in imgs],
            'title': imgs[0].get('title', "") if imgs else "",
        }

    @classmethod
    def get_archive_elements(cls):
        """Return the archive page links pointing to '<url>/comic/...', reversed."""
        archive_url = urljoin_wrapper(cls.url, 'archive')
        # Escape the base url so that its '.' only match literal dots.
        url_re = re.compile('^%s/comic/.' % re.escape(cls.url))
        return reversed(get_soup_at_url(archive_url).find_all('a', href=url_re))
2015
2016
2017 View Code Duplication
class LoadingComics(GenericNavigableComic):
    """Retriever for the Loading Artist comics."""
    name = 'loadingartist'
    long_name = 'Loading Artist'
    url = 'http://www.loadingartist.com/latest'

    @classmethod
    def get_first_comic_link(cls):
        """Return the <a title="First"> found at the class url."""
        latest_soup = get_soup_at_url(cls.url)
        return latest_soup.find('a', title="First")

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the <a> titled 'Next' (next_) or 'Previous'."""
        if next_:
            wanted_title = 'Next'
        else:
            wanted_title = 'Previous'
        return last_soup.find('a', title=wanted_title)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, image urls and publication date of a comic page.

        Only the images of <div class="comic"> whose alt and title are both
        empty strings are collected.
        """
        title = soup.find('h1').string
        raw_date = soup.find('span', class_='date').string.strip()
        published = string_to_date(raw_date, "%B %d, %Y")
        comic_div = soup.find('div', class_='comic')
        pictures = comic_div.find_all('img', alt='', title='')
        return {
            'title': title,
            'img': [tag['src'] for tag in pictures],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2047
2048
2049
class ChuckleADuck(GenericNavigableComic):
    """Retriever for the Chuckle-A-Duck comics."""
    name = 'chuckleaduck'
    long_name = 'Chuckle-A-duck'
    url = 'http://chuckleaduck.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = get_div_navfirst_a

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return date, image urls, title and author of a comic page.

        When <div id="comic"> is absent or holds no image, the image list is
        empty and the title is "". Otherwise the title is the first image's
        title attribute, which every image must share as title and alt.
        """
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(remove_st_nd_rd_th_from_date(raw_date), "%B %d, %Y")
        author = soup.find('span', class_='post-author').string
        comic_div = soup.find('div', id='comic')
        if comic_div:
            pictures = comic_div.find_all('img')
        else:
            pictures = []
        if pictures:
            title = pictures[0]['title']
        else:
            title = ""
        assert all(tag['title'] == tag['alt'] == title for tag in pictures)
        return {
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [tag['src'] for tag in pictures],
            'title': title,
            'author': author,
        }
2075
2076
2077
class DepressedAlien(GenericNavigableComic):
    """Class to retrieve Depressed Alien Comics."""
    name = 'depressedalien'
    long_name = 'Depressed Alien'
    url = 'http://depressedalien.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics (parent of the <img name="beginArrow">)."""
        return get_soup_at_url(cls.url).find('img', attrs={'name': 'beginArrow'}).parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        The link is the parent of the <img> named 'rightArrow' (next_) or
        'leftArrow'. Returns None when the page has no such image instead of
        failing on the missing element.
        """
        arrow = last_soup.find('img', attrs={'name': 'rightArrow' if next_ else 'leftArrow'})
        # find() returns None when nothing matches: there is no link to follow.
        return None if arrow is None else arrow.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The title is read from the twitter:title meta tag and the images
        from the og:image meta tags.
        """
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        imgs = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'img': [i['content'] for i in imgs],
        }
2103
2104
2105
class ThingsInSquares(GenericListableComic):
    """Retriever for the Things In Squares comics."""
    # This can be retrieved in other languages
    # Also on https://tapastic.com/series/Things-in-Squares
    name = 'squares'
    long_name = 'Things in squares'
    url = 'http://www.thingsinsquares.com'

    @classmethod
    def get_comic_info(cls, soup, tr):
        """Return date, titles, description, tags, images and alt text.

        `tr` is an archive row with exactly three cells: the second holds
        the link (whose text is the title) and the third the date in
        "%m.%d.%y" format. The rest is read from the comic page itself.
        """
        _, link_cell, date_cell = tr.find_all('td')
        anchor = link_cell.find('a')
        raw_date = date_cell.string
        published = string_to_date(raw_date, "%m.%d.%y")
        title = anchor.string
        page_title = soup.find('meta', property='og:title')['content']
        desc_meta = soup.find('meta', property='og:description')
        if desc_meta:
            description = desc_meta['content']
        else:
            description = ''
        tag_metas = soup.find_all('meta', property='article:tag')
        tags = ' '.join(meta['content'] for meta in tag_metas)
        pictures = soup.find('div', class_='entry-content').find_all('img')
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'title': title,
            'title2': page_title,
            'description': description,
            'tags': tags,
            'img': [tag['src'] for tag in pictures],
            'alt': ' '.join(tag['alt'] for tag in pictures),
        }

    @classmethod
    def get_url_from_archive_element(cls, tr):
        """Return the href of the link held in the second cell of the row."""
        _, link_cell, _ = tr.find_all('td')
        anchor = link_cell.find('a')
        return anchor['href']

    @classmethod
    def get_archive_elements(cls):
        """Return the <tr> of the archive page's <tbody>, in reverse order."""
        archive_url = urljoin_wrapper(cls.url, 'archive-2')
        table_body = get_soup_at_url(archive_url).find('tbody')
        return reversed(table_body.find_all('tr'))
2147
2148
2149
class HappleTea(GenericNavigableComic):
    """Retriever for the Happle Tea comics."""
    name = 'happletea'
    long_name = 'Happle Tea'
    url = 'http://www.happletea.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = get_a_navi_navifirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, image urls, alt text, date and author of a comic.

        Images come from <div id="comic"> and must have identical alt and
        title attributes; the alt texts are concatenated without separator.
        The other fields are read inside <div class="post-content">.
        """
        pictures = soup.find('div', id='comic').find_all('img')
        post_div = soup.find('div', class_='post-content')
        title = post_div.find('h2', class_='post-title').string
        author = post_div.find('a', rel='author').string
        raw_date = post_div.find('span', class_='post-date').string
        published = string_to_date(raw_date, "%B %d, %Y")
        assert all(tag['alt'] == tag['title'] for tag in pictures)
        return {
            'title': title,
            'img': [tag['src'] for tag in pictures],
            'alt': ''.join(tag['alt'] for tag in pictures),
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'author': author,
        }
2176
2177
2178
class FatAwesomeComics(GenericNavigableComic):
    """Retriever for the Fat Awesome comics."""
    # Also on http://fatawesomecomedy.tumblr.com
    name = 'fatawesome'
    long_name = 'Fat Awesome'
    url = 'http://fatawesome.com/comics'
    first_url = 'http://fatawesome.com/shortbus/'
    get_first_comic_link = simulate_first_link
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, description, tags, alt text, image and date.

        Exactly one <img data-recalc-dims="1"> is expected; its src is
        reported without the part following the last '?'. Tags default to
        "" when there is no article:tag meta tag.
        """
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        description = soup.find('meta', attrs={'name': 'description'})['content']
        tag_meta = soup.find('meta', property='article:tag')
        if tag_meta:
            tags = tag_meta['content']
        else:
            tags = ""
        published_meta = soup.find('meta', property='article:published_time')
        # Only the leading "%Y-%m-%d" part of the timestamp is used.
        raw_date = published_meta['content'][:10]
        published = string_to_date(raw_date, "%Y-%m-%d")
        pictures = soup.find_all('img', attrs={'data-recalc-dims': "1"})
        assert len(pictures) == 1
        return {
            'title': title,
            'description': description,
            'tags': tags,
            'alt': "".join(tag['alt'] for tag in pictures),
            'img': [tag['src'].rsplit('?', 1)[0] for tag in pictures],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2209
2210
2211
class AnythingComic(GenericListableComic):
    """Retriever for the Anything Comic comics."""
    # Also on http://tapastic.com/series/anything
    name = 'anythingcomic'
    long_name = 'Anything Comic'
    url = 'http://www.anythingcomic.com'

    @classmethod
    def get_archive_elements(cls):
        """Return the rows of the archive's chapter table, minus the first two."""
        archive_url = urljoin_wrapper(cls.url, 'archive/')
        chapter_table = get_soup_at_url(archive_url).find('table', id='chapter_table')
        rows = chapter_table.find_all('tr')
        # The first 2 <tr>'s do not correspond to comics
        return rows[2:]

    @classmethod
    def get_url_from_archive_element(cls, tr):
        """Return the comic url built from the link in the row's second cell.

        The row must hold exactly four cells.
        """
        _, comic_cell, _, _ = tr.find_all('td')
        anchor = comic_cell.find('a')
        return urljoin_wrapper(cls.url, anchor['href'])

    @classmethod
    def get_comic_info(cls, soup, tr):
        """Return number, title, alt text, image url and date of a comic.

        Number, title and date come from the first three cells of the
        four-cell archive row; the single <img id="comic_image"> of the page
        must have matching alt and title attributes.
        """
        num_cell, comic_cell, date_cell, _ = tr.find_all('td')
        number = int(num_cell.string)
        anchor = comic_cell.find('a')
        title = anchor.string
        pictures = soup.find_all('img', id='comic_image')
        raw_date = date_cell.string
        published = string_to_date(remove_st_nd_rd_th_from_date(raw_date), "%B %d, %Y, %I:%M %p")
        assert len(pictures) == 1
        assert all(tag.get('alt') == tag.get('title') for tag in pictures)
        return {
            'num': number,
            'title': title,
            'alt': pictures[0].get('alt', ''),
            'img': [tag['src'] for tag in pictures],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2252
2253
2254
class LonnieMillsap(GenericNavigableComic):
    """Retriever for Lonnie Millsap's comics."""
    name = 'millsap'
    long_name = 'Lonnie Millsap'
    url = 'http://www.lonniemillsap.com'
    first_url = 'http://www.lonniemillsap.com/?p=42'
    get_first_comic_link = simulate_first_link
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, author, image urls and date of a comic page.

        Author, date and images are looked up inside
        <div class="post-content">; images come from its <div class="entry">.
        """
        title = soup.find('h2', class_='post-title').string
        post_div = soup.find('div', class_='post-content')
        author_span = post_div.find("span", class_="post-author")
        author = author_span.find("a").string
        raw_date = post_div.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        pictures = post_div.find("div", class_="entry").find_all("img")
        return {
            'title': title,
            'author': author,
            'img': [tag['src'] for tag in pictures],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2280
2281
2282 View Code Duplication
class LinsEditions(GenericNavigableComic):
    """Retriever for the L.I.N.S. Editions comics."""
    # Also on http://linscomics.tumblr.com
    # Now on https://warandpeas.com
    name = 'lins'
    long_name = 'L.I.N.S. Editions'
    url = 'https://linsedition.com'
    _categories = ('LINS',)
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'https://linsedition.com/2011/09/07/l-i-n-s/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic description from the page's meta tags.

        Title and images come from the 'og:title' / 'og:image' tags; the
        date is parsed from the first ten characters of the
        'article:published_time' value.
        """
        title_meta = soup.find('meta', property='og:title')
        title = title_meta['content']
        image_metas = soup.find_all('meta', property='og:image')
        time_meta = soup.find('meta', property='article:published_time')
        iso_day = time_meta['content'][:10]
        published = string_to_date(iso_day, "%Y-%m-%d")
        return {
            'title': title,
            'img': [meta['content'] for meta in image_metas],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2308
2309
2310
class ThorsThundershack(GenericNavigableComic):
    """Retriever for the Thor's Thundershack comics."""
    # Also on http://tapastic.com/series/Thors-Thundershac
    name = 'thor'
    long_name = "Thor's Thundershack"
    url = 'http://www.thorsthundershack.com'
    _categories = ('THOR',)
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'first navlink' anchor found on the home page."""
        home_page = get_soup_at_url(cls.url)
        return home_page.find('a', class_='first navlink')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the next (or previous) anchor, or None if there is none.

        Anchors whose href is exactly '/comic' are ignored.
        """
        direction = 'next' if next_ else 'prev'
        anchors = last_soup.find_all('a', rel=direction)
        usable = (anchor for anchor in anchors if anchor['href'] != '/comic')
        return next(usable, None)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect the comic data exposed by the page's markup.

        Image URLs are joined to the class URL. Every image is expected
        to carry identical 'title' and 'alt' attributes.
        """
        title = soup.find('meta', attrs={'name': 'description'})["content"]
        body_text = soup.find('div', itemprop='articleBody').text
        writer = soup.find('span', itemprop='author copyrightHolder').string
        pictures = soup.find_all('img', itemprop='image')
        for picture in pictures:
            assert picture['title'] == picture['alt']
        if pictures:
            alt_text = pictures[0]['alt']
        else:
            alt_text = ""
        timestamp = soup.find('time', itemprop='datePublished')["datetime"]
        published = string_to_date(timestamp, "%Y-%m-%d %H:%M:%S")
        return {
            'img': [urljoin_wrapper(cls.url, picture['src']) for picture in pictures],
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'author': writer,
            'title': title,
            'alt': alt_text,
            'description': body_text,
        }
2353
2354
2355
class GerbilWithAJetpack(GenericNavigableComic):
    """Retriever for the Gerbil With A Jetpack comics."""
    name = 'gerbil'
    long_name = 'Gerbil With A Jetpack'
    url = 'http://gerbilwithajetpack.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, author, date, alt text and images from a page.

        All images must share the same 'alt' and 'title' text, which is
        reported as the comic's alt text.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        writer = author_span.find("a").string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        comic_div = soup.find("div", id="comic")
        pictures = comic_div.find_all("img")
        alt_text = pictures[0]['alt']
        for picture in pictures:
            assert picture['alt'] == picture['title'] == alt_text
        return {
            'img': [picture['src'] for picture in pictures],
            'title': title,
            'alt': alt_text,
            'author': writer,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2382
2383
2384
class EveryDayBlues(GenericNavigableComic):
    """Retriever for the Every Day Blues comics."""
    name = "blues"
    long_name = "Every Day Blues"
    url = "http://everydayblues.net"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, author, date and images from a comic page.

        The date is parsed with the "de_DE.utf8" locale. At most one
        image is expected, with 'alt' and 'title' equal to the title.
        """
        title = soup.find("h2", class_="post-title").string
        author_span = soup.find("span", class_="post-author")
        writer = author_span.find("a").string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%d. %B %Y", "de_DE.utf8")
        comic_div = soup.find("div", id="comic")
        pictures = comic_div.find_all("img")
        for picture in pictures:
            assert picture['alt'] == picture['title'] == title
        assert len(pictures) <= 1
        return {
            'img': [picture['src'] for picture in pictures],
            'title': title,
            'author': writer,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2410
2411
2412
class BiterComics(GenericNavigableComic):
    """Retriever for the Biter Comics."""
    name = "biter"
    long_name = "Biter Comics"
    url = "http://www.bitercomics.com"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, author, date, alt text and image from a page.

        Exactly one image is expected, with matching 'alt' and 'title'.
        """
        title = soup.find("h1", class_="entry-title").string
        author_span = soup.find("span", class_="author vcard")
        writer = author_span.find("a").string
        raw_date = soup.find("span", class_="entry-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        comic_div = soup.find("div", id="comic")
        pictures = comic_div.find_all("img")
        for picture in pictures:
            assert picture['alt'] == picture['title']
        assert len(pictures) == 1
        alt_text = pictures[0]['alt']
        return {
            'img': [picture['src'] for picture in pictures],
            'title': title,
            'alt': alt_text,
            'author': writer,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2440
2441
2442
class TheAwkwardYeti(GenericNavigableComic):
    """Retriever for The Awkward Yeti comics."""
    # Also on http://www.gocomics.com/the-awkward-yeti
    # Also on http://larstheyeti.tumblr.com
    # Also on https://tapastic.com/series/TheAwkwardYeti
    name = 'yeti'
    long_name = 'The Awkward Yeti'
    url = 'http://theawkwardyeti.com'
    _categories = ('YETI',)
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, date and images from a comic page.

        Only the first image is required to have equal 'alt' and 'title'
        attributes; later images are not checked.
        """
        title = soup.find('h2', class_='post-title').string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        comic_div = soup.find("div", id="comic")
        pictures = comic_div.find_all("img")
        # The check applies to the image at index 0 only.
        assert not pictures or pictures[0]['alt'] == pictures[0]['title']
        return {
            'img': [picture['src'] for picture in pictures],
            'title': title,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2469
2470
2471
class PleasantThoughts(GenericNavigableComic):
    """Retriever for the Pleasant Thoughts comics."""
    name = 'pleasant'
    long_name = 'Pleasant Thoughts'
    url = 'http://pleasant-thoughts.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract the title and images found in the 'post-content' div."""
        content = soup.find('div', class_='post-content')
        title = content.find('h2', class_='post-title').string
        entry = content.find("div", class_="entry")
        pictures = entry.find_all("img")
        return {
            'title': title,
            'img': [picture['src'] for picture in pictures],
        }
2489
2490
2491
class MisterAndMe(GenericNavigableComic):
    """Retriever for the Mister & Me comics."""
    # Also on http://www.gocomics.com/mister-and-me
    # Also on https://tapastic.com/series/Mister-and-Me
    name = 'mister'
    long_name = 'Mister & Me'
    url = 'http://www.mister-and-me.com'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, author, date, alt text and image from a page.

        At most one image is expected; the alt text is empty when the
        page has no image.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        writer = author_span.find("a").string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        comic_div = soup.find("div", id="comic")
        pictures = comic_div.find_all("img")
        for picture in pictures:
            assert picture['alt'] == picture['title']
        assert len(pictures) <= 1
        if pictures:
            alt_text = pictures[0]['alt']
        else:
            alt_text = ""
        return {
            'img': [picture['src'] for picture in pictures],
            'title': title,
            'alt': alt_text,
            'author': writer,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2521
2522
2523
class LastPlaceComics(GenericNavigableComic):
    """Retriever for the Last Place Comics."""
    name = 'lastplace'
    long_name = 'Last Place Comics'
    url = "http://lastplacecomics.com"
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, author, date, alt text and image from a page.

        At most one image is expected; the alt text is empty when the
        page has no image.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        writer = author_span.find("a").string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        comic_div = soup.find("div", id="comic")
        pictures = comic_div.find_all("img")
        for picture in pictures:
            assert picture['alt'] == picture['title']
        assert len(pictures) <= 1
        if pictures:
            alt_text = pictures[0]['alt']
        else:
            alt_text = ""
        return {
            'img': [picture['src'] for picture in pictures],
            'title': title,
            'alt': alt_text,
            'author': writer,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2551
2552
2553
class TalesOfAbsurdity(GenericNavigableComic):
    """Retriever for the Tales Of Absurdity comics."""
    # Also on http://tapastic.com/series/Tales-Of-Absurdity
    # Also on http://talesofabsurdity.tumblr.com
    name = 'absurdity'
    long_name = 'Tales of Absurdity'
    url = 'http://talesofabsurdity.com'
    _categories = ('ABSURDITY',)
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, author, date, alt text and images from a page.

        Each image must have equal 'alt' and 'title'; the alt text of the
        first image is reported, or an empty string without images.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        writer = author_span.find("a").string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        comic_div = soup.find("div", id="comic")
        pictures = comic_div.find_all("img")
        for picture in pictures:
            assert picture['alt'] == picture['title']
        if pictures:
            alt_text = pictures[0]['alt']
        else:
            alt_text = ""
        return {
            'img': [picture['src'] for picture in pictures],
            'title': title,
            'alt': alt_text,
            'author': writer,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2583
2584
2585
class EndlessOrigami(GenericEmptyComic, GenericNavigableComic):
    """Retriever for the Endless Origami comics."""
    name = "origami"
    long_name = "Endless Origami"
    url = "http://endlessorigami.com"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, author, date, alt text and images from a page.

        Each image must have equal 'alt' and 'title'; the alt text of the
        first image is reported, or an empty string without images.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        writer = author_span.find("a").string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        comic_div = soup.find("div", id="comic")
        pictures = comic_div.find_all("img")
        for picture in pictures:
            assert picture['alt'] == picture['title']
        if pictures:
            alt_text = pictures[0]['alt']
        else:
            alt_text = ""
        return {
            'img': [picture['src'] for picture in pictures],
            'title': title,
            'alt': alt_text,
            'author': writer,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2612
2613
2614
class PlanC(GenericNavigableComic):
    """Retriever for the Plan C comics."""
    name = 'planc'
    long_name = 'Plan C'
    url = 'http://www.plancomic.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, date and images from a comic page."""
        title = soup.find('h2', class_='post-title').string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        comic_div = soup.find('div', id='comic')
        pictures = comic_div.find_all('img')
        return {
            'title': title,
            'img': [picture['src'] for picture in pictures],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2636
2637
2638
class BuniComic(GenericNavigableComic):
    """Retriever for the Buni comics."""
    name = 'buni'
    long_name = 'BuniComics'
    url = 'http://www.bunicomic.com'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract the single image of the page and use its title.

        Exactly one image is expected, with matching 'alt' and 'title'.
        """
        comic_div = soup.find('div', id='comic')
        pictures = comic_div.find_all('img')
        for picture in pictures:
            assert picture['alt'] == picture['title']
        assert len(pictures) == 1
        return {
            'img': [picture['src'] for picture in pictures],
            'title': pictures[0]['title'],
        }
2656 View Code Duplication
2657
2658
class GenericCommitStrip(GenericNavigableComic):
    """Base retriever shared by the per-language Commit Strip classes."""
    get_navi_link = get_a_rel_next
    get_first_comic_link = simulate_first_link
    # Placeholder: the language-specific subclasses set the real value.
    first_url = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract titles, description and images from a comic page.

        'title2' is the space-joined 'title' attributes of the images
        (an empty string stands in for a missing attribute). Image
        sources are converted to plain ASCII URIs and joined to the
        class URL.
        """
        description = soup.find('meta', property='og:description')['content']
        title = soup.find('meta', property='og:title')['content']
        entry = soup.find('div', class_='entry-content')
        pictures = entry.find_all('img')
        image_titles = [picture.get('title', '') for picture in pictures]
        secondary_title = ' '.join(image_titles)
        return {
            'title': title,
            'title2': secondary_title,
            'description': description,
            'img': [
                urljoin_wrapper(cls.url, convert_iri_to_plain_ascii_uri(picture['src']))
                for picture in pictures
            ],
        }
2677
2678
2679
class CommitStripFr(GenericCommitStrip):
    """Retriever for the French edition of Commit Strip."""
    name = 'commit_fr'
    long_name = 'Commit Strip (Fr)'
    url = 'http://www.commitstrip.com/fr'
    _categories = ('FRANCAIS',)
    # Value for the first_url placeholder declared on the base class.
    first_url = 'http://www.commitstrip.com/fr/2012/02/22/interview/'
2686
2687
2688
class CommitStripEn(GenericCommitStrip):
    """Retriever for the English edition of Commit Strip."""
    name = 'commit_en'
    long_name = 'Commit Strip (En)'
    url = 'http://www.commitstrip.com/en'
    # Value for the first_url placeholder declared on the base class.
    first_url = 'http://www.commitstrip.com/en/2012/02/22/interview/'
2694
2695
2696
class GenericBoumerie(GenericNavigableComic):
    """Base retriever shared by the per-language Boumeries classes."""
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next
    # Placeholders: subclasses provide the date format and the locale
    # passed to string_to_date.
    date_format = NotImplemented
    lang = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, short URL, author, date and images from a page.

        The date is parsed using the class-level date_format and lang.
        Each image must have equal 'alt' and 'title' attributes.
        """
        title = soup.find('h2', class_='post-title').string
        shortlink = soup.find('link', rel='shortlink')['href']
        author_span = soup.find("span", class_="post-author")
        writer = author_span.find("a").string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, cls.date_format, cls.lang)
        comic_div = soup.find('div', id='comic')
        pictures = comic_div.find_all('img')
        for picture in pictures:
            assert picture['alt'] == picture['title']
        return {
            'short_url': shortlink,
            'img': [picture['src'] for picture in pictures],
            'title': title,
            'author': writer,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2722
2723
2724
class BoumerieEn(GenericBoumerie):
    """Retriever for the English edition of Boumeries."""
    name = 'boumeries_en'
    long_name = 'Boumeries (En)'
    url = 'http://comics.boumerie.com'
    # Date layout and locale used by the base class to parse dates.
    date_format = "%B %d, %Y"
    lang = 'en_GB.UTF-8'
2731
2732
2733
class BoumerieFr(GenericBoumerie):
    """Retriever for the French edition of Boumeries."""
    name = 'boumeries_fr'
    long_name = 'Boumeries (Fr)'
    url = 'http://bd.boumerie.com'
    _categories = ('FRANCAIS',)
    # Date layout and locale used by the base class to parse dates.
    date_format = "%A, %d %B %Y"
    lang = "fr_FR.utf8"
2741
2742
2743
class UnearthedComics(GenericNavigableComic):
    """Class to retrieve Unearthed comics."""
    # Also on http://tapastic.com/series/UnearthedComics
    # Also on http://unearthedcomics.tumblr.com
    name = 'unearthed'
    long_name = 'Unearthed Comics'
    url = 'http://unearthedcomics.com'
    _categories = ('UNEARTHED', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://unearthedcomics.com/comics/world-with-turn-signals/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with the title (first 'h1', else first 'h2', else
        an empty string), the 'og:description' text (empty string when
        the meta tag or its content is missing), the short URL, the
        image sources found in the portfolio entry and the publication
        date.
        """
        short_url = soup.find('link', rel='shortlink')['href']
        title_elt = soup.find('h1') or soup.find('h2')
        title = title_elt.string if title_elt else ""
        # Store the text of the meta tag, not the tag object itself, so
        # that 'description' is a plain string like in the other classes.
        desc_elt = soup.find('meta', property='og:description')
        desc = desc_elt.get('content', '') if desc_elt else ""
        date_str = soup.find('time', class_='published updated hidden')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        post = soup.find('div', class_="entry content entry-content type-portfolio")
        imgs = post.find_all('img')
        return {
            'title': title,
            'description': desc,
            'url2': short_url,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
2775
2776
2777
class Optipess(GenericNavigableComic):
    """Retriever for the Optipess comics."""
    name = 'optipess'
    long_name = 'Optipess'
    url = 'http://www.optipess.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, author, alt text, images and date from a page.

        A page without a 'comic' div yields no image and an empty alt
        text. All images must share the same 'alt' and 'title' text.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        writer = author_span.find("a").string
        comic_div = soup.find('div', id='comic')
        if comic_div:
            pictures = comic_div.find_all('img')
        else:
            pictures = []
        if pictures:
            alt_text = pictures[0]['title']
        else:
            alt_text = ""
        for picture in pictures:
            assert picture['alt'] == picture['title'] == alt_text
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, "%B %d, %Y")
        return {
            'title': title,
            'alt': alt_text,
            'author': writer,
            'img': [picture['src'] for picture in pictures],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2805
2806
2807
class PainTrainComic(GenericNavigableComic):
    """Class to retrieve Pain Train Comics."""
    name = 'paintrain'
    long_name = 'Pain Train Comics'
    url = 'http://paintraincomic.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The comic number is read from the '?p=<num>' part of the page's
        shortlink, which must start with the class URL. All images must
        share the same 'alt' and 'title' text, reported as the alt text.
        """
        title = soup.find('h2', class_='post-title').string
        short_url = soup.find('link', rel='shortlink')['href']
        # Escape the URL so that its dots are matched literally instead
        # of acting as regex wildcards.
        short_url_re = re.compile(r'^%s/\?p=([0-9]*)' % re.escape(cls.url))
        num = int(short_url_re.match(short_url).group(1))
        imgs = soup.find('div', id='comic').find_all('img')
        alt = imgs[0]['title']
        assert all(i['alt'] == i['title'] == alt for i in imgs)
        date_str = soup.find('span', class_='post-date').string
        day = string_to_date(date_str, "%d/%m/%Y")
        return {
            'short_url': short_url,
            'num': num,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'alt': alt,
            'title': title,
        }
2837
2838
2839
class MoonBeard(GenericNavigableComic):
    """Class to retrieve MoonBeard comics."""
    # Also on http://blog.squiresjam.es/moonbeard
    # Also on http://www.webtoons.com/en/comedy/moon-beard/list?title_no=471
    name = 'moonbeard'
    long_name = 'Moon Beard'
    url = 'http://moonbeard.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The comic number is read from the '?p=<num>' part of the page's
        shortlink, which must start with the class URL. All images must
        share the same 'alt' and 'title' text, reported as the alt text.
        Tags are the space-joined contents of the 'article:tag' metas.
        """
        title = soup.find('h2', class_='post-title').string
        short_url = soup.find('link', rel='shortlink')['href']
        # Escape the URL so that its dots are matched literally instead
        # of acting as regex wildcards.
        short_url_re = re.compile(r'^%s/\?p=([0-9]*)' % re.escape(cls.url))
        num = int(short_url_re.match(short_url).group(1))
        imgs = soup.find('div', id='comic').find_all('img')
        alt = imgs[0]['title']
        assert all(i['alt'] == i['title'] == alt for i in imgs)
        date_str = soup.find('span', class_='post-date').string
        day = string_to_date(date_str, "%B %d, %Y")
        tags = ' '.join(t['content'] for t in soup.find_all('meta', property='article:tag'))
        author = soup.find('span', class_='post-author').string
        return {
            'short_url': short_url,
            'num': num,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'title': title,
            'tags': tags,
            'alt': alt,
            'author': author,
        }
2875
2876
2877
class AHamADay(GenericNavigableComic):
    """Class to retrieve class A Ham A Day comics."""
    name = 'ham'
    long_name = 'A Ham A Day'
    url = 'http://www.ahammaday.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.ahammaday.com/today/3/6/french'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns None when the page has no matching navigation item,
        instead of failing on the missing element.
        """
        # prev is next / next is prev
        item = last_soup.find('li', class_='previous' if next_ else 'next')
        if item is None:
            return None
        return item.find('a')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with the image URLs (from the 'image' meta tags),
        the 'og:title' text, the author and the publication date.
        """
        date_str = soup.find('time', class_='published')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        author = soup.find('span', class_='blog-author').find('a').string
        title = soup.find('meta', property='og:title')['content']
        imgs = soup.find_all('meta', itemprop='image')
        return {
            'img': [i['content'] for i in imgs],
            'title': title,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
        }
2908
2909
2910
class LittleLifeLines(GenericNavigableComic):
    """Retriever for the Little Life Lines comics."""
    # Also on https://little-life-lines.tumblr.com
    name = 'life'
    long_name = 'Little Life Lines'
    url = 'http://www.littlelifelines.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.littlelifelines.com/comics/well-done'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor to the next or previous comic, or None."""
        # prev is next / next is prev
        wanted_class = 'prev' if next_ else 'next'
        item = last_soup.find('li', class_=wanted_class)
        if not item:
            return None
        return item.find('a')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, description, author, date and images from a page.

        Only images having a 'src' attribute are kept; the alt text is
        taken from the first of them.
        """
        title = soup.find('meta', property='og:title')['content']
        description = soup.find('meta', property='og:description')['content']
        timestamp = soup.find('time', class_='published')['datetime']
        published = string_to_date(timestamp, "%Y-%m-%d")
        writer = soup.find('a', rel='author').string
        body = soup.find('div', class_="body entry-content")
        pictures = []
        for picture in body.find_all('img'):
            if picture.get('src') is not None:
                pictures.append(picture)
        alt_text = pictures[0]['alt']
        return {
            'title': title,
            'alt': alt_text,
            'description': description,
            'author': writer,
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [picture['src'] for picture in pictures],
        }
2949
2950
2951
class GenericWordPressInkblot(GenericNavigableComic):
    """Base retriever for comics hosted on WordPress with Inkblot."""
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'first webcomic' anchor found on the home page."""
        home_page = get_soup_at_url(cls.url)
        first_link_classes = 'webcomic-link webcomic1-link first-webcomic-link first-webcomic1-link'
        return home_page.find('a', class_=first_link_classes)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, date and images from a comic page.

        The date is parsed from the first ten characters of the
        'article:published_time' meta value.
        """
        title = soup.find('meta', property='og:title')['content']
        image_div = soup.find('div', class_='webcomic-image')
        pictures = image_div.find_all('img')
        time_meta = soup.find('meta', property='article:published_time')
        iso_day = time_meta['content'][:10]
        published = string_to_date(iso_day, "%Y-%m-%d")
        return {
            'title': title,
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [picture['src'] for picture in pictures],
        }
2974
2975
2976
class EverythingsStupid(GenericWordPressInkblot):
    """Retriever for the Everything's Stupid comics."""
    # Also on http://tapastic.com/series/EverythingsStupid
    # Also on http://www.webtoons.com/en/challenge/everythings-stupid/list?title_no=14591
    # Also on http://everythingsstupidcomics.tumblr.com
    # All retrieval logic is inherited; only identification is set here.
    name = 'stupid'
    long_name = "Everything's Stupid"
    url = 'http://everythingsstupid.net'
2984
2985
2986
class TheIsmComics(GenericWordPressInkblot):
    """Retriever for The Ism comics."""
    # Also on https://tapastic.com/series/TheIsm (?)
    # All retrieval logic is inherited; only identification is set here.
    name = 'theism'
    long_name = "The Ism"
    url = 'http://www.theism-comics.com'
2992
2993
2994
class WoodenPlankStudios(GenericWordPressInkblot):
    """Retriever for the Wooden Plank Studios comics."""
    # All retrieval logic is inherited; only identification is set here.
    name = 'woodenplank'
    long_name = 'Wooden Plank Studios'
    url = 'http://woodenplankstudios.com'
2999
3000
3001
class ElectricBunnyComic(GenericNavigableComic):
    """Retriever for the Electric Bunny comics."""
    # Also on http://electricbunnycomics.tumblr.com
    name = 'bunny'
    long_name = 'Electric Bunny Comic'
    url = 'http://www.electricbunnycomics.com/View/Comic/153/Welcome+to+Hell'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the parent element of the image whose alt is 'First'."""
        start_page = get_soup_at_url(cls.url)
        first_button = start_page.find('img', alt='First')
        return first_button.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the parent of the 'Next' (or 'Back') image, or None."""
        label = 'Next' if next_ else 'Back'
        button = last_soup.find('img', alt=label)
        if not button:
            return None
        return button.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract the title and images from the page's Open Graph tags."""
        title = soup.find('meta', property='og:title')['content']
        image_metas = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'img': [meta['content'] for meta in image_metas],
        }
3029
3030
3031
class SheldonComics(GenericNavigableComic):
    """Retrieve Sheldon comics by following the nav-first/nav-next/nav-prev anchors."""
    # Also on http://www.gocomics.com/sheldon
    name = 'sheldon'
    long_name = 'Sheldon Comics'
    url = 'http://www.sheldoncomics.com'

    @classmethod
    def get_first_comic_link(cls):
        """Return the anchor with id 'nav-first' found on the main page."""
        landing_page = get_soup_at_url(cls.url)
        return landing_page.find("a", id="nav-first")

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the first next/previous anchor whose href is not the site root.

        None is returned when no such anchor exists.
        """
        nav_id = "nav-next" if next_ else "nav-prev"
        candidates = (
            anchor for anchor in last_soup.find_all("a", id=nav_id)
            if anchor['href'] != 'http://www.sheldoncomics.com')
        return next(candidates, None)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Describe the comic from the single image found in the 'comic-foot' div."""
        footer = soup.find("div", id="comic-foot")
        imgs = footer.find_all("img")
        # Sanity checks: alt and title must match and exactly one image is expected.
        assert all(i['alt'] == i['title'] for i in imgs)
        assert len(imgs) == 1
        only_img = imgs[0]
        title = only_img['title']
        return {
            'title': title,
            'img': [i['src'] for i in imgs],
        }
3062
3063
3064
class Ubertool(GenericNavigableComic):
    """Retrieve Ubertool comics using the shared comic-nav-base link helpers."""
    # Also on http://ubertool.tumblr.com
    # Also on https://tapastic.com/series/ubertool
    name = 'ubertool'
    long_name = 'Ubertool'
    url = 'http://ubertoolcomic.com'
    _categories = ('UBERTOOL', )
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_a_comicnavbase_comicnavnext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, publication date and image urls from a comic page."""
        title = soup.find('h2', class_='post-title').string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, "%B %d, %Y")
        comic_div = soup.find('div', id='comic')
        comic_imgs = comic_div.find_all('img')
        return {
            'img': [img['src'] for img in comic_imgs],
            'title': title,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
3089
3090
3091
class EarthExplodes(GenericNavigableComic):
    """Retrieve The Earth Explodes comics, starting from the hard-coded first_url."""
    name = 'earthexplodes'
    long_name = 'The Earth Explodes'
    url = 'http://www.earthexplodes.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.earthexplodes.com/comics/000/'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor with id 'next' or 'prev' depending on next_."""
        anchor_id = 'next' if next_ else 'prev'
        return last_soup.find('a', id=anchor_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Describe the comic from the page title and the images of the 'image' div."""
        page_title = soup.find('title').string
        image_div = soup.find('div', id='image')
        imgs = image_div.find_all('img')
        # The alt text is the title attribute of the first image, if any.
        first_img = imgs[0]
        alt = first_img.get('title', '')
        return {
            'img': [urljoin_wrapper(cls.url, img['src']) for img in imgs],
            'title': page_title,
            'alt': alt,
        }
3116
3117
3118
class CubeDrone(GenericNavigableComic):
    """Retrieve Cube Drone comics by following the glyphicon navigation spans."""
    name = 'cubedrone'
    long_name = 'Cube Drone'
    url = 'http://cube-drone.com/comics'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the parent element of the 'glyphicon-backward' span."""
        landing_page = get_soup_at_url(cls.url)
        backward_icon = landing_page.find('span', class_='glyphicon glyphicon-backward')
        return backward_icon.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the parent element of the right (next) or left (previous) chevron."""
        direction = 'right' if next_ else 'left'
        chevron = last_soup.find('span', class_='glyphicon glyphicon-chevron-' + direction)
        return chevron.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Describe the comic from the twitter meta tags and the comic images."""
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        url2 = soup.find('meta', attrs={'name': 'twitter:url'})['content']
        # date_str = soup.find('h2', class_='comic_title').find('small').string
        # day = string_to_date(date_str, "%B %d, %Y, %I:%M %p")
        imgs = soup.find_all('img', class_='comic img-responsive')
        main_img = imgs[0]
        title2 = main_img['title']
        alt = main_img['alt']
        return {
            'url2': url2,
            'title': title,
            'title2': title2,
            'alt': alt,
            'img': [img['src'] for img in imgs],
        }
3153
3154
3155
class MakeItStoopid(GenericNavigableComic):
    """Retrieve Make It Stoopid comics using the 'cnav' navigation elements."""
    name = 'stoopid'
    long_name = 'Make it stoopid'
    url = 'http://makeitstoopid.com/comic.php'

    @classmethod
    def get_nav(cls, soup):
        """Return the navigation elements of the page.

        The 'cnav' elements are split after the fifth one and both parts are
        asserted to be equal. Elements without an href are replaced by None.
        """
        nav_elements = soup.find_all(class_='cnav')
        first_part, second_part = nav_elements[:5], nav_elements[5:]
        assert first_part == second_part
        # begin, prev, archive, next_, end = first_part
        return [elt if elt.get('href') is not None else None for elt in first_part]

    @classmethod
    def get_first_comic_link(cls):
        """Return the first navigation element of the main page."""
        landing_page = get_soup_at_url(cls.url)
        navigation = cls.get_nav(landing_page)
        return navigation[0]

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the navigation element at index 3 (next) or 1 (previous)."""
        position = 3 if next_ else 1
        return cls.get_nav(last_soup)[position]

    @classmethod
    def get_comic_info(cls, soup, link):
        """Describe the comic: title from the link, images with id 'comicimg'."""
        title = link['title']
        comic_imgs = soup.find_all('img', id='comicimg')
        return {
            'title': title,
            'img': [img['src'] for img in comic_imgs],
        }
3189
3190
3191 View Code Duplication
class MarketoonistComics(GenericNavigableComic):
    """Retrieve Marketoonist cartoons, starting from the hard-coded first_url."""
    name = 'marketoonist'
    long_name = 'Marketoonist'
    url = 'https://marketoonist.com/cartoons'
    get_first_comic_link = simulate_first_link
    get_navi_link = get_link_rel_next
    first_url = 'https://marketoonist.com/2002/10/the-8-types-of-brand-managers-2.html'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Read images, publication date and title from the page's meta tags."""
        image_metas = soup.find_all('meta', property='og:image')
        published = soup.find('meta', property='article:published_time')['content']
        # Only the first 10 characters (the YYYY-MM-DD part) are parsed.
        day = string_to_date(published[:10], "%Y-%m-%d")
        title = soup.find('meta', property='og:title')['content']
        return {
            'img': [meta['content'] for meta in image_metas],
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'title': title,
        }
3214
3215 View Code Duplication
3216
class ConsoliaComics(GenericNavigableComic):
    """Retrieve Consolia comics by following the first/next/prev span anchors."""
    name = 'consolia'
    long_name = 'consolia'
    url = 'https://consolia-comic.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the anchor contained in the span of class 'first'."""
        landing_page = get_soup_at_url(cls.url)
        first_span = landing_page.find('span', class_='first')
        return first_span.find('a')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor contained in the span of class 'next' or 'prev'."""
        span_class = 'next' if next_ else 'prev'
        nav_span = last_soup.find('span', class_=span_class)
        return nav_span.find('a')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, alt text, images and publication date from a comic page."""
        title = soup.find('meta', property='og:title')['content']
        raw_date = soup.find('time')["datetime"]
        published = string_to_date(raw_date, "%Y-%m-%d")
        comic_imgs = soup.find('div', id='comic').find_all('img')
        # The alt text comes from the title attribute of the first image.
        alt = comic_imgs[0]['title']
        # article = soup.find('div', id='blag')
        # text = article.encode_contents()
        return {
            'title': title,
            'alt': alt,
            'img': [img['src'] for img in comic_imgs],
            # 'text': text,
            'day': published.day,
            'month': published.month,
            'year': published.year,
        }
3252
3253
3254 View Code Duplication
class TuMourrasMoinsBete(GenericNavigableComic):
    """Retrieve Tu Mourras Moins Bete comics by following the blog pager links."""
    name = 'mourrasmoinsbete'
    long_name = 'Tu Mourras Moins Bete'
    url = 'http://tumourrasmoinsbete.blogspot.fr'
    _categories = ('FRANCAIS', )
    get_first_comic_link = simulate_first_link
    first_url = 'http://tumourrasmoinsbete.blogspot.fr/2008/06/essai.html'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the 'newer' pager anchor for next, the 'older' one otherwise."""
        if next_:
            pager_id = 'Blog1_blog-pager-newer-link'
        else:
            pager_id = 'Blog1_blog-pager-older-link'
        return last_soup.find('a', id=pager_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, author and the images of the article body."""
        page_title = soup.find('title').string
        body = soup.find('div', itemprop='description articleBody')
        body_imgs = body.find_all('img')
        author = soup.find('span', itemprop='author').string
        return {
            'img': [img['src'] for img in body_imgs],
            'author': author,
            'title': page_title,
        }
3279
3280
3281
class GeekAndPoke(GenericNavigableComic):
    """Class to retrieve Geek And Poke comics.

    Navigation starts from the hard-coded first_url and then follows the
    'prev-item'/'next-item' anchors of each page.
    """
    name = 'geek'
    long_name = 'Geek And Poke'
    url = 'http://geek-and-poke.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://geek-and-poke.com/geekandpoke/2006/8/27/a-new-place-for-a-not-so-old-blog.html'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        NOTE(review): 'prev-item' is used for the next comic and 'next-item'
        for the previous one. Presumably the site lists posts from newest to
        oldest - confirm before swapping them.
        """
        return last_soup.find('a', class_='prev-item' if next_ else 'next-item')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with the title, description, author, publication date
        and image urls (joined to cls.url). 'alt' is the alt text of the first
        image having a src, or an empty string when there is no such image.
        """
        title = soup.find('meta', property='og:title')['content']
        desc = soup.find('meta', property='og:description')['content']
        date_str = soup.find('time', class_='published')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        author = soup.find('a', rel='author').string
        # The images live in one of two possible containers.
        div_content = (soup.find('div', class_="body entry-content") or
                       soup.find('div', class_="special-content"))
        # Images without a src attribute cannot be downloaded: ignore them.
        imgs = [i for i in div_content.find_all('img') if i.get('src') is not None]
        # NOTE(review): with BeautifulSoup, `in` on a tag looks at its children
        # rather than its attributes, so this check presumably never fails.
        # Confirm and consider has_attr('title') if the check is wanted.
        assert all('title' not in i or i['alt'] == i['title'] for i in imgs)
        # Always a string: the fallback used to be an empty list.
        alt = imgs[0].get('alt', "") if imgs else ""
        return {
            'title': title,
            'alt': alt,
            'description': desc,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
        }
3319
3320
3321 View Code Duplication
class GloryOwlComix(GenericNavigableComic):
    """Retrieve Glory Owl comics by following the blog pager links."""
    name = 'gloryowl'
    long_name = 'Glory Owl'
    url = 'http://gloryowlcomix.blogspot.fr'
    _categories = ('NSFW', 'FRANCAIS')
    get_first_comic_link = simulate_first_link
    first_url = 'http://gloryowlcomix.blogspot.fr/2013/02/1_7.html'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the 'newer' pager anchor for next, the 'older' one otherwise."""
        if next_:
            pager_id = 'Blog1_blog-pager-newer-link'
        else:
            pager_id = 'Blog1_blog-pager-older-link'
        return last_soup.find('a', id=pager_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, author and the 'image_src' links of the page."""
        page_title = soup.find('title').string
        image_links = soup.find_all('link', rel='image_src')
        author = soup.find('a', rel='author').string
        return {
            'img': [image_link['href'] for image_link in image_links],
            'author': author,
            'title': page_title,
        }
3346
3347
3348
class GenericTumblrV1(GenericComic):
    """Generic class to retrieve comics from Tumblr using the V1 API.

    The subclasses visible in this file only define name, long_name, url and
    optionally _categories.
    """
    _categories = ('TUMBLR', )

    @classmethod
    def get_next_comic(cls, last_comic):
        """Yield the comic built from each new post, skipping unusable posts."""
        for post in cls.get_posts(last_comic):
            info = cls.get_comic_info(post)
            if info is None:
                continue
            yield info

    @classmethod
    def get_url_from_post(cls, post):
        """Return the 'url' attribute of a post."""
        return post['url']

    @classmethod
    def get_api_url(cls):
        """Return the url of the '/api/read/' endpoint, joined to cls.url."""
        return urljoin_wrapper(cls.url, '/api/read/')

    @classmethod
    def get_comic_info(cls, post):
        """Get information about a particular comics.

        Returns None for any post whose type is not 'photo', otherwise a dict
        describing the comic.
        """
        if post['type'] != 'photo':
            return None
        tumblr_id = int(post['id'])
        api_url = cls.get_api_url() + '?id=%d' % (tumblr_id)
        timestamp = int(post['unix-timestamp'])
        day = datetime.datetime.fromtimestamp(timestamp).date()
        caption = post.find('photo-caption')
        title = caption.string if caption else ""
        tags = ' '.join(tag.string for tag in post.find_all('tag'))
        # Photos may appear in 'photo' tags and/or straight in the post
        photo_tags = post.find_all('photo') or [post]
        # Images are in multiple resolutions - taking the first one
        photo_urls = [photo.find('photo-url') for photo in photo_tags]
        return {
            'url': cls.get_url_from_post(post),
            'url2': post['url-with-slug'],
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'title': title,
            'tags': tags,
            'img': [photo_url.string for photo_url in photo_urls],
            'tumblr-id': tumblr_id,
            'api_url': api_url,
        }

    @classmethod
    def get_posts(cls, last_comic, nb_post_per_call=10):
        """Get posts using API. nb_post_per_call is max 50.

        Posts are retrieved from newer to older as per the tumblr v1 api
        but are returned in chronological order.

        Only the posts found before the one matching last_comic['url'] are
        returned. Nothing is returned when the previous post cannot be
        fetched or is not found among the posts.
        """
        target_url = last_comic['url'] if last_comic else None
        collected = []
        if last_comic is not None:
            # Sometimes, tumblr posts are deleted. When previous post is deleted, we
            # might end up spending a lot of time looking for something that
            # doesn't exist. Failing early and clearly might be a better option.
            last_api_url = last_comic['api_url']
            try:
                get_soup_at_url(last_api_url)
            except urllib.error.HTTPError:
                try:
                    get_soup_at_url(cls.url)
                except urllib.error.HTTPError:
                    print("Did not find previous post nor main url %s" % cls.url)
                else:
                    print("Did not find previous post %s : it might have been deleted" % last_api_url)
                return reversed(collected)
        base_api_url = cls.get_api_url()
        overview = get_soup_at_url(base_api_url).find('posts')
        start, total = int(overview['start']), int(overview['total'])
        assert start == 0
        for offset in range(0, total, nb_post_per_call):
            page_url = base_api_url + '?start=%d&num=%d' % (offset, nb_post_per_call)
            page = get_soup_at_url(page_url).find('posts')
            page_start, page_total = int(page['start']), int(page['total'])
            assert offset == page_start, "%d != %d" % (offset, page_start)
            # This may happen and should be handled in the future
            assert total == page_total, "%d != %d" % (total, page_total)
            for post in page.find_all('post'):
                if target_url and target_url == cls.get_url_from_post(post):
                    # Reached the last known post: everything collected is new.
                    return reversed(collected)
                collected.append(post)
        if target_url is not None:
            print("Did not find %s : there might be a problem" % target_url)
            return []
        return reversed(collected)
3441
3442
3443
class SaturdayMorningBreakfastCerealTumblr(GenericTumblrV1):
    """Retrieve Saturday Morning Breakfast Cereal from Tumblr via GenericTumblrV1."""
    # Also on http://www.gocomics.com/saturday-morning-breakfast-cereal
    # Also on http://www.smbc-comics.com
    name = 'smbc-tumblr'
    long_name = 'Saturday Morning Breakfast Cereal (from Tumblr)'
    url = 'http://smbc-comics.tumblr.com'
    _categories = ('SMBC', )
3451
3452
3453
class IrwinCardozo(GenericTumblrV1):
    """Retrieve Irwin Cardozo comics; the logic is inherited from GenericTumblrV1."""
    name = 'irwinc'
    long_name = 'Irwin Cardozo'
    url = 'http://irwincardozocomics.tumblr.com'
3458
3459
3460
class AccordingToDevin(GenericTumblrV1):
    """Retrieve According To Devin comics; the logic is inherited from GenericTumblrV1."""
    name = 'devin'
    long_name = 'According To Devin'
    url = 'http://accordingtodevin.tumblr.com'
3465
3466
3467
class ItsTheTieTumblr(GenericTumblrV1):
    """Retrieve It's the tie comics from Tumblr via GenericTumblrV1."""
    # Also on http://itsthetie.com
    # Also on https://tapastic.com/series/itsthetie
    name = 'tie-tumblr'
    long_name = "It's the tie (from Tumblr)"
    url = "http://itsthetie.tumblr.com"
    _categories = ('TIE', )
3475
3476
3477
class OctopunsTumblr(GenericTumblrV1):
    """Retrieve Octopuns comics from Tumblr via GenericTumblrV1."""
    # Also on http://www.octopuns.net
    name = 'octopuns-tumblr'
    long_name = 'Octopuns (from Tumblr)'
    url = 'http://octopuns.tumblr.com'
3483
3484
3485
class PicturesInBoxesTumblr(GenericTumblrV1):
    """Retrieve Pictures In Boxes comics from Tumblr via GenericTumblrV1."""
    # Also on http://www.picturesinboxes.com
    name = 'picturesinboxes-tumblr'
    long_name = 'Pictures in Boxes (from Tumblr)'
    url = 'http://picturesinboxescomic.tumblr.com'
3491
3492
3493
class TubeyToonsTumblr(GenericTumblrV1):
    """Retrieve Tubey Toons comics from Tumblr via GenericTumblrV1."""
    # Also on http://tapastic.com/series/Tubey-Toons
    # Also on http://tubeytoons.com
    name = 'tubeytoons-tumblr'
    long_name = 'Tubey Toons (from Tumblr)'
    url = 'http://tubeytoons.tumblr.com'
    # NOTE(review): 'TUNEYTOONS' looks like a typo for 'TUBEYTOONS'. It is kept
    # as is because other classes may share this category - confirm.
    _categories = ('TUNEYTOONS', )
3501
3502
3503
class UnearthedComicsTumblr(GenericTumblrV1):
    """Retrieve Unearthed Comics from Tumblr via GenericTumblrV1."""
    # Also on http://tapastic.com/series/UnearthedComics
    # Also on http://unearthedcomics.com
    name = 'unearthed-tumblr'
    long_name = 'Unearthed Comics (from Tumblr)'
    url = 'http://unearthedcomics.tumblr.com'
    _categories = ('UNEARTHED', )
3511
3512
3513
class PieComic(GenericTumblrV1):
    """Retrieve Pie Comic comics; the logic is inherited from GenericTumblrV1."""
    name = 'pie'
    long_name = 'Pie Comic'
    url = "http://piecomic.tumblr.com"
3518
3519
3520
class MrEthanDiamond(GenericTumblrV1):
    """Retrieve Mr Ethan Diamond comics; the logic is inherited from GenericTumblrV1."""
    name = 'diamond'
    long_name = 'Mr Ethan Diamond'
    url = 'http://mrethandiamond.tumblr.com'
3525
3526
3527
class Flocci(GenericTumblrV1):
    """Retrieve floccinaucinihilipilification comics through GenericTumblrV1."""
    name = 'flocci'
    long_name = 'floccinaucinihilipilification'
    url = "http://floccinaucinihilipilificationa.tumblr.com"
3532
3533
3534
class UpAndOut(GenericTumblrV1):
    """Retrieve Up & Out comics from Tumblr via GenericTumblrV1."""
    # Also on http://tapastic.com/series/UP-and-OUT
    name = 'upandout'
    long_name = 'Up And Out (from Tumblr)'
    url = 'http://upandoutcomic.tumblr.com'
3540
3541
3542
class Pundemonium(GenericTumblrV1):
    """Retrieve Pundemonium comics; the logic is inherited from GenericTumblrV1."""
    name = 'pundemonium'
    long_name = 'Pundemonium'
    url = 'http://monstika.tumblr.com'
3547
3548
3549
class PoorlyDrawnLinesTumblr(GenericTumblrV1):
    """Retrieve Poorly Drawn Lines comics from Tumblr via GenericTumblrV1."""
    # Also on http://poorlydrawnlines.com
    name = 'poorlydrawn-tumblr'
    long_name = 'Poorly Drawn Lines (from Tumblr)'
    url = 'http://pdlcomics.tumblr.com'
    _categories = ('POORLYDRAWN', )
3556
3557
3558
class PearShapedComics(GenericTumblrV1):
    """Retrieve Pear-Shaped Comics; the logic is inherited from GenericTumblrV1."""
    name = 'pearshaped'
    long_name = 'Pear-Shaped Comics'
    url = 'http://pearshapedcomics.com'
3563
3564
3565
class PondScumComics(GenericTumblrV1):
    """Retrieve Pond Scum comics; the logic is inherited from GenericTumblrV1."""
    name = 'pond'
    long_name = 'Pond Scum'
    url = 'http://pondscumcomic.tumblr.com'
3570
3571
3572
class MercworksTumblr(GenericTumblrV1):
    """Retrieve Mercworks comics from Tumblr via GenericTumblrV1."""
    # Also on http://mercworks.net
    name = 'mercworks-tumblr'
    long_name = 'Mercworks (from Tumblr)'
    url = 'http://mercworks.tumblr.com'
3578
3579
3580
class OwlTurdTumblr(GenericTumblrV1):
    """Retrieve Owl Turd comics via GenericTumblrV1."""
    # Also on http://tapastic.com/series/Owl-Turd-Comix
    name = 'owlturd-tumblr'
    long_name = 'Owl Turd (from Tumblr)'
    url = 'http://owlturd.com'
    _categories = ('OWLTURD', )
3587
3588
3589
class VectorBelly(GenericTumblrV1):
    """Retrieve Vector Belly comics; the logic is inherited from GenericTumblrV1."""
    # Also on http://vectorbelly.com
    name = 'vector'
    long_name = 'Vector Belly'
    url = 'http://vectorbelly.tumblr.com'
3595
3596
3597
class GoneIntoRapture(GenericTumblrV1):
    """Retrieve Gone Into Rapture comics; the logic is inherited from GenericTumblrV1."""
    # Also on http://goneintorapture.tumblr.com
    # Also on http://tapastic.com/series/Goneintorapture
    name = 'rapture'
    long_name = 'Gone Into Rapture'
    url = 'http://www.goneintorapture.com'
3604
3605
3606
class TheOatmealTumblr(GenericTumblrV1):
    """Retrieve The Oatmeal comics from Tumblr via GenericTumblrV1."""
    # Also on http://theoatmeal.com
    name = 'oatmeal-tumblr'
    long_name = 'The Oatmeal (from Tumblr)'
    url = 'http://oatmeal.tumblr.com'
3612
3613
3614
class HeckIfIKnowComicsTumblr(GenericTumblrV1):
    """Retrieve Heck If I Know comics via GenericTumblrV1."""
    # Also on http://tapastic.com/series/Regular
    name = 'heck-tumblr'
    long_name = 'Heck if I Know comics (from Tumblr)'
    url = 'http://heckifiknowcomics.com'
3620
3621
3622
class MyJetPack(GenericTumblrV1):
    """Retrieve My Jet Pack comics; the logic is inherited from GenericTumblrV1."""
    name = 'jetpack'
    long_name = 'My Jet Pack'
    url = 'http://myjetpack.tumblr.com'
3627
3628
3629
class CheerUpEmoKidTumblr(GenericTumblrV1):
    """Retrieve Cheer Up Emo Kid comics from Tumblr via GenericTumblrV1."""
    # Also on http://www.cheerupemokid.com
    # Also on http://tapastic.com/series/CUEK
    name = 'cuek-tumblr'
    long_name = 'Cheer Up Emo Kid (from Tumblr)'
    url = 'http://enzocomics.tumblr.com'
3636
3637
3638
class ForLackOfABetterComic(GenericTumblrV1):
    """Retrieve For Lack Of A Better Comic through GenericTumblrV1."""
    # Also on http://forlackofabettercomic.com
    name = 'lack'
    long_name = 'For Lack Of A Better Comic'
    url = 'http://forlackofabettercomic.tumblr.com'
3644
3645
3646
class ZenPencilsTumblr(GenericTumblrV1):
    """Retrieve Zen Pencils comics from Tumblr via GenericTumblrV1."""
    # Also on http://zenpencils.com
    # Also on http://www.gocomics.com/zen-pencils
    name = 'zenpencils-tumblr'
    long_name = 'Zen Pencils (from Tumblr)'
    url = 'http://zenpencils.tumblr.com'
    _categories = ('ZENPENCILS', )
3654
3655
3656
class ThreeWordPhraseTumblr(GenericTumblrV1):
    """Retrieve Three Word Phrase comics from Tumblr via GenericTumblrV1."""
    # Also on http://threewordphrase.com
    name = 'threeword-tumblr'
    long_name = 'Three Word Phrase (from Tumblr)'
    url = 'http://www.threewordphrase.tumblr.com'
3662
3663
3664
class TimeTrabbleTumblr(GenericTumblrV1):
    """Retrieve Time Trabble comics from Tumblr via GenericTumblrV1."""
    # Also on http://timetrabble.com
    name = 'timetrabble-tumblr'
    long_name = 'Time Trabble (from Tumblr)'
    url = 'http://timetrabble.tumblr.com'
3670
3671
3672
class SafelyEndangeredTumblr(GenericTumblrV1):
    """Retrieve Safely Endangered comics via GenericTumblrV1."""
    # Also on http://www.safelyendangered.com
    name = 'endangered-tumblr'
    long_name = 'Safely Endangered (from Tumblr)'
    url = 'http://tumblr.safelyendangered.com'
3678
3679
3680
class MouseBearComedyTumblr(GenericTumblrV1):
    """Retrieve Mouse Bear Comedy comics from Tumblr via GenericTumblrV1."""
    # Also on http://www.mousebearcomedy.com
    name = 'mousebear-tumblr'
    long_name = 'Mouse Bear Comedy (from Tumblr)'
    url = 'http://mousebearcomedy.tumblr.com'
3686
3687
3688
class BouletCorpTumblr(GenericTumblrV1):
    """Retrieve Boulet Corp comics from Tumblr via GenericTumblrV1."""
    # Also on http://www.bouletcorp.com
    name = 'boulet-tumblr'
    long_name = 'Boulet Corp (from Tumblr)'
    url = 'http://bouletcorp.tumblr.com'
    _categories = ('BOULET', )
3695
3696
3697
class TheAwkwardYetiTumblr(GenericTumblrV1):
    """Retrieve The Awkward Yeti comics from Tumblr via GenericTumblrV1."""
    # Also on http://www.gocomics.com/the-awkward-yeti
    # Also on http://theawkwardyeti.com
    # Also on https://tapastic.com/series/TheAwkwardYeti
    name = 'yeti-tumblr'
    long_name = 'The Awkward Yeti (from Tumblr)'
    url = 'http://larstheyeti.tumblr.com'
    _categories = ('YETI', )
3706
3707
3708
class NellucNhoj(GenericTumblrV1):
    """Retrieve Nelluc Nhoj comics; the logic is inherited from GenericTumblrV1."""
    name = 'nhoj'
    long_name = 'Nelluc Nhoj'
    url = 'http://nellucnhoj.com'
3713
3714
3715
class DownTheUpwardSpiralTumblr(GenericTumblrV1):
    """Retrieve Down The Upward Spiral comics from Tumblr via GenericTumblrV1."""
    # Also on http://www.downtheupwardspiral.com
    name = 'spiral-tumblr'
    long_name = 'Down the Upward Spiral (from Tumblr)'
    url = 'http://downtheupwardspiral.tumblr.com'
3721
3722
3723
class AsPerUsualTumblr(GenericTumblrV1):
    """Class to retrieve As Per Usual comics."""
    # Also on https://tapastic.com/series/AsPerUsual
    name = 'usual-tumblr'
    long_name = 'As Per Usual (from Tumblr)'
    url = 'http://as-per-usual.tumblr.com'
    # Spelled with the leading underscore like the other comic classes:
    # the attribute used to be named 'categories'.
    _categories = ('DAMILEE', )
3730
3731
3732
class HotComicsForCoolPeopleTumblr(GenericTumblrV1):
    """Class to retrieve Hot Comics For Cool People."""
    # Also on https://tapastic.com/series/Hot-Comics-For-Cool-People
    # Also on http://hotcomics.biz (links to tumblr)
    # Also on http://hcfcp.com (links to tumblr)
    name = 'hotcomics-tumblr'
    long_name = 'Hot Comics For Cool People (from Tumblr)'
    url = 'http://hotcomicsforcoolpeople.tumblr.com'
    # Spelled with the leading underscore like the other comic classes:
    # the attribute used to be named 'categories'.
    _categories = ('DAMILEE', )
3741
3742
3743
class OneOneOneOneComicTumblr(GenericTumblrV1):
    """Retrieve 1111 Comics from Tumblr via GenericTumblrV1."""
    # Also on http://www.1111comics.me
    # Also on https://tapastic.com/series/1111-Comics
    name = '1111-tumblr'
    long_name = '1111 Comics (from Tumblr)'
    url = 'http://comics1111.tumblr.com'
    _categories = ('ONEONEONEONE', )
3751
3752
3753
class JhallComicsTumblr(GenericTumblrV1):
    """Retrieve Jhall Comics from Tumblr via GenericTumblrV1."""
    # Also on http://jhallcomics.com
    name = 'jhall-tumblr'
    long_name = 'Jhall Comics (from Tumblr)'
    url = 'http://jhallcomics.tumblr.com'
3759
3760
3761
class BerkeleyMewsTumblr(GenericTumblrV1):
    """Retrieve Berkeley Mews comics from Tumblr via GenericTumblrV1."""
    # Also on http://www.gocomics.com/berkeley-mews
    # Also on http://www.berkeleymews.com
    name = 'berkeley-tumblr'
    long_name = 'Berkeley Mews (from Tumblr)'
    url = 'http://mews.tumblr.com'
    _categories = ('BERKELEY', )
3769
3770
3771
class JoanCornellaTumblr(GenericTumblrV1):
    """Retrieve Joan Cornella comics from Tumblr via GenericTumblrV1."""
    # Also on http://joancornella.net
    name = 'cornella-tumblr'
    long_name = 'Joan Cornella (from Tumblr)'
    url = 'http://cornellajoan.tumblr.com'
3777
3778
3779
class RespawnComicTumblr(GenericTumblrV1):
    """Retrieve Respawn Comic from Tumblr via GenericTumblrV1."""
    # Also on http://respawncomic.com
    name = 'respawn-tumblr'
    long_name = 'Respawn Comic (from Tumblr)'
    url = 'http://respawncomic.tumblr.com'
3785
3786
3787
class ChrisHallbeckTumblr(GenericTumblrV1):
    """Retrieve Chris Hallbeck comics from Tumblr via GenericTumblrV1."""
    # Also on https://tapastic.com/ChrisHallbeck
    # Also on http://maximumble.com
    # Also on http://minimumble.com
    # Also on http://thebookofbiff.com
    name = 'hallbeck-tumblr'
    # NOTE(review): 'Hallback' below differs from the 'Hallbeck' spelling used in
    # the class name and url. Both strings are kept as is because the category
    # may be shared with other classes - confirm before changing.
    long_name = 'Chris Hallback (from Tumblr)'
    url = 'http://chrishallbeck.tumblr.com'
    _categories = ('HALLBACK', )
3797
3798
3799
class ComicNuggets(GenericTumblrV1):
    """Retrieve Comic Nuggets; the logic is inherited from GenericTumblrV1."""
    name = 'nuggets'
    long_name = 'Comic Nuggets'
    url = 'http://comicnuggets.com'
3804
3805
3806
class PigeonGazetteTumblr(GenericTumblrV1):
    """Retrieve The Pigeon Gazette comics from Tumblr via GenericTumblrV1."""
    # Also on https://tapastic.com/series/The-Pigeon-Gazette
    name = 'pigeon-tumblr'
    long_name = 'The Pigeon Gazette (from Tumblr)'
    url = 'http://thepigeongazette.tumblr.com'
3812
3813
3814
class CancerOwl(GenericTumblrV1):
    """Retrieve Cancer Owl comics from Tumblr via GenericTumblrV1."""
    # Also on http://cancerowl.com
    name = 'cancerowl-tumblr'
    long_name = 'Cancer Owl (from Tumblr)'
    url = 'http://cancerowl.tumblr.com'
3820
3821
3822
class FowlLanguageTumblr(GenericTumblrV1):
    """Retrieve Fowl Language comics from Tumblr via GenericTumblrV1."""
    # Also on http://www.fowllanguagecomics.com
    # Also on http://tapastic.com/series/Fowl-Language-Comics
    # Also on http://www.gocomics.com/fowl-language
    name = 'fowllanguage-tumblr'
    long_name = 'Fowl Language Comics (from Tumblr)'
    url = 'http://fowllanguagecomics.tumblr.com'
    _categories = ('FOWLLANGUAGE', )
3831
3832
3833
class TheOdd1sOutTumblr(GenericTumblrV1):
    """The Odd 1s Out comics, retrieved from Tumblr."""
    # Also on http://theodd1sout.com
    # Also on https://tapastic.com/series/Theodd1sout
    name = 'theodd-tumblr'
    long_name = 'The Odd 1s Out (from Tumblr)'
    url = 'http://theodd1sout.tumblr.com'
3840
3841
3842
class TheUnderfoldTumblr(GenericTumblrV1):
    """The Underfold comics, retrieved from Tumblr."""
    # Also on http://theunderfold.com
    name = 'underfold-tumblr'
    long_name = 'The Underfold (from Tumblr)'
    url = 'http://theunderfold.tumblr.com'
3848
3849
3850
class LolNeinTumblr(GenericTumblrV1):
    """Lol Nein comics, retrieved from Tumblr."""
    # Also on http://lolnein.com
    name = 'lolnein-tumblr'
    long_name = 'Lol Nein (from Tumblr)'
    url = 'http://lolneincom.tumblr.com'
3856
3857
3858
class FatAwesomeComicsTumblr(GenericTumblrV1):
    """Fat Awesome comics, retrieved from Tumblr."""
    # Also on http://fatawesome.com/comics
    name = 'fatawesome-tumblr'
    long_name = 'Fat Awesome (from Tumblr)'
    url = 'http://fatawesomecomedy.tumblr.com'
3864
3865
3866
class TheWorldIsFlatTumblr(GenericTumblrV1):
    """The World Is Flat comics, retrieved from Tumblr."""
    # Also on https://tapastic.com/series/The-World-is-Flat
    name = 'flatworld-tumblr'
    long_name = 'The World Is Flat (from Tumblr)'
    url = 'http://theworldisflatcomics.tumblr.com'
3872
3873
3874
class DorrisMc(GenericTumblrV1):
    """Dorris Mc comics, retrieved with the GenericTumblrV1 logic."""
    # Also on http://www.gocomics.com/dorris-mccomics
    name = 'dorrismc'
    long_name = 'Dorris Mc'
    url = 'http://dorrismccomics.com'
3880
3881
3882
class LeleozTumblr(GenericEmptyComic, GenericTumblrV1):
    """Leleoz comics, retrieved from Tumblr."""
    # NOTE(review): GenericEmptyComic is listed first in the bases, presumably
    # to deactivate the retrieval - confirm.
    # Also on https://tapastic.com/series/Leleoz
    name = 'leleoz-tumblr'
    long_name = 'Leleoz (from Tumblr)'
    url = 'http://leleozcomics.tumblr.com'
3888
3889
3890
class MoonBeardTumblr(GenericTumblrV1):
    """Moon Beard comics, retrieved with the GenericTumblrV1 logic."""
    # Also on http://moonbeard.com
    # Also on http://www.webtoons.com/en/comedy/moon-beard/list?title_no=471
    name = 'moonbeard-tumblr'
    long_name = 'Moon Beard (from Tumblr)'
    url = 'http://blog.squiresjam.es/moonbeard'
3897
3898
3899
class AComik(GenericTumblrV1):
    """A Comik, retrieved with the GenericTumblrV1 logic."""
    name = 'comik'
    long_name = 'A Comik'
    url = 'http://acomik.com'
3904
3905
3906
class ClassicRandy(GenericTumblrV1):
    """Classic Randy comics, retrieved from Tumblr."""
    name = 'randy'
    long_name = 'Classic Randy'
    url = 'http://classicrandy.tumblr.com'
3911
3912
3913
class DagssonTumblr(GenericTumblrV1):
    """Dagsson comics, retrieved from Tumblr."""
    # Also on http://www.dagsson.com
    name = 'dagsson-tumblr'
    long_name = 'Dagsson Hugleikur (from Tumblr)'
    url = 'http://hugleikurdagsson.tumblr.com'
3919
3920
3921
class LinsEditionsTumblr(GenericTumblrV1):
    """L.I.N.S. Editions comics, retrieved from Tumblr."""
    # Also on https://linsedition.com
    # Now on http://warandpeas.tumblr.com
    name = 'lins-tumblr'
    long_name = 'L.I.N.S. Editions (from Tumblr)'
    url = 'http://linscomics.tumblr.com'
    _categories = ('LINS',)
3929
3930
3931
class WarAndPeasTumblr(GenericTumblrV1):
    """War And Peas comics, retrieved from Tumblr."""
    # Was on http://linscomics.tumblr.com
    name = 'warandpeas-tumblr'
    long_name = 'War And Peas (from Tumblr)'
    url = 'http://warandpeas.tumblr.com'
    _categories = ('WARANDPEAS',)
3938
3939
3940
class OrigamiHotDish(GenericTumblrV1):
    """Origami Hot Dish comics, retrieved with the GenericTumblrV1 logic."""
    name = 'origamihotdish'
    long_name = 'Origami Hot Dish'
    url = 'http://origamihotdish.com'
3945
3946
3947
class HitAndMissComicsTumblr(GenericTumblrV1):
    """Hit and Miss Comics, retrieved from Tumblr."""
    name = 'hitandmiss'
    long_name = 'Hit and Miss Comics'
    url = 'http://hitandmisscomics.tumblr.com'
3952
3953
3954
class HMBlanc(GenericTumblrV1):
    """HM Blanc comics, retrieved from Tumblr."""
    name = 'hmblanc'
    long_name = 'HM Blanc'
    url = 'http://hmblanc.tumblr.com'
3959
3960
3961
class TalesOfAbsurdityTumblr(GenericTumblrV1):
    """Tales Of Absurdity comics, retrieved from Tumblr."""
    # Also on http://talesofabsurdity.com
    # Also on http://tapastic.com/series/Tales-Of-Absurdity
    name = 'absurdity-tumblr'
    long_name = 'Tales of Absurdity (from Tumblr)'
    url = 'http://talesofabsurdity.tumblr.com'
    _categories = ('ABSURDITY',)
3969
3970
3971
class RobbieAndBobby(GenericTumblrV1):
    """Robbie And Bobby comics, retrieved from Tumblr."""
    # Also on http://robbieandbobby.com
    name = 'robbie-tumblr'
    long_name = 'Robbie And Bobby (from Tumblr)'
    url = 'http://robbieandbobby.tumblr.com'
3977
3978
3979
class ElectricBunnyComicTumblr(GenericTumblrV1):
    """Electric Bunny Comics, retrieved from Tumblr."""
    # Also on http://www.electricbunnycomics.com/View/Comic/153/Welcome+to+Hell
    name = 'bunny-tumblr'
    long_name = 'Electric Bunny Comic (from Tumblr)'
    url = 'http://electricbunnycomics.tumblr.com'
3985
3986
3987
class Hoomph(GenericTumblrV1):
    """Hoomph comics, retrieved with the GenericTumblrV1 logic."""
    name = 'hoomph'
    long_name = 'Hoomph'
    url = 'http://hoom.ph'
3992
3993
3994
class BFGFSTumblr(GenericTumblrV1):
    """BFGFS comics, retrieved from Tumblr."""
    # Also on https://tapastic.com/series/BFGFS
    # Also on http://bfgfs.com
    name = 'bfgfs-tumblr'
    long_name = 'BFGFS (from Tumblr)'
    url = 'http://bfgfs.tumblr.com'
4001
4002
4003
class DoodleForFood(GenericTumblrV1):
    """Doodle For Food comics, retrieved with the GenericTumblrV1 logic."""
    # Also on http://doodleforfood.com
    name = 'doodle'
    long_name = 'Doodle For Food'
    url = 'http://doodleforfood.com'
4009
4010
4011
class CassandraCalinTumblr(GenericTumblrV1):
    """C. Cassandra comics, retrieved from Tumblr."""
    # Also on http://cassandracalin.com
    # Also on https://tapastic.com/series/C-Cassandra-comics
    name = 'cassandra-tumblr'
    long_name = 'Cassandra Calin (from Tumblr)'
    url = 'http://c-cassandra.tumblr.com'
4018
4019
4020
class DougWasTaken(GenericTumblrV1):
    """Doug Was Taken comics, retrieved from Tumblr."""
    name = 'doug'
    long_name = 'Doug Was Taken'
    url = 'http://dougwastaken.tumblr.com'
4025
4026
4027
class MandatoryRollerCoaster(GenericTumblrV1):
    """Mandatory Roller Coaster comics, retrieved with the GenericTumblrV1 logic."""
    name = 'rollercoaster'
    long_name = 'Mandatory Roller Coaster'
    url = 'http://mandatoryrollercoaster.com'
4032
4033
4034
class CEstPasEnRegardantSesPompes(GenericTumblrV1):
    """C'Est Pas En Regardant Ses Pompes (...) comics, retrieved from Tumblr."""
    name = 'cperspqccltt'
    long_name = 'C Est Pas En Regardant Ses Pompes (...)'
    url = 'http://cperspqccltt.tumblr.com'
4039
4040
4041
class TheGrohlTroll(GenericTumblrV1):
    """The Grohl Troll comics, retrieved with the GenericTumblrV1 logic."""
    name = 'grohltroll'
    long_name = 'The Grohl Troll'
    url = 'http://thegrohltroll.com'
4046
4047
4048
class WebcomicName(GenericTumblrV1):
    """Webcomic Name comics, retrieved with the GenericTumblrV1 logic."""
    name = 'webcomicname'
    long_name = 'Webcomic Name'
    url = 'http://webcomicname.com'
4053
4054
4055
class BooksOfAdam(GenericTumblrV1):
    """Books of Adam comics, retrieved from Tumblr."""
    # Also on http://www.booksofadam.com
    name = 'booksofadam'
    long_name = 'Books of Adam'
    url = 'http://booksofadam.tumblr.com'
4061
4062
4063
class HarkAVagrant(GenericTumblrV1):
    """Hark A Vagrant comics, retrieved from Tumblr."""
    # Also on http://www.harkavagrant.com
    name = 'hark-tumblr'
    long_name = 'Hark A Vagrant (from Tumblr)'
    url = 'http://beatonna.tumblr.com'
4069
4070
4071
class OurSuperAdventureTumblr(GenericTumblrV1):
    """Our Super Adventure comics, retrieved from Tumblr."""
    # Also on https://tapastic.com/series/Our-Super-Adventure
    # Also on http://www.oursuperadventure.com
    # http://sarahgraley.com
    name = 'superadventure-tumblr'
    long_name = 'Our Super Adventure (from Tumblr)'
    url = 'http://sarahssketchbook.tumblr.com'
4079
4080
4081
class JakeLikesOnions(GenericTumblrV1):
    """Jake Likes Onions comics, retrieved with the GenericTumblrV1 logic."""
    name = 'jake'
    long_name = 'Jake Likes Onions'
    url = 'http://jakelikesonions.com'
4086
4087
4088
class InYourFaceCake(GenericTumblrV1):
    """In Your Face Cake comics, retrieved from Tumblr."""
    name = 'inyourfacecake-tumblr'
    long_name = 'In Your Face Cake (from Tumblr)'
    url = 'http://in-your-face-cake.tumblr.com'
4093
4094
4095
class Robospunk(GenericTumblrV1):
    """Robospunk comics, retrieved with the GenericTumblrV1 logic."""
    name = 'robospunk'
    long_name = 'Robospunk'
    url = 'http://robospunk.com'
4100
4101
4102
class BananaTwinky(GenericTumblrV1):
    """Banana Twinky comics, retrieved from Tumblr."""
    name = 'banana'
    long_name = 'Banana Twinky'
    url = 'http://bananatwinky.tumblr.com'
4107
4108
4109
class YesterdaysPopcornTumblr(GenericTumblrV1):
    """Yesterday's Popcorn comics, retrieved from Tumblr."""
    # Also on http://www.yesterdayspopcorn.com
    # Also on https://tapastic.com/series/Yesterdays-Popcorn
    name = 'popcorn-tumblr'
    long_name = "Yesterday's Popcorn (from Tumblr)"
    url = 'http://yesterdayspopcorn.tumblr.com'
4116
4117
4118
class TwistedDoodles(GenericTumblrV1):
    """Twisted Doodles comics, retrieved with the GenericTumblrV1 logic."""
    name = 'twisted'
    long_name = 'Twisted Doodles'
    url = 'http://www.twisteddoodles.com'
4123
4124
4125
class UbertoolTumblr(GenericTumblrV1):
    """Ubertool comics, retrieved from Tumblr."""
    # Also on http://ubertoolcomic.com
    # Also on https://tapastic.com/series/ubertool
    name = 'ubertool-tumblr'
    long_name = 'Ubertool (from Tumblr)'
    url = 'http://ubertool.tumblr.com'
    _categories = ('UBERTOOL',)
4133
4134
4135
class LittleLifeLinesTumblr(GenericTumblrV1):
    """Little Life Lines comics, retrieved from Tumblr."""
    # Also on http://www.littlelifelines.com
    name = 'life-tumblr'
    long_name = 'Little Life Lines (from Tumblr)'
    url = 'https://little-life-lines.tumblr.com'
4141
4142
4143
class TheyCanTalk(GenericTumblrV1):
    """They Can Talk comics, retrieved with the GenericTumblrV1 logic."""
    name = 'theycantalk'
    long_name = 'They Can Talk'
    url = 'http://theycantalk.com'
4148
4149
4150
class Will5NeverCome(GenericTumblrV1):
    """Will 5:00 Never Come comics, retrieved with the GenericTumblrV1 logic."""
    name = 'will5'
    long_name = 'Will 5:00 Never Come ?'
    url = 'http://will5nevercome.com'
4155
4156
4157
class Sephko(GenericTumblrV1):
    """Sephko comics, retrieved from Tumblr."""
    # Also on http://www.sephko.com
    name = 'sephko'
    long_name = 'Sephko'
    url = 'http://sephko.tumblr.com'
4163
4164
4165
class BlazersAtDawn(GenericTumblrV1):
    """Blazers At Dawn comics, retrieved from Tumblr."""
    name = 'blazers'
    long_name = 'Blazers At Dawn'
    url = 'http://blazersatdawn.tumblr.com'
4170
4171
4172
class ArtByMoga(GenericEmptyComic, GenericTumblrV1):
    """Art By Moga comics, retrieved from Tumblr."""
    # Deactivated because it downloads too many things
    name = 'moga'
    long_name = 'Art By Moga'
    url = 'http://artbymoga.tumblr.com'
4177
4178
4179
class VerbalVomitTumblr(GenericTumblrV1):
    """Verbal Vomit comics, retrieved from Tumblr."""
    # Also on http://www.verbal-vomit.com
    name = 'vomit-tumblr'
    long_name = 'Verbal Vomit (from Tumblr)'
    url = 'http://verbalvomits.tumblr.com'
0 ignored issues
show
This code seems to be duplicated in your project.
Loading history...
4185
4186
4187
class LibraryComic(GenericTumblrV1):
    """LibraryComic, retrieved from Tumblr."""
    # Also on http://librarycomic.com
    name = 'library-tumblr'
    long_name = 'LibraryComic (from Tumblr)'
    url = 'http://librarycomic.tumblr.com'
4193
4194
4195
class HorovitzComics(GenericListableComic):
    """Shared logic for the comics hosted on horovitzcomics.com.

    Subclasses set `link_re` to a compiled pattern that matches the href of
    their comics in the archive page and captures the comic number.
    """
    url = 'http://www.horovitzcomics.com'
    _categories = ('HOROVITZ',)
    # The three groups are read as year, month and day by get_comic_info.
    img_re = re.compile(r'.*comics/([0-9]*)/([0-9]*)/([0-9]*)/.*$')
    link_re = NotImplemented  # overridden in the subclasses
    get_url_from_archive_element = join_cls_url_to_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic description from its page and its archive link."""
        num = int(cls.link_re.match(link['href']).group(1))
        title = link.string
        comic_imgs = soup.find_all('img', id='comic')
        assert len(comic_imgs) == 1
        # The publication date is taken from the path of the image.
        date_match = cls.img_re.match(comic_imgs[0]['src'])
        year, month, day = (int(part) for part in date_match.groups())
        return {
            'title': title,
            'day': day,
            'month': month,
            'year': year,
            'img': [img['src'] for img in comic_imgs],
            'num': num,
        }

    @classmethod
    def get_archive_elements(cls):
        """Return the archive links matching `link_re`, in reversed page order."""
        archive_url = 'http://www.horovitzcomics.com/comics/archive/'
        archive_soup = get_soup_at_url(archive_url)
        return reversed(archive_soup.find_all('a', href=cls.link_re))
4226
4227
4228
class HorovitzNew(HorovitzComics):
    """Horovitz comics from the 'new' section."""
    name = 'horovitznew'
    long_name = 'Horovitz New'
    link_re = re.compile(r'^/comics/new/([0-9]+)$')
4233
4234
4235
class HorovitzClassic(HorovitzComics):
    """Horovitz comics from the 'classic' section."""
    name = 'horovitzclassic'
    long_name = 'Horovitz Classic'
    link_re = re.compile(r'^/comics/classic/([0-9]+)$')
4240
4241 View Code Duplication
4242
class GenericGoComic(GenericNavigableComic):
    """Base class holding the logic shared by the comics from gocomics.com."""
    _categories = ('GOCOMIC',)

    @classmethod
    def get_first_comic_link(cls):
        """Return the link to the first comic, looked up in the page at `cls.url`."""
        # The class string (trailing space included) is matched as written.
        first_button = 'fa btn btn-outline-default btn-circle fa-backward sm '
        return get_soup_at_url(cls.url).find('a', class_=first_button)

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the link to the next comic if `next_` is true, else to the previous one."""
        if next_:
            button = 'fa btn btn-outline-default btn-circle fa-caret-right sm '
        else:
            button = 'fa btn btn-outline-default btn-circle fa-caret-left sm '
        return last_soup.find('a', class_=button)

    @classmethod
    def get_url_from_link(cls, link):
        """Return the url for a link, its href being joined to the gocomics root."""
        return urljoin_wrapper('http://www.gocomics.com', link['href'])

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic description from the meta tags and picture of its page."""

        def meta_content(prop):
            """Return the content of the first meta tag with that property."""
            return soup.find('meta', property=prop)['content']

        published = string_to_date(meta_content('article:published_time'), "%Y-%m-%d")
        picture = soup.find('picture', class_='img-fluid item-comic-image')
        pictures = picture.find_all('img')
        author = meta_content('article:author')
        tags = meta_content('article:tag')
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [img['src'] for img in pictures],
            'author': author,
            'tags': tags,
        }
4279
4280
4281
class PearlsBeforeSwine(GenericGoComic):
    """Pearls Before Swine comics, retrieved from gocomics.com."""
    name = 'pearls'
    long_name = 'Pearls Before Swine'
    url = 'http://www.gocomics.com/pearlsbeforeswine'
4286
4287
4288
class Peanuts(GenericGoComic):
    """Peanuts comics, retrieved from gocomics.com."""
    name = 'peanuts'
    long_name = 'Peanuts'
    url = 'http://www.gocomics.com/peanuts'
4293
4294
4295
class MattWuerker(GenericGoComic):
    """Matt Wuerker comics, retrieved from gocomics.com."""
    name = 'wuerker'
    long_name = 'Matt Wuerker'
    url = 'http://www.gocomics.com/mattwuerker'
4300
4301
4302
class TomToles(GenericGoComic):
    """Tom Toles comics, retrieved from gocomics.com."""
    name = 'toles'
    long_name = 'Tom Toles'
    url = 'http://www.gocomics.com/tomtoles'
4307
4308
4309
class BreakOfDay(GenericGoComic):
    """Break Of Day comics, retrieved from gocomics.com."""
    name = 'breakofday'
    long_name = 'Break Of Day'
    url = 'http://www.gocomics.com/break-of-day'
4314
4315
4316
class Brevity(GenericGoComic):
    """Brevity comics, retrieved from gocomics.com."""
    name = 'brevity'
    long_name = 'Brevity'
    url = 'http://www.gocomics.com/brevitypanel'
4321
4322
4323
class MichaelRamirez(GenericGoComic):
    """Michael Ramirez comics, retrieved from gocomics.com."""
    name = 'ramirez'
    long_name = 'Michael Ramirez'
    url = 'http://www.gocomics.com/michaelramirez'
4328
4329
4330
class MikeLuckovich(GenericGoComic):
    """Mike Luckovich comics, retrieved from gocomics.com."""
    name = 'luckovich'
    long_name = 'Mike Luckovich'
    url = 'http://www.gocomics.com/mikeluckovich'
4335
4336
4337
class JimBenton(GenericGoComic):
    """Jim Benton comics, retrieved from gocomics.com."""
    # Also on http://jimbenton.tumblr.com
    name = 'benton'
    long_name = 'Jim Benton'
    url = 'http://www.gocomics.com/jim-benton-cartoons'
4343
4344
4345
class TheArgyleSweater(GenericGoComic):
    """The Argyle Sweater comics, retrieved from gocomics.com."""
    name = 'argyle'
    long_name = 'Argyle Sweater'
    url = 'http://www.gocomics.com/theargylesweater'
4350
4351
4352
class SunnyStreet(GenericGoComic):
    """Sunny Street comics, retrieved from gocomics.com."""
    # Also on http://www.sunnystreetcomics.com
    name = 'sunny'
    long_name = 'Sunny Street'
    url = 'http://www.gocomics.com/sunny-street'
4358
4359
4360
class OffTheMark(GenericGoComic):
    """Off The Mark comics, retrieved from gocomics.com."""
    # Also on https://www.offthemark.com
    name = 'offthemark'
    long_name = 'Off The Mark'
    url = 'http://www.gocomics.com/offthemark'
4366
4367
4368
class WuMo(GenericGoComic):
    """WuMo comics, retrieved from gocomics.com."""
    # Also on http://wumo.com
    name = 'wumo'
    long_name = 'WuMo'
    url = 'http://www.gocomics.com/wumo'
4374
4375
4376
class LunarBaboon(GenericGoComic):
    """Lunar Baboon comics, retrieved from gocomics.com."""
    # Also on http://www.lunarbaboon.com
    # Also on https://tapastic.com/series/Lunarbaboon
    name = 'lunarbaboon'
    long_name = 'Lunar Baboon'
    url = 'http://www.gocomics.com/lunarbaboon'
4383
4384
4385
class SandersenGocomic(GenericGoComic):
    """Sarah Andersen comics, retrieved from gocomics.com."""
    # Also on http://sarahcandersen.com
    # Also on http://tapastic.com/series/Doodle-Time
    name = 'sandersen-goc'
    long_name = 'Sarah Andersen (from GoComics)'
    url = 'http://www.gocomics.com/sarahs-scribbles'
4392
4393
4394
class SaturdayMorningBreakfastCerealGoComic(GenericGoComic):
    """Saturday Morning Breakfast Cereal comics, retrieved from gocomics.com."""
    # Also on http://smbc-comics.tumblr.com
    # Also on http://www.smbc-comics.com
    name = 'smbc-goc'
    long_name = 'Saturday Morning Breakfast Cereal (from GoComics)'
    url = 'http://www.gocomics.com/saturday-morning-breakfast-cereal'
    _categories = ('SMBC',)
4402
4403
4404
class CalvinAndHobbesGoComic(GenericGoComic):
    """Calvin and Hobbes comics, retrieved from gocomics.com."""
    # From gocomics, not http://marcel-oehler.marcellosendos.ch/comics/ch/
    name = 'calvin-goc'
    long_name = 'Calvin and Hobbes (from GoComics)'
    url = 'http://www.gocomics.com/calvinandhobbes'
4410
4411
4412
class RallGoComic(GenericGoComic):
    """Ted Rall comics, retrieved from gocomics.com."""
    # Also on http://rall.com/comic
    name = 'rall-goc'
    long_name = 'Ted Rall (from GoComics)'
    url = 'http://www.gocomics.com/ted-rall'
    _categories = ('RALL',)
4419
4420
4421
class TheAwkwardYetiGoComic(GenericGoComic):
    """The Awkward Yeti comics, retrieved from gocomics.com."""
    # Also on http://larstheyeti.tumblr.com
    # Also on http://theawkwardyeti.com
    # Also on https://tapastic.com/series/TheAwkwardYeti
    name = 'yeti-goc'
    long_name = 'The Awkward Yeti (from GoComics)'
    url = 'http://www.gocomics.com/the-awkward-yeti'
    _categories = ('YETI',)
4430
4431
4432
class BerkeleyMewsGoComics(GenericGoComic):
    """Berkeley Mews comics, retrieved from gocomics.com."""
    # Also on http://mews.tumblr.com
    # Also on http://www.berkeleymews.com
    name = 'berkeley-goc'
    long_name = 'Berkeley Mews (from GoComics)'
    url = 'http://www.gocomics.com/berkeley-mews'
    _categories = ('BERKELEY',)
4440
4441
4442
class SheldonGoComics(GenericGoComic):
    """Sheldon comics, retrieved from gocomics.com."""
    # Also on http://www.sheldoncomics.com
    name = 'sheldon-goc'
    long_name = 'Sheldon Comics (from GoComics)'
    url = 'http://www.gocomics.com/sheldon'
4448
4449
4450
class FowlLanguageGoComics(GenericGoComic):
    """Fowl Language comics, retrieved from gocomics.com."""
    # Also on http://www.fowllanguagecomics.com
    # Also on http://tapastic.com/series/Fowl-Language-Comics
    # Also on http://fowllanguagecomics.tumblr.com
    name = 'fowllanguage-goc'
    long_name = 'Fowl Language Comics (from GoComics)'
    url = 'http://www.gocomics.com/fowl-language'
    _categories = ('FOWLLANGUAGE',)
4459
4460
4461
class NickAnderson(GenericGoComic):
    """Nick Anderson comics, retrieved from gocomics.com."""
    name = 'nickanderson'
    long_name = 'Nick Anderson'
    url = 'http://www.gocomics.com/nickanderson'
4466
4467
4468
class GarfieldGoComics(GenericGoComic):
    """Garfield comics, retrieved from gocomics.com."""
    # Also on http://garfield.com
    name = 'garfield-goc'
    long_name = 'Garfield (from GoComics)'
    url = 'http://www.gocomics.com/garfield'
    _categories = ('GARFIELD',)
4475
4476
4477
class DorrisMcGoComics(GenericGoComic):
    """Dorris Mc comics, retrieved from gocomics.com."""
    # Also on http://dorrismccomics.com
    name = 'dorrismc-goc'
    long_name = 'Dorris Mc (from GoComics)'
    url = 'http://www.gocomics.com/dorris-mccomics'
4483
4484
4485
class FoxTrot(GenericGoComic):
    """FoxTrot comics, retrieved from gocomics.com."""
    name = 'foxtrot'
    long_name = 'FoxTrot'
    url = 'http://www.gocomics.com/foxtrot'
4490
4491
4492
class FoxTrotClassics(GenericGoComic):
    """FoxTrot Classics comics, retrieved from gocomics.com."""
    name = 'foxtrot-classics'
    long_name = 'FoxTrot Classics'
    url = 'http://www.gocomics.com/foxtrotclassics'
4497
4498
4499
class MisterAndMeGoComics(GenericGoComic):
    """Mister & Me comics, retrieved from gocomics.com."""
    # Also on http://www.mister-and-me.com
    # Also on https://tapastic.com/series/Mister-and-Me
    name = 'mister-goc'
    long_name = 'Mister & Me (from GoComics)'
    url = 'http://www.gocomics.com/mister-and-me'
4506
4507
4508
class NonSequitur(GenericGoComic):
    """Non Sequitur (Wiley Miller) comics, retrieved from gocomics.com."""
    name = 'nonsequitur'
    long_name = 'Non Sequitur'
    url = 'http://www.gocomics.com/nonsequitur'
4513
4514
4515
class GenericTapasticComic(GenericListableComic):
    """Base class holding the logic shared by the comics from tapastic.com."""
    _categories = ('TAPASTIC',)

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Build the comic description from its page and its archive entry.

        Returns None, after printing a message, when the page contains no
        image with the 'art-image' class.
        """
        # 'publishDate' is divided by 1000 before being used as a timestamp
        # (presumably it is expressed in milliseconds - confirm).
        seconds = int(archive_elt['publishDate']) / 1000.0
        published = datetime.datetime.fromtimestamp(seconds).date()
        art_imgs = soup.find_all('img', class_='art-image')
        if not art_imgs:
            episode_url = cls.get_url_from_archive_element(archive_elt)
            print("Comic %s is being uploaded, retry later" % episode_url)
            return None
        return {
            'day': published.day,
            'year': published.year,
            'month': published.month,
            'img': [img['src'] for img in art_imgs],
            'title': archive_elt['title'],
        }

    @classmethod
    def get_url_from_archive_element(cls, archive_elt):
        """Return the episode url built from the 'id' of the archive entry."""
        return 'http://tapastic.com/episode/' + str(archive_elt['id'])

    @classmethod
    def get_archive_elements(cls):
        """Return the episode list parsed from the javascript of the series page.

        Raises IndexError when no line of the page holds the episode list.
        """
        pref, suff = 'episodeList : ', ','
        # Information is stored in the javascript part
        # I don't know the clean way to get it so this is the ugly way.
        episode_lists = []
        for raw_line in urlopen_wrapper(cls.url).readlines():
            line = raw_line.decode('utf-8').strip()
            if line.startswith(pref) and line.endswith(suff):
                # Keep only the JSON value between the prefix and the comma.
                episode_lists.append(line[len(pref):-len(suff)])
        return json.loads(episode_lists[0])
4548
4549
4550
class VegetablesForDessert(GenericTapasticComic):
    """Vegetables For Dessert comics, retrieved from tapastic.com."""
    # Also on http://vegetablesfordessert.tumblr.com
    name = 'vegetables'
    long_name = 'Vegetables For Dessert'
    url = 'http://tapastic.com/series/vegetablesfordessert'
4556
4557
4558
class FowlLanguageTapa(GenericTapasticComic):
    """Fowl Language comics, retrieved from tapastic.com."""
    # Also on http://www.fowllanguagecomics.com
    # Also on http://fowllanguagecomics.tumblr.com
    # Also on http://www.gocomics.com/fowl-language
    name = 'fowllanguage-tapa'
    long_name = 'Fowl Language Comics (from Tapastic)'
    url = 'http://tapastic.com/series/Fowl-Language-Comics'
    _categories = ('FOWLLANGUAGE',)
4567
4568
4569
class OscillatingProfundities(GenericTapasticComic):
    """Oscillating Profundities comics, retrieved from tapastic.com."""
    name = 'oscillating'
    long_name = 'Oscillating Profundities'
    url = 'http://tapastic.com/series/oscillatingprofundities'
4574
4575
4576
class ZnoflatsComics(GenericTapasticComic):
    """Znoflats comics, retrieved from tapastic.com."""
    name = 'znoflats'
    long_name = 'Znoflats Comics'
    url = 'http://tapastic.com/series/Znoflats-Comics'
4581
4582
4583
class SandersenTapastic(GenericTapasticComic):
    """Sarah Andersen comics, retrieved from tapastic.com."""
    # Also on http://sarahcandersen.com
    # Also on http://www.gocomics.com/sarahs-scribbles
    name = 'sandersen-tapa'
    long_name = 'Sarah Andersen (from Tapastic)'
    url = 'http://tapastic.com/series/Doodle-Time'
4590
4591
4592
class TubeyToonsTapastic(GenericTapasticComic):
    """TubeyToons comics, retrieved from tapastic.com."""
    # Also on http://tubeytoons.com
    # Also on http://tubeytoons.tumblr.com
    name = 'tubeytoons-tapa'
    long_name = 'Tubey Toons (from Tapastic)'
    url = 'http://tapastic.com/series/Tubey-Toons'
    # NOTE(review): 'TUNEYTOONS' looks like a typo for 'TUBEYTOONS' but the key
    # is presumably shared with other classes of this file - confirm before renaming.
    _categories = ('TUNEYTOONS',)
4600
4601
4602
class AnythingComicTapastic(GenericTapasticComic):
    """Anything Comics, retrieved from tapastic.com."""
    # Also on http://www.anythingcomic.com
    name = 'anythingcomic-tapa'
    long_name = 'Anything Comic (from Tapastic)'
    url = 'http://tapastic.com/series/anything'
4608
4609
4610
class UnearthedComicsTapastic(GenericTapasticComic):
    """Unearthed comics, retrieved from tapastic.com."""
    # Also on http://unearthedcomics.com
    # Also on http://unearthedcomics.tumblr.com
    name = 'unearthed-tapa'
    long_name = 'Unearthed Comics (from Tapastic)'
    url = 'http://tapastic.com/series/UnearthedComics'
    _categories = ('UNEARTHED',)
4618
4619
4620
class EverythingsStupidTapastic(GenericTapasticComic):
    """Everything's Stupid comics, retrieved from tapastic.com."""
    # Also on http://www.webtoons.com/en/challenge/everythings-stupid/list?title_no=14591
    # Also on http://everythingsstupid.net
    name = 'stupid-tapa'
    long_name = 'Everything\'s Stupid (from Tapastic)'
    url = 'http://tapastic.com/series/EverythingsStupid'
4627
4628
4629
class JustSayEhTapastic(GenericTapasticComic):
    """Just Say Eh comics, retrieved from tapastic.com."""
    # Also on http://www.justsayeh.com
    name = 'justsayeh-tapa'
    long_name = 'Just Say Eh (from Tapastic)'
    url = 'http://tapastic.com/series/Just-Say-Eh'
4635
4636
4637
class ThorsThundershackTapastic(GenericTapasticComic):
    """Thor's Thundershack comics, retrieved from tapastic.com."""
    # Also on http://www.thorsthundershack.com
    name = 'thor-tapa'
    long_name = "Thor's Thundershack (from Tapastic)"
    url = 'http://tapastic.com/series/Thors-Thundershac'
    _categories = ('THOR',)
4644
4645
4646
class OwlTurdTapastic(GenericTapasticComic):
    """Owl Turd comics, retrieved from tapastic.com."""
    # Also on http://owlturd.com
    name = 'owlturd-tapa'
    long_name = 'Owl Turd (from Tapastic)'
    url = 'http://tapastic.com/series/Owl-Turd-Comix'
    _categories = ('OWLTURD',)
4653
4654
4655
class GoneIntoRaptureTapastic(GenericTapasticComic):
    """Gone Into Rapture comics, retrieved from tapastic.com."""
    # Also on http://goneintorapture.tumblr.com
    # Also on http://www.goneintorapture.com
    name = 'rapture-tapa'
    long_name = 'Gone Into Rapture (from Tapastic)'
    url = 'http://tapastic.com/series/Goneintorapture'
4662
4663
4664
class HeckIfIKnowComicsTapa(GenericTapasticComic):
    """Heck If I Know Comics, retrieved from tapastic.com."""
    # Also on http://heckifiknowcomics.com
    name = 'heck-tapa'
    long_name = 'Heck if I Know comics (from Tapastic)'
    url = 'http://tapastic.com/series/Regular'
4670
4671
4672
class CheerUpEmoKidTapa(GenericTapasticComic):
    """CheerUpEmoKid comics, retrieved from tapastic.com."""
    # Also on http://www.cheerupemokid.com
    # Also on http://enzocomics.tumblr.com
    name = 'cuek-tapa'
    long_name = 'Cheer Up Emo Kid (from Tapastic)'
    url = 'http://tapastic.com/series/CUEK'
4679
4680
4681
class BigFootJusticeTapa(GenericTapasticComic):
    """Big Foot Justice comics, retrieved from tapastic.com."""
    # Also on http://bigfootjustice.com
    name = 'bigfoot-tapa'
    long_name = 'Big Foot Justice (from Tapastic)'
    url = 'http://tapastic.com/series/bigfoot-justice'
4687
4688
4689
class UpAndOutTapa(GenericTapasticComic):
    """Retrieve Up & Out comics from Tapastic."""
    # Also published at http://upandoutcomic.tumblr.com
    name = 'upandout-tapa'
    long_name = 'Up And Out (from Tapastic)'
    url = 'http://tapastic.com/series/UP-and-OUT'
4695
4696
4697
class ToonHoleTapa(GenericTapasticComic):
    """Retrieve Toon Hole comics from Tapastic."""
    # Also published at http://www.toonhole.com
    name = 'toonhole-tapa'
    long_name = 'Toon Hole (from Tapastic)'
    url = 'http://tapastic.com/series/TOONHOLE'
4703
4704
4705
class AngryAtNothingTapa(GenericTapasticComic):
    """Retrieve Angry At Nothing comics from Tapastic."""
    # Also published at http://www.angryatnothing.net
    name = 'angry-tapa'
    long_name = 'Angry At Nothing (from Tapastic)'
    url = 'http://tapastic.com/series/Comics-yeah-definitely-comics-'
4711
4712
4713
class LeleozTapa(GenericTapasticComic):
    """Retrieve Leleoz comics from Tapastic."""
    # Also published at http://leleozcomics.tumblr.com
    name = 'leleoz-tapa'
    long_name = 'Leleoz (from Tapastic)'
    url = 'https://tapastic.com/series/Leleoz'
4719
4720
4721
class TheAwkwardYetiTapa(GenericTapasticComic):
    """Retrieve The Awkward Yeti comics from Tapastic."""
    # Also published at http://www.gocomics.com/the-awkward-yeti
    # Also published at http://theawkwardyeti.com
    # Also published at http://larstheyeti.tumblr.com
    name = 'yeti-tapa'
    long_name = 'The Awkward Yeti (from Tapastic)'
    url = 'https://tapastic.com/series/TheAwkwardYeti'
    _categories = ('YETI',)
4730
4731
4732
class AsPerUsualTapa(GenericTapasticComic):
    """Retrieve As Per Usual comics from Tapastic."""
    # Also on http://as-per-usual.tumblr.com
    name = 'usual-tapa'
    long_name = 'As Per Usual (from Tapastic)'
    url = 'https://tapastic.com/series/AsPerUsual'
    # Uses the underscore-prefixed attribute like the other Tapastic comic
    # classes; the previous spelling `categories` did not match them.
    _categories = ('DAMILEE', )
4739
4740
4741
class HotComicsForCoolPeopleTapa(GenericTapasticComic):
    """Retrieve Hot Comics For Cool People from Tapastic."""
    # Also on http://hotcomicsforcoolpeople.tumblr.com
    # Also on http://hotcomics.biz (links to tumblr)
    # Also on http://hcfcp.com (links to tumblr)
    name = 'hotcomics-tapa'
    long_name = 'Hot Comics For Cool People (from Tapastic)'
    url = 'https://tapastic.com/series/Hot-Comics-For-Cool-People'
    # Uses the underscore-prefixed attribute like the other Tapastic comic
    # classes; the previous spelling `categories` did not match them.
    # NOTE(review): the 'DAMILEE' value is kept unchanged - confirm it is the
    # intended category for this comic.
    _categories = ('DAMILEE', )
4750
4751
4752
class OneOneOneOneComicTapa(GenericTapasticComic):
    """Retrieve 1111 Comics from Tapastic."""
    # Also published at http://www.1111comics.me
    # Also published at http://comics1111.tumblr.com
    name = '1111-tapa'
    long_name = '1111 Comics (from Tapastic)'
    url = 'https://tapastic.com/series/1111-Comics'
    _categories = ('ONEONEONEONE',)
4760
4761
4762
class TumbleDryTapa(GenericTapasticComic):
    """Retrieve Tumble Dry comics from Tapastic."""
    # Also on http://tumbledrycomics.com
    name = 'tumbledry-tapa'
    # Display name fixed: it previously read 'Tumblr Dry', which disagrees
    # with the class name, the docstring and the series URL.
    long_name = 'Tumble Dry (from Tapastic)'
    url = 'https://tapastic.com/series/TumbleDryComics'
4768
4769
4770
class DeadlyPanelTapa(GenericTapasticComic):
    """Retrieve Deadly Panel comics from Tapastic."""
    # Also published at http://www.deadlypanel.com
    name = 'deadly-tapa'
    long_name = 'Deadly Panel (from Tapastic)'
    url = 'https://tapastic.com/series/deadlypanel'
4776
4777
4778
class ChrisHallbeckMaxiTapa(GenericTapasticComic):
    """Retrieve Chris Hallbeck's Maximumble comics from Tapastic."""
    # Also on http://chrishallbeck.tumblr.com
    # Also on http://maximumble.com
    name = 'hallbeckmaxi-tapa'
    # Display name fixed: the author's name was misspelled 'Hallback'.
    long_name = 'Chris Hallbeck - Maximumble (from Tapastic)'
    url = 'https://tapastic.com/series/Maximumble'
    # The category key keeps its existing 'HALLBACK' spelling: it is an
    # identifier that may be shared with other classes.
    # NOTE(review): confirm all users before renaming it.
    _categories = ('HALLBACK', )
4786
4787
4788
class ChrisHallbeckMiniTapa(GenericTapasticComic):
    """Retrieve Chris Hallbeck's Minimumble comics from Tapastic."""
    # Also on http://chrishallbeck.tumblr.com
    # Also on http://minimumble.com
    name = 'hallbeckmini-tapa'
    # Display name fixed: the author's name was misspelled 'Hallback'.
    long_name = 'Chris Hallbeck - Minimumble (from Tapastic)'
    url = 'https://tapastic.com/series/Minimumble'
    # The category key keeps its existing 'HALLBACK' spelling: it is an
    # identifier that may be shared with other classes.
    # NOTE(review): confirm all users before renaming it.
    _categories = ('HALLBACK', )
4796
4797
4798
class ChrisHallbeckBiffTapa(GenericTapasticComic):
    """Retrieve Chris Hallbeck's The Book of Biff comics from Tapastic."""
    # Also on http://chrishallbeck.tumblr.com
    # Also on http://thebookofbiff.com
    name = 'hallbeckbiff-tapa'
    # Display name fixed: the author's name was misspelled 'Hallback'.
    long_name = 'Chris Hallbeck - The Book of Biff (from Tapastic)'
    url = 'https://tapastic.com/series/Biff'
    # The category key keeps its existing 'HALLBACK' spelling: it is an
    # identifier that may be shared with other classes.
    # NOTE(review): confirm all users before renaming it.
    _categories = ('HALLBACK', )
4806
4807
4808
class RandoWisTapa(GenericTapasticComic):
    """Retrieve RandoWis comics from Tapastic."""
    # Also published at https://randowis.com
    name = 'randowis-tapa'
    long_name = 'RandoWis (from Tapastic)'
    url = 'https://tapastic.com/series/RandoWis'
4814
4815
4816
class PigeonGazetteTapa(GenericTapasticComic):
    """Retrieve The Pigeon Gazette comics from Tapastic."""
    # Also published at http://thepigeongazette.tumblr.com
    name = 'pigeon-tapa'
    long_name = 'The Pigeon Gazette (from Tapastic)'
    url = 'https://tapastic.com/series/The-Pigeon-Gazette'
4822
4823
4824
class TheOdd1sOutTapa(GenericTapasticComic):
    """Retrieve The Odd 1s Out comics from Tapastic."""
    # Also published at http://theodd1sout.com
    # Also published at http://theodd1sout.tumblr.com
    name = 'theodd-tapa'
    long_name = 'The Odd 1s Out (from Tapastic)'
    url = 'https://tapastic.com/series/Theodd1sout'
4831
4832
4833
class TheWorldIsFlatTapa(GenericTapasticComic):
    """Retrieve The World Is Flat comics from Tapastic."""
    # Also published at http://theworldisflatcomics.tumblr.com
    name = 'flatworld-tapa'
    long_name = 'The World Is Flat (from Tapastic)'
    url = 'https://tapastic.com/series/The-World-is-Flat'
4839
4840
4841
class MisterAndMeTapa(GenericTapasticComic):
    """Retrieve Mister & Me comics from Tapastic."""
    # Also published at http://www.mister-and-me.com
    # Also published at http://www.gocomics.com/mister-and-me
    name = 'mister-tapa'
    long_name = 'Mister & Me (from Tapastic)'
    url = 'https://tapastic.com/series/Mister-and-Me'
4848
4849
4850
class TalesOfAbsurdityTapa(GenericTapasticComic):
    """Retrieve Tales Of Absurdity comics from Tapastic."""
    # Also published at http://talesofabsurdity.com
    # Also published at http://talesofabsurdity.tumblr.com
    name = 'absurdity-tapa'
    long_name = 'Tales of Absurdity (from Tapastic)'
    url = 'http://tapastic.com/series/Tales-Of-Absurdity'
    _categories = ('ABSURDITY',)
4858
4859
4860
class BFGFSTapa(GenericTapasticComic):
    """Retrieve BFGFS comics from Tapastic."""
    # Also published at http://bfgfs.com
    # Also published at http://bfgfs.tumblr.com
    name = 'bfgfs-tapa'
    long_name = 'BFGFS (from Tapastic)'
    url = 'https://tapastic.com/series/BFGFS'
4867
4868
4869
class DoodleForFoodTapa(GenericTapasticComic):
    """Retrieve Doodle For Food comics from Tapastic."""
    # Also published at http://doodleforfood.com
    name = 'doodle-tapa'
    long_name = 'Doodle For Food (from Tapastic)'
    url = 'https://tapastic.com/series/Doodle-for-Food'
4875
4876
4877
class MrLovensteinTapa(GenericTapasticComic):
    """Retrieve Mr. Lovenstein comics from Tapastic."""
    # NOTE(review): the previous "Also on" comment repeated this class's own
    # Tapastic URL; it presumably meant the comic's main website - confirm
    # and list that site here instead.
    name = 'mrlovenstein-tapa'
    long_name = 'Mr. Lovenstein (from Tapastic)'
    url = 'https://tapastic.com/series/MrLovenstein'
4883
4884
4885
class CassandraCalinTapa(GenericTapasticComic):
    """Retrieve C. Cassandra (Cassandra Calin) comics from Tapastic."""
    # Also published at http://cassandracalin.com
    # Also published at http://c-cassandra.tumblr.com
    name = 'cassandra-tapa'
    long_name = 'Cassandra Calin (from Tapastic)'
    url = 'https://tapastic.com/series/C-Cassandra-comics'
4892
4893
4894
class WafflesAndPancakes(GenericTapasticComic):
    """Retrieve Waffles And Pancakes comics from Tapastic."""
    # Also published at http://wandpcomic.com
    name = 'waffles'
    long_name = 'Waffles And Pancakes'
    url = 'https://tapastic.com/series/Waffles-and-Pancakes'
4900
4901
4902
class YesterdaysPopcornTapastic(GenericTapasticComic):
    """Retrieve Yesterday's Popcorn comics from Tapastic."""
    # Also published at http://www.yesterdayspopcorn.com
    # Also published at http://yesterdayspopcorn.tumblr.com
    name = 'popcorn-tapa'
    long_name = "Yesterday's Popcorn (from Tapastic)"
    url = 'https://tapastic.com/series/Yesterdays-Popcorn'
4909
4910
4911
class OurSuperAdventureTapastic(GenericTapasticComic):
    """Retrieve Our Super Adventure comics from Tapastic."""
    # Also published at:
    #   http://www.oursuperadventure.com
    #   http://sarahssketchbook.tumblr.com
    #   http://sarahgraley.com
    name = 'superadventure-tapastic'
    long_name = 'Our Super Adventure (from Tapastic)'
    url = 'https://tapastic.com/series/Our-Super-Adventure'
4919
4920
4921
class NamelessPCs(GenericTapasticComic):
    """Retrieve Nameless PCs comics from Tapastic."""
    # Also published at http://namelesspcs.com
    name = 'namelesspcs-tapa'
    long_name = 'NamelessPCs (from Tapastic)'
    url = 'https://tapastic.com/series/NamelessPC'
4927
4928
4929
class UbertoolTapa(GenericTapasticComic):
    """Retrieve Ubertool comics from Tapastic."""
    # Also published at http://ubertoolcomic.com
    # Also published at http://ubertool.tumblr.com
    name = 'ubertool-tapa'
    long_name = 'Ubertool (from Tapastic)'
    url = 'https://tapastic.com/series/ubertool'
    _categories = ('UBERTOOL',)
4937
4938
4939
class SmallBlueYonderTapa(GenericTapasticComic):
    """Retrieve Small Blue Yonder comics from Tapastic."""
    # Also published at http://www.smallblueyonder.com
    name = 'smallblue-tapa'
    long_name = 'Small Blue Yonder (from Tapastic)'
    url = 'https://tapastic.com/series/Small-Blue-Yonder'
4945
4946
4947
def get_subclasses(klass):
    """Return every class deriving from klass, directly or indirectly.

    The direct subclasses come first, followed by the descendants of each
    direct subclass in turn. A class reachable through several inheritance
    paths appears once per path.
    """
    children = klass.__subclasses__()
    descendants = [sub for child in children for sub in get_subclasses(child)]
    return children + descendants
4953
4954
4955
def remove_st_nd_rd_th_from_date(string):
    """Strip ordinal suffixes (1st/2nd/3rd/4th) so the date can be parsed.

    Only a 'st', 'nd', 'rd' or 'th' that directly follows a digit and ends
    a word is removed. Day and month names containing those letters
    ("Monday", "Saturday", "August", ...) are left intact.

    Returns the string with the suffixes removed.
    """
    # Lookbehind keeps the digit; \b stops a longer word being truncated.
    return re.sub(r'(?<=\d)(?:st|nd|rd|th)\b', '', string)
4963
4964
4965
def string_to_date(string, date_format, local=DEFAULT_LOCAL):
    """Convert a string to a date object.

    Wrapper around datetime.datetime.strptime which parses with the
    requested locale set for LC_ALL and restores the previous setting
    afterwards, including when parsing fails.

    string is the text to parse.
    date_format is a strptime format, described in
    https://docs.python.org/2/library/datetime.html#strftime-and-strptime-behavior
    local is the locale name to use while parsing.

    Returns the parsed datetime.date. Errors raised by strptime or
    setlocale are not caught here.
    """
    # Calling setlocale without a second argument only queries the setting.
    prev_locale = locale.setlocale(locale.LC_ALL)
    if local == prev_locale:
        return datetime.datetime.strptime(string, date_format).date()
    locale.setlocale(locale.LC_ALL, local)
    try:
        return datetime.datetime.strptime(string, date_format).date()
    finally:
        # Restore even on failure so a bad date string cannot leave the
        # whole process running under the temporary locale.
        locale.setlocale(locale.LC_ALL, prev_locale)
4976
4977
4978
# Every direct or indirect subclass of GenericComic, without duplicates.
COMICS = set(get_subclasses(GenericComic))
# Only classes that define a name are kept.
VALID_COMICS = [comic for comic in COMICS if comic.name is not None]
COMIC_NAMES = dict((comic.name, comic) for comic in VALID_COMICS)
# Two comics sharing a name would collapse into a single dictionary entry.
assert len(COMIC_NAMES) == len(VALID_COMICS)
CLASS_NAMES = set(comic.__name__ for comic in VALID_COMICS)
# Likewise, two classes sharing a class name would collapse in the set.
assert len(CLASS_NAMES) == len(VALID_COMICS)
4984