Completed
Push — master ( 03ef23...f1eb23 )
by De
01:17
created

comics.py (10 issues)

1
#! /usr/bin/python3
2
# vim: set expandtab tabstop=4 shiftwidth=4 :
3
"""Module to retrieve webcomics"""
4
5
from comic_abstract import GenericComic, get_date_for_comic
6
import re
7
from datetime import date, timedelta
8
import datetime
9
from urlfunctions import get_soup_at_url, urljoin_wrapper,\
10
    convert_iri_to_plain_ascii_uri, load_json_at_url, urlopen_wrapper
11
import json
12
import locale
13
import urllib
14
15
DEFAULT_LOCAL = 'en_GB.UTF-8'
16
17
18
class Xkcd(GenericComic):
    """Retriever for xkcd comics, driven by the site's JSON endpoints."""
    name = 'xkcd'
    long_name = 'xkcd'
    url = 'http://xkcd.com'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Yield the comics published after `last_comic`.

        Implementation of GenericComic's abstract method. `last_comic` is
        either a previously yielded comic (its 'num' entry is read) or a
        falsy value, in which case iteration starts at number 1.
        """
        start = (last_comic['num'] if last_comic else 0) + 1
        latest = load_json_at_url(urljoin_wrapper(cls.url, 'info.0.json'))
        for num in range(start, latest['num'] + 1):
            if num == 404:
                # Number 404 is skipped: no JSON is requested for it.
                continue
            json_url = urljoin_wrapper(cls.url, '%d/info.0.json' % num)
            comic = load_json_at_url(json_url)
            comic.update({
                'img': [comic['img']],  # a list of image URLs is expected
                'prefix': '%d-' % num,
                'json_url': json_url,
                'url': urljoin_wrapper(cls.url, str(num)),
                'day': int(comic['day']),
                'month': int(comic['month']),
                'year': int(comic['year']),
            })
            assert comic['num'] == num
            yield comic
44
45
46
# Helper functions corresponding to get_url_from_link/get_url_from_archive_element
47
48
49
@classmethod
def get_href(cls, link):
    """Return the 'href' entry of `link`.

    Meant to be assigned in a class body as get_url_from_link or
    get_url_from_archive_element.
    """
    href = link['href']
    return href
53
54
55
@classmethod
def join_cls_url_to_href(cls, link):
    """Return the 'href' entry of `link` joined onto the class URL.

    Meant to be assigned in a class body as get_url_from_link or
    get_url_from_archive_element.
    """
    href = link['href']
    return urljoin_wrapper(cls.url, href)
59
60
61
class GenericNavigableComic(GenericComic):
    """Generic class for "navigable" comics: with first/next arrows.

    This class applies to comics where the previous and next comics can be
    accessed from a given comic. Once given a starting point (either
    the first comic or the last comic retrieved), it handles the
    navigation, the retrieval of the soup object and the setting of
    the 'url' attribute on retrieved comics. This greatly limits the
    amount of boilerplate code in the different implementation classes.

    The `get_next_comic` method is implemented in terms of more
    specialized methods to be implemented/overridden:
        - get_first_comic_link
        - get_navi_link
        - get_comic_info
        - get_url_from_link
    """
    _categories = ('NAVIGABLE', )

    @classmethod
    def get_first_comic_link(cls):
        """Get link to the first comic.

        Sometimes this can be retrieved from any comic page, sometimes from
        the archive page, sometimes it doesn't exist at all and one has
        to iterate backward to find it before hardcoding the result found.
        """
        raise NotImplementedError

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next (or previous - for dev purposes) comic.

        `next_` is truthy to ask for the next comic, falsy for the previous.
        """
        raise NotImplementedError

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        The result is either None (the comic is skipped) or a dict that must
        not contain a 'url' key: `get_next_comic` sets it.
        """
        raise NotImplementedError

    @classmethod
    def get_url_from_link(cls, link):
        """Get url corresponding to a link. Default implementation is similar to get_href."""
        return link['href']

    @classmethod
    def get_next_link(cls, last_soup):
        """Get link to next comic (and log it)."""
        link = cls.get_navi_link(last_soup, True)
        cls.log("Next link is %s" % link)
        return link

    @classmethod
    def get_prev_link(cls, last_soup):
        """Get link to previous comic (and log it)."""
        link = cls.get_navi_link(last_soup, False)
        cls.log("Prev link is %s" % link)
        return link

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generic implementation of get_next_comic for navigable comics.

        Starts from the 'url' of `last_comic` when there is one, otherwise
        from the first comic link, then follows "next" links and yields the
        info dict of each comic (with its 'url' set) until there is no next
        link or the next link leads to the URL just visited.
        """
        url = last_comic['url'] if last_comic else None
        cls.log("starting 'get_next_comic' from %s" % url)
        next_comic = \
            cls.get_next_link(get_soup_at_url(url)) \
            if url else \
            cls.get_first_comic_link()
        cls.log("next/first comic will be %s (url is %s)" % (str(next_comic), url))
        # cls.check_navigation(url)
        while next_comic:
            prev_url, url = url, cls.get_url_from_link(next_comic)
            if prev_url == url:
                # A "next" link pointing at the current page: stop here
                # instead of fetching the same page forever.
                cls.log("got same url %s" % url)
                break
            cls.log("about to get %s (%s)" % (url, str(next_comic)))
            soup = get_soup_at_url(url)
            comic = cls.get_comic_info(soup, next_comic)
            if comic is not None:
                assert 'url' not in comic
                comic['url'] = url
                yield comic
            next_comic = cls.get_next_link(soup)
            cls.log("next comic will be %s" % str(next_comic))

    @classmethod
    def check_first_link(cls):
        """Check that navigation to first comic seems to be working - for dev purposes.

        Returns True when a first link exists and its URL can be fetched
        without an HTTP error, False otherwise (a message is printed).
        """
        # `import urllib` alone does not guarantee that the `urllib.error`
        # submodule is loaded; import it explicitly so that the `except`
        # clause below cannot fail with an AttributeError.
        import urllib.error

        cls.log("about to check first link")
        ok = True
        firstlink = cls.get_first_comic_link()
        if firstlink is None:
            print("From %s : no first link" % cls.url)
            ok = False
        else:
            firsturl = cls.get_url_from_link(firstlink)
            try:
                get_soup_at_url(firsturl)
            except urllib.error.HTTPError:
                print("From %s : invalid first url" % cls.url)
                ok = False
        cls.log("checked first link -> returned %d" % ok)
        return ok

    @classmethod
    def check_prev_next_links(cls, url):
        """Check that navigation to prev/next from a given URL seems to be working - for dev purposes.

        Going backward then forward (and forward then backward) from `url`
        is expected to lead back to `url`. Returns True when the checks
        pass, False otherwise (a message is printed for each failure).
        """
        cls.log("about to check prev/next from %s" % url)
        ok = True
        if url is None:
            prevlink, nextlink = None, None
        else:
            soup = get_soup_at_url(url)
            prevlink, nextlink = cls.get_prev_link(soup), cls.get_next_link(soup)
        if prevlink is None and nextlink is None:
            print("From %s : no previous nor next" % url)
            ok = False
        else:
            if prevlink:
                prevurl = cls.get_url_from_link(prevlink)
                prevsoup = get_soup_at_url(prevurl)
                prevnextlink = cls.get_next_link(prevsoup)
                prevnext = cls.get_url_from_link(prevnextlink) if prevnextlink is not None else "NO URL"
                if prevnext != url:
                    print("From %s, going backward then forward leads to %s" % (url, prevnext))
                    ok = False
            if nextlink:
                nexturl = cls.get_url_from_link(nextlink)
                # A next link pointing at the page itself is not followed.
                if nexturl != url:
                    nextsoup = get_soup_at_url(nexturl)
                    nextprevlink = cls.get_prev_link(nextsoup)
                    nextprev = cls.get_url_from_link(nextprevlink) if nextprevlink is not None else "NO URL"
                    if nextprev != url:
                        print("From %s, going forward then backward leads to %s" % (url, nextprev))
                        ok = False
        cls.log("checked prev/next from %s -> returned %d" % (url, ok))
        return ok

    @classmethod
    def check_navigation(cls, url):
        """Check that navigation functions seem to be working - for dev purposes.

        Runs both `check_first_link` and `check_prev_next_links(url)` and
        returns whether both succeeded.
        """
        cls.log("about to check navigation from %s" % url)
        first = cls.check_first_link()
        prevnext = cls.check_prev_next_links(url)
        ok = first and prevnext
        cls.log("checked navigation from %s -> returned %d" % (url, ok))
        return ok
207
208
209
class GenericListableComic(GenericComic):
    """Generic class for "listable" comics: with a list of comics (aka 'archive').

    The `get_next_comic` method is implemented in terms of more
    specialized methods to be implemented/overridden:
        - get_archive_elements
        - get_url_from_archive_element
        - get_comic_info
    """
    _categories = ('LISTABLE', )

    @classmethod
    def get_archive_elements(cls):
        """Return an iterable of archive elements."""
        raise NotImplementedError

    @classmethod
    def get_url_from_archive_element(cls, archive_elt):
        """Return the URL an archive element refers to."""
        raise NotImplementedError

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Return the info dict of a comic (without 'url'), or None to skip it."""
        raise NotImplementedError

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generic implementation of get_next_comic for listable comics.

        Archive elements are walked in order. Elements up to and including
        the one whose URL is the 'url' of `last_comic` are ignored; every
        later element is fetched and yielded. Without a `last_comic`, all
        elements are fetched.
        """
        pending_url = last_comic['url'] if last_comic else None
        elements = list(cls.get_archive_elements())
        for element in elements:
            url = cls.get_url_from_archive_element(element)
            cls.log("considering %s" % url)
            if pending_url is not None:
                # Still looking for the last retrieved comic.
                if pending_url == url:
                    pending_url = None
                continue
            cls.log("about to get %s (%s)" % (url, str(element)))
            info = cls.get_comic_info(get_soup_at_url(url), element)
            if info is None:
                continue
            assert 'url' not in info
            info['url'] = url
            yield info
        if pending_url is not None:
            message = "Did not find %s in the %d comics: there might be a problem"
            print(message % (pending_url, len(elements)))
256
257
# Helper functions corresponding to get_first_comic_link/get_navi_link
258
259
260
@classmethod
def get_link_rel_next(cls, last_soup, next_):
    """Implementation of get_navi_link: find the <link> tag with rel 'next' or 'prev'."""
    rel = 'next' if next_ else 'prev'
    return last_soup.find('link', rel=rel)
264
265
266
@classmethod
def get_a_rel_next(cls, last_soup, next_):
    """Implementation of get_navi_link: find the <a> tag with rel 'next' or 'prev'."""
    rel = 'next' if next_ else 'prev'
    return last_soup.find('a', rel=rel)
270
271
272
@classmethod
def get_a_navi_navinext(cls, last_soup, next_):
    """Implementation of get_navi_link: find the <a> tag with class 'navi navi-next' or 'navi navi-prev'."""
    css_class = 'navi navi-next' if next_ else 'navi navi-prev'
    return last_soup.find('a', class_=css_class)
276
277
278
@classmethod
def get_a_navi_comicnavnext_navinext(cls, last_soup, next_):
    """Implementation of get_navi_link based on the 'navi comic-nav-* navi-*' classes of <a> tags."""
    if next_:
        css_class = 'navi comic-nav-next navi-next'
    else:
        css_class = 'navi comic-nav-previous navi-prev'
    return last_soup.find('a', class_=css_class)
282
283
284
@classmethod
def get_a_comicnavbase_comicnavnext(cls, last_soup, next_):
    """Implementation of get_navi_link based on the 'comic-nav-base comic-nav-*' classes of <a> tags."""
    if next_:
        css_class = 'comic-nav-base comic-nav-next'
    else:
        css_class = 'comic-nav-base comic-nav-previous'
    return last_soup.find('a', class_=css_class)
288
289
290
@classmethod
def get_a_navi_navifirst(cls):
    """Implementation of get_first_comic_link: the <a class="navi navi-first"> tag of the page at cls.url."""
    home_soup = get_soup_at_url(cls.url)
    return home_soup.find('a', class_='navi navi-first')
294
295
296
@classmethod
def get_div_navfirst_a(cls):
    """Implementation of get_first_comic_link: the <a> inside <div class="nav-first"> of the page at cls.url."""
    home_soup = get_soup_at_url(cls.url)
    nav_first = home_soup.find('div', class_="nav-first")
    return nav_first.find('a')
300
301
302
@classmethod
def get_a_comicnavbase_comicnavfirst(cls):
    """Implementation of get_first_comic_link: the <a class="comic-nav-base comic-nav-first"> tag of the page at cls.url."""
    home_soup = get_soup_at_url(cls.url)
    return home_soup.find('a', class_='comic-nav-base comic-nav-first')
306
307
308
@classmethod
def simulate_first_link(cls):
    """Implementation of get_first_comic_link building a link-like dict
    from the `first_url` attribute of the class.

    Note: the first URL can easily be found by temporarily using
    `get_first_comic_link = navigate_to_first_comic`.
    """
    first_link = {'href': cls.first_url}
    return first_link
317
318
319
@classmethod
def navigate_to_first_comic(cls):
    """Implementation of get_first_comic_link walking backward from a URL
    typed in by the user until a comic without previous link is reached.

    Sometimes the first comic cannot be reached directly, so the only way
    to find it is to follow the "previous" links until there are none left.
    Once found, it is better to hardcode the URL, but for development
    purposes it is convenient to have an automatic way to discover it.

    The URL found can then be used via `simulate_first_link`.
    """
    url = input("Get starting URL: ")
    while True:
        print(url)
        prev_link = cls.get_prev_link(get_soup_at_url(url))
        if not prev_link:
            # No previous comic: `url` is the first one.
            return {'href': url}
        url = cls.get_url_from_link(prev_link)
340
341
342
class GenericEmptyComic(GenericComic):
    """Generic class for comics where nothing is to be done.

    It can be useful to temporarily deactivate comics that do not work
    properly by replacing `class MyComic(GenericWhateverComic)` with
    `class MyComic(GenericEmptyComic, GenericWhateverComic)`.
    """
    _categories = ('EMPTY', )

    @classmethod
    def get_next_comic(cls, last_comic):
        """Implementation of get_next_comic: log and return an empty list."""
        cls.log("comic is considered as empty - returning no comic")
        no_comics = []
        return no_comics
355
356
357 View Code Duplication
class ExtraFabulousComics(GenericNavigableComic):
    """Class to retrieve Extra Fabulous Comics."""
    name = 'efc'
    long_name = 'Extra Fabulous Comics'
    url = 'http://extrafabulouscomics.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        Images are the <img> tags whose 'src' starts with the site's
        'wp-content/uploads/' path; title and date come from the
        'og:title' and 'article:published_time' <meta> tags.
        """
        # The URL is escaped so that its dots only match literal dots.
        img_src_re = re.compile('^%s/wp-content/uploads/' % re.escape(cls.url))
        imgs = soup.find_all('img', src=img_src_re)
        title = soup.find('meta', property='og:title')['content']
        # Only the leading "YYYY-MM-DD" part of the timestamp is parsed.
        date_str = soup.find('meta', property='article:published_time')['content'][:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        return {
            'title': title,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'prefix': title + '-'
        }
381
382
383 View Code Duplication
class GenericLeMondeBlog(GenericNavigableComic):
    """Generic class to retrieve comics from Le Monde blogs.

    Subclasses are expected to replace the `first_url` placeholder.
    """
    _categories = ('LEMONDE', 'FRANCAIS')
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict of a blog post from its <link>/<meta>/<span> tags."""
        shortlink = soup.find('link', rel='shortlink')['href']
        post_title = soup.find('meta', property='og:title')['content']
        raw_date = soup.find("span", class_="entry-date").string
        # NOTE(review): the third argument looks like a locale name for
        # French month names - confirm against string_to_date.
        published = string_to_date(raw_date, "%d %B %Y", "fr_FR.utf8")
        image_metas = soup.find_all('meta', property='og:image')
        image_urls = [convert_iri_to_plain_ascii_uri(meta['content']) for meta in image_metas]
        return {
            'title': post_title,
            'url2': shortlink,
            'img': image_urls,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
406
407
408
class ZepWorld(GenericLeMondeBlog):
    """Zep World comics (Le Monde blog)."""
    name = 'zep'
    long_name = 'Zep World'
    url = 'http://zepworld.blog.lemonde.fr'
    first_url = 'http://zepworld.blog.lemonde.fr/2014/10/31/bientot-le-blog-de-zep/'
414
415
416
class Vidberg(GenericLeMondeBlog):
    """Vidberg comics (Le Monde blog)."""
    name = "vidberg"
    long_name = "Vidberg - l'actu en patates"
    url = 'http://vidberg.blog.lemonde.fr'
    # Not the actual first comic: no efficient way to retrieve it was found.
    first_url = 'http://vidberg.blog.lemonde.fr/2012/02/09/revue-de-campagne-la-campagne-du-modem-semballe/'
423
424
425
class Plantu(GenericLeMondeBlog):
    """Plantu comics (Le Monde blog)."""
    name = "plantu"
    long_name = 'Plantu'
    url = 'http://plantu.blog.lemonde.fr'
    first_url = 'http://plantu.blog.lemonde.fr/2014/10/28/stress-test-a-bruxelles/'
431
432
433
class XavierGorce(GenericLeMondeBlog):
    """Xavier Gorce comics (Le Monde blog)."""
    name = "gorce"
    long_name = 'Xavier Gorce'
    url = 'http://xaviergorce.blog.lemonde.fr'
    first_url = 'http://xaviergorce.blog.lemonde.fr/2015/01/09/distinction/'
439
440
441
class CartooningForPeace(GenericLeMondeBlog):
    """Cartooning For Peace comics (Le Monde blog)."""
    name = "forpeace"
    long_name = 'Cartooning For Peace'
    url = 'http://cartooningforpeace.blog.lemonde.fr'
    first_url = 'http://cartooningforpeace.blog.lemonde.fr/2014/12/15/bado/'
447
448
449
class Aurel(GenericLeMondeBlog):
    """Aurel comics (Le Monde blog)."""
    name = "aurel"
    long_name = 'Aurel'
    url = 'http://aurel.blog.lemonde.fr'
    first_url = 'http://aurel.blog.lemonde.fr/2014/09/29/le-senat-repasse-a-droite/'
455
456
457
class LesCulottees(GenericLeMondeBlog):
    """Les Culottees comics (Le Monde blog)."""
    name = "culottees"
    long_name = "Les Culottees"
    url = 'http://lesculottees.blog.lemonde.fr'
    first_url = 'http://lesculottees.blog.lemonde.fr/2016/01/11/clementine-delait-femme-a-barbe/'
463
464
465
class UneAnneeAuLycee(GenericLeMondeBlog):
    """Une Annee Au Lycee comics (Le Monde blog)."""
    name = "lycee"
    long_name = "Une Annee au Lycee"
    url = "http://uneanneeaulycee.blog.lemonde.fr"
    first_url = 'http://uneanneeaulycee.blog.lemonde.fr/2016/06/13/la-semaine-du-bac-est-arrivee/'
471
472
473 View Code Duplication
class Rall(GenericNavigableComic):
    """Ted Rall comics."""
    # Also on http://www.gocomics.com/tedrall
    name = "rall"
    long_name = 'Ted Rall'
    url = 'http://rall.com/comic'
    _categories = ('RALL', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    # Not the actual first comic: no efficient way to retrieve it was found.
    first_url = 'http://rall.com/2014/01/30/los-angeles-times-cartoon-well-miss-those-california-flowers'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict of a comic from its page."""
        post_title = soup.find('meta', property='og:title')['content']
        author_span = soup.find("span", class_="author vcard")
        author_name = author_span.find("a").string
        raw_date = soup.find("span", class_="entry-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        description = soup.find('meta', property='og:description')['content']
        content_imgs = soup.find('div', class_='entry-content').find_all('img')
        # The last 7 images are dropped (social media buttons).
        comic_imgs = content_imgs[:-7]
        return {
            'title': post_title,
            'author': author_name,
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'description': description,
            'img': [img['src'] for img in comic_imgs],
        }
504
505
506
class Dilem(GenericNavigableComic):
    """Ali Dilem comics."""
    name = "dilem"
    long_name = "Ali Dilem"
    url = "http://information.tv5monde.com/dilem"
    _categories = ('FRANCAIS', )
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://information.tv5monde.com/dilem/2004-06-26'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the <a> leading to the next (or previous) comic, or None."""
        # The site's classes are swapped: 'prev' is next and 'next' is prev.
        css_class = 'prev' if next_ else 'next'
        item = last_soup.find('li', class_=css_class)
        if not item:
            return None
        return item.find('a')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict of a comic from its page."""
        shortlink = soup.find('link', rel='shortlink')['href']
        comic_title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        image_metas = soup.find_all('meta', property='og:image')
        raw_date = soup.find('span', property='dc:date')['content']
        # Only the leading "YYYY-MM-DD" part is parsed.
        published = string_to_date(raw_date[:10], "%Y-%m-%d")
        return {
            'short_url': shortlink,
            'title': comic_title,
            'img': [meta['content'] for meta in image_metas],
            'day': published.day,
            'month': published.month,
            'year': published.year,
        }
540
541
542
class SpaceAvalanche(GenericNavigableComic):
    """Space Avalanche comics."""
    name = "avalanche"
    long_name = "Space Avalanche"
    url = "http://www.spaceavalanche.com"
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return a hardcoded link-like dict for the first comic."""
        first_link = {
            'href': "http://www.spaceavalanche.com/2009/02/02/irish-sea/",
            'title': "Irish Sea",
        }
        return first_link

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict of a comic; title and date come from `link`."""
        url_date_re = re.compile('.*/([0-9]*)/([0-9]*)/([0-9]*)/.*$')
        comic_title = link['title']
        comic_url = cls.get_url_from_link(link)
        # The URL carries the date as .../<year>/<month>/<day>/...
        date_parts = url_date_re.match(comic_url).groups()
        year, month, day = map(int, date_parts)
        entry = soup.find("div", class_="entry")
        entry_imgs = entry.find_all("img")
        return {
            'title': comic_title,
            'day': day,
            'month': month,
            'year': year,
            'img': [img['src'] for img in entry_imgs],
        }
570
571
572
class ZenPencils(GenericNavigableComic):
    """Zen Pencils comics."""
    # Also on http://zenpencils.tumblr.com
    # Also on http://www.gocomics.com/zen-pencils
    name = "zenpencils"
    long_name = "Zen Pencils"
    url = "http://zenpencils.com"
    _categories = ('ZENPENCILS', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://zenpencils.com/comic/1-ralph-waldo-emerson-make-them-cry/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict of a comic from its page."""
        comic_imgs = soup.find('div', id='comic').find_all('img')
        # imgs2 = soup.find_all('meta', property='og:image')
        post = soup.find('div', class_='post-content')
        author_span = post.find("span", class_="post-author")
        author_name = author_span.find("a").string
        comic_title = soup.find('meta', property='og:title')['content']
        raw_date = post.find('span', class_='post-date').string
        published = string_to_date(raw_date, "%B %d, %Y")
        # Sanity checks on the page structure.
        assert comic_imgs
        assert all(img['alt'] == img['title'] for img in comic_imgs)
        assert all(img['alt'] in (comic_title, "") for img in comic_imgs)
        description = soup.find('meta', property='og:description')['content']
        return {
            'title': comic_title,
            'description': description,
            'author': author_name,
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [urljoin_wrapper(cls.url, img['src']) for img in comic_imgs],
        }
607
608
609
class ItsTheTie(GenericEmptyComic, GenericNavigableComic):
    """It's the tie comics."""
    # Also on http://itsthetie.tumblr.com
    # Also on https://tapastic.com/series/itsthetie
    name = "tie"
    long_name = "It's the tie"
    url = 'http://itsthetie.com'
    _categories = ('TIE', )
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict of a comic from its page."""
        comic_title = soup.find('h1', class_='comic-title').find('a').string
        meta_header = soup.find('header', class_='comic-meta entry-meta')
        raw_date = meta_header.find('a').string
        published = string_to_date(raw_date, "%B %d, %Y")
        # Bonus images may or may not be in meta og:image.
        main_srcs = [meta['content'] for meta in soup.find_all('meta', property='og:image')]
        bonus_srcs = [img['data-oversrc'] for img in soup.find_all('img', attrs={'data-oversrc': True})]
        extra_srcs = [src for src in bonus_srcs if src not in main_srcs]
        # The generic bonus-panel placeholder image is excluded.
        kept_srcs = [
            src for src in main_srcs + extra_srcs
            if not src.endswith("/2016/01/bonus-panel.png")
        ]
        tag_meta = soup.find('meta', property='article:tag')
        if tag_meta:
            tags = tag_meta['content']
        else:
            tags = ""
        return {
            'title': comic_title,
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': kept_srcs,
            'tags': tags,
        }
643
644
645 View Code Duplication
class PenelopeBagieu(GenericNavigableComic):
    """Comics from Penelope Bagieu's blog."""
    name = "bagieu"
    long_name = "Ma vie est tout a fait fascinante (Bagieu)"
    url = "http://www.penelope-jolicoeur.com"
    _categories = ('FRANCAIS', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = "http://www.penelope-jolicoeur.com/2007/02/ma-vie-mon-oeuv.html"

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict of a blog entry from its page."""
        raw_date = soup.find('h2', class_='date-header').string
        # NOTE(review): the third argument looks like a locale name for
        # French day/month names - confirm against string_to_date.
        published = string_to_date(raw_date, "%A %d %B %Y", "fr_FR.utf8")
        entry_body = soup.find('div', class_='entry-body')
        entry_imgs = entry_body.find_all('img')
        entry_title = soup.find('h3', class_='entry-header').string
        return {
            'title': entry_title,
            'img': [img['src'] for img in entry_imgs],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
669
670
671 View Code Duplication
class OneOneOneOneComic(GenericNavigableComic):
    """1111 Comics."""
    # Also on http://comics1111.tumblr.com
    # Also on https://tapastic.com/series/1111-Comics
    name = "1111"
    long_name = "1111 Comics"
    url = "http://www.1111comics.me"
    _categories = ('ONEONEONEONE', )
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict of a comic from its page."""
        title_heading = soup.find('h1', class_='comic-title')
        comic_title = title_heading.find('a').string
        meta_header = soup.find('header', class_='comic-meta entry-meta')
        raw_date = meta_header.find('a').string
        published = string_to_date(raw_date, "%B %d, %Y")
        image_metas = soup.find_all('meta', property='og:image')
        return {
            'title': comic_title,
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [meta['content'] for meta in image_metas],
        }
696
697
698 View Code Duplication
class AngryAtNothing(GenericEmptyComic, GenericNavigableComic):
    """Angry at Nothing comics."""
    # Also on http://tapastic.com/series/Comics-yeah-definitely-comics-
    # Also on http://angryatnothing.tumblr.com
    name = "angry"
    long_name = "Angry At Nothing"
    url = "http://www.angryatnothing.net"
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict of a comic from its page."""
        title_heading = soup.find('h1', class_='comic-title')
        comic_title = title_heading.find('a').string
        meta_header = soup.find('header', class_='comic-meta entry-meta')
        raw_date = meta_header.find('a').string
        published = string_to_date(raw_date, "%B %d, %Y")
        image_metas = soup.find_all('meta', property='og:image')
        return {
            'title': comic_title,
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [meta['content'] for meta in image_metas],
        }
722
723
724
class NeDroid(GenericNavigableComic):
    """Class to retrieve NeDroid comics."""
    name = 'nedroid'
    long_name = 'NeDroid'
    url = 'http://nedroid.com'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        The comic number is read from the '?p=<num>' part of the page's
        shortlink; the single image of the 'comic' div provides both
        titles ('alt' and 'title' attributes).
        """
        # The URL is escaped so that its dots only match literal dots.
        short_url_re = re.compile(r'^%s/\?p=([0-9]*)' % re.escape(cls.url))
        short_url = cls.get_url_from_link(soup.find('link', rel='shortlink'))
        num = int(short_url_re.match(short_url).group(1))
        imgs = soup.find('div', id='comic').find_all('img')
        assert len(imgs) == 1
        title = imgs[0]['alt']
        title2 = imgs[0]['title']
        return {
            'short_url': short_url,
            'title': title,
            'title2': title2,
            'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
            'num': num,
        }
750
751
752
class Garfield(GenericNavigableComic):
    """Class to retrieve Garfield comics."""
    # Also on http://www.gocomics.com/garfield
    name = 'garfield'
    long_name = 'Garfield'
    url = 'https://garfield.com'
    _categories = ('GARFIELD', )
    get_first_comic_link = simulate_first_link
    first_url = 'https://garfield.com/comic/1978/06/19'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic."""
        return last_soup.find('a', class_='comic-arrow-right' if next_ else 'comic-arrow-left')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        The date is read from the comic URL, which has the form
        '<site url>/comic/<year>/<month>/<day>'.
        """
        url = cls.get_url_from_link(link)
        # The URL is escaped so that its dots only match literal dots.
        date_re = re.compile(r'^%s/comic/([0-9]*)/([0-9]*)/([0-9]*)' % re.escape(cls.url))
        year, month, day = [int(s) for s in date_re.match(url).groups()]
        imgs = soup.find('div', class_='comic-display').find_all('img', class_='img-responsive')
        return {
            'month': month,
            'year': year,
            'day': day,
            'img': [i['src'] for i in imgs],
        }
780
781 View Code Duplication
0 ignored issues
show
This code seems to be duplicated in your project.
Loading history...
782
class Dilbert(GenericNavigableComic):
    """Class to retrieve Dilbert comics."""
    # Also on http://www.gocomics.com/dilbert-classics
    name = 'dilbert'
    long_name = 'Dilbert'
    url = 'http://dilbert.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://dilbert.com/strip/1989-04-16'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor inside the right (next) or left (previous) nav div, if any."""
        nav_class = 'nav-comic nav-right' if next_ else 'nav-comic nav-left'
        nav_div = last_soup.find('div', class_=nav_class)
        if nav_div:
            return nav_div.find('a')
        return None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary for one comic page from its <meta> tags."""
        def meta_content(prop):
            """Return the content of the first <meta> tag with this property."""
            return soup.find('meta', property=prop)['content']

        title = meta_content('og:title')
        og_images = soup.find_all('meta', property='og:image')
        desc = meta_content('og:description')
        pub_date = string_to_date(meta_content('article:publish_date'), "%B %d, %Y")
        author = meta_content('article:author')
        tags = meta_content('article:tag')
        return {
            'title': title,
            'description': desc,
            'img': [meta['content'] for meta in og_images],
            'author': author,
            'tags': tags,
            'day': pub_date.day,
            'month': pub_date.month,
            'year': pub_date.year
        }
818
819
820
class VictimsOfCircumsolar(GenericEmptyComic, GenericNavigableComic):
    """Class to retrieve VictimsOfCircumsolar comics."""
    # Also on https://victimsofcomics.tumblr.com
    name = 'circumsolar'
    long_name = 'Victims Of Circumsolar'
    url = 'http://www.victimsofcircumsolar.com'
    get_navi_link = get_a_navi_comicnavnext_navinext
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.victimsofcircumsolar.com/comic/modern-addiction'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary (title, description, images) for one comic page."""
        # Date is on the archive page
        # When a property appears several times, the last <meta> tag is the one used.
        title_metas = soup.find_all('meta', property='og:title')
        title = title_metas[-1]['content']
        desc_metas = soup.find_all('meta', property='og:description')
        desc = desc_metas[-1]['content']
        pictures = soup.find('div', id='comic').find_all('img')
        assert all(pic['title'] == pic['alt'] == title for pic in pictures)
        return {
            'title': title,
            'description': desc,
            'img': [pic['src'] for pic in pictures],
        }
843
844
845
class ThreeWordPhrase(GenericNavigableComic):
    """Class to retrieve Three Word Phrase comics."""
    # Also on http://www.threewordphrase.tumblr.com
    name = 'threeword'
    long_name = 'Three Word Phrase'
    url = 'http://threewordphrase.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the element wrapping the 'first' button image on the home page."""
        first_button = get_soup_at_url(cls.url).find('img', src='/firstlink.gif')
        return first_button.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the parent of the next/prev button image, or None if it has no href."""
        button_src = '/nextlink.gif' if next_ else '/prevlink.gif'
        anchor = last_soup.find('img', src=button_src).parent
        if anchor.get('href') is None:
            return None
        return anchor

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary (title, alt texts, images) for one comic page."""
        page_title = soup.find('title')
        # Images whose source ends with one of these are not kept as comic images.
        excluded_suffixes = ('link.gif', '32.png', 'twpbookad.jpg',
                             'merchad.jpg', 'header.gif', 'tipjar.jpg')
        comic_imgs = [picture for picture in soup.find_all('img')
                      if not picture['src'].endswith(excluded_suffixes)]
        alt_texts = [picture.get('alt') for picture in comic_imgs]
        return {
            'title': page_title.string if page_title else None,
            'title2': '  '.join(alt for alt in alt_texts if alt),
            'img': [urljoin_wrapper(cls.url, picture['src']) for picture in comic_imgs],
        }
877
878
879
class DeadlyPanel(GenericEmptyComic, GenericNavigableComic):
    """Class to retrieve Deadly Panel comics."""
    # Also on https://tapastic.com/series/deadlypanel
    # Also on https://deadlypanel.tumblr.com
    name = 'deadly'
    long_name = 'Deadly Panel'
    url = 'http://www.deadlypanel.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary (image urls only) for one comic page."""
        comic_div = soup.find('div', id='comic')
        pictures = comic_div.find_all('img')
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        return {
            'img': [pic['src'] for pic in pictures],
        }
897
898
899 View Code Duplication
class TheGentlemanArmchair(GenericNavigableComic):
    """Class to retrieve The Gentleman Armchair comics."""
    name = 'gentlemanarmchair'
    long_name = 'The Gentleman Armchair'
    url = 'http://thegentlemansarmchair.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary (images, title, author, date) for one comic page."""
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        post_date = string_to_date(
            soup.find('span', class_='post-date').string, "%B %d, %Y")
        pictures = soup.find('div', id='comic').find_all('img')
        return {
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'author': author,
            'month': post_date.month,
            'year': post_date.year,
            'day': post_date.day,
        }
923
924
925 View Code Duplication
class ImogenQuest(GenericNavigableComic):
    """Class to retrieve Imogen Quest comics."""
    # Also on http://imoquest.tumblr.com
    name = 'imogen'
    long_name = 'Imogen Quest'
    url = 'http://imogenquest.net'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary (date, images, titles, author) for one comic page."""
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        post_date = string_to_date(
            soup.find('span', class_='post-date').string, '%B %d, %Y')
        pictures = soup.find('div', class_='comicpane').find_all('img')
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        # The hover text of the first image is used as secondary title.
        hover_text = pictures[0]['title']
        return {
            'day': post_date.day,
            'month': post_date.month,
            'year': post_date.year,
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'title2': hover_text,
            'author': author,
        }
953
954
955 View Code Duplication
class MyExtraLife(GenericNavigableComic):
    """Class to retrieve My Extra Life comics."""
    name = 'extralife'
    long_name = 'My Extra Life'
    url = 'http://www.myextralife.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'first comic' navigation anchor found on the home page."""
        home_soup = get_soup_at_url(cls.url)
        return home_soup.find('a', class_='comic_nav_link first_comic_link')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary (title, images, date) for one comic page."""
        title = soup.find("h1", class_="comic_title").string
        raw_date = soup.find("span", class_="comic_date").string
        comic_date = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find_all("img", class_="comic")
        assert all(pic['alt'] == pic['title'] == title for pic in pictures)
        return {
            'title': title,
            # Images with an empty source are left out.
            'img': [pic['src'] for pic in pictures if pic["src"]],
            'day': comic_date.day,
            'month': comic_date.month,
            'year': comic_date.year
        }
982
983
984
class SaturdayMorningBreakfastCereal(GenericNavigableComic):
    """Class to retrieve Saturday Morning Breakfast Cereal comics."""
    # Also on http://www.gocomics.com/saturday-morning-breakfast-cereal
    # Also on http://smbc-comics.tumblr.com
    name = 'smbc'
    long_name = 'Saturday Morning Breakfast Cereal'
    url = 'http://www.smbc-comics.com'
    _categories = ('SMBC', )
    get_navi_link = get_a_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the rel='start' anchor found on the home page."""
        home_soup = get_soup_at_url(cls.url)
        return home_soup.find('a', rel='start')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary (title, images, date) for one comic page."""
        main_img = soup.find('img', id='cc-comic')
        img_urls = [main_img['src']]
        # An optional second image lives in the 'aftercomic' div.
        bonus_div = soup.find('div', id='aftercomic')
        if bonus_div:
            bonus_url = bonus_div.find('img')['src']
            if bonus_url:
                img_urls.append(bonus_url)
        raw_date = soup.find('div', class_='cc-publishtime').contents[0]
        comic_date = string_to_date(raw_date, "%B %d, %Y")
        return {
            'title': main_img['title'],
            'img': [urljoin_wrapper(cls.url, img_url) for img_url in img_urls],
            'day': comic_date.day,
            'month': comic_date.month,
            'year': comic_date.year
        }
1016
1017
1018
class PerryBibleFellowship(GenericListableComic):
    """Class to retrieve Perry Bible Fellowship comics."""
    name = 'pbf'
    long_name = 'Perry Bible Fellowship'
    url = 'http://pbfcomics.com'
    get_url_from_archive_element = join_cls_url_to_href

    @classmethod
    def get_archive_elements(cls):
        """Return the home page anchors with a '/<digits>/' href, in reversed page order."""
        numbered_href_re = re.compile('^/[0-9]*/$')
        anchors = get_soup_at_url(cls.url).find_all('a', href=numbered_href_re)
        return reversed(anchors)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary (number, name, image) for one comic page."""
        comic_url = cls.get_url_from_archive_element(link)
        img_src_re = re.compile('^/archive_b/PBF.*')
        comic_name = link.string
        num = int(link['name'])
        # Sanity check: the anchor's name attribute and its href carry the same number.
        assert link['href'] == '/%d/' % num
        pictures = soup.find_all('img', src=img_src_re)
        assert len(pictures) == 1
        assert pictures[0]['alt'] == comic_name
        return {
            'num': num,
            'name': comic_name,
            'img': [urljoin_wrapper(comic_url, pic['src']) for pic in pictures],
            'prefix': '%d-' % num,
        }
1048
1049
1050
class Mercworks(GenericNavigableComic):
    """Class to retrieve Mercworks comics."""
    # Also on http://mercworks.tumblr.com
    name = 'mercworks'
    long_name = 'Mercworks'
    url = 'http://mercworks.net'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary for one comic page from its <meta> tags."""
        title = soup.find('meta', property='og:title')['content']
        # The description tag is optional: fall back to an empty string.
        desc_meta = soup.find('meta', property='og:description')
        if desc_meta:
            desc = desc_meta['content']
        else:
            desc = ""
        published = soup.find('meta', property='article:published_time')['content']
        # Only the first 10 characters (the YYYY-MM-DD part) are parsed.
        comic_date = string_to_date(published[:10], "%Y-%m-%d")
        og_images = soup.find_all('meta', property='og:image')
        return {
            'img': [meta['content'] for meta in og_images],
            'title': title,
            'desc': desc,
            'day': comic_date.day,
            'month': comic_date.month,
            'year': comic_date.year
        }
1076
1077
1078
class BerkeleyMews(GenericListableComic):
    """Class to retrieve Berkeley Mews comics."""
    # Also on http://mews.tumblr.com
    # Also on http://www.gocomics.com/berkeley-mews
    name = 'berkeley'
    long_name = 'Berkeley Mews'
    url = 'http://www.berkeleymews.com'
    _categories = ('BERKELEY', )
    get_url_from_archive_element = get_href
    # Matches comic urls ending in '?p=<number>' and captures the number.
    comic_num_re = re.compile('%s/\\?p=([0-9]*)$' % url)

    @classmethod
    def get_archive_elements(cls):
        """Return the archive page anchors pointing to comics, in reversed page order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, "?page_id=2"))
        return reversed(archive_soup.find_all('a', href=cls.comic_num_re))

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary (number, titles, image, date) for one comic page."""
        # The date is read from the image url, which contains '/<year>-<month>-<day>-'.
        img_date_re = re.compile('.*/([0-9]*)-([0-9]*)-([0-9]*)-.*')
        comic_url = cls.get_url_from_archive_element(link)
        num = int(cls.comic_num_re.match(comic_url).groups()[0])
        picture = soup.find('div', id='comic').find('img')
        assert picture['alt'] == picture['title']
        hover_text = picture['title']
        img_url = picture['src']
        year, month, day = [int(part) for part in img_date_re.match(img_url).groups()]
        return {
            'num': num,
            'title': link.string,
            'title2': hover_text,
            'img': [img_url],
            'year': year,
            'month': month,
            'day': day,
        }
1114
1115
1116
class GenericBouletCorp(GenericNavigableComic):
    """Generic class to retrieve BouletCorp comics in different languages."""
    # Also on https://bouletcorp.tumblr.com
    _categories = ('BOULET', )
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the first anchor of the 'centered_nav' div on the home page."""
        nav_div = get_soup_at_url(cls.url).find('div', id='centered_nav')
        return nav_div.find_all('a')[0]

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary (images, title, texts, date) for one comic page."""
        comic_url = cls.get_url_from_link(link)
        # The date is read from the comic url: <cls.url>/<year>/<month>/<day>/
        url_date_re = re.compile('^%s/([0-9]*)/([0-9]*)/([0-9]*)/' % cls.url)
        year, month, day = [int(part) for part in url_date_re.match(comic_url).groups()]
        story = soup.find('div', id='notes').find('div', class_='storycontent')
        pictures = story.find_all('img')
        hover_texts = [pic.get('title') for pic in pictures]
        texts = '  '.join(text for text in hover_texts if text)
        title = soup.find('title').string
        return {
            # Images without a src attribute are skipped.
            'img': [convert_iri_to_plain_ascii_uri(pic['src'])
                    for pic in pictures if pic.get('src') is not None],
            'title': title,
            'texts': texts,
            'year': year,
            'month': month,
            'day': day,
        }
1144
1145
1146
class BouletCorp(GenericBouletCorp):
    """Class to retrieve BouletCorp comics."""
    # Site-specific settings; the retrieval logic comes from GenericBouletCorp.
    name = 'boulet'
    long_name = 'Boulet Corp'
    url = 'http://www.bouletcorp.com'
    _categories = ('FRANCAIS', )
1152
1153
1154
class BouletCorpEn(GenericBouletCorp):
    """Class to retrieve EnglishBouletCorp comics."""
    # Site-specific settings; the retrieval logic comes from GenericBouletCorp.
    name = 'boulet_en'
    long_name = 'Boulet Corp English'
    url = 'http://english.bouletcorp.com'
1159
1160
1161 View Code Duplication
class AmazingSuperPowers(GenericNavigableComic):
    """Class to retrieve Amazing Super Powers comics."""
    name = 'asp'
    long_name = 'Amazing Super Powers'
    url = 'http://www.amazingsuperpowers.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary (title, author, images, date) for one comic page."""
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        raw_date = soup.find('span', class_='post-date').string
        post_date = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find('div', id='comic').find_all('img')
        # The title is made of the hover texts of all the comic images.
        title = ' '.join(pic['title'] for pic in pictures)
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        return {
            'title': title,
            'author': author,
            'img': [pic['src'] for pic in pictures],
            'day': post_date.day,
            'month': post_date.month,
            'year': post_date.year
        }
1186
1187
1188
class ToonHole(GenericNavigableComic):
    """Class to retrieve Toon Holes comics."""
    # Also on http://tapastic.com/series/TOONHOLE
    name = 'toonhole'
    long_name = 'Toon Hole'
    url = 'http://www.toonhole.com'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary (short url, title, date, images) for one comic page."""
        short_url = soup.find('link', rel='shortlink')['href']
        raw_date = soup.find('div', class_='entry-meta').contents[0].strip()
        post_date = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find('div', id='comic').find_all('img')
        # The title comes from the first image; it stays empty when there is no image.
        title = ""
        if pictures:
            first_pic = pictures[0]
            title = first_pic['alt']
            assert first_pic['title'] == title
        return {
            'short_url': short_url,
            'title': title,
            'month': post_date.month,
            'year': post_date.year,
            'day': post_date.day,
            'img': [convert_iri_to_plain_ascii_uri(pic['src']) for pic in pictures],
        }
1218
1219
1220
class Channelate(GenericNavigableComic):
    """Class to retrieve Channelate comics."""
    name = 'channelate'
    long_name = 'Channelate'
    url = 'http://www.channelate.com'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary for one comic page, including its extra panel if any."""
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        raw_date = soup.find('span', class_='post-date').string
        post_date = string_to_date(raw_date, '%Y/%m/%d')
        title = soup.find('meta', property='og:title')['content']
        comic_div = soup.find('div', id='comic')
        if comic_div:
            pictures = comic_div.find_all('img')
        else:
            pictures = []
        # The extra panel is on a separate page which is fetched to collect its images.
        extra_url = None
        extra_button = soup.find('div', id='extrapanelbutton')
        if extra_button:
            extra_url = extra_button.find('a')['href']
            extra_page = get_soup_at_url(extra_url)
            pictures.extend(extra_page.find_all('img', class_='extrapanelimage'))
        return {
            'url_extra': extra_url,
            'title': title,
            'author': author,
            'month': post_date.month,
            'year': post_date.year,
            'day': post_date.day,
            'img': [urljoin_wrapper(cls.url, pic['src']) for pic in pictures],
        }
1254
1255
1256
class CyanideAndHappiness(GenericNavigableComic):
    """Class to retrieve Cyanide And Happiness comics."""
    name = 'cyanide'
    long_name = 'Cyanide and Happiness'
    url = 'http://explosm.net'
    _categories = ('NSFW', )
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'Oldest comic' anchor found on the home page."""
        home_soup = get_soup_at_url(cls.url)
        return home_soup.find('a', title='Oldest comic')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the next/previous anchor, or None when it carries no href."""
        # NOTE(review): the trailing space in 'previous-comic ' is kept as found - confirm it is intended.
        anchor_class = 'next-comic' if next_ else 'previous-comic '
        anchor = last_soup.find('a', class_=anchor_class)
        if anchor.get('href') is None:
            return None
        return anchor

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary (number, author, date, images) for one comic page."""
        canonical_url = soup.find('meta', property='og:url')['content']
        # The comic number is the second-to-last '/'-separated piece of the og:url value.
        num = int(canonical_url.split('/')[-2])
        raw_date = soup.find('h3').find('a').string
        comic_date = string_to_date(raw_date, '%Y.%m.%d')
        credit = soup.find('small', class_="author-credit-name").string
        assert credit.startswith('by ')
        author = credit[3:]
        pictures = soup.find_all('img', id='main-comic')
        return {
            'num': num,
            'author': author,
            'month': comic_date.month,
            'year': comic_date.year,
            'day': comic_date.day,
            'prefix': '%d-' % num,
            'img': [convert_iri_to_plain_ascii_uri(urljoin_wrapper(cls.url, pic['src']))
                    for pic in pictures]
        }
1295
1296
1297
class MrLovenstein(GenericComic):
    """Class to retrieve Mr Lovenstein comics."""
    # Also on https://tapastic.com/series/MrLovenstein
    name = 'mrlovenstein'
    long_name = 'Mr. Lovenstein'
    url = 'http://www.mrlovenstein.com'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generator to get the next comic. Implementation of GenericComic's abstract method."""
        # TODO: more info from http://www.mrlovenstein.com/archive
        comic_num_re = re.compile('^/comic/([0-9]*)$')
        comic_img_re = re.compile('^/images/comics/')
        # The range of comic numbers is deduced from the links found on the home page.
        home_links = get_soup_at_url(cls.url).find_all('a', href=comic_num_re)
        nums = [int(comic_num_re.match(anchor['href']).groups()[0])
                for anchor in home_links]
        first, last = min(nums), max(nums)
        if last_comic:
            # Resume right after the last comic already retrieved.
            first = last_comic['num'] + 1
        for num in range(first, last + 1):
            url = urljoin_wrapper(cls.url, '/comic/%d' % num)
            soup = get_soup_at_url(url)
            # Comic images are collected in the reverse of their order in the page.
            pictures = list(reversed(soup.find_all('img', src=comic_img_re)))
            description = soup.find('meta', attrs={'name': 'description'})['content']
            hover_texts = [pic.get('title') for pic in pictures]
            yield {
                'url': url,
                'num': num,
                'texts': '  '.join(text for text in hover_texts if text),
                'img': [urljoin_wrapper(url, pic['src']) for pic in pictures],
                'description': description,
            }
1327
1328
1329
class DinosaurComics(GenericListableComic):
    """Class to retrieve Dinosaur Comics comics."""
    name = 'dinosaur'
    long_name = 'Dinosaur Comics'
    url = 'http://www.qwantz.com'
    get_url_from_archive_element = get_href
    # Matches comic urls and captures the comic number.
    comic_link_re = re.compile('^%s/index.php\\?comic=([0-9]*)$' % url)

    @classmethod
    def get_archive_elements(cls):
        """Return the archive page anchors pointing to comics, in reversed page order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive.php'))
        anchors = archive_soup.find_all('a', href=cls.comic_link_re)
        # first link is random -> skip it
        return reversed(anchors[1:])

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary (date, image, title, text, number) for one comic page."""
        comic_url = cls.get_url_from_archive_element(link)
        num = int(cls.comic_link_re.match(comic_url).groups()[0])
        # The archive anchor holds the date; the node right after it holds the text.
        raw_date = link.string
        text = link.next_sibling.string
        comic_date = string_to_date(remove_st_nd_rd_th_from_date(raw_date), "%B %d, %Y")
        img_src_re = re.compile('^%s/comics/' % cls.url)
        picture = soup.find('img', src=img_src_re)
        return {
            'month': comic_date.month,
            'year': comic_date.year,
            'day': comic_date.day,
            'img': [picture.get('src')],
            'title': picture.get('title'),
            'text': text,
            'num': num,
        }
0 ignored issues
show
This code seems to be duplicated in your project.
Loading history...
1362
1363
1364
class ButterSafe(GenericListableComic):
    """Class to retrieve Butter Safe comics."""
    name = 'butter'
    long_name = 'ButterSafe'
    url = 'http://buttersafe.com'
    get_url_from_archive_element = get_href
    # Matches comic urls and captures the three numeric path parts (year, month, day).
    comic_link_re = re.compile('^%s/([0-9]*)/([0-9]*)/([0-9]*)/.*' % url)

    @classmethod
    def get_archive_elements(cls):
        """Return the archive page anchors pointing to comics, in reversed page order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive/'))
        return reversed(archive_soup.find_all('a', href=cls.comic_link_re))

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary (title, date, image) for one comic page."""
        comic_url = cls.get_url_from_archive_element(link)
        title = link.string
        url_parts = cls.comic_link_re.match(comic_url).groups()
        year, month, day = [int(part) for part in url_parts]
        picture = soup.find('div', id='comic').find('img')
        assert picture['alt'] == title
        return {
            'title': title,
            'day': day,
            'month': month,
            'year': year,
            'img': [picture['src']],
        }
1392
1393
1394
class CalvinAndHobbes(GenericComic):
    """Class to retrieve Calvin and Hobbes comics."""
    # Also on http://www.gocomics.com/calvinandhobbes/
    name = 'calvin'
    long_name = 'Calvin and Hobbes'
    # This is not through any official webpage but eh...
    url = 'http://marcel-oehler.marcellosendos.ch/comics/ch/'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generator to get the next comic. Implementation of GenericComic's abstract method.

        The index page links to one page per month; each month page holds
        images whose source starts with <year><month><day>. Only comics dated
        strictly after the last retrieved one are yielded.
        """
        last_date = get_date_for_comic(
            last_comic) if last_comic else date(1985, 11, 1)
        link_re = re.compile('^([0-9]*)/([0-9]*)/')
        for link in get_soup_at_url(cls.url).find_all('a', href=link_re):
            url = link['href']
            # Keep the raw strings: they are needed as-is to build the image
            # regexp and the image url.
            year_str, month_str = link_re.match(url).groups()
            year, month = int(year_str), int(month_str)
            # Skip the months that cannot contain anything newer than last_date
            # (first day of the month + 31 days is past the end of any month).
            if date(year, month, 1) + timedelta(days=31) < last_date:
                continue
            img_re = re.compile('^%s%s([0-9]*)' % (year_str, month_str))
            month_url = urljoin_wrapper(cls.url, url)
            for img in get_soup_at_url(month_url).find_all('img', src=img_re):
                img_src = img['src']
                day = int(img_re.match(img_src).groups()[0])
                comic_date = date(year, month, day)
                if comic_date > last_date:
                    yield {
                        'url': month_url,
                        'year': year,
                        'month': month,
                        'day': day,
                        'img': ['%s%s/%s/%s' % (cls.url, year_str, month_str, img_src)],
                    }
                    last_date = comic_date
1428
1429
1430
class AbstruseGoose(GenericListableComic):
    """Class to retrieve AbstruseGoose Comics."""
    name = 'abstruse'
    long_name = 'Abstruse Goose'
    url = 'http://abstrusegoose.com'
    get_url_from_archive_element = get_href
    # Matches comic urls and captures the comic number.
    comic_url_re = re.compile('^%s/([0-9]*)$' % url)
    # Matches the source of the comic strip images.
    comic_img_re = re.compile('^%s/strips/.*' % url)

    @classmethod
    def get_archive_elements(cls):
        """Return the archive page anchors pointing to comics, in page order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive'))
        return archive_soup.find_all('a', href=cls.comic_url_re)

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Build the info dictionary (number, title, image) for one comic page."""
        comic_url = cls.get_url_from_archive_element(archive_elt)
        num = int(cls.comic_url_re.match(comic_url).groups()[0])
        return {
            'num': num,
            'title': archive_elt.string,
            'img': [soup.find('img', src=cls.comic_img_re)['src']]
        }
1453
1454
1455
class PhDComics(GenericNavigableComic):
    """Class to retrieve PHD Comics."""
    name = 'phd'
    long_name = 'PhD Comics'
    url = 'http://phdcomics.com/comics/archive.php'

    @classmethod
    def get_first_comic_link(cls):
        """Return the element wrapping the 'first' button image, or None if it is absent."""
        home_soup = get_soup_at_url(cls.url)
        button = home_soup.find('img', src='http://phdcomics.com/comics/images/first_button.gif')
        if button is None:
            return None
        return button.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the element wrapping the next/prev button image, or None if it is absent."""
        direction = 'next' if next_ else 'prev'
        button_src = 'http://phdcomics.com/comics/images/%s_button.gif' % direction
        button = last_soup.find('img', src=button_src)
        if button is None:
            return None
        return button.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary (images, title) for one comic page from its <meta> tags."""
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        og_images = soup.find_all('meta', property='og:image')
        return {
            'img': [meta['content'] for meta in og_images],
            'title': title,
        }
1484
1485
1486 View Code Duplication
class Octopuns(GenericEmptyComic, GenericNavigableComic):
    """Class to retrieve Octopuns comics."""
    # Also on http://octopuns.tumblr.com
    name = 'octopuns'
    long_name = 'Octopuns'
    url = 'http://www.octopuns.net'

    @classmethod
    def get_first_comic_link(cls):
        """Return the element wrapping the 'First' button image on the home page."""
        first_button = get_soup_at_url(cls.url).find('img', src=re.compile('.*/First.png'))
        return first_button.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the parent of the Next/Back button image, or None if it has no href."""
        button_pattern = '.*/Next.png' if next_ else '.*/Back.png'
        anchor = last_soup.find('img', src=re.compile(button_pattern)).parent
        if anchor.get('href') is None:
            return None
        return anchor

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary (images, title, date) for one comic page."""
        title = soup.find('h3', class_='post-title entry-title').string
        raw_date = soup.find('h2', class_='date-header').string
        post_date = string_to_date(raw_date, "%A, %B %d, %Y")
        img_links = soup.find_all('link', rel='image_src')
        return {
            'img': [img_link['href'] for img_link in img_links],
            'title': title,
            'day': post_date.day,
            'month': post_date.month,
            'year': post_date.year,
        }
1518
1519
1520
class Quarktees(GenericNavigableComic):
    """Retriever for the comics posted on the Quarktees blog."""
    name = 'quarktees'
    long_name = 'Quarktees'
    url = 'http://www.quarktees.com/blogs/news'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.quarktees.com/blogs/news/12486621-coming-soon'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the 'article-next' or 'article-prev' anchor of the page."""
        anchor_id = 'article-next' if next_ else 'article-prev'
        return last_soup.find('a', id=anchor_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect the title and the image urls of a blog article."""
        title = soup.find('meta', property='og:title')['content']
        article_imgs = soup.find('div', class_='single-article').find_all('img')
        # Image sources are resolved against the class url.
        img_urls = [urljoin_wrapper(cls.url, tag['src']) for tag in article_imgs]
        return {
            'title': title,
            'img': img_urls,
        }
1544
1545
1546
class OverCompensating(GenericNavigableComic):
    """Retriever for the Over Compensating webcomic."""
    name = 'compensating'
    long_name = 'Over Compensating'
    url = 'http://www.overcompensating.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the anchor whose href ends with 'comic=1'."""
        home_soup = get_soup_at_url(cls.url)
        return home_soup.find('a', href=re.compile('comic=1$'))

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the navigation anchor, selected by its title attribute."""
        wanted_title = 'next comic' if next_ else 'go back already'
        return last_soup.find('a', title=wanted_title)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect the comic number, image url and title of a comic page."""
        comic_url = cls.get_url_from_link(link)
        # The comic number is the trailing 'comic=<digits>' part of the url.
        num_match = re.compile('.*comic=([0-9]*)$').match(comic_url)
        num = int(num_match.group(1))
        img = soup.find('img', src=re.compile('^/oc/comics/.*'))
        return {
            'num': num,
            'img': [urljoin_wrapper(comic_url, img['src'])],
            'title': img.get('title')
        }
1576
1577
1578
class Oglaf(GenericNavigableComic):
    """Retriever for the Oglaf webcomic (NSFW)."""
    name = 'oglaf'
    long_name = 'Oglaf [NSFW]'
    url = 'http://oglaf.com'
    _categories = ('NSFW', )
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the parent element of the div with id 'st'."""
        home_soup = get_soup_at_url(cls.url)
        return home_soup.find("div", id="st").parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the parent of the 'nx'/'pvs' div, or None if it is absent."""
        nav_div = last_soup.find("div", id="nx" if next_ else "pvs")
        if not nav_div:
            return None
        return nav_div.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect title, image urls and description of a comic page.

        Exactly one title image (inside the 'tt' div) and one strip image
        (id 'strip') are expected; their title attributes are joined to
        form the description.
        """
        title = soup.find('title').string
        title_imgs = soup.find('div', id='tt').find_all('img')
        assert len(title_imgs) == 1
        strip_imgs = soup.find_all('img', id='strip')
        assert len(strip_imgs) == 1
        all_imgs = list(title_imgs) + list(strip_imgs)
        description = ' '.join(tag['title'] for tag in all_imgs)
        return {
            'title': title,
            'img': [tag['src'] for tag in all_imgs],
            'description': description,
        }
1612
1613
1614
class ScandinaviaAndTheWorld(GenericNavigableComic):
    """Retriever for the Scandinavia And The World webcomic."""
    name = 'satw'
    long_name = 'Scandinavia And The World'
    url = 'http://satwcomic.com'
    get_first_comic_link = simulate_first_link
    first_url = 'http://satwcomic.com/sweden-denmark-and-norway'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor whose accesskey is 'n' (next) or 'p' (previous)."""
        key = 'n' if next_ else 'p'
        return last_soup.find('a', accesskey=key)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect title, description and image urls of a comic page."""
        title_meta = soup.find('meta', attrs={'name': 'twitter:label1'})
        title = title_meta['content']
        desc_meta = soup.find('meta', property='og:description')
        description = desc_meta['content']
        image_tags = soup.find_all('img', itemprop="image")
        return {
            'title': title,
            'description': description,
            'img': [tag['src'] for tag in image_tags],
        }
1638
1639
1640
class SomethingOfThatIlk(GenericEmptyComic):  # Does not exist anymore
    """Placeholder for the Something Of That Ilk comics (site is gone)."""
    name = "somethingofthatilk"
    long_name = "Something Of That Ilk"
    url = "http://www.somethingofthatilk.com"
1645
1646
1647
class InfiniteMonkeyBusiness(GenericNavigableComic):
    """Retriever for the Infinite Monkey Business webcomic."""
    name = 'monkey'
    long_name = 'Infinite Monkey Business'
    url = 'http://infinitemonkeybusiness.net'
    get_navi_link = get_a_navi_comicnavnext_navinext
    get_first_comic_link = simulate_first_link
    first_url = 'http://infinitemonkeybusiness.net/comic/pillory/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect the title and the image urls of a comic page."""
        title = soup.find('meta', property='og:title')['content']
        comic_div = soup.find('div', id='comic')
        img_urls = [tag['src'] for tag in comic_div.find_all('img')]
        return {
            'title': title,
            'img': img_urls,
        }
1665
1666
1667
class Wondermark(GenericListableComic):
    """Retriever for the Wondermark webcomic."""
    name = 'wondermark'
    long_name = 'Wondermark'
    url = 'http://wondermark.com'
    get_url_from_archive_element = get_href

    @classmethod
    def get_archive_elements(cls):
        """Return the archive's bookmark links in reversed page order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive/'))
        return reversed(archive_soup.find_all('a', rel='bookmark'))

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect date, image, title, alt text and tags of a post.

        Posts without a 'comic' div yield an empty image list and empty
        title/alt strings.
        """
        date_str = soup.find('div', class_='postdate').find('em').string
        day = string_to_date(remove_st_nd_rd_th_from_date(date_str), "%B %d, %Y")
        # Defaults used when the post contains no comic.
        img_src, alt, title = [], '', ''
        comic_div = soup.find('div', id='comic')
        if comic_div:
            img = comic_div.find('img')
            img_src = [img['src']]
            alt = img['alt']
            assert alt == img['title']
            title = soup.find('meta', property='og:title')['content']
        return {
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': img_src,
            'title': title,
            'alt': alt,
            'tags': ' '.join(t.string for t in soup.find('div', class_='postmeta').find_all('a', rel='tag')),
        }
1704
1705
1706
class WarehouseComic(GenericNavigableComic):
    """Retriever for the Warehouse Comic webcomic."""
    name = 'warehouse'
    long_name = 'Warehouse Comic'
    url = 'http://warehousecomic.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect image urls, title and publication date of a comic page."""
        title = soup.find('h2', class_='post-title').string
        date_span = soup.find('span', class_='post-date')
        day = string_to_date(date_span.string, "%B %d, %Y")
        comic_imgs = soup.find('div', id='comic').find_all('img')
        return {
            'img': [tag['src'] for tag in comic_imgs],
            'title': title,
            'day': day.day,
            'month': day.month,
            'year': day.year,
        }
1728
1729
1730
class JustSayEh(GenericNavigableComic):
    """Retriever for the Just Say Eh webcomic."""
    # Also on http://tapastic.com/series/Just-Say-Eh
    name = 'justsayeh'
    long_name = 'Just Say Eh'
    url = 'http://www.justsayeh.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect image urls, title and alt text of a comic page.

        Every image is expected to have identical 'alt' and 'title'
        attributes; the alt text of the first image is reported.
        """
        title = soup.find('h2', class_='post-title').string
        comic_imgs = soup.find("div", id="comic").find_all("img")
        for tag in comic_imgs:
            assert tag['alt'] == tag['title']
        first_alt = comic_imgs[0]['alt']
        return {
            'img': [tag['src'] for tag in comic_imgs],
            'title': title,
            'alt': first_alt,
        }
1751
1752
1753 View Code Duplication
class MouseBearComedy(GenericNavigableComic):
    """Retriever for the Mouse Bear Comedy webcomic."""
    # Also on http://mousebearcomedy.tumblr.com
    name = 'mousebear'
    long_name = 'Mouse Bear Comedy'
    url = 'http://www.mousebearcomedy.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect date, image urls, title and author of a comic page.

        Every image is expected to carry the post title as both its 'alt'
        and 'title' attribute.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        date_span = soup.find("span", class_="post-date")
        day = string_to_date(date_span.string, '%B %d, %Y')
        comic_imgs = soup.find("div", id="comic").find_all("img")
        for tag in comic_imgs:
            assert tag['alt'] == tag['title'] == title
        return {
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [tag['src'] for tag in comic_imgs],
            'title': title,
            'author': author,
        }
0 ignored issues
show
This code seems to be duplicated in your project.
Loading history...
1779
1780
1781
class BigFootJustice(GenericNavigableComic):
    """Retriever for the Big Foot Justice webcomic."""
    # Also on http://tapastic.com/series/bigfoot-justice
    name = 'bigfoot'
    long_name = 'Big Foot Justice'
    url = 'http://bigfootjustice.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect the image urls and a title built from the image titles."""
        comic_imgs = soup.find('div', id='comic').find_all('img')
        for tag in comic_imgs:
            assert tag['title'] == tag['alt']
        # The comic title is the space-joined titles of all its images.
        joined_title = ' '.join(tag['title'] for tag in comic_imgs)
        return {
            'img': [tag['src'] for tag in comic_imgs],
            'title': joined_title,
        }
1800
1801
1802
class RespawnComic(GenericNavigableComic):
    """Class to retrieve Respawn Comic.

    Navigation uses the page's rel='next' anchor and starts from the
    hard-coded ``first_url``.
    """
    # Also on https://respawncomic.tumblr.com
    name = 'respawn'
    long_name = 'Respawn Comic'
    # The url previously ended with a stray space ('http://respawncomic.com ').
    url = 'http://respawncomic.com'
    _categories = ('RESPAWN', )
    get_navi_link = get_a_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://respawncomic.com/comic/c0001/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with the title, author, publication date (day, month,
        year) and the list of image urls taken from the page's meta tags.
        """
        title = soup.find('meta', property='og:title')['content']
        author = soup.find('meta', attrs={'name': 'shareaholic:article_author_name'})['content']
        date_str = soup.find('meta', attrs={'name': 'shareaholic:article_published_time'})['content']
        # Only the leading 'YYYY-MM-DD' part of the timestamp is parsed.
        date_str = date_str[:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        imgs = soup.find_all('meta', property='og:image')
        # og:image entries that are excluded from the comic's image list.
        skip_imgs = {
            'http://respawncomic.com/wp-content/uploads/2016/03/site/HAROLD2.png',
            'http://respawncomic.com/wp-content/uploads/2016/03/site/DEVA.png'
        }
        return {
            'title': title,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [i['content'] for i in imgs if i['content'] not in skip_imgs],
        }
1834
1835
1836 View Code Duplication
class SafelyEndangered(GenericNavigableComic):
    """Retriever for the Safely Endangered webcomic."""
    # Also on http://tumblr.safelyendangered.com
    name = 'endangered'
    long_name = 'Safely Endangered'
    url = 'http://www.safelyendangered.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.safelyendangered.com/comic/ignored/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect date, image urls, title and alt text of a comic page.

        The alt text of the first image is reported; every image is expected
        to have identical 'alt' and 'title' attributes.
        """
        title = soup.find('h2', class_='post-title').string
        date_span = soup.find('span', class_='post-date')
        day = string_to_date(date_span.string, '%B %d, %Y')
        comic_imgs = soup.find('div', id='comic').find_all('img')
        first_alt = comic_imgs[0]['alt']
        for tag in comic_imgs:
            assert tag['alt'] == tag['title']
        return {
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [tag['src'] for tag in comic_imgs],
            'title': title,
            'alt': first_alt,
        }
1863
1864
1865 View Code Duplication
class PicturesInBoxes(GenericNavigableComic):
    """Retriever for the Pictures In Boxes webcomic."""
    # Also on https://picturesinboxescomic.tumblr.com
    name = 'picturesinboxes'
    long_name = 'Pictures in Boxes'
    url = 'http://www.picturesinboxes.com'
    get_navi_link = get_a_navi_navinext
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.picturesinboxes.com/2013/10/26/tetris/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect date, image urls, title and author of a comic page.

        At least one image is expected, each carrying the post title as both
        its 'title' and 'alt' attribute.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        date_span = soup.find('span', class_='post-date')
        day = string_to_date(date_span.string, '%B %d, %Y')
        pane_imgs = soup.find('div', class_='comicpane').find_all('img')
        assert pane_imgs
        for tag in pane_imgs:
            assert tag['title'] == tag['alt'] == title
        return {
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [tag['src'] for tag in pane_imgs],
            'title': title,
            'author': author,
        }
1893
1894
1895 View Code Duplication
class Penmen(GenericNavigableComic):
    """Retriever for the Penmen webcomic."""
    name = 'penmen'
    long_name = 'Penmen'
    url = 'http://penmen.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://penmen.com/index.php/2016/09/12/penmen-announces-grin-big-brand-clothing/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect title, short url, image urls, tags and date of a post."""
        title = soup.find('title').string
        content_imgs = soup.find('div', class_='entry-content').find_all('img')
        short_url = soup.find('link', rel='shortlink')['href']
        tag_names = [a.string for a in soup.find_all('a', rel='tag')]
        tags = ' '.join(tag_names)
        # Only the leading 'YYYY-MM-DD' part of the datetime attribute is parsed.
        timestamp = soup.find('time')['datetime']
        day = string_to_date(timestamp[:10], "%Y-%m-%d")
        return {
            'title': title,
            'short_url': short_url,
            'img': [tag['src'] for tag in content_imgs],
            'tags': tags,
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
1922
1923
1924
class TheDoghouseDiaries(GenericNavigableComic):
    """Retriever for The Dog House Diaries webcomic."""
    name = 'doghouse'
    long_name = 'The Dog House Diaries'
    url = 'http://thedoghousediaries.com'

    @classmethod
    def get_first_comic_link(cls):
        """Return the anchor with id 'firstlink' from the home page."""
        home_soup = get_soup_at_url(cls.url)
        return home_soup.find('a', id='firstlink')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the 'nextlink' or 'previouslink' anchor of the page."""
        anchor_id = 'nextlink' if next_ else 'previouslink'
        return last_soup.find('a', id=anchor_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect titles, alt text, image url and number of a comic page."""
        img = soup.find('img', src=re.compile('^dhdcomics/.*'))
        comic_url = cls.get_url_from_link(link)
        title = soup.find('h2', id='titleheader').string
        subtitle = soup.find('div', id='subtext').string
        alt = img.get('title')
        img_url = urljoin_wrapper(comic_url, img['src'].strip())
        # The comic number is the last path component of the comic url.
        num = int(comic_url.split('/')[-1])
        return {
            'title': title,
            'title2': subtitle,
            'alt': alt,
            'img': [img_url],
            'num': num,
        }
1953
1954
1955
class InvisibleBread(GenericListableComic):
    """Class to retrieve Invisible Bread comics.

    Comics are listed from the site's archive page; each archive element is
    a ``td`` cell with class 'archive-title'.
    """
    # Also on http://www.gocomics.com/invisible-bread
    name = 'invisiblebread'
    long_name = 'Invisible Bread'
    url = 'http://invisiblebread.com'

    @classmethod
    def get_archive_elements(cls):
        """Get the archive cells, in reversed page order."""
        archive_url = urljoin_wrapper(cls.url, 'archives/')
        return reversed(get_soup_at_url(archive_url).find_all('td', class_='archive-title'))

    @classmethod
    def get_url_from_archive_element(cls, td):
        """Get url corresponding to an archive element."""
        return td.find('a')['href']

    @classmethod
    def get_comic_info(cls, soup, td):
        """Get information about a particular comics.

        The year is read from the comic url and combined with the month/day
        text found in the cell preceding ``td``.
        """
        url = cls.get_url_from_archive_element(td)
        title = td.find('a').string
        month_and_day = td.previous_sibling.string
        # Escape the base url so that its dots only match literal dots.
        link_re = re.compile('^%s/([0-9]+)/' % re.escape(cls.url))
        year = link_re.match(url).groups()[0]
        date_str = month_and_day + ' ' + year
        day = string_to_date(date_str, '%b %d %Y')
        imgs = [soup.find('div', id='comic').find('img')]
        assert len(imgs) == 1
        assert all(i['title'] == i['alt'] == title for i in imgs)
        return {
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
            'title': title,
        }
1991
1992
1993
class DiscoBleach(GenericEmptyComic):  # Does not work anymore
    """Placeholder for the Disco Bleach comics (retrieval no longer works)."""
    name = "discobleach"
    long_name = "Disco Bleach"
    url = "http://discobleach.com"
1998
1999
2000
class TubeyToons(GenericEmptyComic):  # Does not work anymore
    """Placeholder for the TubeyToons comics (retrieval no longer works)."""
    # Also on http://tapastic.com/series/Tubey-Toons
    # Also on https://tubeytoons.tumblr.com
    name = "tubeytoons"
    long_name = "Tubey Toons"
    url = "http://tubeytoons.com"
    # NOTE(review): category is spelled 'TUNEYTOONS'; presumably shared with
    # related classes elsewhere - confirm before renaming.
    _categories = ("TUNEYTOONS", )
2008
2009
2010 View Code Duplication
class CompletelySeriousComics(GenericNavigableComic):
    """Retriever for the Completely Serious Comics webcomic."""
    name = 'completelyserious'
    long_name = 'Completely Serious Comics'
    url = 'http://completelyseriouscomics.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect date, image urls, title, alt text and author of a page.

        At least one image is expected; all images must share the same
        'title' and 'alt' text, which is reported as the alt text.
        """
        title = soup.find('h2', class_='post-title').string
        # The author is the second child node of the 'post-author' span.
        author_span = soup.find('span', class_='post-author')
        author = author_span.contents[1].string
        date_span = soup.find('span', class_='post-date')
        day = string_to_date(date_span.string, '%B %d, %Y')
        pane_imgs = soup.find('div', class_='comicpane').find_all('img')
        assert pane_imgs
        alt = pane_imgs[0]['title']
        for tag in pane_imgs:
            assert tag['title'] == tag['alt'] == alt
        return {
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [tag['src'] for tag in pane_imgs],
            'title': title,
            'alt': alt,
            'author': author,
        }
0 ignored issues
show
This code seems to be duplicated in your project.
Loading history...
2038
2039
2040
class PoorlyDrawnLines(GenericListableComic):
    """Class to retrieve Poorly Drawn Lines comics.

    Comics are listed from the site's archive page; archive elements are the
    anchors whose href points below ``<url>/comic/``.
    """
    # Also on http://pdlcomics.tumblr.com
    name = 'poorlydrawn'
    long_name = 'Poorly Drawn Lines'
    url = 'https://www.poorlydrawnlines.com'
    _categories = ('POORLYDRAWN', )
    get_url_from_archive_element = get_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        At most one image is expected in the post; when there is none the
        image list is empty and the title is an empty string.
        """
        imgs = soup.find('div', class_='post').find_all('img')
        assert len(imgs) <= 1
        return {
            'img': [i['src'] for i in imgs],
            'title': imgs[0].get('title', "") if imgs else "",
        }

    @classmethod
    def get_archive_elements(cls):
        """Get the archive links to comics, in reversed page order."""
        archive_url = urljoin_wrapper(cls.url, 'archive')
        # Escape the base url so that its dots only match literal dots.
        url_re = re.compile('^%s/comic/.' % re.escape(cls.url))
        return reversed(get_soup_at_url(archive_url).find_all('a', href=url_re))
2064
2065
2066 View Code Duplication
class LoadingComics(GenericNavigableComic):
    """Retriever for the Loading Artist webcomic."""
    name = 'loadingartist'
    long_name = 'Loading Artist'
    url = 'http://www.loadingartist.com/latest'

    @classmethod
    def get_first_comic_link(cls):
        """Return the anchor titled 'First' from the latest-comic page."""
        latest_soup = get_soup_at_url(cls.url)
        return latest_soup.find('a', title="First")

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor titled 'Next' or 'Previous'."""
        wanted_title = 'Next' if next_ else 'Previous'
        return last_soup.find('a', title=wanted_title)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect title, image urls and publication date of a comic page."""
        title = soup.find('h1').string
        raw_date = soup.find('span', class_='date').string
        day = string_to_date(raw_date.strip(), "%B %d, %Y")
        # Only images with empty 'alt' and 'title' attributes are kept.
        comic_div = soup.find('div', class_='comic')
        comic_imgs = comic_div.find_all('img', alt='', title='')
        return {
            'title': title,
            'img': [tag['src'] for tag in comic_imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
2096
2097
2098 View Code Duplication
class ChuckleADuck(GenericNavigableComic):
    """Retriever for the Chuckle-A-Duck webcomic."""
    name = 'chuckleaduck'
    long_name = 'Chuckle-A-duck'
    url = 'http://chuckleaduck.com'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect date, image urls, title and author of a comic page.

        Pages without a 'comic' div yield no image and an empty title.
        """
        raw_date = soup.find('span', class_='post-date').string
        day = string_to_date(remove_st_nd_rd_th_from_date(raw_date), "%B %d, %Y")
        author = soup.find('span', class_='post-author').string
        comic_div = soup.find('div', id='comic')
        if comic_div:
            comic_imgs = comic_div.find_all('img')
        else:
            comic_imgs = []
        # The title is taken from the first image, when there is one.
        if comic_imgs:
            title = comic_imgs[0]['title']
        else:
            title = ""
        for tag in comic_imgs:
            assert tag['title'] == tag['alt'] == title
        return {
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [tag['src'] for tag in comic_imgs],
            'title': title,
            'author': author,
        }
2124
2125
2126
class DepressedAlien(GenericNavigableComic):
    """Class to retrieve Depressed Alien Comics.

    Navigation relies on arrow images identified by their ``name``
    attribute; the link is the element enclosing the image.
    """
    name = 'depressedalien'
    long_name = 'Depressed Alien'
    url = 'http://depressedalien.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics."""
        return get_soup_at_url(cls.url).find('img', attrs={'name': 'beginArrow'}).parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns None when the corresponding arrow image is not on the page,
        instead of failing on the missing element.
        """
        arrow = last_soup.find('img', attrs={'name': 'rightArrow' if next_ else 'leftArrow'})
        return None if arrow is None else arrow.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with the title (``twitter:title`` meta tag) and the
        image urls (``og:image`` meta tags).
        """
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        imgs = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'img': [i['content'] for i in imgs],
        }
2152
2153
2154
class ThingsInSquares(GenericListableComic):
    """Retriever for the Things In Squares webcomic."""
    # This can be retrieved in other languages
    # Also on https://tapastic.com/series/Things-in-Squares
    name = 'squares'
    long_name = 'Things in squares'
    url = 'http://www.thingsinsquares.com'

    @classmethod
    def get_comic_info(cls, soup, tr):
        """Collect date, titles, description, tags, images and alt text.

        ``tr`` is an archive row of exactly three cells: the second holds
        the comic link and the third the date.
        """
        _, link_cell, date_cell = tr.find_all('td')
        anchor = link_cell.find('a')
        day = string_to_date(date_cell.string, "%m.%d.%y")
        title = anchor.string
        title2 = soup.find('meta', property='og:title')['content']
        desc_meta = soup.find('meta', property='og:description')
        if desc_meta:
            description = desc_meta['content']
        else:
            description = ''
        tag_metas = soup.find_all('meta', property='article:tag')
        tags = ' '.join(meta['content'] for meta in tag_metas)
        content_imgs = soup.find('div', class_='entry-content').find_all('img')
        return {
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'title': title,
            'title2': title2,
            'description': description,
            'tags': tags,
            'img': [tag['src'] for tag in content_imgs],
            'alt': ' '.join(tag['alt'] for tag in content_imgs),
        }

    @classmethod
    def get_url_from_archive_element(cls, tr):
        """Return the href of the link found in the row's second cell."""
        _, link_cell, _date_cell = tr.find_all('td')
        anchor = link_cell.find('a')
        return anchor['href']

    @classmethod
    def get_archive_elements(cls):
        """Return the rows of the archive table body in reversed order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive-2'))
        rows = archive_soup.find('tbody').find_all('tr')
        return reversed(rows)
2196
2197
2198 View Code Duplication
class HappleTea(GenericNavigableComic):
    """Retriever for the Happle Tea webcomic."""
    name = 'happletea'
    long_name = 'Happle Tea'
    url = 'http://www.happletea.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect title, image urls, alt text, date and author of a page.

        Title, author and date are all read from the 'post-content' div.
        """
        comic_imgs = soup.find('div', id='comic').find_all('img')
        post_div = soup.find('div', class_='post-content')
        title = post_div.find('h2', class_='post-title').string
        author = post_div.find('a', rel='author').string
        raw_date = post_div.find('span', class_='post-date').string
        day = string_to_date(raw_date, "%B %d, %Y")
        for tag in comic_imgs:
            assert tag['alt'] == tag['title']
        return {
            'title': title,
            'img': [tag['src'] for tag in comic_imgs],
            'alt': ''.join(tag['alt'] for tag in comic_imgs),
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'author': author,
        }
2225
2226
2227
class RockPaperScissors(GenericNavigableComic):
    """Retriever for the Rock Paper Scissors webcomic."""
    name = 'rps'
    long_name = 'Rock Paper Scissors'
    url = 'http://rps-comics.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect title, transcript, short url and image urls of a page."""
        title = soup.find('title').string
        image_metas = soup.find_all('meta', property='og:image')
        shortlink = soup.find('link', rel='shortlink')
        short_url = shortlink['href']
        transcript_div = soup.find('div', id='transcript-content')
        transcript = transcript_div.string
        return {
            'title': title,
            'transcript': transcript,
            'short_url': short_url,
            'img': [meta['content'] for meta in image_metas],
        }
2248
2249
2250
class FatAwesomeComics(GenericNavigableComic):
    """Retriever for the Fat Awesome comics."""
    # Also on http://fatawesomecomedy.tumblr.com
    name = 'fatawesome'
    long_name = 'Fat Awesome'
    url = 'http://fatawesome.com/comics'
    get_navi_link = get_a_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://fatawesome.com/shortbus/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect title, description, tags, alt text, image url and date.

        Exactly one image with attribute data-recalc-dims="1" is expected;
        the query string is stripped from its source url.
        """
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        description = soup.find('meta', attrs={'name': 'description'})['content']
        tag_meta = soup.find('meta', property='article:tag')
        if tag_meta:
            tags = tag_meta['content']
        else:
            tags = ""
        published = soup.find('meta', property='article:published_time')['content']
        # Only the leading 'YYYY-MM-DD' part of the timestamp is parsed.
        day = string_to_date(published[:10], "%Y-%m-%d")
        comic_imgs = soup.find_all('img', attrs={'data-recalc-dims': "1"})
        assert len(comic_imgs) == 1
        return {
            'title': title,
            'description': description,
            'tags': tags,
            'alt': "".join(tag['alt'] for tag in comic_imgs),
            'img': [tag['src'].rsplit('?', 1)[0] for tag in comic_imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
2281
2282
2283
class AnythingComic(GenericListableComic):
    """Retriever for Anything Comic, driven by the site's archive table."""
    # Also on http://tapastic.com/series/anything
    name = 'anythingcomic'
    long_name = 'Anything Comic'
    url = 'http://www.anythingcomic.com'

    @classmethod
    def get_archive_elements(cls):
        """Return the archive table rows (<tr>) that describe comics."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive/'))
        rows = archive_soup.find('table', id='chapter_table').find_all('tr')
        # The first 2 <tr>'s do not correspond to comics
        return rows[2:]

    @classmethod
    def get_url_from_archive_element(cls, tr):
        """Return the absolute comic url referenced by an archive row."""
        # A row is expected to hold exactly 4 cells; only the 2nd is used.
        _, td_comic, _, _ = tr.find_all('td')
        href = td_comic.find('a')['href']
        return urljoin_wrapper(cls.url, href)

    @classmethod
    def get_comic_info(cls, soup, tr):
        """Build the info dictionary for one comic.

        ``soup`` is the parsed comic page and ``tr`` the archive row it
        was reached from (number, title link, date and one ignored cell).
        """
        td_num, td_comic, td_date, _ = tr.find_all('td')
        num = int(td_num.string)
        title = td_comic.find('a').string
        imgs = soup.find_all('img', id='comic_image')
        cleaned_date = remove_st_nd_rd_th_from_date(td_date.string)
        day = string_to_date(cleaned_date, "%B %d, %Y, %I:%M %p")
        assert len(imgs) == 1
        for img in imgs:
            assert img.get('alt') == img.get('title')
        return {
            'num': num,
            'title': title,
            'alt': imgs[0].get('alt', ''),
            'img': [img['src'] for img in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
2324
2325
2326 View Code Duplication
class LonnieMillsap(GenericNavigableComic):
    """Retriever for Lonnie Millsap's comics."""
    name = 'millsap'
    long_name = 'Lonnie Millsap'
    url = 'http://www.lonniemillsap.com'
    first_url = 'http://www.lonniemillsap.com/?p=42'
    get_first_comic_link = simulate_first_link
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary for one comic from its parsed page.

        ``soup`` is the parsed comic page; ``link`` is not used here.
        """
        title = soup.find('h2', class_='post-title').string
        # Author, date and images are all looked up inside the post body.
        post = soup.find('div', class_='post-content')
        author_span = post.find("span", class_="post-author")
        author = author_span.find("a").string
        posted_on = post.find("span", class_="post-date").string
        day = string_to_date(posted_on, "%B %d, %Y")
        entry = post.find("div", class_="entry")
        img_urls = [img['src'] for img in entry.find_all("img")]
        return {
            'title': title,
            'author': author,
            'img': img_urls,
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
2352
2353
2354 View Code Duplication
class LinsEditions(GenericNavigableComic):
    """Retriever for the L.I.N.S. Editions comics."""
    # Also on https://linscomics.tumblr.com
    # Now on https://warandpeas.com
    name = 'lins'
    long_name = 'L.I.N.S. Editions'
    url = 'https://linsedition.com'
    _categories = ('LINS', )
    first_url = 'https://linsedition.com/2011/09/07/l-i-n-s/'
    get_first_comic_link = simulate_first_link
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary for one comic from its parsed page.

        Everything is read from the page's ``<meta property=...>`` tags;
        ``link`` is not used here.
        """
        def meta_property(prop):
            """Return the first <meta> element with the given property."""
            return soup.find('meta', property=prop)

        title = meta_property('og:title')['content']
        image_metas = soup.find_all('meta', property='og:image')
        published = meta_property('article:published_time')['content']
        # Only the leading YYYY-MM-DD part of the timestamp is parsed.
        day = string_to_date(published[:10], "%Y-%m-%d")
        return {
            'title': title,
            'img': [meta['content'] for meta in image_metas],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
2380
2381
2382
class ThorsThundershack(GenericNavigableComic):
    """Retriever for the Thor's Thundershack comics."""
    # Also on http://tapastic.com/series/Thors-Thundershac
    name = 'thor'
    long_name = 'Thor\'s Thundershack'
    url = 'http://www.thorsthundershack.com'
    _categories = ('THOR', )
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'first' navigation link found on the home page."""
        home = get_soup_at_url(cls.url)
        return home.find('a', class_='first navlink')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the link to the next (or previous) comic, or None.

        Links whose href is exactly '/comic' are skipped.
        """
        wanted_rel = 'next' if next_ else 'prev'
        candidates = last_soup.find_all('a', rel=wanted_rel)
        return next((a for a in candidates if a['href'] != '/comic'), None)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary for one comic from its parsed page.

        ``soup`` is the parsed comic page; ``link`` is not used here.
        Image urls are joined with the site url before being returned.
        """
        title = soup.find('meta', attrs={'name': 'description'})["content"]
        description = soup.find('div', itemprop='articleBody').text
        author = soup.find('span', itemprop='author copyrightHolder').string
        imgs = soup.find_all('img', itemprop='image')
        for img in imgs:
            assert img['title'] == img['alt']
        if imgs:
            alt = imgs[0]['alt']
        else:
            alt = ""
        published = soup.find('time', itemprop='datePublished')["datetime"]
        day = string_to_date(published, "%Y-%m-%d %H:%M:%S")
        img_urls = [urljoin_wrapper(cls.url, img['src']) for img in imgs]
        return {
            'img': img_urls,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'author': author,
            'title': title,
            'alt': alt,
            'description': description,
        }
2425
2426
2427
class GerbilWithAJetpack(GenericNavigableComic):
    """Class to retrieve GerbilWithAJetpack comics."""
    name = 'gerbil'
    long_name = 'Gerbil With A Jetpack'
    url = 'http://gerbilwithajetpack.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        ``soup`` is the parsed comic page; ``link`` is not used here.
        Returns a dict with the image urls, title, alt text, author and
        publication date. A post without any image yields an empty image
        list and an empty alt text instead of raising IndexError.
        """
        title = soup.find('h2', class_='post-title').string
        author = soup.find("span", class_="post-author").find("a").string
        date_str = soup.find("span", class_="post-date").string
        day = string_to_date(date_str, "%B %d, %Y")
        imgs = soup.find("div", id="comic").find_all("img")
        # Guard against image-less posts, as the sibling classes do.
        alt = imgs[0]['alt'] if imgs else ""
        # Every image is expected to carry the same alt and title text.
        assert all(i['alt'] == i['title'] == alt for i in imgs)
        return {
            'img': [i['src'] for i in imgs],
            'title': title,
            'alt': alt,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
2454
2455
2456 View Code Duplication
class EveryDayBlues(GenericEmptyComic, GenericNavigableComic):
    """Retriever for the Every Day Blues comics."""
    name = "blues"
    long_name = "Every Day Blues"
    url = "http://everydayblues.net"
    get_navi_link = get_link_rel_next
    get_first_comic_link = get_a_navi_navifirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary for one comic from its parsed page.

        ``soup`` is the parsed comic page; ``link`` is not used here.
        The post date is parsed with the "de_DE.utf8" locale.
        """
        title = soup.find("h2", class_="post-title").string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        posted_on = soup.find("span", class_="post-date").string
        day = string_to_date(posted_on, "%d. %B %Y", "de_DE.utf8")
        comic_div = soup.find("div", id="comic")
        imgs = comic_div.find_all("img")
        # Alt and title of every image are expected to equal the post title.
        for img in imgs:
            assert img['alt'] == img['title'] == title
        assert len(imgs) <= 1
        return {
            'img': [img['src'] for img in imgs],
            'title': title,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
2482
2483
2484 View Code Duplication
class BiterComics(GenericNavigableComic):
    """Retriever for the Biter Comics."""
    name = "biter"
    long_name = "Biter Comics"
    url = "http://www.bitercomics.com"
    get_navi_link = get_link_rel_next
    get_first_comic_link = get_a_navi_navifirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary for one comic from its parsed page.

        ``soup`` is the parsed comic page; ``link`` is not used here.
        Exactly one image is expected inside the ``div#comic`` element.
        """
        title = soup.find("h1", class_="entry-title").string
        author_span = soup.find("span", class_="author vcard")
        author = author_span.find("a").string
        posted_on = soup.find("span", class_="entry-date").string
        day = string_to_date(posted_on, "%B %d, %Y")
        comic_div = soup.find("div", id="comic")
        imgs = comic_div.find_all("img")
        for img in imgs:
            assert img['alt'] == img['title']
        assert len(imgs) == 1
        only_img = imgs[0]
        alt = only_img['alt']
        return {
            'img': [img['src'] for img in imgs],
            'title': title,
            'alt': alt,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
2512
2513
2514 View Code Duplication
class TheAwkwardYeti(GenericNavigableComic):
    """Retriever for The Awkward Yeti comics."""
    # Also on http://www.gocomics.com/the-awkward-yeti
    # Also on http://larstheyeti.tumblr.com
    # Also on https://tapastic.com/series/TheAwkwardYeti
    name = 'yeti'
    long_name = 'The Awkward Yeti'
    url = 'http://theawkwardyeti.com'
    _categories = ('YETI', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = get_a_navi_navifirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary for one comic from its parsed page.

        ``soup`` is the parsed comic page; ``link`` is not used here.
        """
        title = soup.find('h2', class_='post-title').string
        posted_on = soup.find("span", class_="post-date").string
        day = string_to_date(posted_on, "%B %d, %Y")
        comic_div = soup.find("div", id="comic")
        imgs = comic_div.find_all("img")
        # Only the first image (if any) must have matching alt and title.
        assert not imgs or imgs[0]['alt'] == imgs[0]['title']
        return {
            'img': [img['src'] for img in imgs],
            'title': title,
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
2541
2542
2543
class PleasantThoughts(GenericNavigableComic):
    """Retriever for the Pleasant Thoughts comics."""
    name = 'pleasant'
    long_name = 'Pleasant Thoughts'
    url = 'http://pleasant-thoughts.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = get_a_navi_navifirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary (title and images) for one comic.

        Both are looked up inside the ``div.post-content`` element of the
        parsed page ``soup``; ``link`` is not used here.
        """
        post = soup.find('div', class_='post-content')
        heading = post.find('h2', class_='post-title')
        entry = post.find("div", class_="entry")
        img_urls = [img['src'] for img in entry.find_all("img")]
        return {
            'title': heading.string,
            'img': img_urls,
        }
2561
2562
2563
class MisterAndMe(GenericNavigableComic):
    """Retriever for the Mister & Me comics."""
    # Also on http://www.gocomics.com/mister-and-me
    # Also on https://tapastic.com/series/Mister-and-Me
    name = 'mister'
    long_name = 'Mister & Me'
    url = 'http://www.mister-and-me.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = get_a_comicnavbase_comicnavfirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary for one comic from its parsed page.

        ``soup`` is the parsed comic page; ``link`` is not used here.
        At most one image is expected inside the ``div#comic`` element.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        posted_on = soup.find("span", class_="post-date").string
        day = string_to_date(posted_on, "%B %d, %Y")
        comic_div = soup.find("div", id="comic")
        imgs = comic_div.find_all("img")
        for img in imgs:
            assert img['alt'] == img['title']
        assert len(imgs) <= 1
        if imgs:
            alt = imgs[0]['alt']
        else:
            alt = ""
        return {
            'img': [img['src'] for img in imgs],
            'title': title,
            'alt': alt,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
2593
2594
2595 View Code Duplication
class LastPlaceComics(GenericNavigableComic):
    """Retriever for the Last Place Comics."""
    name = 'lastplace'
    long_name = 'Last Place Comics'
    url = "http://lastplacecomics.com"
    get_navi_link = get_link_rel_next
    get_first_comic_link = get_a_comicnavbase_comicnavfirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary for one comic from its parsed page.

        ``soup`` is the parsed comic page; ``link`` is not used here.
        At most one image is expected inside the ``div#comic`` element.
        """
        heading = soup.find('h2', class_='post-title')
        title = heading.string
        author_link = soup.find("span", class_="post-author").find("a")
        author = author_link.string
        day = string_to_date(
            soup.find("span", class_="post-date").string, "%B %d, %Y")
        imgs = soup.find("div", id="comic").find_all("img")
        assert all(img['alt'] == img['title'] for img in imgs)
        assert len(imgs) <= 1
        # Empty alt text when the post carries no image.
        alt = imgs[0]['alt'] if imgs else ""
        img_urls = [img['src'] for img in imgs]
        return {
            'img': img_urls,
            'title': title,
            'alt': alt,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
2623
2624
2625 View Code Duplication
class TalesOfAbsurdity(GenericNavigableComic):
    """Retriever for the Tales Of Absurdity comics."""
    # Also on http://tapastic.com/series/Tales-Of-Absurdity
    # Also on http://talesofabsurdity.tumblr.com
    name = 'absurdity'
    long_name = 'Tales of Absurdity'
    url = 'http://talesofabsurdity.com'
    _categories = ('ABSURDITY', )
    get_navi_link = get_a_navi_comicnavnext_navinext
    get_first_comic_link = get_a_navi_navifirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary for one comic from its parsed page.

        ``soup`` is the parsed comic page; ``link`` is not used here.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        posted_on = soup.find("span", class_="post-date").string
        day = string_to_date(posted_on, "%B %d, %Y")
        comic_div = soup.find("div", id="comic")
        imgs = comic_div.find_all("img")
        for img in imgs:
            assert img['alt'] == img['title']
        # The alt text of the first image stands for the whole comic.
        if imgs:
            alt = imgs[0]['alt']
        else:
            alt = ""
        return {
            'img': [img['src'] for img in imgs],
            'title': title,
            'alt': alt,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
2655
2656
2657 View Code Duplication
class EndlessOrigami(GenericEmptyComic, GenericNavigableComic):
    """Retriever for the Endless Origami comics."""
    name = "origami"
    long_name = "Endless Origami"
    url = "http://endlessorigami.com"
    get_navi_link = get_link_rel_next
    get_first_comic_link = get_a_navi_navifirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary for one comic from its parsed page.

        ``soup`` is the parsed comic page; ``link`` is not used here.
        """
        heading = soup.find('h2', class_='post-title')
        title = heading.string
        author_link = soup.find("span", class_="post-author").find("a")
        author = author_link.string
        day = string_to_date(
            soup.find("span", class_="post-date").string, "%B %d, %Y")
        imgs = soup.find("div", id="comic").find_all("img")
        assert all(img['alt'] == img['title'] for img in imgs)
        # Empty alt text when the post carries no image.
        alt = imgs[0]['alt'] if imgs else ""
        img_urls = [img['src'] for img in imgs]
        return {
            'img': img_urls,
            'title': title,
            'alt': alt,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
2684
2685
2686 View Code Duplication
class PlanC(GenericNavigableComic):
    """Retriever for the Plan C comics."""
    name = 'planc'
    long_name = 'Plan C'
    url = 'http://www.plancomic.com'
    get_navi_link = get_a_navi_comicnavnext_navinext
    get_first_comic_link = get_a_navi_navifirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary for one comic from its parsed page.

        ``soup`` is the parsed comic page; ``link`` is not used here.
        """
        heading = soup.find('h2', class_='post-title')
        title = heading.string
        posted_on = soup.find("span", class_="post-date").string
        day = string_to_date(posted_on, "%B %d, %Y")
        comic_div = soup.find('div', id='comic')
        img_urls = [img['src'] for img in comic_div.find_all('img')]
        return {
            'title': title,
            'img': img_urls,
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
0 ignored issues
show
This code seems to be duplicated in your project.
Loading history...
2708
2709
2710
class BuniComic(GenericNavigableComic):
    """Retriever for the Buni comics."""
    name = 'buni'
    long_name = 'BuniComics'
    url = 'http://www.bunicomic.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = get_a_comicnavbase_comicnavfirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary (images and title) for one comic.

        The title is taken from the ``title`` attribute of the single
        image expected inside ``div#comic``; ``link`` is not used here.
        """
        comic_div = soup.find('div', id='comic')
        imgs = comic_div.find_all('img')
        for img in imgs:
            assert img['alt'] == img['title']
        assert len(imgs) == 1
        only_img = imgs[0]
        return {
            'img': [img['src'] for img in imgs],
            'title': only_img['title'],
        }
2728
2729
2730
class GenericCommitStrip(GenericNavigableComic):
    """Shared retriever logic for Commit Strip, whatever the language.

    Subclasses are expected to provide ``first_url`` (left as
    ``NotImplemented`` here) along with the usual name and url.
    """
    first_url = NotImplemented
    get_first_comic_link = simulate_first_link
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary for one comic from its parsed page.

        ``soup`` is the parsed comic page; ``link`` is not used here.
        'title2' is the space-joined ``title`` attributes of the images
        (an empty string standing in for a missing attribute).
        """
        desc = soup.find('meta', property='og:description')['content']
        title = soup.find('meta', property='og:title')['content']
        content_div = soup.find('div', class_='entry-content')
        imgs = content_div.find_all('img')
        img_titles = [img.get('title', '') for img in imgs]
        # Image sources are converted to plain ASCII and joined with the
        # site url.
        img_urls = [
            urljoin_wrapper(cls.url, convert_iri_to_plain_ascii_uri(img['src']))
            for img in imgs
        ]
        return {
            'title': title,
            'title2': ' '.join(img_titles),
            'description': desc,
            'img': img_urls,
        }
2749
2750
2751
class CommitStripFr(GenericCommitStrip):
    """French edition of Commit Strip."""
    # Identification of the comic.
    name = 'commit_fr'
    long_name = 'Commit Strip (Fr)'
    _categories = ('FRANCAIS', )
    # Site location and first strip used as the navigation starting point.
    url = 'http://www.commitstrip.com/fr'
    first_url = 'http://www.commitstrip.com/fr/2012/02/22/interview/'
2758
2759
2760
class CommitStripEn(GenericCommitStrip):
    """English edition of Commit Strip."""
    # Identification of the comic.
    name = 'commit_en'
    long_name = 'Commit Strip (En)'
    # Site location and first strip used as the navigation starting point.
    url = 'http://www.commitstrip.com/en'
    first_url = 'http://www.commitstrip.com/en/2012/02/22/interview/'
2766
2767
2768 View Code Duplication
class GenericBoumerie(GenericNavigableComic):
    """Shared retriever logic for Boumeries, whatever the language.

    Subclasses are expected to provide ``date_format`` and ``lang`` (both
    left as ``NotImplemented`` here); they are passed to
    ``string_to_date`` when parsing the post date.
    """
    date_format = NotImplemented
    lang = NotImplemented
    get_navi_link = get_link_rel_next
    get_first_comic_link = get_a_navi_navifirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary for one comic from its parsed page.

        ``soup`` is the parsed comic page; ``link`` is not used here.
        """
        title = soup.find('h2', class_='post-title').string
        short_url = soup.find('link', rel='shortlink')['href']
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        posted_on = soup.find('span', class_='post-date').string
        day = string_to_date(posted_on, cls.date_format, cls.lang)
        comic_div = soup.find('div', id='comic')
        imgs = comic_div.find_all('img')
        for img in imgs:
            assert img['alt'] == img['title']
        return {
            'short_url': short_url,
            'img': [img['src'] for img in imgs],
            'title': title,
            'author': author,
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
2794
2795
2796
class BoumerieEn(GenericBoumerie):
    """English edition of Boumeries."""
    # Identification and location of the comic.
    name = 'boumeries_en'
    long_name = 'Boumeries (En)'
    url = 'http://comics.boumerie.com'
    # Locale and format used to parse the post dates.
    lang = 'en_GB.UTF-8'
    date_format = "%B %d, %Y"
2803
2804
2805
class BoumerieFr(GenericBoumerie):
    """French edition of Boumeries."""
    # Identification and location of the comic.
    name = 'boumeries_fr'
    long_name = 'Boumeries (Fr)'
    url = 'http://bd.boumerie.com'
    _categories = ('FRANCAIS', )
    # Locale and format used to parse the post dates.
    lang = "fr_FR.utf8"
    date_format = "%A, %d %B %Y"
2813
2814
2815 View Code Duplication
class UnearthedComics(GenericNavigableComic):
    """Class to retrieve Unearthed comics."""
    # Also on http://tapastic.com/series/UnearthedComics
    # Also on https://unearthedcomics.tumblr.com
    name = 'unearthed'
    long_name = 'Unearthed Comics'
    url = 'http://unearthedcomics.com'
    _categories = ('UNEARTHED', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://unearthedcomics.com/comics/world-with-turn-signals/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        ``soup`` is the parsed comic page; ``link`` is not used here.
        Returns a dict with the title, description, short url ('url2'),
        image urls and publication date. 'description' is the text of the
        ``og:description`` meta tag, or None when the page has no such tag.
        """
        short_url = soup.find('link', rel='shortlink')['href']
        title_elt = soup.find('h1') or soup.find('h2')
        title = title_elt.string if title_elt else ""
        desc_meta = soup.find('meta', property='og:description')
        # Keep the meta content (a string) rather than the element itself,
        # as every other comic class does; None is kept when it is missing.
        desc = desc_meta.get('content') if desc_meta is not None else None
        date_str = soup.find('time', class_='published updated hidden')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        post = soup.find('div', class_="entry content entry-content type-portfolio")
        imgs = post.find_all('img')
        return {
            'title': title,
            'description': desc,
            'url2': short_url,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
2847
2848
2849 View Code Duplication
class Optipess(GenericNavigableComic):
    """Retriever for the Optipess comics."""
    name = 'optipess'
    long_name = 'Optipess'
    url = 'http://www.optipess.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = get_a_navi_navifirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary for one comic from its parsed page.

        ``soup`` is the parsed comic page; ``link`` is not used here.
        A page without a ``div#comic`` element yields no image and an
        empty alt text.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        comic_div = soup.find('div', id='comic')
        if comic_div:
            imgs = comic_div.find_all('img')
        else:
            imgs = []
        # The title attribute of the first image serves as alt text.
        if imgs:
            alt = imgs[0]['title']
        else:
            alt = ""
        for img in imgs:
            assert img['alt'] == img['title'] == alt
        posted_on = soup.find('span', class_='post-date').string
        day = string_to_date(posted_on, "%B %d, %Y")
        return {
            'title': title,
            'alt': alt,
            'author': author,
            'img': [img['src'] for img in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
2877
2878
2879
class PainTrainComic(GenericNavigableComic):
    """Class to retrieve Pain Train Comics."""
    name = 'paintrain'
    long_name = 'Pain Train Comics'
    url = 'http://paintraincomic.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        ``soup`` is the parsed comic page; ``link`` is not used here.
        The comic number is extracted from the shortlink url, which is
        expected to look like ``<cls.url>/?p=<num>``.
        """
        title = soup.find('h2', class_='post-title').string
        short_url = soup.find('link', rel='shortlink')['href']
        # The site url is escaped so that its '.' only matches a literal dot.
        short_url_re = re.compile(r'^%s/\?p=([0-9]*)' % re.escape(cls.url))
        num = int(short_url_re.match(short_url).group(1))
        imgs = soup.find('div', id='comic').find_all('img')
        alt = imgs[0]['title']
        # Every image is expected to carry the same alt and title text.
        assert all(i['alt'] == i['title'] == alt for i in imgs)
        date_str = soup.find('span', class_='post-date').string
        day = string_to_date(date_str, "%d/%m/%Y")
        return {
            'short_url': short_url,
            'num': num,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'alt': alt,
            'title': title,
        }
2909
2910
2911
class MoonBeard(GenericNavigableComic):
    """Class to retrieve MoonBeard comics."""
    # Also on http://blog.squiresjam.es
    # Also on http://www.webtoons.com/en/comedy/moon-beard/list?title_no=471
    name = 'moonbeard'
    long_name = 'Moon Beard'
    url = 'http://moonbeard.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        ``soup`` is the parsed comic page; ``link`` is not used here.
        The comic number is extracted from the shortlink url, which is
        expected to look like ``<cls.url>/?p=<num>``.
        """
        title = soup.find('h2', class_='post-title').string
        short_url = soup.find('link', rel='shortlink')['href']
        # The site url is escaped so that its '.' only matches a literal dot.
        short_url_re = re.compile(r'^%s/\?p=([0-9]*)' % re.escape(cls.url))
        num = int(short_url_re.match(short_url).group(1))
        imgs = soup.find('div', id='comic').find_all('img')
        alt = imgs[0]['title']
        # Every image is expected to carry the same alt and title text.
        assert all(i['alt'] == i['title'] == alt for i in imgs)
        date_str = soup.find('span', class_='post-date').string
        day = string_to_date(date_str, "%B %d, %Y")
        tags = ' '.join(t['content'] for t in soup.find_all('meta', property='article:tag'))
        author = soup.find('span', class_='post-author').string
        return {
            'short_url': short_url,
            'num': num,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'title': title,
            'tags': tags,
            'alt': alt,
            'author': author,
        }
2947
2948
2949
class AHammADay(GenericEmptyComic, GenericNavigableComic):
    """Class to retrieve class A Hamm A Day comics."""
    name = 'hamm'
    long_name = 'A Hamm A Day'
    url = 'http://www.ahammaday.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.ahammaday.com/today/3/6/french'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns None when the page has no matching navigation item,
        instead of raising AttributeError.
        """
        # prev is next / next is prev
        li = last_soup.find('li', class_='previous' if next_ else 'next')
        return li.find('a') if li else None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        ``soup`` is the parsed comic page; ``link`` is not used here.
        Returns a dict with the image urls, title, author and
        publication date.
        """
        date_str = soup.find('time', class_='published')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        author = soup.find('span', class_='blog-author').find('a').string
        title = soup.find('meta', property='og:title')['content']
        imgs = soup.find_all('meta', itemprop='image')
        return {
            'img': [i['content'] for i in imgs],
            'title': title,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
        }
2980
2981 View Code Duplication
0 ignored issues
show
This code seems to be duplicated in your project.
Loading history...
2982
class LittleLifeLines(GenericNavigableComic):
    """Retriever for the Little Life Lines comics."""
    # Also on https://little-life-lines.tumblr.com
    name = 'life'
    long_name = 'Little Life Lines'
    url = 'http://www.littlelifelines.com'
    first_url = 'http://www.littlelifelines.com/comics/well-done'
    get_first_comic_link = simulate_first_link
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the link to the next (or previous) comic, or None."""
        # prev is next / next is prev
        li_class = 'prev' if next_ else 'next'
        li = last_soup.find('li', class_=li_class)
        if li:
            return li.find('a')
        return None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary for one comic from its parsed page.

        ``soup`` is the parsed comic page; ``link`` is not used here.
        Images without a ``src`` attribute are ignored.
        """
        title = soup.find('meta', property='og:title')['content']
        desc = soup.find('meta', property='og:description')['content']
        published = soup.find('time', class_='published')['datetime']
        day = string_to_date(published, "%Y-%m-%d")
        author = soup.find('a', rel='author').string
        body = soup.find('div', class_="body entry-content")
        imgs = [img for img in body.find_all('img') if img.get('src') is not None]
        # The alt text of the first remaining image stands for the comic.
        alt = imgs[0]['alt']
        img_urls = [img['src'] for img in imgs]
        return {
            'title': title,
            'alt': alt,
            'description': desc,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': img_urls,
        }
3021
3022
3023
class GenericWordPressInkblot(GenericNavigableComic):
    """Shared scraper for comics hosted on WordPress with the Inkblot theme.

    The concrete subclasses visible in this file only declare `name`,
    `long_name` and `url`.
    """
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the "first comic" anchor found on the home page."""
        first_link_classes = 'webcomic-link webcomic1-link first-webcomic-link first-webcomic1-link'
        home_page = get_soup_at_url(cls.url)
        return home_page.find('a', class_=first_link_classes)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic dictionary (title, date, images) from a page."""
        page_title = soup.find('meta', property='og:title')['content']
        image_div = soup.find('div', class_='webcomic-image')
        images = image_div.find_all('img')
        published = soup.find('meta', property='article:published_time')['content']
        # Only the leading YYYY-MM-DD part of the timestamp is parsed.
        pub_date = string_to_date(published[:10], "%Y-%m-%d")
        sources = []
        for image in images:
            sources.append(image['src'])
        return {
            'title': page_title,
            'day': pub_date.day,
            'month': pub_date.month,
            'year': pub_date.year,
            'img': sources,
        }
3046
3047
3048
class EverythingsStupid(GenericWordPressInkblot):
    """Comic class for Everything's Stupid (WordPress/Inkblot site)."""
    # Other places where the comic is published:
    #  - http://tapastic.com/series/EverythingsStupid
    #  - http://www.webtoons.com/en/challenge/everythings-stupid/list?title_no=14591
    #  - http://everythingsstupidcomics.tumblr.com
    name = 'stupid'
    long_name = "Everything's Stupid"
    url = 'http://everythingsstupid.net'
3056
3057
3058
class TheIsmComics(GenericWordPressInkblot):
    """Comic class for The Ism (WordPress/Inkblot site)."""
    # Possibly also on https://tapastic.com/series/TheIsm
    name = 'theism'
    long_name = 'The Ism'
    url = 'http://www.theism-comics.com'
3064
3065
3066
class WoodenPlankStudios(GenericWordPressInkblot):
    """Comic class for Wooden Plank Studios (WordPress/Inkblot site)."""
    name = 'woodenplank'
    long_name = 'Wooden Plank Studios'
    url = 'http://woodenplankstudios.com'
3071
3072
3073
class ElectricBunnyComic(GenericNavigableComic):
    """Comic class for Electric Bunny Comics."""
    # Also on http://electricbunnycomics.tumblr.com
    name = 'bunny'
    long_name = 'Electric Bunny Comic'
    url = 'http://www.electricbunnycomics.com/View/Comic/153/Welcome+to+Hell'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the element wrapping the 'First' navigation image."""
        first_button = get_soup_at_url(cls.url).find('img', alt='First')
        return first_button.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the element wrapping the 'Next'/'Back' image, or None."""
        wanted_alt = 'Next' if next_ else 'Back'
        button = last_soup.find('img', alt=wanted_alt)
        if not button:
            return None
        return button.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic dictionary (title, images) from the page metadata."""
        page_title = soup.find('meta', property='og:title')['content']
        image_metas = soup.find_all('meta', property='og:image')
        image_urls = []
        for meta in image_metas:
            image_urls.append(meta['content'])
        return {
            'title': page_title,
            'img': image_urls,
        }
3101
3102
3103
class SheldonComics(GenericNavigableComic):
    """Comic class for Sheldon."""
    # Also on http://www.gocomics.com/sheldon
    name = 'sheldon'
    long_name = 'Sheldon Comics'
    url = 'http://www.sheldoncomics.com'

    @classmethod
    def get_first_comic_link(cls):
        """Return the anchor whose id is 'nav-first' on the home page."""
        home_page = get_soup_at_url(cls.url)
        return home_page.find("a", id="nav-first")

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the next/previous anchor, or None if there is none.

        Anchors pointing back to the bare home page are not considered
        navigation links and are skipped.
        """
        wanted_id = "nav-next" if next_ else "nav-prev"
        candidates = (
            anchor for anchor in last_soup.find_all("a", id=wanted_id)
            if anchor['href'] != 'http://www.sheldoncomics.com'
        )
        return next(candidates, None)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic dictionary (title, image) from a page."""
        foot = soup.find("div", id="comic-foot")
        images = foot.find_all("img")
        # Sanity checks on the page layout: alt and title agree, and the
        # page holds exactly one comic image.
        assert all(image['alt'] == image['title'] for image in images)
        assert len(images) == 1
        comic_title = images[0]['title']
        sources = [image['src'] for image in images]
        return {
            'title': comic_title,
            'img': sources,
        }
3134
3135
3136 View Code Duplication
class Ubertool(GenericNavigableComic):
    """Comic class for Ubertool."""
    # Other places where the comic is published:
    #  - https://ubertool.tumblr.com
    #  - https://tapastic.com/series/ubertool
    name = 'ubertool'
    long_name = 'Ubertool'
    url = 'http://ubertoolcomic.com'
    _categories = ('UBERTOOL', )
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_a_comicnavbase_comicnavnext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic dictionary (images, title, date) from a page."""
        post_title = soup.find('h2', class_='post-title').string
        post_date = soup.find('span', class_='post-date').string
        # Dates are written like "January 31, 2016".
        published = string_to_date(post_date, "%B %d, %Y")
        comic_div = soup.find('div', id='comic')
        images = comic_div.find_all('img')
        sources = []
        for image in images:
            sources.append(image['src'])
        return {
            'img': sources,
            'title': post_title,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
3161
3162 View Code Duplication
0 ignored issues
show
This code seems to be duplicated in your project.
Loading history...
3163
class EarthExplodes(GenericNavigableComic):
    """Comic class for The Earth Explodes."""
    name = 'earthexplodes'
    long_name = 'The Earth Explodes'
    url = 'http://www.earthexplodes.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.earthexplodes.com/comics/000/'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor whose id is 'next' or 'prev'."""
        wanted_id = 'next' if next_ else 'prev'
        return last_soup.find('a', id=wanted_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic dictionary (images, title, alt) from a page."""
        page_title = soup.find('title').string
        image_div = soup.find('div', id='image')
        images = image_div.find_all('img')
        # The hover text of the first image is stored as the alt text.
        hover_text = images[0].get('title', '')
        absolute_urls = []
        for image in images:
            absolute_urls.append(urljoin_wrapper(cls.url, image['src']))
        return {
            'img': absolute_urls,
            'title': page_title,
            'alt': hover_text,
        }
3188
3189
3190 View Code Duplication
class PomComics(GenericNavigableComic):
    """Comic class for Pom Comics / Piece of Me."""
    name = 'pom'
    long_name = 'Pom Comics / Piece of Me'
    url = 'http://www.pomcomic.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'btn_first' anchor found on the home page."""
        home_page = get_soup_at_url(cls.url)
        return home_page.find('a', class_='btn_first')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the 'btn_next' or 'btn_prev' anchor of the page."""
        button_class = 'btn_next' if next_ else 'btn_prev'
        return last_soup.find('a', class_=button_class)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic dictionary (title, desc, tags, images)."""
        comic_name = soup.find('h1', id="comic-name").string
        description = soup.find('meta', property='og:description')['content']
        keywords = soup.find('meta', attrs={'name': 'keywords'})['content']
        comic_div = soup.find('div', class_='comic')
        images = comic_div.find_all('img')
        absolute_urls = []
        for image in images:
            absolute_urls.append(urljoin_wrapper(cls.url, image['src']))
        return {
            'title': comic_name,
            'desc': description,
            'tags': keywords,
            'img': absolute_urls,
        }
3220
3221
3222
class CubeDrone(GenericEmptyComic, GenericNavigableComic):
    """Class to retrieve Cube Drone comics."""
    name = 'cubedrone'
    long_name = 'Cube Drone'
    url = 'http://cube-drone.com/comics'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        The link is the parent element of the 'backward' glyph icon.
        """
        return get_soup_at_url(cls.url).find('span', class_='glyphicon glyphicon-backward').parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns the parent element of the right/left chevron icon, or None
        when the page has no such icon.
        """
        class_ = 'glyphicon glyphicon-chevron-' + ('right' if next_ else 'left')
        span = last_soup.find('span', class_=class_)
        # find() returns None when the chevron is missing: report "no link"
        # (as other navigable comics do) instead of failing on None.parent.
        return span.parent if span else None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dictionary with the secondary url, both titles, the alt
        text and the list of image urls.
        """
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        url2 = soup.find('meta', attrs={'name': 'twitter:url'})['content']
        # date_str = soup.find('h2', class_='comic_title').find('small').string
        # day = string_to_date(date_str, "%B %d, %Y, %I:%M %p")
        imgs = soup.find_all('img', class_='comic img-responsive')
        title2 = imgs[0]['title']
        alt = imgs[0]['alt']
        return {
            'url2': url2,
            'title': title,
            'title2': title2,
            'alt': alt,
            'img': [i['src'] for i in imgs],
        }
3257
3258
3259
class MakeItStoopid(GenericNavigableComic):
    """Comic class for Make It Stoopid."""
    name = 'stoopid'
    long_name = 'Make it stoopid'
    url = 'http://makeitstoopid.com/comic.php'

    @classmethod
    def get_nav(cls, soup):
        """Return the navigation elements of a page.

        The 'cnav' elements are expected twice on the page; only the first
        five are returned, in the order begin, prev, archive, next, end.
        Elements without an href are replaced by None.
        """
        elements = soup.find_all(class_='cnav')
        first_bar = elements[:5]
        second_bar = elements[5:]
        assert first_bar == second_bar
        links = []
        for element in first_bar:
            if element.get('href') is None:
                links.append(None)
            else:
                links.append(element)
        return links

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'begin' navigation element of the home page."""
        home_nav = cls.get_nav(get_soup_at_url(cls.url))
        return home_nav[0]

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the 'next' (index 3) or 'prev' (index 1) navigation element."""
        position = 3 if next_ else 1
        return cls.get_nav(last_soup)[position]

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic dictionary; the title comes from the link used."""
        link_title = link['title']
        images = soup.find_all('img', id='comicimg')
        sources = []
        for image in images:
            sources.append(image['src'])
        return {
            'title': link_title,
            'img': sources,
        }
3293
3294
3295 View Code Duplication
class MarketoonistComics(GenericNavigableComic):
    """Comic class for Marketoonist."""
    name = 'marketoonist'
    long_name = 'Marketoonist'
    url = 'https://marketoonist.com/cartoons'
    get_first_comic_link = simulate_first_link
    get_navi_link = get_link_rel_next
    first_url = 'https://marketoonist.com/2002/10/the-8-types-of-brand-managers-2.html'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic dictionary (images, date, title) from page metadata."""
        image_metas = soup.find_all('meta', property='og:image')
        published = soup.find('meta', property='article:published_time')['content']
        # Only the leading YYYY-MM-DD part of the timestamp is parsed.
        pub_date = string_to_date(published[:10], "%Y-%m-%d")
        page_title = soup.find('meta', property='og:title')['content']
        image_urls = []
        for meta in image_metas:
            image_urls.append(meta['content'])
        return {
            'img': image_urls,
            'day': pub_date.day,
            'month': pub_date.month,
            'year': pub_date.year,
            'title': page_title,
        }
3318
3319
3320 View Code Duplication
class ConsoliaComics(GenericNavigableComic):
    """Comic class for Consolia."""
    name = 'consolia'
    long_name = 'consolia'
    url = 'https://consolia-comic.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the anchor with class 'first' found on the home page."""
        home_page = get_soup_at_url(cls.url)
        return home_page.find('a', class_='first')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor with class 'next' or 'prev'."""
        wanted_class = 'next' if next_ else 'prev'
        return last_soup.find('a', class_=wanted_class)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic dictionary (title, images, date) from a page."""
        page_title = soup.find('meta', property='og:title')['content']
        timestamp = soup.find('time')["datetime"]
        pub_date = string_to_date(timestamp, "%Y-%m-%d")
        image_metas = soup.find_all('meta', property='og:image')
        image_urls = []
        for meta in image_metas:
            image_urls.append(meta['content'])
        return {
            'title': page_title,
            'img': image_urls,
            'day': pub_date.day,
            'month': pub_date.month,
            'year': pub_date.year,
        }
3351
3352 View Code Duplication
0 ignored issues
show
This code seems to be duplicated in your project.
Loading history...
3353
class TuMourrasMoinsBete(GenericNavigableComic):
    """Comic class for Tu Mourras Moins Bete."""
    name = 'mourrasmoinsbete'
    long_name = 'Tu Mourras Moins Bete'
    url = 'http://tumourrasmoinsbete.blogspot.fr'
    _categories = ('FRANCAIS', )
    get_first_comic_link = simulate_first_link
    first_url = 'http://tumourrasmoinsbete.blogspot.fr/2008/06/essai.html'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the blog pager anchor towards the newer or older post."""
        if next_:
            pager_id = 'Blog1_blog-pager-newer-link'
        else:
            pager_id = 'Blog1_blog-pager-older-link'
        return last_soup.find('a', id=pager_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic dictionary (images, author, title) from a post."""
        page_title = soup.find('title').string
        post_body = soup.find('div', itemprop='description articleBody')
        images = post_body.find_all('img')
        post_author = soup.find('span', itemprop='author').string
        sources = []
        for image in images:
            sources.append(image['src'])
        return {
            'img': sources,
            'author': post_author,
            'title': page_title,
        }
3378
3379
3380
class GeekAndPoke(GenericNavigableComic):
    """Class to retrieve Geek And Poke comics."""
    name = 'geek'
    long_name = 'Geek And Poke'
    url = 'http://geek-and-poke.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://geek-and-poke.com/geekandpoke/2006/8/27/a-new-place-for-a-not-so-old-blog.html'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic."""
        # The class names are deliberately swapped: 'prev-item' is looked up
        # when the next comic is wanted, and the other way round.
        return last_soup.find('a', class_='prev-item' if next_ else 'next-item')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dictionary with the title, alt text, description, author,
        publication date (day/month/year) and the list of absolute image
        urls. Posts without any image yield an empty alt and image list.
        """
        title = soup.find('meta', property='og:title')['content']
        desc = soup.find('meta', property='og:description')['content']
        date_str = soup.find('time', class_='published')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        author = soup.find('a', rel='author').string
        # Posts use one of two containers depending on their layout.
        div_content = (soup.find('div', class_="body entry-content") or
                       soup.find('div', class_="special-content"))
        imgs = div_content.find_all('img')
        # Only keep images that actually point to something.
        imgs = [i for i in imgs if i.get('src') is not None]
        # NOTE(review): on a BeautifulSoup tag, `'title' not in i` appears to
        # test the tag's children rather than its attributes, which would
        # make this check always pass - confirm before relying on it.
        assert all('title' not in i or i['alt'] == i['title'] for i in imgs)
        # Alt text is a string for every comic: use an empty string (not an
        # empty list) when the post has no image.
        alt = imgs[0].get('alt', "") if imgs else ""
        return {
            'title': title,
            'alt': alt,
            'description': desc,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
        }
3418
3419 View Code Duplication
0 ignored issues
show
This code seems to be duplicated in your project.
Loading history...
3420
class GloryOwlComix(GenericNavigableComic):
    """Comic class for Glory Owl."""
    name = 'gloryowl'
    long_name = 'Glory Owl'
    url = 'http://gloryowlcomix.blogspot.fr'
    _categories = ('NSFW', 'FRANCAIS')
    get_first_comic_link = simulate_first_link
    first_url = 'http://gloryowlcomix.blogspot.fr/2013/02/1_7.html'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the blog pager anchor towards the newer or older post."""
        if next_:
            pager_id = 'Blog1_blog-pager-newer-link'
        else:
            pager_id = 'Blog1_blog-pager-older-link'
        return last_soup.find('a', id=pager_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic dictionary (images, author, title) from a post."""
        page_title = soup.find('title').string
        image_links = soup.find_all('link', rel='image_src')
        post_author = soup.find('a', rel='author').string
        image_urls = []
        for image_link in image_links:
            image_urls.append(image_link['href'])
        return {
            'img': image_urls,
            'author': post_author,
            'title': page_title,
        }
3445
3446
3447
class GenericTumblrV1(GenericComic):
    """Generic class to retrieve comics from Tumblr using the V1 API."""
    _categories = ('TUMBLR', )

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generic implementation of get_next_comic for Tumblr comics.

        Yields one comic dictionary per post returned by get_posts, skipping
        the posts for which get_comic_info returns None.
        """
        for p in cls.get_posts(last_comic):
            comic = cls.get_comic_info(p)
            if comic is not None:
                yield comic

    @classmethod
    def get_url_from_post(cls, post):
        """Get the url of a post.

        A message is printed when the url does not start with cls.url; the
        url is returned in any case.
        """
        url = post['url']
        if not url.startswith(cls.url):
            print("url '%s' does not start with '%s'" % (url, cls.url))
        return url

    @classmethod
    def get_api_url(cls):
        """Get the url of the API entry point ('/api/read/') for this blog."""
        return urljoin_wrapper(cls.url, '/api/read/')

    @classmethod
    def get_api_url_for_id(cls, tumblr_id):
        """Get the API url for the post with the given integer id."""
        return cls.get_api_url() + '?id=%d' % (tumblr_id)

    @classmethod
    def get_comic_info(cls, post):
        """Get information about a particular comics.

        Returns None for posts whose type is not 'photo', otherwise a
        dictionary describing the comic (urls, date, title, tags, images,
        tumblr id and API url).
        """
        type_ = post['type']
        if type_ != 'photo':
            return None
        tumblr_id = int(post['id'])
        api_url = cls.get_api_url_for_id(tumblr_id)
        day = datetime.datetime.fromtimestamp(int(post['unix-timestamp'])).date()
        caption = post.find('photo-caption')
        title = caption.string if caption else ""
        tags = ' '.join(t.string for t in post.find_all('tag'))
        # Photos may appear in 'photo' tags and/or straight in the post
        photo_tags = post.find_all('photo')
        if not photo_tags:
            photo_tags = [post]
        # Images are in multiple resolutions - taking the first one
        imgs = [photo.find('photo-url') for photo in photo_tags]
        return {
            'url': cls.get_url_from_post(post),
            'url2': post['url-with-slug'],
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'title': title,
            'tags': tags,
            'img': [i.string for i in imgs],
            'tumblr-id': tumblr_id,
            'api_url': api_url,
        }

    @classmethod
    def get_posts(cls, last_comic, nb_post_per_call=10):
        """Get posts using API. nb_post_per_call is max 50.

        Posts are retrieved from newer to older as per the tumblr v1 api
        but are returned in chronological order.

        Only the posts newer than last_comic (identified by its 'tumblr-id')
        are returned; all posts are returned when last_comic is empty. An
        empty iterable is returned when the previous post cannot be found.
        """
        # The module only does `import urllib`, which does not by itself
        # guarantee that the `urllib.error` submodule is loaded.
        import urllib.error
        waiting_for_id = last_comic['tumblr-id'] if last_comic else None
        posts_acc = []
        # Test the id itself so that this check always agrees with the
        # truthiness test above (None cannot be formatted into the API url).
        if waiting_for_id is not None:
            # Sometimes, tumblr posts are deleted. When previous post is deleted, we
            # might end up spending a lot of time looking for something that
            # doesn't exist. Failing early and clearly might be a better option.
            last_api_url = cls.get_api_url_for_id(waiting_for_id)
            try:
                get_soup_at_url(last_api_url)
            except urllib.error.HTTPError:
                # Tell apart "the whole site is unreachable" from "only the
                # previous post is gone" in the message.
                try:
                    get_soup_at_url(cls.url)
                except urllib.error.HTTPError:
                    print("Did not find previous post nor main url %s" % cls.url)
                else:
                    print("Did not find previous post %s : it might have been deleted" % last_api_url)
                return reversed(posts_acc)
        api_url = cls.get_api_url()
        # First call is only used to know the total number of posts.
        posts = get_soup_at_url(api_url).find('posts')
        start, total = int(posts['start']), int(posts['total'])
        assert start == 0
        for starting_num in range(0, total, nb_post_per_call):
            api_url2 = api_url + '?start=%d&num=%d' % (starting_num, nb_post_per_call)
            posts2 = get_soup_at_url(api_url2).find('posts')
            start2, total2 = int(posts2['start']), int(posts2['total'])
            assert starting_num == start2, "%d != %d" % (starting_num, start2)
            # This may happen and should be handled in the future
            assert total == total2, "%d != %d" % (total, total2)
            for p in posts2.find_all('post'):
                tumblr_id = int(p['id'])
                # Stop as soon as the last known post is reached: everything
                # accumulated so far is newer than it.
                if waiting_for_id and waiting_for_id == tumblr_id:
                    return reversed(posts_acc)
                posts_acc.append(p)
        if waiting_for_id is None:
            return reversed(posts_acc)
        print("Did not find %s : there might be a problem" % waiting_for_id)
        return []
3548
3549
3550
class SaturdayMorningBreakfastCerealTumblr(GenericTumblrV1):
    """Comic class for Saturday Morning Breakfast Cereal, read from Tumblr."""
    # Other places where the comic is published:
    #  - http://www.gocomics.com/saturday-morning-breakfast-cereal
    #  - http://www.smbc-comics.com
    name = 'smbc-tumblr'
    long_name = 'Saturday Morning Breakfast Cereal (from Tumblr)'
    url = 'http://smbc-comics.tumblr.com'
    _categories = ('SMBC', )
3558
3559
3560
class IrwinCardozo(GenericTumblrV1):
    """Comic class for Irwin Cardozo Comics, read from Tumblr."""
    name = 'irwinc'
    long_name = 'Irwin Cardozo'
    url = 'http://irwincardozocomics.tumblr.com'
3565
3566
3567
class AccordingToDevin(GenericTumblrV1):
    """Comic class for According To Devin, read from Tumblr."""
    name = 'devin'
    long_name = 'According To Devin'
    url = 'http://accordingtodevin.tumblr.com'
3572
3573
3574
class ItsTheTieTumblr(GenericTumblrV1):
    """Comic class for It's the tie, read from Tumblr."""
    # Other places where the comic is published:
    #  - http://itsthetie.com
    #  - https://tapastic.com/series/itsthetie
    name = 'tie-tumblr'
    long_name = "It's the tie (from Tumblr)"
    url = 'http://itsthetie.tumblr.com'
    _categories = ('TIE', )
3582
3583
3584
class OctopunsTumblr(GenericTumblrV1):
    """Comic class for Octopuns, read from Tumblr."""
    # Other place where the comic is published: http://www.octopuns.net
    name = 'octopuns-tumblr'
    long_name = 'Octopuns (from Tumblr)'
    url = 'http://octopuns.tumblr.com'
3590
3591
3592
class PicturesInBoxesTumblr(GenericTumblrV1):
    """Comic class for Pictures In Boxes, read from Tumblr."""
    # Other place where the comic is published: http://www.picturesinboxes.com
    name = 'picturesinboxes-tumblr'
    long_name = 'Pictures in Boxes (from Tumblr)'
    url = 'https://picturesinboxescomic.tumblr.com'
3598
3599
3600
class TubeyToonsTumblr(GenericTumblrV1):
    """Comic class for TubeyToons, read from Tumblr."""
    # Other places where the comic is published:
    #  - http://tapastic.com/series/Tubey-Toons
    #  - http://tubeytoons.com
    name = 'tubeytoons-tumblr'
    long_name = 'Tubey Toons (from Tumblr)'
    url = 'https://tubeytoons.tumblr.com'
    _categories = ('TUNEYTOONS', )
3608
3609
3610
class UnearthedComicsTumblr(GenericTumblrV1):
    """Comic class for Unearthed Comics, read from Tumblr."""
    # Other places where the comic is published:
    #  - http://tapastic.com/series/UnearthedComics
    #  - http://unearthedcomics.com
    name = 'unearthed-tumblr'
    long_name = 'Unearthed Comics (from Tumblr)'
    url = 'https://unearthedcomics.tumblr.com'
    _categories = ('UNEARTHED', )
3618
3619
3620
class PieComic(GenericTumblrV1):
    """Comic class for Pie Comic, read from Tumblr."""
    name = 'pie'
    long_name = 'Pie Comic'
    url = 'http://piecomic.tumblr.com'
3625
3626
3627
class MrEthanDiamond(GenericTumblrV1):
    """Comic class for Mr Ethan Diamond, read from Tumblr."""
    name = 'diamond'
    long_name = 'Mr Ethan Diamond'
    url = 'http://mrethandiamond.tumblr.com'
3632
3633
3634
class Flocci(GenericTumblrV1):
    """Comic class for floccinaucinihilipilification, read from Tumblr."""
    name = 'flocci'
    long_name = 'floccinaucinihilipilification'
    url = 'http://floccinaucinihilipilificationa.tumblr.com'
3639
3640
3641
class UpAndOut(GenericTumblrV1):
    """Comic class for Up & Out, read from Tumblr."""
    # Other place where the comic is published: http://tapastic.com/series/UP-and-OUT
    name = 'upandout'
    long_name = 'Up And Out (from Tumblr)'
    url = 'http://upandoutcomic.tumblr.com'
3647
3648
3649
class Pundemonium(GenericTumblrV1):
    """Comic class for Pundemonium, read from Tumblr."""
    name = 'pundemonium'
    long_name = 'Pundemonium'
    url = 'http://monstika.tumblr.com'
3654
3655
3656
class PoorlyDrawnLinesTumblr(GenericTumblrV1):
    """Comic class for Poorly Drawn Lines, read from Tumblr."""
    # Other place where the comic is published: http://poorlydrawnlines.com
    name = 'poorlydrawn-tumblr'
    long_name = 'Poorly Drawn Lines (from Tumblr)'
    url = 'http://pdlcomics.tumblr.com'
    _categories = ('POORLYDRAWN', )
3663
3664
3665
class PearShapedComics(GenericTumblrV1):
    """Comic class for Pear-Shaped Comics, read through the Tumblr V1 API."""
    name = 'pearshaped'
    long_name = 'Pear-Shaped Comics'
    url = 'http://pearshapedcomics.com'
3670
3671
3672
class PondScumComics(GenericTumblrV1):
    """Comic class for Pond Scum, read from Tumblr."""
    name = 'pond'
    long_name = 'Pond Scum'
    url = 'http://pondscumcomic.tumblr.com'
3677
3678
3679
class MercworksTumblr(GenericTumblrV1):
    """Comic class for Mercworks, read from Tumblr."""
    # Other place where the comic is published: http://mercworks.net
    name = 'mercworks-tumblr'
    long_name = 'Mercworks (from Tumblr)'
    url = 'http://mercworks.tumblr.com'
3685
3686
3687
class OwlTurdTumblr(GenericTumblrV1):
    """Comic class for Owl Turd, read from Tumblr."""
    # Other place where the comic is published: http://tapastic.com/series/Owl-Turd-Comix
    name = 'owlturd-tumblr'
    long_name = 'Owl Turd (from Tumblr)'
    url = 'http://owlturdcomix.tumblr.com'
    _categories = ('OWLTURD', )
3694
3695
3696
class VectorBelly(GenericTumblrV1):
    """Comic class for Vector Belly, read from Tumblr."""
    # Other place where the comic is published: http://vectorbelly.com
    name = 'vector'
    long_name = 'Vector Belly'
    url = 'http://vectorbelly.tumblr.com'
3702
3703
3704
class GoneIntoRapture(GenericTumblrV1):
    """Comic class for Gone Into Rapture, read through the Tumblr V1 API."""
    # Other places where the comic is published:
    #  - http://goneintorapture.tumblr.com
    #  - http://tapastic.com/series/Goneintorapture
    name = 'rapture'
    long_name = 'Gone Into Rapture'
    url = 'http://goneintorapture.com'
3711
3712
3713
class TheOatmealTumblr(GenericTumblrV1):
    """Comic class for The Oatmeal, read from Tumblr."""
    # Other place where the comic is published: http://theoatmeal.com
    name = 'oatmeal-tumblr'
    long_name = 'The Oatmeal (from Tumblr)'
    url = 'http://oatmeal.tumblr.com'
3719
3720
3721
class HeckIfIKnowComicsTumblr(GenericTumblrV1):
    """Comic class for Heck If I Know Comics, read through the Tumblr V1 API."""
    # Other place where the comic is published: http://tapastic.com/series/Regular
    name = 'heck-tumblr'
    long_name = 'Heck if I Know comics (from Tumblr)'
    url = 'http://heckifiknowcomics.com'
3727
3728
3729
class MyJetPack(GenericTumblrV1):
    """Comic class for My Jet Pack, read from Tumblr."""
    name = 'jetpack'
    long_name = 'My Jet Pack'
    url = 'http://myjetpack.tumblr.com'
3734
3735
3736
class CheerUpEmoKidTumblr(GenericTumblrV1):
    """Comic class for Cheer Up Emo Kid, read from Tumblr."""
    # Other places where the comic is published:
    #  - http://www.cheerupemokid.com
    #  - http://tapastic.com/series/CUEK
    name = 'cuek-tumblr'
    long_name = 'Cheer Up Emo Kid (from Tumblr)'
    url = 'https://enzocomics.tumblr.com'
3743
3744
3745
class ForLackOfABetterComic(GenericTumblrV1):
    """Comic class for For Lack Of A Better Comic, read from Tumblr."""
    # Other place where the comic is published: http://forlackofabettercomic.com
    name = 'lack'
    long_name = 'For Lack Of A Better Comic'
    url = 'http://forlackofabettercomic.tumblr.com'
3751
3752
3753
class ZenPencilsTumblr(GenericTumblrV1):
    """Comic class for Zen Pencils, read from Tumblr."""
    # Other places where the comic is published:
    #  - http://zenpencils.com
    #  - http://www.gocomics.com/zen-pencils
    name = 'zenpencils-tumblr'
    long_name = 'Zen Pencils (from Tumblr)'
    url = 'http://zenpencils.tumblr.com'
    _categories = ('ZENPENCILS', )
3761
3762
3763
class ThreeWordPhraseTumblr(GenericTumblrV1):
    """Comic class for Three Word Phrase, read from Tumblr."""
    # Other place where the comic is published: http://threewordphrase.com
    name = 'threeword-tumblr'
    long_name = 'Three Word Phrase (from Tumblr)'
    url = 'http://threewordphrase.tumblr.com'
3769
3770
3771
class TimeTrabbleTumblr(GenericTumblrV1):
    """Comic class for Time Trabble, read from Tumblr."""
    # Other place where the comic is published: http://timetrabble.com
    name = 'timetrabble-tumblr'
    long_name = 'Time Trabble (from Tumblr)'
    url = 'http://timetrabble.tumblr.com'
3777
3778
3779
class SafelyEndangeredTumblr(GenericTumblrV1):
    """Comic class for Safely Endangered, read through the Tumblr V1 API."""
    # Other place where the comic is published: http://www.safelyendangered.com
    name = 'endangered-tumblr'
    long_name = 'Safely Endangered (from Tumblr)'
    url = 'http://tumblr.safelyendangered.com'
3785
3786
3787
class MouseBearComedyTumblr(GenericTumblrV1):
    """Comic class for Mouse Bear Comedy, read from Tumblr."""
    # Other place where the comic is published: http://www.mousebearcomedy.com
    name = 'mousebear-tumblr'
    long_name = 'Mouse Bear Comedy (from Tumblr)'
    url = 'http://mousebearcomedy.tumblr.com'
3793
3794
3795
class BouletCorpTumblr(GenericTumblrV1):
    """Comic class for BouletCorp, read from Tumblr."""
    # Other place where the comic is published: http://www.bouletcorp.com
    name = 'boulet-tumblr'
    long_name = 'Boulet Corp (from Tumblr)'
    url = 'https://bouletcorp.tumblr.com'
    _categories = ('BOULET', )
3802
3803
3804
class TheAwkwardYetiTumblr(GenericTumblrV1):
    """Retriever for The Awkward Yeti comics posted on Tumblr."""
    # Also on http://www.gocomics.com/the-awkward-yeti
    # Also on http://theawkwardyeti.com
    # Also on https://tapastic.com/series/TheAwkwardYeti
    name = 'yeti-tumblr'
    long_name = 'The Awkward Yeti (from Tumblr)'
    url = 'http://larstheyeti.tumblr.com'
    _categories = ('YETI', )
3813
3814
3815
class NellucNhoj(GenericTumblrV1):
    """Retriever for the Nelluc Nhoj comics."""
    name = 'nhoj'
    long_name = 'Nelluc Nhoj'
    url = 'http://nellucnhoj.com'
3820
3821
3822
class DownTheUpwardSpiralTumblr(GenericTumblrV1):
    """Retriever for the Down The Upward Spiral comics posted on Tumblr."""
    # Also on http://www.downtheupwardspiral.com
    # Also on https://tapastic.com/series/Down-the-Upward-Spiral
    name = 'spiral-tumblr'
    long_name = 'Down the Upward Spiral (from Tumblr)'
    url = 'http://downtheupwardspiral.tumblr.com'
3829
3830
3831
class AsPerUsualTumblr(GenericTumblrV1):
    """Class to retrieve As Per Usual comics."""
    # Also on https://tapastic.com/series/AsPerUsual
    name = 'usual-tumblr'
    long_name = 'As Per Usual (from Tumblr)'
    url = 'http://as-per-usual.tumblr.com'
    # The other comic classes declare their categories under the underscore
    # prefixed name; this class previously used the un-prefixed spelling.
    _categories = ('DAMILEE', )
    # NOTE(review): previous spelling kept as an alias in case anything reads
    # it - remove once confirmed unused.
    categories = _categories
3838
3839
3840
class HotComicsForCoolPeopleTumblr(GenericTumblrV1):
    """Class to retrieve Hot Comics For Cool People."""
    # Also on https://tapastic.com/series/Hot-Comics-For-Cool-People
    # Also on http://hotcomics.biz (links to tumblr)
    # Also on http://hcfcp.com (links to tumblr)
    name = 'hotcomics-tumblr'
    long_name = 'Hot Comics For Cool People (from Tumblr)'
    url = 'http://hotcomicsforcoolpeople.tumblr.com'
    # The other comic classes declare their categories under the underscore
    # prefixed name; this class previously used the un-prefixed spelling.
    _categories = ('DAMILEE', )
    # NOTE(review): previous spelling kept as an alias in case anything reads
    # it - remove once confirmed unused.
    categories = _categories
3849
3850
3851
class OneOneOneOneComicTumblr(GenericTumblrV1):
    """Retriever for the 1111 Comics posted on Tumblr."""
    # Also on http://www.1111comics.me
    # Also on https://tapastic.com/series/1111-Comics
    name = '1111-tumblr'
    long_name = '1111 Comics (from Tumblr)'
    url = 'http://comics1111.tumblr.com'
    _categories = ('ONEONEONEONE', )
3859
3860
3861
class JhallComicsTumblr(GenericTumblrV1):
    """Retriever for the Jhall Comics posted on Tumblr."""
    # Also on http://jhallcomics.com
    name = 'jhall-tumblr'
    long_name = 'Jhall Comics (from Tumblr)'
    url = 'http://jhallcomics.tumblr.com'
3867
3868
3869
class BerkeleyMewsTumblr(GenericTumblrV1):
    """Retriever for the Berkeley Mews comics posted on Tumblr."""
    # Also on http://www.gocomics.com/berkeley-mews
    # Also on http://www.berkeleymews.com
    name = 'berkeley-tumblr'
    long_name = 'Berkeley Mews (from Tumblr)'
    url = 'http://mews.tumblr.com'
    _categories = ('BERKELEY', )
3877
3878
3879
class JoanCornellaTumblr(GenericTumblrV1):
    """Retriever for the Joan Cornella comics posted on Tumblr."""
    # Also on http://joancornella.net
    name = 'cornella-tumblr'
    long_name = 'Joan Cornella (from Tumblr)'
    url = 'http://cornellajoan.tumblr.com'
3885
3886
3887
class RespawnComicTumblr(GenericTumblrV1):
    """Retriever for the Respawn Comic strips posted on Tumblr."""
    # Also on http://respawncomic.com
    name = 'respawn-tumblr'
    long_name = 'Respawn Comic (from Tumblr)'
    url = 'https://respawncomic.tumblr.com'
3893
3894
3895
class ChrisHallbeckTumblr(GenericTumblrV1):
    """Class to retrieve Chris Hallbeck comics."""
    # Also on https://tapastic.com/ChrisHallbeck
    # Also on http://maximumble.com
    # Also on http://minimumble.com
    # Also on http://thebookofbiff.com
    name = 'hallbeck-tumblr'
    # Display name now spells the author "Hallbeck", matching the class name,
    # the short name and the URL (it previously read "Hallback").
    long_name = 'Chris Hallbeck (from Tumblr)'
    url = 'https://chrishallbeck.tumblr.com'
    # NOTE(review): the category key keeps its existing spelling because
    # other classes may share it - confirm before renaming.
    _categories = ('HALLBACK', )
3905
3906
3907
class ComicNuggets(GenericTumblrV1):
    """Retriever for the Comic Nuggets comics."""
    name = 'nuggets'
    long_name = 'Comic Nuggets'
    url = 'http://comicnuggets.com'
3912
3913
3914
class PigeonGazetteTumblr(GenericTumblrV1):
    """Retriever for The Pigeon Gazette comics posted on Tumblr."""
    # Also on https://tapastic.com/series/The-Pigeon-Gazette
    name = 'pigeon-tumblr'
    long_name = 'The Pigeon Gazette (from Tumblr)'
    url = 'http://thepigeongazette.tumblr.com'
3920
3921
3922
class CancerOwl(GenericTumblrV1):
    """Retriever for the Cancer Owl comics posted on Tumblr."""
    # Also on http://cancerowl.com
    name = 'cancerowl-tumblr'
    long_name = 'Cancer Owl (from Tumblr)'
    url = 'http://cancerowl.tumblr.com'
3928
3929
3930
class FowlLanguageTumblr(GenericTumblrV1):
    """Retriever for the Fowl Language comics posted on Tumblr."""
    # Also on http://www.fowllanguagecomics.com
    # Also on http://tapastic.com/series/Fowl-Language-Comics
    # Also on http://www.gocomics.com/fowl-language
    name = 'fowllanguage-tumblr'
    long_name = 'Fowl Language Comics (from Tumblr)'
    url = 'http://fowllanguagecomics.tumblr.com'
    _categories = ('FOWLLANGUAGE', )
3939
3940
3941
class TheOdd1sOutTumblr(GenericTumblrV1):
    """Retriever for The Odd 1s Out comics posted on Tumblr."""
    # Also on http://theodd1sout.com
    # Also on https://tapastic.com/series/Theodd1sout
    name = 'theodd-tumblr'
    long_name = 'The Odd 1s Out (from Tumblr)'
    url = 'http://theodd1sout.tumblr.com'
3948
3949
3950
class TheUnderfoldTumblr(GenericTumblrV1):
    """Retriever for The Underfold comics posted on Tumblr."""
    # Also on http://theunderfold.com
    name = 'underfold-tumblr'
    long_name = 'The Underfold (from Tumblr)'
    url = 'http://theunderfold.tumblr.com'
3956
3957
3958
class LolNeinTumblr(GenericTumblrV1):
    """Retriever for the Lol Nein comics posted on Tumblr."""
    # Also on http://lolnein.com
    name = 'lolnein-tumblr'
    long_name = 'Lol Nein (from Tumblr)'
    url = 'http://lolneincom.tumblr.com'
3964
3965
3966
class FatAwesomeComicsTumblr(GenericTumblrV1):
    """Retriever for the Fat Awesome comics posted on Tumblr."""
    # Also on http://fatawesome.com/comics
    name = 'fatawesome-tumblr'
    long_name = 'Fat Awesome (from Tumblr)'
    url = 'http://fatawesomecomedy.tumblr.com'
3972
3973
3974
class TheWorldIsFlatTumblr(GenericTumblrV1):
    """Retriever for The World Is Flat comics (Tumblr version)."""
    # Also on https://tapastic.com/series/The-World-is-Flat
    name = 'flatworld-tumblr'
    long_name = 'The World Is Flat (from Tumblr)'
    url = 'http://theworldisflatcomics.com'
3980
3981
3982
class DorrisMc(GenericTumblrV1):
    """Retriever for the Dorris Mc comics."""
    # Also on http://www.gocomics.com/dorris-mccomics
    name = 'dorrismc'
    long_name = 'Dorris Mc'
    url = 'http://dorrismccomics.com'
3988
3989
3990
class LeleozTumblr(GenericEmptyComic, GenericTumblrV1):
    """Retriever for the Leleoz comics posted on Tumblr."""
    # NOTE(review): GenericEmptyComic is listed first in the bases, presumably
    # to deactivate retrieval; the reason is not recorded here - confirm.
    # Also on https://tapastic.com/series/Leleoz
    name = 'leleoz-tumblr'
    long_name = 'Leleoz (from Tumblr)'
    url = 'http://leleozcomics.tumblr.com'
3996
3997
3998
class MoonBeardTumblr(GenericTumblrV1):
    """Retriever for the Moon Beard comics (Tumblr version)."""
    # Also on http://moonbeard.com
    # Also on http://www.webtoons.com/en/comedy/moon-beard/list?title_no=471
    name = 'moonbeard-tumblr'
    long_name = 'Moon Beard (from Tumblr)'
    url = 'http://blog.squiresjam.es'
4005
4006
4007
class AComik(GenericTumblrV1):
    """Retriever for the A Comik comics."""
    name = 'comik'
    long_name = 'A Comik'
    url = 'http://acomik.com'
4012
4013
4014
class ClassicRandy(GenericTumblrV1):
    """Retriever for the Classic Randy comics posted on Tumblr."""
    name = 'randy'
    long_name = 'Classic Randy'
    url = 'http://classicrandy.tumblr.com'
4019
4020
4021
class DagssonTumblr(GenericTumblrV1):
    """Retriever for the Dagsson comics posted on Tumblr."""
    # Also on http://www.dagsson.com
    name = 'dagsson-tumblr'
    long_name = 'Dagsson Hugleikur (from Tumblr)'
    url = 'https://hugleikurdagsson.tumblr.com'
4027
4028
4029
class LinsEditionsTumblr(GenericTumblrV1):
    """Retriever for the L.I.N.S. Editions comics posted on Tumblr."""
    # Also on https://linsedition.com
    # Now on http://warandpeas.tumblr.com
    name = 'lins-tumblr'
    long_name = 'L.I.N.S. Editions (from Tumblr)'
    url = 'https://linscomics.tumblr.com'
    _categories = ('LINS', )
4037
4038
4039
class WarAndPeasTumblr(GenericTumblrV1):
    """Retriever for the War And Peas comics posted on Tumblr."""
    # Was on https://linscomics.tumblr.com
    name = 'warandpeas-tumblr'
    long_name = 'War And Peas (from Tumblr)'
    url = 'http://warandpeas.tumblr.com'
    _categories = ('WARANDPEAS', )
4046
4047
4048
class OrigamiHotDish(GenericTumblrV1):
    """Retriever for the Origami Hot Dish comics."""
    name = 'origamihotdish'
    long_name = 'Origami Hot Dish'
    url = 'http://origamihotdish.com'
4053
4054
4055
class HitAndMissComicsTumblr(GenericTumblrV1):
    """Retriever for the Hit and Miss Comics posted on Tumblr."""
    name = 'hitandmiss'
    long_name = 'Hit and Miss Comics'
    url = 'https://hitandmisscomics.tumblr.com'
4060
4061
4062
class HMBlanc(GenericTumblrV1):
    """Retriever for the HM Blanc comics posted on Tumblr."""
    name = 'hmblanc'
    long_name = 'HM Blanc'
    url = 'http://hmblanc.tumblr.com'
4067
4068
4069
class TalesOfAbsurdityTumblr(GenericTumblrV1):
    """Retriever for the Tales Of Absurdity comics posted on Tumblr."""
    # Also on http://talesofabsurdity.com
    # Also on http://tapastic.com/series/Tales-Of-Absurdity
    name = 'absurdity-tumblr'
    long_name = 'Tales of Absurdity (from Tumblr)'
    url = 'http://talesofabsurdity.tumblr.com'
    _categories = ('ABSURDITY', )
4077
4078
4079
class RobbieAndBobby(GenericTumblrV1):
    """Retriever for the Robbie And Bobby comics posted on Tumblr."""
    # Also on http://robbieandbobby.com
    name = 'robbie-tumblr'
    long_name = 'Robbie And Bobby (from Tumblr)'
    url = 'http://robbieandbobby.tumblr.com'
4085
4086
4087
class ElectricBunnyComicTumblr(GenericTumblrV1):
    """Retriever for the Electric Bunny Comics posted on Tumblr."""
    # Also on http://www.electricbunnycomics.com/View/Comic/153/Welcome+to+Hell
    name = 'bunny-tumblr'
    long_name = 'Electric Bunny Comic (from Tumblr)'
    url = 'http://electricbunnycomics.tumblr.com'
4093
4094
4095
class Hoomph(GenericTumblrV1):
    """Retriever for the Hoomph comics."""
    name = 'hoomph'
    long_name = 'Hoomph'
    url = 'http://hoom.ph'
4100
4101
4102
class BFGFSTumblr(GenericTumblrV1):
    """Retriever for the BFGFS comics posted on Tumblr."""
    # Also on https://tapastic.com/series/BFGFS
    # Also on http://bfgfs.com
    name = 'bfgfs-tumblr'
    long_name = 'BFGFS (from Tumblr)'
    url = 'https://bfgfs.tumblr.com'
4109
4110
4111
class DoodleForFood(GenericTumblrV1):
    """Retriever for the Doodle For Food comics."""
    # Also on https://tapastic.com/series/Doodle-for-Food
    name = 'doodle'
    long_name = 'Doodle For Food'
    url = 'http://www.doodleforfood.com'
4117
4118
4119
class CassandraCalinTumblr(GenericTumblrV1):
    """Retriever for the C. Cassandra comics posted on Tumblr."""
    # Also on http://cassandracalin.com
    # Also on https://tapastic.com/series/C-Cassandra-comics
    name = 'cassandra-tumblr'
    long_name = 'Cassandra Calin (from Tumblr)'
    url = 'http://c-cassandra.tumblr.com'
4126
4127
4128
class DougWasTaken(GenericTumblrV1):
    """Retriever for the Doug Was Taken comics posted on Tumblr."""
    name = 'doug'
    long_name = 'Doug Was Taken'
    url = 'https://dougwastaken.tumblr.com'
4133
4134
4135
class MandatoryRollerCoaster(GenericTumblrV1):
    """Retriever for the Mandatory Roller Coaster comics."""
    name = 'rollercoaster'
    long_name = 'Mandatory Roller Coaster'
    url = 'http://mandatoryrollercoaster.com'
4140
4141
4142
class CEstPasEnRegardantSesPompes(GenericTumblrV1):
    """Retriever for the C'Est Pas En Regardant Ses Pompes (...) comics."""
    name = 'cperspqccltt'
    long_name = 'C Est Pas En Regardant Ses Pompes (...)'
    url = 'http://cperspqccltt.tumblr.com'
4147
4148
4149
class TheGrohlTroll(GenericTumblrV1):
    """Retriever for The Grohl Troll comics."""
    name = 'grohltroll'
    long_name = 'The Grohl Troll'
    url = 'http://thegrohltroll.com'
4154
4155
4156
class WebcomicName(GenericTumblrV1):
    """Retriever for the Webcomic Name comics."""
    name = 'webcomicname'
    long_name = 'Webcomic Name'
    url = 'http://webcomicname.com'
4161
4162
4163
class BooksOfAdam(GenericTumblrV1):
    """Retriever for the Books of Adam comics posted on Tumblr."""
    # Also on http://www.booksofadam.com
    name = 'booksofadam'
    long_name = 'Books of Adam'
    url = 'http://booksofadam.tumblr.com'
4169
4170
4171
class HarkAVagrant(GenericTumblrV1):
    """Retriever for the Hark A Vagrant comics posted on Tumblr."""
    # Also on http://www.harkavagrant.com
    name = 'hark-tumblr'
    long_name = 'Hark A Vagrant (from Tumblr)'
    url = 'http://beatonna.tumblr.com'
4177
4178
4179
class OurSuperAdventureTumblr(GenericTumblrV1):
    """Retriever for the Our Super Adventure comics posted on Tumblr."""
    # Also on https://tapastic.com/series/Our-Super-Adventure
    # Also on http://www.oursuperadventure.com
    # http://sarahgraley.com
    name = 'superadventure-tumblr'
    long_name = 'Our Super Adventure (from Tumblr)'
    url = 'http://sarahssketchbook.tumblr.com'
4187
4188
4189
class JakeLikesOnions(GenericTumblrV1):
    """Retriever for the Jake Likes Onions comics."""
    name = 'jake'
    long_name = 'Jake Likes Onions'
    url = 'http://jakelikesonions.com'
4194
4195
4196
class InYourFaceCake(GenericTumblrV1):
    """Retriever for the In Your Face Cake comics posted on Tumblr."""
    name = 'inyourfacecake-tumblr'
    long_name = 'In Your Face Cake (from Tumblr)'
    url = 'https://in-your-face-cake.tumblr.com'
4201
4202
4203
class Robospunk(GenericTumblrV1):
    """Retriever for the Robospunk comics."""
    name = 'robospunk'
    long_name = 'Robospunk'
    url = 'http://robospunk.com'
4208
4209
4210
class BananaTwinky(GenericTumblrV1):
    """Retriever for the Banana Twinky comics posted on Tumblr."""
    name = 'banana'
    long_name = 'Banana Twinky'
    url = 'https://bananatwinky.tumblr.com'
4215
4216
4217
class YesterdaysPopcornTumblr(GenericTumblrV1):
    """Retriever for the Yesterday's Popcorn comics posted on Tumblr."""
    # Also on http://www.yesterdayspopcorn.com
    # Also on https://tapastic.com/series/Yesterdays-Popcorn
    name = 'popcorn-tumblr'
    long_name = "Yesterday's Popcorn (from Tumblr)"
    url = 'http://yesterdayspopcorn.tumblr.com'
4224
4225
4226
class TwistedDoodles(GenericTumblrV1):
    """Retriever for the Twisted Doodles comics."""
    name = 'twisted'
    long_name = 'Twisted Doodles'
    url = 'http://www.twisteddoodles.com'
4231
4232
4233
class UbertoolTumblr(GenericTumblrV1):
    """Retriever for the Ubertool comics posted on Tumblr."""
    # Also on http://ubertoolcomic.com
    # Also on https://tapastic.com/series/ubertool
    name = 'ubertool-tumblr'
    long_name = 'Ubertool (from Tumblr)'
    url = 'https://ubertool.tumblr.com'
    _categories = ('UBERTOOL', )
4241
4242
4243
class LittleLifeLinesTumblr(GenericTumblrV1):
    """Retriever for the Little Life Lines comics posted on Tumblr."""
    # Also on http://www.littlelifelines.com
    name = 'life-tumblr'
    long_name = 'Little Life Lines (from Tumblr)'
    url = 'https://little-life-lines.tumblr.com'
4249
4250
4251
class TheyCanTalk(GenericTumblrV1):
    """Retriever for the They Can Talk comics."""
    name = 'theycantalk'
    long_name = 'They Can Talk'
    url = 'http://theycantalk.com'
4256
4257
4258
class Will5NeverCome(GenericTumblrV1):
    """Retriever for the Will 5:00 Never Come comics."""
    name = 'will5'
    long_name = 'Will 5:00 Never Come ?'
    url = 'http://will5nevercome.com'
4263
4264
4265
class Sephko(GenericTumblrV1):
    """Retriever for the Sephko comics posted on Tumblr."""
    # Also on http://www.sephko.com
    name = 'sephko'
    long_name = 'Sephko'
    url = 'https://sephko.tumblr.com'
4271
4272
4273
class BlazersAtDawn(GenericTumblrV1):
    """Retriever for the Blazers At Dawn comics posted on Tumblr."""
    name = 'blazers'
    long_name = 'Blazers At Dawn'
    url = 'http://blazersatdawn.tumblr.com'
4278
4279
4280
class ArtByMoga(GenericEmptyComic, GenericTumblrV1):  # Deactivated because it downloads too many things
    """Retriever for the Art By Moga comics posted on Tumblr (deactivated)."""
    name = 'moga'
    long_name = 'Art By Moga'
    url = 'http://artbymoga.tumblr.com'
4285
4286
4287
class VerbalVomitTumblr(GenericTumblrV1):
    """Retriever for the Verbal Vomit comics posted on Tumblr."""
    # Also on http://www.verbal-vomit.com
    name = 'vomit-tumblr'
    long_name = 'Verbal Vomit (from Tumblr)'
    url = 'http://verbalvomits.tumblr.com'
4293
4294
4295
class LibraryComic(GenericTumblrV1):
    """Retriever for the LibraryComic strips posted on Tumblr."""
    # Also on http://librarycomic.com
    name = 'library-tumblr'
    long_name = 'LibraryComic (from Tumblr)'
    url = 'https://librarycomic.tumblr.com'
4301
4302
4303
class TizzyStitchBirdTumblr(GenericTumblrV1):
    """Retriever for the Tizzy Stitch Bird comics posted on Tumblr."""
    # Also on http://tizzystitchbird.com
    # Also on https://tapastic.com/series/TizzyStitchbird
    # Also on http://www.webtoons.com/en/challenge/tizzy-stitchbird/list?title_no=50082
    name = 'tizzy-tumblr'
    long_name = 'Tizzy Stitch Bird (from Tumblr)'
    url = 'http://tizzystitchbird.tumblr.com'
4311
4312 View Code Duplication
0 ignored issues
show
This code seems to be duplicated in your project.
Loading history...
4313
class VictimsOfCircumsolarTumblr(GenericTumblrV1):
    """Retriever for the Victims Of Circumsolar comics posted on Tumblr."""
    # Also on http://www.victimsofcircumsolar.com
    name = 'circumsolar-tumblr'
    long_name = 'Victims Of Circumsolar (from Tumblr)'
    url = 'https://victimsofcomics.tumblr.com'
4319
4320
4321
class RockPaperCynicTumblr(GenericTumblrV1):
    """Retriever for the Rock Paper Cynic comics posted on Tumblr."""
    # Also on http://www.rockpapercynic.com
    # Also on https://tapastic.com/series/rockpapercynic
    name = 'rpc-tumblr'
    long_name = 'Rock Paper Cynic (from Tumblr)'
    url = 'http://rockpapercynic.tumblr.com'
4328
4329
4330
class DeadlyPanelTumblr(GenericTumblrV1):
    """Retriever for the Deadly Panel comics posted on Tumblr."""
    # Also on http://www.deadlypanel.com
    # Also on https://tapastic.com/series/deadlypanel
    name = 'deadly-tumblr'
    long_name = 'Deadly Panel (from Tumblr)'
    url = 'https://deadlypanel.tumblr.com'
4337
4338
4339
class CatanaComics(GenericTumblrV1):
    """Retriever for the Catana comics."""
    name = 'catana'
    long_name = 'Catana'
    url = 'http://www.catanacomics.com'
4344
4345
4346
class AngryAtNothingTumblr(GenericTumblrV1):
    """Retriever for the Angry at Nothing comics posted on Tumblr."""
    # Also on http://www.angryatnothing.net
    # Also on http://tapastic.com/series/Comics-yeah-definitely-comics-
    name = 'angry-tumblr'
    long_name = 'Angry At Nothing (from Tumblr)'
    url = 'http://angryatnothing.tumblr.com'
4353
4354
4355
class ShanghaiTango(GenericTumblrV1):
    """Retriever for the Shanghai Tango comic posted on Tumblr."""
    name = 'tango'
    long_name = 'Shanghai Tango'
    url = 'http://tango2010weibo.tumblr.com'
4360
4361
4362
class OffTheLeashDogTumblr(GenericTumblrV1):
    """Retriever for the Off The Leash Dog comics posted on Tumblr."""
    # Also on http://offtheleashdogcartoons.com
    # Also on http://www.rupertfawcettcartoons.com
    name = 'offtheleash-tumblr'
    long_name = 'Off The Leash Dog (from Tumblr)'
    url = 'http://rupertfawcettsdoggyblog.tumblr.com'
    _categories = ('FAWCETT', )
4370
4371
4372
class ImogenQuestTumblr(GenericTumblrV1):
    """Retriever for the Imogen Quest comics posted on Tumblr."""
    # Also on http://imogenquest.net
    name = 'imogen-tumblr'
    long_name = 'Imogen Quest (from Tumblr)'
    url = 'http://imoquest.tumblr.com'
4378
4379
4380
class Shitfest(GenericTumblrV1):
    """Retriever for the Shitfest comics."""
    name = 'shitfest'
    long_name = 'Shitfest'
    url = 'http://shitfestcomic.com'
4385
4386
4387
class IceCreamSandwichComics(GenericTumblrV1):
    """Retriever for the Ice Cream Sandwich Comics."""
    name = 'icecream'
    long_name = 'Ice Cream Sandwich Comics'
    url = 'http://icecreamsandwichcomics.com'
4392
4393
4394
class Dustinteractive(GenericTumblrV1):
    """Retriever for the Dustinteractive comics."""
    name = 'dustinteractive'
    long_name = 'Dustinteractive'
    url = 'http://dustinteractive.com'
4399
4400
4401
class StickyCinemaFloor(GenericTumblrV1):
    """Retriever for the Sticky Cinema Floor comics posted on Tumblr."""
    name = 'stickycinema'
    long_name = 'Sticky Cinema Floor'
    url = 'https://stickycinemafloor.tumblr.com'
4406
4407
4408
class HorovitzComics(GenericListableComic):
    """Base class gathering the logic shared by the Horovitz comics.

    Concrete subclasses override `link_re` with a compiled pattern whose first
    group captures the comic number found in an archive link's href.
    """
    url = 'http://www.horovitzcomics.com'
    _categories = ('HOROVITZ', )
    # The three numeric groups captured after "comics/" in the image URL are
    # read by get_comic_info as year, month and day (in that order).
    img_re = re.compile(r'.*comics/([0-9]*)/([0-9]*)/([0-9]*)/.*$')
    link_re = NotImplemented  # placeholder, replaced in each subclass
    get_url_from_archive_element = join_cls_url_to_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dictionary for one comic.

        The number comes from the href of `link`, the title from its text and
        the date from the src of the single <img id="comic"> found in `soup`.
        """
        comic_num = int(cls.link_re.match(link['href']).group(1))
        comic_title = link.string
        images = soup.find_all('img', id='comic')
        assert len(images) == 1
        date_parts = cls.img_re.match(images[0]['src']).groups()
        year, month, day = map(int, date_parts)
        return {
            'title': comic_title,
            'day': day,
            'month': month,
            'year': year,
            'img': [image['src'] for image in images],
            'num': comic_num,
        }

    @classmethod
    def get_archive_elements(cls):
        """Return the archive anchors whose href matches `link_re`, reversed."""
        archive_soup = get_soup_at_url('http://www.horovitzcomics.com/comics/archive/')
        matching_links = archive_soup.find_all('a', href=cls.link_re)
        return reversed(matching_links)
4439
4440
4441
class HorovitzNew(HorovitzComics):
    """Retriever for the "new" series of Horovitz comics."""
    name = 'horovitznew'
    long_name = 'Horovitz New'
    link_re = re.compile(r'^/comics/new/([0-9]+)$')
4446
4447
4448
class HorovitzClassic(HorovitzComics):
    """Retriever for the "classic" series of Horovitz comics."""
    name = 'horovitzclassic'
    long_name = 'Horovitz Classic'
    link_re = re.compile(r'^/comics/classic/([0-9]+)$')
4453
4454
4455
class GenericGoComic(GenericNavigableComic):
    """Base class gathering the logic shared by comics from gocomics.com."""
    _categories = ('GOCOMIC', )

    @classmethod
    def get_first_comic_link(cls):
        """Return the link to the first comic, looked up on the comic's main page."""
        first_button = 'fa btn btn-outline-default btn-circle fa-backward sm '
        main_page = get_soup_at_url(cls.url)
        return main_page.find('a', class_=first_button)

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the link to the next comic if `next_` is true, else the previous one."""
        if next_:
            button = 'fa btn btn-outline-default btn-circle fa-caret-right hidden-sm-up sm '
        else:
            button = 'fa btn btn-outline-default btn-circle fa-caret-left sm '
        return last_soup.find('a', class_=button)

    @classmethod
    def get_url_from_link(cls, link):
        """Join the link's href to the gocomics.com root URL."""
        return urljoin_wrapper('http://www.gocomics.com', link['href'])

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dictionary for one comic.

        The date, author and tags are read from <meta> tags of the page, the
        images from the <picture> element holding the comic.
        """
        def meta_content(prop):
            """Return the content of the <meta> tag having the given property."""
            return soup.find('meta', property=prop)['content']

        published = string_to_date(meta_content('article:published_time'), "%Y-%m-%d")
        picture = soup.find('picture', class_='img-fluid item-comic-image')
        images = picture.find_all('img')
        author = meta_content('article:author')
        tags = meta_content('article:tag')
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [urljoin_wrapper(cls.url, image['src']) for image in images],
            'author': author,
            'tags': tags,
        }
4492
4493
4494
class PearlsBeforeSwine(GenericGoComic):
    """Retriever for the Pearls Before Swine comics on GoComics."""
    name = 'pearls'
    long_name = 'Pearls Before Swine'
    url = 'http://www.gocomics.com/pearlsbeforeswine'
4499
4500
4501
class Peanuts(GenericGoComic):
    """Retriever for the Peanuts comics on GoComics."""
    name = 'peanuts'
    long_name = 'Peanuts'
    url = 'http://www.gocomics.com/peanuts'
4506
4507
4508
class MattWuerker(GenericGoComic):
    """Retriever for the Matt Wuerker comics on GoComics."""
    name = 'wuerker'
    long_name = 'Matt Wuerker'
    url = 'http://www.gocomics.com/mattwuerker'
4513
4514
4515
class TomToles(GenericGoComic):
    """Retriever for the Tom Toles comics on GoComics."""
    name = 'toles'
    long_name = 'Tom Toles'
    url = 'http://www.gocomics.com/tomtoles'
4520
4521
4522
class BreakOfDay(GenericGoComic):
    """Retriever for the Break Of Day comics on GoComics."""
    name = 'breakofday'
    long_name = 'Break Of Day'
    url = 'http://www.gocomics.com/break-of-day'
4527
4528
4529
class Brevity(GenericGoComic):
    """Retriever for the Brevity comics on GoComics."""
    name = 'brevity'
    long_name = 'Brevity'
    url = 'http://www.gocomics.com/brevitypanel'
4534
4535
4536
class MichaelRamirez(GenericGoComic):
    """Retriever for the Michael Ramirez comics on GoComics."""
    name = 'ramirez'
    long_name = 'Michael Ramirez'
    url = 'http://www.gocomics.com/michaelramirez'
4541
4542
4543
class MikeLuckovich(GenericGoComic):
    """Retriever for the Mike Luckovich comics on GoComics."""
    name = 'luckovich'
    long_name = 'Mike Luckovich'
    url = 'http://www.gocomics.com/mikeluckovich'
4548
4549
4550
class JimBenton(GenericGoComic):
    """Retriever for the Jim Benton comics on GoComics."""
    # Also on http://jimbenton.tumblr.com
    name = 'benton'
    long_name = 'Jim Benton'
    url = 'http://www.gocomics.com/jim-benton-cartoons'
4556
4557
4558
class TheArgyleSweater(GenericGoComic):
    """Retriever for The Argyle Sweater comics on GoComics."""
    name = 'argyle'
    long_name = 'Argyle Sweater'
    url = 'http://www.gocomics.com/theargylesweater'
4563
4564
4565
class SunnyStreet(GenericGoComic):
    """Retriever for the Sunny Street comics on GoComics."""
    # Also on http://www.sunnystreetcomics.com
    name = 'sunny'
    long_name = 'Sunny Street'
    url = 'http://www.gocomics.com/sunny-street'
4571
4572
4573
class OffTheMark(GenericGoComic):
    """Retriever for the Off The Mark comics on GoComics."""
    # Also on https://www.offthemark.com
    name = 'offthemark'
    long_name = 'Off The Mark'
    url = 'http://www.gocomics.com/offthemark'
4579
4580
4581
class WuMo(GenericGoComic):
    """Retriever for the WuMo comics on GoComics."""
    # Also on http://wumo.com
    name = 'wumo'
    long_name = 'WuMo'
    url = 'http://www.gocomics.com/wumo'
4587
4588
4589
class LunarBaboon(GenericGoComic):
    """Retriever for the Lunar Baboon comics on GoComics."""
    # Also on http://www.lunarbaboon.com
    # Also on https://tapastic.com/series/Lunarbaboon
    name = 'lunarbaboon'
    long_name = 'Lunar Baboon'
    url = 'http://www.gocomics.com/lunarbaboon'
4596
4597
4598
class SandersenGocomic(GenericGoComic):
    """Retriever for the Sarah Andersen comics on GoComics."""
    # Also on http://sarahcandersen.com
    # Also on http://tapastic.com/series/Doodle-Time
    name = 'sandersen-goc'
    long_name = 'Sarah Andersen (from GoComics)'
    url = 'http://www.gocomics.com/sarahs-scribbles'
4605
4606
4607
class SaturdayMorningBreakfastCerealGoComic(GenericGoComic):
    """Retriever for the Saturday Morning Breakfast Cereal comics on GoComics."""
    # Also on http://smbc-comics.tumblr.com
    # Also on http://www.smbc-comics.com
    name = 'smbc-goc'
    long_name = 'Saturday Morning Breakfast Cereal (from GoComics)'
    url = 'http://www.gocomics.com/saturday-morning-breakfast-cereal'
    _categories = ('SMBC', )
4615
4616
4617
class CalvinAndHobbesGoComic(GenericGoComic):
    """Retriever for the Calvin and Hobbes comics on GoComics."""
    # From gocomics, not http://marcel-oehler.marcellosendos.ch/comics/ch/
    name = 'calvin-goc'
    long_name = 'Calvin and Hobbes (from GoComics)'
    url = 'http://www.gocomics.com/calvinandhobbes'
4623
4624
4625
class RallGoComic(GenericGoComic):
    """Retriever for the Ted Rall comics on GoComics."""
    # Also on http://rall.com/comic
    name = 'rall-goc'
    long_name = 'Ted Rall (from GoComics)'
    url = 'http://www.gocomics.com/ted-rall'
    _categories = ('RALL', )
4632
4633
4634
class TheAwkwardYetiGoComic(GenericGoComic):
    """Retriever for The Awkward Yeti comics on GoComics."""
    # Also on http://larstheyeti.tumblr.com
    # Also on http://theawkwardyeti.com
    # Also on https://tapastic.com/series/TheAwkwardYeti
    name = 'yeti-goc'
    long_name = 'The Awkward Yeti (from GoComics)'
    url = 'http://www.gocomics.com/the-awkward-yeti'
    _categories = ('YETI', )
4643
4644
4645
class BerkeleyMewsGoComics(GenericGoComic):
    """Retriever for the Berkeley Mews comics on GoComics."""
    # Also on http://mews.tumblr.com
    # Also on http://www.berkeleymews.com
    name = 'berkeley-goc'
    long_name = 'Berkeley Mews (from GoComics)'
    url = 'http://www.gocomics.com/berkeley-mews'
    _categories = ('BERKELEY', )
4653
4654
4655
class SheldonGoComics(GenericGoComic):
    """Retriever for the Sheldon comics on GoComics."""
    # Also on http://www.sheldoncomics.com
    name = 'sheldon-goc'
    long_name = 'Sheldon Comics (from GoComics)'
    url = 'http://www.gocomics.com/sheldon'
4661
4662
4663
class FowlLanguageGoComics(GenericGoComic):
    """Retriever for the Fowl Language comics on GoComics."""
    # Also on http://www.fowllanguagecomics.com
    # Also on http://tapastic.com/series/Fowl-Language-Comics
    # Also on http://fowllanguagecomics.tumblr.com
    name = 'fowllanguage-goc'
    long_name = 'Fowl Language Comics (from GoComics)'
    url = 'http://www.gocomics.com/fowl-language'
    _categories = ('FOWLLANGUAGE', )
4672
4673
4674
class NickAnderson(GenericGoComic):
    """Retrieve Nick Anderson comics from GoComics."""
    name = 'nickanderson'
    long_name = 'Nick Anderson'
    url = 'http://www.gocomics.com/nickanderson'
4679
4680
4681
class GarfieldGoComics(GenericGoComic):
    """Retrieve Garfield comics from GoComics."""
    # Also available at:
    #   http://garfield.com
    name = 'garfield-goc'
    long_name = 'Garfield (from GoComics)'
    url = 'http://www.gocomics.com/garfield'
    _categories = ('GARFIELD', )
4688
4689
4690
class DorrisMcGoComics(GenericGoComic):
    """Retrieve Dorris Mc comics from GoComics."""
    # Also available at:
    #   http://dorrismccomics.com
    name = 'dorrismc-goc'
    long_name = 'Dorris Mc (from GoComics)'
    url = 'http://www.gocomics.com/dorris-mccomics'
4696
4697
4698
class FoxTrot(GenericGoComic):
    """Retrieve FoxTrot comics from GoComics."""
    name = 'foxtrot'
    long_name = 'FoxTrot'
    url = 'http://www.gocomics.com/foxtrot'
4703
4704
4705
class FoxTrotClassics(GenericGoComic):
    """Retrieve FoxTrot Classics comics from GoComics."""
    name = 'foxtrot-classics'
    long_name = 'FoxTrot Classics'
    url = 'http://www.gocomics.com/foxtrotclassics'
4710
4711
4712
class MisterAndMeGoComics(GenericEmptyComic, GenericGoComic):  # Removed ?
    """Retrieve Mister & Me comics from GoComics."""
    # NOTE(review): GenericEmptyComic is listed first in the bases and the
    # header comment asks "Removed ?" - presumably the series is no longer
    # served by GoComics; confirm before re-enabling.
    # Also available at:
    #   http://www.mister-and-me.com
    #   https://tapastic.com/series/Mister-and-Me
    name = 'mister-goc'
    long_name = 'Mister & Me (from GoComics)'
    url = 'http://www.gocomics.com/mister-and-me'
4719
4720
4721
class NonSequitur(GenericGoComic):
    """Retrieve Non Sequitur (by Wiley Miller) comics from GoComics."""
    name = 'nonsequitur'
    long_name = 'Non Sequitur'
    url = 'http://www.gocomics.com/nonsequitur'
4726
4727
4728
class GenericTapasticComic(GenericListableComic):
    """Generic class to handle the logic common to comics from tapastic.com.

    The list of episodes is read from the javascript embedded in the series
    page found at ``cls.url``; each episode is then described by one entry
    of that list (a dict with at least 'id', 'title' and 'publishDate').
    """
    _categories = ('TAPASTIC', )

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Get information about a particular comics.

        soup is the parsed episode page and archive_elt the matching entry
        returned by get_archive_elements.
        Returns a dict with the 'day', 'month', 'year', 'img' and 'title'
        keys, or None when the page does not contain any image yet."""
        # 'publishDate' is handled as a number of milliseconds.
        timestamp = int(archive_elt['publishDate']) / 1000.0
        day = datetime.datetime.fromtimestamp(timestamp).date()
        imgs = soup.find_all('img', class_='art-image')
        if not imgs:
            print("Comic %s is being uploaded, retry later" % cls.get_url_from_archive_element(archive_elt))
            return None
        return {
            'day': day.day,
            'year': day.year,
            'month': day.month,
            'img': [i['src'] for i in imgs],
            'title': archive_elt['title'],
        }

    @classmethod
    def get_url_from_archive_element(cls, archive_elt):
        """Get the url of the episode page from an archive element."""
        return 'http://tapastic.com/episode/' + str(archive_elt['id'])

    @classmethod
    def get_archive_elements(cls):
        """Get the list of episodes described in the series page.

        Raises IndexError if no line of the page holds the episode list."""
        pref, suff = 'episodeList : ', ','
        # Information is stored in the javascript part
        # I don't know the clean way to get it so this is the ugly way:
        # look for the first line looking like "episodeList : <json>,".
        for raw_line in urlopen_wrapper(cls.url).readlines():
            line = raw_line.decode('utf-8').strip()
            if line.startswith(pref) and line.endswith(suff):
                return json.loads(line[len(pref):-len(suff)])
        raise IndexError("No line starting with %r found at %s" % (pref, cls.url))
4761
4762
4763
class VegetablesForDessert(GenericTapasticComic):
    """Retrieve Vegetables For Dessert comics from Tapastic."""
    # Also available at:
    #   http://vegetablesfordessert.tumblr.com
    name = 'vegetables'
    long_name = 'Vegetables For Dessert'
    url = 'http://tapastic.com/series/vegetablesfordessert'
4769
4770
4771
class FowlLanguageTapa(GenericTapasticComic):
    """Retrieve Fowl Language comics from Tapastic."""
    # Also available at:
    #   http://www.fowllanguagecomics.com
    #   http://fowllanguagecomics.tumblr.com
    #   http://www.gocomics.com/fowl-language
    name = 'fowllanguage-tapa'
    long_name = 'Fowl Language Comics (from Tapastic)'
    url = 'http://tapastic.com/series/Fowl-Language-Comics'
    _categories = ('FOWLLANGUAGE', )
4780
4781
4782
class OscillatingProfundities(GenericTapasticComic):
    """Retrieve Oscillating Profundities comics from Tapastic."""
    name = 'oscillating'
    long_name = 'Oscillating Profundities'
    url = 'http://tapastic.com/series/oscillatingprofundities'
4787
4788
4789
class ZnoflatsComics(GenericTapasticComic):
    """Retrieve Znoflats comics from Tapastic."""
    name = 'znoflats'
    long_name = 'Znoflats Comics'
    url = 'http://tapastic.com/series/Znoflats-Comics'
4794
4795
4796
class SandersenTapastic(GenericTapasticComic):
    """Retrieve Sarah Andersen comics from Tapastic."""
    # Also available at:
    #   http://sarahcandersen.com
    #   http://www.gocomics.com/sarahs-scribbles
    name = 'sandersen-tapa'
    long_name = 'Sarah Andersen (from Tapastic)'
    url = 'http://tapastic.com/series/Doodle-Time'
4803
4804
4805
class TubeyToonsTapastic(GenericTapasticComic):
    """Retrieve TubeyToons comics from Tapastic."""
    # Also available at:
    #   http://tubeytoons.com
    #   https://tubeytoons.tumblr.com
    name = 'tubeytoons-tapa'
    long_name = 'Tubey Toons (from Tapastic)'
    url = 'http://tapastic.com/series/Tubey-Toons'
    # NOTE(review): the key is spelled 'TUNEYTOONS' (with an N); kept as-is
    # because other classes may share it - confirm before renaming.
    _categories = ('TUNEYTOONS', )
4813
4814
4815
class AnythingComicTapastic(GenericTapasticComic):
    """Retrieve Anything Comic comics from Tapastic."""
    # Also available at:
    #   http://www.anythingcomic.com
    name = 'anythingcomic-tapa'
    long_name = 'Anything Comic (from Tapastic)'
    url = 'http://tapastic.com/series/anything'
4821
4822
4823
class UnearthedComicsTapastic(GenericTapasticComic):
    """Retrieve Unearthed Comics from Tapastic."""
    # Also available at:
    #   http://unearthedcomics.com
    #   https://unearthedcomics.tumblr.com
    name = 'unearthed-tapa'
    long_name = 'Unearthed Comics (from Tapastic)'
    url = 'http://tapastic.com/series/UnearthedComics'
    _categories = ('UNEARTHED', )
4831
4832
4833
class EverythingsStupidTapastic(GenericTapasticComic):
    """Retrieve Everything's Stupid comics from Tapastic."""
    # Also available at:
    #   http://www.webtoons.com/en/challenge/everythings-stupid/list?title_no=14591
    #   http://everythingsstupid.net
    name = 'stupid-tapa'
    long_name = "Everything's Stupid (from Tapastic)"
    url = 'http://tapastic.com/series/EverythingsStupid'
4840
4841
4842
class JustSayEhTapastic(GenericTapasticComic):
    """Retrieve Just Say Eh comics from Tapastic."""
    # Also available at:
    #   http://www.justsayeh.com
    name = 'justsayeh-tapa'
    long_name = 'Just Say Eh (from Tapastic)'
    url = 'http://tapastic.com/series/Just-Say-Eh'
4848
4849
4850
class ThorsThundershackTapastic(GenericTapasticComic):
    """Retrieve Thor's Thundershack comics from Tapastic."""
    # Also available at:
    #   http://www.thorsthundershack.com
    name = 'thor-tapa'
    long_name = "Thor's Thundershack (from Tapastic)"
    # NOTE(review): the slug ends with 'Thundershac' (no final 'k');
    # presumably this is the actual Tapastic slug - confirm.
    url = 'http://tapastic.com/series/Thors-Thundershac'
    _categories = ('THOR', )
4857
4858
4859
class OwlTurdTapastic(GenericTapasticComic):
    """Retrieve Owl Turd comics from Tapastic."""
    # Also available at:
    #   http://owlturd.com
    name = 'owlturd-tapa'
    long_name = 'Owl Turd (from Tapastic)'
    url = 'http://tapastic.com/series/Owl-Turd-Comix'
    _categories = ('OWLTURD', )
4866
4867
4868
class GoneIntoRaptureTapastic(GenericTapasticComic):
    """Retrieve Gone Into Rapture comics from Tapastic."""
    # Also available at:
    #   http://goneintorapture.tumblr.com
    #   http://goneintorapture.com
    name = 'rapture-tapa'
    long_name = 'Gone Into Rapture (from Tapastic)'
    url = 'http://tapastic.com/series/Goneintorapture'
4875
4876
4877
class HeckIfIKnowComicsTapa(GenericTapasticComic):
    """Retrieve Heck If I Know comics from Tapastic."""
    # Also available at:
    #   http://heckifiknowcomics.com
    name = 'heck-tapa'
    long_name = 'Heck if I Know comics (from Tapastic)'
    url = 'http://tapastic.com/series/Regular'
4883
4884
4885
class CheerUpEmoKidTapa(GenericTapasticComic):
    """Retrieve Cheer Up Emo Kid comics from Tapastic."""
    # Also available at:
    #   http://www.cheerupemokid.com
    #   https://enzocomics.tumblr.com
    name = 'cuek-tapa'
    long_name = 'Cheer Up Emo Kid (from Tapastic)'
    url = 'http://tapastic.com/series/CUEK'
4892
4893
4894
class BigFootJusticeTapa(GenericTapasticComic):
    """Retrieve Big Foot Justice comics from Tapastic."""
    # Also available at:
    #   http://bigfootjustice.com
    name = 'bigfoot-tapa'
    long_name = 'Big Foot Justice (from Tapastic)'
    url = 'http://tapastic.com/series/bigfoot-justice'
4900
4901
4902
class UpAndOutTapa(GenericTapasticComic):
    """Retrieve Up & Out comics from Tapastic."""
    # Also available at:
    #   http://upandoutcomic.tumblr.com
    name = 'upandout-tapa'
    long_name = 'Up And Out (from Tapastic)'
    url = 'http://tapastic.com/series/UP-and-OUT'
4908
4909
4910
class ToonHoleTapa(GenericTapasticComic):
    """Retrieve Toon Hole comics from Tapastic."""
    # Also available at:
    #   http://www.toonhole.com
    name = 'toonhole-tapa'
    long_name = 'Toon Hole (from Tapastic)'
    url = 'http://tapastic.com/series/TOONHOLE'
4916
4917
4918
class AngryAtNothingTapa(GenericTapasticComic):
    """Retrieve Angry At Nothing comics from Tapastic."""
    # Also available at:
    #   http://www.angryatnothing.net
    #   http://angryatnothing.tumblr.com
    name = 'angry-tapa'
    long_name = 'Angry At Nothing (from Tapastic)'
    url = 'http://tapastic.com/series/Comics-yeah-definitely-comics-'
4925
4926
4927
class LeleozTapa(GenericTapasticComic):
    """Retrieve Leleoz comics from Tapastic."""
    # Also available at:
    #   http://leleozcomics.tumblr.com
    name = 'leleoz-tapa'
    long_name = 'Leleoz (from Tapastic)'
    url = 'https://tapastic.com/series/Leleoz'
4933
4934
4935
class TheAwkwardYetiTapa(GenericTapasticComic):
    """Retrieve The Awkward Yeti comics from Tapastic."""
    # Also available at:
    #   http://www.gocomics.com/the-awkward-yeti
    #   http://theawkwardyeti.com
    #   http://larstheyeti.tumblr.com
    name = 'yeti-tapa'
    long_name = 'The Awkward Yeti (from Tapastic)'
    url = 'https://tapastic.com/series/TheAwkwardYeti'
    _categories = ('YETI', )
4944
4945
4946
class AsPerUsualTapa(GenericTapasticComic):
    """Class to retrieve As Per Usual comics."""
    # Also on http://as-per-usual.tumblr.com
    name = 'usual-tapa'
    long_name = 'As Per Usual (from Tapastic)'
    url = 'https://tapastic.com/series/AsPerUsual'
    # The leading underscore matters: every other comic class declares its
    # categories through the `_categories` attribute.
    _categories = ('DAMILEE', )
4953
4954
4955
class HotComicsForCoolPeopleTapa(GenericTapasticComic):
    """Class to retrieve Hot Comics For Cool People."""
    # Also on http://hotcomicsforcoolpeople.tumblr.com
    # Also on http://hotcomics.biz (links to tumblr)
    # Also on http://hcfcp.com (links to tumblr)
    name = 'hotcomics-tapa'
    long_name = 'Hot Comics For Cool People (from Tapastic)'
    url = 'https://tapastic.com/series/Hot-Comics-For-Cool-People'
    # The leading underscore matters: every other comic class declares its
    # categories through the `_categories` attribute.
    _categories = ('DAMILEE', )
4964
4965
4966
class OneOneOneOneComicTapa(GenericTapasticComic):
    """Retrieve 1111 Comics from Tapastic."""
    # Also available at:
    #   http://www.1111comics.me
    #   http://comics1111.tumblr.com
    name = '1111-tapa'
    long_name = '1111 Comics (from Tapastic)'
    url = 'https://tapastic.com/series/1111-Comics'
    _categories = ('ONEONEONEONE', )
4974
4975
4976
class TumbleDryTapa(GenericTapasticComic):
    """Class to retrieve Tumble Dry comics."""
    # Also on http://tumbledrycomics.com
    name = 'tumbledry-tapa'
    # Display name of the comic ("Tumble Dry", not "Tumblr Dry").
    long_name = 'Tumble Dry (from Tapastic)'
    url = 'https://tapastic.com/series/TumbleDryComics'
4982
4983
4984
class DeadlyPanelTapa(GenericTapasticComic):
    """Retrieve Deadly Panel comics from Tapastic."""
    # Also available at:
    #   http://www.deadlypanel.com
    #   https://deadlypanel.tumblr.com
    name = 'deadly-tapa'
    long_name = 'Deadly Panel (from Tapastic)'
    url = 'https://tapastic.com/series/deadlypanel'
4991
4992
4993
class ChrisHallbeckMaxiTapa(GenericTapasticComic):
    """Class to retrieve Chris Hallbeck's Maximumble comics."""
    # Also on https://chrishallbeck.tumblr.com
    # Also on http://maximumble.com
    name = 'hallbeckmaxi-tapa'
    long_name = 'Chris Hallbeck - Maximumble (from Tapastic)'
    url = 'https://tapastic.com/series/Maximumble'
    # NOTE(review): the category key keeps its 'HALLBACK' spelling because it
    # is an identifier shared with the other Chris Hallbeck classes; rename
    # them all together if it is ever fixed.
    _categories = ('HALLBACK', )
5001
5002
5003
class ChrisHallbeckMiniTapa(GenericTapasticComic):
    """Class to retrieve Chris Hallbeck's Minimumble comics."""
    # Also on https://chrishallbeck.tumblr.com
    # Also on http://minimumble.com
    name = 'hallbeckmini-tapa'
    long_name = 'Chris Hallbeck - Minimumble (from Tapastic)'
    url = 'https://tapastic.com/series/Minimumble'
    # NOTE(review): the category key keeps its 'HALLBACK' spelling because it
    # is an identifier shared with the other Chris Hallbeck classes; rename
    # them all together if it is ever fixed.
    _categories = ('HALLBACK', )
5011
5012
5013
class ChrisHallbeckBiffTapa(GenericTapasticComic):
    """Class to retrieve Chris Hallbeck's The Book of Biff comics."""
    # Also on https://chrishallbeck.tumblr.com
    # Also on http://thebookofbiff.com
    name = 'hallbeckbiff-tapa'
    long_name = 'Chris Hallbeck - The Book of Biff (from Tapastic)'
    url = 'https://tapastic.com/series/Biff'
    # NOTE(review): the category key keeps its 'HALLBACK' spelling because it
    # is an identifier shared with the other Chris Hallbeck classes; rename
    # them all together if it is ever fixed.
    _categories = ('HALLBACK', )
5021
5022
5023
class RandoWisTapa(GenericTapasticComic):
    """Retrieve RandoWis comics from Tapastic."""
    # Also available at:
    #   https://randowis.com
    name = 'randowis-tapa'
    long_name = 'RandoWis (from Tapastic)'
    url = 'https://tapastic.com/series/RandoWis'
5029
5030
5031
class PigeonGazetteTapa(GenericTapasticComic):
    """Retrieve The Pigeon Gazette comics from Tapastic."""
    # Also available at:
    #   http://thepigeongazette.tumblr.com
    name = 'pigeon-tapa'
    long_name = 'The Pigeon Gazette (from Tapastic)'
    url = 'https://tapastic.com/series/The-Pigeon-Gazette'
5037
5038
5039
class TheOdd1sOutTapa(GenericTapasticComic):
    """Retrieve The Odd 1s Out comics from Tapastic."""
    # Also available at:
    #   http://theodd1sout.com
    #   http://theodd1sout.tumblr.com
    name = 'theodd-tapa'
    long_name = 'The Odd 1s Out (from Tapastic)'
    url = 'https://tapastic.com/series/Theodd1sout'
5046
5047
5048
class TheWorldIsFlatTapa(GenericTapasticComic):
    """Retrieve The World Is Flat comics from Tapastic."""
    # Also available at:
    #   http://theworldisflatcomics.tumblr.com
    name = 'flatworld-tapa'
    long_name = 'The World Is Flat (from Tapastic)'
    url = 'https://tapastic.com/series/The-World-is-Flat'
5054
5055
5056
class MisterAndMeTapa(GenericTapasticComic):
    """Retrieve Mister & Me comics from Tapastic."""
    # Also available at:
    #   http://www.mister-and-me.com
    #   http://www.gocomics.com/mister-and-me
    name = 'mister-tapa'
    long_name = 'Mister & Me (from Tapastic)'
    url = 'https://tapastic.com/series/Mister-and-Me'
5063
5064
5065
class TalesOfAbsurdityTapa(GenericTapasticComic):
    """Retrieve Tales Of Absurdity comics from Tapastic."""
    # Also available at:
    #   http://talesofabsurdity.com
    #   http://talesofabsurdity.tumblr.com
    name = 'absurdity-tapa'
    long_name = 'Tales of Absurdity (from Tapastic)'
    url = 'http://tapastic.com/series/Tales-Of-Absurdity'
    _categories = ('ABSURDITY', )
5073
5074
5075
class BFGFSTapa(GenericTapasticComic):
    """Retrieve BFGFS comics from Tapastic."""
    # Also available at:
    #   http://bfgfs.com
    #   https://bfgfs.tumblr.com
    name = 'bfgfs-tapa'
    long_name = 'BFGFS (from Tapastic)'
    url = 'https://tapastic.com/series/BFGFS'
5082
5083
5084
class DoodleForFoodTapa(GenericTapasticComic):
    """Retrieve Doodle For Food comics from Tapastic."""
    # Also available at:
    #   http://www.doodleforfood.com
    name = 'doodle-tapa'
    long_name = 'Doodle For Food (from Tapastic)'
    url = 'https://tapastic.com/series/Doodle-for-Food'
5090
5091
5092
class MrLovensteinTapa(GenericTapasticComic):
    """Retrieve Mr. Lovenstein comics from Tapastic."""
    # NOTE(review): the previous "Also on" note pointed at
    # https://tapastic.com/series/MrLovenstein, i.e. this very source;
    # presumably the comic's own website was meant - confirm and fix.
    name = 'mrlovenstein-tapa'
    long_name = 'Mr. Lovenstein (from Tapastic)'
    url = 'https://tapastic.com/series/MrLovenstein'
5098
5099
5100
class CassandraCalinTapa(GenericTapasticComic):
    """Retrieve C. Cassandra comics from Tapastic."""
    # Also available at:
    #   http://cassandracalin.com
    #   http://c-cassandra.tumblr.com
    name = 'cassandra-tapa'
    long_name = 'Cassandra Calin (from Tapastic)'
    url = 'https://tapastic.com/series/C-Cassandra-comics'
5107
5108
5109
class WafflesAndPancakes(GenericTapasticComic):
    """Retrieve Waffles And Pancakes comics from Tapastic."""
    # Also available at:
    #   http://wandpcomic.com
    name = 'waffles'
    long_name = 'Waffles And Pancakes'
    url = 'https://tapastic.com/series/Waffles-and-Pancakes'
5115
5116
5117
class YesterdaysPopcornTapastic(GenericTapasticComic):
    """Retrieve Yesterday's Popcorn comics from Tapastic."""
    # Also available at:
    #   http://www.yesterdayspopcorn.com
    #   http://yesterdayspopcorn.tumblr.com
    name = 'popcorn-tapa'
    long_name = "Yesterday's Popcorn (from Tapastic)"
    url = 'https://tapastic.com/series/Yesterdays-Popcorn'
5124
5125
5126
class OurSuperAdventureTapastic(GenericTapasticComic):
    """Retrieve Our Super Adventure comics from Tapastic."""
    # Also available at:
    #   http://www.oursuperadventure.com
    # Other addresses listed for this comic:
    #   http://sarahssketchbook.tumblr.com
    #   http://sarahgraley.com
    name = 'superadventure-tapastic'
    long_name = 'Our Super Adventure (from Tapastic)'
    url = 'https://tapastic.com/series/Our-Super-Adventure'
5134
5135
5136
class NamelessPCs(GenericTapasticComic):
    """Retrieve Nameless PCs comics from Tapastic."""
    # Also available at:
    #   http://namelesspcs.com
    name = 'namelesspcs-tapa'
    long_name = 'NamelessPCs (from Tapastic)'
    url = 'https://tapastic.com/series/NamelessPC'
5142
5143
5144
class DownTheUpwardSpiralTapa(GenericTapasticComic):
    """Retrieve Down The Upward Spiral comics from Tapastic."""
    # Also available at:
    #   http://www.downtheupwardspiral.com
    #   http://downtheupwardspiral.tumblr.com
    name = 'spiral-tapa'
    long_name = 'Down the Upward Spiral (from Tapastic)'
    url = 'https://tapastic.com/series/Down-the-Upward-Spiral'
5151
5152
5153
class UbertoolTapa(GenericTapasticComic):
    """Retrieve Ubertool comics from Tapastic."""
    # Also available at:
    #   http://ubertoolcomic.com
    #   https://ubertool.tumblr.com
    name = 'ubertool-tapa'
    long_name = 'Ubertool (from Tapastic)'
    url = 'https://tapastic.com/series/ubertool'
    _categories = ('UBERTOOL', )
5161
5162
5163
class BarteNerdsTapa(GenericTapasticComic):
    """Retrieve BarteNerds comics from Tapastic."""
    # Also available at:
    #   http://www.bartenerds.com
    name = 'bartenerds-tapa'
    long_name = 'BarteNerds (from Tapastic)'
    url = 'https://tapastic.com/series/BarteNERDS'
5169
5170
5171
class SmallBlueYonderTapa(GenericTapasticComic):
    """Retrieve Small Blue Yonder comics from Tapastic."""
    # Also available at:
    #   http://www.smallblueyonder.com
    name = 'smallblue-tapa'
    long_name = 'Small Blue Yonder (from Tapastic)'
    url = 'https://tapastic.com/series/Small-Blue-Yonder'
5177
5178
5179
class TizzyStitchBirdTapa(GenericTapasticComic):
    """Retrieve Tizzy Stitch Bird comics from Tapastic."""
    # Also available at:
    #   http://tizzystitchbird.com
    #   http://tizzystitchbird.tumblr.com
    #   http://www.webtoons.com/en/challenge/tizzy-stitchbird/list?title_no=50082
    name = 'tizzy-tapa'
    long_name = 'Tizzy Stitch Bird (from Tapastic)'
    url = 'https://tapastic.com/series/TizzyStitchbird'
5187
5188
5189
class RockPaperCynicTapa(GenericTapasticComic):
    """Retrieve Rock Paper Cynic comics from Tapastic."""
    # Also available at:
    #   http://www.rockpapercynic.com
    #   http://rockpapercynic.tumblr.com
    name = 'rpc-tapa'
    long_name = 'Rock Paper Cynic (from Tapastic)'
    url = 'https://tapastic.com/series/rockpapercynic'
5196
5197
5198
class ItsTheTieTapa(GenericTapasticComic):
    """Retrieve It's The Tie comics from Tapastic."""
    # Also available at:
    #   http://itsthetie.com
    #   http://itsthetie.tumblr.com
    name = 'tie-tapa'
    long_name = "It's the tie (from Tapastic)"
    url = 'https://tapastic.com/series/itsthetie'
    _categories = ('TIE', )
5206
5207
5208
def get_subclasses(klass):
    """Return the list of all the classes deriving (even indirectly) from klass.

    Direct subclasses come first, then the descendants of each of them in
    turn. A class reachable through several paths is listed once per path."""
    direct = klass.__subclasses__()
    found = list(direct)
    for child in direct:
        found += get_subclasses(child)
    return found
5214
5215
5216
def remove_st_nd_rd_th_from_date(string):
    """Function to transform 1st/2nd/3rd/4th in a parsable date format.

    The ordinal suffix ("st", "nd", "rd" or "th") is removed only when it
    directly follows a digit, so that day and month names containing these
    letters ("Monday", "Sunday", "August", ...) are left untouched.
    Returns the cleaned string."""
    # Lookbehind: the digit is required but must be kept in the result.
    return re.sub(r'(?<=\d)(?:st|nd|rd|th)', '', string)
5224
5225
5226
def string_to_date(string, date_format, local=DEFAULT_LOCAL):
    """Function to convert string to date object.

    Wrapper around datetime.datetime.strptime: string is parsed according to
    date_format with the locale temporarily set to local (this matters for
    day and month names).
    Returns a datetime.date object.
    Raises ValueError if string does not match date_format and locale.Error
    if the requested locale cannot be set."""
    # format described in https://docs.python.org/3/library/datetime.html#strftime-and-strptime-behavior
    prev_locale = locale.setlocale(locale.LC_ALL)  # query only, no change
    if local == prev_locale:
        return datetime.datetime.strptime(string, date_format).date()
    locale.setlocale(locale.LC_ALL, local)
    try:
        return datetime.datetime.strptime(string, date_format).date()
    finally:
        # The locale is a process-wide setting: put it back even when the
        # parsing fails.
        locale.setlocale(locale.LC_ALL, prev_locale)
5237
5238
5239
# Registry of the comic classes, built once all the classes are defined.
# All the classes deriving (directly or not) from GenericComic:
COMICS = set(get_subclasses(GenericComic))
# ... restricted to the ones with an actual name:
VALID_COMICS = [comic for comic in COMICS if comic.name is not None]
# Lookup by comic name - the assertion fails if two classes share a name.
COMIC_NAMES = {comic.name: comic for comic in VALID_COMICS}
assert len(COMIC_NAMES) == len(VALID_COMICS)
# Same check on the names of the classes themselves.
CLASS_NAMES = {comic.__name__ for comic in VALID_COMICS}
assert len(CLASS_NAMES) == len(VALID_COMICS)
5245