Completed
Push — master ( 3e6c11...f98615 )
by De
01:02
created

comics.py (40 issues)

Code
1
#! /usr/bin/python3
2
# vim: set expandtab tabstop=4 shiftwidth=4 :
3
"""Module to retrieve webcomics"""
4
5
from comic_abstract import GenericComic, get_date_for_comic
6
import re
7
from datetime import date, timedelta
8
import datetime
9
from urlfunctions import get_soup_at_url, urljoin_wrapper,\
10
    convert_iri_to_plain_ascii_uri, load_json_at_url, urlopen_wrapper
11
import json
12
import locale
13
import urllib
14
15
DEFAULT_LOCAL = 'en_GB.UTF-8'
16
17
18
class Xkcd(GenericComic):
    """Retriever for xkcd comics, driven by the site's JSON description files."""
    name = 'xkcd'
    long_name = 'xkcd'
    url = 'http://xkcd.com'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Yield the comics following `last_comic` (all of them if it is falsy).

        Implementation of GenericComic's abstract method. Each yielded value
        is the JSON description of one comic, completed with the 'prefix',
        'json_url' and 'url' keys, with 'img' wrapped in a list and with
        'day', 'month' and 'year' converted to integers.
        """
        previous_num = last_comic['num'] if last_comic else 0
        latest_info = load_json_at_url(urljoin_wrapper(cls.url, 'info.0.json'))
        newest_num = latest_info['num']

        for num in range(previous_num + 1, newest_num + 1):
            if num == 404:
                # Number 404 is never requested - presumably because the
                # site has no comic with that number.
                continue
            json_url = urljoin_wrapper(cls.url, '%d/info.0.json' % num)
            comic = load_json_at_url(json_url)
            comic.update({
                'img': [comic['img']],
                'prefix': '%d-' % num,
                'json_url': json_url,
                'url': urljoin_wrapper(cls.url, str(num)),
                'day': int(comic['day']),
                'month': int(comic['month']),
                'year': int(comic['year']),
            })
            assert comic['num'] == num
            yield comic
44
45
46
# Helper functions corresponding to get_url_from_link/get_url_from_archive_element
47
48
49
@classmethod
def get_href(cls, link):
    """Return the 'href' value of `link`.

    Usable as an implementation of get_url_from_link or
    get_url_from_archive_element.
    """
    href = link['href']
    return href
53
54
55
@classmethod
def join_cls_url_to_href(cls, link):
    """Return the 'href' value of `link` joined to the class url.

    Usable as an implementation of get_url_from_link or
    get_url_from_archive_element.
    """
    href = link['href']
    return urljoin_wrapper(cls.url, href)
59
60
61
class GenericNavigableComic(GenericComic):
    """Generic class for "navigable" comics: with first/next arrows.

    This class applies to comics where the previous and next comics can
    be accessed from a given comic. Once given a starting point (either
    the first comic or the last comic retrieved), it handles the
    navigation, the retrieval of the soup object and the setting of
    the 'url' attribute on retrieved comics. This greatly limits the
    amount of boilerplate code in the different implementation classes.

    The `get_next_comic` method is implemented in terms of more
    specialized methods to be implemented/overridden:
        - get_first_comic_link
        - get_navi_link
        - get_comic_info
        - get_url_from_link
    """
    _categories = ('NAVIGABLE', )

    @classmethod
    def get_first_comic_link(cls):
        """Get link to the first comic.

        Sometimes this can be retrieved from any comic page, sometimes from
        the archive page, sometimes it doesn't exist at all and one has
        to iterate backward to find it before hardcoding the result found.

        A return value of None means that no first link was found.
        """
        raise NotImplementedError

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next (or previous - for dev purposes) comic.

        `last_soup` is the soup of the page to navigate from; a true
        `next_` asks for the next comic, a false one for the previous.
        """
        raise NotImplementedError

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        `soup` is the soup of the comic page and `link` the link that led
        to it. The result is a dictionary without a 'url' key (it is added
        by `get_next_comic`) or None if the page is to be skipped.
        """
        raise NotImplementedError

    @classmethod
    def get_url_from_link(cls, link):
        """Get url corresponding to a link. Default implementation is similar to get_href."""
        return link['href']

    @classmethod
    def get_next_link(cls, last_soup):
        """Get link to next comic (and log it)."""
        link = cls.get_navi_link(last_soup, True)
        cls.log("Next link is %s" % link)
        return link

    @classmethod
    def get_prev_link(cls, last_soup):
        """Get link to previous comic (and log it)."""
        link = cls.get_navi_link(last_soup, False)
        cls.log("Prev link is %s" % link)
        return link

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generic implementation of get_next_comic for navigable comics.

        Navigation starts after `last_comic['url']` when `last_comic` is
        provided, from the first comic link otherwise. It stops when no
        next link is found or when the next link leads to the current URL.
        Pages for which `get_comic_info` returns None are skipped but
        navigation goes on from them.
        """
        url = last_comic['url'] if last_comic else None
        cls.log("starting 'get_next_comic' from %s" % url)
        if url:
            next_comic = cls.get_next_link(get_soup_at_url(url))
        else:
            next_comic = cls.get_first_comic_link()
        cls.log("next/first comic will be %s (url is %s)" % (str(next_comic), url))
        # cls.check_navigation(url)
        while next_comic:
            prev_url, url = url, cls.get_url_from_link(next_comic)
            if prev_url == url:
                # A page linking to itself: going on would loop forever.
                cls.log("got same url %s" % url)
                break
            cls.log("about to get %s (%s)" % (url, str(next_comic)))
            soup = get_soup_at_url(url)
            comic = cls.get_comic_info(soup, next_comic)
            if comic is not None:
                assert 'url' not in comic
                comic['url'] = url
                yield comic
            next_comic = cls.get_next_link(soup)
            cls.log("next comic will be %s" % str(next_comic))

    @classmethod
    def check_first_link(cls):
        """Check that navigation to first comic seems to be working - for dev purposes.

        Returns True if a first link is found and its page can be
        retrieved without an HTTP error, False otherwise.
        """
        # `import urllib` alone does not guarantee that the `urllib.error`
        # submodule is loaded; import it explicitly before referring to it.
        import urllib.error

        cls.log("about to check first link")
        ok = True
        firstlink = cls.get_first_comic_link()
        if firstlink is None:
            print("From %s : no first link" % cls.url)
            ok = False
        else:
            firsturl = cls.get_url_from_link(firstlink)
            try:
                get_soup_at_url(firsturl)
            except urllib.error.HTTPError:
                print("From %s : invalid first url" % cls.url)
                ok = False
        cls.log("checked first link -> returned %d" % ok)
        return ok

    @classmethod
    def check_prev_next_links(cls, url):
        """Check that navigation to prev/next from a given URL seems to be working - for dev purposes.

        Going backward then forward (and forward then backward) from `url`
        is expected to lead back to `url`. Returns True if no problem was
        detected, False otherwise (a message is printed for each problem).
        """
        cls.log("about to check prev/next from %s" % url)
        ok = True
        if url is None:
            prevlink, nextlink = None, None
        else:
            soup = get_soup_at_url(url)
            prevlink, nextlink = cls.get_prev_link(soup), cls.get_next_link(soup)
        if prevlink is None and nextlink is None:
            print("From %s : no previous nor next" % url)
            ok = False
        else:
            if prevlink:
                prevurl = cls.get_url_from_link(prevlink)
                prevsoup = get_soup_at_url(prevurl)
                prevnextlink = cls.get_next_link(prevsoup)
                prevnext = cls.get_url_from_link(prevnextlink) if prevnextlink is not None else "NO URL"
                if prevnext != url:
                    print("From %s, going backward then forward leads to %s" % (url, prevnext))
                    ok = False
            if nextlink:
                nexturl = cls.get_url_from_link(nextlink)
                # A next link pointing to the page itself is not followed.
                if nexturl != url:
                    nextsoup = get_soup_at_url(nexturl)
                    nextprevlink = cls.get_prev_link(nextsoup)
                    nextprev = cls.get_url_from_link(nextprevlink) if nextprevlink is not None else "NO URL"
                    if nextprev != url:
                        print("From %s, going forward then backward leads to %s" % (url, nextprev))
                        ok = False
        cls.log("checked prev/next from %s -> returned %d" % (url, ok))
        return ok

    @classmethod
    def check_navigation(cls, url):
        """Check that navigation functions seem to be working - for dev purposes.

        Runs both `check_first_link` and `check_prev_next_links(url)` and
        returns True only if both succeed.
        """
        cls.log("about to check navigation from %s" % url)
        first = cls.check_first_link()
        prevnext = cls.check_prev_next_links(url)
        ok = first and prevnext
        cls.log("checked navigation from %s -> returned %d" % (url, ok))
        return ok
207
208
209
class GenericListableComic(GenericComic):
    """Generic class for "listable" comics: with a list of comics (aka 'archive').

    The `get_next_comic` method is implemented in terms of more
    specialized methods to be implemented/overridden:
        - get_archive_elements
        - get_url_from_archive_element
        - get_comic_info
    """
    _categories = ('LISTABLE', )

    @classmethod
    def get_archive_elements(cls):
        """Return an iterable over the elements of the archive."""
        raise NotImplementedError

    @classmethod
    def get_url_from_archive_element(cls, archive_elt):
        """Return the url of the comic described by an archive element."""
        raise NotImplementedError

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Return the information dictionary of a comic (or None to skip it)."""
        raise NotImplementedError

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generic implementation of get_next_comic for listable comics.

        Archive elements are skipped up to and including the one whose url
        is `last_comic['url']`; the following ones are retrieved and
        yielded. Without `last_comic`, every element is retrieved.
        """
        waiting_for_url = last_comic['url'] if last_comic else None
        elements = list(cls.get_archive_elements())
        for element in elements:
            url = cls.get_url_from_archive_element(element)
            cls.log("considering %s" % url)
            if waiting_for_url is not None:
                # Still looking for the last comic retrieved.
                if waiting_for_url == url:
                    waiting_for_url = None
                continue
            cls.log("about to get %s (%s)" % (url, str(element)))
            soup = get_soup_at_url(url)
            comic = cls.get_comic_info(soup, element)
            if comic is None:
                continue
            assert 'url' not in comic
            comic['url'] = url
            yield comic
        if waiting_for_url is not None:
            print("Did not find %s in the %d comics: there might be a problem" %
                  (waiting_for_url, len(elements)))
256
257
# Helper functions corresponding to get_first_comic_link/get_navi_link
258
259
260
@classmethod
def get_link_rel_next(cls, last_soup, next_):
    """Implementation of get_navi_link based on the <link rel="next/prev"> tag."""
    rel_value = 'next' if next_ else 'prev'
    return last_soup.find('link', rel=rel_value)
264
265
266
@classmethod
def get_a_rel_next(cls, last_soup, next_):
    """Implementation of get_navi_link based on the <a rel="next/prev"> tag."""
    rel_value = 'next' if next_ else 'prev'
    return last_soup.find('a', rel=rel_value)
270
271
272
@classmethod
def get_a_navi_navinext(cls, last_soup, next_):
    """Implementation of get_navi_link based on the <a class="navi navi-next/prev"> tag."""
    if next_:
        css_class = 'navi navi-next'
    else:
        css_class = 'navi navi-prev'
    return last_soup.find('a', class_=css_class)
276
277
278
@classmethod
def get_a_navi_comicnavnext_navinext(cls, last_soup, next_):
    """Implementation of get_navi_link based on the <a> tag with the
    'navi comic-nav-next navi-next' (or '... previous ... prev') class.
    """
    if next_:
        css_class = 'navi comic-nav-next navi-next'
    else:
        css_class = 'navi comic-nav-previous navi-prev'
    return last_soup.find('a', class_=css_class)
282
283
284
@classmethod
def get_a_comicnavbase_comicnavnext(cls, last_soup, next_):
    """Implementation of get_navi_link based on the <a> tag with the
    'comic-nav-base comic-nav-next' (or '... comic-nav-previous') class.
    """
    if next_:
        css_class = 'comic-nav-base comic-nav-next'
    else:
        css_class = 'comic-nav-base comic-nav-previous'
    return last_soup.find('a', class_=css_class)
288
289
290
@classmethod
def get_a_navi_navifirst(cls):
    """Implementation of get_first_comic_link: the <a class="navi navi-first">
    tag found on the page at the class url.
    """
    home_soup = get_soup_at_url(cls.url)
    return home_soup.find('a', class_='navi navi-first')
294
295
296
@classmethod
def get_div_navfirst_a(cls):
    """Implementation of get_first_comic_link.

    Returns the first <a> tag found inside the <div class="nav-first">
    element of the page at the class url, or None when the page has no
    such <div> (instead of failing with an AttributeError).
    """
    nav_first = get_soup_at_url(cls.url).find('div', class_="nav-first")
    return nav_first.find('a') if nav_first else None
300
301
302
@classmethod
def get_a_comicnavbase_comicnavfirst(cls):
    """Implementation of get_first_comic_link: the
    <a class="comic-nav-base comic-nav-first"> tag found on the page at
    the class url.
    """
    home_soup = get_soup_at_url(cls.url)
    return home_soup.find('a', class_='comic-nav-base comic-nav-first')
306
307
308
@classmethod
def simulate_first_link(cls):
    """Implementation of get_first_comic_link building a link-like object
    out of the `first_url` attribute provided by the class.

    Note: the first URL can easily be found using:
    `get_first_comic_link = navigate_to_first_comic`.
    """
    first_link = dict(href=cls.first_url)
    return first_link
317
318
319
@classmethod
def navigate_to_first_comic(cls):
    """Implementation of get_first_comic_link walking backward from a URL
    typed in by the user until the first comic is reached.

    Sometimes the first comic cannot be reached directly, so one has to
    follow the "previous" links until there are none left. Once the
    first URL is known, it is better to hardcode it (see
    `simulate_first_link`), but for development purposes it is
    convenient to have an automatic way to find it.

    Every URL visited is printed; the result is a link-like dictionary
    whose 'href' is the last URL visited.
    """
    url = input("Get starting URL: ")
    while True:
        print(url)
        previous = cls.get_prev_link(get_soup_at_url(url))
        if not previous:
            break
        url = cls.get_url_from_link(previous)
    return {'href': url}
340
341
342
class GenericEmptyComic(GenericComic):
    """Generic class for comics where nothing is to be done.

    It can be used to temporarily deactivate a comic that does not work
    properly, by replacing `class MyComic(GenericWhateverComic)` with
    `class MyComic(GenericEmptyComic, GenericWhateverComic)`.
    """
    _categories = ('EMPTY', )

    @classmethod
    def get_next_comic(cls, last_comic):
        """Implementation of get_next_comic: log a message and return no comic."""
        cls.log("comic is considered as empty - returning no comic")
        no_comics = []
        return no_comics
355
356
357 View Code Duplication
class ExtraFabulousComics(GenericNavigableComic):
    """Class to retrieve Extra Fabulous Comics."""
    name = 'efc'
    long_name = 'Extra Fabulous Comics'
    url = 'http://extrafabulouscomics.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        `soup` is the soup of the comic page; `link` is not used.
        Returns a dictionary with the 'title', 'img', 'month', 'year',
        'day' and 'prefix' keys.
        """
        # The site URL is escaped so that its dots only match literal dots
        # (unescaped, '.' would match any character).
        img_src_re = re.compile('^%s/wp-content/uploads/' % re.escape(cls.url))
        imgs = soup.find_all('img', src=img_src_re)
        title = soup.find('meta', property='og:title')['content']
        # Only the first 10 characters (the "YYYY-MM-DD" part) are parsed.
        date_str = soup.find('meta', property='article:published_time')['content'][:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        return {
            'title': title,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'prefix': title + '-'
        }
381
382
383 View Code Duplication
class GenericLeMondeBlog(GenericNavigableComic):
    """Generic class to retrieve comics from Le Monde blogs.

    Subclasses are expected to provide `first_url`, the URL navigation
    starts from.
    """
    _categories = ('LEMONDE', 'FRANCAIS')
    first_url = NotImplemented
    get_first_comic_link = simulate_first_link
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dictionary of the comic page `soup`.

        The result has the 'title', 'url2' (short link), 'img', 'month',
        'year' and 'day' keys. `link` is not used.
        """
        shortlink = soup.find('link', rel='shortlink')
        url2 = shortlink['href']
        og_title = soup.find('meta', property='og:title')
        title = og_title['content']
        entry_date = soup.find("span", class_="entry-date")
        # Dates are parsed with the French locale.
        day = string_to_date(entry_date.string, "%d %B %Y", "fr_FR.utf8")
        og_images = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'url2': url2,
            'img': [convert_iri_to_plain_ascii_uri(meta['content']) for meta in og_images],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
406
407
408
class ZepWorld(GenericLeMondeBlog):
    """Zep World comics (Le Monde blog)."""
    long_name = "Zep World"
    name = "zep"
    # Page navigation starts from.
    first_url = "http://zepworld.blog.lemonde.fr/2014/10/31/bientot-le-blog-de-zep/"
    url = "http://zepworld.blog.lemonde.fr"
414
415
416
class Vidberg(GenericLeMondeBlog):
    """Vidberg comics (Le Monde blog)."""
    long_name = "Vidberg - l'actu en patates"
    name = 'vidberg'
    # This is not the first comic: no efficient way to retrieve the
    # actual first one was found.
    first_url = "http://vidberg.blog.lemonde.fr/2012/02/09/revue-de-campagne-la-campagne-du-modem-semballe/"
    url = "http://vidberg.blog.lemonde.fr"
423
424
425
class Plantu(GenericLeMondeBlog):
    """Plantu comics (Le Monde blog)."""
    long_name = "Plantu"
    name = 'plantu'
    # Page navigation starts from.
    first_url = "http://plantu.blog.lemonde.fr/2014/10/28/stress-test-a-bruxelles/"
    url = "http://plantu.blog.lemonde.fr"
431
432
433
class XavierGorce(GenericLeMondeBlog):
    """Xavier Gorce comics (Le Monde blog)."""
    long_name = "Xavier Gorce"
    name = 'gorce'
    # Page navigation starts from.
    first_url = "http://xaviergorce.blog.lemonde.fr/2015/01/09/distinction/"
    url = "http://xaviergorce.blog.lemonde.fr"
439
440
441
class CartooningForPeace(GenericLeMondeBlog):
    """Cartooning For Peace comics (Le Monde blog)."""
    long_name = "Cartooning For Peace"
    name = 'forpeace'
    # Page navigation starts from.
    first_url = "http://cartooningforpeace.blog.lemonde.fr/2014/12/15/bado/"
    url = "http://cartooningforpeace.blog.lemonde.fr"
447
448
449
class Aurel(GenericLeMondeBlog):
    """Aurel comics (Le Monde blog)."""
    long_name = "Aurel"
    name = 'aurel'
    # Page navigation starts from.
    first_url = "http://aurel.blog.lemonde.fr/2014/09/29/le-senat-repasse-a-droite/"
    url = "http://aurel.blog.lemonde.fr"
455
456
457
class LesCulottees(GenericLeMondeBlog):
    """Les Culottees comics (Le Monde blog)."""
    long_name = 'Les Culottees'
    name = 'culottees'
    # Page navigation starts from.
    first_url = "http://lesculottees.blog.lemonde.fr/2016/01/11/clementine-delait-femme-a-barbe/"
    url = "http://lesculottees.blog.lemonde.fr"
463
464
465
class UneAnneeAuLycee(GenericLeMondeBlog):
    """Une Annee Au Lycee comics (Le Monde blog)."""
    long_name = 'Une Annee au Lycee'
    name = 'lycee'
    # Page navigation starts from.
    first_url = "http://uneanneeaulycee.blog.lemonde.fr/2016/06/13/la-semaine-du-bac-est-arrivee/"
    url = 'http://uneanneeaulycee.blog.lemonde.fr'
471
472
473 View Code Duplication
class Rall(GenericNavigableComic):
    """Retriever for Ted Rall comics."""
    # Also on http://www.gocomics.com/tedrall
    name = 'rall'
    long_name = "Ted Rall"
    url = "http://rall.com/comic"
    _categories = ('RALL', )
    # This is not the first comic: no efficient way to retrieve the
    # actual first one was found.
    first_url = "http://rall.com/2014/01/30/los-angeles-times-cartoon-well-miss-those-california-flowers"
    get_first_comic_link = simulate_first_link
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dictionary of the comic page `soup`.

        The result has the 'title', 'author', 'month', 'year', 'day',
        'description' and 'img' keys. `link` is not used.
        """
        og_title = soup.find('meta', property='og:title')
        title = og_title['content']
        author_span = soup.find("span", class_="author vcard")
        author = author_span.find("a").string
        date_span = soup.find("span", class_="entry-date")
        day = string_to_date(date_span.string, "%B %d, %Y")
        og_desc = soup.find('meta', property='og:description')
        desc = og_desc['content']
        content = soup.find('div', class_='entry-content')
        # The last 7 images are dropped: they are social media buttons.
        comic_imgs = content.find_all('img')[:-7]
        return {
            'title': title,
            'author': author,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'description': desc,
            'img': [img['src'] for img in comic_imgs],
        }
504
505
506
class Dilem(GenericNavigableComic):
    """Retriever for Ali Dilem comics."""
    name = 'dilem'
    long_name = 'Ali Dilem'
    url = 'http://information.tv5monde.com/dilem'
    _categories = ('FRANCAIS', )
    first_url = "http://information.tv5monde.com/dilem/2004-06-26"
    get_first_comic_link = simulate_first_link
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the link to the next or previous comic (None if missing)."""
        # The classes are swapped on purpose: the 'prev' item leads to the
        # next comic and the 'next' item to the previous one.
        item_class = 'prev' if next_ else 'next'
        item = last_soup.find('li', class_=item_class)
        if not item:
            return None
        return item.find('a')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dictionary of the comic page `soup`.

        The result has the 'short_url', 'title', 'img', 'day', 'month'
        and 'year' keys. `link` is not used.
        """
        shortlink = soup.find('link', rel='shortlink')
        short_url = shortlink['href']
        twitter_title = soup.find('meta', attrs={'name': 'twitter:title'})
        title = twitter_title['content']
        og_images = soup.find_all('meta', property='og:image')
        # Only the first 10 characters (the "YYYY-MM-DD" part) are parsed.
        date_str = soup.find('span', property='dc:date')['content'][:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        return {
            'short_url': short_url,
            'title': title,
            'img': [meta['content'] for meta in og_images],
            'day': day.day,
            'month': day.month,
            'year': day.year,
        }
540
541
542
class SpaceAvalanche(GenericNavigableComic):
    """Retriever for Space Avalanche comics."""
    name = 'avalanche'
    long_name = 'Space Avalanche'
    url = 'http://www.spaceavalanche.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return a hardcoded link-like dictionary for the first comic."""
        first_link = {
            'href': "http://www.spaceavalanche.com/2009/02/02/irish-sea/",
            'title': "Irish Sea",
        }
        return first_link

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dictionary of the comic page `soup`.

        The title comes from the 'title' of `link` and the date from the
        year/month/day components of its URL. The result has the 'title',
        'day', 'month', 'year' and 'img' keys.
        """
        url_date_re = re.compile('.*/([0-9]*)/([0-9]*)/([0-9]*)/.*$')
        title = link['title']
        date_match = url_date_re.match(cls.get_url_from_link(link))
        year, month, day = map(int, date_match.groups())
        entry = soup.find("div", class_="entry")
        entry_imgs = entry.find_all("img")
        return {
            'title': title,
            'day': day,
            'month': month,
            'year': year,
            'img': [img['src'] for img in entry_imgs],
        }
570
571
572
class ZenPencils(GenericNavigableComic):
    """Retriever for ZenPencils comics."""
    # Also on http://zenpencils.tumblr.com
    # Also on http://www.gocomics.com/zen-pencils
    name = 'zenpencils'
    long_name = 'Zen Pencils'
    url = 'http://zenpencils.com'
    _categories = ('ZENPENCILS', )
    first_url = "http://zenpencils.com/comic/1-ralph-waldo-emerson-make-them-cry/"
    get_first_comic_link = simulate_first_link
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dictionary of the comic page `soup`.

        The result has the 'title', 'description', 'author', 'day',
        'month', 'year' and 'img' keys. `link` is not used.
        """
        # Images come from the 'comic' div (the 'og:image' meta tags would
        # be an alternative source).
        comic_div = soup.find('div', id='comic')
        imgs = comic_div.find_all('img')
        post = soup.find('div', class_='post-content')
        author_span = post.find("span", class_="post-author")
        author = author_span.find("a").string
        title = soup.find('meta', property='og:title')['content']
        date_span = post.find('span', class_='post-date')
        day = string_to_date(date_span.string, "%B %d, %Y")
        # Sanity checks on the page structure.
        assert imgs
        assert all(img['alt'] == img['title'] for img in imgs)
        assert all(img['alt'] in (title, "") for img in imgs)
        og_desc = soup.find('meta', property='og:description')
        desc = og_desc['content']
        return {
            'title': title,
            'description': desc,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [urljoin_wrapper(cls.url, img['src']) for img in imgs],
        }
607
608
609
class ItsTheTie(GenericNavigableComic):
    """Retriever for It's the tie comics."""
    # Also on http://itsthetie.tumblr.com
    # Also on https://tapastic.com/series/itsthetie
    name = 'tie'
    long_name = "It's the tie"
    url = "http://itsthetie.com"
    _categories = ('TIE', )
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dictionary of the comic page `soup`.

        The result has the 'title', 'month', 'year', 'day', 'img' and
        'tags' keys. `link` is not used.
        """
        title_header = soup.find('h1', class_='comic-title')
        title = title_header.find('a').string
        meta_header = soup.find('header', class_='comic-meta entry-meta')
        day = string_to_date(meta_header.find('a').string, "%B %d, %Y")
        # Bonus images may or may not be in meta og:image: they are only
        # added when not already listed.
        og_src = [meta['content'] for meta in soup.find_all('meta', property='og:image')]
        bonus_imgs = soup.find_all('img', attrs={'data-oversrc': True})
        bonus_src = [img['data-oversrc'] for img in bonus_imgs]
        missing_bonus = [src for src in bonus_src if src not in og_src]
        kept_src = [src for src in og_src + missing_bonus
                    if not src.endswith("/2016/01/bonus-panel.png")]
        tag_meta = soup.find('meta', property='article:tag')
        if tag_meta:
            tags = tag_meta['content']
        else:
            tags = ""
        return {
            'title': title,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': kept_src,
            'tags': tags,
        }
643
644
645
class PenelopeBagieu(GenericNavigableComic):
    """Retriever for the comics of Penelope Bagieu's blog."""
    name = 'bagieu'
    long_name = 'Ma vie est tout a fait fascinante (Bagieu)'
    url = 'http://www.penelope-jolicoeur.com'
    _categories = ('FRANCAIS', )
    first_url = 'http://www.penelope-jolicoeur.com/2007/02/ma-vie-mon-oeuv.html'
    get_first_comic_link = simulate_first_link
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dictionary of the comic page `soup`.

        The result has the 'title', 'img', 'month', 'year' and 'day'
        keys. `link` is not used.
        """
        date_header = soup.find('h2', class_='date-header')
        # Dates are parsed with the French locale.
        day = string_to_date(date_header.string, "%A %d %B %Y", "fr_FR.utf8")
        body = soup.find('div', class_='entry-body')
        body_imgs = body.find_all('img')
        entry_header = soup.find('h3', class_='entry-header')
        title = entry_header.string
        return {
            'title': title,
            'img': [img['src'] for img in body_imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
669
670
671 View Code Duplication
class OneOneOneOneComic(GenericNavigableComic):
    """Retriever for 1111 Comics."""
    # Also on http://comics1111.tumblr.com
    # Also on https://tapastic.com/series/1111-Comics
    name = '1111'
    long_name = '1111 Comics'
    url = 'http://www.1111comics.me'
    _categories = ('ONEONEONEONE', )
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dictionary of the comic page `soup`.

        The result has the 'title', 'month', 'year', 'day' and 'img'
        keys. `link` is not used.
        """
        title_header = soup.find('h1', class_='comic-title')
        title = title_header.find('a').string
        meta_header = soup.find('header', class_='comic-meta entry-meta')
        day = string_to_date(meta_header.find('a').string, "%B %d, %Y")
        og_images = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [meta['content'] for meta in og_images],
        }
696
697
698 View Code Duplication
class AngryAtNothing(GenericEmptyComic, GenericNavigableComic):
    """Retriever for Angry at Nothing comics.

    GenericEmptyComic is listed first among the base classes, so its
    `get_next_comic` (which returns no comic) is the one in use.
    """
    # Also on http://tapastic.com/series/Comics-yeah-definitely-comics-
    # Also on http://angryatnothing.tumblr.com
    name = 'angry'
    long_name = 'Angry At Nothing'
    url = 'http://www.angryatnothing.net'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dictionary of the comic page `soup`.

        The result has the 'title', 'month', 'year', 'day' and 'img'
        keys. `link` is not used.
        """
        title_header = soup.find('h1', class_='comic-title')
        title = title_header.find('a').string
        meta_header = soup.find('header', class_='comic-meta entry-meta')
        day = string_to_date(meta_header.find('a').string, "%B %d, %Y")
        og_images = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [meta['content'] for meta in og_images],
        }
722
723
724
class NeDroid(GenericNavigableComic):
    """Class to retrieve NeDroid comics."""
    name = 'nedroid'
    long_name = 'NeDroid'
    url = 'http://nedroid.com'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        `soup` is the soup of the comic page; `link` is not used.
        Returns a dictionary with the 'short_url', 'title' (image 'alt'),
        'title2' (image 'title'), 'img' and 'num' keys, 'num' being the
        number found in the short URL.
        """
        # The site URL is escaped so that its dots only match literal dots
        # (unescaped, '.' would match any character).
        short_url_re = re.compile('^%s/\\?p=([0-9]*)' % re.escape(cls.url))
        short_url = cls.get_url_from_link(soup.find('link', rel='shortlink'))
        num = int(short_url_re.match(short_url).group(1))
        imgs = soup.find('div', id='comic').find_all('img')
        # Exactly one image is expected in the 'comic' div.
        assert len(imgs) == 1
        title = imgs[0]['alt']
        title2 = imgs[0]['title']
        return {
            'short_url': short_url,
            'title': title,
            'title2': title2,
            'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
            'num': num,
        }
750
751
752
class Garfield(GenericNavigableComic):
    """Retriever for the Garfield comics."""
    # Also on http://www.gocomics.com/garfield
    name = 'garfield'
    long_name = 'Garfield'
    url = 'https://garfield.com'
    _categories = ('GARFIELD', )
    get_first_comic_link = simulate_first_link
    first_url = 'https://garfield.com/comic/1978/06/19'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Look up the right (next) or left (previous) arrow anchor."""
        if next_:
            arrow_class = 'comic-arrow-right'
        else:
            arrow_class = 'comic-arrow-left'
        return last_soup.find('a', class_=arrow_class)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the date (parsed from the comic URL) and image URLs."""
        url = cls.get_url_from_link(link)
        date_match = re.match('^%s/comic/([0-9]*)/([0-9]*)/([0-9]*)' % cls.url, url)
        year, month, day = map(int, date_match.groups())
        display = soup.find('div', class_='comic-display')
        return {
            'month': month,
            'year': year,
            'day': day,
            'img': [img['src'] for img in display.find_all('img', class_='img-responsive')],
        }
780
781 View Code Duplication
0 ignored issues
show
This code seems to be duplicated in your project.
Loading history...
782
class Dilbert(GenericNavigableComic):
    """Retriever for the Dilbert comics."""
    # Also on http://www.gocomics.com/dilbert-classics
    name = 'dilbert'
    long_name = 'Dilbert'
    url = 'http://dilbert.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://dilbert.com/strip/1989-04-16'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor inside the right/left navigation div, or None."""
        div_class = 'nav-comic nav-right' if next_ else 'nav-comic nav-left'
        nav_div = last_soup.find('div', class_=div_class)
        if nav_div:
            return nav_div.find('a')
        return None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect the comic metadata exposed through the page's meta tags."""
        def meta_content(prop):
            """Content of the first meta tag with the given property."""
            return soup.find('meta', property=prop)['content']

        title = meta_content('og:title')
        image_metas = soup.find_all('meta', property='og:image')
        desc = meta_content('og:description')
        published = string_to_date(meta_content('article:publish_date'), "%B %d, %Y")
        author = meta_content('article:author')
        tags = meta_content('article:tag')
        return {
            'title': title,
            'description': desc,
            'img': [meta['content'] for meta in image_metas],
            'author': author,
            'tags': tags,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
818
819
820
class VictimsOfCircumsolar(GenericNavigableComic):
    """Retriever for the VictimsOfCircumsolar comics."""
    # Also on http://victimsofcomics.tumblr.com
    name = 'circumsolar'
    long_name = 'Victims Of Circumsolar'
    url = 'http://www.victimsofcircumsolar.com'
    get_navi_link = get_a_navi_comicnavnext_navinext
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.victimsofcircumsolar.com/comic/modern-addiction'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, description and image URLs of a comic page.

        Title and description are taken from the last 'og:title' and
        'og:description' meta tags of the page.
        """
        # Date is on the archive page
        title_metas = soup.find_all('meta', property='og:title')
        title = title_metas[-1]['content']
        desc_metas = soup.find_all('meta', property='og:description')
        desc = desc_metas[-1]['content']
        imgs = soup.find('div', id='comic').find_all('img')
        assert all(img['title'] == img['alt'] == title for img in imgs)
        return {
            'title': title,
            'description': desc,
            'img': [img['src'] for img in imgs],
        }
843
844
845
class ThreeWordPhrase(GenericNavigableComic):
    """Retriever for the Three Word Phrase comics."""
    # Also on http://www.threewordphrase.tumblr.com
    name = 'threeword'
    long_name = 'Three Word Phrase'
    url = 'http://threewordphrase.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the parent element of the 'first' button image on the home page."""
        home_soup = get_soup_at_url(cls.url)
        return home_soup.find('img', src='/firstlink.gif').parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the parent of the next/previous button image, or None if it has no href."""
        button_src = '/nextlink.gif' if next_ else '/prevlink.gif'
        link = last_soup.find('img', src=button_src).parent
        if link.get('href') is None:
            return None
        return link

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the page title, the images' alt texts and the image URLs.

        Images whose source ends with one of the known site-decoration
        file names are left out.
        """
        ignored_suffixes = ('link.gif', '32.png', 'twpbookad.jpg',
                            'merchad.jpg', 'header.gif', 'tipjar.jpg')
        title = soup.find('title')
        imgs = []
        for img in soup.find_all('img'):
            if not img['src'].endswith(ignored_suffixes):
                imgs.append(img)
        if title:
            title_str = title.string
        else:
            title_str = None
        return {
            'title': title_str,
            'title2': '  '.join(img.get('alt') for img in imgs if img.get('alt')),
            'img': [urljoin_wrapper(cls.url, img['src']) for img in imgs],
        }
877
878
879
class DeadlyPanel(GenericEmptyComic, GenericNavigableComic):
    """Retriever for the Deadly Panel comics."""
    # Also on https://tapastic.com/series/deadlypanel
    # Also on http://deadlypanel.tumblr.com
    name = 'deadly'
    long_name = 'Deadly Panel'
    url = 'http://www.deadlypanel.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the URLs of the images found in the 'comic' div."""
        comic_div = soup.find('div', id='comic')
        imgs = comic_div.find_all('img')
        assert all(img['alt'] == img['title'] for img in imgs)
        return {
            'img': [img['src'] for img in imgs],
        }
897
898 View Code Duplication
0 ignored issues
show
This code seems to be duplicated in your project.
Loading history...
899
class TheGentlemanArmchair(GenericNavigableComic):
    """Retriever for The Gentleman Armchair comics."""
    name = 'gentlemanarmchair'
    long_name = 'The Gentleman Armchair'
    url = 'http://thegentlemansarmchair.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return images, title, author and publication date of a comic page."""
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        date_span = soup.find('span', class_='post-date')
        published = string_to_date(date_span.string, "%B %d, %Y")
        comic_div = soup.find('div', id='comic')
        return {
            'img': [img['src'] for img in comic_div.find_all('img')],
            'title': title,
            'author': author,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
923
924 View Code Duplication
0 ignored issues
show
This code seems to be duplicated in your project.
Loading history...
925
class ImogenQuest(GenericNavigableComic):
    """Retriever for the Imogen Quest comics."""
    # Also on http://imoquest.tumblr.com
    name = 'imogen'
    long_name = 'Imogen Quest'
    url = 'http://imogenquest.net'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return date, images, titles and author of a comic page.

        'title2' is the 'title' attribute of the first image of the
        'comicpane' div.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        date_span = soup.find('span', class_='post-date')
        published = string_to_date(date_span.string, '%B %d, %Y')
        pane = soup.find('div', class_='comicpane')
        imgs = pane.find_all('img')
        assert all(img['alt'] == img['title'] for img in imgs)
        hover_text = imgs[0]['title']
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [img['src'] for img in imgs],
            'title': title,
            'title2': hover_text,
            'author': author,
        }
953
954 View Code Duplication
0 ignored issues
show
This code seems to be duplicated in your project.
Loading history...
955
class MyExtraLife(GenericNavigableComic):
    """Retriever for the My Extra Life comics."""
    name = 'extralife'
    long_name = 'My Extra Life'
    url = 'http://www.myextralife.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Look up the 'first comic' navigation anchor on the home page."""
        home_soup = get_soup_at_url(cls.url)
        return home_soup.find('a', class_='comic_nav_link first_comic_link')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, image URLs (empty sources skipped) and date of a comic page."""
        title = soup.find("h1", class_="comic_title").string
        date_span = soup.find("span", class_="comic_date")
        published = string_to_date(date_span.string, "%B %d, %Y")
        imgs = soup.find_all("img", class_="comic")
        assert all(img['alt'] == img['title'] == title for img in imgs)
        return {
            'title': title,
            'img': [img['src'] for img in imgs if img["src"]],
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
982
983
984
class SaturdayMorningBreakfastCereal(GenericNavigableComic):
    """Retriever for the Saturday Morning Breakfast Cereal comics."""
    # Also on http://www.gocomics.com/saturday-morning-breakfast-cereal
    # Also on http://smbc-comics.tumblr.com
    name = 'smbc'
    long_name = 'Saturday Morning Breakfast Cereal'
    url = 'http://www.smbc-comics.com'
    _categories = ('SMBC', )
    get_navi_link = get_a_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Look up the anchor with rel='start' on the home page."""
        home_soup = get_soup_at_url(cls.url)
        return home_soup.find('a', rel='start')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, image URLs and date of a comic page.

        The image list holds the main 'cc-comic' image followed, when the
        page has an 'aftercomic' div with a non-empty image source, by
        that extra image.
        """
        main_img = soup.find('img', id='cc-comic')
        img_urls = [main_img['src']]
        after_div = soup.find('div', id='aftercomic')
        if after_div:
            extra_url = after_div.find('img')['src']
            if extra_url:
                img_urls.append(extra_url)
        publish_div = soup.find('div', class_='cc-publishtime')
        published = string_to_date(publish_div.contents[0], "%B %d, %Y")
        return {
            'title': main_img['title'],
            'img': [urljoin_wrapper(cls.url, img_url) for img_url in img_urls],
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
1016
1017
1018
class PerryBibleFellowship(GenericListableComic):
    """Retriever for the Perry Bible Fellowship comics."""
    name = 'pbf'
    long_name = 'Perry Bible Fellowship'
    url = 'http://pbfcomics.com'
    get_url_from_archive_element = join_cls_url_to_href

    @classmethod
    def get_archive_elements(cls):
        """Return, in reversed page order, the home page anchors whose href looks like '/<digits>/'."""
        home_soup = get_soup_at_url(cls.url)
        comic_links = home_soup.find_all('a', href=re.compile('^/[0-9]*/$'))
        return reversed(comic_links)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return number, name, image URL and file prefix for a comic.

        The number is read from the archive link's 'name' attribute and is
        checked against its href; a single matching image is expected.
        """
        url = cls.get_url_from_archive_element(link)
        comic_img_re = re.compile('^/archive_b/PBF.*')
        name = link.string
        num = int(link['name'])
        href = link['href']
        assert href == '/%d/' % num
        imgs = soup.find_all('img', src=comic_img_re)
        assert len(imgs) == 1
        assert imgs[0]['alt'] == name
        info = {'num': num, 'name': name}
        info['img'] = [urljoin_wrapper(url, img['src']) for img in imgs]
        info['prefix'] = '%d-' % num
        return info
1048
1049 View Code Duplication
0 ignored issues
show
This code seems to be duplicated in your project.
Loading history...
1050
class Mercworks(GenericNavigableComic):
    """Retriever for the Mercworks comics."""
    # Also on http://mercworks.tumblr.com
    name = 'mercworks'
    long_name = 'Mercworks'
    url = 'http://mercworks.net'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return images, title, description and date read from the page's meta tags.

        The description is an empty string when the page has no
        'og:description' meta tag.
        """
        title = soup.find('meta', property='og:title')['content']
        desc_meta = soup.find('meta', property='og:description')
        if desc_meta:
            desc = desc_meta['content']
        else:
            desc = ""
        published_time = soup.find('meta', property='article:published_time')['content']
        # Only the first 10 characters are kept: they hold the date part parsed below.
        published = string_to_date(published_time[:10], "%Y-%m-%d")
        image_metas = soup.find_all('meta', property='og:image')
        return {
            'img': [meta['content'] for meta in image_metas],
            'title': title,
            'desc': desc,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
1076
1077
1078
class BerkeleyMews(GenericListableComic):
    """Retriever for the Berkeley Mews comics."""
    # Also on http://mews.tumblr.com
    # Also on http://www.gocomics.com/berkeley-mews
    name = 'berkeley'
    long_name = 'Berkeley Mews'
    url = 'http://www.berkeleymews.com'
    _categories = ('BERKELEY', )
    get_url_from_archive_element = get_href
    comic_num_re = re.compile('%s/\\?p=([0-9]*)$' % url)

    @classmethod
    def get_archive_elements(cls):
        """Return, in reversed page order, the archive page anchors matching comic_num_re."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, "?page_id=2"))
        comic_links = archive_soup.find_all('a', href=cls.comic_num_re)
        return reversed(comic_links)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return number, titles, image URL and date of a comic.

        The number comes from the comic URL and the date from the image
        URL; the main title is the text of the archive link.
        """
        comic_date_re = re.compile('.*/([0-9]*)-([0-9]*)-([0-9]*)-.*')
        url = cls.get_url_from_archive_element(link)
        num = int(cls.comic_num_re.match(url).group(1))
        img = soup.find('div', id='comic').find('img')
        assert img['alt'] == img['title']
        hover_text = img['title']
        img_url = img['src']
        year, month, day = map(int, comic_date_re.match(img_url).groups())
        return {
            'num': num,
            'title': link.string,
            'title2': hover_text,
            'img': [img_url],
            'year': year,
            'month': month,
            'day': day,
        }
1114
1115
1116
class GenericBouletCorp(GenericNavigableComic):
    """Shared retrieval logic for the BouletCorp comics in different languages."""
    # Also on http://bouletcorp.tumblr.com
    _categories = ('BOULET', )
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the first anchor of the 'centered_nav' div of the home page."""
        nav_div = get_soup_at_url(cls.url).find('div', id='centered_nav')
        return nav_div.find_all('a')[0]

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return images, title, image hover texts and date of a comic.

        The date is parsed from the comic URL; images without a 'src'
        attribute are left out of the image list.
        """
        url = cls.get_url_from_link(link)
        date_match = re.match('^%s/([0-9]*)/([0-9]*)/([0-9]*)/' % cls.url, url)
        year, month, day = map(int, date_match.groups())
        story = soup.find('div', id='notes').find('div', class_='storycontent')
        imgs = story.find_all('img')
        texts = '  '.join(filter(None, (img.get('title') for img in imgs)))
        title = soup.find('title').string
        img_urls = [convert_iri_to_plain_ascii_uri(img['src'])
                    for img in imgs
                    if img.get('src') is not None]
        return {
            'img': img_urls,
            'title': title,
            'texts': texts,
            'year': year,
            'month': month,
            'day': day,
        }
1144
1145
1146
class BouletCorp(GenericBouletCorp):
    """Retriever for the BouletCorp comics served from www.bouletcorp.com."""
    name = 'boulet'
    long_name = 'Boulet Corp'
    url = 'http://www.bouletcorp.com'
    _categories = ('FRANCAIS', )
1152
1153
1154
class BouletCorpEn(GenericBouletCorp):
    """Retriever for the BouletCorp comics served from english.bouletcorp.com."""
    name = 'boulet_en'
    long_name = 'Boulet Corp English'
    url = 'http://english.bouletcorp.com'
1159
1160 View Code Duplication
0 ignored issues
show
This code seems to be duplicated in your project.
Loading history...
1161
class AmazingSuperPowers(GenericNavigableComic):
    """Retriever for the Amazing Super Powers comics."""
    name = 'asp'
    long_name = 'Amazing Super Powers'
    url = 'http://www.amazingsuperpowers.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, author, image URLs and date of a comic page.

        The title is the space-joined 'title' attributes of the images
        found in the 'comic' div.
        """
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        date_span = soup.find('span', class_='post-date')
        published = string_to_date(date_span.string, "%B %d, %Y")
        comic_div = soup.find('div', id='comic')
        imgs = comic_div.find_all('img')
        title = ' '.join(img['title'] for img in imgs)
        assert all(img['alt'] == img['title'] for img in imgs)
        return {
            'title': title,
            'author': author,
            'img': [img['src'] for img in imgs],
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
1186
1187
1188
class ToonHole(GenericNavigableComic):
    """Retriever for the Toon Hole comics."""
    # Also on http://tapastic.com/series/TOONHOLE
    name = 'toonhole'
    long_name = 'Toon Hole'
    url = 'http://www.toonhole.com'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return short URL, title, date and image URLs of a comic page.

        The title is the 'alt' text of the first image of the 'comic' div,
        or an empty string when that div holds no image.
        """
        short_url = soup.find('link', rel='shortlink')['href']
        entry_meta = soup.find('div', class_='entry-meta')
        published = string_to_date(entry_meta.contents[0].strip(), "%B %d, %Y")
        imgs = soup.find('div', id='comic').find_all('img')
        title = ""
        if imgs:
            first_img = imgs[0]
            title = first_img['alt']
            assert first_img['title'] == title
        return {
            'short_url': short_url,
            'title': title,
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [convert_iri_to_plain_ascii_uri(img['src']) for img in imgs],
        }
1218
1219
1220
class Channelate(GenericNavigableComic):
    """Retriever for the Channelate comics."""
    name = 'channelate'
    long_name = 'Channelate'
    url = 'http://www.channelate.com'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, author, date and image URLs of a comic page.

        When the page has an 'extrapanelbutton' div, the page it links to
        is fetched as well and its 'extrapanelimage' images are appended
        to the image list; 'url_extra' is None otherwise.
        """
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        date_span = soup.find('span', class_='post-date')
        published = string_to_date(date_span.string, '%Y/%m/%d')
        title = soup.find('meta', property='og:title')['content']
        comic_div = soup.find('div', id='comic')
        imgs = list(comic_div.find_all('img')) if comic_div else []
        extra_url = None
        extra_button = soup.find('div', id='extrapanelbutton')
        if extra_button:
            extra_url = extra_button.find('a')['href']
            imgs += get_soup_at_url(extra_url).find_all('img', class_='extrapanelimage')
        return {
            'url_extra': extra_url,
            'title': title,
            'author': author,
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [urljoin_wrapper(cls.url, img['src']) for img in imgs],
        }
1254
1255
1256
class CyanideAndHappiness(GenericNavigableComic):
    """Class to retrieve Cyanide And Happiness comics."""
    name = 'cyanide'
    long_name = 'Cyanide and Happiness'
    url = 'http://explosm.net'
    _categories = ('NSFW', )
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        Returns the result of looking up the anchor titled 'Oldest comic'
        on the home page.
        """
        return get_soup_at_url(cls.url).find('a', title='Oldest comic')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns the navigation anchor, or None when the page has no such
        anchor or when the anchor carries no 'href' attribute.
        """
        # NOTE(review): the trailing space in 'previous-comic ' is kept as is;
        # presumably it mirrors the site's markup - confirm before changing.
        link = last_soup.find('a', class_='next-comic' if next_ else 'previous-comic ')
        # find() gives None when nothing matches: guard before reading 'href'
        # instead of failing with an AttributeError.
        if link is None or link.get('href') is None:
            return None
        return link

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The comic number is the second-to-last '/'-separated component of
        the 'og:url' meta content; the author is the credit line with its
        leading 'by ' removed.
        """
        url2 = soup.find('meta', property='og:url')['content']
        num = int(url2.split('/')[-2])
        date_str = soup.find('h3').find('a').string
        day = string_to_date(date_str, '%Y.%m.%d')
        author = soup.find('small', class_="author-credit-name").string
        assert author.startswith('by ')
        author = author[3:]
        imgs = soup.find_all('img', id='main-comic')
        return {
            'num': num,
            'author': author,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'prefix': '%d-' % num,
            'img': [convert_iri_to_plain_ascii_uri(urljoin_wrapper(cls.url, i['src'])) for i in imgs]
        }
1295
1296
1297
class MrLovenstein(GenericComic):
    """Class to retrieve Mr Lovenstein comics."""
    # Also on https://tapastic.com/series/MrLovenstein
    name = 'mrlovenstein'
    long_name = 'Mr. Lovenstein'
    url = 'http://www.mrlovenstein.com'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generator to get the next comic. Implementation of GenericComic's abstract method.

        The range of comic numbers is derived from the '/comic/<num>' links
        of the home page; when last_comic is given, retrieval starts at the
        number following last_comic['num']. Each comic page is then fetched
        and described by a dict with its url, number, image hover texts,
        image URLs and meta description.
        """
        # TODO: more info from http://www.mrlovenstein.com/archive
        comic_num_re = re.compile('^/comic/([0-9]*)$')
        # Loop-invariant pattern: compiled once instead of once per comic.
        comic_img_re = re.compile('^/images/comics/')
        nums = [int(comic_num_re.match(link['href']).groups()[0])
                for link in get_soup_at_url(cls.url).find_all('a', href=comic_num_re)]
        first, last = min(nums), max(nums)
        if last_comic:
            first = last_comic['num'] + 1
        for num in range(first, last + 1):
            url = urljoin_wrapper(cls.url, '/comic/%d' % num)
            soup = get_soup_at_url(url)
            # Images are listed in the reverse of their order in the page.
            imgs = list(reversed(soup.find_all('img', src=comic_img_re)))
            description = soup.find('meta', attrs={'name': 'description'})['content']
            yield {
                'url': url,
                'num': num,
                'texts': '  '.join(t for t in (i.get('title') for i in imgs) if t),
                'img': [urljoin_wrapper(url, i['src']) for i in imgs],
                'description': description,
            }
1327
1328
1329
class DinosaurComics(GenericListableComic):
    """Retriever for the Dinosaur Comics comics."""
    name = 'dinosaur'
    long_name = 'Dinosaur Comics'
    url = 'http://www.qwantz.com'
    get_url_from_archive_element = get_href
    comic_link_re = re.compile('^%s/index.php\\?comic=([0-9]*)$' % url)

    @classmethod
    def get_archive_elements(cls):
        """Return, in reversed page order, the comic anchors of the archive page except the first one."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive.php'))
        comic_links = archive_soup.find_all('a', href=cls.comic_link_re)
        # first link is random -> skip it
        return reversed(comic_links[1:])

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return date, image URL, title, text and number of a comic.

        Date and text come from the archive link (its string and the
        string of its next sibling), the number from the comic URL.
        """
        url = cls.get_url_from_archive_element(link)
        num = int(cls.comic_link_re.match(url).group(1))
        date_str = link.string
        text = link.next_sibling.string
        cleaned_date = remove_st_nd_rd_th_from_date(date_str)
        published = string_to_date(cleaned_date, "%B %d, %Y")
        img = soup.find('img', src=re.compile('^%s/comics/' % cls.url))
        return {
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [img.get('src')],
            'title': img.get('title'),
            'text': text,
            'num': num,
        }
0 ignored issues
show
This code seems to be duplicated in your project.
Loading history...
1362
1363
1364
class ButterSafe(GenericListableComic):
    """Retriever for the Butter Safe comics."""
    name = 'butter'
    long_name = 'ButterSafe'
    url = 'http://buttersafe.com'
    get_url_from_archive_element = get_href
    comic_link_re = re.compile('^%s/([0-9]*)/([0-9]*)/([0-9]*)/.*' % url)

    @classmethod
    def get_archive_elements(cls):
        """Return, in reversed page order, the archive page anchors matching comic_link_re."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive/'))
        comic_links = archive_soup.find_all('a', href=cls.comic_link_re)
        return reversed(comic_links)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, date (parsed from the comic URL) and image URL of a comic."""
        url = cls.get_url_from_archive_element(link)
        title = link.string
        year, month, day = map(int, cls.comic_link_re.match(url).groups())
        img = soup.find('div', id='comic').find('img')
        assert img['alt'] == title
        info = {'title': title, 'day': day, 'month': month, 'year': year}
        info['img'] = [img['src']]
        return info
1392
1393
1394
class CalvinAndHobbes(GenericComic):
    """Class to retrieve Calvin and Hobbes comics."""
    # Also on http://www.gocomics.com/calvinandhobbes/
    name = 'calvin'
    long_name = 'Calvin and Hobbes'
    # This is not through any official webpage but eh...
    url = 'http://marcel-oehler.marcellosendos.ch/comics/ch/'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generator to get the next comic. Implementation of GenericComic's abstract method.

        Walks the '<year>/<month>/' links of the index page and, for every
        month that may still hold strips newer than the last retrieved one,
        fetches the month page and yields one dict per image dated strictly
        after the latest strip seen so far. Without last_comic, retrieval
        starts after 1985-11-01.
        """
        last_date = get_date_for_comic(
            last_comic) if last_comic else date(1985, 11, 1)
        link_re = re.compile('^([0-9]*)/([0-9]*)/')
        for link in get_soup_at_url(cls.url).find_all('a', href=link_re):
            url = link['href']
            # The textual year/month are kept: they are reused verbatim in the
            # image pattern and in the image URL below.
            year, month = link_re.match(url).groups()
            year_num, month_num = int(year), int(month)
            # The first of the month plus 31 days is at or after the end of the
            # month: months lying entirely before last_date are skipped
            # without fetching their page.
            if date(year_num, month_num, 1) + timedelta(days=31) < last_date:
                continue
            img_re = re.compile('^%s%s([0-9]*)' % (year, month))
            month_url = urljoin_wrapper(cls.url, url)
            for img in get_soup_at_url(month_url).find_all('img', src=img_re):
                img_src = img['src']
                day = int(img_re.match(img_src).groups()[0])
                comic_date = date(year_num, month_num, day)
                if comic_date > last_date:
                    yield {
                        'url': month_url,
                        'year': year_num,
                        'month': month_num,
                        'day': day,
                        'img': ['%s%s/%s/%s' % (cls.url, year, month, img_src)],
                    }
                    last_date = comic_date
0 ignored issues
show
This code seems to be duplicated in your project.
Loading history...
1428
1429
1430
class AbstruseGoose(GenericListableComic):
    """Retriever for the AbstruseGoose Comics."""
    name = 'abstruse'
    long_name = 'Abstruse Goose'
    url = 'http://abstrusegoose.com'
    get_url_from_archive_element = get_href
    comic_url_re = re.compile('^%s/([0-9]*)$' % url)
    comic_img_re = re.compile('^%s/strips/.*' % url)

    @classmethod
    def get_archive_elements(cls):
        """Return the archive page anchors whose href matches comic_url_re."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive'))
        return archive_soup.find_all('a', href=cls.comic_url_re)

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Return number, title and image URL of a comic.

        The number is parsed from the comic URL, the title is the text of
        the archive element and the image is the first one whose source
        matches comic_img_re.
        """
        comic_url = cls.get_url_from_archive_element(archive_elt)
        num = int(cls.comic_url_re.match(comic_url).group(1))
        info = {'num': num, 'title': archive_elt.string}
        strip_img = soup.find('img', src=cls.comic_img_re)
        info['img'] = [strip_img['src']]
        return info
1453
1454
1455
class PhDComics(GenericNavigableComic):
    """Retriever for the PHD Comics."""
    name = 'phd'
    long_name = 'PhD Comics'
    url = 'http://phdcomics.com/comics/archive.php'

    @classmethod
    def get_first_comic_link(cls):
        """Return the parent of the 'first' button image, or None when it is absent."""
        button = get_soup_at_url(cls.url).find(
            'img', src='http://phdcomics.com/comics/images/first_button.gif')
        if button is None:
            return None
        return button.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the parent of the next/prev button image, or None when it is absent."""
        direction = 'next' if next_ else 'prev'
        button_src = 'http://phdcomics.com/comics/images/%s_button.gif' % direction
        button = last_soup.find('img', src=button_src)
        if button is None:
            return None
        return button.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the image URLs ('og:image') and title ('twitter:title') of a comic page."""
        title_meta = soup.find('meta', attrs={'name': 'twitter:title'})
        title = title_meta['content']
        image_metas = soup.find_all('meta', property='og:image')
        return {
            'img': [meta['content'] for meta in image_metas],
            'title': title,
        }
1484
1485 View Code Duplication
0 ignored issues
show
This code seems to be duplicated in your project.
Loading history...
1486
class Octopuns(GenericEmptyComic, GenericNavigableComic):
    """Class to retrieve Octopuns comics.

    Navigation relies on the First/Next/Back button images: the parent tag of
    each image is used as the link.
    """
    # Also on http://octopuns.tumblr.com
    name = 'octopuns'
    long_name = 'Octopuns'
    url = 'http://www.octopuns.net'

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        Returns the parent of the 'First.png' image, or None when that image
        cannot be found on the page at cls.url.
        """
        img = get_soup_at_url(cls.url).find('img', src=re.compile('.*/First.png'))
        # find() returns None when nothing matches: do not dereference it.
        return None if img is None else img.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns None when the button image is missing or when its parent tag
        carries no href attribute.
        """
        img = last_soup.find('img', src=re.compile('.*/Next.png' if next_ else '.*/Back.png'))
        if img is None:
            return None
        link = img.parent
        return None if link.get('href') is None else link

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with the image urls (link rel='image_src'), the post
        title and the date parsed from the 'date-header' heading.
        """
        title = soup.find('h3', class_='post-title entry-title').string
        date_str = soup.find('h2', class_='date-header').string
        day = string_to_date(date_str, "%A, %B %d, %Y")
        imgs = soup.find_all('link', rel='image_src')
        return {
            'img': [i['href'] for i in imgs],
            'title': title,
            'day': day.day,
            'month': day.month,
            'year': day.year,
        }
1518
1519
1520
class Quarktees(GenericNavigableComic):
    """Retriever for the comics published on the Quarktees news blog."""
    name = 'quarktees'
    long_name = 'Quarktees'
    url = 'http://www.quarktees.com/blogs/news'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.quarktees.com/blogs/news/12486621-coming-soon'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor with id 'article-next' (or 'article-prev' when going back)."""
        if next_:
            anchor_id = 'article-next'
        else:
            anchor_id = 'article-prev'
        return last_soup.find('a', id=anchor_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect the og:title and every image of the 'single-article' div."""
        og_title = soup.find('meta', property='og:title')['content']
        article_imgs = soup.find('div', class_='single-article').find_all('img')
        return {
            'title': og_title,
            # Image sources are joined onto the class url.
            'img': [urljoin_wrapper(cls.url, tag['src']) for tag in article_imgs],
        }
1544
1545
1546
class OverCompensating(GenericNavigableComic):
    """Retriever for the Over Compensating comics."""
    name = 'compensating'
    long_name = 'Over Compensating'
    url = 'http://www.overcompensating.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the anchor whose href ends with 'comic=1'."""
        home = get_soup_at_url(cls.url)
        return home.find('a', href=re.compile('comic=1$'))

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the navigation anchor, identified by its title attribute."""
        wanted_title = 'next comic' if next_ else 'go back already'
        return last_soup.find('a', title=wanted_title)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect the comic number (from the url), the image and its title."""
        comic_url = cls.get_url_from_link(link)
        # The number is whatever follows 'comic=' at the end of the url.
        num_match = re.match('.*comic=([0-9]*)$', comic_url)
        comic_number = int(num_match.group(1))
        strip = soup.find('img', src=re.compile('^/oc/comics/.*'))
        return {
            'num': comic_number,
            'img': [urljoin_wrapper(comic_url, strip['src'])],
            'title': strip.get('title')
        }
1576
1577
1578
class Oglaf(GenericNavigableComic):
    """Class to retrieve Oglaf comics.

    Navigation links are found through marker divs (ids 'st', 'nx', 'pvs'):
    the parent tag of the marker is used as the link.
    """
    name = 'oglaf'
    long_name = 'Oglaf [NSFW]'
    url = 'http://oglaf.com'
    _categories = ('NSFW', )
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        Returns the parent of the div with id 'st', or None when that div is
        not found (same convention as get_navi_link).
        """
        div = get_soup_at_url(cls.url).find("div", id="st")
        return div.parent if div else None

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns the parent of the div with id 'nx' (next) or 'pvs' (previous),
        or None when that div is not found.
        """
        div = last_soup.find("div", id="nx" if next_ else "pvs")
        return div.parent if div else None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Exactly one title image (inside the div with id 'tt') and one strip
        image (id 'strip') are expected; the description joins their titles.
        """
        title = soup.find('title').string
        title_imgs = soup.find('div', id='tt').find_all('img')
        assert len(title_imgs) == 1
        strip_imgs = soup.find_all('img', id='strip')
        assert len(strip_imgs) == 1
        imgs = title_imgs + strip_imgs
        desc = ' '.join(i['title'] for i in imgs)
        return {
            'title': title,
            'img': [i['src'] for i in imgs],
            'description': desc,
        }
1612
1613
1614
class ScandinaviaAndTheWorld(GenericNavigableComic):
    """Retriever for the Scandinavia And The World comics."""
    name = 'satw'
    long_name = 'Scandinavia And The World'
    url = 'http://satwcomic.com'
    get_first_comic_link = simulate_first_link
    first_url = 'http://satwcomic.com/sweden-denmark-and-norway'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor whose accesskey is 'n' (next) or 'p' (previous)."""
        if next_:
            key = 'n'
        else:
            key = 'p'
        return last_soup.find('a', accesskey=key)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect title, description and images from the page metadata and markup."""
        label = soup.find('meta', attrs={'name': 'twitter:label1'})['content']
        og_description = soup.find('meta', property='og:description')['content']
        pictures = soup.find_all('img', itemprop="image")
        return {
            'title': label,
            'description': og_description,
            'img': [pic['src'] for pic in pictures],
        }
1638
1639
1640
class SomethingOfThatIlk(GenericEmptyComic):
    """Placeholder for the Something Of That Ilk comics."""
    # Does not exist anymore
    name = 'somethingofthatilk'
    long_name = 'Something Of That Ilk'
    url = 'http://www.somethingofthatilk.com'
1645
1646
1647
class InfiniteMonkeyBusiness(GenericNavigableComic):
    """Retriever for the Infinite Monkey Business comics."""
    name = 'monkey'
    long_name = 'Infinite Monkey Business'
    url = 'http://infinitemonkeybusiness.net'
    get_navi_link = get_a_navi_comicnavnext_navinext
    get_first_comic_link = simulate_first_link
    first_url = 'http://infinitemonkeybusiness.net/comic/pillory/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect the og:title and the images found in the div with id 'comic'."""
        og_title = soup.find('meta', property='og:title')['content']
        comic_div = soup.find('div', id='comic')
        return {
            'title': og_title,
            'img': [tag['src'] for tag in comic_div.find_all('img')],
        }
1665
1666
1667
class Wondermark(GenericListableComic):
    """Retriever for the Wondermark comics, listed from the archive page."""
    name = 'wondermark'
    long_name = 'Wondermark'
    url = 'http://wondermark.com'
    get_url_from_archive_element = get_href

    @classmethod
    def get_archive_elements(cls):
        """Return the rel='bookmark' anchors of the archive page, in reversed order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive/'))
        bookmarks = archive_soup.find_all('a', rel='bookmark')
        return reversed(bookmarks)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect date, image, title, alt text and tags for a page.

        When the page has no div with id 'comic', the image list is empty and
        both title and alt are empty strings.
        """
        raw_date = soup.find('div', class_='postdate').find('em').string
        published = string_to_date(remove_st_nd_rd_th_from_date(raw_date), "%B %d, %Y")
        # Defaults used for pages without a comic div.
        img_src, alt, title = [], '', ''
        comic_div = soup.find('div', id='comic')
        if comic_div:
            picture = comic_div.find('img')
            img_src = [picture['src']]
            alt = picture['alt']
            assert alt == picture['title']
            title = soup.find('meta', property='og:title')['content']
        return {
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': img_src,
            'title': title,
            'alt': alt,
            'tags': ' '.join(
                tag.string
                for tag in soup.find('div', class_='postmeta').find_all('a', rel='tag')),
        }
1704
1705
1706
class WarehouseComic(GenericNavigableComic):
    """Retriever for the Warehouse Comic strips."""
    name = 'warehouse'
    long_name = 'Warehouse Comic'
    url = 'http://warehousecomic.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect the images, the post title and the post date."""
        post_title = soup.find('h2', class_='post-title').string
        published = string_to_date(
            soup.find('span', class_='post-date').string, "%B %d, %Y")
        strips = soup.find('div', id='comic').find_all('img')
        return {
            'img': [strip['src'] for strip in strips],
            'title': post_title,
            'day': published.day,
            'month': published.month,
            'year': published.year,
        }
1728
1729
1730
class JustSayEh(GenericNavigableComic):
    """Retriever for the Just Say Eh comics."""
    # Also on http://tapastic.com/series/Just-Say-Eh
    name = 'justsayeh'
    long_name = 'Just Say Eh'
    url = 'http://www.justsayeh.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect the images, the post title and the alt text of the first image.

        Every image is expected to have identical 'alt' and 'title' attributes.
        """
        post_title = soup.find('h2', class_='post-title').string
        strips = soup.find('div', id='comic').find_all('img')
        assert all(strip['alt'] == strip['title'] for strip in strips)
        first_alt = strips[0]['alt']
        return {
            'img': [strip['src'] for strip in strips],
            'title': post_title,
            'alt': first_alt,
        }
1751
1752 View Code Duplication
0 ignored issues
show
This code seems to be duplicated in your project.
Loading history...
1753
class MouseBearComedy(GenericNavigableComic):
    """Retriever for the Mouse Bear Comedy comics."""
    # Also on http://mousebearcomedy.tumblr.com
    name = 'mousebear'
    long_name = 'Mouse Bear Comedy'
    url = 'http://www.mousebearcomedy.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect date, images, title and author of a post.

        Every image is expected to carry the post title as both 'alt' and 'title'.
        """
        post_title = soup.find('h2', class_='post-title').string
        post_author = soup.find('span', class_='post-author').find('a').string
        published = string_to_date(
            soup.find('span', class_='post-date').string, '%B %d, %Y')
        strips = soup.find('div', id='comic').find_all('img')
        assert all(strip['alt'] == strip['title'] == post_title for strip in strips)
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [strip['src'] for strip in strips],
            'title': post_title,
            'author': post_author,
        }
0 ignored issues
show
This code seems to be duplicated in your project.
Loading history...
1779
1780
1781
class BigFootJustice(GenericNavigableComic):
    """Retriever for the Big Foot Justice comics."""
    # Also on http://tapastic.com/series/bigfoot-justice
    name = 'bigfoot'
    long_name = 'Big Foot Justice'
    url = 'http://bigfootjustice.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect the images; the title is the space-joined image titles.

        Every image is expected to have identical 'title' and 'alt' attributes.
        """
        strips = soup.find('div', id='comic').find_all('img')
        assert all(strip['title'] == strip['alt'] for strip in strips)
        joined_titles = ' '.join(strip['title'] for strip in strips)
        return {
            'img': [strip['src'] for strip in strips],
            'title': joined_titles,
        }
1800
1801
1802
class RespawnComic(GenericNavigableComic):
    """Class to retrieve Respawn Comic."""
    # Also on http://respawncomic.tumblr.com
    name = 'respawn'
    long_name = 'Respawn Comic'
    # No trailing whitespace: the value is a url, not free text.
    url = 'http://respawncomic.com'
    _categories = ('RESPAWN', )
    get_navi_link = get_a_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://respawncomic.com/comic/c0001/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Title, author and publication date come from the page metadata; the
        images are the og:image entries minus a fixed set of urls to skip.
        """
        title = soup.find('meta', property='og:title')['content']
        author = soup.find('meta', attrs={'name': 'shareaholic:article_author_name'})['content']
        date_str = soup.find('meta', attrs={'name': 'shareaholic:article_published_time'})['content']
        # Only the first 10 characters are parsed (expected to be YYYY-MM-DD).
        date_str = date_str[:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        imgs = soup.find_all('meta', property='og:image')
        # og:image urls that are excluded from the comic images.
        skip_imgs = {
            'http://respawncomic.com/wp-content/uploads/2016/03/site/HAROLD2.png',
            'http://respawncomic.com/wp-content/uploads/2016/03/site/DEVA.png'
        }
        return {
            'title': title,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [i['content'] for i in imgs if i['content'] not in skip_imgs],
        }
1834
1835 View Code Duplication
0 ignored issues
show
This code seems to be duplicated in your project.
Loading history...
1836
class SafelyEndangered(GenericNavigableComic):
    """Retriever for the Safely Endangered comics."""
    # Also on http://tumblr.safelyendangered.com
    name = 'endangered'
    long_name = 'Safely Endangered'
    url = 'http://www.safelyendangered.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.safelyendangered.com/comic/ignored/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect date, images, title and the alt text of the first image.

        Every image is expected to have identical 'alt' and 'title' attributes.
        """
        post_title = soup.find('h2', class_='post-title').string
        published = string_to_date(
            soup.find('span', class_='post-date').string, '%B %d, %Y')
        strips = soup.find('div', id='comic').find_all('img')
        first_alt = strips[0]['alt']
        assert all(strip['alt'] == strip['title'] for strip in strips)
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [strip['src'] for strip in strips],
            'title': post_title,
            'alt': first_alt,
        }
1863
1864 View Code Duplication
0 ignored issues
show
This code seems to be duplicated in your project.
Loading history...
1865
class PicturesInBoxes(GenericNavigableComic):
    """Retriever for the Pictures In Boxes comics."""
    # Also on http://picturesinboxescomic.tumblr.com
    name = 'picturesinboxes'
    long_name = 'Pictures in Boxes'
    url = 'http://www.picturesinboxes.com'
    get_navi_link = get_a_navi_navinext
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.picturesinboxes.com/2013/10/26/tetris/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect date, images, title and author of a post.

        At least one image is expected, each carrying the post title as both
        'title' and 'alt'.
        """
        post_title = soup.find('h2', class_='post-title').string
        post_author = soup.find('span', class_='post-author').find('a').string
        published = string_to_date(
            soup.find('span', class_='post-date').string, '%B %d, %Y')
        strips = soup.find('div', class_='comicpane').find_all('img')
        assert strips
        assert all(strip['title'] == strip['alt'] == post_title for strip in strips)
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [strip['src'] for strip in strips],
            'title': post_title,
            'author': post_author,
        }
1893
1894 View Code Duplication
0 ignored issues
show
This code seems to be duplicated in your project.
Loading history...
1895
class Penmen(GenericNavigableComic):
    """Retriever for the Penmen comics."""
    name = 'penmen'
    long_name = 'Penmen'
    url = 'http://penmen.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://penmen.com/index.php/2016/09/12/penmen-announces-grin-big-brand-clothing/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect title, short url, images, tags and date of an entry."""
        page_title = soup.find('title').string
        entry_imgs = soup.find('div', class_='entry-content').find_all('img')
        shortlink = soup.find('link', rel='shortlink')['href']
        joined_tags = ' '.join(tag.string for tag in soup.find_all('a', rel='tag'))
        # Only the first 10 characters of the datetime attribute are parsed.
        iso_date = soup.find('time')['datetime'][:10]
        published = string_to_date(iso_date, "%Y-%m-%d")
        return {
            'title': page_title,
            'short_url': shortlink,
            'img': [tag['src'] for tag in entry_imgs],
            'tags': joined_tags,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
1922
1923
1924
class TheDoghouseDiaries(GenericNavigableComic):
    """Retriever for The Dog House Diaries comics."""
    name = 'doghouse'
    long_name = 'The Dog House Diaries'
    url = 'http://thedoghousediaries.com'

    @classmethod
    def get_first_comic_link(cls):
        """Return the anchor with id 'firstlink' from the home page."""
        home = get_soup_at_url(cls.url)
        return home.find('a', id='firstlink')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor with id 'nextlink' (or 'previouslink' when going back)."""
        anchor_id = 'nextlink' if next_ else 'previouslink'
        return last_soup.find('a', id=anchor_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect titles, alt text, image and number of a comic.

        The number is the last '/'-separated component of the comic url.
        """
        strip = soup.find('img', src=re.compile('^dhdcomics/.*'))
        comic_url = cls.get_url_from_link(link)
        header = soup.find('h2', id='titleheader').string
        subtext = soup.find('div', id='subtext').string
        return {
            'title': header,
            'title2': subtext,
            'alt': strip.get('title'),
            # The src attribute is stripped of surrounding whitespace before joining.
            'img': [urljoin_wrapper(comic_url, strip['src'].strip())],
            'num': int(comic_url.rsplit('/', 1)[-1]),
        }
1953
1954
1955
class InvisibleBread(GenericListableComic):
    """Class to retrieve Invisible Bread comics."""
    # Also on http://www.gocomics.com/invisible-bread
    name = 'invisiblebread'
    long_name = 'Invisible Bread'
    url = 'http://invisiblebread.com'

    @classmethod
    def get_archive_elements(cls):
        """Return the 'archive-title' cells of the archives page, in reversed order."""
        archive_url = urljoin_wrapper(cls.url, 'archives/')
        return reversed(get_soup_at_url(archive_url).find_all('td', class_='archive-title'))

    @classmethod
    def get_url_from_archive_element(cls, td):
        """Return the href of the anchor contained in an archive cell."""
        return td.find('a')['href']

    @classmethod
    def get_comic_info(cls, soup, td):
        """Get information about a particular comics.

        The date is rebuilt from the text of the cell preceding `td` (month
        and day) and from the year found right after cls.url in the comic url.
        The comic image is expected to carry the title as 'title' and 'alt'.
        """
        url = cls.get_url_from_archive_element(td)
        title = td.find('a').string
        month_and_day = td.previous_sibling.string
        # Escape the url so that its dots are matched literally.
        link_re = re.compile('^%s/([0-9]+)/' % re.escape(cls.url))
        year = link_re.match(url).groups()[0]
        date_str = month_and_day + ' ' + year
        day = string_to_date(date_str, '%b %d %Y')
        # Only the first image of the comic div is used.
        img = soup.find('div', id='comic').find('img')
        assert img['title'] == img['alt'] == title
        return {
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [urljoin_wrapper(cls.url, img['src'])],
            'title': title,
        }
1991
1992
1993
class DiscoBleach(GenericEmptyComic):
    """Placeholder for the Disco Bleach comics."""
    # Does not work anymore
    name = 'discobleach'
    long_name = 'Disco Bleach'
    url = 'http://discobleach.com'
1998
1999
2000
class TubeyToons(GenericEmptyComic):
    """Placeholder for the TubeyToons comics."""
    # Does not work anymore
    # Also on http://tapastic.com/series/Tubey-Toons
    # Also on http://tubeytoons.tumblr.com
    name = 'tubeytoons'
    long_name = 'Tubey Toons'
    url = 'http://tubeytoons.com'
    # NOTE(review): category is spelled 'TUNEYTOONS' - confirm against other users of it.
    _categories = ('TUNEYTOONS', )
2008
2009
2010
class CompletelySeriousComics(GenericNavigableComic):
    """Retriever for the Completely Serious comics."""
    name = 'completelyserious'
    long_name = 'Completely Serious Comics'
    url = 'http://completelyseriouscomics.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect date, images, title, alt text and author of a post.

        At least one image is expected; all images must share the same value
        for 'title' and 'alt', which becomes the returned alt text.
        """
        post_title = soup.find('h2', class_='post-title').string
        # The author is the second child node of the 'post-author' span.
        post_author = soup.find('span', class_='post-author').contents[1].string
        published = string_to_date(
            soup.find('span', class_='post-date').string, '%B %d, %Y')
        strips = soup.find('div', class_='comicpane').find_all('img')
        assert strips
        shared_alt = strips[0]['title']
        assert all(strip['title'] == strip['alt'] == shared_alt for strip in strips)
        return {
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [strip['src'] for strip in strips],
            'title': post_title,
            'alt': shared_alt,
            'author': post_author,
        }
0 ignored issues
show
This code seems to be duplicated in your project.
Loading history...
2038
2039
2040
class PoorlyDrawnLines(GenericListableComic):
    """Class to retrieve Poorly Drawn Lines comics."""
    # Also on http://pdlcomics.tumblr.com
    name = 'poorlydrawn'
    long_name = 'Poorly Drawn Lines'
    url = 'http://www.poorlydrawnlines.com'
    _categories = ('POORLYDRAWN', )
    get_url_from_archive_element = get_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        At most one image is expected in the 'post' div; the title is that
        image's 'title' attribute, or an empty string when there is none.
        """
        imgs = soup.find('div', class_='post').find_all('img')
        assert len(imgs) <= 1
        return {
            'img': [i['src'] for i in imgs],
            'title': imgs[0].get('title', "") if imgs else "",
        }

    @classmethod
    def get_archive_elements(cls):
        """Return the archive anchors pointing to '<url>/comic/...', in reversed order."""
        archive_url = urljoin_wrapper(cls.url, 'archive')
        # Escape the url so that its dots are matched literally.
        url_re = re.compile('^%s/comic/.' % re.escape(cls.url))
        return reversed(get_soup_at_url(archive_url).find_all('a', href=url_re))
2064
2065 View Code Duplication
0 ignored issues
show
This code seems to be duplicated in your project.
Loading history...
2066
class LoadingComics(GenericNavigableComic):
    """Retriever for the Loading Artist comics."""
    name = 'loadingartist'
    long_name = 'Loading Artist'
    url = 'http://www.loadingartist.com/latest'

    @classmethod
    def get_first_comic_link(cls):
        """Return the anchor whose title is "First"."""
        latest = get_soup_at_url(cls.url)
        return latest.find('a', title="First")

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor whose title is 'Next' (or 'Previous' when going back)."""
        wanted_title = 'Next' if next_ else 'Previous'
        return last_soup.find('a', title=wanted_title)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect title, images and date of a comic.

        Only images of the 'comic' div with empty 'alt' and 'title' are kept.
        """
        heading = soup.find('h1').string
        raw_date = soup.find('span', class_='date').string.strip()
        published = string_to_date(raw_date, "%B %d, %Y")
        strips = soup.find('div', class_='comic').find_all('img', alt='', title='')
        return {
            'title': heading,
            'img': [strip['src'] for strip in strips],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2096
2097
2098
class ChuckleADuck(GenericNavigableComic):
    """Retriever for the Chuckle-A-Duck comics."""
    name = 'chuckleaduck'
    long_name = 'Chuckle-A-duck'
    url = 'http://chuckleaduck.com'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect date, images, title and author of a post.

        A page without a div with id 'comic' yields no image and an empty
        title; otherwise the title is the 'title' of the first image, which
        every image must share as both 'title' and 'alt'.
        """
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(remove_st_nd_rd_th_from_date(raw_date), "%B %d, %Y")
        post_author = soup.find('span', class_='post-author').string
        comic_div = soup.find('div', id='comic')
        if comic_div:
            strips = comic_div.find_all('img')
        else:
            strips = []
        if strips:
            shared_title = strips[0]['title']
        else:
            shared_title = ""
        assert all(strip['title'] == strip['alt'] == shared_title for strip in strips)
        return {
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [strip['src'] for strip in strips],
            'title': shared_title,
            'author': post_author,
        }
2124
2125
2126
class DepressedAlien(GenericNavigableComic):
    """Class to retrieve Depressed Alien Comics.

    Navigation relies on arrow images identified by their 'name' attribute:
    the parent tag of each arrow is used as the link.
    """
    name = 'depressedalien'
    long_name = 'Depressed Alien'
    url = 'http://depressedalien.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        Returns the parent of the image named 'beginArrow', or None when that
        image cannot be found on the page at cls.url.
        """
        img = get_soup_at_url(cls.url).find('img', attrs={'name': 'beginArrow'})
        # find() returns None when nothing matches: do not dereference it.
        return None if img is None else img.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns the parent of the image named 'rightArrow' (next) or
        'leftArrow' (previous), or None when that image is missing.
        """
        img = last_soup.find('img', attrs={'name': 'rightArrow' if next_ else 'leftArrow'})
        return None if img is None else img.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns the title (twitter:title) and the images (og:image) found in
        the page metadata.
        """
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        imgs = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'img': [i['content'] for i in imgs],
        }
2152
2153
2154
class ThingsInSquares(GenericListableComic):
    """Retriever for the Things In Squares comics, listed from an archive table."""
    # This can be retrieved in other languages
    # Also on https://tapastic.com/series/Things-in-Squares
    name = 'squares'
    long_name = 'Things in squares'
    url = 'http://www.thingsinsquares.com'

    @classmethod
    def get_comic_info(cls, soup, tr):
        """Collect date, titles, description, tags, images and alt text.

        The archive row must hold exactly three cells: the second contains the
        link (whose text is the title), the third the date as "%m.%d.%y".
        """
        _, link_cell, date_cell = tr.find_all('td')
        anchor = link_cell.find('a')
        published = string_to_date(date_cell.string, "%m.%d.%y")
        archive_title = anchor.string
        og_title = soup.find('meta', property='og:title')['content']
        og_description = soup.find('meta', property='og:description')
        if og_description:
            description = og_description['content']
        else:
            description = ''
        joined_tags = ' '.join(
            meta['content'] for meta in soup.find_all('meta', property='article:tag'))
        entry_imgs = soup.find('div', class_='entry-content').find_all('img')
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'title': archive_title,
            'title2': og_title,
            'description': description,
            'tags': joined_tags,
            'img': [tag['src'] for tag in entry_imgs],
            'alt': ' '.join(tag['alt'] for tag in entry_imgs),
        }

    @classmethod
    def get_url_from_archive_element(cls, tr):
        """Return the href of the anchor in the second of the row's three cells."""
        _, link_cell, _ = tr.find_all('td')
        return link_cell.find('a')['href']

    @classmethod
    def get_archive_elements(cls):
        """Return the rows of the first tbody of the 'archive-2' page, in reversed order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive-2'))
        rows = archive_soup.find('tbody').find_all('tr')
        return reversed(rows)
2196
2197
2198
class HappleTea(GenericNavigableComic):
    """Retriever for the Happle Tea comics."""
    name = 'happletea'
    long_name = 'Happle Tea'
    url = 'http://www.happletea.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect title, images, alt text, date and author of a post.

        Every image is expected to have identical 'alt' and 'title'
        attributes; the returned alt text concatenates them without separator.
        """
        strips = soup.find('div', id='comic').find_all('img')
        post_content = soup.find('div', class_='post-content')
        post_title = post_content.find('h2', class_='post-title').string
        post_author = post_content.find('a', rel='author').string
        published = string_to_date(
            post_content.find('span', class_='post-date').string, "%B %d, %Y")
        assert all(strip['alt'] == strip['title'] for strip in strips)
        return {
            'title': post_title,
            'img': [strip['src'] for strip in strips],
            'alt': ''.join(strip['alt'] for strip in strips),
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'author': post_author,
        }
2225
2226
2227
class RockPaperScissors(GenericNavigableComic):
    """Retriever for the Rock Paper Scissors comics."""
    name = 'rps'
    long_name = 'Rock Paper Scissors'
    url = 'http://rps-comics.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect title, transcript, short url and og:image urls of a page."""
        page_title = soup.find('title').string
        og_images = soup.find_all('meta', property='og:image')
        shortlink = soup.find('link', rel='shortlink')['href']
        transcript_text = soup.find('div', id='transcript-content').string
        return {
            'title': page_title,
            'transcript': transcript_text,
            'short_url': shortlink,
            'img': [meta['content'] for meta in og_images],
        }
2248
2249
2250
class FatAwesomeComics(GenericNavigableComic):
    """Retriever for the Fat Awesome comics."""
    # Also on http://fatawesomecomedy.tumblr.com
    name = 'fatawesome'
    long_name = 'Fat Awesome'
    url = 'http://fatawesome.com/comics'
    first_url = 'http://fatawesome.com/shortbus/'
    get_first_comic_link = simulate_first_link
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary for the comic page parsed in `soup`.

        Title, description, tags and date are read from <meta> elements;
        exactly one image marked with data-recalc-dims="1" is expected.
        """
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        description = soup.find('meta', attrs={'name': 'description'})['content']
        tag_meta = soup.find('meta', property='article:tag')
        if tag_meta:
            tags = tag_meta['content']
        else:
            tags = ""
        published_meta = soup.find('meta', property='article:published_time')
        # Only the leading YYYY-MM-DD part of the timestamp is used.
        published = string_to_date(published_meta['content'][:10], "%Y-%m-%d")
        pictures = soup.find_all('img', attrs={'data-recalc-dims': "1"})
        assert len(pictures) == 1
        alt_text = "".join(pic['alt'] for pic in pictures)
        # Drop the query string from the image URLs.
        sources = [pic['src'].rsplit('?', 1)[0] for pic in pictures]
        return {
            'title': title,
            'description': description,
            'tags': tags,
            'alt': alt_text,
            'img': sources,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2281
2282
2283
class AnythingComic(GenericListableComic):
    """Retriever for the Anything Comic comics."""
    # Also on http://tapastic.com/series/anything
    name = 'anythingcomic'
    long_name = 'Anything Comic'
    url = 'http://www.anythingcomic.com'

    @classmethod
    def get_archive_elements(cls):
        """Return the rows of the archive table, header rows excluded."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive/'))
        rows = archive_soup.find('table', id='chapter_table').find_all('tr')
        # The first 2 <tr>'s do not correspond to comics
        return rows[2:]

    @classmethod
    def get_url_from_archive_element(cls, tr):
        """Get url corresponding to an archive element."""
        # A row is expected to hold exactly 4 cells; only the 2nd is used.
        _, comic_cell, _, _ = tr.find_all('td')
        anchor = comic_cell.find('a')
        return urljoin_wrapper(cls.url, anchor['href'])

    @classmethod
    def get_comic_info(cls, soup, tr):
        """Build the info dictionary from the comic page and its archive row.

        Number, title and date come from the archive row `tr`; the image
        comes from the comic page parsed in `soup`.
        """
        num_cell, comic_cell, date_cell, _ = tr.find_all('td')
        number = int(num_cell.string)
        title = comic_cell.find('a').string
        pictures = soup.find_all('img', id='comic_image')
        cleaned_date = remove_st_nd_rd_th_from_date(date_cell.string)
        published = string_to_date(cleaned_date, "%B %d, %Y, %I:%M %p")
        assert len(pictures) == 1
        for pic in pictures:
            assert pic.get('alt') == pic.get('title')
        return {
            'num': number,
            'title': title,
            'alt': pictures[0].get('alt', ''),
            'img': [pic['src'] for pic in pictures],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2324
2325
2326
class LonnieMillsap(GenericNavigableComic):
    """Retriever for Lonnie Millsap's comics."""
    name = 'millsap'
    long_name = 'Lonnie Millsap'
    url = 'http://www.lonniemillsap.com'
    first_url = 'http://www.lonniemillsap.com/?p=42'
    get_first_comic_link = simulate_first_link
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary for the comic page parsed in `soup`.

        Author, date and images are looked up inside the 'post-content' div.
        """
        title = soup.find('h2', class_='post-title').string
        content = soup.find('div', class_='post-content')
        author_span = content.find("span", class_="post-author")
        author = author_span.find("a").string
        date_text = content.find("span", class_="post-date").string
        published = string_to_date(date_text, "%B %d, %Y")
        pictures = content.find("div", class_="entry").find_all("img")
        info = {
            'title': title,
            'author': author,
            'img': [pic['src'] for pic in pictures],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
        return info
2352
2353 View Code Duplication
0 ignored issues
show
This code seems to be duplicated in your project.
Loading history...
2354
class LinsEditions(GenericNavigableComic):
    """Retriever for the L.I.N.S. Editions comics."""
    # Also on http://linscomics.tumblr.com
    # Now on https://warandpeas.com
    name = 'lins'
    long_name = 'L.I.N.S. Editions'
    url = 'https://linsedition.com'
    _categories = ('LINS', )
    first_url = 'https://linsedition.com/2011/09/07/l-i-n-s/'
    get_first_comic_link = simulate_first_link
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary from the page's <meta> elements."""
        title = soup.find('meta', property='og:title')['content']
        image_metas = soup.find_all('meta', property='og:image')
        timestamp = soup.find('meta', property='article:published_time')['content']
        # Only the leading YYYY-MM-DD part of the timestamp is used.
        published = string_to_date(timestamp[:10], "%Y-%m-%d")
        sources = [meta['content'] for meta in image_metas]
        return {
            'title': title,
            'img': sources,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2380
2381
2382
class ThorsThundershack(GenericNavigableComic):
    """Retriever for the Thor's Thundershack comics."""
    # Also on http://tapastic.com/series/Thors-Thundershac
    name = 'thor'
    long_name = 'Thor\'s Thundershack'
    url = 'http://www.thorsthundershack.com'
    _categories = ('THOR', )
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'first' navigation link found on the home page."""
        home = get_soup_at_url(cls.url)
        return home.find('a', class_='first navlink')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the link to the next (or previous) comic, or None.

        Links whose href is exactly '/comic' are skipped.
        """
        relation = 'next' if next_ else 'prev'
        candidates = last_soup.find_all('a', rel=relation)
        return next((a for a in candidates if a['href'] != '/comic'), None)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary for the comic page parsed in `soup`."""
        title = soup.find('meta', attrs={'name': 'description'})["content"]
        description = soup.find('div', itemprop='articleBody').text
        author = soup.find('span', itemprop='author copyrightHolder').string
        pictures = soup.find_all('img', itemprop='image')
        # Sanity check: each image uses the same text for 'title' and 'alt'.
        for pic in pictures:
            assert pic['title'] == pic['alt']
        if pictures:
            alt = pictures[0]['alt']
        else:
            alt = ""
        timestamp = soup.find('time', itemprop='datePublished')["datetime"]
        published = string_to_date(timestamp, "%Y-%m-%d %H:%M:%S")
        # Image sources are joined to the site url.
        sources = [urljoin_wrapper(cls.url, pic['src']) for pic in pictures]
        return {
            'img': sources,
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'author': author,
            'title': title,
            'alt': alt,
            'description': description,
        }
2425
2426
2427
class GerbilWithAJetpack(GenericNavigableComic):
    """Class to retrieve GerbilWithAJetpack comics."""
    name = 'gerbil'
    long_name = 'Gerbil With A Jetpack'
    url = 'http://gerbilwithajetpack.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dictionary with the image urls, title, alt text, author
        and publication date read from the page parsed in `soup`.
        """
        title = soup.find('h2', class_='post-title').string
        author = soup.find("span", class_="post-author").find("a").string
        date_str = soup.find("span", class_="post-date").string
        day = string_to_date(date_str, "%B %d, %Y")
        imgs = soup.find("div", id="comic").find_all("img")
        # A page without any image must not raise IndexError: fall back to
        # an empty alt text, as the sibling comic classes do.
        alt = imgs[0]['alt'] if imgs else ""
        assert all(i['alt'] == i['title'] == alt for i in imgs)
        return {
            'img': [i['src'] for i in imgs],
            'title': title,
            'alt': alt,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
2454
2455 View Code Duplication
0 ignored issues
show
This code seems to be duplicated in your project.
Loading history...
2456
class EveryDayBlues(GenericNavigableComic):
    """Retriever for the Every Day Blues comics."""
    name = "blues"
    long_name = "Every Day Blues"
    url = "http://everydayblues.net"
    get_navi_link = get_link_rel_next
    get_first_comic_link = get_a_navi_navifirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary for the comic page parsed in `soup`.

        The date is parsed with the "de_DE.utf8" locale; at most one image
        is expected, with alt and title both equal to the post title.
        """
        title = soup.find("h2", class_="post-title").string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        date_text = soup.find("span", class_="post-date").string
        published = string_to_date(date_text, "%d. %B %Y", "de_DE.utf8")
        pictures = soup.find("div", id="comic").find_all("img")
        for pic in pictures:
            assert pic['alt'] == pic['title'] == title
        assert len(pictures) <= 1
        info = {
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'author': author,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
        return info
2482
2483
2484
class BiterComics(GenericNavigableComic):
    """Retriever for the Biter Comics."""
    name = "biter"
    long_name = "Biter Comics"
    url = "http://www.bitercomics.com"
    get_navi_link = get_link_rel_next
    get_first_comic_link = get_a_navi_navifirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary for the comic page parsed in `soup`.

        Exactly one image is expected in the 'comic' div.
        """
        title = soup.find("h1", class_="entry-title").string
        author_span = soup.find("span", class_="author vcard")
        author = author_span.find("a").string
        date_text = soup.find("span", class_="entry-date").string
        published = string_to_date(date_text, "%B %d, %Y")
        pictures = soup.find("div", id="comic").find_all("img")
        for pic in pictures:
            assert pic['alt'] == pic['title']
        assert len(pictures) == 1
        only_picture = pictures[0]
        alt = only_picture['alt']
        return {
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'alt': alt,
            'author': author,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2512
2513 View Code Duplication
0 ignored issues
show
This code seems to be duplicated in your project.
Loading history...
2514
class TheAwkwardYeti(GenericNavigableComic):
    """Retriever for The Awkward Yeti comics."""
    # Also on http://www.gocomics.com/the-awkward-yeti
    # Also on http://larstheyeti.tumblr.com
    # Also on https://tapastic.com/series/TheAwkwardYeti
    name = 'yeti'
    long_name = 'The Awkward Yeti'
    url = 'http://theawkwardyeti.com'
    _categories = ('YETI', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = get_a_navi_navifirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary for the comic page parsed in `soup`."""
        title = soup.find('h2', class_='post-title').string
        date_text = soup.find("span", class_="post-date").string
        published = string_to_date(date_text, "%B %d, %Y")
        pictures = soup.find("div", id="comic").find_all("img")
        # Only the first image is required to have matching alt and title.
        if pictures:
            first = pictures[0]
            assert first['alt'] == first['title']
        sources = [pic['src'] for pic in pictures]
        return {
            'img': sources,
            'title': title,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2541
2542
2543
class PleasantThoughts(GenericNavigableComic):
    """Retriever for the Pleasant Thoughts comics."""
    name = 'pleasant'
    long_name = 'Pleasant Thoughts'
    url = 'http://pleasant-thoughts.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = get_a_navi_navifirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the title and image urls found in the 'post-content' div."""
        content = soup.find('div', class_='post-content')
        heading = content.find('h2', class_='post-title')
        title = heading.string
        pictures = content.find("div", class_="entry").find_all("img")
        sources = [pic['src'] for pic in pictures]
        return {
            'title': title,
            'img': sources,
        }
2561
2562
2563
class MisterAndMe(GenericNavigableComic):
    """Retriever for the Mister & Me comics."""
    # Also on http://www.gocomics.com/mister-and-me
    # Also on https://tapastic.com/series/Mister-and-Me
    name = 'mister'
    long_name = 'Mister & Me'
    url = 'http://www.mister-and-me.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = get_a_comicnavbase_comicnavfirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary for the comic page parsed in `soup`.

        At most one image is expected; the alt text is empty when the
        page has no image.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        date_text = soup.find("span", class_="post-date").string
        published = string_to_date(date_text, "%B %d, %Y")
        pictures = soup.find("div", id="comic").find_all("img")
        for pic in pictures:
            assert pic['alt'] == pic['title']
        assert len(pictures) <= 1
        if pictures:
            alt = pictures[0]['alt']
        else:
            alt = ""
        return {
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'alt': alt,
            'author': author,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2593
2594
2595
class LastPlaceComics(GenericNavigableComic):
    """Retriever for the Last Place Comics."""
    name = 'lastplace'
    long_name = 'Last Place Comics'
    url = "http://lastplacecomics.com"
    get_navi_link = get_link_rel_next
    get_first_comic_link = get_a_comicnavbase_comicnavfirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary for the comic page parsed in `soup`.

        At most one image is expected; the alt text is empty when the
        page has no image.
        """
        heading = soup.find('h2', class_='post-title')
        title = heading.string
        author = soup.find("span", class_="post-author").find("a").string
        published = string_to_date(
            soup.find("span", class_="post-date").string, "%B %d, %Y")
        comic_imgs = soup.find("div", id="comic").find_all("img")
        for img in comic_imgs:
            assert img['alt'] == img['title']
        assert len(comic_imgs) <= 1
        alt = comic_imgs[0]['alt'] if comic_imgs else ""
        info = {
            'img': [img['src'] for img in comic_imgs],
            'title': title,
            'alt': alt,
            'author': author,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
        return info
2623
2624
2625
class TalesOfAbsurdity(GenericNavigableComic):
    """Retriever for the Tales Of Absurdity comics."""
    # Also on http://tapastic.com/series/Tales-Of-Absurdity
    # Also on http://talesofabsurdity.tumblr.com
    name = 'absurdity'
    long_name = 'Tales of Absurdity'
    url = 'http://talesofabsurdity.com'
    _categories = ('ABSURDITY', )
    get_navi_link = get_a_navi_comicnavnext_navinext
    get_first_comic_link = get_a_navi_navifirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary for the comic page parsed in `soup`.

        The alt text is taken from the first image, or is empty when the
        page has no image.
        """
        title = soup.find('h2', class_='post-title').string
        author_link = soup.find("span", class_="post-author").find("a")
        author = author_link.string
        date_text = soup.find("span", class_="post-date").string
        when = string_to_date(date_text, "%B %d, %Y")
        pictures = soup.find("div", id="comic").find_all("img")
        for pic in pictures:
            assert pic['alt'] == pic['title']
        if pictures:
            alt = pictures[0]['alt']
        else:
            alt = ""
        sources = [pic['src'] for pic in pictures]
        return {
            'img': sources,
            'title': title,
            'alt': alt,
            'author': author,
            'day': when.day,
            'month': when.month,
            'year': when.year
        }
2655
2656
2657
class EndlessOrigami(GenericEmptyComic, GenericNavigableComic):
    """Retriever for the Endless Origami comics."""
    name = "origami"
    long_name = "Endless Origami"
    url = "http://endlessorigami.com"
    get_navi_link = get_link_rel_next
    get_first_comic_link = get_a_navi_navifirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary for the comic page parsed in `soup`.

        The alt text is taken from the first image, or is empty when the
        page has no image.
        """
        title = soup.find('h2', class_='post-title').string
        author = soup.find("span", class_="post-author").find("a").string
        date_text = soup.find("span", class_="post-date").string
        published = string_to_date(date_text, "%B %d, %Y")
        comic_div = soup.find("div", id="comic")
        images = comic_div.find_all("img")
        for image in images:
            assert image['alt'] == image['title']
        alt = images[0]['alt'] if images else ""
        info = {
            'img': [image['src'] for image in images],
            'title': title,
            'alt': alt,
            'author': author,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
        return info
2684
2685
2686
class PlanC(GenericNavigableComic):
    """Retriever for the Plan C comics."""
    name = 'planc'
    long_name = 'Plan C'
    url = 'http://www.plancomic.com'
    get_navi_link = get_a_navi_comicnavnext_navinext
    get_first_comic_link = get_a_navi_navifirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, image urls and date for the page parsed in `soup`."""
        title = soup.find('h2', class_='post-title').string
        date_text = soup.find("span", class_="post-date").string
        published = string_to_date(date_text, "%B %d, %Y")
        pictures = soup.find('div', id='comic').find_all('img')
        sources = [pic['src'] for pic in pictures]
        return {
            'title': title,
            'img': sources,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
0 ignored issues
show
This code seems to be duplicated in your project.
Loading history...
2708
2709
2710
class BuniComic(GenericNavigableComic):
    """Retriever for the Buni comics."""
    name = 'buni'
    long_name = 'BuniComics'
    url = 'http://www.bunicomic.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = get_a_comicnavbase_comicnavfirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the image url and title for the page parsed in `soup`.

        Exactly one image is expected; its 'title' attribute is used as
        the comic title.
        """
        pictures = soup.find('div', id='comic').find_all('img')
        for pic in pictures:
            assert pic['alt'] == pic['title']
        assert len(pictures) == 1
        only_picture = pictures[0]
        return {
            'img': [pic['src'] for pic in pictures],
            'title': only_picture['title'],
        }
2728
2729
2730
class GenericCommitStrip(GenericNavigableComic):
    """Common base for the Commit Strip retrievers (one per language)."""
    first_url = NotImplemented
    get_first_comic_link = simulate_first_link
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary for the comic page parsed in `soup`.

        'title2' joins the 'title' attributes of the images found in the
        'entry-content' div (an empty string for images without one).
        """
        description = soup.find('meta', property='og:description')['content']
        title = soup.find('meta', property='og:title')['content']
        pictures = soup.find('div', class_='entry-content').find_all('img')
        image_titles = ' '.join(pic.get('title', '') for pic in pictures)
        # Image sources are converted to plain ASCII URIs and joined to the url.
        sources = [
            urljoin_wrapper(cls.url, convert_iri_to_plain_ascii_uri(pic['src']))
            for pic in pictures
        ]
        return {
            'title': title,
            'title2': image_titles,
            'description': description,
            'img': sources,
        }
2749
2750
2751
class CommitStripFr(GenericCommitStrip):
    """Retriever for the French edition of Commit Strip."""
    name = "commit_fr"
    long_name = "Commit Strip (Fr)"
    _categories = ("FRANCAIS", )
    url = "http://www.commitstrip.com/fr"
    first_url = "http://www.commitstrip.com/fr/2012/02/22/interview/"
2758
2759
2760
class CommitStripEn(GenericCommitStrip):
    """Retriever for the English edition of Commit Strip."""
    name = "commit_en"
    long_name = "Commit Strip (En)"
    url = "http://www.commitstrip.com/en"
    first_url = "http://www.commitstrip.com/en/2012/02/22/interview/"
2766
2767 View Code Duplication
0 ignored issues
show
This code seems to be duplicated in your project.
Loading history...
2768
class GenericBoumerie(GenericNavigableComic):
    """Common base for the Boumeries retrievers (one per language).

    Subclasses provide `date_format` and `lang`, which are passed to
    string_to_date when parsing the post date.
    """
    date_format = NotImplemented
    lang = NotImplemented
    get_navi_link = get_link_rel_next
    get_first_comic_link = get_a_navi_navifirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary for the comic page parsed in `soup`."""
        title = soup.find('h2', class_='post-title').string
        shortlink = soup.find('link', rel='shortlink')['href']
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        date_text = soup.find('span', class_='post-date').string
        published = string_to_date(date_text, cls.date_format, cls.lang)
        pictures = soup.find('div', id='comic').find_all('img')
        for pic in pictures:
            assert pic['alt'] == pic['title']
        return {
            'short_url': shortlink,
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'author': author,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2794
2795
2796
class BoumerieEn(GenericBoumerie):
    """Retriever for the English edition of Boumeries."""
    name = "boumeries_en"
    long_name = "Boumeries (En)"
    url = "http://comics.boumerie.com"
    lang = "en_GB.UTF-8"
    date_format = "%B %d, %Y"
2803
2804
2805
class BoumerieFr(GenericBoumerie):
    """Retriever for the French edition of Boumeries."""
    name = "boumeries_fr"
    long_name = "Boumeries (Fr)"
    url = "http://bd.boumerie.com"
    _categories = ("FRANCAIS", )
    lang = 'fr_FR.utf8'
    date_format = '%A, %d %B %Y'
2813
2814 View Code Duplication
0 ignored issues
show
This code seems to be duplicated in your project.
Loading history...
2815
class UnearthedComics(GenericNavigableComic):
    """Class to retrieve Unearthed comics."""
    # Also on http://tapastic.com/series/UnearthedComics
    # Also on http://unearthedcomics.tumblr.com
    name = 'unearthed'
    long_name = 'Unearthed Comics'
    url = 'http://unearthedcomics.com'
    _categories = ('UNEARTHED', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://unearthedcomics.com/comics/world-with-turn-signals/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dictionary with the title (from the first <h1> or <h2>,
        empty when neither exists), the description, the shortlink, the
        image urls and the publication date.
        """
        short_url = soup.find('link', rel='shortlink')['href']
        title_elt = soup.find('h1') or soup.find('h2')
        title = title_elt.string if title_elt else ""
        # Store the text of the og:description meta, not the element itself;
        # keep None when the page has no such meta.
        desc_meta = soup.find('meta', property='og:description')
        desc = desc_meta['content'] if desc_meta is not None else None
        date_str = soup.find('time', class_='published updated hidden')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        post = soup.find('div', class_="entry content entry-content type-portfolio")
        imgs = post.find_all('img')
        return {
            'title': title,
            'description': desc,
            'url2': short_url,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
2847
2848
2849
class Optipess(GenericNavigableComic):
    """Retriever for the Optipess comics."""
    name = 'optipess'
    long_name = 'Optipess'
    url = 'http://www.optipess.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = get_a_navi_navifirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary for the comic page parsed in `soup`.

        A page without a 'comic' div yields no image and an empty alt text.
        """
        title = soup.find('h2', class_='post-title').string
        author = soup.find("span", class_="post-author").find("a").string
        comic_div = soup.find('div', id='comic')
        if comic_div:
            pictures = comic_div.find_all('img')
        else:
            pictures = []
        if pictures:
            alt = pictures[0]['title']
        else:
            alt = ""
        # Sanity check: every image shares the same alt/title text.
        for pic in pictures:
            assert pic['alt'] == pic['title'] == alt
        date_text = soup.find('span', class_='post-date').string
        published = string_to_date(date_text, "%B %d, %Y")
        return {
            'title': title,
            'alt': alt,
            'author': author,
            'img': [pic['src'] for pic in pictures],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2877
2878
2879
class PainTrainComic(GenericNavigableComic):
    """Class to retrieve Pain Train Comics."""
    name = 'paintrain'
    long_name = 'Pain Train Comics'
    url = 'http://paintraincomic.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The comic number is extracted from the shortlink, which is expected
        to look like '<url>/?p=<num>'.
        """
        title = soup.find('h2', class_='post-title').string
        short_url = soup.find('link', rel='shortlink')['href']
        # Escape the site url so that its dots are matched literally.
        short_url_re = re.compile(r'^%s/\?p=([0-9]*)' % re.escape(cls.url))
        num = int(short_url_re.match(short_url).group(1))
        imgs = soup.find('div', id='comic').find_all('img')
        # A page without any image must not raise IndexError.
        alt = imgs[0]['title'] if imgs else ""
        assert all(i['alt'] == i['title'] == alt for i in imgs)
        date_str = soup.find('span', class_='post-date').string
        day = string_to_date(date_str, "%d/%m/%Y")
        return {
            'short_url': short_url,
            'num': num,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'alt': alt,
            'title': title,
        }
2909
2910
2911
class MoonBeard(GenericNavigableComic):
    """Class to retrieve MoonBeard comics."""
    # Also on http://blog.squiresjam.es/moonbeard
    # Also on http://www.webtoons.com/en/comedy/moon-beard/list?title_no=471
    name = 'moonbeard'
    long_name = 'Moon Beard'
    url = 'http://moonbeard.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The comic number is extracted from the shortlink, which is expected
        to look like '<url>/?p=<num>'. Tags are the space-joined contents of
        the 'article:tag' metas.
        """
        title = soup.find('h2', class_='post-title').string
        short_url = soup.find('link', rel='shortlink')['href']
        # Escape the site url so that its dots are matched literally.
        short_url_re = re.compile(r'^%s/\?p=([0-9]*)' % re.escape(cls.url))
        num = int(short_url_re.match(short_url).group(1))
        imgs = soup.find('div', id='comic').find_all('img')
        # A page without any image must not raise IndexError.
        alt = imgs[0]['title'] if imgs else ""
        assert all(i['alt'] == i['title'] == alt for i in imgs)
        date_str = soup.find('span', class_='post-date').string
        day = string_to_date(date_str, "%B %d, %Y")
        tags = ' '.join(t['content'] for t in soup.find_all('meta', property='article:tag'))
        author = soup.find('span', class_='post-author').string
        return {
            'short_url': short_url,
            'num': num,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'title': title,
            'tags': tags,
            'alt': alt,
            'author': author,
        }
2947
2948
2949
class AHammADay(GenericNavigableComic):
    """Class to retrieve class A Hamm A Day comics."""
    name = 'hamm'
    long_name = 'A Hamm A Day'
    url = 'http://www.ahammaday.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.ahammaday.com/today/3/6/french'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns None when the page has no matching navigation item.
        """
        # prev is next / next is prev
        li = last_soup.find('li', class_='previous' if next_ else 'next')
        # find() returns None when the item is missing: do not call .find on it.
        return li.find('a') if li else None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dictionary with the image urls (from the metas having
        itemprop 'image'), the title, the author and the publication date.
        """
        date_str = soup.find('time', class_='published')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        author = soup.find('span', class_='blog-author').find('a').string
        title = soup.find('meta', property='og:title')['content']
        imgs = soup.find_all('meta', itemprop='image')
        return {
            'img': [i['content'] for i in imgs],
            'title': title,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
        }
2980
2981 View Code Duplication
0 ignored issues
show
This code seems to be duplicated in your project.
Loading history...
2982
class LittleLifeLines(GenericNavigableComic):
    """Retriever for the Little Life Lines comics."""
    # Also on https://little-life-lines.tumblr.com
    name = 'life'
    long_name = 'Little Life Lines'
    url = 'http://www.littlelifelines.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.littlelifelines.com/comics/well-done'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the <a> tag for the next or previous comic, or None."""
        # prev is next / next is prev
        wanted_class = 'prev' if next_ else 'next'
        item = last_soup.find('li', class_=wanted_class)
        if not item:
            return None
        return item.find('a')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect title, alt text, description, author, date and images.

        The alt text is taken from the first image that has a 'src'.
        """
        title = soup.find('meta', property='og:title')['content']
        desc = soup.find('meta', property='og:description')['content']
        published = soup.find('time', class_='published')['datetime']
        day = string_to_date(published, "%Y-%m-%d")
        author = soup.find('a', rel='author').string
        content = soup.find('div', class_="body entry-content")
        # Only images which actually carry a 'src' attribute are kept.
        pictures = [tag for tag in content.find_all('img') if tag.get('src') is not None]
        alt = pictures[0]['alt']
        return {
            'title': title,
            'alt': alt,
            'description': desc,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [tag['src'] for tag in pictures],
        }
3021
3022
3023
class GenericWordPressInkblot(GenericNavigableComic):
    """Base class shared by the comics using WordPress with Inkblot."""
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'first webcomic' anchor found on the page at cls.url."""
        first_link_classes = 'webcomic-link webcomic1-link first-webcomic-link first-webcomic1-link'
        home = get_soup_at_url(cls.url)
        return home.find('a', class_=first_link_classes)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect the title, publication date and image URLs of a comic."""
        title = soup.find('meta', property='og:title')['content']
        container = soup.find('div', class_='webcomic-image')
        pictures = container.find_all('img')
        published = soup.find('meta', property='article:published_time')['content']
        # Only the leading YYYY-MM-DD part of the timestamp is parsed.
        day = string_to_date(published[:10], "%Y-%m-%d")
        return {
            'title': title,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [tag['src'] for tag in pictures],
        }
3046
3047
3048
class EverythingsStupid(GenericWordPressInkblot):
    """Everything's Stupid comics (WordPress with Inkblot)."""
    # Also on http://tapastic.com/series/EverythingsStupid
    # Also on http://www.webtoons.com/en/challenge/everythings-stupid/list?title_no=14591
    # Also on http://everythingsstupidcomics.tumblr.com
    name = 'stupid'
    long_name = "Everything's Stupid"
    url = 'http://everythingsstupid.net'
3056
3057
3058
class TheIsmComics(GenericWordPressInkblot):
    """The Ism comics (WordPress with Inkblot)."""
    # Also on https://tapastic.com/series/TheIsm (?)
    name = 'theism'
    long_name = "The Ism"
    url = 'http://www.theism-comics.com'
3064
3065
3066
class WoodenPlankStudios(GenericWordPressInkblot):
    """Wooden Plank Studios comics (WordPress with Inkblot)."""
    name = 'woodenplank'
    long_name = 'Wooden Plank Studios'
    url = 'http://woodenplankstudios.com'
3071
3072
3073
class ElectricBunnyComic(GenericNavigableComic):
    """Retriever for the Electric Bunny Comics."""
    # Also on http://electricbunnycomics.tumblr.com
    name = 'bunny'
    long_name = 'Electric Bunny Comic'
    url = 'http://www.electricbunnycomics.com/View/Comic/153/Welcome+to+Hell'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the parent of the image whose alt text is 'First'."""
        page = get_soup_at_url(cls.url)
        first_img = page.find('img', alt='First')
        return first_img.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the parent of the 'Next'/'Back' image, or None if absent."""
        label = 'Next' if next_ else 'Back'
        button = last_soup.find('img', alt=label)
        if not button:
            return None
        return button.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect the title and image URLs from the Open Graph meta tags."""
        title = soup.find('meta', property='og:title')['content']
        og_images = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'img': [meta['content'] for meta in og_images],
        }
3101
3102
3103
class SheldonComics(GenericNavigableComic):
    """Class to retrieve Sheldon comics."""
    # Also on http://www.gocomics.com/sheldon
    name = 'sheldon'
    long_name = 'Sheldon Comics'
    url = 'http://www.sheldoncomics.com'

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics."""
        return get_soup_at_url(cls.url).find("a", id="nav-first")

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns the first matching navigation anchor whose href is not the
        site root, or None when there is no such anchor.
        """
        for link in last_soup.find_all("a", id="nav-next" if next_ else "nav-prev"):
            # Anchors pointing back at the site root are skipped; compare
            # against cls.url instead of repeating the literal.
            if link['href'] != cls.url:
                return link
        return None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Expects exactly one image in the 'comic-foot' div, with identical
        'alt' and 'title' attributes; that title is used as comic title.
        """
        imgs = soup.find("div", id="comic-foot").find_all("img")
        assert all(i['alt'] == i['title'] for i in imgs)
        assert len(imgs) == 1
        title = imgs[0]['title']
        return {
            'title': title,
            'img': [i['src'] for i in imgs],
        }
3134
3135
3136
class Ubertool(GenericNavigableComic):
    """Retriever for the Ubertool comics."""
    # Also on http://ubertool.tumblr.com
    # Also on https://tapastic.com/series/ubertool
    name = 'ubertool'
    long_name = 'Ubertool'
    url = 'http://ubertoolcomic.com'
    _categories = ('UBERTOOL', )
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_a_comicnavbase_comicnavnext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect the image URLs, title and post date of a comic."""
        title = soup.find('h2', class_='post-title').string
        posted = soup.find('span', class_='post-date').string
        # Dates are parsed with the "%B %d, %Y" format.
        day = string_to_date(posted, "%B %d, %Y")
        comic_div = soup.find('div', id='comic')
        pictures = comic_div.find_all('img')
        return {
            'img': [tag['src'] for tag in pictures],
            'title': title,
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
3161
3162 View Code Duplication
0 ignored issues
show
This code seems to be duplicated in your project.
Loading history...
3163
class EarthExplodes(GenericNavigableComic):
    """Retriever for The Earth Explodes comics."""
    name = 'earthexplodes'
    long_name = 'The Earth Explodes'
    url = 'http://www.earthexplodes.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.earthexplodes.com/comics/000/'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor with id 'next' or 'prev' (None if absent)."""
        anchor_id = 'next' if next_ else 'prev'
        return last_soup.find('a', id=anchor_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect image URLs, page title and the first image's title text."""
        title = soup.find('title').string
        image_div = soup.find('div', id='image')
        pictures = image_div.find_all('img')
        # The 'title' attribute of the first image is stored as 'alt'.
        alt = pictures[0].get('title', '')
        return {
            'img': [urljoin_wrapper(cls.url, tag['src']) for tag in pictures],
            'title': title,
            'alt': alt,
        }
3188
3189 View Code Duplication
0 ignored issues
show
This code seems to be duplicated in your project.
Loading history...
3190
class PomComics(GenericNavigableComic):
    """Retriever for Pom Comics / Piece of Me."""
    name = 'pom'
    long_name = 'Pom Comics / Piece of Me'
    url = 'http://www.pomcomic.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'btn_first' anchor found on the page at cls.url."""
        home = get_soup_at_url(cls.url)
        return home.find('a', class_='btn_first')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the 'btn_next' or 'btn_prev' anchor (None if absent)."""
        button_class = 'btn_next' if next_ else 'btn_prev'
        return last_soup.find('a', class_=button_class)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect the title, description, keywords and image URLs."""
        title = soup.find('h1', id="comic-name").string
        desc = soup.find('meta', property='og:description')['content']
        keywords = soup.find('meta', attrs={'name': 'keywords'})['content']
        comic_div = soup.find('div', class_='comic')
        pictures = comic_div.find_all('img')
        return {
            'title': title,
            'desc': desc,
            'tags': keywords,
            'img': [urljoin_wrapper(cls.url, tag['src']) for tag in pictures],
        }
3220
3221
3222
class CubeDrone(GenericNavigableComic):
    """Class to retrieve Cube Drone comics."""
    name = 'cubedrone'
    long_name = 'Cube Drone'
    url = 'http://cube-drone.com/comics'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        The link is the parent of the 'glyphicon-backward' span.
        """
        return get_soup_at_url(cls.url).find('span', class_='glyphicon glyphicon-backward').parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns the parent of the chevron span, or None when the page has
        no such span.
        """
        class_ = 'glyphicon glyphicon-chevron-' + ('right' if next_ else 'left')
        span = last_soup.find('span', class_=class_)
        # find() returns None when nothing matches: do not dereference it.
        return span.parent if span is not None else None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Title and secondary URL come from the twitter meta tags; 'title2'
        and 'alt' come from the first comic image.
        """
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        url2 = soup.find('meta', attrs={'name': 'twitter:url'})['content']
        # date_str = soup.find('h2', class_='comic_title').find('small').string
        # day = string_to_date(date_str, "%B %d, %Y, %I:%M %p")
        imgs = soup.find_all('img', class_='comic img-responsive')
        title2 = imgs[0]['title']
        alt = imgs[0]['alt']
        return {
            'url2': url2,
            'title': title,
            'title2': title2,
            'alt': alt,
            'img': [i['src'] for i in imgs],
        }
3257
3258
3259
class MakeItStoopid(GenericNavigableComic):
    """Retriever for the Make It Stoopid comics."""
    name = 'stoopid'
    long_name = 'Make it stoopid'
    url = 'http://makeitstoopid.com/comic.php'

    @classmethod
    def get_nav(cls, soup):
        """Return the first five 'cnav' elements, with None for those lacking an href.

        The 'cnav' elements are expected to appear as two identical groups;
        only the first group is used.
        """
        elements = soup.find_all(class_='cnav')
        first_group = elements[:5]
        second_group = elements[5:]
        assert first_group == second_group
        # begin, prev, archive, next_, end = first_group
        links = []
        for element in first_group:
            links.append(element if element.get('href') is not None else None)
        return links

    @classmethod
    def get_first_comic_link(cls):
        """Return the first navigation element of the page at cls.url."""
        home = get_soup_at_url(cls.url)
        return cls.get_nav(home)[0]

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return navigation element 3 (next) or 1 (previous)."""
        position = 3 if next_ else 1
        return cls.get_nav(last_soup)[position]

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect the title (taken from the link) and the image URLs."""
        title = link['title']
        pictures = soup.find_all('img', id='comicimg')
        return {
            'title': title,
            'img': [tag['src'] for tag in pictures],
        }
3293
3294 View Code Duplication
0 ignored issues
show
This code seems to be duplicated in your project.
Loading history...
3295
class MarketoonistComics(GenericNavigableComic):
    """Retriever for the Marketoonist comics."""
    name = 'marketoonist'
    long_name = 'Marketoonist'
    url = 'https://marketoonist.com/cartoons'
    get_first_comic_link = simulate_first_link
    get_navi_link = get_link_rel_next
    first_url = 'https://marketoonist.com/2002/10/the-8-types-of-brand-managers-2.html'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect the image URLs, publication date and title of a comic."""
        og_images = soup.find_all('meta', property='og:image')
        published = soup.find('meta', property='article:published_time')['content']
        # Only the leading YYYY-MM-DD part of the timestamp is parsed.
        day = string_to_date(published[:10], "%Y-%m-%d")
        title = soup.find('meta', property='og:title')['content']
        return {
            'img': [meta['content'] for meta in og_images],
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'title': title,
        }
3318
3319 View Code Duplication
0 ignored issues
show
This code seems to be duplicated in your project.
Loading history...
3320
class ConsoliaComics(GenericNavigableComic):
    """Retriever for the Consolia comics."""
    name = 'consolia'
    long_name = 'consolia'
    url = 'https://consolia-comic.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'first' anchor found on the page at cls.url."""
        home = get_soup_at_url(cls.url)
        return home.find('a', class_='first')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the 'next' or 'prev' anchor (None if absent)."""
        anchor_class = 'next' if next_ else 'prev'
        return last_soup.find('a', class_=anchor_class)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect the title, image URLs and publication date of a comic."""
        title = soup.find('meta', property='og:title')['content']
        published = soup.find('time')["datetime"]
        day = string_to_date(published, "%Y-%m-%d")
        og_images = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'img': [meta['content'] for meta in og_images],
            'day': day.day,
            'month': day.month,
            'year': day.year,
        }
3351
3352 View Code Duplication
0 ignored issues
show
This code seems to be duplicated in your project.
Loading history...
3353
class TuMourrasMoinsBete(GenericNavigableComic):
    """Retriever for the Tu Mourras Moins Bete comics."""
    name = 'mourrasmoinsbete'
    long_name = 'Tu Mourras Moins Bete'
    url = 'http://tumourrasmoinsbete.blogspot.fr'
    _categories = ('FRANCAIS', )
    get_first_comic_link = simulate_first_link
    first_url = 'http://tumourrasmoinsbete.blogspot.fr/2008/06/essai.html'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the blog pager's 'newer' or 'older' anchor (None if absent)."""
        if next_:
            pager_id = 'Blog1_blog-pager-newer-link'
        else:
            pager_id = 'Blog1_blog-pager-older-link'
        return last_soup.find('a', id=pager_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect the image URLs, author and page title of a post."""
        title = soup.find('title').string
        article = soup.find('div', itemprop='description articleBody')
        pictures = article.find_all('img')
        author = soup.find('span', itemprop='author').string
        return {
            'img': [tag['src'] for tag in pictures],
            'author': author,
            'title': title,
        }
3378
3379
3380
class GeekAndPoke(GenericNavigableComic):
    """Class to retrieve Geek And Poke comics."""
    name = 'geek'
    long_name = 'Geek And Poke'
    url = 'http://geek-and-poke.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://geek-and-poke.com/geekandpoke/2006/8/27/a-new-place-for-a-not-so-old-blog.html'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic."""
        # prev is next / next is prev
        return last_soup.find('a', class_='prev-item' if next_ else 'next-item')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Images are looked up in the "body entry-content" div, falling back
        to the "special-content" div. 'alt' is the alt text of the first
        image, or an empty string when the post has no usable image.
        """
        title = soup.find('meta', property='og:title')['content']
        desc = soup.find('meta', property='og:description')['content']
        date_str = soup.find('time', class_='published')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        author = soup.find('a', rel='author').string
        div_content = (soup.find('div', class_="body entry-content") or
                       soup.find('div', class_="special-content"))
        imgs = div_content.find_all('img')
        imgs = [i for i in imgs if i.get('src') is not None]
        # NOTE(review): on a bs4 Tag, `in` appears to test the tag's children
        # rather than its attributes, so this check may never compare anything
        # - confirm whether i.has_attr('title') was intended.
        assert all('title' not in i or i['alt'] == i['title'] for i in imgs)
        # Keep 'alt' a string in every case (it used to fall back to a list).
        alt = imgs[0].get('alt', "") if imgs else ""
        return {
            'title': title,
            'alt': alt,
            'description': desc,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
        }
3418
3419 View Code Duplication
0 ignored issues
show
This code seems to be duplicated in your project.
Loading history...
3420
class GloryOwlComix(GenericNavigableComic):
    """Retriever for the Glory Owl comics."""
    name = 'gloryowl'
    long_name = 'Glory Owl'
    url = 'http://gloryowlcomix.blogspot.fr'
    _categories = ('NSFW', 'FRANCAIS')
    get_first_comic_link = simulate_first_link
    first_url = 'http://gloryowlcomix.blogspot.fr/2013/02/1_7.html'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the blog pager's 'newer' or 'older' anchor (None if absent)."""
        if next_:
            pager_id = 'Blog1_blog-pager-newer-link'
        else:
            pager_id = 'Blog1_blog-pager-older-link'
        return last_soup.find('a', id=pager_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect the image URLs, author and page title of a post."""
        title = soup.find('title').string
        image_links = soup.find_all('link', rel='image_src')
        author = soup.find('a', rel='author').string
        return {
            'img': [tag['href'] for tag in image_links],
            'author': author,
            'title': title,
        }
3445
3446
3447
class GenericTumblrV1(GenericComic):
    """Generic class to retrieve comics from Tumblr using the V1 API."""
    _categories = ('TUMBLR', )

    @classmethod
    def get_next_comic(cls, last_comic):
        """Yield the comics found in the posts published after last_comic.

        Posts for which get_comic_info returns None are skipped.
        """
        for post in cls.get_posts(last_comic):
            info = cls.get_comic_info(post)
            if info is None:
                continue
            yield info

    @classmethod
    def get_url_from_post(cls, post):
        """Return the 'url' attribute of a post."""
        return post['url']

    @classmethod
    def get_api_url(cls):
        """Return cls.url joined with the '/api/read/' path."""
        return urljoin_wrapper(cls.url, '/api/read/')

    @classmethod
    def get_comic_info(cls, post):
        """Build the comic dict for a post, or return None for non-photo posts."""
        if post['type'] != 'photo':
            return None
        tumblr_id = int(post['id'])
        api_url = cls.get_api_url() + ('?id=%d' % tumblr_id)
        timestamp = int(post['unix-timestamp'])
        day = datetime.datetime.fromtimestamp(timestamp).date()
        caption = post.find('photo-caption')
        title = caption.string if caption else ""
        tags = ' '.join(tag.string for tag in post.find_all('tag'))
        # Photos may appear in 'photo' tags and/or straight in the post
        photos = post.find_all('photo')
        if not photos:
            photos = [post]
        # Images are in multiple resolutions - taking the first one
        url_tags = [photo.find('photo-url') for photo in photos]
        return {
            'url': cls.get_url_from_post(post),
            'url2': post['url-with-slug'],
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'title': title,
            'tags': tags,
            'img': [url_tag.string for url_tag in url_tags],
            'tumblr-id': tumblr_id,
            'api_url': api_url,
        }

    @classmethod
    def get_posts(cls, last_comic, nb_post_per_call=10):
        """Get posts using API. nb_post_per_call is max 50.

        Posts are retrieved from newer to older as per the tumblr v1 api
        but are returned in chronological order.

        Retrieval stops at the post whose url matches last_comic['url'].
        Nothing is returned when the previous post cannot be fetched
        anymore or when its url is never encountered.
        """
        waiting_for_url = last_comic['url'] if last_comic else None
        collected = []
        if last_comic is not None:
            # Sometimes, tumblr posts are deleted. When previous post is deleted, we
            # might end up spending a lot of time looking for something that
            # doesn't exist. Failing early and clearly might be a better option.
            last_api_url = last_comic['api_url']
            try:
                get_soup_at_url(last_api_url)
            except urllib.error.HTTPError:
                try:
                    get_soup_at_url(cls.url)
                except urllib.error.HTTPError:
                    print("Did not find previous post nor main url %s" % cls.url)
                else:
                    print("Did not find previous post %s : it might have been deleted" % last_api_url)
                return reversed(collected)
        api_url = cls.get_api_url()
        summary = get_soup_at_url(api_url).find('posts')
        start = int(summary['start'])
        total = int(summary['total'])
        assert start == 0
        for offset in range(0, total, nb_post_per_call):
            page_url = api_url + '?start=%d&num=%d' % (offset, nb_post_per_call)
            page = get_soup_at_url(page_url).find('posts')
            page_start = int(page['start'])
            page_total = int(page['total'])
            assert offset == page_start, "%d != %d" % (offset, page_start)
            # This may happen and should be handled in the future
            assert total == page_total, "%d != %d" % (total, page_total)
            for post in page.find_all('post'):
                if waiting_for_url and waiting_for_url == cls.get_url_from_post(post):
                    return reversed(collected)
                collected.append(post)
        if waiting_for_url is None:
            return reversed(collected)
        print("Did not find %s : there might be a problem" % waiting_for_url)
        return []
3540
3541
3542
class SaturdayMorningBreakfastCerealTumblr(GenericTumblrV1):
    """Saturday Morning Breakfast Cereal comics, retrieved from Tumblr."""
    # Also on http://www.gocomics.com/saturday-morning-breakfast-cereal
    # Also on http://www.smbc-comics.com
    name = 'smbc-tumblr'
    long_name = 'Saturday Morning Breakfast Cereal (from Tumblr)'
    url = 'http://smbc-comics.tumblr.com'
    _categories = ('SMBC', )
3550
3551
3552
class IrwinCardozo(GenericTumblrV1):
    """Irwin Cardozo comics, retrieved from Tumblr."""
    name = 'irwinc'
    long_name = 'Irwin Cardozo'
    url = 'http://irwincardozocomics.tumblr.com'
3557
3558
3559
class AccordingToDevin(GenericTumblrV1):
    """According To Devin comics, retrieved from Tumblr."""
    name = 'devin'
    long_name = 'According To Devin'
    url = 'http://accordingtodevin.tumblr.com'
3564
3565
3566
class ItsTheTieTumblr(GenericTumblrV1):
    """It's the tie comics, retrieved from Tumblr."""
    # Also on http://itsthetie.com
    # Also on https://tapastic.com/series/itsthetie
    name = 'tie-tumblr'
    long_name = "It's the tie (from Tumblr)"
    url = "http://itsthetie.tumblr.com"
    _categories = ('TIE', )
3574
3575
3576
class OctopunsTumblr(GenericTumblrV1):
    """Octopuns comics, retrieved from Tumblr."""
    # Also on http://www.octopuns.net
    name = 'octopuns-tumblr'
    long_name = 'Octopuns (from Tumblr)'
    url = 'http://octopuns.tumblr.com'
3582
3583
3584
class PicturesInBoxesTumblr(GenericTumblrV1):
    """Pictures In Boxes comics, retrieved from Tumblr."""
    # Also on http://www.picturesinboxes.com
    name = 'picturesinboxes-tumblr'
    long_name = 'Pictures in Boxes (from Tumblr)'
    url = 'http://picturesinboxescomic.tumblr.com'
3590
3591
3592
class TubeyToonsTumblr(GenericTumblrV1):
    """TubeyToons comics, retrieved from Tumblr."""
    # Also on http://tapastic.com/series/Tubey-Toons
    # Also on http://tubeytoons.com
    name = 'tubeytoons-tumblr'
    long_name = 'Tubey Toons (from Tumblr)'
    url = 'http://tubeytoons.tumblr.com'
    # NOTE(review): 'TUNEYTOONS' looks like a misspelling of TUBEYTOONS, but
    # other classes may share this exact value - confirm before renaming.
    _categories = ('TUNEYTOONS', )
3600
3601
3602
class UnearthedComicsTumblr(GenericTumblrV1):
    """Unearthed comics, retrieved from Tumblr."""
    # Also on http://tapastic.com/series/UnearthedComics
    # Also on http://unearthedcomics.com
    name = 'unearthed-tumblr'
    long_name = 'Unearthed Comics (from Tumblr)'
    url = 'http://unearthedcomics.tumblr.com'
    _categories = ('UNEARTHED', )
3610
3611
3612
class PieComic(GenericTumblrV1):
    """Pie Comic comics, retrieved from Tumblr."""
    name = 'pie'
    long_name = 'Pie Comic'
    url = "http://piecomic.tumblr.com"
3617
3618
3619
class MrEthanDiamond(GenericTumblrV1):
    """Mr Ethan Diamond comics, retrieved from Tumblr."""
    name = 'diamond'
    long_name = 'Mr Ethan Diamond'
    url = 'http://mrethandiamond.tumblr.com'
3624
3625
3626
class Flocci(GenericTumblrV1):
    """floccinaucinihilipilification comics, retrieved from Tumblr."""
    name = 'flocci'
    long_name = 'floccinaucinihilipilification'
    url = "http://floccinaucinihilipilificationa.tumblr.com"
3631
3632
3633
class UpAndOut(GenericTumblrV1):
    """Up & Out comics, retrieved from Tumblr."""
    # Also on http://tapastic.com/series/UP-and-OUT
    name = 'upandout'
    long_name = 'Up And Out (from Tumblr)'
    url = 'http://upandoutcomic.tumblr.com'
3639
3640
3641
class Pundemonium(GenericTumblrV1):
    """Pundemonium comics, retrieved from Tumblr."""
    name = 'pundemonium'
    long_name = 'Pundemonium'
    url = 'http://monstika.tumblr.com'
3646
3647
3648
class PoorlyDrawnLinesTumblr(GenericTumblrV1):
    """Poorly Drawn Lines comics, retrieved from Tumblr."""
    # Also on http://poorlydrawnlines.com
    name = 'poorlydrawn-tumblr'
    long_name = 'Poorly Drawn Lines (from Tumblr)'
    url = 'http://pdlcomics.tumblr.com'
    _categories = ('POORLYDRAWN', )
3655
3656
3657
class PearShapedComics(GenericTumblrV1):
    """Pear-Shaped Comics, retrieved through the Tumblr V1 API."""
    name = 'pearshaped'
    long_name = 'Pear-Shaped Comics'
    url = 'http://pearshapedcomics.com'
3662
3663
3664
class PondScumComics(GenericTumblrV1):
    """Pond Scum comics, retrieved from Tumblr."""
    name = 'pond'
    long_name = 'Pond Scum'
    url = 'http://pondscumcomic.tumblr.com'
3669
3670
3671
class MercworksTumblr(GenericTumblrV1):
    """Mercworks comics, retrieved from Tumblr."""
    # Also on http://mercworks.net
    name = 'mercworks-tumblr'
    long_name = 'Mercworks (from Tumblr)'
    url = 'http://mercworks.tumblr.com'
3677
3678
3679
class OwlTurdTumblr(GenericTumblrV1):
    """Owl Turd comics, retrieved from Tumblr."""
    # Also on http://tapastic.com/series/Owl-Turd-Comix
    name = 'owlturd-tumblr'
    long_name = 'Owl Turd (from Tumblr)'
    url = 'http://owlturdcomix.tumblr.com'
    _categories = ('OWLTURD', )
3686
3687
3688
class VectorBelly(GenericTumblrV1):
    """Vector Belly comics, retrieved from Tumblr."""
    # Also on http://vectorbelly.com
    name = 'vector'
    long_name = 'Vector Belly'
    url = 'http://vectorbelly.tumblr.com'
3694
3695
3696
class GoneIntoRapture(GenericTumblrV1):
    """Gone Into Rapture comics, retrieved through the Tumblr V1 API."""
    # Also on http://goneintorapture.tumblr.com
    # Also on http://tapastic.com/series/Goneintorapture
    name = 'rapture'
    long_name = 'Gone Into Rapture'
    url = 'http://www.goneintorapture.com'
3703
3704
3705
class TheOatmealTumblr(GenericTumblrV1):
    """The Oatmeal comics, retrieved from Tumblr."""
    # Also on http://theoatmeal.com
    name = 'oatmeal-tumblr'
    long_name = 'The Oatmeal (from Tumblr)'
    url = 'http://oatmeal.tumblr.com'
3711
3712
3713
class HeckIfIKnowComicsTumblr(GenericTumblrV1):
    """Heck If I Know comics, retrieved through the Tumblr V1 API."""
    # Also on http://tapastic.com/series/Regular
    name = 'heck-tumblr'
    long_name = 'Heck if I Know comics (from Tumblr)'
    url = 'http://heckifiknowcomics.com'
3719
3720
3721
class MyJetPack(GenericTumblrV1):
    """My Jet Pack comics, retrieved from Tumblr."""
    name = 'jetpack'
    long_name = 'My Jet Pack'
    url = 'http://myjetpack.tumblr.com'
3726
3727
3728
class CheerUpEmoKidTumblr(GenericTumblrV1):
    """Cheer Up Emo Kid comics, retrieved from Tumblr."""
    # Also on http://www.cheerupemokid.com
    # Also on http://tapastic.com/series/CUEK
    name = 'cuek-tumblr'
    long_name = 'Cheer Up Emo Kid (from Tumblr)'
    url = 'http://enzocomics.tumblr.com'
3735
3736
3737
class ForLackOfABetterComic(GenericTumblrV1):
    """For Lack Of A Better Comic comics, retrieved from Tumblr."""
    # Also on http://forlackofabettercomic.com
    name = 'lack'
    long_name = 'For Lack Of A Better Comic'
    url = 'http://forlackofabettercomic.tumblr.com'
3743
3744
3745
class ZenPencilsTumblr(GenericTumblrV1):
    """Zen Pencils comics, retrieved from Tumblr."""
    # Also on http://zenpencils.com
    # Also on http://www.gocomics.com/zen-pencils
    name = 'zenpencils-tumblr'
    long_name = 'Zen Pencils (from Tumblr)'
    url = 'http://zenpencils.tumblr.com'
    _categories = ('ZENPENCILS', )
3753
3754
3755
class ThreeWordPhraseTumblr(GenericTumblrV1):
    """Three Word Phrase comics, fetched through GenericTumblrV1."""
    # Also available at http://threewordphrase.com
    name = "threeword-tumblr"
    long_name = "Three Word Phrase (from Tumblr)"
    url = "http://www.threewordphrase.tumblr.com"
3761
3762
3763
class TimeTrabbleTumblr(GenericTumblrV1):
    """Time Trabble comics, fetched through GenericTumblrV1."""
    # Also available at http://timetrabble.com
    name = "timetrabble-tumblr"
    long_name = "Time Trabble (from Tumblr)"
    url = "http://timetrabble.tumblr.com"
3769
3770
3771
class SafelyEndangeredTumblr(GenericTumblrV1):
    """Safely Endangered comics, fetched through GenericTumblrV1."""
    # Also available at http://www.safelyendangered.com
    name = "endangered-tumblr"
    long_name = "Safely Endangered (from Tumblr)"
    url = "http://tumblr.safelyendangered.com"
3777
3778
3779
class MouseBearComedyTumblr(GenericTumblrV1):
    """Mouse Bear Comedy comics, fetched through GenericTumblrV1."""
    # Also available at http://www.mousebearcomedy.com
    name = "mousebear-tumblr"
    long_name = "Mouse Bear Comedy (from Tumblr)"
    url = "http://mousebearcomedy.tumblr.com"
3785
3786
3787
class BouletCorpTumblr(GenericTumblrV1):
    """BouletCorp comics, fetched through GenericTumblrV1."""
    # Also available at http://www.bouletcorp.com
    name = "boulet-tumblr"
    long_name = "Boulet Corp (from Tumblr)"
    url = "http://bouletcorp.tumblr.com"
    _categories = ("BOULET",)
3794
3795
3796
class TheAwkwardYetiTumblr(GenericTumblrV1):
    """The Awkward Yeti comics, fetched through GenericTumblrV1."""
    # Also available at http://www.gocomics.com/the-awkward-yeti
    # Also available at http://theawkwardyeti.com
    # Also available at https://tapastic.com/series/TheAwkwardYeti
    name = "yeti-tumblr"
    long_name = "The Awkward Yeti (from Tumblr)"
    url = "http://larstheyeti.tumblr.com"
    _categories = ("YETI",)
3805
3806
3807
class NellucNhoj(GenericTumblrV1):
    """NellucNhoj comics, fetched through GenericTumblrV1."""
    name = "nhoj"
    long_name = "Nelluc Nhoj"
    url = "http://nellucnhoj.com"
3812
3813
3814
class DownTheUpwardSpiralTumblr(GenericTumblrV1):
    """Down The Upward Spiral comics, fetched through GenericTumblrV1."""
    # Also available at http://www.downtheupwardspiral.com
    name = "spiral-tumblr"
    long_name = "Down the Upward Spiral (from Tumblr)"
    url = "http://downtheupwardspiral.tumblr.com"
3820
3821
3822
class AsPerUsualTumblr(GenericTumblrV1):
    """Class to retrieve As Per Usual comics."""
    # Also on https://tapastic.com/series/AsPerUsual
    name = 'usual-tumblr'
    long_name = 'As Per Usual (from Tumblr)'
    url = 'http://as-per-usual.tumblr.com'
    # Tags are declared as `_categories` on the other comic classes in this
    # file; this one used the un-prefixed `categories`, which presumably was
    # never picked up by the category lookup (verify against GenericComic).
    _categories = ('DAMILEE', )
3829
3830
3831
class HotComicsForCoolPeopleTumblr(GenericTumblrV1):
    """Class to retrieve Hot Comics For Cool People."""
    # Also on https://tapastic.com/series/Hot-Comics-For-Cool-People
    # Also on http://hotcomics.biz (links to tumblr)
    # Also on http://hcfcp.com (links to tumblr)
    name = 'hotcomics-tumblr'
    long_name = 'Hot Comics For Cool People (from Tumblr)'
    url = 'http://hotcomicsforcoolpeople.tumblr.com'
    # Tags are declared as `_categories` on the other comic classes in this
    # file; this one used the un-prefixed `categories`, which presumably was
    # never picked up by the category lookup (verify against GenericComic).
    _categories = ('DAMILEE', )
3840
3841
3842
class OneOneOneOneComicTumblr(GenericTumblrV1):
    """1111 Comics, fetched through GenericTumblrV1."""
    # Also available at http://www.1111comics.me
    # Also available at https://tapastic.com/series/1111-Comics
    name = "1111-tumblr"
    long_name = "1111 Comics (from Tumblr)"
    url = "http://comics1111.tumblr.com"
    _categories = ("ONEONEONEONE",)
3850
3851
3852
class JhallComicsTumblr(GenericTumblrV1):
    """Jhall Comics, fetched through GenericTumblrV1."""
    # Also available at http://jhallcomics.com
    name = "jhall-tumblr"
    long_name = "Jhall Comics (from Tumblr)"
    url = "http://jhallcomics.tumblr.com"
3858
3859
3860
class BerkeleyMewsTumblr(GenericTumblrV1):
    """Berkeley Mews comics, fetched through GenericTumblrV1."""
    # Also available at http://www.gocomics.com/berkeley-mews
    # Also available at http://www.berkeleymews.com
    name = "berkeley-tumblr"
    long_name = "Berkeley Mews (from Tumblr)"
    url = "http://mews.tumblr.com"
    _categories = ("BERKELEY",)
3868
3869
3870
class JoanCornellaTumblr(GenericTumblrV1):
    """Joan Cornella comics, fetched through GenericTumblrV1."""
    # Also available at http://joancornella.net
    name = "cornella-tumblr"
    long_name = "Joan Cornella (from Tumblr)"
    url = "http://cornellajoan.tumblr.com"
3876
3877
3878
class RespawnComicTumblr(GenericTumblrV1):
    """Respawn Comic, fetched through GenericTumblrV1."""
    # Also available at http://respawncomic.com
    name = "respawn-tumblr"
    long_name = "Respawn Comic (from Tumblr)"
    url = "http://respawncomic.tumblr.com"
3884
3885
3886
class ChrisHallbeckTumblr(GenericTumblrV1):
    """Class to retrieve Chris Hallbeck comics."""
    # Also on https://tapastic.com/ChrisHallbeck
    # Also on http://maximumble.com
    # Also on http://minimumble.com
    # Also on http://thebookofbiff.com
    name = 'hallbeck-tumblr'
    # Display name previously misspelled the author as "Hallback".
    long_name = 'Chris Hallbeck (from Tumblr)'
    url = 'http://chrishallbeck.tumblr.com'
    # NOTE(review): the tag keeps its existing 'HALLBACK' spelling because
    # other classes may share it - confirm before renaming.
    _categories = ('HALLBACK', )
3896
3897
3898
class ComicNuggets(GenericTumblrV1):
    """Comic Nuggets, fetched through GenericTumblrV1."""
    name = "nuggets"
    long_name = "Comic Nuggets"
    url = "http://comicnuggets.com"
3903
3904
3905
class PigeonGazetteTumblr(GenericTumblrV1):
    """The Pigeon Gazette comics, fetched through GenericTumblrV1."""
    # Also available at https://tapastic.com/series/The-Pigeon-Gazette
    name = "pigeon-tumblr"
    long_name = "The Pigeon Gazette (from Tumblr)"
    url = "http://thepigeongazette.tumblr.com"
3911
3912
3913
class CancerOwl(GenericTumblrV1):
    """Cancer Owl comics, fetched through GenericTumblrV1."""
    # Also available at http://cancerowl.com
    name = "cancerowl-tumblr"
    long_name = "Cancer Owl (from Tumblr)"
    url = "http://cancerowl.tumblr.com"
3919
3920
3921
class FowlLanguageTumblr(GenericTumblrV1):
    """Fowl Language comics, fetched through GenericTumblrV1."""
    # Also available at http://www.fowllanguagecomics.com
    # Also available at http://tapastic.com/series/Fowl-Language-Comics
    # Also available at http://www.gocomics.com/fowl-language
    name = "fowllanguage-tumblr"
    long_name = "Fowl Language Comics (from Tumblr)"
    url = "http://fowllanguagecomics.tumblr.com"
    _categories = ("FOWLLANGUAGE",)
3930
3931
3932
class TheOdd1sOutTumblr(GenericTumblrV1):
    """The Odd 1s Out comics, fetched through GenericTumblrV1."""
    # Also available at http://theodd1sout.com
    # Also available at https://tapastic.com/series/Theodd1sout
    name = "theodd-tumblr"
    long_name = "The Odd 1s Out (from Tumblr)"
    url = "http://theodd1sout.tumblr.com"
3939
3940
3941
class TheUnderfoldTumblr(GenericTumblrV1):
    """The Underfold comics, fetched through GenericTumblrV1."""
    # Also available at http://theunderfold.com
    name = "underfold-tumblr"
    long_name = "The Underfold (from Tumblr)"
    url = "http://theunderfold.tumblr.com"
3947
3948
3949
class LolNeinTumblr(GenericTumblrV1):
    """Lol Nein comics, fetched through GenericTumblrV1."""
    # Also available at http://lolnein.com
    name = "lolnein-tumblr"
    long_name = "Lol Nein (from Tumblr)"
    url = "http://lolneincom.tumblr.com"
3955
3956
3957
class FatAwesomeComicsTumblr(GenericTumblrV1):
    """Fat Awesome Comics, fetched through GenericTumblrV1."""
    # Also available at http://fatawesome.com/comics
    name = "fatawesome-tumblr"
    long_name = "Fat Awesome (from Tumblr)"
    url = "http://fatawesomecomedy.tumblr.com"
3963
3964
3965
class TheWorldIsFlatTumblr(GenericTumblrV1):
    """The World Is Flat comics, fetched through GenericTumblrV1."""
    # Also available at https://tapastic.com/series/The-World-is-Flat
    name = "flatworld-tumblr"
    long_name = "The World Is Flat (from Tumblr)"
    url = "http://theworldisflatcomics.tumblr.com"
3971
3972
3973
class DorrisMc(GenericTumblrV1):
    """Dorris Mc comics, fetched through GenericTumblrV1."""
    # Also available at http://www.gocomics.com/dorris-mccomics
    name = "dorrismc"
    long_name = "Dorris Mc"
    url = "http://dorrismccomics.com"
3979
3980
3981
class LeleozTumblr(GenericEmptyComic, GenericTumblrV1):
    """Leleoz comics, declared on top of GenericEmptyComic and GenericTumblrV1."""
    # NOTE(review): the GenericEmptyComic base presumably disables retrieval - confirm.
    # Also available at https://tapastic.com/series/Leleoz
    name = "leleoz-tumblr"
    long_name = "Leleoz (from Tumblr)"
    url = "http://leleozcomics.tumblr.com"
3987
3988
3989
class MoonBeardTumblr(GenericTumblrV1):
    """MoonBeard comics, fetched through GenericTumblrV1."""
    # Also available at http://moonbeard.com
    # Also available at http://www.webtoons.com/en/comedy/moon-beard/list?title_no=471
    name = "moonbeard-tumblr"
    long_name = "Moon Beard (from Tumblr)"
    url = "http://blog.squiresjam.es/moonbeard"
3996
3997
3998
class AComik(GenericTumblrV1):
    """A Comik comics, fetched through GenericTumblrV1."""
    name = "comik"
    long_name = "A Comik"
    url = "http://acomik.com"
4003
4004
4005
class ClassicRandy(GenericTumblrV1):
    """Classic Randy comics, fetched through GenericTumblrV1."""
    name = "randy"
    long_name = "Classic Randy"
    url = "http://classicrandy.tumblr.com"
4010
4011
4012
class DagssonTumblr(GenericTumblrV1):
    """Dagsson comics, fetched through GenericTumblrV1."""
    # Also available at http://www.dagsson.com
    name = "dagsson-tumblr"
    long_name = "Dagsson Hugleikur (from Tumblr)"
    url = "http://hugleikurdagsson.tumblr.com"
4018
4019
4020
class LinsEditionsTumblr(GenericTumblrV1):
    """L.I.N.S. Editions comics, fetched through GenericTumblrV1."""
    # Also available at https://linsedition.com
    # Now published at http://warandpeas.tumblr.com
    name = "lins-tumblr"
    long_name = "L.I.N.S. Editions (from Tumblr)"
    url = "http://linscomics.tumblr.com"
    _categories = ("LINS",)
4028
4029
4030
class WarAndPeasTumblr(GenericTumblrV1):
    """War And Peas comics, fetched through GenericTumblrV1."""
    # Previously published at http://linscomics.tumblr.com
    name = "warandpeas-tumblr"
    long_name = "War And Peas (from Tumblr)"
    url = "http://warandpeas.tumblr.com"
    _categories = ("WARANDPEAS",)
4037
4038
4039
class OrigamiHotDish(GenericTumblrV1):
    """Origami Hot Dish comics, fetched through GenericTumblrV1."""
    name = "origamihotdish"
    long_name = "Origami Hot Dish"
    url = "http://origamihotdish.com"
4044
4045
4046
class HitAndMissComicsTumblr(GenericTumblrV1):
    """Hit and Miss Comics, fetched through GenericTumblrV1."""
    name = "hitandmiss"
    long_name = "Hit and Miss Comics"
    url = "http://hitandmisscomics.tumblr.com"
4051
4052
4053
class HMBlanc(GenericTumblrV1):
    """HM Blanc comics, fetched through GenericTumblrV1."""
    name = "hmblanc"
    long_name = "HM Blanc"
    url = "http://hmblanc.tumblr.com"
4058
4059
4060
class TalesOfAbsurdityTumblr(GenericTumblrV1):
    """Tales Of Absurdity comics, fetched through GenericTumblrV1."""
    # Also available at http://talesofabsurdity.com
    # Also available at http://tapastic.com/series/Tales-Of-Absurdity
    name = "absurdity-tumblr"
    long_name = "Tales of Absurdity (from Tumblr)"
    url = "http://talesofabsurdity.tumblr.com"
    _categories = ("ABSURDITY",)
4068
4069
4070
class RobbieAndBobby(GenericTumblrV1):
    """Robbie And Bobby comics, fetched through GenericTumblrV1."""
    # Also available at http://robbieandbobby.com
    name = "robbie-tumblr"
    long_name = "Robbie And Bobby (from Tumblr)"
    url = "http://robbieandbobby.tumblr.com"
4076
4077
4078
class ElectricBunnyComicTumblr(GenericTumblrV1):
    """Electric Bunny Comics, fetched through GenericTumblrV1."""
    # Also available at http://www.electricbunnycomics.com/View/Comic/153/Welcome+to+Hell
    name = "bunny-tumblr"
    long_name = "Electric Bunny Comic (from Tumblr)"
    url = "http://electricbunnycomics.tumblr.com"
4084
4085
4086
class Hoomph(GenericTumblrV1):
    """Hoomph comics, fetched through GenericTumblrV1."""
    name = "hoomph"
    long_name = "Hoomph"
    url = "http://hoom.ph"
4091
4092
4093
class BFGFSTumblr(GenericTumblrV1):
    """BFGFS comics, fetched through GenericTumblrV1."""
    # Also available at https://tapastic.com/series/BFGFS
    # Also available at http://bfgfs.com
    name = "bfgfs-tumblr"
    long_name = "BFGFS (from Tumblr)"
    url = "http://bfgfs.tumblr.com"
4100
4101
4102
class DoodleForFood(GenericTumblrV1):
    """Doodle For Food comics, fetched through GenericTumblrV1."""
    # Also available at http://doodleforfood.com
    name = "doodle"
    long_name = "Doodle For Food"
    url = "http://doodleforfood.com"
4108
4109
4110
class CassandraCalinTumblr(GenericTumblrV1):
    """C. Cassandra comics, fetched through GenericTumblrV1."""
    # Also available at http://cassandracalin.com
    # Also available at https://tapastic.com/series/C-Cassandra-comics
    name = "cassandra-tumblr"
    long_name = "Cassandra Calin (from Tumblr)"
    url = "http://c-cassandra.tumblr.com"
4117
4118
4119
class DougWasTaken(GenericTumblrV1):
    """Doug Was Taken comics, fetched through GenericTumblrV1."""
    name = "doug"
    long_name = "Doug Was Taken"
    url = "http://dougwastaken.tumblr.com"
4124
4125
4126
class MandatoryRollerCoaster(GenericTumblrV1):
    """Mandatory Roller Coaster comics, fetched through GenericTumblrV1."""
    name = "rollercoaster"
    long_name = "Mandatory Roller Coaster"
    url = "http://mandatoryrollercoaster.com"
4131
4132
4133
class CEstPasEnRegardantSesPompes(GenericTumblrV1):
    """C'Est Pas En Regardant Ses Pompes (...) comics, fetched through GenericTumblrV1."""
    name = "cperspqccltt"
    long_name = "C Est Pas En Regardant Ses Pompes (...)"
    url = "http://cperspqccltt.tumblr.com"
4138
4139
4140
class TheGrohlTroll(GenericTumblrV1):
    """The Grohl Troll comics, fetched through GenericTumblrV1."""
    name = "grohltroll"
    long_name = "The Grohl Troll"
    url = "http://thegrohltroll.com"
4145
4146
4147
class WebcomicName(GenericTumblrV1):
    """Webcomic Name comics, fetched through GenericTumblrV1."""
    name = "webcomicname"
    long_name = "Webcomic Name"
    url = "http://webcomicname.com"
4152
4153
4154
class BooksOfAdam(GenericTumblrV1):
    """Books of Adam comics, fetched through GenericTumblrV1."""
    # Also available at http://www.booksofadam.com
    name = "booksofadam"
    long_name = "Books of Adam"
    url = "http://booksofadam.tumblr.com"
4160
4161
4162
class HarkAVagrant(GenericTumblrV1):
    """Hark A Vagrant comics, fetched through GenericTumblrV1."""
    # Also available at http://www.harkavagrant.com
    name = "hark-tumblr"
    long_name = "Hark A Vagrant (from Tumblr)"
    url = "http://beatonna.tumblr.com"
4168
4169
4170
class OurSuperAdventureTumblr(GenericTumblrV1):
    """Our Super Adventure comics, fetched through GenericTumblrV1."""
    # Also available at https://tapastic.com/series/Our-Super-Adventure
    # Also available at http://www.oursuperadventure.com
    # Related site: http://sarahgraley.com
    name = "superadventure-tumblr"
    long_name = "Our Super Adventure (from Tumblr)"
    url = "http://sarahssketchbook.tumblr.com"
4178
4179
4180
class JakeLikesOnions(GenericTumblrV1):
    """Jake Likes Onions comics, fetched through GenericTumblrV1."""
    name = "jake"
    long_name = "Jake Likes Onions"
    url = "http://jakelikesonions.com"
4185
4186
4187
class InYourFaceCake(GenericTumblrV1):
    """In Your Face Cake comics, fetched through GenericTumblrV1."""
    name = "inyourfacecake-tumblr"
    long_name = "In Your Face Cake (from Tumblr)"
    url = "http://in-your-face-cake.tumblr.com"
4192
4193
4194
class Robospunk(GenericTumblrV1):
    """Robospunk comics, fetched through GenericTumblrV1."""
    name = "robospunk"
    long_name = "Robospunk"
    url = "http://robospunk.com"
4199
4200
4201
class BananaTwinky(GenericTumblrV1):
    """Banana Twinky comics, fetched through GenericTumblrV1."""
    name = "banana"
    long_name = "Banana Twinky"
    url = "http://bananatwinky.tumblr.com"
4206
4207
4208
class YesterdaysPopcornTumblr(GenericTumblrV1):
    """Yesterday's Popcorn comics, fetched through GenericTumblrV1."""
    # Also available at http://www.yesterdayspopcorn.com
    # Also available at https://tapastic.com/series/Yesterdays-Popcorn
    name = "popcorn-tumblr"
    long_name = "Yesterday's Popcorn (from Tumblr)"
    url = "http://yesterdayspopcorn.tumblr.com"
4215
4216
4217
class TwistedDoodles(GenericTumblrV1):
    """Twisted Doodles comics, fetched through GenericTumblrV1."""
    name = "twisted"
    long_name = "Twisted Doodles"
    url = "http://www.twisteddoodles.com"
4222
4223
4224
class UbertoolTumblr(GenericTumblrV1):
    """Ubertool comics, fetched through GenericTumblrV1."""
    # Also available at http://ubertoolcomic.com
    # Also available at https://tapastic.com/series/ubertool
    name = "ubertool-tumblr"
    long_name = "Ubertool (from Tumblr)"
    url = "http://ubertool.tumblr.com"
    _categories = ("UBERTOOL",)
4232
4233
4234
class LittleLifeLinesTumblr(GenericTumblrV1):
    """Little Life Lines comics, fetched through GenericTumblrV1."""
    # Also available at http://www.littlelifelines.com
    name = "life-tumblr"
    long_name = "Little Life Lines (from Tumblr)"
    url = "https://little-life-lines.tumblr.com"
4240
4241
4242
class TheyCanTalk(GenericTumblrV1):
    """They Can Talk comics, fetched through GenericTumblrV1."""
    name = "theycantalk"
    long_name = "They Can Talk"
    url = "http://theycantalk.com"
4247
4248
4249
class Will5NeverCome(GenericTumblrV1):
    """Will 5:00 Never Come comics, fetched through GenericTumblrV1."""
    name = "will5"
    long_name = "Will 5:00 Never Come ?"
    url = "http://will5nevercome.com"
4254
4255
4256
class Sephko(GenericTumblrV1):
    """Sephko Comics, fetched through GenericTumblrV1."""
    # Also available at http://www.sephko.com
    name = "sephko"
    long_name = "Sephko"
    url = "http://sephko.tumblr.com"
4262
4263
4264
class BlazersAtDawn(GenericTumblrV1):
    """Blazers At Dawn Comics, fetched through GenericTumblrV1."""
    name = "blazers"
    long_name = "Blazers At Dawn"
    url = "http://blazersatdawn.tumblr.com"
4269
4270
4271
# Deactivated (GenericEmptyComic listed first) because it downloads too many things.
class ArtByMoga(GenericEmptyComic, GenericTumblrV1):
    """Art By Moga Comics, declared on top of GenericEmptyComic and GenericTumblrV1."""
    name = "moga"
    long_name = "Art By Moga"
    url = "http://artbymoga.tumblr.com"
4276
4277
4278
class VerbalVomitTumblr(GenericTumblrV1):
    """Verbal Vomit comics, fetched through GenericTumblrV1."""
    # Also available at http://www.verbal-vomit.com
    name = "vomit-tumblr"
    long_name = "Verbal Vomit (from Tumblr)"
    url = "http://verbalvomits.tumblr.com"
4284
4285
4286
class LibraryComic(GenericTumblrV1):
    """LibraryComic, fetched through GenericTumblrV1."""
    # Also available at http://librarycomic.com
    name = "library-tumblr"
    long_name = "LibraryComic (from Tumblr)"
    url = "http://librarycomic.tumblr.com"
4292
4293
4294
class TizzyStitchBirdTumblr(GenericTumblrV1):
    """Tizzy Stitch Bird comics, fetched through GenericTumblrV1."""
    # Also available at http://tizzystitchbird.com
    # Also available at https://tapastic.com/series/TizzyStitchbird
    # Also available at http://www.webtoons.com/en/challenge/tizzy-stitchbird/list?title_no=50082
    name = "tizzy-tumblr"
    long_name = "Tizzy Stitch Bird (from Tumblr)"
    url = "http://tizzystitchbird.tumblr.com"
4302
4303
4304
class VictimsOfCircumsolarTumblr(GenericTumblrV1):
    """VictimsOfCircumsolar comics, fetched through GenericTumblrV1."""
    # Also available at http://www.victimsofcircumsolar.com
    name = "circumsolar-tumblr"
    long_name = "Victims Of Circumsolar (from Tumblr)"
    url = "http://victimsofcomics.tumblr.com"
4310
4311
4312 View Code Duplication
class RockPaperCynicTumblr(GenericTumblrV1):
    """RockPaperCynic comics, fetched through GenericTumblrV1."""
    # Also available at http://www.rockpapercynic.com
    # Also available at https://tapastic.com/series/rockpapercynic
    name = "rpc-tumblr"
    long_name = "Rock Paper Cynic (from Tumblr)"
    url = "http://rockpapercynic.tumblr.com"
4319
4320
4321
class DeadlyPanelTumblr(GenericTumblrV1):
    """Deadly Panel comics, fetched through GenericTumblrV1."""
    # Also available at http://www.deadlypanel.com
    # Also available at https://tapastic.com/series/deadlypanel
    name = "deadly-tumblr"
    long_name = "Deadly Panel (from Tumblr)"
    url = "http://deadlypanel.tumblr.com"
4328
4329
4330
class CatanaComics(GenericTumblrV1):
    """Catana comics, fetched through GenericTumblrV1."""
    name = "catana"
    long_name = "Catana"
    url = "http://www.catanacomics.com"
4335
4336
4337
class AngryAtNothingTumblr(GenericTumblrV1):
    """Angry at Nothing comics, fetched through GenericTumblrV1."""
    # Also available at http://www.angryatnothing.net
    # Also available at http://tapastic.com/series/Comics-yeah-definitely-comics-
    name = "angry-tumblr"
    long_name = "Angry At Nothing (from Tumblr)"
    url = "http://angryatnothing.tumblr.com"
4344
4345
4346
class ShanghaiTango(GenericTumblrV1):
    """Shanghai Tango comic, fetched through GenericTumblrV1."""
    name = "tango"
    long_name = "Shanghai Tango"
    url = "http://tango2010weibo.tumblr.com"
4351
4352
4353
class OffTheLeashDogTumblr(GenericTumblrV1):
    """Off The Leash Dog comics, fetched through GenericTumblrV1."""
    # Also available at http://offtheleashdogcartoons.com
    # Also available at http://www.rupertfawcettcartoons.com
    name = "offtheleash-tumblr"
    long_name = "Off The Leash Dog (from Tumblr)"
    url = "http://rupertfawcettsdoggyblog.tumblr.com"
    _categories = ("FAWCETT",)
4361
4362
4363
class ImogenQuestTumblr(GenericTumblrV1):
    """Imogen Quest comics, fetched through GenericTumblrV1."""
    # Also available at http://imogenquest.net
    name = "imogen-tumblr"
    long_name = "Imogen Quest (from Tumblr)"
    url = "http://imoquest.tumblr.com"
4369
4370
4371
class Shitfest(GenericTumblrV1):
    """Shitfest comics, fetched through GenericTumblrV1."""
    name = "shitfest"
    long_name = "Shitfest"
    url = "http://shitfestcomic.com"
4376
4377
4378
class HorovitzComics(GenericListableComic):
    """Shared logic for the comics hosted on horovitzcomics.com.

    Concrete subclasses provide `link_re`, a compiled pattern matched against
    archive hrefs whose first group is the comic number.
    """
    url = "http://www.horovitzcomics.com"
    _categories = ("HOROVITZ",)
    # Three numeric path components after "comics/", read as year/month/day below.
    img_re = re.compile(".*comics/([0-9]*)/([0-9]*)/([0-9]*)/.*$")
    # Placeholder, meant to be overridden by subclasses.
    link_re = NotImplemented
    get_url_from_archive_element = join_cls_url_to_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for one comic.

        `link` is the archive anchor: its href yields the number and its
        string the title. `soup` is the comic page, which must hold exactly
        one <img id="comic"> whose src carries the date.
        """
        num = int(cls.link_re.match(link["href"]).group(1))
        title = link.string
        comic_imgs = soup.find_all("img", id="comic")
        assert len(comic_imgs) == 1
        date_parts = cls.img_re.match(comic_imgs[0]["src"]).groups()
        year, month, day = (int(part) for part in date_parts)
        info = {"title": title, "day": day, "month": month, "year": year}
        info["img"] = [img["src"] for img in comic_imgs]
        info["num"] = num
        return info

    @classmethod
    def get_archive_elements(cls):
        """Return the archive page anchors matching `link_re`, in reversed page order."""
        archive_soup = get_soup_at_url("http://www.horovitzcomics.com/comics/archive/")
        anchors = archive_soup.find_all("a", href=cls.link_re)
        return reversed(anchors)
4409
4410
4411
class HorovitzNew(HorovitzComics):
    """Horovitz new comics, fetched through HorovitzComics."""
    name = "horovitznew"
    long_name = "Horovitz New"
    # Archive hrefs of the "new" series; the group is the comic number.
    link_re = re.compile("^/comics/new/([0-9]+)$")
4416
4417
4418
class HorovitzClassic(HorovitzComics):
    """Horovitz classic comics, fetched through HorovitzComics."""
    name = "horovitzclassic"
    long_name = "Horovitz Classic"
    # Archive hrefs of the "classic" series; the group is the comic number.
    link_re = re.compile("^/comics/classic/([0-9]+)$")
4423
4424
4425
class GenericGoComic(GenericNavigableComic):
    """Base class holding the logic shared by the comics from gocomics.com."""
    _categories = ("GOCOMIC",)

    @classmethod
    def get_first_comic_link(cls):
        """Return the link to the first comic, looked up on the page at `cls.url`."""
        landing_page = get_soup_at_url(cls.url)
        # The trailing space is part of the class attribute being matched.
        return landing_page.find(
            "a", class_="fa btn btn-outline-default btn-circle fa-backward sm "
        )

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the link to the next comic if `next_` is truthy, else to the previous one."""
        if next_:
            css_class = "fa btn btn-outline-default btn-circle fa-caret-right sm "
        else:
            css_class = "fa btn btn-outline-default btn-circle fa-caret-left sm "
        return last_soup.find("a", class_=css_class)

    @classmethod
    def get_url_from_link(cls, link):
        """Return the link's href joined onto the gocomics.com root."""
        return urljoin_wrapper("http://www.gocomics.com", link["href"])

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (date, images, author, tags) from a comic page."""
        published = soup.find("meta", property="article:published_time")["content"]
        pub_date = string_to_date(published, "%Y-%m-%d")
        picture = soup.find("picture", class_="img-fluid item-comic-image")
        images = picture.find_all("img")
        author = soup.find("meta", property="article:author")["content"]
        tags = soup.find("meta", property="article:tag")["content"]
        info = {
            "day": pub_date.day,
            "month": pub_date.month,
            "year": pub_date.year,
        }
        # Image sources are resolved against the comic's own url.
        info["img"] = [urljoin_wrapper(cls.url, image["src"]) for image in images]
        info["author"] = author
        info["tags"] = tags
        return info
4462
4463
4464
class PearlsBeforeSwine(GenericGoComic):
    """Pearls Before Swine comics, fetched through GenericGoComic."""
    name = "pearls"
    long_name = "Pearls Before Swine"
    url = "http://www.gocomics.com/pearlsbeforeswine"
4469
4470
4471
class Peanuts(GenericGoComic):
    """Peanuts comics, fetched through GenericGoComic."""
    name = "peanuts"
    long_name = "Peanuts"
    url = "http://www.gocomics.com/peanuts"
4476
4477
4478
class MattWuerker(GenericGoComic):
    """Matt Wuerker comics, fetched through GenericGoComic."""
    name = "wuerker"
    long_name = "Matt Wuerker"
    url = "http://www.gocomics.com/mattwuerker"
4483
4484
4485
class TomToles(GenericGoComic):
    """Tom Toles comics, fetched through GenericGoComic."""
    name = "toles"
    long_name = "Tom Toles"
    url = "http://www.gocomics.com/tomtoles"
4490
4491
4492
class BreakOfDay(GenericGoComic):
    """Break Of Day comics, fetched through GenericGoComic."""
    name = "breakofday"
    long_name = "Break Of Day"
    url = "http://www.gocomics.com/break-of-day"
4497
4498
4499
class Brevity(GenericGoComic):
    """Brevity comics, fetched through GenericGoComic."""
    name = "brevity"
    long_name = "Brevity"
    url = "http://www.gocomics.com/brevitypanel"
4504
4505
4506
class MichaelRamirez(GenericGoComic):
    """Michael Ramirez comics, fetched through GenericGoComic."""
    name = "ramirez"
    long_name = "Michael Ramirez"
    url = "http://www.gocomics.com/michaelramirez"
4511
4512
4513
class MikeLuckovich(GenericGoComic):
    """Mike Luckovich comics, fetched through GenericGoComic."""
    name = "luckovich"
    long_name = "Mike Luckovich"
    url = "http://www.gocomics.com/mikeluckovich"
4518
4519
4520
class JimBenton(GenericGoComic):
    """Jim Benton comics, fetched through GenericGoComic."""
    # Also available at http://jimbenton.tumblr.com
    name = "benton"
    long_name = "Jim Benton"
    url = "http://www.gocomics.com/jim-benton-cartoons"
4526
4527
4528
class TheArgyleSweater(GenericGoComic):
    """The Argyle Sweater comics, fetched through GenericGoComic."""
    name = "argyle"
    long_name = "Argyle Sweater"
    url = "http://www.gocomics.com/theargylesweater"
4533
4534
4535
class SunnyStreet(GenericGoComic):
    """Sunny Street comics, fetched through GenericGoComic."""
    # Also available at http://www.sunnystreetcomics.com
    name = "sunny"
    long_name = "Sunny Street"
    url = "http://www.gocomics.com/sunny-street"
4541
4542
4543
class OffTheMark(GenericGoComic):
    """Off The Mark comics, fetched through GenericGoComic."""
    # Also available at https://www.offthemark.com
    name = "offthemark"
    long_name = "Off The Mark"
    url = "http://www.gocomics.com/offthemark"
4549
4550
4551
class WuMo(GenericGoComic):
    """WuMo comics, fetched through GenericGoComic."""
    # Also available at http://wumo.com
    name = "wumo"
    long_name = "WuMo"
    url = "http://www.gocomics.com/wumo"
4557
4558
4559
class LunarBaboon(GenericGoComic):
    """Lunar Baboon comics, fetched through GenericGoComic."""
    # Also available at http://www.lunarbaboon.com
    # Also available at https://tapastic.com/series/Lunarbaboon
    name = "lunarbaboon"
    long_name = "Lunar Baboon"
    url = "http://www.gocomics.com/lunarbaboon"
4566
4567
4568
class SandersenGocomic(GenericGoComic):
    """Sarah Andersen comics, fetched through GenericGoComic."""
    # Also available at http://sarahcandersen.com
    # Also available at http://tapastic.com/series/Doodle-Time
    name = "sandersen-goc"
    long_name = "Sarah Andersen (from GoComics)"
    url = "http://www.gocomics.com/sarahs-scribbles"
4575
4576
4577
class SaturdayMorningBreakfastCerealGoComic(GenericGoComic):
    """Saturday Morning Breakfast Cereal comics, fetched through GenericGoComic."""
    # Also available at http://smbc-comics.tumblr.com
    # Also available at http://www.smbc-comics.com
    name = "smbc-goc"
    long_name = "Saturday Morning Breakfast Cereal (from GoComics)"
    url = "http://www.gocomics.com/saturday-morning-breakfast-cereal"
    _categories = ("SMBC",)
4585
4586
4587
class CalvinAndHobbesGoComic(GenericGoComic):
    """Calvin and Hobbes comics, fetched through GenericGoComic."""
    # Source is gocomics, not http://marcel-oehler.marcellosendos.ch/comics/ch/
    name = "calvin-goc"
    long_name = "Calvin and Hobbes (from GoComics)"
    url = "http://www.gocomics.com/calvinandhobbes"
4593
4594
4595
class RallGoComic(GenericGoComic):
    """Ted Rall comics, fetched through GenericGoComic."""
    # Also available at http://rall.com/comic
    name = 'rall-goc'
    long_name = 'Ted Rall (from GoComics)'
    url = 'http://www.gocomics.com/ted-rall'
    _categories = ('RALL',)
4602
4603
4604
class TheAwkwardYetiGoComic(GenericGoComic):
    """The Awkward Yeti comics, fetched through GenericGoComic."""
    # Also available at http://larstheyeti.tumblr.com
    # Also available at http://theawkwardyeti.com
    # Also available at https://tapastic.com/series/TheAwkwardYeti
    name = "yeti-goc"
    long_name = "The Awkward Yeti (from GoComics)"
    url = "http://www.gocomics.com/the-awkward-yeti"
    _categories = ("YETI",)
4613
4614
4615
class BerkeleyMewsGoComics(GenericGoComic):
    """Berkeley Mews comics, fetched through GenericGoComic."""
    # Also available at http://mews.tumblr.com
    # Also available at http://www.berkeleymews.com
    name = "berkeley-goc"
    long_name = "Berkeley Mews (from GoComics)"
    url = "http://www.gocomics.com/berkeley-mews"
    _categories = ("BERKELEY",)
4623
4624
4625
class SheldonGoComics(GenericGoComic):
    """Retriever for Sheldon comics hosted on GoComics."""
    # Also on http://www.sheldoncomics.com
    name = 'sheldon-goc'
    long_name = 'Sheldon Comics (from GoComics)'
    url = 'http://www.gocomics.com/sheldon'
4631
4632
4633
class FowlLanguageGoComics(GenericGoComic):
    """Retriever for Fowl Language comics hosted on GoComics."""
    # Also on http://www.fowllanguagecomics.com
    # Also on http://tapastic.com/series/Fowl-Language-Comics
    # Also on http://fowllanguagecomics.tumblr.com
    name = 'fowllanguage-goc'
    long_name = 'Fowl Language Comics (from GoComics)'
    url = 'http://www.gocomics.com/fowl-language'
    _categories = ('FOWLLANGUAGE',)
4642
4643
4644
class NickAnderson(GenericGoComic):
    """Retriever for Nick Anderson comics hosted on GoComics."""
    name = 'nickanderson'
    long_name = 'Nick Anderson'
    url = 'http://www.gocomics.com/nickanderson'
4649
4650
4651
class GarfieldGoComics(GenericGoComic):
    """Retriever for Garfield comics hosted on GoComics."""
    # Also on http://garfield.com
    name = 'garfield-goc'
    long_name = 'Garfield (from GoComics)'
    url = 'http://www.gocomics.com/garfield'
    _categories = ('GARFIELD',)
4658
4659
4660
class DorrisMcGoComics(GenericGoComic):
    """Retriever for Dorris Mc comics hosted on GoComics."""
    # Also on http://dorrismccomics.com
    name = 'dorrismc-goc'
    long_name = 'Dorris Mc (from GoComics)'
    url = 'http://www.gocomics.com/dorris-mccomics'
4666
4667
4668
class FoxTrot(GenericGoComic):
    """Retriever for FoxTrot comics hosted on GoComics."""
    name = 'foxtrot'
    long_name = 'FoxTrot'
    url = 'http://www.gocomics.com/foxtrot'
4673
4674
4675
class FoxTrotClassics(GenericGoComic):
    """Retriever for FoxTrot Classics comics hosted on GoComics."""
    name = 'foxtrot-classics'
    long_name = 'FoxTrot Classics'
    url = 'http://www.gocomics.com/foxtrotclassics'
4680
4681
4682
class MisterAndMeGoComics(GenericEmptyComic, GenericGoComic):  # Removed ?
    """Retriever for Mister & Me comics hosted on GoComics."""
    # GenericEmptyComic comes first in the bases; the trailing "Removed ?"
    # marker suggests the series may no longer be available on GoComics.
    # Also on http://www.mister-and-me.com
    # Also on https://tapastic.com/series/Mister-and-Me
    name = 'mister-goc'
    long_name = 'Mister & Me (from GoComics)'
    url = 'http://www.gocomics.com/mister-and-me'
4689
4690
4691
class NonSequitur(GenericGoComic):
    """Retriever for Non Sequitur (Wiley Miller) comics hosted on GoComics."""
    name = 'nonsequitur'
    long_name = 'Non Sequitur'
    url = 'http://www.gocomics.com/nonsequitur'
4696
4697
4698
class GenericTapasticComic(GenericListableComic):
    """Generic class to handle the logic common to comics from tapastic.com.

    Concrete subclasses set `url` to the series page; the list of episodes
    is extracted from the javascript embedded in that page.
    """
    _categories = ('TAPASTIC', )

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Get information about a particular comics.

        `soup` is the parsed episode page and `archive_elt` the matching
        entry of the episode list (keys read: 'publishDate', 'title', 'id').

        Returns a dict with the keys 'day', 'month', 'year', 'img' (list of
        image URLs) and 'title', or None when the page has no image yet.
        """
        # 'publishDate' is divided by 1000 before being used as a POSIX
        # timestamp - presumably it is expressed in milliseconds.
        timestamp = int(archive_elt['publishDate']) / 1000.0
        # fromtimestamp() without tz gives a date in the local timezone.
        day = datetime.datetime.fromtimestamp(timestamp).date()
        imgs = soup.find_all('img', class_='art-image')
        if not imgs:
            print("Comic %s is being uploaded, retry later" % cls.get_url_from_archive_element(archive_elt))
            return None
        return {
            'day': day.day,
            'year': day.year,
            'month': day.month,
            'img': [i['src'] for i in imgs],
            'title': archive_elt['title'],
        }

    @classmethod
    def get_url_from_archive_element(cls, archive_elt):
        """Get the episode URL corresponding to an entry of the episode list."""
        return 'http://tapastic.com/episode/' + str(archive_elt['id'])

    @classmethod
    def get_archive_elements(cls):
        """Get the episode list embedded in the series page.

        The information is stored in the javascript part of the page, on a
        line of the form ``episodeList : <json>,``. There is no known clean
        way to get it so the page is scanned line by line.

        Raises IndexError if no such line is found.
        """
        pref, suff = 'episodeList : ', ','
        # NOTE(review): the response object is never closed explicitly.
        for raw_line in urlopen_wrapper(cls.url).readlines():
            line = raw_line.decode('utf-8').strip()
            if line.startswith(pref) and line.endswith(suff):
                return json.loads(line[len(pref):-len(suff)])
        # Same exception type as the former `[...][0]`, with a useful message.
        raise IndexError("No line starting with %r found at %s" % (pref, cls.url))
4731
4732
4733
class VegetablesForDessert(GenericTapasticComic):
    """Retriever for Vegetables For Dessert comics hosted on Tapastic."""
    # Also on http://vegetablesfordessert.tumblr.com
    name = 'vegetables'
    long_name = 'Vegetables For Dessert'
    url = 'http://tapastic.com/series/vegetablesfordessert'
4739
4740
4741
class FowlLanguageTapa(GenericTapasticComic):
    """Retriever for Fowl Language comics hosted on Tapastic."""
    # Also on http://www.fowllanguagecomics.com
    # Also on http://fowllanguagecomics.tumblr.com
    # Also on http://www.gocomics.com/fowl-language
    name = 'fowllanguage-tapa'
    long_name = 'Fowl Language Comics (from Tapastic)'
    url = 'http://tapastic.com/series/Fowl-Language-Comics'
    _categories = ('FOWLLANGUAGE',)
4750
4751
4752
class OscillatingProfundities(GenericTapasticComic):
    """Retriever for Oscillating Profundities comics hosted on Tapastic."""
    name = 'oscillating'
    long_name = 'Oscillating Profundities'
    url = 'http://tapastic.com/series/oscillatingprofundities'
4757
4758
4759
class ZnoflatsComics(GenericTapasticComic):
    """Retriever for Znoflats comics hosted on Tapastic."""
    name = 'znoflats'
    long_name = 'Znoflats Comics'
    url = 'http://tapastic.com/series/Znoflats-Comics'
4764
4765
4766
class SandersenTapastic(GenericTapasticComic):
    """Retriever for Sarah Andersen comics hosted on Tapastic."""
    # Also on http://sarahcandersen.com
    # Also on http://www.gocomics.com/sarahs-scribbles
    name = 'sandersen-tapa'
    long_name = 'Sarah Andersen (from Tapastic)'
    url = 'http://tapastic.com/series/Doodle-Time'
4773
4774
4775
class TubeyToonsTapastic(GenericTapasticComic):
    """Retriever for TubeyToons comics hosted on Tapastic."""
    # Also on http://tubeytoons.com
    # Also on http://tubeytoons.tumblr.com
    name = 'tubeytoons-tapa'
    long_name = 'Tubey Toons (from Tapastic)'
    url = 'http://tapastic.com/series/Tubey-Toons'
    # NOTE(review): 'TUNEYTOONS' looks like a typo for 'TUBEYTOONS'; left
    # untouched as the same key may be shared with classes defined elsewhere.
    _categories = ('TUNEYTOONS',)
4783
4784
4785
class AnythingComicTapastic(GenericTapasticComic):
    """Retriever for Anything Comic comics hosted on Tapastic."""
    # Also on http://www.anythingcomic.com
    name = 'anythingcomic-tapa'
    long_name = 'Anything Comic (from Tapastic)'
    url = 'http://tapastic.com/series/anything'
4791
4792
4793
class UnearthedComicsTapastic(GenericTapasticComic):
    """Retriever for Unearthed Comics hosted on Tapastic."""
    # Also on http://unearthedcomics.com
    # Also on http://unearthedcomics.tumblr.com
    name = 'unearthed-tapa'
    long_name = 'Unearthed Comics (from Tapastic)'
    url = 'http://tapastic.com/series/UnearthedComics'
    _categories = ('UNEARTHED',)
4801
4802
4803
class EverythingsStupidTapastic(GenericTapasticComic):
    """Retriever for Everything's Stupid comics hosted on Tapastic."""
    # Also on http://www.webtoons.com/en/challenge/everythings-stupid/list?title_no=14591
    # Also on http://everythingsstupid.net
    name = 'stupid-tapa'
    long_name = "Everything's Stupid (from Tapastic)"
    url = 'http://tapastic.com/series/EverythingsStupid'
4810
4811
4812
class JustSayEhTapastic(GenericTapasticComic):
    """Retriever for Just Say Eh comics hosted on Tapastic."""
    # Also on http://www.justsayeh.com
    name = 'justsayeh-tapa'
    long_name = 'Just Say Eh (from Tapastic)'
    url = 'http://tapastic.com/series/Just-Say-Eh'
4818
4819
4820
class ThorsThundershackTapastic(GenericTapasticComic):
    """Retriever for Thor's Thundershack comics hosted on Tapastic."""
    # Also on http://www.thorsthundershack.com
    name = 'thor-tapa'
    long_name = "Thor's Thundershack (from Tapastic)"
    # NOTE(review): the slug ends in 'Thundershac' (no final 'k'); it may be
    # the real Tapastic slug or a truncation - confirm before changing.
    url = 'http://tapastic.com/series/Thors-Thundershac'
    _categories = ('THOR',)
4827
4828
4829
class OwlTurdTapastic(GenericTapasticComic):
    """Retriever for Owl Turd comics hosted on Tapastic."""
    # Also on http://owlturd.com
    name = 'owlturd-tapa'
    long_name = 'Owl Turd (from Tapastic)'
    url = 'http://tapastic.com/series/Owl-Turd-Comix'
    _categories = ('OWLTURD',)
4836
4837
4838
class GoneIntoRaptureTapastic(GenericTapasticComic):
    """Retriever for Gone Into Rapture comics hosted on Tapastic."""
    # Also on http://goneintorapture.tumblr.com
    # Also on http://www.goneintorapture.com
    name = 'rapture-tapa'
    long_name = 'Gone Into Rapture (from Tapastic)'
    url = 'http://tapastic.com/series/Goneintorapture'
4845
4846
4847
class HeckIfIKnowComicsTapa(GenericTapasticComic):
    """Retriever for Heck If I Know comics hosted on Tapastic."""
    # Also on http://heckifiknowcomics.com
    name = 'heck-tapa'
    long_name = 'Heck if I Know comics (from Tapastic)'
    url = 'http://tapastic.com/series/Regular'
4853
4854
4855
class CheerUpEmoKidTapa(GenericTapasticComic):
    """Retriever for Cheer Up Emo Kid comics hosted on Tapastic."""
    # Also on http://www.cheerupemokid.com
    # Also on http://enzocomics.tumblr.com
    name = 'cuek-tapa'
    long_name = 'Cheer Up Emo Kid (from Tapastic)'
    url = 'http://tapastic.com/series/CUEK'
4862
4863
4864
class BigFootJusticeTapa(GenericTapasticComic):
    """Retriever for Big Foot Justice comics hosted on Tapastic."""
    # Also on http://bigfootjustice.com
    name = 'bigfoot-tapa'
    long_name = 'Big Foot Justice (from Tapastic)'
    url = 'http://tapastic.com/series/bigfoot-justice'
4870
4871
4872
class UpAndOutTapa(GenericTapasticComic):
    """Retriever for Up & Out comics hosted on Tapastic."""
    # Also on http://upandoutcomic.tumblr.com
    name = 'upandout-tapa'
    long_name = 'Up And Out (from Tapastic)'
    url = 'http://tapastic.com/series/UP-and-OUT'
4878
4879
4880
class ToonHoleTapa(GenericTapasticComic):
    """Retriever for Toon Hole comics hosted on Tapastic."""
    # Also on http://www.toonhole.com
    name = 'toonhole-tapa'
    long_name = 'Toon Hole (from Tapastic)'
    url = 'http://tapastic.com/series/TOONHOLE'
4886
4887
4888
class AngryAtNothingTapa(GenericTapasticComic):
    """Retriever for Angry At Nothing comics hosted on Tapastic."""
    # Also on http://www.angryatnothing.net
    # Also on http://angryatnothing.tumblr.com
    name = 'angry-tapa'
    long_name = 'Angry At Nothing (from Tapastic)'
    url = 'http://tapastic.com/series/Comics-yeah-definitely-comics-'
4895
4896
4897
class LeleozTapa(GenericTapasticComic):
    """Retriever for Leleoz comics hosted on Tapastic."""
    # Also on http://leleozcomics.tumblr.com
    name = 'leleoz-tapa'
    long_name = 'Leleoz (from Tapastic)'
    url = 'https://tapastic.com/series/Leleoz'
4903
4904
4905
class TheAwkwardYetiTapa(GenericTapasticComic):
    """Retriever for The Awkward Yeti comics hosted on Tapastic."""
    # Also on http://www.gocomics.com/the-awkward-yeti
    # Also on http://theawkwardyeti.com
    # Also on http://larstheyeti.tumblr.com
    name = 'yeti-tapa'
    long_name = 'The Awkward Yeti (from Tapastic)'
    url = 'https://tapastic.com/series/TheAwkwardYeti'
    _categories = ('YETI',)
4914
4915
4916
class AsPerUsualTapa(GenericTapasticComic):
    """Class to retrieve As Per Usual comics (from Tapastic)."""
    # Also on http://as-per-usual.tumblr.com
    name = 'usual-tapa'
    long_name = 'As Per Usual (from Tapastic)'
    url = 'https://tapastic.com/series/AsPerUsual'
    # Spelled `_categories` (leading underscore) like the other comic classes
    # of this file; it was previously declared as `categories`.
    _categories = ('DAMILEE', )
4923
4924
4925
class HotComicsForCoolPeopleTapa(GenericTapasticComic):
    """Class to retrieve Hot Comics For Cool People (from Tapastic)."""
    # Also on http://hotcomicsforcoolpeople.tumblr.com
    # Also on http://hotcomics.biz (links to tumblr)
    # Also on http://hcfcp.com (links to tumblr)
    name = 'hotcomics-tapa'
    long_name = 'Hot Comics For Cool People (from Tapastic)'
    url = 'https://tapastic.com/series/Hot-Comics-For-Cool-People'
    # Spelled `_categories` (leading underscore) like the other comic classes
    # of this file; it was previously declared as `categories`.
    _categories = ('DAMILEE', )
4934
4935
4936
class OneOneOneOneComicTapa(GenericTapasticComic):
    """Retriever for 1111 Comics hosted on Tapastic."""
    # Also on http://www.1111comics.me
    # Also on http://comics1111.tumblr.com
    name = '1111-tapa'
    long_name = '1111 Comics (from Tapastic)'
    url = 'https://tapastic.com/series/1111-Comics'
    _categories = ('ONEONEONEONE',)
4944
4945
4946
class TumbleDryTapa(GenericTapasticComic):
    """Class to retrieve Tumble Dry comics (from Tapastic)."""
    # Also on http://tumbledrycomics.com
    name = 'tumbledry-tapa'
    # Display name fixed: it used to read 'Tumblr Dry'.
    long_name = 'Tumble Dry (from Tapastic)'
    url = 'https://tapastic.com/series/TumbleDryComics'
4952
4953
4954
class DeadlyPanelTapa(GenericTapasticComic):
    """Retriever for Deadly Panel comics hosted on Tapastic."""
    # Also on http://www.deadlypanel.com
    # Also on http://deadlypanel.tumblr.com
    name = 'deadly-tapa'
    long_name = 'Deadly Panel (from Tapastic)'
    url = 'https://tapastic.com/series/deadlypanel'
4961
4962
4963
class ChrisHallbeckMaxiTapa(GenericTapasticComic):
    """Retriever for Chris Hallbeck's Maximumble comics hosted on Tapastic."""
    # Also on http://chrishallbeck.tumblr.com
    # Also on http://maximumble.com
    name = 'hallbeckmaxi-tapa'
    # NOTE(review): 'Hallback' below differs from the 'Hallbeck' spelling of
    # the class name; kept as is since the category key may be shared.
    long_name = 'Chris Hallback - Maximumble (from Tapastic)'
    url = 'https://tapastic.com/series/Maximumble'
    _categories = ('HALLBACK',)
4971
4972
4973
class ChrisHallbeckMiniTapa(GenericTapasticComic):
    """Retriever for Chris Hallbeck's Minimumble comics hosted on Tapastic."""
    # Also on http://chrishallbeck.tumblr.com
    # Also on http://minimumble.com
    name = 'hallbeckmini-tapa'
    # NOTE(review): 'Hallback' below differs from the 'Hallbeck' spelling of
    # the class name; kept as is since the category key may be shared.
    long_name = 'Chris Hallback - Minimumble (from Tapastic)'
    url = 'https://tapastic.com/series/Minimumble'
    _categories = ('HALLBACK',)
4981
4982
4983
class ChrisHallbeckBiffTapa(GenericTapasticComic):
    """Retriever for Chris Hallbeck's The Book of Biff comics hosted on Tapastic."""
    # Also on http://chrishallbeck.tumblr.com
    # Also on http://thebookofbiff.com
    name = 'hallbeckbiff-tapa'
    # NOTE(review): 'Hallback' below differs from the 'Hallbeck' spelling of
    # the class name; kept as is since the category key may be shared.
    long_name = 'Chris Hallback - The Book of Biff (from Tapastic)'
    url = 'https://tapastic.com/series/Biff'
    _categories = ('HALLBACK',)
4991
4992
4993
class RandoWisTapa(GenericTapasticComic):
    """Retriever for RandoWis comics hosted on Tapastic."""
    # Also on https://randowis.com
    name = 'randowis-tapa'
    long_name = 'RandoWis (from Tapastic)'
    url = 'https://tapastic.com/series/RandoWis'
4999
5000
5001
class PigeonGazetteTapa(GenericTapasticComic):
    """Retriever for The Pigeon Gazette comics hosted on Tapastic."""
    # Also on http://thepigeongazette.tumblr.com
    name = 'pigeon-tapa'
    long_name = 'The Pigeon Gazette (from Tapastic)'
    url = 'https://tapastic.com/series/The-Pigeon-Gazette'
5007
5008
5009
class TheOdd1sOutTapa(GenericTapasticComic):
    """Retriever for The Odd 1s Out comics hosted on Tapastic."""
    # Also on http://theodd1sout.com
    # Also on http://theodd1sout.tumblr.com
    name = 'theodd-tapa'
    long_name = 'The Odd 1s Out (from Tapastic)'
    url = 'https://tapastic.com/series/Theodd1sout'
5016
5017
5018
class TheWorldIsFlatTapa(GenericTapasticComic):
    """Retriever for The World Is Flat comics hosted on Tapastic."""
    # Also on http://theworldisflatcomics.tumblr.com
    name = 'flatworld-tapa'
    long_name = 'The World Is Flat (from Tapastic)'
    url = 'https://tapastic.com/series/The-World-is-Flat'
5024
5025
5026
class MisterAndMeTapa(GenericTapasticComic):
    """Retriever for Mister & Me comics hosted on Tapastic."""
    # Also on http://www.mister-and-me.com
    # Also on http://www.gocomics.com/mister-and-me
    name = 'mister-tapa'
    long_name = 'Mister & Me (from Tapastic)'
    url = 'https://tapastic.com/series/Mister-and-Me'
5033
5034
5035
class TalesOfAbsurdityTapa(GenericTapasticComic):
    """Retriever for Tales Of Absurdity comics hosted on Tapastic."""
    # Also on http://talesofabsurdity.com
    # Also on http://talesofabsurdity.tumblr.com
    name = 'absurdity-tapa'
    long_name = 'Tales of Absurdity (from Tapastic)'
    url = 'http://tapastic.com/series/Tales-Of-Absurdity'
    _categories = ('ABSURDITY',)
5043
5044
5045
class BFGFSTapa(GenericTapasticComic):
    """Retriever for BFGFS comics hosted on Tapastic."""
    # Also on http://bfgfs.com
    # Also on http://bfgfs.tumblr.com
    name = 'bfgfs-tapa'
    long_name = 'BFGFS (from Tapastic)'
    url = 'https://tapastic.com/series/BFGFS'
5052
5053
5054
class DoodleForFoodTapa(GenericTapasticComic):
    """Retriever for Doodle For Food comics hosted on Tapastic."""
    # Also on http://doodleforfood.com
    name = 'doodle-tapa'
    long_name = 'Doodle For Food (from Tapastic)'
    url = 'https://tapastic.com/series/Doodle-for-Food'
5060
5061
5062
class MrLovensteinTapa(GenericTapasticComic):
    """Retriever for Mr Lovenstein comics hosted on Tapastic."""
    # NOTE(review): the former "Also on" comment pointed at this very Tapastic
    # URL; the intended mirror is presumably the comic's own website - confirm.
    name = 'mrlovenstein-tapa'
    long_name = 'Mr. Lovenstein (from Tapastic)'
    url = 'https://tapastic.com/series/MrLovenstein'
5068
5069
5070
class CassandraCalinTapa(GenericTapasticComic):
    """Retriever for C. Cassandra comics hosted on Tapastic."""
    # Also on http://cassandracalin.com
    # Also on http://c-cassandra.tumblr.com
    name = 'cassandra-tapa'
    long_name = 'Cassandra Calin (from Tapastic)'
    url = 'https://tapastic.com/series/C-Cassandra-comics'
5077
5078
5079
class WafflesAndPancakes(GenericTapasticComic):
    """Retriever for Waffles And Pancakes comics hosted on Tapastic."""
    # Also on http://wandpcomic.com
    name = 'waffles'
    long_name = 'Waffles And Pancakes'
    url = 'https://tapastic.com/series/Waffles-and-Pancakes'
5085
5086
5087
class YesterdaysPopcornTapastic(GenericTapasticComic):
    """Retriever for Yesterday's Popcorn comics hosted on Tapastic."""
    # Also on http://www.yesterdayspopcorn.com
    # Also on http://yesterdayspopcorn.tumblr.com
    name = 'popcorn-tapa'
    long_name = "Yesterday's Popcorn (from Tapastic)"
    url = 'https://tapastic.com/series/Yesterdays-Popcorn'
5094
5095
5096
class OurSuperAdventureTapastic(GenericTapasticComic):
    """Retriever for Our Super Adventure comics hosted on Tapastic."""
    # Also on http://www.oursuperadventure.com
    # Other links listed by the original author:
    # http://sarahssketchbook.tumblr.com
    # http://sarahgraley.com
    name = 'superadventure-tapastic'
    long_name = 'Our Super Adventure (from Tapastic)'
    url = 'https://tapastic.com/series/Our-Super-Adventure'
5104
5105
5106
class NamelessPCs(GenericTapasticComic):
    """Retriever for Nameless PCs comics hosted on Tapastic."""
    # Also on http://namelesspcs.com
    name = 'namelesspcs-tapa'
    long_name = 'NamelessPCs (from Tapastic)'
    url = 'https://tapastic.com/series/NamelessPC'
5112
5113
5114
class UbertoolTapa(GenericTapasticComic):
    """Retriever for Ubertool comics hosted on Tapastic."""
    # Also on http://ubertoolcomic.com
    # Also on http://ubertool.tumblr.com
    name = 'ubertool-tapa'
    long_name = 'Ubertool (from Tapastic)'
    url = 'https://tapastic.com/series/ubertool'
    _categories = ('UBERTOOL',)
5122
5123
5124
class BarteNerdsTapa(GenericTapasticComic):
    """Retriever for BarteNerds comics hosted on Tapastic."""
    # Also on http://www.bartenerds.com
    name = 'bartenerds-tapa'
    long_name = 'BarteNerds (from Tapastic)'
    url = 'https://tapastic.com/series/BarteNERDS'
5130
5131
5132
class SmallBlueYonderTapa(GenericTapasticComic):
    """Retriever for Small Blue Yonder comics hosted on Tapastic."""
    # Also on http://www.smallblueyonder.com
    name = 'smallblue-tapa'
    long_name = 'Small Blue Yonder (from Tapastic)'
    url = 'https://tapastic.com/series/Small-Blue-Yonder'
5138
5139
5140
class TizzyStitchBirdTapa(GenericTapasticComic):
    """Retriever for Tizzy Stitch Bird comics hosted on Tapastic."""
    # Also on http://tizzystitchbird.com
    # Also on http://tizzystitchbird.tumblr.com
    # Also on http://www.webtoons.com/en/challenge/tizzy-stitchbird/list?title_no=50082
    name = 'tizzy-tapa'
    long_name = 'Tizzy Stitch Bird (from Tapastic)'
    url = 'https://tapastic.com/series/TizzyStitchbird'
5148
5149
5150
class RockPaperCynicTapa(GenericTapasticComic):
    """Retriever for Rock Paper Cynic comics hosted on Tapastic."""
    # Also on http://www.rockpapercynic.com
    # Also on http://rockpapercynic.tumblr.com
    name = 'rpc-tapa'
    long_name = 'Rock Paper Cynic (from Tapastic)'
    url = 'https://tapastic.com/series/rockpapercynic'
5157
5158
5159
class ItsTheTieTapa(GenericTapasticComic):
    """Retriever for It's the tie comics hosted on Tapastic."""
    # Also on http://itsthetie.com
    # Also on http://itsthetie.tumblr.com
    name = 'tie-tapa'
    long_name = "It's the tie (from Tapastic)"
    url = 'https://tapastic.com/series/itsthetie'
    _categories = ('TIE',)
5167
5168
5169
def get_subclasses(klass):
    """Return every class deriving from `klass`, directly or indirectly.

    Direct subclasses come first, followed by the descendants of each of
    them in turn. A class reachable through several paths appears once per
    path.
    """
    direct = klass.__subclasses__()
    descendants = list(direct)
    for child in direct:
        descendants += get_subclasses(child)
    return descendants
5175
5176
5177
def remove_st_nd_rd_th_from_date(string):
    """Function to transform 1st/2nd/3rd/4th in a parsable date format.

    Only an ordinal suffix directly following a digit is removed, so that
    words merely containing these letters ("Monday", "Saturday", "August")
    are left untouched.

    >>> remove_st_nd_rd_th_from_date("Monday 1st August 2016")
    'Monday 1 August 2016'
    """
    # Local import: keeps the block self-contained.
    import re
    return re.sub(r'(?<=\d)(?:st|nd|rd|th)\b', '', string)
5185
5186
5187
def string_to_date(string, date_format, local=DEFAULT_LOCAL):
    """Function to convert string to date object.

    Wrapper around datetime.datetime.strptime: `string` is parsed according
    to `date_format` while the locale `local` is active, and the date part
    of the result is returned.

    The previous locale is restored even when parsing fails. Exceptions
    from strptime (ValueError) and setlocale (locale.Error) propagate.
    """
    # format described in https://docs.python.org/3/library/datetime.html#strftime-and-strptime-behavior
    prev_locale = locale.setlocale(locale.LC_ALL)
    if local == prev_locale:
        return datetime.datetime.strptime(string, date_format).date()
    locale.setlocale(locale.LC_ALL, local)
    try:
        return datetime.datetime.strptime(string, date_format).date()
    finally:
        # Always put the process-wide locale back, including on parse errors.
        locale.setlocale(locale.LC_ALL, prev_locale)
5198
5199
5200
# Every class deriving (directly or not) from GenericComic.
COMICS = set(get_subclasses(GenericComic))
# Classes whose `name` is None are left out of the usable comics.
VALID_COMICS = [comic for comic in COMICS if comic.name is not None]
COMIC_NAMES = {comic.name: comic for comic in VALID_COMICS}
CLASS_NAMES = {comic.__name__ for comic in VALID_COMICS}
# Sanity checks: neither a `name` nor a class name may be used twice.
assert len(COMIC_NAMES) == len(VALID_COMICS)
assert len(CLASS_NAMES) == len(VALID_COMICS)
5206