Completed
Push — master ( 3c1345...c91874 )
by De
01:06
created

comics.py (28 issues)

1
#! /usr/bin/python3
2
# vim: set expandtab tabstop=4 shiftwidth=4 :
3
"""Module to retrieve webcomics"""
4
5
from comic_abstract import GenericComic, get_date_for_comic
6
import re
7
from datetime import date, timedelta
8
import datetime
9
from urlfunctions import get_soup_at_url, urljoin_wrapper,\
10
    convert_iri_to_plain_ascii_uri, load_json_at_url, urlopen_wrapper
11
import json
12
import locale
13
import urllib
14
15
DEFAULT_LOCAL = 'en_GB.UTF-8'
16
17
18
class Xkcd(GenericComic):
    """Class to retrieve Xkcd comics."""
    name = 'xkcd'
    long_name = 'xkcd'
    url = 'http://xkcd.com'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generator to get the next comic. Implementation of GenericComic's abstract method.

        Args:
            last_comic: dict describing the last comic already retrieved
                (only its 'num' entry is read) or a falsy value to start
                from the very first comic.

        Yields:
            The dict loaded from each comic's `info.0.json`, with 'img'
            wrapped in a list, 'day'/'month'/'year' converted to int and
            the extra keys 'prefix', 'json_url' and 'url' added.
        """
        first_num = last_comic['num'] if last_comic else 0
        # The site-level JSON describes the latest comic: its number is the upper bound.
        last_num = load_json_at_url(
            urljoin_wrapper(cls.url, 'info.0.json'))['num']

        for num in range(first_num + 1, last_num + 1):
            if num == 404:
                # Number 404 is skipped on purpose (presumably no such comic exists).
                continue
            json_url = urljoin_wrapper(cls.url, '%d/info.0.json' % num)
            comic = load_json_at_url(json_url)
            comic['img'] = [comic['img']]
            comic['prefix'] = '%d-' % num
            comic['json_url'] = json_url
            comic['url'] = urljoin_wrapper(cls.url, str(num))
            for field in ('day', 'month', 'year'):
                comic[field] = int(comic[field])
            assert comic['num'] == num
            yield comic
44
45
46
# Helper functions corresponding to get_url_from_link/get_url_from_archive_element
47
48
49
@classmethod
def get_href(cls, link):
    """Implementation of get_url_from_link/get_url_from_archive_element.

    Return the 'href' entry of `link` unchanged.
    """
    href = link['href']
    return href
53
54
55
@classmethod
def join_cls_url_to_href(cls, link):
    """Implementation of get_url_from_link/get_url_from_archive_element.

    Return the 'href' entry of `link` joined to the class `url`
    via `urljoin_wrapper`.
    """
    href = link['href']
    return urljoin_wrapper(cls.url, href)
59
60
61
class GenericNavigableComic(GenericComic):
    """Generic class for "navigable" comics : with first/next arrows.

    This class applies to comic where previous and next comics can be
    accessed from a given comic. Once given a starting point (either
    the first comic or the last comic retrieved), it will handle the
    navigation, the retrieval of the soup object and the setting of
    the 'url' attribute on retrieved comics. This limits a lot the
    amount of boilerplate code in the different implementation classes.

    The method `get_next_comic` is implemented in terms of new
    more specialized methods to be implemented/overridden:
        - get_first_comic_link
        - get_navi_link
        - get_comic_info
        - get_url_from_link
    """
    _categories = ('NAVIGABLE', )

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        Sometimes this can be retrieved of any comic page, sometimes on
        the archive page, sometimes it doesn't exist at all and one has
        to iterate backward to find it before hardcoding the result found.
        """
        raise NotImplementedError

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next (or previous - for dev purposes) comic."""
        raise NotImplementedError

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics."""
        raise NotImplementedError

    @classmethod
    def get_url_from_link(cls, link):
        """Get url corresponding to a link. Default implementation is similar to get_href."""
        return link['href']

    @classmethod
    def get_next_link(cls, last_soup):
        """Get link to next comic."""
        link = cls.get_navi_link(last_soup, True)
        cls.log("Next link is %s" % link)
        return link

    @classmethod
    def get_prev_link(cls, last_soup):
        """Get link to previous comic."""
        link = cls.get_navi_link(last_soup, False)
        cls.log("Prev link is %s" % link)
        return link

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generic implementation of get_next_comic for navigable comics.

        Starts from the page following `last_comic['url']` (or from the
        first comic link when `last_comic` is falsy) and follows "next"
        links until there is none or until a link leads back to the
        current url.

        Yields:
            The dicts returned by `get_comic_info` (None results are
            skipped) with the 'url' key set to the page url.
        """
        url = last_comic['url'] if last_comic else None
        cls.log("starting 'get_next_comic' from %s" % url)
        if url:
            next_comic = cls.get_next_link(get_soup_at_url(url))
        else:
            next_comic = cls.get_first_comic_link()
        cls.log("next/first comic will be %s (url is %s)" % (str(next_comic), url))
        # Dev helper, enable to validate navigation: cls.check_navigation(url)
        while next_comic:
            prev_url, url = url, cls.get_url_from_link(next_comic)
            if prev_url == url:
                # A "next" link pointing to the current page means we reached the end.
                cls.log("got same url %s" % url)
                break
            cls.log("about to get %s (%s)" % (url, str(next_comic)))
            soup = get_soup_at_url(url)
            comic = cls.get_comic_info(soup, next_comic)
            if comic is not None:
                # 'url' is owned by this method: implementations must not set it.
                assert 'url' not in comic
                comic['url'] = url
                yield comic
            next_comic = cls.get_next_link(soup)
            cls.log("next comic will be %s" % str(next_comic))

    @classmethod
    def check_first_link(cls):
        """Check that navigation to first comic seems to be working - for dev purposes.

        Returns:
            True if a first link exists and its url can be fetched without
            an HTTPError, False otherwise.
        """
        # Imported here so that `urllib.error` is guaranteed to be loaded:
        # a bare `import urllib` does not import the submodule.
        import urllib.error

        cls.log("about to check first link")
        ok = True
        firstlink = cls.get_first_comic_link()
        if firstlink is None:
            print("From %s : no first link" % cls.url)
            ok = False
        else:
            firsturl = cls.get_url_from_link(firstlink)
            try:
                get_soup_at_url(firsturl)
            except urllib.error.HTTPError:
                print("From %s : invalid first url" % cls.url)
                ok = False
        cls.log("checked first link -> returned %d" % ok)
        return ok

    @classmethod
    def check_prev_next_links(cls, url):
        """Check that navigation to prev/next from a given URL seems to be working - for dev purposes.

        Going backward then forward (and forward then backward) from `url`
        is expected to lead back to `url`.

        Returns:
            True if the checks passed, False otherwise (a message is
            printed for each failure).
        """
        cls.log("about to check prev/next from %s" % url)
        ok = True
        if url is None:
            prevlink, nextlink = None, None
        else:
            soup = get_soup_at_url(url)
            prevlink, nextlink = cls.get_prev_link(soup), cls.get_next_link(soup)
        if prevlink is None and nextlink is None:
            print("From %s : no previous nor next" % url)
            ok = False
        else:
            if prevlink:
                prevurl = cls.get_url_from_link(prevlink)
                prevsoup = get_soup_at_url(prevurl)
                prevnextlink = cls.get_next_link(prevsoup)
                prevnext = cls.get_url_from_link(prevnextlink) if prevnextlink is not None else "NO URL"
                if prevnext != url:
                    print("From %s, going backward then forward leads to %s" % (url, prevnext))
                    ok = False
            if nextlink:
                nexturl = cls.get_url_from_link(nextlink)
                # A next link pointing to the page itself is not followed.
                if nexturl != url:
                    nextsoup = get_soup_at_url(nexturl)
                    nextprevlink = cls.get_prev_link(nextsoup)
                    nextprev = cls.get_url_from_link(nextprevlink) if nextprevlink is not None else "NO URL"
                    if nextprev != url:
                        print("From %s, going forward then backward leads to %s" % (url, nextprev))
                        ok = False
        cls.log("checked prev/next from %s -> returned %d" % (url, ok))
        return ok

    @classmethod
    def check_navigation(cls, url):
        """Check that navigation functions seem to be working - for dev purposes.

        Runs both `check_first_link` and `check_prev_next_links(url)` and
        returns whether both succeeded.
        """
        cls.log("about to check navigation from %s" % url)
        first = cls.check_first_link()
        prevnext = cls.check_prev_next_links(url)
        ok = first and prevnext
        cls.log("checked navigation from %s -> returned %d" % (url, ok))
        return ok
207
208
209
class GenericListableComic(GenericComic):
    """Generic class for "listable" comics : with a list of comics (aka 'archive')

    The method `get_next_comic` is implemented in terms of new
    more specialized methods to be implemented/overridden:
        - get_archive_elements
        - get_url_from_archive_element
        - get_comic_info
    """
    _categories = ('LISTABLE', )

    @classmethod
    def get_archive_elements(cls):
        """Get the archive elements (iterable)."""
        raise NotImplementedError

    @classmethod
    def get_url_from_archive_element(cls, archive_elt):
        """Get url corresponding to an archive element."""
        raise NotImplementedError

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Get information about a particular comics."""
        raise NotImplementedError

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generic implementation of get_next_comic for listable comics.

        Walks the archive elements in order. When `last_comic` is given,
        elements are ignored up to and including the one whose url equals
        `last_comic['url']`; every element after that is retrieved.

        Yields:
            The dicts returned by `get_comic_info` (None results are
            skipped) with the 'url' key set to the element url.
        """
        waiting_for_url = last_comic['url'] if last_comic else None
        # Materialised because the length is needed for the final message.
        archive_elts = list(cls.get_archive_elements())
        for archive_elt in archive_elts:
            url = cls.get_url_from_archive_element(archive_elt)
            cls.log("considering %s" % url)
            if waiting_for_url is None:
                cls.log("about to get %s (%s)" % (url, str(archive_elt)))
                soup = get_soup_at_url(url)
                comic = cls.get_comic_info(soup, archive_elt)
                if comic is not None:
                    # 'url' is owned by this method: implementations must not set it.
                    assert 'url' not in comic
                    comic['url'] = url
                    yield comic
            elif waiting_for_url == url:
                # Last known comic found: start retrieving from the next element.
                waiting_for_url = None
        if waiting_for_url is not None:
            print("Did not find %s in the %d comics: there might be a problem" %
                  (waiting_for_url, len(archive_elts)))
256
257
# Helper functions corresponding to get_first_comic_link/get_navi_link
258
259
260
@classmethod
def get_link_rel_next(cls, last_soup, next_):
    """Implementation of get_navi_link.

    Look for a 'link' tag whose rel is 'next' (if `next_` is true)
    or 'prev' (otherwise).
    """
    rel = 'next' if next_ else 'prev'
    return last_soup.find('link', rel=rel)
264
265
266
@classmethod
def get_a_rel_next(cls, last_soup, next_):
    """Implementation of get_navi_link.

    Look for an 'a' tag whose rel is 'next' (if `next_` is true)
    or 'prev' (otherwise).
    """
    rel = 'next' if next_ else 'prev'
    return last_soup.find('a', rel=rel)
270
271
272
@classmethod
def get_a_navi_navinext(cls, last_soup, next_):
    """Implementation of get_navi_link.

    Look for an 'a' tag with class 'navi navi-next' (if `next_` is true)
    or 'navi navi-prev' (otherwise).
    """
    css_class = 'navi navi-next' if next_ else 'navi navi-prev'
    return last_soup.find('a', class_=css_class)
276
277
278
@classmethod
def get_a_navi_comicnavnext_navinext(cls, last_soup, next_):
    """Implementation of get_navi_link.

    Look for an 'a' tag with class 'navi comic-nav-next navi-next'
    (if `next_` is true) or 'navi comic-nav-previous navi-prev' (otherwise).
    """
    if next_:
        css_class = 'navi comic-nav-next navi-next'
    else:
        css_class = 'navi comic-nav-previous navi-prev'
    return last_soup.find('a', class_=css_class)
282
283
284
@classmethod
def get_a_comicnavbase_comicnavnext(cls, last_soup, next_):
    """Implementation of get_navi_link.

    Look for an 'a' tag with class 'comic-nav-base comic-nav-next'
    (if `next_` is true) or 'comic-nav-base comic-nav-previous' (otherwise).
    """
    if next_:
        css_class = 'comic-nav-base comic-nav-next'
    else:
        css_class = 'comic-nav-base comic-nav-previous'
    return last_soup.find('a', class_=css_class)
288
289
290
@classmethod
def get_a_navi_navifirst(cls):
    """Implementation of get_first_comic_link.

    Fetch the page at the class `url` and look for an 'a' tag with
    class 'navi navi-first'.
    """
    soup = get_soup_at_url(cls.url)
    return soup.find('a', class_='navi navi-first')
294
295
296
@classmethod
def get_div_navfirst_a(cls):
    """Implementation of get_first_comic_link.

    Fetch the page at the class `url` and return the first 'a' tag found
    inside the 'div' with class "nav-first", or None when there is no
    such div (instead of failing with an AttributeError).
    """
    div = get_soup_at_url(cls.url).find('div', class_="nav-first")
    return div.find('a') if div else None
300
301
302
@classmethod
def get_a_comicnavbase_comicnavfirst(cls):
    """Implementation of get_first_comic_link.

    Fetch the page at the class `url` and look for an 'a' tag with
    class 'comic-nav-base comic-nav-first'.
    """
    soup = get_soup_at_url(cls.url)
    return soup.find('a', class_='comic-nav-base comic-nav-first')
306
307
308
@classmethod
def simulate_first_link(cls):
    """Implementation of get_first_comic_link creating a link-like object from
    an URL provided by the class.

    The class using it must define a `first_url` attribute; the returned
    dict exposes it under the 'href' key.

    Note: The first URL can easily be found using :
    `get_first_comic_link = navigate_to_first_comic`.
    """
    first_url = cls.first_url
    return {'href': first_url}
317
318
319
@classmethod
def navigate_to_first_comic(cls):
    """Implementation of get_first_comic_link navigating from a user provided
    URL to the first comic.

    Sometimes, the first comic cannot be reached directly so to start
    from the first comic one has to go to the previous comic until
    there is no previous comics. Once this URL is reached, it
    is better to hardcode it but for development purposes, it
    is convenient to have an automatic way to find it.

    Then, the URL found can easily be used via `simulate_first_link`.

    The starting URL is read from standard input and every URL visited
    is printed. Returns a link-like dict with the last URL under 'href'.
    """
    url = input("Get starting URL: ")
    print(url)
    prev_link = cls.get_prev_link(get_soup_at_url(url))
    # Follow "previous" links until a page has none.
    while prev_link:
        url = cls.get_url_from_link(prev_link)
        print(url)
        prev_link = cls.get_prev_link(get_soup_at_url(url))
    return {'href': url}
340
341
342
class GenericEmptyComic(GenericComic):
    """Generic class for comics where nothing is to be done.

    It can be useful to deactivate temporarily comics that do not work
    properly by replacing `class MyComic(GenericWhateverComic)` with
    `class MyComic(GenericEmptyComic, GenericWhateverComic)`."""
    _categories = ('EMPTY', )

    @classmethod
    def get_next_comic(cls, last_comic):
        """Implementation of get_next_comic returning no comics.

        `last_comic` is ignored; an empty list is always returned.
        """
        cls.log("comic is considered as empty - returning no comic")
        return []
355
356
357 View Code Duplication
class ExtraFabulousComics(GenericNavigableComic):
    """Class to retrieve Extra Fabulous Comics."""
    name = 'efc'
    long_name = 'Extra Fabulous Comics'
    url = 'http://extrafabulouscomics.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Images are the 'img' tags whose src starts with the site's
        '/wp-content/uploads/' path; the date is taken from the first 10
        characters of the 'article:published_time' meta tag.
        """
        # The url is escaped so that its '.' only matches a literal dot.
        img_src_re = re.compile('^%s/wp-content/uploads/' % re.escape(cls.url))
        imgs = soup.find_all('img', src=img_src_re)
        title = soup.find('meta', property='og:title')['content']
        published = soup.find('meta', property='article:published_time')['content']
        day = string_to_date(published[:10], "%Y-%m-%d")
        return {
            'title': title,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'prefix': title + '-'
        }
381
382
383
class GenericLeMondeBlog(GenericNavigableComic):
    """Generic class to retrieve comics from Le Monde blogs."""
    _categories = ('LEMONDE', 'FRANCAIS')
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    # To be overridden by subclasses with the URL to start from.
    first_url = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The date is parsed from the "entry-date" span with the format
        "%d %B %Y" and the "fr_FR.utf8" locale; image urls come from the
        'og:image' meta tags, converted to plain ASCII URIs.
        """
        short_link = soup.find('link', rel='shortlink')['href']
        title = soup.find('meta', property='og:title')['content']
        date_str = soup.find("span", class_="entry-date").string
        day = string_to_date(date_str, "%d %B %Y", "fr_FR.utf8")
        og_images = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'url2': short_link,
            'img': [convert_iri_to_plain_ascii_uri(i['content']) for i in og_images],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
406
407
408
class ZepWorld(GenericLeMondeBlog):
    """Class to retrieve Zep World comics."""
    name = "zep"
    long_name = "Zep World"
    url = "http://zepworld.blog.lemonde.fr"
    # URL used as starting point by simulate_first_link.
    first_url = "http://zepworld.blog.lemonde.fr/2014/10/31/bientot-le-blog-de-zep/"
414
415
416
class Vidberg(GenericLeMondeBlog):
    """Class to retrieve Vidberg comics."""
    name = 'vidberg'
    long_name = "Vidberg - l'actu en patates"
    url = "http://vidberg.blog.lemonde.fr"
    # Not the first but I didn't find an efficient way to retrieve it
    first_url = "http://vidberg.blog.lemonde.fr/2012/02/09/revue-de-campagne-la-campagne-du-modem-semballe/"
423
424
425
class Plantu(GenericLeMondeBlog):
    """Class to retrieve Plantu comics."""
    name = 'plantu'
    long_name = "Plantu"
    url = "http://plantu.blog.lemonde.fr"
    # URL used as starting point by simulate_first_link.
    first_url = "http://plantu.blog.lemonde.fr/2014/10/28/stress-test-a-bruxelles/"
431
432
433
class XavierGorce(GenericLeMondeBlog):
    """Class to retrieve Xavier Gorce comics."""
    name = 'gorce'
    long_name = "Xavier Gorce"
    url = "http://xaviergorce.blog.lemonde.fr"
    # URL used as starting point by simulate_first_link.
    first_url = "http://xaviergorce.blog.lemonde.fr/2015/01/09/distinction/"
439
440
441
class CartooningForPeace(GenericLeMondeBlog):
    """Class to retrieve Cartooning For Peace comics."""
    name = 'forpeace'
    long_name = "Cartooning For Peace"
    url = "http://cartooningforpeace.blog.lemonde.fr"
    # URL used as starting point by simulate_first_link.
    first_url = "http://cartooningforpeace.blog.lemonde.fr/2014/12/15/bado/"
447
448
449
class Aurel(GenericLeMondeBlog):
    """Class to retrieve Aurel comics."""
    name = 'aurel'
    long_name = "Aurel"
    url = "http://aurel.blog.lemonde.fr"
    # URL used as starting point by simulate_first_link.
    first_url = "http://aurel.blog.lemonde.fr/2014/09/29/le-senat-repasse-a-droite/"
455
456
457
class LesCulottees(GenericLeMondeBlog):
    """Class to retrieve Les Culottees comics."""
    name = 'culottees'
    long_name = 'Les Culottees'
    url = "http://lesculottees.blog.lemonde.fr"
    # URL used as starting point by simulate_first_link.
    first_url = "http://lesculottees.blog.lemonde.fr/2016/01/11/clementine-delait-femme-a-barbe/"
463
464
465
class UneAnneeAuLycee(GenericLeMondeBlog):
    """Class to retrieve Une Annee Au Lycee comics."""
    name = 'lycee'
    long_name = 'Une Annee au Lycee'
    url = 'http://uneanneeaulycee.blog.lemonde.fr'
    # URL used as starting point by simulate_first_link.
    first_url = "http://uneanneeaulycee.blog.lemonde.fr/2016/06/13/la-semaine-du-bac-est-arrivee/"
471
472
473
class Rall(GenericNavigableComic):
    """Class to retrieve Ted Rall comics."""
    # Also on http://www.gocomics.com/tedrall
    name = 'rall'
    long_name = "Ted Rall"
    url = "http://rall.com/comic"
    _categories = ('RALL', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    # Not the first but I didn't find an efficient way to retrieve it
    first_url = "http://rall.com/2014/01/30/los-angeles-times-cartoon-well-miss-those-california-flowers"

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The date is parsed from the "entry-date" span with the format
        "%B %d, %Y"; images are the 'img' tags of the 'entry-content'
        div, minus the last 7 ones.
        """
        title = soup.find('meta', property='og:title')['content']
        author = soup.find("span", class_="author vcard").find("a").string
        date_str = soup.find("span", class_="entry-date").string
        day = string_to_date(date_str, "%B %d, %Y")
        desc = soup.find('meta', property='og:description')['content']
        content_imgs = soup.find('div', class_='entry-content').find_all('img')
        # The trailing images are social media buttons: drop them.
        comic_imgs = content_imgs[:-7]
        return {
            'title': title,
            'author': author,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'description': desc,
            'img': [i['src'] for i in comic_imgs],
        }
504
505
506
class Dilem(GenericNavigableComic):
    """Class to retrieve Ali Dilem comics."""
    name = 'dilem'
    long_name = 'Ali Dilem'
    url = 'http://information.tv5monde.com/dilem'
    _categories = ('FRANCAIS', )
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = "http://information.tv5monde.com/dilem/2004-06-26"

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns the 'a' tag found in the relevant 'li' tag or None when
        there is no such 'li'.
        """
        # prev is next / next is prev
        li = last_soup.find('li', class_='prev' if next_ else 'next')
        return li.find('a') if li else None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The date is taken from the first 10 characters of the 'dc:date'
        span content, parsed as "%Y-%m-%d".
        """
        short_url = soup.find('link', rel='shortlink')['href']
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        imgs = soup.find_all('meta', property='og:image')
        date_str = soup.find('span', property='dc:date')['content'][:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        return {
            'short_url': short_url,
            'title': title,
            'img': [i['content'] for i in imgs],
            'day': day.day,
            'month': day.month,
            'year': day.year,
        }
540
541
542
class SpaceAvalanche(GenericNavigableComic):
    """Class to retrieve Space Avalanche comics."""
    name = 'avalanche'
    long_name = 'Space Avalanche'
    url = 'http://www.spaceavalanche.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        The link is hardcoded as a dict providing the 'href' and 'title'
        entries read by `get_comic_info`.
        """
        return {'href': "http://www.spaceavalanche.com/2009/02/02/irish-sea/", 'title': "Irish Sea"}

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The title comes from the link itself and the date from the
        year/month/day components found in the link url.
        """
        url_date_re = re.compile(r'.*/([0-9]*)/([0-9]*)/([0-9]*)/.*$')
        title = link['title']
        url = cls.get_url_from_link(link)
        year, month, day = (int(s) for s in url_date_re.match(url).groups())
        imgs = soup.find("div", class_="entry").find_all("img")
        return {
            'title': title,
            'day': day,
            'month': month,
            'year': year,
            'img': [i['src'] for i in imgs],
        }
570
571
572
class ZenPencils(GenericEmptyComic, GenericNavigableComic):
    """Class to retrieve ZenPencils comics.

    GenericEmptyComic is listed first in the bases, so its
    `get_next_comic` (returning no comic) is the one in effect.
    """
    # Also on http://zenpencils.tumblr.com
    # Also on http://www.gocomics.com/zen-pencils
    name = 'zenpencils'
    long_name = 'Zen Pencils'
    url = 'http://zenpencils.com'
    _categories = ('ZENPENCILS', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = "http://zenpencils.com/comic/1-ralph-waldo-emerson-make-them-cry/"

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Images are the 'img' tags of the 'comic' div; their 'src' is
        joined to the class url. Sanity checks assert that there is at
        least one image and that each image 'alt' equals its 'title' and
        is either the comic title or empty.
        """
        imgs = soup.find('div', id='comic').find_all('img')
        # imgs2 = soup.find_all('meta', property='og:image')
        post = soup.find('div', class_='post-content')
        author = post.find("span", class_="post-author").find("a").string
        title = soup.find('meta', property='og:title')['content']
        date_str = post.find('span', class_='post-date').string
        day = string_to_date(date_str, "%B %d, %Y")
        assert imgs
        assert all(i['alt'] == i['title'] for i in imgs)
        assert all(i['alt'] in (title, "") for i in imgs)
        desc = soup.find('meta', property='og:description')['content']
        return {
            'title': title,
            'description': desc,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
        }
607
608
609
class ItsTheTie(GenericEmptyComic, GenericNavigableComic):
    """Class to retrieve It's the tie comics.

    GenericEmptyComic is listed first in the bases, so its
    `get_next_comic` (returning no comic) is the one in effect.
    """
    # Also on http://itsthetie.tumblr.com
    # Also on https://tapastic.com/series/itsthetie
    name = 'tie'
    long_name = "It's the tie"
    url = "http://itsthetie.com"
    _categories = ('TIE', )
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Image urls are the 'og:image' meta contents followed by the
        'data-oversrc' urls not already listed, minus any url ending with
        "/2016/01/bonus-panel.png". 'tags' is "" when the page has no
        'article:tag' meta.
        """
        title = soup.find('h1', class_='comic-title').find('a').string
        date_str = soup.find('header', class_='comic-meta entry-meta').find('a').string
        day = string_to_date(date_str, "%B %d, %Y")
        # Bonus images may or may not be in meta og:image.
        imgs_src = [i['content'] for i in soup.find_all('meta', property='og:image')]
        bonus_src = [b['data-oversrc']
                     for b in soup.find_all('img', attrs={'data-oversrc': True})]
        all_imgs_src = imgs_src + [s for s in bonus_src if s not in imgs_src]
        all_imgs_src = [s for s in all_imgs_src if not s.endswith("/2016/01/bonus-panel.png")]
        tag_meta = soup.find('meta', property='article:tag')
        tags = tag_meta['content'] if tag_meta else ""
        return {
            'title': title,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': all_imgs_src,
            'tags': tags,
        }
643
644
645
class PenelopeBagieu(GenericNavigableComic):
    """Class to retrieve comics from Penelope Bagieu's blog."""
    name = 'bagieu'
    long_name = 'Ma vie est tout a fait fascinante (Bagieu)'
    url = 'http://www.penelope-jolicoeur.com'
    _categories = ('FRANCAIS', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.penelope-jolicoeur.com/2007/02/ma-vie-mon-oeuv.html'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The date is parsed from the 'date-header' h2 with the format
        "%A %d %B %Y" and the "fr_FR.utf8" locale.
        """
        date_str = soup.find('h2', class_='date-header').string
        day = string_to_date(date_str, "%A %d %B %Y", "fr_FR.utf8")
        imgs = soup.find('div', class_='entry-body').find_all('img')
        title = soup.find('h3', class_='entry-header').string
        return {
            'title': title,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
669
670
671
class OneOneOneOneComic(GenericNavigableComic):
    """Class to retrieve 1111 Comics."""
    # Also on http://comics1111.tumblr.com
    # Also on https://tapastic.com/series/1111-Comics
    name = '1111'
    long_name = '1111 Comics'
    url = 'http://www.1111comics.me'
    _categories = ('ONEONEONEONE', )
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Title and date come from the 'comic-title' h1 and the
        'comic-meta entry-meta' header; images from the 'og:image' metas.
        """
        title = soup.find('h1', class_='comic-title').find('a').string
        date_str = soup.find('header', class_='comic-meta entry-meta').find('a').string
        day = string_to_date(date_str, "%B %d, %Y")
        og_images = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [i['content'] for i in og_images],
        }
696
697
698
class AngryAtNothing(GenericEmptyComic, GenericNavigableComic):
    """Class to retrieve Angry at Nothing comics.

    GenericEmptyComic is listed first in the bases, so its
    `get_next_comic` (returning no comic) is the one in effect.
    """
    # Also on http://tapastic.com/series/Comics-yeah-definitely-comics-
    # Also on http://angryatnothing.tumblr.com
    name = 'angry'
    long_name = 'Angry At Nothing'
    url = 'http://www.angryatnothing.net'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Title and date come from the 'comic-title' h1 and the
        'comic-meta entry-meta' header; images from the 'og:image' metas.
        """
        title = soup.find('h1', class_='comic-title').find('a').string
        date_str = soup.find('header', class_='comic-meta entry-meta').find('a').string
        day = string_to_date(date_str, "%B %d, %Y")
        og_images = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [i['content'] for i in og_images],
        }
722
723
724
class NeDroid(GenericNavigableComic):
    """Class to retrieve NeDroid comics."""
    name = 'nedroid'
    long_name = 'NeDroid'
    url = 'http://nedroid.com'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The comic number is extracted from the `?p=<num>` part of the
        shortlink url. Exactly one image is expected in the 'comic' div;
        its 'alt' and 'title' are returned as 'title' and 'title2'.
        """
        # The url is escaped so that its '.' only matches a literal dot.
        short_url_re = re.compile('^%s/\\?p=([0-9]*)' % re.escape(cls.url))
        short_url = cls.get_url_from_link(soup.find('link', rel='shortlink'))
        num = int(short_url_re.match(short_url).groups()[0])
        imgs = soup.find('div', id='comic').find_all('img')
        assert len(imgs) == 1
        img = imgs[0]
        return {
            'short_url': short_url,
            'title': img['alt'],
            'title2': img['title'],
            'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
            'num': num,
        }
750
751
752
class Garfield(GenericNavigableComic):
    """Class to retrieve Garfield comics."""
    # Also on http://www.gocomics.com/garfield
    name = 'garfield'
    long_name = 'Garfield'
    url = 'https://garfield.com'
    _categories = ('GARFIELD', )
    get_first_comic_link = simulate_first_link
    first_url = 'https://garfield.com/comic/1978/06/19'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic."""
        css_class = 'comic-arrow-right' if next_ else 'comic-arrow-left'
        return last_soup.find('a', class_=css_class)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The date is read from the `/comic/<year>/<month>/<day>` part of
        the link url.
        """
        url = cls.get_url_from_link(link)
        # The url is escaped so that its '.' only matches a literal dot.
        date_re = re.compile('^%s/comic/([0-9]*)/([0-9]*)/([0-9]*)' % re.escape(cls.url))
        year, month, day = (int(s) for s in date_re.match(url).groups())
        imgs = soup.find('div', class_='comic-display').find_all('img', class_='img-responsive')
        return {
            'month': month,
            'year': year,
            'day': day,
            'img': [i['src'] for i in imgs],
        }
780
781
782 View Code Duplication
class Dilbert(GenericNavigableComic):
    """Retriever for the Dilbert comic strips."""
    # Also on http://www.gocomics.com/dilbert-classics
    name = 'dilbert'
    long_name = 'Dilbert'
    url = 'http://dilbert.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://dilbert.com/strip/1989-04-16'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor leading to the next (or previous) strip, if any."""
        div_class = 'nav-comic nav-right' if next_ else 'nav-comic nav-left'
        nav_div = last_soup.find('div', class_=div_class)
        if not nav_div:
            return None
        return nav_div.find('a')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect the metadata of one strip from the page's <meta> tags."""
        def meta_content(prop):
            """Content of the first <meta> tag with the given property."""
            return soup.find('meta', property=prop)['content']

        title = meta_content('og:title')
        images = soup.find_all('meta', property='og:image')
        description = meta_content('og:description')
        published = string_to_date(meta_content('article:publish_date'), "%B %d, %Y")
        author = meta_content('article:author')
        tags = meta_content('article:tag')
        return {
            'title': title,
            'description': description,
            'img': [image['content'] for image in images],
            'author': author,
            'tags': tags,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
818
819
820
class VictimsOfCircumsolar(GenericEmptyComic, GenericNavigableComic):
    """Retriever for the Victims Of Circumsolar comics."""
    # Also on https://victimsofcomics.tumblr.com
    name = 'circumsolar'
    long_name = 'Victims Of Circumsolar'
    url = 'http://www.victimsofcircumsolar.com'
    get_navi_link = get_a_navi_comicnavnext_navinext
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.victimsofcircumsolar.com/comic/modern-addiction'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect title, description and images of one comic page."""
        # Date is on the archive page
        def last_meta(prop):
            """Content of the last <meta> tag with the given property."""
            return soup.find_all('meta', property=prop)[-1]['content']

        title = last_meta('og:title')
        description = last_meta('og:description')
        images = soup.find('div', id='comic').find_all('img')
        # Every image is expected to repeat the page title in title and alt.
        assert all(image['title'] == image['alt'] == title for image in images)
        return {
            'title': title,
            'description': description,
            'img': [image['src'] for image in images],
        }
843
844
845
class ThreeWordPhrase(GenericNavigableComic):
    """Class to retrieve Three Word Phrase comics.

    Navigation relies on the first/next/previous button images, whose parent
    elements are the navigation links.
    """
    # Also on http://www.threewordphrase.tumblr.com
    name = 'threeword'
    long_name = 'Three Word Phrase'
    url = 'http://threewordphrase.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics."""
        return get_soup_at_url(cls.url).find('img', src='/firstlink.gif').parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns None when the navigation button is missing from the page or
        when its parent element carries no 'href' attribute.
        """
        button = last_soup.find('img', src='/nextlink.gif' if next_ else '/prevlink.gif')
        if button is None:
            # No navigation button on this page: report "no link" rather than
            # failing on None.parent.
            return None
        link = button.parent
        return None if link.get('href') is None else link

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with the page title ('title'), the joined alt texts of
        the comic images ('title2') and the absolute image URLs ('img').
        """
        title = soup.find('title')
        # Filter out images identified by their file name suffix
        # (navigation buttons, ads, header, ...).
        imgs = [img for img in soup.find_all('img')
                if not img['src'].endswith(
                    ('link.gif', '32.png', 'twpbookad.jpg',
                     'merchad.jpg', 'header.gif', 'tipjar.jpg'))]
        return {
            'title': title.string if title else None,
            'title2': '  '.join(img.get('alt') for img in imgs if img.get('alt')),
            'img': [urljoin_wrapper(cls.url, img['src']) for img in imgs],
        }
877
878
879
class DeadlyPanel(GenericEmptyComic, GenericNavigableComic):
    """Retriever for the Deadly Panel comics."""
    # Also on https://tapastic.com/series/deadlypanel
    # Also on https://deadlypanel.tumblr.com
    name = 'deadly'
    long_name = 'Deadly Panel'
    url = 'http://www.deadlypanel.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect the image sources found in the page's comic div."""
        comic_div = soup.find('div', id='comic')
        images = comic_div.find_all('img')
        # alt and title are expected to be the same on every image.
        assert all(image['alt'] == image['title'] for image in images)
        return {
            'img': [image['src'] for image in images],
        }
897
898
899
class TheGentlemanArmchair(GenericNavigableComic):
    """Retriever for The Gentleman Armchair comics."""
    name = 'gentlemanarmchair'
    long_name = 'The Gentleman Armchair'
    url = 'http://thegentlemansarmchair.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect title, author, date and images of one comic page."""
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        published = string_to_date(
            soup.find('span', class_='post-date').string, "%B %d, %Y")
        images = soup.find('div', id='comic').find_all('img')
        return {
            'img': [image['src'] for image in images],
            'title': title,
            'author': author,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
923
924
925
class ImogenQuest(GenericNavigableComic):
    """Retriever for the Imogen Quest comics."""
    # Also on http://imoquest.tumblr.com
    name = 'imogen'
    long_name = 'Imogen Quest'
    url = 'http://imogenquest.net'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect date, images, titles and author of one comic page."""
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        published = string_to_date(
            soup.find('span', class_='post-date').string, '%B %d, %Y')
        images = soup.find('div', class_='comicpane').find_all('img')
        # alt and title are expected to be the same on every image.
        assert all(image['alt'] == image['title'] for image in images)
        # The secondary title is the title attribute of the first image.
        first_image = images[0]
        title2 = first_image['title']
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [image['src'] for image in images],
            'title': title,
            'title2': title2,
            'author': author,
        }
953
954
955
class MyExtraLife(GenericNavigableComic):
    """Retriever for the My Extra Life comics."""
    name = 'extralife'
    long_name = 'My Extra Life'
    url = 'http://www.myextralife.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'first comic' navigation anchor of the home page."""
        home = get_soup_at_url(cls.url)
        return home.find('a', class_='comic_nav_link first_comic_link')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect title, images and date of one comic page."""
        title = soup.find("h1", class_="comic_title").string
        published = string_to_date(
            soup.find("span", class_="comic_date").string, "%B %d, %Y")
        images = soup.find_all("img", class_="comic")
        # alt and title of every image are expected to equal the page title.
        assert all(image['alt'] == image['title'] == title for image in images)
        return {
            'title': title,
            # Images with an empty src are left out.
            'img': [image['src'] for image in images if image["src"]],
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
982
983
984
class SaturdayMorningBreakfastCereal(GenericNavigableComic):
    """Retriever for the Saturday Morning Breakfast Cereal comics."""
    # Also on http://www.gocomics.com/saturday-morning-breakfast-cereal
    # Also on http://smbc-comics.tumblr.com
    name = 'smbc'
    long_name = 'Saturday Morning Breakfast Cereal'
    url = 'http://www.smbc-comics.com'
    _categories = ('SMBC', )
    get_navi_link = get_a_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the rel='start' anchor of the home page."""
        home = get_soup_at_url(cls.url)
        return home.find('a', rel='start')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect title, images and date of one comic page."""
        main_img = soup.find('img', id='cc-comic')
        img_urls = [main_img['src']]
        # An optional second image may be found in the 'aftercomic' div.
        bonus_div = soup.find('div', id='aftercomic')
        if bonus_div:
            bonus_url = bonus_div.find('img')['src']
            if bonus_url:
                img_urls.append(bonus_url)
        publish_div = soup.find('div', class_='cc-publishtime')
        published = string_to_date(publish_div.contents[0], "%B %d, %Y")
        return {
            'title': main_img['title'],
            'img': [urljoin_wrapper(cls.url, img_url) for img_url in img_urls],
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
1016
1017
1018
class PerryBibleFellowship(GenericListableComic):  # Is now navigable too
    """Retriever for the Perry Bible Fellowship comics."""
    name = 'pbf'
    long_name = 'Perry Bible Fellowship'
    url = 'http://pbfcomics.com'
    get_url_from_archive_element = join_cls_url_to_href

    @classmethod
    def get_archive_elements(cls):
        """Return the anchors of the 'all_thumbnails' div, in reversed order."""
        home = get_soup_at_url(cls.url)
        anchors = home.find('div', id='all_thumbnails').find_all('a')
        return reversed(anchors)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect name and image of one comic page from its <meta> tags."""
        name = soup.find('meta', property='og:title')['content']
        images = soup.find_all('meta', property='og:image')
        # Exactly one og:image tag is expected.
        assert len(images) == 1
        return {
            'name': name,
            'img': [image['content'] for image in images],
        }
1041
1042
1043 View Code Duplication
class Mercworks(GenericNavigableComic):
    """Retriever for the Mercworks comics."""
    # Also on http://mercworks.tumblr.com
    name = 'mercworks'
    long_name = 'Mercworks'
    url = 'http://mercworks.net'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect images, title, description and date from the <meta> tags."""
        title = soup.find('meta', property='og:title')['content']
        # The description tag is optional.
        desc_tag = soup.find('meta', property='og:description')
        if desc_tag:
            description = desc_tag['content']
        else:
            description = ""
        # Only the first 10 characters (the "%Y-%m-%d" part) are parsed.
        timestamp = soup.find('meta', property='article:published_time')['content']
        published = string_to_date(timestamp[:10], "%Y-%m-%d")
        images = soup.find_all('meta', property='og:image')
        return {
            'img': [image['content'] for image in images],
            'title': title,
            'desc': description,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
1069
1070
1071
class BerkeleyMews(GenericListableComic):
    """Class to retrieve Berkeley Mews comics.

    Comics are listed from the archive page; the comic number comes from the
    link URL and the date from the image file name.
    """
    # Also on http://mews.tumblr.com
    # Also on http://www.gocomics.com/berkeley-mews
    name = 'berkeley'
    long_name = 'Berkeley Mews'
    url = 'http://www.berkeleymews.com'
    _categories = ('BERKELEY', )
    get_url_from_archive_element = get_href
    # url is escaped so that its dots only match literal dots.
    comic_num_re = re.compile('%s/\\?p=([0-9]*)$' % re.escape(url))

    @classmethod
    def get_archive_elements(cls):
        """Get the comic links of the archive page, in reversed order."""
        archive_url = urljoin_wrapper(cls.url, "?page_id=2")
        return reversed(get_soup_at_url(archive_url).find_all('a', href=cls.comic_num_re))

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with keys 'num', 'title', 'title2', 'img', 'year',
        'month' and 'day'.
        """
        comic_date_re = re.compile('.*/([0-9]*)-([0-9]*)-([0-9]*)-.*')
        url = cls.get_url_from_archive_element(link)
        num = int(cls.comic_num_re.match(url).groups()[0])
        img = soup.find('div', id='comic').find('img')
        # alt and title are expected to be the same.
        assert img['alt'] == img['title']
        title2 = img['title']
        img_url = img['src']
        # The date is encoded in the image URL as .../<year>-<month>-<day>-...
        year, month, day = [int(s) for s in comic_date_re.match(img_url).groups()]
        return {
            'num': num,
            'title': link.string,
            'title2': title2,
            'img': [img_url],
            'year': year,
            'month': month,
            'day': day,
        }
1107
1108
1109
class GenericBouletCorp(GenericNavigableComic):
    """Generic class to retrieve BouletCorp comics in different languages.

    Subclasses provide 'name', 'long_name' and 'url'.
    """
    # Also on https://bouletcorp.tumblr.com
    _categories = ('BOULET', )
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics."""
        return get_soup_at_url(cls.url).find('div', id='centered_nav').find_all('a')[0]

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with keys 'img', 'title', 'texts', 'year', 'month' and
        'day'; the date is parsed from the comic URL.
        """
        url = cls.get_url_from_link(link)
        # cls.url is escaped so that its dots only match literal dots.
        date_re = re.compile('^%s/([0-9]*)/([0-9]*)/([0-9]*)/' % re.escape(cls.url))
        year, month, day = [int(s) for s in date_re.match(url).groups()]
        imgs = soup.find('div', id='notes').find('div', class_='storycontent').find_all('img')
        # Non-empty title attributes of the images, joined together.
        texts = '  '.join(t for t in (i.get('title') for i in imgs) if t)
        title = soup.find('title').string
        return {
            # Images without a src attribute are skipped.
            'img': [convert_iri_to_plain_ascii_uri(i['src']) for i in imgs if i.get('src') is not None],
            'title': title,
            'texts': texts,
            'year': year,
            'month': month,
            'day': day,
        }
1137
1138
1139
class BouletCorp(GenericBouletCorp):
    """Retriever for the BouletCorp comics served from www.bouletcorp.com."""
    name = 'boulet'
    long_name = 'Boulet Corp'
    url = 'http://www.bouletcorp.com'
    _categories = ('FRANCAIS', )
1145
1146
1147
class BouletCorpEn(GenericBouletCorp):
    """Retriever for the BouletCorp comics served from english.bouletcorp.com."""
    name = 'boulet_en'
    long_name = 'Boulet Corp English'
    url = 'http://english.bouletcorp.com'
1152
1153
1154
class AmazingSuperPowers(GenericNavigableComic):
    """Retriever for the Amazing Super Powers comics."""
    name = 'asp'
    long_name = 'Amazing Super Powers'
    url = 'http://www.amazingsuperpowers.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect title, author, images and date of one comic page."""
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        published = string_to_date(
            soup.find('span', class_='post-date').string, "%B %d, %Y")
        images = soup.find('div', id='comic').find_all('img')
        # The title is made of the title attributes of all the images.
        title = ' '.join(image['title'] for image in images)
        # alt and title are expected to be the same on every image.
        assert all(image['alt'] == image['title'] for image in images)
        return {
            'title': title,
            'author': author,
            'img': [image['src'] for image in images],
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
1179
1180
1181
class ToonHole(GenericNavigableComic):
    """Retriever for the Toon Hole comics."""
    # Also on http://tapastic.com/series/TOONHOLE
    name = 'toonhole'
    long_name = 'Toon Hole'
    url = 'http://www.toonhole.com'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect short URL, title, date and images of one comic page."""
        short_url = soup.find('link', rel='shortlink')['href']
        meta_div = soup.find('div', class_='entry-meta')
        published = string_to_date(meta_div.contents[0].strip(), "%B %d, %Y")
        images = soup.find('div', id='comic').find_all('img')
        # The title comes from the first image; it stays empty without images.
        title = ""
        if images:
            first_image = images[0]
            title = first_image['alt']
            assert first_image['title'] == title
        return {
            'short_url': short_url,
            'title': title,
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [convert_iri_to_plain_ascii_uri(image['src']) for image in images],
        }
1211
1212
1213
class Channelate(GenericNavigableComic):
    """Retriever for the Channelate comics."""
    name = 'channelate'
    long_name = 'Channelate'
    url = 'http://www.channelate.com'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect author, date, title and images of one comic page.

        When the page has an 'extrapanelbutton' div, the page it links to is
        fetched as well and its 'extrapanelimage' images are appended.
        """
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        published = string_to_date(
            soup.find('span', class_='post-date').string, '%Y/%m/%d')
        title = soup.find('meta', property='og:title')['content']
        comic_div = soup.find('div', id='comic')
        images = comic_div.find_all('img') if comic_div else []
        bonus_url = None
        bonus_button = soup.find('div', id='extrapanelbutton')
        if bonus_button:
            bonus_url = bonus_button.find('a')['href']
            bonus_soup = get_soup_at_url(bonus_url)
            images.extend(bonus_soup.find_all('img', class_='extrapanelimage'))
        return {
            'url_extra': bonus_url,
            'title': title,
            'author': author,
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [urljoin_wrapper(cls.url, image['src']) for image in images],
        }
1247
1248
1249
class CyanideAndHappiness(GenericNavigableComic):
    """Class to retrieve Cyanide And Happiness comics."""
    name = 'cyanide'
    long_name = 'Cyanide and Happiness'
    url = 'http://explosm.net'
    _categories = ('NSFW', )
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics."""
        return get_soup_at_url(cls.url).find('a', title='Oldest comic')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns None when the navigation anchor is missing from the page or
        when it carries no 'href' attribute.
        """
        # NOTE(review): the trailing space in 'previous-comic ' is kept as is;
        # presumably it mirrors the site's markup - confirm.
        link = last_soup.find('a', class_='next-comic' if next_ else 'previous-comic ')
        if link is None or link.get('href') is None:
            return None
        return link

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with keys 'num', 'author', 'month', 'year', 'day',
        'prefix' and 'img'.
        """
        url2 = soup.find('meta', property='og:url')['content']
        # The comic number is the second to last '/'-separated part of the URL.
        num = int(url2.split('/')[-2])
        date_str = soup.find('h3').find('a').string
        day = string_to_date(date_str, '%Y.%m.%d')
        author = soup.find('small', class_="author-credit-name").string
        # The credit reads 'by <author>': drop the 'by ' prefix.
        assert author.startswith('by ')
        author = author[3:]
        imgs = soup.find_all('img', id='main-comic')
        return {
            'num': num,
            'author': author,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'prefix': '%d-' % num,
            'img': [convert_iri_to_plain_ascii_uri(urljoin_wrapper(cls.url, i['src'])) for i in imgs]
        }
1288
1289
1290
class MrLovenstein(GenericComic):
    """Retriever for the Mr. Lovenstein comics."""
    # Also on https://tapastic.com/series/MrLovenstein
    name = 'mrlovenstein'
    long_name = 'Mr. Lovenstein'
    url = 'http://www.mrlovenstein.com'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Yield the comics following last_comic (all of them if it is falsy).

        Implementation of GenericComic's abstract method. The range of comic
        numbers is derived from the '/comic/<num>' links of the home page.
        """
        # TODO: more info from http://www.mrlovenstein.com/archive
        comic_num_re = re.compile('^/comic/([0-9]*)$')
        home = get_soup_at_url(cls.url)
        known_nums = [int(comic_num_re.match(anchor['href']).group(1))
                      for anchor in home.find_all('a', href=comic_num_re)]
        start, stop = min(known_nums), max(known_nums)
        if last_comic:
            # Resume right after the last comic already retrieved.
            start = last_comic['num'] + 1
        img_src_re = re.compile('^/images/comics/')
        for num in range(start, stop + 1):
            comic_url = urljoin_wrapper(cls.url, '/comic/%d' % num)
            page = get_soup_at_url(comic_url)
            images = list(reversed(page.find_all('img', src=img_src_re)))
            description = page.find('meta', attrs={'name': 'description'})['content']
            titles = (image.get('title') for image in images)
            yield {
                'url': comic_url,
                'num': num,
                'texts': '  '.join(text for text in titles if text),
                'img': [urljoin_wrapper(comic_url, image['src']) for image in images],
                'description': description,
            }
1320
1321
1322
class DinosaurComics(GenericListableComic):
    """Class to retrieve Dinosaur Comics comics.

    Comics are listed from the archive page; number, date and text come from
    the archive link and the image from the comic page.
    """
    name = 'dinosaur'
    long_name = 'Dinosaur Comics'
    url = 'http://www.qwantz.com'
    get_url_from_archive_element = get_href
    # url and the dot of 'index.php' are escaped so that dots only match
    # literal dots.
    comic_link_re = re.compile('^%s/index\\.php\\?comic=([0-9]*)$' % re.escape(url))

    @classmethod
    def get_archive_elements(cls):
        """Get the comic links of the archive page, in reversed order."""
        archive_url = urljoin_wrapper(cls.url, 'archive.php')
        # first link is random -> skip it
        return reversed(get_soup_at_url(archive_url).find_all('a', href=cls.comic_link_re)[1:])

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with keys 'month', 'year', 'day', 'img', 'title',
        'text' and 'num'.
        """
        url = cls.get_url_from_archive_element(link)
        num = int(cls.comic_link_re.match(url).groups()[0])
        # The archive link text is the date; its next sibling holds the text.
        date_str = link.string
        text = link.next_sibling.string
        day = string_to_date(remove_st_nd_rd_th_from_date(date_str), "%B %d, %Y")
        comic_img_re = re.compile('^%s/comics/' % re.escape(cls.url))
        img = soup.find('img', src=comic_img_re)
        return {
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [img.get('src')],
            'title': img.get('title'),
            'text': text,
            'num': num,
        }
1355
1356
1357
class ButterSafe(GenericListableComic):
    """Class to retrieve Butter Safe comics.

    Comics are listed from the archive page; the date is parsed from the
    comic URL.
    """
    name = 'butter'
    long_name = 'ButterSafe'
    url = 'http://buttersafe.com'
    get_url_from_archive_element = get_href
    # url is escaped so that its dots only match literal dots.
    comic_link_re = re.compile('^%s/([0-9]*)/([0-9]*)/([0-9]*)/.*' % re.escape(url))

    @classmethod
    def get_archive_elements(cls):
        """Get the comic links of the archive page, in reversed order."""
        archive_url = urljoin_wrapper(cls.url, 'archive/')
        return reversed(get_soup_at_url(archive_url).find_all('a', href=cls.comic_link_re))

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with keys 'title', 'day', 'month', 'year' and 'img'.
        """
        url = cls.get_url_from_archive_element(link)
        title = link.string
        year, month, day = [int(s) for s in cls.comic_link_re.match(url).groups()]
        img = soup.find('div', id='comic').find('img')
        # The image alt text is expected to equal the archive link text.
        assert img['alt'] == title
        return {
            'title': title,
            'day': day,
            'month': month,
            'year': year,
            'img': [img['src']],
        }
1385
1386
1387
class CalvinAndHobbes(GenericComic):
    """Class to retrieve Calvin and Hobbes comics."""
    # Also on http://www.gocomics.com/calvinandhobbes/
    name = 'calvin'
    long_name = 'Calvin and Hobbes'
    # This is not through any official webpage but eh...
    url = 'http://marcel-oehler.marcellosendos.ch/comics/ch/'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generator to get the next comic. Implementation of GenericComic's abstract method.

        Walks the '<year>/<month>/' links of the index page and, for each
        month that may hold comics newer than the last one retrieved, yields
        a dict ('url', 'year', 'month', 'day', 'img') for every image whose
        date is strictly after the previously yielded one.
        """
        last_date = get_date_for_comic(last_comic) if last_comic else date(1985, 11, 1)
        link_re = re.compile('^([0-9]*)/([0-9]*)/')
        for link in get_soup_at_url(cls.url).find_all('a', href=link_re):
            url = link['href']
            # The string forms are kept for building the regexp and the image
            # URL; the integer forms are used for the dates.
            year_str, month_str = link_re.match(url).groups()
            year, month = int(year_str), int(month_str)
            # First day of the month + 31 days is on or after the end of the
            # month: months that end before last_date can be skipped without
            # being fetched.
            if date(year, month, 1) + timedelta(days=31) < last_date:
                continue
            img_re = re.compile('^%s%s([0-9]*)' % (year_str, month_str))
            month_url = urljoin_wrapper(cls.url, url)
            for img in get_soup_at_url(month_url).find_all('img', src=img_re):
                img_src = img['src']
                day = int(img_re.match(img_src).groups()[0])
                comic_date = date(year, month, day)
                if comic_date > last_date:
                    yield {
                        'url': month_url,
                        'year': year,
                        'month': month,
                        'day': day,
                        'img': ['%s%s/%s/%s' % (cls.url, year_str, month_str, img_src)],
                    }
                    last_date = comic_date
1421
1422
1423
class AbstruseGoose(GenericListableComic):
    """Class to retrieve AbstruseGoose Comics.

    Comics are listed from the archive page; the comic number comes from the
    link URL.
    """
    name = 'abstruse'
    long_name = 'Abstruse Goose'
    url = 'http://abstrusegoose.com'
    get_url_from_archive_element = get_href
    # url is escaped so that its dots only match literal dots.
    comic_url_re = re.compile('^%s/([0-9]*)$' % re.escape(url))
    comic_img_re = re.compile('^%s/strips/.*' % re.escape(url))

    @classmethod
    def get_archive_elements(cls):
        """Get the comic links of the archive page."""
        archive_url = urljoin_wrapper(cls.url, 'archive')
        return get_soup_at_url(archive_url).find_all('a', href=cls.comic_url_re)

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Get information about a particular comics.

        Returns a dict with the comic number ('num'), the archive link text
        ('title') and the source of the first strip image ('img').
        """
        comic_url = cls.get_url_from_archive_element(archive_elt)
        num = int(cls.comic_url_re.match(comic_url).groups()[0])
        return {
            'num': num,
            'title': archive_elt.string,
            'img': [soup.find('img', src=cls.comic_img_re)['src']]
        }
1446
1447
1448
class PhDComics(GenericNavigableComic):
    """Retriever for the PhD Comics."""
    name = 'phd'
    long_name = 'PhD Comics'
    url = 'http://phdcomics.com/comics/archive.php'

    @classmethod
    def get_first_comic_link(cls):
        """Return the parent of the 'first' button image, None if not found."""
        button = get_soup_at_url(cls.url).find(
            'img', src='http://phdcomics.com/comics/images/first_button.gif')
        if button is None:
            return None
        return button.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the parent of the next/prev button image, None if not found."""
        direction = 'next' if next_ else 'prev'
        button = last_soup.find(
            'img', src='http://phdcomics.com/comics/images/%s_button.gif' % direction)
        if button is None:
            return None
        return button.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect images and title of one comic page from its <meta> tags."""
        title_tag = soup.find('meta', attrs={'name': 'twitter:title'})
        title = title_tag['content']
        images = soup.find_all('meta', property='og:image')
        return {
            'img': [image['content'] for image in images],
            'title': title,
        }
1477
1478
1479
class Octopuns(GenericEmptyComic, GenericNavigableComic):
    """Class to retrieve Octopuns comics.

    Navigation relies on the First/Next/Back button images, whose parent
    elements are the navigation links.
    """
    # Also on http://octopuns.tumblr.com
    name = 'octopuns'
    long_name = 'Octopuns'
    url = 'http://www.octopuns.net'

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics."""
        return get_soup_at_url(cls.url).find('img', src=re.compile('.*/First.png')).parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns None when the navigation button is missing from the page or
        when its parent element carries no 'href' attribute.
        """
        button = last_soup.find('img', src=re.compile('.*/Next.png' if next_ else '.*/Back.png'))
        if button is None:
            # No navigation button on this page: report "no link" rather than
            # failing on None.parent.
            return None
        link = button.parent
        return None if link.get('href') is None else link

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with keys 'img', 'title', 'day', 'month' and 'year'.
        """
        title = soup.find('h3', class_='post-title entry-title').string
        date_str = soup.find('h2', class_='date-header').string
        day = string_to_date(date_str, "%A, %B %d, %Y")
        imgs = soup.find_all('link', rel='image_src')
        return {
            'img': [i['href'] for i in imgs],
            'title': title,
            'day': day.day,
            'month': day.month,
            'year': day.year,
        }
1511
1512
1513
class Quarktees(GenericNavigableComic):
    """Retriever for the Quarktees comics."""
    name = 'quarktees'
    long_name = 'Quarktees'
    url = 'http://www.quarktees.com/blogs/news'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.quarktees.com/blogs/news/12486621-coming-soon'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the 'article-next' (or 'article-prev') anchor of the page."""
        anchor_id = 'article-next' if next_ else 'article-prev'
        return last_soup.find('a', id=anchor_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect title and images of one article page."""
        title = soup.find('meta', property='og:title')['content']
        images = soup.find('div', class_='single-article').find_all('img')
        return {
            'title': title,
            'img': [urljoin_wrapper(cls.url, image['src']) for image in images],
        }
1537
1538
1539
class OverCompensating(GenericNavigableComic):
    """Retriever for the Over Compensating comics."""
    name = 'compensating'
    long_name = 'Over Compensating'
    url = 'http://www.overcompensating.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the home page anchor whose href ends with 'comic=1'."""
        home = get_soup_at_url(cls.url)
        return home.find('a', href=re.compile('comic=1$'))

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor titled 'next comic' or 'go back already'."""
        wanted_title = 'next comic' if next_ else 'go back already'
        return last_soup.find('a', title=wanted_title)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the comic number, its image url and the image title."""
        page_url = cls.get_url_from_link(link)
        # The comic number is the digit run after 'comic=' at the end of the url.
        num_match = re.compile('.*comic=([0-9]*)$').match(page_url)
        num = int(num_match.groups()[0])
        strip = soup.find('img', src=re.compile('^/oc/comics/.*'))
        return {
            'num': num,
            'img': [urljoin_wrapper(page_url, strip['src'])],
            'title': strip.get('title')
        }
1569
1570
1571
class Oglaf(GenericNavigableComic):
    """Retriever for the Oglaf comics."""
    name = 'oglaf'
    long_name = 'Oglaf [NSFW]'
    url = 'http://oglaf.com'
    _categories = ('NSFW', )
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the parent element of the home page div with id 'st'."""
        home = get_soup_at_url(cls.url)
        return home.find("div", id="st").parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the parent of the 'nx' (next) or 'pvs' (previous) div, if any."""
        marker = last_soup.find("div", id="nx" if next_ else "pvs")
        if not marker:
            return None
        return marker.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return page title, image urls and the joined image titles."""
        title = soup.find('title').string
        heading_imgs = soup.find('div', id='tt').find_all('img')
        assert len(heading_imgs) == 1
        comic_imgs = soup.find_all('img', id='strip')
        assert len(comic_imgs) == 1
        # Title image first, then the strip itself.
        pictures = heading_imgs + comic_imgs
        description = ' '.join(pic['title'] for pic in pictures)
        return {
            'title': title,
            'img': [pic['src'] for pic in pictures],
            'description': description,
        }
1605
1606
1607
class ScandinaviaAndTheWorld(GenericNavigableComic):
    """Retriever for the Scandinavia And The World comics."""
    name = 'satw'
    long_name = 'Scandinavia And The World'
    url = 'http://satwcomic.com'
    get_first_comic_link = simulate_first_link
    first_url = 'http://satwcomic.com/sweden-denmark-and-norway'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor whose accesskey is 'n' (next) or 'p' (previous)."""
        key = 'n' if next_ else 'p'
        return last_soup.find('a', accesskey=key)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, description and image urls read from the page."""
        label_meta = soup.find('meta', attrs={'name': 'twitter:label1'})
        title = label_meta['content']
        description = soup.find('meta', property='og:description')['content']
        pictures = soup.find_all('img', itemprop="image")
        return {
            'title': title,
            'description': description,
            'img': [pic['src'] for pic in pictures],
        }
1631
1632
1633
class SomethingOfThatIlk(GenericEmptyComic):  # Does not exist anymore
    """Placeholder entry for the Something Of That Ilk comics.

    Only the identification attributes are defined here.
    """
    name = 'somethingofthatilk'
    long_name = 'Something Of That Ilk'
    url = 'http://www.somethingofthatilk.com'
1638
1639
1640
class InfiniteMonkeyBusiness(GenericNavigableComic):
    """Retriever for the Infinite Monkey Business comics."""
    name = 'monkey'
    long_name = 'Infinite Monkey Business'
    url = 'http://infinitemonkeybusiness.net'
    get_navi_link = get_a_navi_comicnavnext_navinext
    get_first_comic_link = simulate_first_link
    first_url = 'http://infinitemonkeybusiness.net/comic/pillory/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the og:title and the image urls of the 'comic' div."""
        title = soup.find('meta', property='og:title')['content']
        pictures = soup.find('div', id='comic').find_all('img')
        return {
            'title': title,
            'img': [pic['src'] for pic in pictures],
        }
1658
1659
1660
class Wondermark(GenericListableComic):
    """Retriever for the Wondermark comics."""
    name = 'wondermark'
    long_name = 'Wondermark'
    url = 'http://wondermark.com'
    get_url_from_archive_element = get_href

    @classmethod
    def get_archive_elements(cls):
        """Return the archive page's rel='bookmark' anchors in reversed order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive/'))
        return reversed(archive_soup.find_all('a', rel='bookmark'))

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return date, image, title, alt text and tags for one page.

        Pages without a 'comic' div yield an empty image list and empty
        title/alt strings.
        """
        raw_date = soup.find('div', class_='postdate').find('em').string
        published = string_to_date(remove_st_nd_rd_th_from_date(raw_date), "%B %d, %Y")
        # Defaults for pages that do not contain a comic.
        img_src, alt, title = [], '', ''
        comic_div = soup.find('div', id='comic')
        if comic_div:
            picture = comic_div.find('img')
            img_src = [picture['src']]
            alt = picture['alt']
            assert alt == picture['title']
            title = soup.find('meta', property='og:title')['content']
        tag_links = soup.find('div', class_='postmeta').find_all('a', rel='tag')
        tags = ' '.join(tag.string for tag in tag_links)
        return {
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': img_src,
            'title': title,
            'alt': alt,
            'tags': tags,
        }
1697
1698
1699
class WarehouseComic(GenericNavigableComic):
    """Retriever for the Warehouse Comic comics."""
    name = 'warehouse'
    long_name = 'Warehouse Comic'
    url = 'http://warehousecomic.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return image urls, title and publication date for one page."""
        title = soup.find('h2', class_='post-title').string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find('div', id='comic').find_all('img')
        return {
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'day': published.day,
            'month': published.month,
            'year': published.year,
        }
1721
1722
1723
class JustSayEh(GenericNavigableComic):
    """Retriever for the Just Say Eh comics."""
    # Also on http://tapastic.com/series/Just-Say-Eh
    name = 'justsayeh'
    long_name = 'Just Say Eh'
    url = 'http://www.justsayeh.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return image urls, title and the alt text of the first image."""
        title = soup.find('h2', class_='post-title').string
        pictures = soup.find("div", id="comic").find_all("img")
        # Every image is expected to repeat its alt text as its title.
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        first_alt = pictures[0]['alt']
        return {
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'alt': first_alt,
        }
1744
1745
1746
class MouseBearComedy(GenericNavigableComic):
    """Retriever for the Mouse Bear Comedy comics."""
    # Also on http://mousebearcomedy.tumblr.com
    name = 'mousebear'
    long_name = 'Mouse Bear Comedy'
    url = 'http://www.mousebearcomedy.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return date, image urls, title and author for one page."""
        title = soup.find('h2', class_='post-title').string
        author = soup.find("span", class_="post-author").find("a").string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, '%B %d, %Y')
        pictures = soup.find("div", id="comic").find_all("img")
        # Alt text and title of every image must both equal the post title.
        assert all(pic['alt'] == pic['title'] == title for pic in pictures)
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'author': author,
        }
1772
1773
1774
class BigFootJustice(GenericNavigableComic):
    """Retriever for the Big Foot Justice comics."""
    # Also on http://tapastic.com/series/bigfoot-justice
    name = 'bigfoot'
    long_name = 'Big Foot Justice'
    url = 'http://bigfootjustice.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return image urls and a title built from the image titles."""
        pictures = soup.find('div', id='comic').find_all('img')
        assert all(pic['title'] == pic['alt'] for pic in pictures)
        # The comic title is the space-joined titles of its images.
        joined_title = ' '.join(pic['title'] for pic in pictures)
        return {
            'img': [pic['src'] for pic in pictures],
            'title': joined_title,
        }
1793
1794
1795
class RespawnComic(GenericNavigableComic):
    """Class to retrieve Respawn Comic."""
    # Also on https://respawncomic.tumblr.com
    name = 'respawn'
    long_name = 'Respawn Comic'
    # Fixed: the literal used to end with a stray space, which is not a
    # valid URL character.
    url = 'http://respawncomic.com'
    _categories = ('RESPAWN', )
    get_navi_link = get_a_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://respawncomic.com/comic/c0001/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with the og:title, the author name, the publication
        date (day/month/year) and the og:image urls, minus two site images
        that are explicitly skipped.
        """
        title = soup.find('meta', property='og:title')['content']
        author = soup.find('meta', attrs={'name': 'shareaholic:article_author_name'})['content']
        date_str = soup.find('meta', attrs={'name': 'shareaholic:article_published_time'})['content']
        # Only the first 10 characters are kept so the value parses as %Y-%m-%d.
        date_str = date_str[:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        imgs = soup.find_all('meta', property='og:image')
        # og:image urls excluded from the result.
        skip_imgs = {
            'http://respawncomic.com/wp-content/uploads/2016/03/site/HAROLD2.png',
            'http://respawncomic.com/wp-content/uploads/2016/03/site/DEVA.png'
        }
        return {
            'title': title,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [i['content'] for i in imgs if i['content'] not in skip_imgs],
        }
1827
1828
1829
class SafelyEndangered(GenericNavigableComic):
    """Retriever for the Safely Endangered comics."""
    # Also on http://tumblr.safelyendangered.com
    name = 'endangered'
    long_name = 'Safely Endangered'
    url = 'http://www.safelyendangered.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.safelyendangered.com/comic/ignored/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return date, image urls, title and first image alt text."""
        title = soup.find('h2', class_='post-title').string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, '%B %d, %Y')
        pictures = soup.find('div', id='comic').find_all('img')
        first_alt = pictures[0]['alt']
        # Every image is expected to repeat its alt text as its title.
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'alt': first_alt,
        }
1856
1857
1858
class PicturesInBoxes(GenericNavigableComic):
    """Retriever for the Pictures In Boxes comics."""
    # Also on https://picturesinboxescomic.tumblr.com
    name = 'picturesinboxes'
    long_name = 'Pictures in Boxes'
    url = 'http://www.picturesinboxes.com'
    get_navi_link = get_a_navi_navinext
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.picturesinboxes.com/2013/10/26/tetris/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return date, image urls, title and author for one page."""
        title = soup.find('h2', class_='post-title').string
        author = soup.find("span", class_="post-author").find("a").string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, '%B %d, %Y')
        pictures = soup.find('div', class_='comicpane').find_all('img')
        # At least one image, each titled and alt-texted with the post title.
        assert pictures
        assert all(pic['title'] == pic['alt'] == title for pic in pictures)
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'author': author,
        }
1886
1887
1888
class Penmen(GenericNavigableComic):
    """Retriever for the Penmen comics."""
    name = 'penmen'
    long_name = 'Penmen'
    url = 'http://penmen.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://penmen.com/index.php/2016/09/12/penmen-announces-grin-big-brand-clothing/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, short url, image urls, tags and date for one page."""
        title = soup.find('title').string
        pictures = soup.find('div', class_='entry-content').find_all('img')
        short_url = soup.find('link', rel='shortlink')['href']
        tag_links = soup.find_all('a', rel='tag')
        tags = ' '.join(tag.string for tag in tag_links)
        # Keep the first 10 characters of the <time> datetime value.
        iso_date = soup.find('time')['datetime'][:10]
        published = string_to_date(iso_date, "%Y-%m-%d")
        return {
            'title': title,
            'short_url': short_url,
            'img': [pic['src'] for pic in pictures],
            'tags': tags,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
1915
1916
1917
class TheDoghouseDiaries(GenericNavigableComic):
    """Retriever for The Dog House Diaries comics."""
    name = 'doghouse'
    long_name = 'The Dog House Diaries'
    url = 'http://thedoghousediaries.com'

    @classmethod
    def get_first_comic_link(cls):
        """Return the home page anchor with id 'firstlink'."""
        home = get_soup_at_url(cls.url)
        return home.find('a', id='firstlink')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor with id 'nextlink' or 'previouslink'."""
        anchor_id = 'nextlink' if next_ else 'previouslink'
        return last_soup.find('a', id=anchor_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return titles, alt text, image url and number for one page."""
        strip = soup.find('img', src=re.compile('^dhdcomics/.*'))
        page_url = cls.get_url_from_link(link)
        title = soup.find('h2', id='titleheader').string
        subtitle = soup.find('div', id='subtext').string
        alt = strip.get('title')
        # The src is stripped of surrounding whitespace before being joined.
        img_url = urljoin_wrapper(page_url, strip['src'].strip())
        # The comic number is the last path segment of the page url.
        num = int(page_url.rsplit('/', 1)[-1])
        return {
            'title': title,
            'title2': subtitle,
            'alt': alt,
            'img': [img_url],
            'num': num,
        }
1946
1947
1948
class InvisibleBread(GenericListableComic):
    """Class to retrieve Invisible Bread comics."""
    # Also on http://www.gocomics.com/invisible-bread
    name = 'invisiblebread'
    long_name = 'Invisible Bread'
    url = 'http://invisiblebread.com'

    @classmethod
    def get_archive_elements(cls):
        """Return the archive page's 'archive-title' cells in reversed order."""
        archive_url = urljoin_wrapper(cls.url, 'archives/')
        return reversed(get_soup_at_url(archive_url).find_all('td', class_='archive-title'))

    @classmethod
    def get_url_from_archive_element(cls, td):
        """Get url corresponding to an archive element."""
        return td.find('a')['href']

    @classmethod
    def get_comic_info(cls, soup, td):
        """Get information about a particular comics.

        The title and the month/day come from the archive row; the year is
        the first numeric path segment of the comic url.
        """
        url = cls.get_url_from_archive_element(td)
        title = td.find('a').string
        month_and_day = td.previous_sibling.string
        # Escape the site url so its dots are matched literally rather than
        # as regex wildcards.
        link_re = re.compile('^%s/([0-9]+)/' % re.escape(cls.url))
        year = link_re.match(url).groups()[0]
        date_str = month_and_day + ' ' + year
        day = string_to_date(date_str, '%b %d %Y')
        # Only the first image of the 'comic' div is used.
        img = soup.find('div', id='comic').find('img')
        assert img['title'] == img['alt'] == title
        return {
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [urljoin_wrapper(cls.url, img['src'])],
            'title': title,
        }
1984
1985
1986
class DiscoBleach(GenericEmptyComic):  # Does not work anymore
    """Placeholder entry for the Disco Bleach comics.

    Only the identification attributes are defined here.
    """
    name = 'discobleach'
    long_name = 'Disco Bleach'
    url = 'http://discobleach.com'
1991
1992
1993
class TubeyToons(GenericEmptyComic):  # Does not work anymore
    """Placeholder entry for the TubeyToons comics.

    Only the identification attributes are defined here.
    """
    # Also on http://tapastic.com/series/Tubey-Toons
    # Also on https://tubeytoons.tumblr.com
    name = 'tubeytoons'
    long_name = 'Tubey Toons'
    url = 'http://tubeytoons.com'
    # NOTE(review): the category is spelled 'TUNEYTOONS' (not 'TUBEYTOONS');
    # kept as is because other code may use the same string - confirm.
    _categories = ('TUNEYTOONS', )
2001
2002
2003
class CompletelySeriousComics(GenericNavigableComic):
    """Retriever for the Completely Serious comics."""
    name = 'completelyserious'
    long_name = 'Completely Serious Comics'
    url = 'http://completelyseriouscomics.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return date, image urls, title, alt text and author for one page."""
        title = soup.find('h2', class_='post-title').string
        # The author is read from the second child of the 'post-author' span.
        author = soup.find('span', class_='post-author').contents[1].string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, '%B %d, %Y')
        pictures = soup.find('div', class_='comicpane').find_all('img')
        assert pictures
        alt = pictures[0]['title']
        # All images must share the first image's title as title and alt.
        assert all(pic['title'] == pic['alt'] == alt for pic in pictures)
        return {
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'alt': alt,
            'author': author,
        }
2031
2032
2033
class PoorlyDrawnLines(GenericListableComic):
    """Class to retrieve Poorly Drawn Lines comics."""
    # Also on http://pdlcomics.tumblr.com
    name = 'poorlydrawn'
    long_name = 'Poorly Drawn Lines'
    url = 'https://www.poorlydrawnlines.com'
    _categories = ('POORLYDRAWN', )
    get_url_from_archive_element = get_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        At most one image is expected in the 'post' div; when there is none
        the image list is empty and the title is the empty string.
        """
        imgs = soup.find('div', class_='post').find_all('img')
        assert len(imgs) <= 1
        return {
            'img': [i['src'] for i in imgs],
            'title': imgs[0].get('title', "") if imgs else "",
        }

    @classmethod
    def get_archive_elements(cls):
        """Return the archive page's comic anchors in reversed order."""
        archive_url = urljoin_wrapper(cls.url, 'archive')
        # Escape the site url so its dots are matched literally; the trailing
        # '.' still requires at least one character after '/comic/'.
        url_re = re.compile('^%s/comic/.' % re.escape(cls.url))
        return reversed(get_soup_at_url(archive_url).find_all('a', href=url_re))
2057
2058
2059
class LoadingComics(GenericNavigableComic):
    """Retriever for the Loading Artist comics."""
    name = 'loadingartist'
    long_name = 'Loading Artist'
    url = 'http://www.loadingartist.com/latest'

    @classmethod
    def get_first_comic_link(cls):
        """Return the anchor titled 'First' found at the class url."""
        latest = get_soup_at_url(cls.url)
        return latest.find('a', title="First")

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor titled 'Next' or 'Previous'."""
        wanted_title = 'Next' if next_ else 'Previous'
        return last_soup.find('a', title=wanted_title)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, image urls and publication date for one page."""
        title = soup.find('h1').string
        raw_date = soup.find('span', class_='date').string.strip()
        published = string_to_date(raw_date, "%B %d, %Y")
        # Only images with empty alt and empty title are searched for.
        pictures = soup.find('div', class_='comic').find_all('img', alt='', title='')
        return {
            'title': title,
            'img': [pic['src'] for pic in pictures],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2089
2090
2091
class ChuckleADuck(GenericNavigableComic):
    """Retriever for the Chuckle-A-Duck comics."""
    name = 'chuckleaduck'
    long_name = 'Chuckle-A-duck'
    url = 'http://chuckleaduck.com'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return date, image urls, title and author for one page.

        A page without a 'comic' div (or without images in it) gives an
        empty image list and an empty title.
        """
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(remove_st_nd_rd_th_from_date(raw_date), "%B %d, %Y")
        author = soup.find('span', class_='post-author').string
        comic_div = soup.find('div', id='comic')
        if comic_div:
            pictures = comic_div.find_all('img')
        else:
            pictures = []
        if pictures:
            title = pictures[0]['title']
        else:
            title = ""
        assert all(pic['title'] == pic['alt'] == title for pic in pictures)
        return {
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'author': author,
        }
2117
2118
2119
class DepressedAlien(GenericNavigableComic):
    """Retriever for the Depressed Alien comics."""
    name = 'depressedalien'
    long_name = 'Depressed Alien'
    url = 'http://depressedalien.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the parent of the home page image named 'beginArrow'."""
        home = get_soup_at_url(cls.url)
        return home.find('img', attrs={'name': 'beginArrow'}).parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the parent of the image named 'rightArrow' or 'leftArrow'."""
        arrow_name = 'rightArrow' if next_ else 'leftArrow'
        return last_soup.find('img', attrs={'name': arrow_name}).parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the twitter:title and the og:image urls of the page."""
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        image_metas = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'img': [meta['content'] for meta in image_metas],
        }
2145
2146
2147 View Code Duplication
class TurnOffUs(GenericListableComic):
    """Retriever for the TurnOffUs comics."""
    name = 'turnoffus'
    long_name = 'Turn Off Us'
    url = 'http://turnoff.us'
    get_url_from_archive_element = join_cls_url_to_href

    @classmethod
    def get_archive_elements(cls):
        """Return the 'post-link' anchors of the 'all' page in reversed order."""
        listing_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'all'))
        posts = listing_soup.find('ul', class_='post-list')
        return reversed(posts.find_all('a', class_='post-link'))

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Return the og:title and the og:image urls of the page."""
        title = soup.find('meta', property='og:title')['content']
        image_metas = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'img': [meta['content'] for meta in image_metas],
        }
2169
2170
2171
class ThingsInSquares(GenericListableComic):
    """Retriever for the Things In Squares comics."""
    # This can be retrieved in other languages
    # Also on https://tapastic.com/series/Things-in-Squares
    name = 'squares'
    long_name = 'Things in squares'
    url = 'http://www.thingsinsquares.com'

    @classmethod
    def get_comic_info(cls, soup, tr):
        """Return date, titles, description, tags, image urls and alt text.

        The date and the first title come from the archive row (exactly
        three cells are expected); the rest is read from the comic page.
        """
        _, link_cell, date_cell = tr.find_all('td')
        anchor = link_cell.find('a')
        published = string_to_date(date_cell.string, "%m.%d.%y")
        title = anchor.string
        title2 = soup.find('meta', property='og:title')['content']
        desc_meta = soup.find('meta', property='og:description')
        if desc_meta:
            description = desc_meta['content']
        else:
            description = ''
        tag_metas = soup.find_all('meta', property='article:tag')
        tags = ' '.join(meta['content'] for meta in tag_metas)
        pictures = soup.find('div', class_='entry-content').find_all('img')
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'title': title,
            'title2': title2,
            'description': description,
            'tags': tags,
            'img': [pic['src'] for pic in pictures],
            'alt': ' '.join(pic['alt'] for pic in pictures),
        }

    @classmethod
    def get_url_from_archive_element(cls, tr):
        """Return the href of the anchor in the second cell of the row."""
        # Unpacking still requires the row to have exactly three cells.
        _, link_cell, _ = tr.find_all('td')
        return link_cell.find('a')['href']

    @classmethod
    def get_archive_elements(cls):
        """Return the rows of the archive page's tbody in reversed order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive-2'))
        rows = archive_soup.find('tbody').find_all('tr')
        return reversed(rows)
2213
2214
2215
class HappleTea(GenericNavigableComic):
    """Retriever for the Happle Tea comics."""
    name = 'happletea'
    long_name = 'Happle Tea'
    url = 'http://www.happletea.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, image urls, alt text, date and author for one page."""
        pictures = soup.find('div', id='comic').find_all('img')
        # Title, author and date are all read from the 'post-content' div.
        post = soup.find('div', class_='post-content')
        title = post.find('h2', class_='post-title').string
        author = post.find('a', rel='author').string
        raw_date = post.find('span', class_='post-date').string
        published = string_to_date(raw_date, "%B %d, %Y")
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        return {
            'title': title,
            'img': [pic['src'] for pic in pictures],
            'alt': ''.join(pic['alt'] for pic in pictures),
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'author': author,
        }
2242
2243
2244
class RockPaperScissors(GenericNavigableComic):
    """Retriever for the Rock Paper Scissors comics."""
    name = 'rps'
    long_name = 'Rock Paper Scissors'
    url = 'http://rps-comics.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, transcript, short url and og:image urls."""
        title = soup.find('title').string
        image_metas = soup.find_all('meta', property='og:image')
        short_url = soup.find('link', rel='shortlink')['href']
        transcript = soup.find('div', id='transcript-content').string
        return {
            'title': title,
            'transcript': transcript,
            'short_url': short_url,
            'img': [meta['content'] for meta in image_metas],
        }
2265
2266
2267
class FatAwesomeComics(GenericNavigableComic):
    """Retriever for the Fat Awesome comics."""
    # Also on http://fatawesomecomedy.tumblr.com
    name = 'fatawesome'
    long_name = 'Fat Awesome'
    url = 'http://fatawesome.com/comics'
    get_navi_link = get_a_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://fatawesome.com/shortbus/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, description, tags, alt text, image url and date."""
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        description = soup.find('meta', attrs={'name': 'description'})['content']
        tag_meta = soup.find('meta', property='article:tag')
        if tag_meta:
            tags = tag_meta['content']
        else:
            tags = ""
        # Keep the first 10 characters of the published time.
        iso_date = soup.find('meta', property='article:published_time')['content'][:10]
        published = string_to_date(iso_date, "%Y-%m-%d")
        pictures = soup.find_all('img', attrs={'data-recalc-dims': "1"})
        assert len(pictures) == 1
        alt = "".join(pic['alt'] for pic in pictures)
        # Whatever follows the last '?' of the src is dropped.
        img_urls = [pic['src'].rsplit('?', 1)[0] for pic in pictures]
        return {
            'title': title,
            'description': description,
            'tags': tags,
            'alt': alt,
            'img': img_urls,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2298
2299
2300 View Code Duplication
class AnythingComic(GenericListableComic):
    """Retriever for Anything Comic, driven by the site's archive table."""
    # Also on http://tapastic.com/series/anything
    name = 'anythingcomic'
    long_name = 'Anything Comic'
    url = 'http://www.anythingcomic.com'

    @classmethod
    def get_archive_elements(cls):
        """Return the <tr> rows of the archive table that describe comics."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive/'))
        rows = archive_soup.find('table', id='chapter_table').find_all('tr')
        # The first 2 <tr>'s do not correspond to comics
        return rows[2:]

    @classmethod
    def get_url_from_archive_element(cls, tr):
        """Return the absolute comic url referenced by an archive row."""
        # A row must hold exactly 4 cells; only the second one is needed.
        _, comic_cell, _, _ = tr.find_all('td')
        anchor = comic_cell.find('a')
        return urljoin_wrapper(cls.url, anchor['href'])

    @classmethod
    def get_comic_info(cls, soup, tr):
        """Build the info dict for one comic.

        `soup` is the parsed comic page and `tr` the archive row: number,
        title and date are read from the row, the image from the page.
        """
        num_cell, comic_cell, date_cell, _ = tr.find_all('td')
        num = int(num_cell.string)
        title = comic_cell.find('a').string
        images = soup.find_all('img', id='comic_image')
        cleaned_date = remove_st_nd_rd_th_from_date(date_cell.string)
        day = string_to_date(cleaned_date, "%B %d, %Y, %I:%M %p")
        assert len(images) == 1
        assert all(img.get('alt') == img.get('title') for img in images)
        return {
            'num': num,
            'title': title,
            'alt': images[0].get('alt', ''),
            'img': [img['src'] for img in images],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
2341
2342
2343
class LonnieMillsap(GenericNavigableComic):
    """Retriever for Lonnie Millsap's comics."""
    name = 'millsap'
    long_name = 'Lonnie Millsap'
    url = 'http://www.lonniemillsap.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.lonniemillsap.com/?p=42'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title, author, images, date) from `soup`.

        `link` is not used here.
        """
        title = soup.find('h2', class_='post-title').string
        # Author, date and images are all looked up inside the post block
        content = soup.find('div', class_='post-content')
        author_span = content.find("span", class_="post-author")
        author = author_span.find("a").string
        posted_on = content.find("span", class_="post-date").string
        day = string_to_date(posted_on, "%B %d, %Y")
        entry = content.find("div", class_="entry")
        return {
            'title': title,
            'author': author,
            'img': [img['src'] for img in entry.find_all("img")],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
2369
2370
2371 View Code Duplication
class LinsEditions(GenericNavigableComic):
    """Retriever for the L.I.N.S. Editions comics."""
    # Also on https://linscomics.tumblr.com
    # Now on https://warandpeas.com
    name = 'lins'
    long_name = 'L.I.N.S. Editions'
    url = 'https://linsedition.com'
    _categories = ('LINS', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'https://linsedition.com/2011/09/07/l-i-n-s/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict from the page's <meta> tags.

        `link` is not used here.
        """
        title = soup.find('meta', property='og:title')['content']
        image_metas = soup.find_all('meta', property='og:image')
        published = soup.find('meta', property='article:published_time')
        # Only the leading YYYY-MM-DD part of the timestamp is parsed
        day = string_to_date(published['content'][:10], "%Y-%m-%d")
        return {
            'title': title,
            'img': [meta['content'] for meta in image_metas],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
2397 View Code Duplication
0 ignored issues
show
This code seems to be duplicated in your project.
Loading history...
2398
2399
class ThorsThundershack(GenericNavigableComic):
    """Retriever for the Thor's Thundershack comics."""
    # Also on http://tapastic.com/series/Thors-Thundershac
    name = 'thor'
    long_name = 'Thor\'s Thundershack'
    url = 'http://www.thorsthundershack.com'
    _categories = ('THOR', )
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'first' navigation link found on the home page."""
        home = get_soup_at_url(cls.url)
        return home.find('a', class_='first navlink')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the link to the next (or previous) comic, None if absent.

        Links whose href is exactly '/comic' are skipped.
        """
        rel = 'next' if next_ else 'prev'
        candidates = last_soup.find_all('a', rel=rel)
        return next((a for a in candidates if a['href'] != '/comic'), None)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict from the page's microdata.

        `link` is not used here. Image urls are joined to the site url.
        """
        title = soup.find('meta', attrs={'name': 'description'})["content"]
        description = soup.find('div', itemprop='articleBody').text
        author = soup.find('span', itemprop='author copyrightHolder').string
        images = soup.find_all('img', itemprop='image')
        assert all(img['title'] == img['alt'] for img in images)
        alt = "" if not images else images[0]['alt']
        published = soup.find('time', itemprop='datePublished')["datetime"]
        day = string_to_date(published, "%Y-%m-%d %H:%M:%S")
        return {
            'img': [urljoin_wrapper(cls.url, img['src']) for img in images],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'author': author,
            'title': title,
            'alt': alt,
            'description': description,
        }
2442
2443
2444
class GerbilWithAJetpack(GenericNavigableComic):
    """Class to retrieve GerbilWithAJetpack comics."""
    name = 'gerbil'
    long_name = 'Gerbil With A Jetpack'
    url = 'http://gerbilwithajetpack.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        `soup` is the parsed comic page; `link` is not used here.
        Returns a dict with the image urls, title, alt text, author and
        publication date.
        """
        title = soup.find('h2', class_='post-title').string
        author = soup.find("span", class_="post-author").find("a").string
        date_str = soup.find("span", class_="post-date").string
        day = string_to_date(date_str, "%B %d, %Y")
        imgs = soup.find("div", id="comic").find_all("img")
        # A page without image yields an empty alt instead of an IndexError,
        # like the other classes of this file built on the same page layout.
        alt = imgs[0]['alt'] if imgs else ""
        # All images are expected to share the same alt/title text
        assert all(i['alt'] == i['title'] == alt for i in imgs)
        return {
            'img': [i['src'] for i in imgs],
            'title': title,
            'alt': alt,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
2471
2472
2473
class EveryDayBlues(GenericEmptyComic, GenericNavigableComic):
    """Retriever for the Every Day Blues comics."""
    name = "blues"
    long_name = "Every Day Blues"
    url = "http://everydayblues.net"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (images, title, author, date) from `soup`.

        `link` is not used here. The date is parsed with the "de_DE.utf8"
        locale and at most one image is expected (asserted).
        """
        title = soup.find("h2", class_="post-title").string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        posted_on = soup.find("span", class_="post-date").string
        day = string_to_date(posted_on, "%d. %B %Y", "de_DE.utf8")
        images = soup.find("div", id="comic").find_all("img")
        # alt and title of every image must both equal the post title
        assert all(img['alt'] == img['title'] == title for img in images)
        assert len(images) <= 1
        return {
            'img': [img['src'] for img in images],
            'title': title,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
2499
2500
2501
class BiterComics(GenericNavigableComic):
    """Retriever for the Biter Comics."""
    name = "biter"
    long_name = "Biter Comics"
    url = "http://www.bitercomics.com"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (image, title, alt, author, date) from `soup`.

        `link` is not used here. Exactly one image is expected (asserted).
        """
        title = soup.find("h1", class_="entry-title").string
        author_span = soup.find("span", class_="author vcard")
        author = author_span.find("a").string
        posted_on = soup.find("span", class_="entry-date").string
        day = string_to_date(posted_on, "%B %d, %Y")
        images = soup.find("div", id="comic").find_all("img")
        assert all(img['alt'] == img['title'] for img in images)
        assert len(images) == 1
        alt = images[0]['alt']
        return {
            'img': [img['src'] for img in images],
            'title': title,
            'alt': alt,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
2529
2530
2531
class TheAwkwardYeti(GenericNavigableComic):
    """Retriever for The Awkward Yeti comics."""
    # Also on http://www.gocomics.com/the-awkward-yeti
    # Also on http://larstheyeti.tumblr.com
    # Also on https://tapastic.com/series/TheAwkwardYeti
    name = 'yeti'
    long_name = 'The Awkward Yeti'
    url = 'http://theawkwardyeti.com'
    _categories = ('YETI', )
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (images, title, date) from `soup`.

        `link` is not used here.
        """
        title = soup.find('h2', class_='post-title').string
        posted_on = soup.find("span", class_="post-date").string
        day = string_to_date(posted_on, "%B %d, %Y")
        images = soup.find("div", id="comic").find_all("img")
        # Only the first image (if any) must have matching alt and title
        assert not images or images[0]['alt'] == images[0]['title']
        return {
            'img': [img['src'] for img in images],
            'title': title,
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
2558
2559
2560
class PleasantThoughts(GenericNavigableComic):
    """Retriever for the Pleasant Thoughts comics."""
    name = 'pleasant'
    long_name = 'Pleasant Thoughts'
    url = 'http://pleasant-thoughts.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title and images) from `soup`.

        `link` is not used here. Both values are read inside the
        'post-content' block of the page.
        """
        content = soup.find('div', class_='post-content')
        title = content.find('h2', class_='post-title').string
        entry = content.find("div", class_="entry")
        return {
            'title': title,
            'img': [img['src'] for img in entry.find_all("img")],
        }
2578
2579
2580
class MisterAndMe(GenericNavigableComic):
    """Retriever for the Mister & Me comics."""
    # Also on http://www.gocomics.com/mister-and-me
    # Also on https://tapastic.com/series/Mister-and-Me
    name = 'mister'
    long_name = 'Mister & Me'
    url = 'http://www.mister-and-me.com'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (image, title, alt, author, date) from `soup`.

        `link` is not used here. At most one image is expected (asserted);
        the alt text is empty when there is none.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        posted_on = soup.find("span", class_="post-date").string
        day = string_to_date(posted_on, "%B %d, %Y")
        images = soup.find("div", id="comic").find_all("img")
        assert all(img['alt'] == img['title'] for img in images)
        assert len(images) <= 1
        alt = "" if not images else images[0]['alt']
        return {
            'img': [img['src'] for img in images],
            'title': title,
            'alt': alt,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
2610
2611
2612
class LastPlaceComics(GenericNavigableComic):
    """Retriever for the Last Place Comics."""
    name = 'lastplace'
    long_name = 'Last Place Comics'
    url = "http://lastplacecomics.com"
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (image, title, alt, author, date) from `soup`.

        `link` is not used here. At most one image is expected (asserted);
        the alt text is empty when there is none.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        posted_on = soup.find("span", class_="post-date").string
        day = string_to_date(posted_on, "%B %d, %Y")
        images = soup.find("div", id="comic").find_all("img")
        assert all(img['alt'] == img['title'] for img in images)
        assert len(images) <= 1
        alt = "" if not images else images[0]['alt']
        return {
            'img': [img['src'] for img in images],
            'title': title,
            'alt': alt,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
2640
2641
2642
class TalesOfAbsurdity(GenericNavigableComic):
    """Retriever for the Tales Of Absurdity comics."""
    # Also on http://tapastic.com/series/Tales-Of-Absurdity
    # Also on http://talesofabsurdity.tumblr.com
    name = 'absurdity'
    long_name = 'Tales of Absurdity'
    url = 'http://talesofabsurdity.com'
    _categories = ('ABSURDITY', )
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (images, title, alt, author, date) from `soup`.

        `link` is not used here. The alt text is the one of the first
        image, or empty when the page has no image.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        posted_on = soup.find("span", class_="post-date").string
        day = string_to_date(posted_on, "%B %d, %Y")
        images = soup.find("div", id="comic").find_all("img")
        assert all(img['alt'] == img['title'] for img in images)
        alt = "" if not images else images[0]['alt']
        return {
            'img': [img['src'] for img in images],
            'title': title,
            'alt': alt,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
2672
2673
2674
class EndlessOrigami(GenericEmptyComic, GenericNavigableComic):
    """Retriever for the Endless Origami comics."""
    name = "origami"
    long_name = "Endless Origami"
    url = "http://endlessorigami.com"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (images, title, alt, author, date) from `soup`.

        `link` is not used here. The alt text is the one of the first
        image, or empty when the page has no image.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        posted_on = soup.find("span", class_="post-date").string
        day = string_to_date(posted_on, "%B %d, %Y")
        images = soup.find("div", id="comic").find_all("img")
        assert all(img['alt'] == img['title'] for img in images)
        alt = "" if not images else images[0]['alt']
        return {
            'img': [img['src'] for img in images],
            'title': title,
            'alt': alt,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
2701
2702
2703
class PlanC(GenericNavigableComic):
    """Retriever for the Plan C comics."""
    name = 'planc'
    long_name = 'Plan C'
    url = 'http://www.plancomic.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title, images, date) from `soup`.

        `link` is not used here.
        """
        title = soup.find('h2', class_='post-title').string
        posted_on = soup.find("span", class_="post-date").string
        day = string_to_date(posted_on, "%B %d, %Y")
        comic_div = soup.find('div', id='comic')
        return {
            'title': title,
            'img': [img['src'] for img in comic_div.find_all('img')],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
2725
2726
2727
class BuniComic(GenericNavigableComic):
    """Retriever for the Buni comics."""
    name = 'buni'
    long_name = 'BuniComics'
    url = 'http://www.bunicomic.com'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (image and title) from `soup`.

        `link` is not used here. Exactly one image is expected (asserted)
        and its title attribute is used as the comic title.
        """
        images = soup.find('div', id='comic').find_all('img')
        assert all(img['alt'] == img['title'] for img in images)
        assert len(images) == 1
        return {
            'img': [img['src'] for img in images],
            'title': images[0]['title'],
        }
2745
2746
2747 View Code Duplication
class GenericCommitStrip(GenericNavigableComic):
    """Common base for the Commit Strip retrievers (one per language)."""
    get_navi_link = get_a_rel_next
    get_first_comic_link = simulate_first_link
    # Placeholder: each language subclass provides its own first url
    first_url = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict from the page's Open Graph tags and images.

        `link` is not used here. 'title2' is the space-joined title
        attributes of the images (empty string for images without one).
        """
        def og(prop):
            # 'content' attribute of the <meta property="og:..."> tag
            return soup.find('meta', property='og:' + prop)['content']

        desc = og('description')
        title = og('title')
        images = soup.find('div', class_='entry-content').find_all('img')
        title2 = ' '.join(img.get('title', '') for img in images)
        return {
            'title': title,
            'title2': title2,
            'description': desc,
            'img': [
                urljoin_wrapper(cls.url, convert_iri_to_plain_ascii_uri(img['src']))
                for img in images
            ],
        }
2766
2767
2768
class CommitStripFr(GenericCommitStrip):
    """Retriever for the French edition of Commit Strip."""
    name = "commit_fr"
    long_name = "Commit Strip (Fr)"
    url = "http://www.commitstrip.com/fr"
    _categories = ("FRANCAIS", )
    # Value for the first_url placeholder of the generic base class
    first_url = "http://www.commitstrip.com/fr/2012/02/22/interview/"
2775
2776
2777
class CommitStripEn(GenericCommitStrip):
    """Retriever for the English edition of Commit Strip."""
    name = "commit_en"
    long_name = "Commit Strip (En)"
    url = "http://www.commitstrip.com/en"
    # Value for the first_url placeholder of the generic base class
    first_url = "http://www.commitstrip.com/en/2012/02/22/interview/"
2783
2784
2785
class GenericBoumerie(GenericNavigableComic):
    """Common base for the Boumeries retrievers (one per language)."""
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next
    # Placeholders: each language subclass provides its own values,
    # both are handed to string_to_date when parsing the post date
    date_format = NotImplemented
    lang = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict from `soup`.

        `link` is not used here. The post date is parsed with the
        subclass' `date_format` and `lang`.
        """
        title = soup.find('h2', class_='post-title').string
        short_url = soup.find('link', rel='shortlink')['href']
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        posted_on = soup.find('span', class_='post-date').string
        day = string_to_date(posted_on, cls.date_format, cls.lang)
        images = soup.find('div', id='comic').find_all('img')
        assert all(img['alt'] == img['title'] for img in images)
        return {
            'short_url': short_url,
            'img': [img['src'] for img in images],
            'title': title,
            'author': author,
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
2811
2812
2813
class BoumerieEn(GenericBoumerie):
    """Retriever for the English edition of Boumeries."""
    name = "boumeries_en"
    long_name = "Boumeries (En)"
    url = "http://comics.boumerie.com"
    # Format and locale used to parse the post date
    date_format = "%B %d, %Y"
    lang = "en_GB.UTF-8"
2820
2821
2822
class BoumerieFr(GenericBoumerie):
    """Retriever for the French edition of Boumeries."""
    name = "boumeries_fr"
    long_name = "Boumeries (Fr)"
    url = "http://bd.boumerie.com"
    _categories = ("FRANCAIS", )
    # Format and locale used to parse the post date
    date_format = "%A, %d %B %Y"
    lang = "fr_FR.utf8"
2830
2831
2832
class UnearthedComics(GenericNavigableComic):
    """Class to retrieve Unearthed comics."""
    # Also on http://tapastic.com/series/UnearthedComics
    # Also on https://unearthedcomics.tumblr.com
    name = 'unearthed'
    long_name = 'Unearthed Comics'
    url = 'http://unearthedcomics.com'
    _categories = ('UNEARTHED', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://unearthedcomics.com/comics/world-with-turn-signals/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        `soup` is the parsed comic page; `link` is not used here.
        Returns a dict with the title, description, short url ('url2'),
        image urls and publication date.
        """
        short_url = soup.find('link', rel='shortlink')['href']
        # The title is in the first <h1> or, failing that, the first <h2>
        title_elt = soup.find('h1') or soup.find('h2')
        title = title_elt.string if title_elt else ""
        # Keep the text of the description, not the <meta> tag object itself;
        # the tag is optional and a missing one gives an empty description.
        desc_elt = soup.find('meta', property='og:description')
        desc = desc_elt.get('content', '') if desc_elt is not None else ""
        date_str = soup.find('time', class_='published updated hidden')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        post = soup.find('div', class_="entry content entry-content type-portfolio")
        imgs = post.find_all('img')
        return {
            'title': title,
            'description': desc,
            'url2': short_url,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
2864
2865
2866
class Optipess(GenericNavigableComic):
    """Retriever for the Optipess comics."""
    name = 'optipess'
    long_name = 'Optipess'
    url = 'http://www.optipess.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title, alt, author, images, date) from `soup`.

        `link` is not used here. A page without 'comic' block, or without
        image in it, gives an empty image list and an empty alt text.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        comic_div = soup.find('div', id='comic')
        images = [] if not comic_div else comic_div.find_all('img')
        alt = "" if not images else images[0]['title']
        # All images are expected to share the same alt/title text
        assert all(img['alt'] == img['title'] == alt for img in images)
        posted_on = soup.find('span', class_='post-date').string
        day = string_to_date(posted_on, "%B %d, %Y")
        return {
            'title': title,
            'alt': alt,
            'author': author,
            'img': [img['src'] for img in images],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
2894
2895
2896
class PainTrainComic(GenericNavigableComic):
    """Class to retrieve Pain Train Comics."""
    name = 'paintrain'
    long_name = 'Pain Train Comics'
    url = 'http://paintraincomic.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        `soup` is the parsed comic page; `link` is not used here.
        Returns a dict with the short url, the comic number extracted from
        it, the image urls, publication date, alt text and title.
        """
        title = soup.find('h2', class_='post-title').string
        short_url = soup.find('link', rel='shortlink')['href']
        # The site url is escaped so that its dots only match literal dots
        short_url_re = re.compile(r'^%s/\?p=([0-9]*)' % re.escape(cls.url))
        num = int(short_url_re.match(short_url).group(1))
        imgs = soup.find('div', id='comic').find_all('img')
        # A page without image yields an empty alt instead of an IndexError
        alt = imgs[0]['title'] if imgs else ""
        # All images are expected to share the same alt/title text
        assert all(i['alt'] == i['title'] == alt for i in imgs)
        date_str = soup.find('span', class_='post-date').string
        day = string_to_date(date_str, "%d/%m/%Y")
        return {
            'short_url': short_url,
            'num': num,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'alt': alt,
            'title': title,
        }
2926
2927
2928
class MoonBeard(GenericNavigableComic):
    """Class to retrieve MoonBeard comics."""
    # Also on http://blog.squiresjam.es
    # Also on http://www.webtoons.com/en/comedy/moon-beard/list?title_no=471
    name = 'moonbeard'
    long_name = 'Moon Beard'
    url = 'http://moonbeard.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        `soup` is the parsed comic page; `link` is not used here.
        Returns a dict with the short url, the comic number extracted from
        it, the image urls, publication date, title, tags, alt text and
        author.
        """
        title = soup.find('h2', class_='post-title').string
        short_url = soup.find('link', rel='shortlink')['href']
        # The site url is escaped so that its dots only match literal dots
        short_url_re = re.compile(r'^%s/\?p=([0-9]*)' % re.escape(cls.url))
        num = int(short_url_re.match(short_url).group(1))
        imgs = soup.find('div', id='comic').find_all('img')
        # A page without image yields an empty alt instead of an IndexError
        alt = imgs[0]['title'] if imgs else ""
        # All images are expected to share the same alt/title text
        assert all(i['alt'] == i['title'] == alt for i in imgs)
        date_str = soup.find('span', class_='post-date').string
        day = string_to_date(date_str, "%B %d, %Y")
        tags = ' '.join(t['content'] for t in soup.find_all('meta', property='article:tag'))
        author = soup.find('span', class_='post-author').string
        return {
            'short_url': short_url,
            'num': num,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'title': title,
            'tags': tags,
            'alt': alt,
            'author': author,
        }
2964
2965
2966
class AHammADay(GenericEmptyComic, GenericNavigableComic):
    """Class to retrieve class A Hamm A Day comics."""
    name = 'hamm'
    long_name = 'A Hamm A Day'
    url = 'http://www.ahammaday.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.ahammaday.com/today/3/6/french'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns None when the page has no navigation entry in the
        requested direction.
        """
        # prev is next / next is prev
        li = last_soup.find('li', class_='previous' if next_ else 'next')
        # No <li> on the page: report "no link" instead of failing on None
        return li.find('a') if li else None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        `soup` is the parsed comic page; `link` is not used here.
        Returns a dict with the image urls, title, author and publication
        date.
        """
        date_str = soup.find('time', class_='published')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        author = soup.find('span', class_='blog-author').find('a').string
        title = soup.find('meta', property='og:title')['content']
        imgs = soup.find_all('meta', itemprop='image')
        return {
            'img': [i['content'] for i in imgs],
            'title': title,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
        }
2997
2998
2999 View Code Duplication
class LittleLifeLines(GenericNavigableComic):
    """Class to retrieve Little Life Lines comics."""
    # Also on https://little-life-lines.tumblr.com
    name = 'life'
    long_name = 'Little Life Lines'
    url = 'http://www.littlelifelines.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.littlelifelines.com/comics/well-done'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns None when the page has no navigation entry in the
        requested direction.
        """
        # prev is next / next is prev
        li = last_soup.find('li', class_='prev' if next_ else 'next')
        return li.find('a') if li else None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        `soup` is the parsed comic page; `link` is not used here.
        Returns a dict with the title, alt text, description, author,
        publication date and image urls.
        """
        title = soup.find('meta', property='og:title')['content']
        desc = soup.find('meta', property='og:description')['content']
        date_str = soup.find('time', class_='published')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        author = soup.find('a', rel='author').string
        div_content = soup.find('div', class_="body entry-content")
        # Images without a 'src' attribute are ignored
        imgs = [i for i in div_content.find_all('img') if i.get('src') is not None]
        # No usable image left: empty alt instead of an IndexError
        alt = imgs[0]['alt'] if imgs else ""
        return {
            'title': title,
            'alt': alt,
            'description': desc,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [i['src'] for i in imgs],
        }
3038
3039
3040
class GenericWordPressInkblot(GenericNavigableComic):
    """Shared retrieval logic for comics using WordPress with Inkblot."""
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'first comic' anchor found on the page at cls.url."""
        first_link_classes = ('webcomic-link webcomic1-link '
                              'first-webcomic-link first-webcomic1-link')
        home = get_soup_at_url(cls.url)
        return home.find('a', class_=first_link_classes)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title, date, images) for one comic page."""
        title = soup.find('meta', property='og:title')['content']
        imgs = soup.find('div', class_='webcomic-image').find_all('img')
        published = soup.find('meta', property='article:published_time')['content']
        # Only the first 10 characters are parsed, as a %Y-%m-%d date.
        day = string_to_date(published[:10], "%Y-%m-%d")
        return {
            'title': title,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [img['src'] for img in imgs],
        }
3063
3064
3065
class EverythingsStupid(GenericWordPressInkblot):
    """Retrieve Everything's Stupid comics (WordPress with Inkblot)."""
    # Also available at:
    #   http://tapastic.com/series/EverythingsStupid
    #   http://www.webtoons.com/en/challenge/everythings-stupid/list?title_no=14591
    #   http://everythingsstupidcomics.tumblr.com
    name = 'stupid'
    long_name = "Everything's Stupid"
    url = 'http://everythingsstupid.net'
3073
3074
3075
class TheIsmComics(GenericWordPressInkblot):
    """Retrieve The Ism comics (WordPress with Inkblot)."""
    # Possibly also available at https://tapastic.com/series/TheIsm (?)
    name = 'theism'
    long_name = "The Ism"
    url = 'http://www.theism-comics.com'
3081
3082
3083
class WoodenPlankStudios(GenericWordPressInkblot):
    """Retrieve Wooden Plank Studios comics (WordPress with Inkblot)."""
    name = 'woodenplank'
    long_name = 'Wooden Plank Studios'
    url = 'http://woodenplankstudios.com'
3088
3089
3090
class ElectricBunnyComic(GenericNavigableComic):
    """Retrieve Electric Bunny comics by following the image navigation buttons."""
    # Also available at http://electricbunnycomics.tumblr.com
    name = 'bunny'
    long_name = 'Electric Bunny Comic'
    url = 'http://www.electricbunnycomics.com/View/Comic/153/Welcome+to+Hell'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the parent element of the image whose alt text is 'First'."""
        first_button = get_soup_at_url(cls.url).find('img', alt='First')
        return first_button.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the element wrapping the 'Next' (or 'Back') image, or None."""
        label = 'Next' if next_ else 'Back'
        button = last_soup.find('img', alt=label)
        if not button:
            return None
        return button.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title, images) from the page's Open Graph tags."""
        title = soup.find('meta', property='og:title')['content']
        image_metas = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'img': [meta['content'] for meta in image_metas],
        }
3118
3119
3120
class SheldonComics(GenericNavigableComic):
    """Retrieve Sheldon comics by following the site navigation."""
    # Also available at http://www.gocomics.com/sheldon
    name = 'sheldon'
    long_name = 'Sheldon Comics'
    url = 'http://www.sheldoncomics.com'

    @classmethod
    def get_first_comic_link(cls):
        """Return the anchor with id 'nav-first' from the page at cls.url."""
        home = get_soup_at_url(cls.url)
        return home.find("a", id="nav-first")

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the first usable next (or previous) anchor, or None."""
        nav_id = "nav-next" if next_ else "nav-prev"
        candidates = last_soup.find_all("a", id=nav_id)
        # Anchors whose href is exactly the home page URL are skipped.
        return next(
            (a for a in candidates
             if a['href'] != 'http://www.sheldoncomics.com'),
            None)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title, images) for one comic page."""
        foot = soup.find("div", id="comic-foot")
        imgs = foot.find_all("img")
        # Sanity checks: alt and title must agree, and exactly one image is expected.
        assert all(img['alt'] == img['title'] for img in imgs)
        assert len(imgs) == 1
        title = imgs[0]['title']
        return {
            'title': title,
            'img': [img['src'] for img in imgs],
        }
3151
3152
3153
class Ubertool(GenericNavigableComic):
    """Retrieve Ubertool comics by following the site navigation."""
    # Also available at:
    #   https://ubertool.tumblr.com
    #   https://tapastic.com/series/ubertool
    name = 'ubertool'
    long_name = 'Ubertool'
    url = 'http://ubertoolcomic.com'
    _categories = ('UBERTOOL', )
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_a_comicnavbase_comicnavnext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (images, title, date) for one comic page."""
        title = soup.find('h2', class_='post-title').string
        posted_on = soup.find('span', class_='post-date').string
        day = string_to_date(posted_on, "%B %d, %Y")
        comic_div = soup.find('div', id='comic')
        imgs = comic_div.find_all('img')
        return {
            'img': [img['src'] for img in imgs],
            'title': title,
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
3178
3179
3180
class EarthExplodes(GenericNavigableComic):
    """Retrieve The Earth Explodes comics by following the site navigation."""
    name = 'earthexplodes'
    long_name = 'The Earth Explodes'
    url = 'http://www.earthexplodes.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.earthexplodes.com/comics/000/'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor with id 'next' (or 'prev') from the page."""
        anchor_id = 'next' if next_ else 'prev'
        return last_soup.find('a', id=anchor_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (images, title, alt) for one comic page."""
        title = soup.find('title').string
        image_div = soup.find('div', id='image')
        imgs = image_div.find_all('img')
        # The first image's 'title' attribute is used as alt text when present.
        alt = imgs[0].get('title', '')
        return {
            # Image sources are joined to the site URL.
            'img': [urljoin_wrapper(cls.url, img['src']) for img in imgs],
            'title': title,
            'alt': alt,
        }
3205
3206
3207
class PomComics(GenericNavigableComic):
    """Retrieve Pom Comics / Piece of Me by following the site navigation."""
    name = 'pom'
    long_name = 'Pom Comics / Piece of Me'
    url = 'http://www.pomcomic.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the anchor with class 'btn_first' from the page at cls.url."""
        home = get_soup_at_url(cls.url)
        return home.find('a', class_='btn_first')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the 'btn_next' (or 'btn_prev') anchor from the page."""
        button_class = 'btn_next' if next_ else 'btn_prev'
        return last_soup.find('a', class_=button_class)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title, description, tags, images) for a page."""
        title = soup.find('h1', id="comic-name").string
        desc = soup.find('meta', property='og:description')['content']
        tags = soup.find('meta', attrs={'name': 'keywords'})['content']
        comic_div = soup.find('div', class_='comic')
        imgs = comic_div.find_all('img')
        return {
            'title': title,
            'desc': desc,
            'tags': tags,
            # Image sources are joined to the site URL.
            'img': [urljoin_wrapper(cls.url, img['src']) for img in imgs],
        }
3237
3238
3239
class CubeDrone(GenericEmptyComic, GenericNavigableComic):
    """Class to retrieve Cube Drone comics.

    NOTE(review): GenericEmptyComic comes first in the bases; presumably this
    disables actual retrieval for now - confirm against its definition.
    """
    name = 'cubedrone'
    long_name = 'Cube Drone'
    url = 'http://cube-drone.com/comics'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        The link is the parent element of the 'backward' glyphicon span found
        on the page at cls.url.
        """
        return get_soup_at_url(cls.url).find('span', class_='glyphicon glyphicon-backward').parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns the parent element of the right (next) or left (previous)
        chevron span, or None when the page has no such span.
        """
        class_ = 'glyphicon glyphicon-chevron-' + ('right' if next_ else 'left')
        span = last_soup.find('span', class_=class_)
        # find() returns None when the chevron is absent; report "no link"
        # instead of failing on None.parent.
        return span.parent if span else None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with the Twitter card URL and title, the title and alt
        attributes of the first comic image, and the list of image sources.
        """
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        url2 = soup.find('meta', attrs={'name': 'twitter:url'})['content']
        # NOTE: the publication date is not extracted. A previous attempt read
        # soup.find('h2', class_='comic_title').find('small').string and parsed
        # it with the format "%B %d, %Y, %I:%M %p".
        imgs = soup.find_all('img', class_='comic img-responsive')
        title2 = imgs[0]['title']
        alt = imgs[0]['alt']
        return {
            'url2': url2,
            'title': title,
            'title2': title2,
            'alt': alt,
            'img': [i['src'] for i in imgs],
        }
3274
3275
3276
class MakeItStoopid(GenericNavigableComic):
    """Retrieve Make It Stoopid comics using the page's 'cnav' elements."""
    name = 'stoopid'
    long_name = 'Make it stoopid'
    url = 'http://makeitstoopid.com/comic.php'

    @classmethod
    def get_nav(cls, soup):
        """Return the five navigation elements, with None for those lacking an href.

        The order is: begin, prev, archive, next, end.
        """
        elements = soup.find_all(class_='cnav')
        # The first five elements must equal the remaining ones.
        first_set, second_set = elements[:5], elements[5:]
        assert first_set == second_set
        return [elt if elt.get('href') is not None else None for elt in first_set]

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'begin' navigation element of the page at cls.url."""
        home = get_soup_at_url(cls.url)
        return cls.get_nav(home)[0]

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the 'next' (index 3) or 'prev' (index 1) navigation element."""
        position = 3 if next_ else 1
        return cls.get_nav(last_soup)[position]

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title, images); the title comes from the link."""
        title = link['title']
        imgs = soup.find_all('img', id='comicimg')
        return {
            'title': title,
            'img': [img['src'] for img in imgs],
        }
3310
3311
3312 View Code Duplication
class MarketoonistComics(GenericNavigableComic):
    """Retrieve Marketoonist comics by following the site navigation."""
    name = 'marketoonist'
    long_name = 'Marketoonist'
    url = 'https://marketoonist.com/cartoons'
    get_first_comic_link = simulate_first_link
    get_navi_link = get_link_rel_next
    first_url = 'https://marketoonist.com/2002/10/the-8-types-of-brand-managers-2.html'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (images, date, title) from the page's meta tags."""
        image_metas = soup.find_all('meta', property='og:image')
        published = soup.find('meta', property='article:published_time')['content']
        # Only the first 10 characters are parsed, as a %Y-%m-%d date.
        day = string_to_date(published[:10], "%Y-%m-%d")
        title = soup.find('meta', property='og:title')['content']
        return {
            'img': [meta['content'] for meta in image_metas],
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'title': title,
        }
3335
3336
3337
class ConsoliaComics(GenericNavigableComic):
    """Retrieve Consolia comics by following the site navigation."""
    name = 'consolia'
    long_name = 'consolia'
    url = 'https://consolia-comic.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the anchor with class 'first' from the page at cls.url."""
        home = get_soup_at_url(cls.url)
        return home.find('a', class_='first')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor with class 'next' (or 'prev') from the page."""
        anchor_class = 'next' if next_ else 'prev'
        return last_soup.find('a', class_=anchor_class)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title, images, date) for one comic page."""
        title = soup.find('meta', property='og:title')['content']
        published = soup.find('time')["datetime"]
        day = string_to_date(published, "%Y-%m-%d")
        image_metas = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'img': [meta['content'] for meta in image_metas],
            'day': day.day,
            'month': day.month,
            'year': day.year,
        }
3368
3369
3370
class TuMourrasMoinsBete(GenericNavigableComic):
    """Retrieve Tu Mourras Moins Bete comics by following the blog pager."""
    name = 'mourrasmoinsbete'
    long_name = 'Tu Mourras Moins Bete'
    url = 'http://tumourrasmoinsbete.blogspot.fr'
    _categories = ('FRANCAIS', )
    get_first_comic_link = simulate_first_link
    first_url = 'http://tumourrasmoinsbete.blogspot.fr/2008/06/essai.html'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the pager anchor to the newer (or older) post."""
        if next_:
            pager_id = 'Blog1_blog-pager-newer-link'
        else:
            pager_id = 'Blog1_blog-pager-older-link'
        return last_soup.find('a', id=pager_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (images, author, title) for one post."""
        title = soup.find('title').string
        article = soup.find('div', itemprop='description articleBody')
        imgs = article.find_all('img')
        author = soup.find('span', itemprop='author').string
        return {
            'img': [img['src'] for img in imgs],
            'author': author,
            'title': title,
        }
3395
3396
3397
class GeekAndPoke(GenericNavigableComic):
    """Class to retrieve Geek And Poke comics."""
    name = 'geek'
    long_name = 'Geek And Poke'
    url = 'http://geek-and-poke.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://geek-and-poke.com/geekandpoke/2006/8/27/a-new-place-for-a-not-so-old-blog.html'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        'prev-item' is used for the forward direction and 'next-item' for the
        backward one.
        """
        return last_soup.find('a', class_='prev-item' if next_ else 'next-item')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with title, alt text, description, author, publication
        date (day/month/year) and the list of image URLs joined to cls.url.
        The alt text is an empty string when the post has no usable image.
        """
        title = soup.find('meta', property='og:title')['content']
        desc = soup.find('meta', property='og:description')['content']
        date_str = soup.find('time', class_='published')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        author = soup.find('a', rel='author').string
        # The images live in one of two possible containers.
        div_content = (soup.find('div', class_="body entry-content") or
                       soup.find('div', class_="special-content"))
        imgs = div_content.find_all('img')
        # Images without a 'src' attribute are ignored.
        imgs = [i for i in imgs if i.get('src') is not None]
        # NOTE(review): `'title' not in i` presumably tests the tag's children
        # rather than its attributes, which would make this check always pass -
        # confirm against the Beautiful Soup documentation before tightening it.
        assert all('title' not in i or i['alt'] == i['title'] for i in imgs)
        # Always a string: fall back to "" (not a list) when there is no image.
        alt = imgs[0].get('alt', "") if imgs else ""
        return {
            'title': title,
            'alt': alt,
            'description': desc,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
        }
3435
3436
3437
class GloryOwlComix(GenericNavigableComic):
    """Retrieve Glory Owl comics by following the blog pager."""
    name = 'gloryowl'
    long_name = 'Glory Owl'
    url = 'http://gloryowlcomix.blogspot.fr'
    _categories = ('NSFW', 'FRANCAIS')
    get_first_comic_link = simulate_first_link
    first_url = 'http://gloryowlcomix.blogspot.fr/2013/02/1_7.html'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the pager anchor to the newer (or older) post."""
        if next_:
            pager_id = 'Blog1_blog-pager-newer-link'
        else:
            pager_id = 'Blog1_blog-pager-older-link'
        return last_soup.find('a', id=pager_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (images, author, title) for one post."""
        title = soup.find('title').string
        image_links = soup.find_all('link', rel='image_src')
        author = soup.find('a', rel='author').string
        return {
            'img': [image_link['href'] for image_link in image_links],
            'author': author,
            'title': title,
        }
3462
3463
3464
class GenericTumblrV1(GenericComic):
    """Base class for comics retrieved from Tumblr through the V1 API."""
    _categories = ('TUMBLR', )

    @classmethod
    def get_next_comic(cls, last_comic):
        """Yield the info dict of each post returned by get_posts.

        Posts for which get_comic_info returns None are skipped.
        """
        for post in cls.get_posts(last_comic):
            info = cls.get_comic_info(post)
            if info is None:
                continue
            yield info

    @classmethod
    def get_url_from_post(cls, post):
        """Return the post's 'url' value, printing a message if it is not under cls.url."""
        post_url = post['url']
        if not post_url.startswith(cls.url):
            print("url '%s' does not start with '%s'" % (post_url, cls.url))
        return post_url

    @classmethod
    def get_api_url(cls):
        """Return the API read URL built from cls.url."""
        return urljoin_wrapper(cls.url, '/api/read/')

    @classmethod
    def get_api_url_for_id(cls, tumblr_id):
        """Return the API read URL with an 'id' query for the given integer id."""
        base = cls.get_api_url()
        return base + '?id=%d' % (tumblr_id)

    @classmethod
    def get_comic_info(cls, post):
        """Build the info dict for a post, or return None if it is not a photo post."""
        if post['type'] != 'photo':
            return None
        tumblr_id = int(post['id'])
        api_url = cls.get_api_url_for_id(tumblr_id)
        timestamp = int(post['unix-timestamp'])
        day = datetime.datetime.fromtimestamp(timestamp).date()
        caption = post.find('photo-caption')
        title = caption.string if caption else ""
        tags = ' '.join(tag.string for tag in post.find_all('tag'))
        # Photos may sit in dedicated 'photo' tags and/or directly in the post.
        photo_holders = post.find_all('photo')
        if not photo_holders:
            photo_holders = [post]
        # Several resolutions are offered; the first 'photo-url' is used.
        photo_urls = [holder.find('photo-url') for holder in photo_holders]
        return {
            'url': cls.get_url_from_post(post),
            'url2': post['url-with-slug'],
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'title': title,
            'tags': tags,
            'img': [photo_url.string for photo_url in photo_urls],
            'tumblr-id': tumblr_id,
            'api_url': api_url,
        }

    @classmethod
    def get_posts(cls, last_comic, nb_post_per_call=10):
        """Return the posts published after last_comic, oldest first.

        The API is queried page by page (nb_post_per_call posts per request,
        max 50), from newer to older posts as per the tumblr v1 api; the
        accumulated posts are handed back in chronological order.

        When last_comic is given, accumulation stops at the post whose id is
        last_comic['tumblr-id']. If that post cannot be fetched, or is never
        encountered, a message is printed and nothing is returned.
        """
        waiting_for_id = last_comic['tumblr-id'] if last_comic else None
        collected = []
        if last_comic is not None:
            # Tumblr posts sometimes get deleted. If the previous post is gone,
            # a full scan would look for something that no longer exists, so
            # check it upfront and fail early and clearly.
            last_api_url = cls.get_api_url_for_id(waiting_for_id)
            try:
                get_soup_at_url(last_api_url)
            except urllib.error.HTTPError:
                try:
                    get_soup_at_url(cls.url)
                except urllib.error.HTTPError:
                    print("Did not find previous post nor main url %s" % cls.url)
                else:
                    print("Did not find previous post %s : it might have been deleted" % last_api_url)
                return reversed(collected)
        api_url = cls.get_api_url()
        first_page = get_soup_at_url(api_url).find('posts')
        start, total = int(first_page['start']), int(first_page['total'])
        assert start == 0
        for offset in range(0, total, nb_post_per_call):
            page_url = api_url + '?start=%d&num=%d' % (offset, nb_post_per_call)
            page = get_soup_at_url(page_url).find('posts')
            page_start, page_total = int(page['start']), int(page['total'])
            assert offset == page_start, "%d != %d" % (offset, page_start)
            # This may happen and should be handled in the future
            assert total == page_total, "%d != %d" % (total, page_total)
            for post in page.find_all('post'):
                tumblr_id = int(post['id'])
                if waiting_for_id and waiting_for_id == tumblr_id:
                    # Reached the last known post: everything collected is newer.
                    return reversed(collected)
                collected.append(post)
        if waiting_for_id is None:
            return reversed(collected)
        print("Did not find %s : there might be a problem" % waiting_for_id)
        return []
3565
3566
3567
class SaturdayMorningBreakfastCerealTumblr(GenericTumblrV1):
    """Retrieve Saturday Morning Breakfast Cereal comics through the Tumblr V1 API."""
    # Also available at:
    #   http://www.gocomics.com/saturday-morning-breakfast-cereal
    #   http://www.smbc-comics.com
    name = 'smbc-tumblr'
    long_name = 'Saturday Morning Breakfast Cereal (from Tumblr)'
    url = 'http://smbc-comics.tumblr.com'
    _categories = ('SMBC', )
3575
3576
3577
class IrwinCardozo(GenericTumblrV1):
    """Retrieve Irwin Cardozo comics through the Tumblr V1 API."""
    name = 'irwinc'
    long_name = 'Irwin Cardozo'
    url = 'http://irwincardozocomics.tumblr.com'
3582
3583
3584
class AccordingToDevin(GenericTumblrV1):
    """Retrieve According To Devin comics through the Tumblr V1 API."""
    name = 'devin'
    long_name = 'According To Devin'
    url = 'http://accordingtodevin.tumblr.com'
3589
3590
3591
class ItsTheTieTumblr(GenericTumblrV1):
    """Retrieve It's the tie comics through the Tumblr V1 API."""
    # Also available at:
    #   http://itsthetie.com
    #   https://tapastic.com/series/itsthetie
    name = 'tie-tumblr'
    long_name = "It's the tie (from Tumblr)"
    url = "http://itsthetie.tumblr.com"
    _categories = ('TIE', )
3599
3600
3601
class OctopunsTumblr(GenericTumblrV1):
    """Retrieve Octopuns comics through the Tumblr V1 API."""
    # Also available at http://www.octopuns.net
    name = 'octopuns-tumblr'
    long_name = 'Octopuns (from Tumblr)'
    url = 'http://octopuns.tumblr.com'
3607
3608
3609
class PicturesInBoxesTumblr(GenericTumblrV1):
    """Retrieve Pictures In Boxes comics through the Tumblr V1 API."""
    # Also available at http://www.picturesinboxes.com
    name = 'picturesinboxes-tumblr'
    long_name = 'Pictures in Boxes (from Tumblr)'
    url = 'https://picturesinboxescomic.tumblr.com'
3615
3616
3617
class TubeyToonsTumblr(GenericTumblrV1):
    """Retrieve Tubey Toons comics through the Tumblr V1 API."""
    # Also available at:
    #   http://tapastic.com/series/Tubey-Toons
    #   http://tubeytoons.com
    name = 'tubeytoons-tumblr'
    long_name = 'Tubey Toons (from Tumblr)'
    url = 'https://tubeytoons.tumblr.com'
    _categories = ('TUNEYTOONS', )
3625
3626
3627
class UnearthedComicsTumblr(GenericTumblrV1):
    """Retrieve Unearthed Comics through the Tumblr V1 API."""
    # Also available at:
    #   http://tapastic.com/series/UnearthedComics
    #   http://unearthedcomics.com
    name = 'unearthed-tumblr'
    long_name = 'Unearthed Comics (from Tumblr)'
    url = 'https://unearthedcomics.tumblr.com'
    _categories = ('UNEARTHED', )
3635
3636
3637
class PieComic(GenericTumblrV1):
    """Retrieve Pie Comic comics through the Tumblr V1 API."""
    name = 'pie'
    long_name = 'Pie Comic'
    url = "http://piecomic.tumblr.com"
3642
3643
3644
class MrEthanDiamond(GenericTumblrV1):
    """Retrieve Mr Ethan Diamond comics through the Tumblr V1 API."""
    name = 'diamond'
    long_name = 'Mr Ethan Diamond'
    url = 'http://mrethandiamond.tumblr.com'
3649
3650
3651
class Flocci(GenericTumblrV1):
    """Retrieve floccinaucinihilipilification comics through the Tumblr V1 API."""
    name = 'flocci'
    long_name = 'floccinaucinihilipilification'
    url = "http://floccinaucinihilipilificationa.tumblr.com"
3656
3657
3658
class UpAndOut(GenericTumblrV1):
    """Retrieve Up & Out comics through the Tumblr V1 API."""
    # Also available at http://tapastic.com/series/UP-and-OUT
    name = 'upandout'
    long_name = 'Up And Out (from Tumblr)'
    url = 'http://upandoutcomic.tumblr.com'
3664
3665
3666
class Pundemonium(GenericTumblrV1):
    """Retrieve Pundemonium comics through the Tumblr V1 API."""
    name = 'pundemonium'
    long_name = 'Pundemonium'
    url = 'http://monstika.tumblr.com'
3671
3672
3673
class PoorlyDrawnLinesTumblr(GenericTumblrV1):
    """Retrieve Poorly Drawn Lines comics through the Tumblr V1 API."""
    # Also available at http://poorlydrawnlines.com
    name = 'poorlydrawn-tumblr'
    long_name = 'Poorly Drawn Lines (from Tumblr)'
    url = 'http://pdlcomics.tumblr.com'
    _categories = ('POORLYDRAWN', )
3680
3681
3682
class PearShapedComics(GenericTumblrV1):
    """Retrieve Pear-Shaped Comics through the Tumblr V1 API."""
    name = 'pearshaped'
    long_name = 'Pear-Shaped Comics'
    url = 'http://pearshapedcomics.com'
3687
3688
3689
class PondScumComics(GenericTumblrV1):
    """Retrieve Pond Scum comics through the Tumblr V1 API."""
    name = 'pond'
    long_name = 'Pond Scum'
    url = 'http://pondscumcomic.tumblr.com'
3694
3695
3696
class MercworksTumblr(GenericTumblrV1):
    """Retrieve Mercworks comics through the Tumblr V1 API."""
    # Also available at http://mercworks.net
    name = 'mercworks-tumblr'
    long_name = 'Mercworks (from Tumblr)'
    url = 'http://mercworks.tumblr.com'
3702
3703
3704
class OwlTurdTumblr(GenericTumblrV1):
    """Retrieve Owl Turd comics through the Tumblr V1 API."""
    # Also available at http://tapastic.com/series/Owl-Turd-Comix
    name = 'owlturd-tumblr'
    long_name = 'Owl Turd (from Tumblr)'
    url = 'http://owlturdcomix.tumblr.com'
    _categories = ('OWLTURD', )
3711
3712
3713
class VectorBelly(GenericTumblrV1):
    """Retrieve Vector Belly comics through the Tumblr V1 API."""
    # Also available at http://vectorbelly.com
    name = 'vector'
    long_name = 'Vector Belly'
    url = 'http://vectorbelly.tumblr.com'
3719
3720
3721
class GoneIntoRapture(GenericTumblrV1):
    """Retrieve Gone Into Rapture comics through the Tumblr V1 API."""
    # Also available at:
    #   http://goneintorapture.tumblr.com
    #   http://tapastic.com/series/Goneintorapture
    name = 'rapture'
    long_name = 'Gone Into Rapture'
    url = 'http://goneintorapture.com'
3728
3729
3730
class TheOatmealTumblr(GenericTumblrV1):
    """Retrieve The Oatmeal comics through the Tumblr V1 API."""
    # Also available at http://theoatmeal.com
    name = 'oatmeal-tumblr'
    long_name = 'The Oatmeal (from Tumblr)'
    url = 'http://oatmeal.tumblr.com'
3736
3737
3738
class HeckIfIKnowComicsTumblr(GenericTumblrV1):
    """Retrieve Heck If I Know comics through the Tumblr V1 API."""
    # Also available at http://tapastic.com/series/Regular
    name = 'heck-tumblr'
    long_name = 'Heck if I Know comics (from Tumblr)'
    url = 'http://heckifiknowcomics.com'
3744
3745
3746
class MyJetPack(GenericTumblrV1):
    """Retrieve My Jet Pack comics through the Tumblr V1 API."""
    name = 'jetpack'
    long_name = 'My Jet Pack'
    url = 'http://myjetpack.tumblr.com'
3751
3752
3753
class CheerUpEmoKidTumblr(GenericTumblrV1):
    """Retrieve Cheer Up Emo Kid comics through the Tumblr V1 API."""
    # Also available at:
    #   http://www.cheerupemokid.com
    #   http://tapastic.com/series/CUEK
    name = 'cuek-tumblr'
    long_name = 'Cheer Up Emo Kid (from Tumblr)'
    url = 'https://enzocomics.tumblr.com'
3760
3761
3762
class ForLackOfABetterComic(GenericTumblrV1):
    """Retrieve For Lack Of A Better Comic through the Tumblr V1 API."""
    # Also available at http://forlackofabettercomic.com
    name = 'lack'
    long_name = 'For Lack Of A Better Comic'
    url = 'http://forlackofabettercomic.tumblr.com'
3768
3769
3770
class ZenPencilsTumblr(GenericTumblrV1):
    """Retrieve Zen Pencils comics through the Tumblr V1 API."""
    # Also available at:
    #   http://zenpencils.com
    #   http://www.gocomics.com/zen-pencils
    name = 'zenpencils-tumblr'
    long_name = 'Zen Pencils (from Tumblr)'
    url = 'http://zenpencils.tumblr.com'
    _categories = ('ZENPENCILS', )
3778
3779
3780
class ThreeWordPhraseTumblr(GenericTumblrV1):
    """Retrieve Three Word Phrase comics through the Tumblr V1 API."""
    # Also available at http://threewordphrase.com
    name = 'threeword-tumblr'
    long_name = 'Three Word Phrase (from Tumblr)'
    url = 'http://threewordphrase.tumblr.com'
3786
3787
3788
class TimeTrabbleTumblr(GenericTumblrV1):
    """Retrieve Time Trabble comics through the Tumblr V1 API."""
    # Also available at http://timetrabble.com
    name = 'timetrabble-tumblr'
    long_name = 'Time Trabble (from Tumblr)'
    url = 'http://timetrabble.tumblr.com'
3794
3795
3796
class SafelyEndangeredTumblr(GenericTumblrV1):
    """Retrieve Safely Endangered comics through the Tumblr V1 API."""
    # Also available at http://www.safelyendangered.com
    name = 'endangered-tumblr'
    long_name = 'Safely Endangered (from Tumblr)'
    url = 'http://tumblr.safelyendangered.com'
3802
3803
3804
class MouseBearComedyTumblr(GenericTumblrV1):
    """Retrieve Mouse Bear Comedy comics through the Tumblr V1 API."""
    # Also available at http://www.mousebearcomedy.com
    name = 'mousebear-tumblr'
    long_name = 'Mouse Bear Comedy (from Tumblr)'
    url = 'http://mousebearcomedy.tumblr.com'
3810
3811
3812
class BouletCorpTumblr(GenericTumblrV1):
    """Retrieve Boulet Corp comics through the Tumblr V1 API."""
    # Also available at http://www.bouletcorp.com
    name = 'boulet-tumblr'
    long_name = 'Boulet Corp (from Tumblr)'
    url = 'https://bouletcorp.tumblr.com'
    _categories = ('BOULET', )
3819
3820
3821
class TheAwkwardYetiTumblr(GenericTumblrV1):
    """The Awkward Yeti comics, as published on Tumblr."""
    # Also on http://www.gocomics.com/the-awkward-yeti
    # Also on http://theawkwardyeti.com
    # Also on https://tapastic.com/series/TheAwkwardYeti
    url = 'http://larstheyeti.tumblr.com'
    name = 'yeti-tumblr'
    long_name = 'The Awkward Yeti (from Tumblr)'
    _categories = ('YETI',)
3830
3831
3832
class NellucNhoj(GenericTumblrV1):
    """Nelluc Nhoj comics, retrieved with the generic Tumblr logic."""
    url = 'http://nellucnhoj.com'
    name = 'nhoj'
    long_name = 'Nelluc Nhoj'
3837
3838
3839
class DownTheUpwardSpiralTumblr(GenericTumblrV1):
    """Down The Upward Spiral comics, as published on Tumblr."""
    # Also on http://www.downtheupwardspiral.com
    # Also on https://tapastic.com/series/Down-the-Upward-Spiral
    url = 'http://downtheupwardspiral.tumblr.com'
    name = 'spiral-tumblr'
    long_name = 'Down the Upward Spiral (from Tumblr)'
3846
3847
3848
class AsPerUsualTumblr(GenericTumblrV1):
    """Class to retrieve As Per Usual comics."""
    # Also on https://tapastic.com/series/AsPerUsual
    name = 'usual-tumblr'
    long_name = 'As Per Usual (from Tumblr)'
    url = 'http://as-per-usual.tumblr.com'
    # The comic classes in this file declare their categories through the
    # underscore-prefixed `_categories` attribute; this class used the
    # un-prefixed spelling, so its category was presumably never picked up.
    _categories = ('DAMILEE', )
    # Legacy spelling, kept as an alias in case anything reads it directly.
    categories = _categories
3855
3856
3857
class HotComicsForCoolPeopleTumblr(GenericTumblrV1):
    """Class to retrieve Hot Comics For Cool People."""
    # Also on https://tapastic.com/series/Hot-Comics-For-Cool-People
    # Also on http://hotcomics.biz (links to tumblr)
    # Also on http://hcfcp.com (links to tumblr)
    name = 'hotcomics-tumblr'
    long_name = 'Hot Comics For Cool People (from Tumblr)'
    url = 'http://hotcomicsforcoolpeople.tumblr.com'
    # The comic classes in this file declare their categories through the
    # underscore-prefixed `_categories` attribute; this class used the
    # un-prefixed spelling, so its category was presumably never picked up.
    _categories = ('DAMILEE', )
    # Legacy spelling, kept as an alias in case anything reads it directly.
    categories = _categories
3866
3867
3868
class OneOneOneOneComicTumblr(GenericTumblrV1):
    """1111 Comics, as published on Tumblr."""
    # Also on http://www.1111comics.me
    # Also on https://tapastic.com/series/1111-Comics
    url = 'http://comics1111.tumblr.com'
    name = '1111-tumblr'
    long_name = '1111 Comics (from Tumblr)'
    _categories = ('ONEONEONEONE',)
3876
3877
3878
class JhallComicsTumblr(GenericTumblrV1):
    """Jhall Comics, as published on Tumblr."""
    # Also on http://jhallcomics.com
    url = 'http://jhallcomics.tumblr.com'
    name = 'jhall-tumblr'
    long_name = 'Jhall Comics (from Tumblr)'
3884
3885
3886
class BerkeleyMewsTumblr(GenericTumblrV1):
    """Berkeley Mews comics, as published on Tumblr."""
    # Also on http://www.gocomics.com/berkeley-mews
    # Also on http://www.berkeleymews.com
    url = 'http://mews.tumblr.com'
    name = 'berkeley-tumblr'
    long_name = 'Berkeley Mews (from Tumblr)'
    _categories = ('BERKELEY',)
3894
3895
3896
class JoanCornellaTumblr(GenericTumblrV1):
    """Joan Cornella comics, as published on Tumblr."""
    # Also on http://joancornella.net
    url = 'http://cornellajoan.tumblr.com'
    name = 'cornella-tumblr'
    long_name = 'Joan Cornella (from Tumblr)'
3902
3903
3904
class RespawnComicTumblr(GenericTumblrV1):
    """Respawn Comic, as published on Tumblr."""
    # Also on http://respawncomic.com
    url = 'https://respawncomic.tumblr.com'
    name = 'respawn-tumblr'
    long_name = 'Respawn Comic (from Tumblr)'
3910
3911
3912
class ChrisHallbeckTumblr(GenericTumblrV1):
    """Class to retrieve Chris Hallbeck comics."""
    # Also on https://tapastic.com/ChrisHallbeck
    # Also on http://maximumble.com
    # Also on http://minimumble.com
    # Also on http://thebookofbiff.com
    name = 'hallbeck-tumblr'
    # Display name: spelled "Hallbeck", matching the class name and the URL
    # (it previously read "Hallback").
    long_name = 'Chris Hallbeck (from Tumblr)'
    url = 'https://chrishallbeck.tumblr.com'
    # NOTE(review): the category key keeps its historical 'HALLBACK' spelling
    # on purpose - other classes may share it; rename them together if at all.
    _categories = ('HALLBACK', )
3922
3923
3924
class ComicNuggets(GenericTumblrV1):
    """Comic Nuggets comics, retrieved with the generic Tumblr logic."""
    url = 'http://comicnuggets.com'
    name = 'nuggets'
    long_name = 'Comic Nuggets'
3929
3930
3931
class PigeonGazetteTumblr(GenericTumblrV1):
    """The Pigeon Gazette comics, as published on Tumblr."""
    # Also on https://tapastic.com/series/The-Pigeon-Gazette
    url = 'http://thepigeongazette.tumblr.com'
    name = 'pigeon-tumblr'
    long_name = 'The Pigeon Gazette (from Tumblr)'
3937
3938
3939
class CancerOwl(GenericTumblrV1):
    """Cancer Owl comics, as published on Tumblr."""
    # Also on http://cancerowl.com
    url = 'http://cancerowl.tumblr.com'
    name = 'cancerowl-tumblr'
    long_name = 'Cancer Owl (from Tumblr)'
3945
3946
3947
class FowlLanguageTumblr(GenericTumblrV1):
    """Fowl Language comics, as published on Tumblr."""
    # Also on http://www.fowllanguagecomics.com
    # Also on http://tapastic.com/series/Fowl-Language-Comics
    # Also on http://www.gocomics.com/fowl-language
    url = 'http://fowllanguagecomics.tumblr.com'
    name = 'fowllanguage-tumblr'
    long_name = 'Fowl Language Comics (from Tumblr)'
    _categories = ('FOWLLANGUAGE',)
3956
3957
3958
class TheOdd1sOutTumblr(GenericTumblrV1):
    """The Odd 1s Out comics, as published on Tumblr."""
    # Also on http://theodd1sout.com
    # Also on https://tapastic.com/series/Theodd1sout
    url = 'http://theodd1sout.tumblr.com'
    name = 'theodd-tumblr'
    long_name = 'The Odd 1s Out (from Tumblr)'
3965
3966
3967
class TheUnderfoldTumblr(GenericTumblrV1):
    """The Underfold comics, as published on Tumblr."""
    # Also on http://theunderfold.com
    url = 'http://theunderfold.tumblr.com'
    name = 'underfold-tumblr'
    long_name = 'The Underfold (from Tumblr)'
3973
3974
3975
class LolNeinTumblr(GenericTumblrV1):
    """Lol Nein comics, as published on Tumblr."""
    # Also on http://lolnein.com
    url = 'http://lolneincom.tumblr.com'
    name = 'lolnein-tumblr'
    long_name = 'Lol Nein (from Tumblr)'
3981
3982
3983
class FatAwesomeComicsTumblr(GenericTumblrV1):
    """Fat Awesome comics, as published on Tumblr."""
    # Also on http://fatawesome.com/comics
    url = 'http://fatawesomecomedy.tumblr.com'
    name = 'fatawesome-tumblr'
    long_name = 'Fat Awesome (from Tumblr)'
3989
3990
3991
class TheWorldIsFlatTumblr(GenericTumblrV1):
    """The World Is Flat comics, retrieved with the generic Tumblr logic."""
    # Also on https://tapastic.com/series/The-World-is-Flat
    url = 'http://theworldisflatcomics.com'
    name = 'flatworld-tumblr'
    long_name = 'The World Is Flat (from Tumblr)'
3997
3998
3999
class DorrisMc(GenericTumblrV1):
    """Dorris Mc comics, retrieved with the generic Tumblr logic."""
    # Also on http://www.gocomics.com/dorris-mccomics
    url = 'http://dorrismccomics.com'
    name = 'dorrismc'
    long_name = 'Dorris Mc'
4005
4006
4007
class LeleozTumblr(GenericEmptyComic, GenericTumblrV1):
    """Leleoz comics, as published on Tumblr (GenericEmptyComic listed first)."""
    # Also on https://tapastic.com/series/Leleoz
    url = 'http://leleozcomics.tumblr.com'
    name = 'leleoz-tumblr'
    long_name = 'Leleoz (from Tumblr)'
4013
4014
4015
class MoonBeardTumblr(GenericTumblrV1):
    """Moon Beard comics, retrieved with the generic Tumblr logic."""
    # Also on http://moonbeard.com
    # Also on http://www.webtoons.com/en/comedy/moon-beard/list?title_no=471
    url = 'http://blog.squiresjam.es'
    name = 'moonbeard-tumblr'
    long_name = 'Moon Beard (from Tumblr)'
4022
4023
4024
class AComik(GenericTumblrV1):
    """A Comik comics, retrieved with the generic Tumblr logic."""
    url = 'http://acomik.com'
    name = 'comik'
    long_name = 'A Comik'
4029
4030
4031
class ClassicRandy(GenericTumblrV1):
    """Classic Randy comics, as published on Tumblr."""
    url = 'http://classicrandy.tumblr.com'
    name = 'randy'
    long_name = 'Classic Randy'
4036
4037
4038
class DagssonTumblr(GenericTumblrV1):
    """Hugleikur Dagsson comics, as published on Tumblr."""
    # Also on http://www.dagsson.com
    url = 'https://hugleikurdagsson.tumblr.com'
    name = 'dagsson-tumblr'
    long_name = 'Dagsson Hugleikur (from Tumblr)'
4044
4045
4046
class LinsEditionsTumblr(GenericTumblrV1):
    """L.I.N.S. Editions comics, as published on Tumblr."""
    # Also on https://linsedition.com
    # Now on http://warandpeas.tumblr.com
    url = 'https://linscomics.tumblr.com'
    name = 'lins-tumblr'
    long_name = 'L.I.N.S. Editions (from Tumblr)'
    _categories = ('LINS',)
4054
4055
4056
class WarAndPeasTumblr(GenericTumblrV1):
    """War And Peas comics, as published on Tumblr."""
    # Was on https://linscomics.tumblr.com
    url = 'http://warandpeas.tumblr.com'
    name = 'warandpeas-tumblr'
    long_name = 'War And Peas (from Tumblr)'
    _categories = ('WARANDPEAS',)
4063
4064
4065
class OrigamiHotDish(GenericTumblrV1):
    """Origami Hot Dish comics, retrieved with the generic Tumblr logic."""
    url = 'http://origamihotdish.com'
    name = 'origamihotdish'
    long_name = 'Origami Hot Dish'
4070
4071
4072
class HitAndMissComicsTumblr(GenericTumblrV1):
    """Hit and Miss Comics, as published on Tumblr."""
    url = 'https://hitandmisscomics.tumblr.com'
    name = 'hitandmiss'
    long_name = 'Hit and Miss Comics'
4077
4078
4079
class HMBlanc(GenericTumblrV1):
    """HM Blanc comics, as published on Tumblr."""
    url = 'http://hmblanc.tumblr.com'
    name = 'hmblanc'
    long_name = 'HM Blanc'
4084
4085
4086
class TalesOfAbsurdityTumblr(GenericTumblrV1):
    """Tales Of Absurdity comics, as published on Tumblr."""
    # Also on http://talesofabsurdity.com
    # Also on http://tapastic.com/series/Tales-Of-Absurdity
    url = 'http://talesofabsurdity.tumblr.com'
    name = 'absurdity-tumblr'
    long_name = 'Tales of Absurdity (from Tumblr)'
    _categories = ('ABSURDITY',)
4094
4095
4096
class RobbieAndBobby(GenericTumblrV1):
    """Robbie And Bobby comics, as published on Tumblr."""
    # Also on http://robbieandbobby.com
    url = 'http://robbieandbobby.tumblr.com'
    name = 'robbie-tumblr'
    long_name = 'Robbie And Bobby (from Tumblr)'
4102
4103
4104
class ElectricBunnyComicTumblr(GenericTumblrV1):
    """Electric Bunny Comics, as published on Tumblr."""
    # Also on http://www.electricbunnycomics.com/View/Comic/153/Welcome+to+Hell
    url = 'http://electricbunnycomics.tumblr.com'
    name = 'bunny-tumblr'
    long_name = 'Electric Bunny Comic (from Tumblr)'
4110
4111
4112
class Hoomph(GenericTumblrV1):
    """Hoomph comics, retrieved with the generic Tumblr logic."""
    url = 'http://hoom.ph'
    name = 'hoomph'
    long_name = 'Hoomph'
4117
4118
4119
class BFGFSTumblr(GenericTumblrV1):
    """BFGFS comics, as published on Tumblr."""
    # Also on https://tapastic.com/series/BFGFS
    # Also on http://bfgfs.com
    url = 'https://bfgfs.tumblr.com'
    name = 'bfgfs-tumblr'
    long_name = 'BFGFS (from Tumblr)'
4126
4127
4128
class DoodleForFood(GenericTumblrV1):
    """Doodle For Food comics, retrieved with the generic Tumblr logic."""
    # Also on https://tapastic.com/series/Doodle-for-Food
    url = 'http://www.doodleforfood.com'
    name = 'doodle'
    long_name = 'Doodle For Food'
4134
4135
4136
class CassandraCalinTumblr(GenericTumblrV1):
    """C. Cassandra (Cassandra Calin) comics, as published on Tumblr."""
    # Also on http://cassandracalin.com
    # Also on https://tapastic.com/series/C-Cassandra-comics
    url = 'http://c-cassandra.tumblr.com'
    name = 'cassandra-tumblr'
    long_name = 'Cassandra Calin (from Tumblr)'
4143
4144
4145
class DougWasTaken(GenericTumblrV1):
    """Doug Was Taken comics, as published on Tumblr."""
    url = 'https://dougwastaken.tumblr.com'
    name = 'doug'
    long_name = 'Doug Was Taken'
4150
4151
4152
class MandatoryRollerCoaster(GenericTumblrV1):
    """Mandatory Roller Coaster comics, retrieved with the generic Tumblr logic."""
    url = 'http://mandatoryrollercoaster.com'
    name = 'rollercoaster'
    long_name = 'Mandatory Roller Coaster'
4157
4158
4159
class CEstPasEnRegardantSesPompes(GenericTumblrV1):
    """C'Est Pas En Regardant Ses Pompes (...) comics, as published on Tumblr."""
    url = 'http://cperspqccltt.tumblr.com'
    name = 'cperspqccltt'
    long_name = 'C Est Pas En Regardant Ses Pompes (...)'
4164
4165
4166
class TheGrohlTroll(GenericTumblrV1):
    """The Grohl Troll comics, retrieved with the generic Tumblr logic."""
    url = 'http://thegrohltroll.com'
    name = 'grohltroll'
    long_name = 'The Grohl Troll'
4171
4172
4173
class WebcomicName(GenericTumblrV1):
    """Webcomic Name comics, retrieved with the generic Tumblr logic."""
    url = 'http://webcomicname.com'
    name = 'webcomicname'
    long_name = 'Webcomic Name'
4178
4179
4180
class BooksOfAdam(GenericTumblrV1):
    """Books of Adam comics, as published on Tumblr."""
    # Also on http://www.booksofadam.com
    url = 'http://booksofadam.tumblr.com'
    name = 'booksofadam'
    long_name = 'Books of Adam'
4186
4187
4188
class HarkAVagrant(GenericTumblrV1):
    """Hark A Vagrant comics, as published on Tumblr."""
    # Also on http://www.harkavagrant.com
    url = 'http://beatonna.tumblr.com'
    name = 'hark-tumblr'
    long_name = 'Hark A Vagrant (from Tumblr)'
4194
4195
4196
class OurSuperAdventureTumblr(GenericTumblrV1):
    """Our Super Adventure comics, as published on Tumblr."""
    # Also on https://tapastic.com/series/Our-Super-Adventure
    # Also on http://www.oursuperadventure.com
    # http://sarahgraley.com
    url = 'http://sarahssketchbook.tumblr.com'
    name = 'superadventure-tumblr'
    long_name = 'Our Super Adventure (from Tumblr)'
4204
4205
4206
class JakeLikesOnions(GenericTumblrV1):
    """Jake Likes Onions comics, retrieved with the generic Tumblr logic."""
    url = 'http://jakelikesonions.com'
    name = 'jake'
    long_name = 'Jake Likes Onions'
4211
4212
4213
class InYourFaceCake(GenericTumblrV1):
    """In Your Face Cake comics, as published on Tumblr."""
    url = 'https://in-your-face-cake.tumblr.com'
    name = 'inyourfacecake-tumblr'
    long_name = 'In Your Face Cake (from Tumblr)'
4218
4219
4220
class Robospunk(GenericTumblrV1):
    """Robospunk comics, retrieved with the generic Tumblr logic."""
    url = 'http://robospunk.com'
    name = 'robospunk'
    long_name = 'Robospunk'
4225
4226
4227
class BananaTwinky(GenericTumblrV1):
    """Banana Twinky comics, as published on Tumblr."""
    url = 'https://bananatwinky.tumblr.com'
    name = 'banana'
    long_name = 'Banana Twinky'
4232
4233
4234
class YesterdaysPopcornTumblr(GenericTumblrV1):
    """Yesterday's Popcorn comics, as published on Tumblr."""
    # Also on http://www.yesterdayspopcorn.com
    # Also on https://tapastic.com/series/Yesterdays-Popcorn
    url = 'http://yesterdayspopcorn.tumblr.com'
    name = 'popcorn-tumblr'
    long_name = "Yesterday's Popcorn (from Tumblr)"
4241
4242
4243
class TwistedDoodles(GenericTumblrV1):
    """Twisted Doodles comics, retrieved with the generic Tumblr logic."""
    url = 'http://www.twisteddoodles.com'
    name = 'twisted'
    long_name = 'Twisted Doodles'
4248
4249
4250
class UbertoolTumblr(GenericTumblrV1):
    """Ubertool comics, as published on Tumblr."""
    # Also on http://ubertoolcomic.com
    # Also on https://tapastic.com/series/ubertool
    url = 'https://ubertool.tumblr.com'
    name = 'ubertool-tumblr'
    long_name = 'Ubertool (from Tumblr)'
    _categories = ('UBERTOOL',)
4258
4259
4260
class LittleLifeLinesTumblr(GenericTumblrV1):
    """Little Life Lines comics, as published on Tumblr."""
    # Also on http://www.littlelifelines.com
    url = 'https://little-life-lines.tumblr.com'
    name = 'life-tumblr'
    long_name = 'Little Life Lines (from Tumblr)'
4266
4267
4268
class TheyCanTalk(GenericTumblrV1):
    """They Can Talk comics, retrieved with the generic Tumblr logic."""
    url = 'http://theycantalk.com'
    name = 'theycantalk'
    long_name = 'They Can Talk'
4273
4274
4275
class Will5NeverCome(GenericTumblrV1):
    """Will 5:00 Never Come comics, retrieved with the generic Tumblr logic."""
    url = 'http://will5nevercome.com'
    name = 'will5'
    long_name = 'Will 5:00 Never Come ?'
4280
4281
4282
class Sephko(GenericTumblrV1):
    """Sephko comics, as published on Tumblr."""
    # Also on http://www.sephko.com
    url = 'https://sephko.tumblr.com'
    name = 'sephko'
    long_name = 'Sephko'
4288
4289
4290
class BlazersAtDawn(GenericTumblrV1):
    """Blazers At Dawn comics, as published on Tumblr."""
    url = 'http://blazersatdawn.tumblr.com'
    name = 'blazers'
    long_name = 'Blazers At Dawn'
4295
4296
4297
class ArtByMoga(GenericEmptyComic, GenericTumblrV1):  # Deactivated because it downloads too many things
    """Art By Moga comics, as published on Tumblr (currently deactivated)."""
    url = 'http://artbymoga.tumblr.com'
    name = 'moga'
    long_name = 'Art By Moga'
4302
4303
4304
class VerbalVomitTumblr(GenericTumblrV1):
    """Verbal Vomit comics, as published on Tumblr."""
    # Also on http://www.verbal-vomit.com
    url = 'http://verbalvomits.tumblr.com'
    name = 'vomit-tumblr'
    long_name = 'Verbal Vomit (from Tumblr)'
4310
4311
4312 View Code Duplication
class LibraryComic(GenericTumblrV1):
    """LibraryComic comics, as published on Tumblr."""
    # Also on http://librarycomic.com
    url = 'https://librarycomic.tumblr.com'
    name = 'library-tumblr'
    long_name = 'LibraryComic (from Tumblr)'
4318
4319
4320
class TizzyStitchBirdTumblr(GenericTumblrV1):
    """Tizzy Stitch Bird comics, as published on Tumblr."""
    # Also on http://tizzystitchbird.com
    # Also on https://tapastic.com/series/TizzyStitchbird
    # Also on http://www.webtoons.com/en/challenge/tizzy-stitchbird/list?title_no=50082
    url = 'http://tizzystitchbird.tumblr.com'
    name = 'tizzy-tumblr'
    long_name = 'Tizzy Stitch Bird (from Tumblr)'
4328
4329
4330
class VictimsOfCircumsolarTumblr(GenericTumblrV1):
    """Victims Of Circumsolar comics, as published on Tumblr."""
    # Also on http://www.victimsofcircumsolar.com
    url = 'https://victimsofcomics.tumblr.com'
    name = 'circumsolar-tumblr'
    long_name = 'Victims Of Circumsolar (from Tumblr)'
4336
4337
4338
class RockPaperCynicTumblr(GenericTumblrV1):
    """Rock Paper Cynic comics, as published on Tumblr."""
    # Also on http://www.rockpapercynic.com
    # Also on https://tapastic.com/series/rockpapercynic
    url = 'http://rockpapercynic.tumblr.com'
    name = 'rpc-tumblr'
    long_name = 'Rock Paper Cynic (from Tumblr)'
4345
4346
4347
class DeadlyPanelTumblr(GenericTumblrV1):
    """Deadly Panel comics, as published on Tumblr."""
    # Also on http://www.deadlypanel.com
    # Also on https://tapastic.com/series/deadlypanel
    url = 'https://deadlypanel.tumblr.com'
    name = 'deadly-tumblr'
    long_name = 'Deadly Panel (from Tumblr)'
4354
4355
4356
class CatanaComics(GenericTumblrV1):
    """Catana comics, retrieved with the generic Tumblr logic."""
    url = 'http://www.catanacomics.com'
    name = 'catana'
    long_name = 'Catana'
4361
4362
4363
class AngryAtNothingTumblr(GenericTumblrV1):
    """Angry At Nothing comics, as published on Tumblr."""
    # Also on http://www.angryatnothing.net
    # Also on http://tapastic.com/series/Comics-yeah-definitely-comics-
    url = 'http://angryatnothing.tumblr.com'
    name = 'angry-tumblr'
    long_name = 'Angry At Nothing (from Tumblr)'
4370
4371
4372
class ShanghaiTango(GenericTumblrV1):
    """Shanghai Tango comics, as published on Tumblr."""
    url = 'http://tango2010weibo.tumblr.com'
    name = 'tango'
    long_name = 'Shanghai Tango'
4377
4378
4379
class OffTheLeashDogTumblr(GenericTumblrV1):
    """Off The Leash Dog comics, as published on Tumblr."""
    # Also on http://offtheleashdogcartoons.com
    # Also on http://www.rupertfawcettcartoons.com
    url = 'http://rupertfawcettsdoggyblog.tumblr.com'
    name = 'offtheleash-tumblr'
    long_name = 'Off The Leash Dog (from Tumblr)'
    _categories = ('FAWCETT',)
4387
4388
4389
class ImogenQuestTumblr(GenericTumblrV1):
    """Imogen Quest comics, as published on Tumblr."""
    # Also on http://imogenquest.net
    url = 'http://imoquest.tumblr.com'
    name = 'imogen-tumblr'
    long_name = 'Imogen Quest (from Tumblr)'
4395
4396
4397
class Shitfest(GenericTumblrV1):
    """Shitfest comics, retrieved with the generic Tumblr logic."""
    url = 'http://shitfestcomic.com'
    name = 'shitfest'
    long_name = 'Shitfest'
4402
4403
4404
class IceCreamSandwichComics(GenericTumblrV1):
    """Ice Cream Sandwich Comics, retrieved with the generic Tumblr logic."""
    url = 'http://icecreamsandwichcomics.com'
    name = 'icecream'
    long_name = 'Ice Cream Sandwich Comics'
4409
4410
4411
class Dustinteractive(GenericTumblrV1):
    """Dustinteractive comics, retrieved with the generic Tumblr logic."""
    url = 'http://dustinteractive.com'
    name = 'dustinteractive'
    long_name = 'Dustinteractive'
4416
4417
4418
class StickyCinemaFloor(GenericTumblrV1):
    """Sticky Cinema Floor comics, as published on Tumblr."""
    url = 'https://stickycinemafloor.tumblr.com'
    name = 'stickycinema'
    long_name = 'Sticky Cinema Floor'
4423
4424
4425
class HorovitzComics(GenericListableComic):
    """Shared logic for the comics hosted on horovitzcomics.com.

    Subclasses must set `link_re` to a compiled pattern whose first group
    captures the comic number from an archive link's href.
    """
    _categories = ('HOROVITZ',)
    url = 'http://www.horovitzcomics.com'
    link_re = NotImplemented
    # Captures three numeric path segments of the image URL, read as
    # year, month and day by get_comic_info.
    img_re = re.compile('.*comics/([0-9]*)/([0-9]*)/([0-9]*)/.*$')
    get_url_from_archive_element = join_cls_url_to_href

    @classmethod
    def get_archive_elements(cls):
        """Return the archive-page links matching `link_re`, in reversed page order."""
        archive_soup = get_soup_at_url('http://www.horovitzcomics.com/comics/archive/')
        matching_links = archive_soup.find_all('a', href=cls.link_re)
        return reversed(matching_links)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary for one comic.

        `link` is the archive link (number from its href, title from its text)
        and `soup` is the parsed comic page (image and date from the single
        <img id="comic"> element).
        """
        num = int(cls.link_re.match(link['href']).group(1))
        title = link.string
        comic_imgs = soup.find_all('img', id='comic')
        assert len(comic_imgs) == 1
        date_parts = cls.img_re.match(comic_imgs[0]['src']).groups()
        year, month, day = (int(part) for part in date_parts)
        return {
            'title': title,
            'day': day,
            'month': month,
            'year': year,
            'img': [img['src'] for img in comic_imgs],
            'num': num,
        }
4456
4457
4458
class HorovitzNew(HorovitzComics):
    """Horovitz comics listed under /comics/new/."""
    link_re = re.compile(r'^/comics/new/([0-9]+)$')
    name = 'horovitznew'
    long_name = 'Horovitz New'
4463
4464
4465
class HorovitzClassic(HorovitzComics):
    """Horovitz comics listed under /comics/classic/."""
    link_re = re.compile(r'^/comics/classic/([0-9]+)$')
    name = 'horovitzclassic'
    long_name = 'Horovitz Classic'
4470
4471
4472
class GenericGoComic(GenericNavigableComic):
    """Shared logic for the comics hosted on gocomics.com."""
    _categories = ('GOCOMIC',)

    @classmethod
    def get_first_comic_link(cls):
        """Return the link to the first comic, looked up on the page at `cls.url`."""
        landing_page = get_soup_at_url(cls.url)
        return landing_page.find('a', class_='fa btn btn-outline-default btn-circle fa-backward sm ')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the next (if `next_` is true) or previous link found in `last_soup`."""
        # The class strings (trailing space included) are matched as-is
        # against the page markup.
        if next_:
            button_class = 'fa btn btn-outline-default btn-circle fa-caret-right hidden-sm-up sm '
        else:
            button_class = 'fa btn btn-outline-default btn-circle fa-caret-left sm '
        return last_soup.find('a', class_=button_class)

    @classmethod
    def get_url_from_link(cls, link):
        """Return the comic URL: the link's href joined to the gocomics.com root."""
        return urljoin_wrapper('http://www.gocomics.com', link['href'])

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary (date, images, author, tags) from a comic page."""
        published_meta = soup.find('meta', property='article:published_time')
        published = string_to_date(published_meta['content'], "%Y-%m-%d")
        picture = soup.find('picture', class_='img-fluid item-comic-image')
        comic_imgs = picture.find_all('img')
        author = soup.find('meta', property='article:author')['content']
        tags = soup.find('meta', property='article:tag')['content']
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [urljoin_wrapper(cls.url, img['src']) for img in comic_imgs],
            'author': author,
            'tags': tags,
        }
4509
4510
4511
class PearlsBeforeSwine(GenericGoComic):
    """Pearls Before Swine comics, as published on GoComics."""
    url = 'http://www.gocomics.com/pearlsbeforeswine'
    name = 'pearls'
    long_name = 'Pearls Before Swine'
4516
4517
4518
class Peanuts(GenericGoComic):
    """Peanuts comics, as published on GoComics."""
    url = 'http://www.gocomics.com/peanuts'
    name = 'peanuts'
    long_name = 'Peanuts'
4523
4524
4525
class MattWuerker(GenericGoComic):
    """Matt Wuerker comics, as published on GoComics."""
    url = 'http://www.gocomics.com/mattwuerker'
    name = 'wuerker'
    long_name = 'Matt Wuerker'
4530
4531
4532
class TomToles(GenericGoComic):
    """Tom Toles comics, as published on GoComics."""
    url = 'http://www.gocomics.com/tomtoles'
    name = 'toles'
    long_name = 'Tom Toles'
4537
4538
4539
class BreakOfDay(GenericGoComic):
    """Break Of Day comics, as published on GoComics."""
    url = 'http://www.gocomics.com/break-of-day'
    name = 'breakofday'
    long_name = 'Break Of Day'
4544
4545
4546
class Brevity(GenericGoComic):
    """Brevity comics, as published on GoComics."""
    url = 'http://www.gocomics.com/brevitypanel'
    name = 'brevity'
    long_name = 'Brevity'
4551
4552
4553
class MichaelRamirez(GenericGoComic):
    """Michael Ramirez comics, as published on GoComics."""
    url = 'http://www.gocomics.com/michaelramirez'
    name = 'ramirez'
    long_name = 'Michael Ramirez'
4558
4559
4560
class MikeLuckovich(GenericGoComic):
    """Mike Luckovich comics, as published on GoComics."""
    url = 'http://www.gocomics.com/mikeluckovich'
    name = 'luckovich'
    long_name = 'Mike Luckovich'
4565
4566
4567
class JimBenton(GenericGoComic):
    """Jim Benton comics, as published on GoComics."""
    # Also on http://jimbenton.tumblr.com
    url = 'http://www.gocomics.com/jim-benton-cartoons'
    name = 'benton'
    long_name = 'Jim Benton'
4573
4574
4575
class TheArgyleSweater(GenericGoComic):
    """The Argyle Sweater comics, as published on GoComics."""
    url = 'http://www.gocomics.com/theargylesweater'
    name = 'argyle'
    long_name = 'Argyle Sweater'
4580
4581
4582
class SunnyStreet(GenericGoComic):
    """Sunny Street comics, as published on GoComics."""
    # Also on http://www.sunnystreetcomics.com
    url = 'http://www.gocomics.com/sunny-street'
    name = 'sunny'
    long_name = 'Sunny Street'
4588
4589
4590
class OffTheMark(GenericGoComic):
    """Off The Mark comics, as published on GoComics."""
    # Also on https://www.offthemark.com
    url = 'http://www.gocomics.com/offthemark'
    name = 'offthemark'
    long_name = 'Off The Mark'
4596
4597
4598
class WuMo(GenericGoComic):
    """WuMo comics, as published on GoComics."""
    # Also on http://wumo.com
    url = 'http://www.gocomics.com/wumo'
    name = 'wumo'
    long_name = 'WuMo'
4604
4605
4606
class LunarBaboon(GenericGoComic):
    """Lunar Baboon comics, as published on GoComics."""
    # Also on http://www.lunarbaboon.com
    # Also on https://tapastic.com/series/Lunarbaboon
    url = 'http://www.gocomics.com/lunarbaboon'
    name = 'lunarbaboon'
    long_name = 'Lunar Baboon'
4613
4614
4615
class SandersenGocomic(GenericGoComic):
    """Sarah Andersen comics, as published on GoComics."""
    # Also on http://sarahcandersen.com
    # Also on http://tapastic.com/series/Doodle-Time
    url = 'http://www.gocomics.com/sarahs-scribbles'
    name = 'sandersen-goc'
    long_name = 'Sarah Andersen (from GoComics)'
4622
4623
4624
class SaturdayMorningBreakfastCerealGoComic(GenericGoComic):
    """Saturday Morning Breakfast Cereal comics, as published on GoComics."""
    # Also on http://smbc-comics.tumblr.com
    # Also on http://www.smbc-comics.com
    url = 'http://www.gocomics.com/saturday-morning-breakfast-cereal'
    name = 'smbc-goc'
    long_name = 'Saturday Morning Breakfast Cereal (from GoComics)'
    _categories = ('SMBC',)
4632
4633
4634
class CalvinAndHobbesGoComic(GenericGoComic):
    """Calvin and Hobbes comics, as published on GoComics."""
    # From gocomics, not http://marcel-oehler.marcellosendos.ch/comics/ch/
    url = 'http://www.gocomics.com/calvinandhobbes'
    name = 'calvin-goc'
    long_name = 'Calvin and Hobbes (from GoComics)'
4640
4641
4642
class RallGoComic(GenericGoComic):
    """Ted Rall comics, as published on GoComics."""
    # Also on http://rall.com/comic
    url = 'http://www.gocomics.com/ted-rall'
    name = 'rall-goc'
    long_name = 'Ted Rall (from GoComics)'
    _categories = ('RALL',)
4649
4650
4651
class TheAwkwardYetiGoComic(GenericGoComic):
    """The Awkward Yeti comics, as published on GoComics."""
    # Also on http://larstheyeti.tumblr.com
    # Also on http://theawkwardyeti.com
    # Also on https://tapastic.com/series/TheAwkwardYeti
    url = 'http://www.gocomics.com/the-awkward-yeti'
    name = 'yeti-goc'
    long_name = 'The Awkward Yeti (from GoComics)'
    _categories = ('YETI',)
4660
4661
4662
class BerkeleyMewsGoComics(GenericGoComic):
    """Berkeley Mews comics, as published on GoComics."""
    # Also on http://mews.tumblr.com
    # Also on http://www.berkeleymews.com
    url = 'http://www.gocomics.com/berkeley-mews'
    name = 'berkeley-goc'
    long_name = 'Berkeley Mews (from GoComics)'
    _categories = ('BERKELEY',)
4670
4671
4672
class SheldonGoComics(GenericGoComic):
    """Sheldon comics, as published on GoComics."""
    # Also on http://www.sheldoncomics.com
    url = 'http://www.gocomics.com/sheldon'
    name = 'sheldon-goc'
    long_name = 'Sheldon Comics (from GoComics)'
4678
4679
4680
class FowlLanguageGoComics(GenericGoComic):
    """Fowl Language comics, as published on GoComics."""
    # Also on http://www.fowllanguagecomics.com
    # Also on http://tapastic.com/series/Fowl-Language-Comics
    # Also on http://fowllanguagecomics.tumblr.com
    url = 'http://www.gocomics.com/fowl-language'
    name = 'fowllanguage-goc'
    long_name = 'Fowl Language Comics (from GoComics)'
    _categories = ('FOWLLANGUAGE',)
4689
4690
4691
class NickAnderson(GenericGoComic):
    """Retrieve Nick Anderson comics from GoComics."""
    name = 'nickanderson'
    long_name = 'Nick Anderson'
    url = 'http://www.gocomics.com/nickanderson'
4696
4697
4698
class GarfieldGoComics(GenericGoComic):
    """Retrieve Garfield comics from GoComics."""
    # Also on http://garfield.com
    name = 'garfield-goc'
    long_name = 'Garfield (from GoComics)'
    url = 'http://www.gocomics.com/garfield'
    _categories = ('GARFIELD', )
4705
4706
4707
class DorrisMcGoComics(GenericGoComic):
    """Retrieve Dorris Mc comics from GoComics."""
    # Also on http://dorrismccomics.com
    name = 'dorrismc-goc'
    long_name = 'Dorris Mc (from GoComics)'
    url = 'http://www.gocomics.com/dorris-mccomics'
4713
4714
4715
class FoxTrot(GenericGoComic):
    """Retrieve FoxTrot comics from GoComics."""
    name = 'foxtrot'
    long_name = 'FoxTrot'
    url = 'http://www.gocomics.com/foxtrot'
4720
4721
4722
class FoxTrotClassics(GenericGoComic):
    """Retrieve FoxTrot Classics comics from GoComics."""
    name = 'foxtrot-classics'
    long_name = 'FoxTrot Classics'
    url = 'http://www.gocomics.com/foxtrotclassics'
4727
4728
4729
class MisterAndMeGoComics(GenericEmptyComic, GenericGoComic):  # Removed ?
    """Retrieve Mister & Me comics from GoComics."""
    # Also on http://www.mister-and-me.com
    # Also on https://tapastic.com/series/Mister-and-Me
    name = 'mister-goc'
    long_name = 'Mister & Me (from GoComics)'
    url = 'http://www.gocomics.com/mister-and-me'
4736
4737
4738
class NonSequitur(GenericGoComic):
    """Retrieve Non Sequitur (Wiley Miller) comics from GoComics."""
    name = 'nonsequitur'
    long_name = 'Non Sequitur'
    url = 'http://www.gocomics.com/nonsequitur'
4743
4744
4745
class GenericTapasticComic(GenericListableComic):
    """Shared retrieval logic for the comics hosted on tapastic.com.

    The concrete subclasses only declare name, long_name and url (the
    address of the series page).
    """
    _categories = ('TAPASTIC', )

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Build the information dict for one episode.

        soup is the parsed episode page and archive_elt the matching entry
        of the episode list. Returns None (after printing a notice) when
        the page contains no 'art-image' image.
        """
        # 'publishDate' is divided by 1000 before being read as a timestamp
        published_on = datetime.datetime.fromtimestamp(
            int(archive_elt['publishDate']) / 1000.0).date()
        images = soup.find_all('img', class_='art-image')
        if images:
            return {
                'day': published_on.day,
                'year': published_on.year,
                'month': published_on.month,
                'img': [image['src'] for image in images],
                'title': archive_elt['title'],
            }
        print("Comic %s is being uploaded, retry later" % cls.get_url_from_archive_element(archive_elt))
        return None

    @classmethod
    def get_url_from_archive_element(cls, archive_elt):
        """Return the episode url built from the 'id' of an episode list entry."""
        episode_id = str(archive_elt['id'])
        return 'http://tapastic.com/episode/' + episode_id

    @classmethod
    def get_archive_elements(cls):
        """Return the episode list embedded in the series page.

        Raises IndexError when no line of the page holds the episode list.
        """
        pref, suff = 'episodeList : ', ','
        # Information is stored in the javascript part
        # I don't know the clean way to get it so this is the ugly way.
        raw_lines = urlopen_wrapper(cls.url).readlines()
        lines = [raw.decode('utf-8').strip() for raw in raw_lines]
        # Keep what sits between the prefix and the trailing comma
        payloads = [
            line[len(pref):-len(suff)]
            for line in lines
            if line.startswith(pref) and line.endswith(suff)
        ]
        return json.loads(payloads[0])
4778
4779
4780
class VegetablesForDessert(GenericTapasticComic):
    """Retrieve Vegetables For Dessert comics from Tapastic."""
    # Also on http://vegetablesfordessert.tumblr.com
    name = 'vegetables'
    long_name = 'Vegetables For Dessert'
    url = 'http://tapastic.com/series/vegetablesfordessert'
4786
4787
4788
class FowlLanguageTapa(GenericTapasticComic):
    """Retrieve Fowl Language comics from Tapastic."""
    # Also on http://www.fowllanguagecomics.com
    # Also on http://fowllanguagecomics.tumblr.com
    # Also on http://www.gocomics.com/fowl-language
    name = 'fowllanguage-tapa'
    long_name = 'Fowl Language Comics (from Tapastic)'
    url = 'http://tapastic.com/series/Fowl-Language-Comics'
    _categories = ('FOWLLANGUAGE', )
4797
4798
4799
class OscillatingProfundities(GenericTapasticComic):
    """Retrieve Oscillating Profundities comics from Tapastic."""
    name = 'oscillating'
    long_name = 'Oscillating Profundities'
    url = 'http://tapastic.com/series/oscillatingprofundities'
4804
4805
4806
class ZnoflatsComics(GenericTapasticComic):
    """Retrieve Znoflats comics from Tapastic."""
    name = 'znoflats'
    long_name = 'Znoflats Comics'
    url = 'http://tapastic.com/series/Znoflats-Comics'
4811
4812
4813
class SandersenTapastic(GenericTapasticComic):
    """Retrieve Sarah Andersen comics from Tapastic."""
    # Also on http://sarahcandersen.com
    # Also on http://www.gocomics.com/sarahs-scribbles
    name = 'sandersen-tapa'
    long_name = 'Sarah Andersen (from Tapastic)'
    url = 'http://tapastic.com/series/Doodle-Time'
4820
4821
4822
class TubeyToonsTapastic(GenericTapasticComic):
    """Retrieve Tubey Toons comics from Tapastic."""
    # Also on http://tubeytoons.com
    # Also on https://tubeytoons.tumblr.com
    name = 'tubeytoons-tapa'
    long_name = 'Tubey Toons (from Tapastic)'
    url = 'http://tapastic.com/series/Tubey-Toons'
    # NOTE(review): 'TUNEYTOONS' looks like a misspelling of TUBEYTOONS; kept
    # as is because the key may be shared with other classes - confirm.
    _categories = ('TUNEYTOONS', )
4830
4831
4832
class AnythingComicTapastic(GenericTapasticComic):
    """Retrieve Anything Comic comics from Tapastic."""
    # Also on http://www.anythingcomic.com
    name = 'anythingcomic-tapa'
    long_name = 'Anything Comic (from Tapastic)'
    url = 'http://tapastic.com/series/anything'
4838
4839
4840
class UnearthedComicsTapastic(GenericTapasticComic):
    """Retrieve Unearthed Comics from Tapastic."""
    # Also on http://unearthedcomics.com
    # Also on https://unearthedcomics.tumblr.com
    name = 'unearthed-tapa'
    long_name = 'Unearthed Comics (from Tapastic)'
    url = 'http://tapastic.com/series/UnearthedComics'
    _categories = ('UNEARTHED', )
4848
4849
4850
class EverythingsStupidTapastic(GenericTapasticComic):
    """Retrieve Everything's Stupid comics from Tapastic."""
    # Also on http://www.webtoons.com/en/challenge/everythings-stupid/list?title_no=14591
    # Also on http://everythingsstupid.net
    name = 'stupid-tapa'
    long_name = "Everything's Stupid (from Tapastic)"
    url = 'http://tapastic.com/series/EverythingsStupid'
4857
4858
4859
class JustSayEhTapastic(GenericTapasticComic):
    """Retrieve Just Say Eh comics from Tapastic."""
    # Also on http://www.justsayeh.com
    name = 'justsayeh-tapa'
    long_name = 'Just Say Eh (from Tapastic)'
    url = 'http://tapastic.com/series/Just-Say-Eh'
4865
4866
4867
class ThorsThundershackTapastic(GenericTapasticComic):
    """Retrieve Thor's Thundershack comics from Tapastic."""
    # Also on http://www.thorsthundershack.com
    name = 'thor-tapa'
    long_name = "Thor's Thundershack (from Tapastic)"
    # NOTE(review): the series slug ends in 'Thundershac' (no final 'k');
    # presumably this is the real address - confirm before touching it.
    url = 'http://tapastic.com/series/Thors-Thundershac'
    _categories = ('THOR', )
4874
4875
4876
class OwlTurdTapastic(GenericTapasticComic):
    """Retrieve Owl Turd comics from Tapastic."""
    # Also on http://owlturd.com
    name = 'owlturd-tapa'
    long_name = 'Owl Turd (from Tapastic)'
    url = 'http://tapastic.com/series/Owl-Turd-Comix'
    _categories = ('OWLTURD', )
4883
4884
4885
class GoneIntoRaptureTapastic(GenericTapasticComic):
    """Retrieve Gone Into Rapture comics from Tapastic."""
    # Also on http://goneintorapture.tumblr.com
    # Also on http://goneintorapture.com
    name = 'rapture-tapa'
    long_name = 'Gone Into Rapture (from Tapastic)'
    url = 'http://tapastic.com/series/Goneintorapture'
4892
4893
4894
class HeckIfIKnowComicsTapa(GenericTapasticComic):
    """Retrieve Heck If I Know comics from Tapastic."""
    # Also on http://heckifiknowcomics.com
    name = 'heck-tapa'
    long_name = 'Heck if I Know comics (from Tapastic)'
    url = 'http://tapastic.com/series/Regular'
4900
4901
4902
class CheerUpEmoKidTapa(GenericTapasticComic):
    """Retrieve Cheer Up Emo Kid comics from Tapastic."""
    # Also on http://www.cheerupemokid.com
    # Also on https://enzocomics.tumblr.com
    name = 'cuek-tapa'
    long_name = 'Cheer Up Emo Kid (from Tapastic)'
    url = 'http://tapastic.com/series/CUEK'
4909
4910
4911
class BigFootJusticeTapa(GenericTapasticComic):
    """Retrieve Big Foot Justice comics from Tapastic."""
    # Also on http://bigfootjustice.com
    name = 'bigfoot-tapa'
    long_name = 'Big Foot Justice (from Tapastic)'
    url = 'http://tapastic.com/series/bigfoot-justice'
4917
4918
4919
class UpAndOutTapa(GenericTapasticComic):
    """Retrieve Up & Out comics from Tapastic."""
    # Also on http://upandoutcomic.tumblr.com
    name = 'upandout-tapa'
    long_name = 'Up And Out (from Tapastic)'
    url = 'http://tapastic.com/series/UP-and-OUT'
4925
4926
4927
class ToonHoleTapa(GenericTapasticComic):
    """Retrieve Toon Hole comics from Tapastic."""
    # Also on http://www.toonhole.com
    name = 'toonhole-tapa'
    long_name = 'Toon Hole (from Tapastic)'
    url = 'http://tapastic.com/series/TOONHOLE'
4933
4934
4935
class AngryAtNothingTapa(GenericTapasticComic):
    """Retrieve Angry At Nothing comics from Tapastic."""
    # Also on http://www.angryatnothing.net
    # Also on http://angryatnothing.tumblr.com
    name = 'angry-tapa'
    long_name = 'Angry At Nothing (from Tapastic)'
    url = 'http://tapastic.com/series/Comics-yeah-definitely-comics-'
4942
4943
4944
class LeleozTapa(GenericTapasticComic):
    """Retrieve Leleoz comics from Tapastic."""
    # Also on http://leleozcomics.tumblr.com
    name = 'leleoz-tapa'
    long_name = 'Leleoz (from Tapastic)'
    url = 'https://tapastic.com/series/Leleoz'
4950
4951
4952
class TheAwkwardYetiTapa(GenericTapasticComic):
    """Retrieve The Awkward Yeti comics from Tapastic."""
    # Also on http://www.gocomics.com/the-awkward-yeti
    # Also on http://theawkwardyeti.com
    # Also on http://larstheyeti.tumblr.com
    name = 'yeti-tapa'
    long_name = 'The Awkward Yeti (from Tapastic)'
    url = 'https://tapastic.com/series/TheAwkwardYeti'
    _categories = ('YETI', )
4961
4962
4963
class AsPerUsualTapa(GenericTapasticComic):
    """Retrieve As Per Usual comics from Tapastic."""
    # Also on http://as-per-usual.tumblr.com
    name = 'usual-tapa'
    long_name = 'As Per Usual (from Tapastic)'
    url = 'https://tapastic.com/series/AsPerUsual'
    # Spelled with the leading underscore, as in the sibling comic classes;
    # the previous 'categories' spelling did not match that attribute name.
    _categories = ('DAMILEE', )
4970
4971
4972
class HotComicsForCoolPeopleTapa(GenericTapasticComic):
    """Retrieve Hot Comics For Cool People from Tapastic."""
    # Also on http://hotcomicsforcoolpeople.tumblr.com
    # Also on http://hotcomics.biz (links to tumblr)
    # Also on http://hcfcp.com (links to tumblr)
    name = 'hotcomics-tapa'
    long_name = 'Hot Comics For Cool People (from Tapastic)'
    url = 'https://tapastic.com/series/Hot-Comics-For-Cool-People'
    # Spelled with the leading underscore, as in the sibling comic classes;
    # the previous 'categories' spelling did not match that attribute name.
    _categories = ('DAMILEE', )
4981
4982
4983
class OneOneOneOneComicTapa(GenericTapasticComic):
    """Retrieve 1111 Comics from Tapastic."""
    # Also on http://www.1111comics.me
    # Also on http://comics1111.tumblr.com
    name = '1111-tapa'
    long_name = '1111 Comics (from Tapastic)'
    url = 'https://tapastic.com/series/1111-Comics'
    _categories = ('ONEONEONEONE', )
4991
4992
4993
class TumbleDryTapa(GenericTapasticComic):
    """Retrieve Tumble Dry comics from Tapastic."""
    # Also on http://tumbledrycomics.com
    name = 'tumbledry-tapa'
    # The display name used to read 'Tumblr Dry', a typo for the comic's title
    long_name = 'Tumble Dry (from Tapastic)'
    url = 'https://tapastic.com/series/TumbleDryComics'
4999
5000
5001
class DeadlyPanelTapa(GenericTapasticComic):
    """Retrieve Deadly Panel comics from Tapastic."""
    # Also on http://www.deadlypanel.com
    # Also on https://deadlypanel.tumblr.com
    name = 'deadly-tapa'
    long_name = 'Deadly Panel (from Tapastic)'
    url = 'https://tapastic.com/series/deadlypanel'
5008
5009
5010
class ChrisHallbeckMaxiTapa(GenericTapasticComic):
    """Retrieve Chris Hallbeck's Maximumble comics from Tapastic."""
    # Also on https://chrishallbeck.tumblr.com
    # Also on http://maximumble.com
    name = 'hallbeckmaxi-tapa'
    # Author name fixed in the display name (it used to read 'Hallback')
    long_name = 'Chris Hallbeck - Maximumble (from Tapastic)'
    url = 'https://tapastic.com/series/Maximumble'
    # The category key keeps its existing spelling: it is an identifier that
    # has to stay equal across all the classes of this author.
    _categories = ('HALLBACK', )
5018
5019
5020
class ChrisHallbeckMiniTapa(GenericEmptyComic, GenericTapasticComic):
    """Retrieve Chris Hallbeck's Minimumble comics from Tapastic."""
    # Also on https://chrishallbeck.tumblr.com
    # Also on http://minimumble.com
    name = 'hallbeckmini-tapa'
    # Author name fixed in the display name (it used to read 'Hallback')
    long_name = 'Chris Hallbeck - Minimumble (from Tapastic)'
    url = 'https://tapastic.com/series/Minimumble'
    # The category key keeps its existing spelling: it is an identifier that
    # has to stay equal across all the classes of this author.
    _categories = ('HALLBACK', )
5028
5029
5030
class ChrisHallbeckBiffTapa(GenericEmptyComic, GenericTapasticComic):
    """Retrieve Chris Hallbeck's The Book of Biff comics from Tapastic."""
    # Also on https://chrishallbeck.tumblr.com
    # Also on http://thebookofbiff.com
    name = 'hallbeckbiff-tapa'
    # Author name fixed in the display name (it used to read 'Hallback')
    long_name = 'Chris Hallbeck - The Book of Biff (from Tapastic)'
    url = 'https://tapastic.com/series/Biff'
    # The category key keeps its existing spelling: it is an identifier that
    # has to stay equal across all the classes of this author.
    _categories = ('HALLBACK', )
5038
5039
5040
class RandoWisTapa(GenericTapasticComic):
    """Retrieve RandoWis comics from Tapastic."""
    # Also on https://randowis.com
    name = 'randowis-tapa'
    long_name = 'RandoWis (from Tapastic)'
    url = 'https://tapastic.com/series/RandoWis'
5046
5047
5048
class PigeonGazetteTapa(GenericTapasticComic):
    """Retrieve The Pigeon Gazette comics from Tapastic."""
    # Also on http://thepigeongazette.tumblr.com
    name = 'pigeon-tapa'
    long_name = 'The Pigeon Gazette (from Tapastic)'
    url = 'https://tapastic.com/series/The-Pigeon-Gazette'
5054
5055
5056
class TheOdd1sOutTapa(GenericTapasticComic):
    """Retrieve The Odd 1s Out comics from Tapastic."""
    # Also on http://theodd1sout.com
    # Also on http://theodd1sout.tumblr.com
    name = 'theodd-tapa'
    long_name = 'The Odd 1s Out (from Tapastic)'
    url = 'https://tapastic.com/series/Theodd1sout'
5063
5064
5065
class TheWorldIsFlatTapa(GenericTapasticComic):
    """Retrieve The World Is Flat comics from Tapastic."""
    # Also on http://theworldisflatcomics.tumblr.com
    name = 'flatworld-tapa'
    long_name = 'The World Is Flat (from Tapastic)'
    url = 'https://tapastic.com/series/The-World-is-Flat'
5071
5072
5073
class MisterAndMeTapa(GenericTapasticComic):
    """Retrieve Mister & Me comics from Tapastic."""
    # Also on http://www.mister-and-me.com
    # Also on http://www.gocomics.com/mister-and-me
    name = 'mister-tapa'
    long_name = 'Mister & Me (from Tapastic)'
    url = 'https://tapastic.com/series/Mister-and-Me'
5080
5081
5082
class TalesOfAbsurdityTapa(GenericTapasticComic):
    """Retrieve Tales Of Absurdity comics from Tapastic."""
    # Also on http://talesofabsurdity.com
    # Also on http://talesofabsurdity.tumblr.com
    name = 'absurdity-tapa'
    long_name = 'Tales of Absurdity (from Tapastic)'
    url = 'http://tapastic.com/series/Tales-Of-Absurdity'
    _categories = ('ABSURDITY', )
5090
5091
5092
class BFGFSTapa(GenericTapasticComic):
    """Retrieve BFGFS comics from Tapastic."""
    # Also on http://bfgfs.com
    # Also on https://bfgfs.tumblr.com
    name = 'bfgfs-tapa'
    long_name = 'BFGFS (from Tapastic)'
    url = 'https://tapastic.com/series/BFGFS'
5099
5100
5101
class DoodleForFoodTapa(GenericTapasticComic):
    """Retrieve Doodle For Food comics from Tapastic."""
    # Also on http://www.doodleforfood.com
    name = 'doodle-tapa'
    long_name = 'Doodle For Food (from Tapastic)'
    url = 'https://tapastic.com/series/Doodle-for-Food'
5107
5108
5109
class MrLovensteinTapa(GenericTapasticComic):
    """Retrieve Mr. Lovenstein comics from Tapastic."""
    # Also on https://tapastic.com/series/MrLovenstein
    # NOTE(review): the address above is this very series; the comment was
    # presumably meant to point at the comic's own website - confirm.
    name = 'mrlovenstein-tapa'
    long_name = 'Mr. Lovenstein (from Tapastic)'
    url = 'https://tapastic.com/series/MrLovenstein'
5115
5116
5117
class CassandraCalinTapa(GenericTapasticComic):
    """Retrieve Cassandra Calin (C. Cassandra) comics from Tapastic."""
    # Also on http://cassandracalin.com
    # Also on http://c-cassandra.tumblr.com
    name = 'cassandra-tapa'
    long_name = 'Cassandra Calin (from Tapastic)'
    url = 'https://tapastic.com/series/C-Cassandra-comics'
5124
5125
5126
class WafflesAndPancakes(GenericTapasticComic):
    """Retrieve Waffles And Pancakes comics from Tapastic."""
    # Also on http://wandpcomic.com
    name = 'waffles'
    long_name = 'Waffles And Pancakes'
    url = 'https://tapastic.com/series/Waffles-and-Pancakes'
5132
5133
5134
class YesterdaysPopcornTapastic(GenericTapasticComic):
    """Retrieve Yesterday's Popcorn comics from Tapastic."""
    # Also on http://www.yesterdayspopcorn.com
    # Also on http://yesterdayspopcorn.tumblr.com
    name = 'popcorn-tapa'
    long_name = "Yesterday's Popcorn (from Tapastic)"
    url = 'https://tapastic.com/series/Yesterdays-Popcorn'
5141
5142
5143
class OurSuperAdventureTapastic(GenericEmptyComic, GenericTapasticComic):
    """Retrieve Our Super Adventure comics from Tapastic."""
    # Also on http://www.oursuperadventure.com
    # Related addresses listed alongside it:
    #   http://sarahssketchbook.tumblr.com
    #   http://sarahgraley.com
    name = 'superadventure-tapastic'
    long_name = 'Our Super Adventure (from Tapastic)'
    url = 'https://tapastic.com/series/Our-Super-Adventure'
5151
5152
5153
class NamelessPCs(GenericTapasticComic):
    """Retrieve Nameless PCs comics from Tapastic."""
    # Also on http://namelesspcs.com
    name = 'namelesspcs-tapa'
    long_name = 'NamelessPCs (from Tapastic)'
    url = 'https://tapastic.com/series/NamelessPC'
5159
5160
5161
class DownTheUpwardSpiralTapa(GenericTapasticComic):
    """Retrieve Down The Upward Spiral comics from Tapastic."""
    # Also on http://www.downtheupwardspiral.com
    # Also on http://downtheupwardspiral.tumblr.com
    name = 'spiral-tapa'
    long_name = 'Down the Upward Spiral (from Tapastic)'
    url = 'https://tapastic.com/series/Down-the-Upward-Spiral'
5168
5169
5170
class UbertoolTapa(GenericTapasticComic):
    """Retrieve Ubertool comics from Tapastic."""
    # Also on http://ubertoolcomic.com
    # Also on https://ubertool.tumblr.com
    name = 'ubertool-tapa'
    long_name = 'Ubertool (from Tapastic)'
    url = 'https://tapastic.com/series/ubertool'
    _categories = ('UBERTOOL', )
5178
5179
5180
class BarteNerdsTapa(GenericTapasticComic):
    """Retrieve BarteNerds comics from Tapastic."""
    # Also on http://www.bartenerds.com
    name = 'bartenerds-tapa'
    long_name = 'BarteNerds (from Tapastic)'
    url = 'https://tapastic.com/series/BarteNERDS'
5186
5187
5188
class SmallBlueYonderTapa(GenericTapasticComic):
    """Retrieve Small Blue Yonder comics from Tapastic."""
    # Also on http://www.smallblueyonder.com
    name = 'smallblue-tapa'
    long_name = 'Small Blue Yonder (from Tapastic)'
    url = 'https://tapastic.com/series/Small-Blue-Yonder'
5194
5195
5196
class TizzyStitchBirdTapa(GenericTapasticComic):
    """Retrieve Tizzy Stitch Bird comics from Tapastic."""
    # Also on http://tizzystitchbird.com
    # Also on http://tizzystitchbird.tumblr.com
    # Also on http://www.webtoons.com/en/challenge/tizzy-stitchbird/list?title_no=50082
    name = 'tizzy-tapa'
    long_name = 'Tizzy Stitch Bird (from Tapastic)'
    url = 'https://tapastic.com/series/TizzyStitchbird'
5204
5205
5206
class RockPaperCynicTapa(GenericTapasticComic):
    """Retrieve Rock Paper Cynic comics from Tapastic."""
    # Also on http://www.rockpapercynic.com
    # Also on http://rockpapercynic.tumblr.com
    name = 'rpc-tapa'
    long_name = 'Rock Paper Cynic (from Tapastic)'
    url = 'https://tapastic.com/series/rockpapercynic'
5213
5214
5215
class ItsTheTieTapa(GenericTapasticComic):
    """Retrieve It's The Tie comics from Tapastic."""
    # Also on http://itsthetie.com
    # Also on http://itsthetie.tumblr.com
    name = 'tie-tapa'
    long_name = "It's the tie (from Tapastic)"
    url = 'https://tapastic.com/series/itsthetie'
    _categories = ('TIE', )
5223
5224
5225
def get_subclasses(klass):
    """Return every direct and indirect subclass of klass as a list.

    The direct subclasses come first, followed by the descendants of each
    of them in turn. A class reachable through several parents appears
    once per path.
    """
    direct = klass.__subclasses__()
    found = list(direct)
    for child in direct:
        found += get_subclasses(child)
    return found
5231
5232
5233
def remove_st_nd_rd_th_from_date(string):
    """Strip the English ordinal suffix from the day numbers of a date string.

    "1st", "2nd", "3rd" and "4th" become "1", "2", "3" and "4" so that the
    result can be parsed with a plain numeric day directive.

    Only a suffix that directly follows a digit is removed. The rest of the
    string is returned untouched, so words that merely contain those letters
    ("Monday", "Saturday", "August", "Month") are left intact.
    """
    # The look-behind ties the suffix to a digit and \b makes sure it ends
    # the token, which is what tells "21st" apart from "August".
    return re.sub(r'(?<=\d)(?:st|nd|rd|th)\b', '', string)
5241
5242
5243
def string_to_date(string, date_format, local=DEFAULT_LOCAL):
    """Convert a string to a date object.

    Wrapper around datetime.datetime.strptime that parses under the locale
    given by local, then puts the previous locale back.

    string is the text to parse, date_format the strptime format and local
    the locale name to parse under. Returns a datetime.date. Whatever
    strptime raises (ValueError on a mismatch) is propagated, and the
    previous locale is restored in that case too.
    """
    # format described in https://docs.python.org/2/library/datetime.html#strftime-and-strptime-behavior
    prev_locale = locale.setlocale(locale.LC_ALL)
    must_switch = local != prev_locale
    if must_switch:
        locale.setlocale(locale.LC_ALL, local)
    try:
        return datetime.datetime.strptime(string, date_format).date()
    finally:
        # Runs on success and on failure alike, so a parsing error cannot
        # leave the process stuck in the temporary locale.
        if must_switch:
            locale.setlocale(locale.LC_ALL, prev_locale)
5254
5255
5256
# Registry of the comic classes deriving from GenericComic.
# get_subclasses can list a class once per inheritance path, hence the set.
COMICS = set(get_subclasses(GenericComic))
# Classes whose name is None are left out (presumably the generic bases).
VALID_COMICS = [comic for comic in COMICS if comic.name is not None]
COMIC_NAMES = {comic.name: comic for comic in VALID_COMICS}
# Fewer entries than classes would mean that two comics share a name.
assert len(COMIC_NAMES) == len(VALID_COMICS)
CLASS_NAMES = {comic.__name__ for comic in VALID_COMICS}
# Same check for the Python class names.
assert len(CLASS_NAMES) == len(VALID_COMICS)
5262