Completed
Push — master ( c3d761...46e76e )
by De
01:17
created

comics.py (22 issues)

Code
1
#! /usr/bin/python3
2
# vim: set expandtab tabstop=4 shiftwidth=4 :
3
"""Module to retrieve webcomics"""
4
5
import datetime
import json
import locale
import re
import urllib
import urllib.error
from datetime import date, timedelta

from comic_abstract import GenericComic, get_date_for_comic
from urlfunctions import get_soup_at_url, urljoin_wrapper,\
    convert_iri_to_plain_ascii_uri, load_json_at_url, urlopen_wrapper
14
15
DEFAULT_LOCAL = 'en_GB.UTF-8'
16
17
18
class Xkcd(GenericComic):
    """Class to retrieve Xkcd comics."""
    name = 'xkcd'
    long_name = 'xkcd'
    url = 'http://xkcd.com'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generator to get the next comic. Implementation of GenericComic's abstract method.

        Starts right after `last_comic['num']` (or from 1 when `last_comic`
        is falsy) and stops at the 'num' reported by `info.0.json` at the
        site root. Each yielded value is the JSON dictionary of one comic,
        completed with 'prefix', 'json_url' and 'url' entries, with 'img'
        wrapped in a list and 'day'/'month'/'year' converted to int.
        """
        first_num = last_comic['num'] if last_comic else 0
        last_num = load_json_at_url(
            urljoin_wrapper(cls.url, 'info.0.json'))['num']

        for num in range(first_num + 1, last_num + 1):
            if num == 404:
                # Number 404 is skipped on purpose - presumably because that
                # address is the site's "not found" page (TODO confirm).
                continue
            json_url = urljoin_wrapper(cls.url, '%d/info.0.json' % num)
            comic = load_json_at_url(json_url)
            comic['img'] = [comic['img']]
            comic['prefix'] = '%d-' % num
            comic['json_url'] = json_url
            comic['url'] = urljoin_wrapper(cls.url, str(num))
            for key in ('day', 'month', 'year'):
                comic[key] = int(comic[key])
            assert comic['num'] == num
            yield comic
44
45
46
# Helper functions corresponding to get_url_from_link/get_url_from_archive_element
47
48
49
@classmethod
def get_href(cls, link):
    """Implementation of get_url_from_link/get_url_from_archive_element.

    Returns the 'href' entry of `link` unchanged.
    """
    return link['href']
53
54
55
@classmethod
def join_cls_url_to_href(cls, link):
    """Implementation of get_url_from_link/get_url_from_archive_element.

    Returns the 'href' entry of `link` joined to `cls.url` with
    `urljoin_wrapper`.
    """
    return urljoin_wrapper(cls.url, link['href'])
59
60
61
class GenericNavigableComic(GenericComic):
    """Generic class for "navigable" comics : with first/next arrows.

    This class applies to comics where previous and next comics can be
    accessed from a given comic. Once given a starting point (either
    the first comic or the last comic retrieved), it will handle the
    navigation, the retrieval of the soup object and the setting of
    the 'url' attribute on retrieved comics. This limits a lot the
    amount of boilerplate code in the different implementation classes.

    The `get_next_comic` method is implemented in terms of new
    more specialized methods to be implemented/overridden:
        - get_first_comic_link
        - get_navi_link
        - get_comic_info
        - get_url_from_link
    """
    _categories = ('NAVIGABLE', )

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        Sometimes this can be retrieved from any comic page, sometimes on
        the archive page, sometimes it doesn't exist at all and one has
        to iterate backward to find it before hardcoding the result found.
        """
        raise NotImplementedError

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next (or previous - for dev purposes) comic."""
        raise NotImplementedError

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics."""
        raise NotImplementedError

    @classmethod
    def get_url_from_link(cls, link):
        """Get url corresponding to a link. Default implementation is similar to get_href."""
        return link['href']

    @classmethod
    def get_next_link(cls, last_soup):
        """Get link to next comic."""
        return cls.get_navi_link(last_soup, True)

    @classmethod
    def get_prev_link(cls, last_soup):
        """Get link to previous comic."""
        return cls.get_navi_link(last_soup, False)

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generic implementation of get_next_comic for navigable comics.

        Starts from the link following `last_comic['url']` when a last comic
        is given, from `get_first_comic_link()` otherwise, then follows the
        "next" links until there is none or until a link leads to the URL
        that was just visited. Comics for which `get_comic_info` returns
        None are skipped; the others are yielded with their 'url' set.
        """
        url = last_comic['url'] if last_comic else None
        if url:
            next_comic = cls.get_next_link(get_soup_at_url(url))
        else:
            next_comic = cls.get_first_comic_link()
        cls.log("next/first comic will be %s (url is %s)" % (str(next_comic), url))
        while next_comic:
            prev_url, url = url, cls.get_url_from_link(next_comic)
            if prev_url == url:
                # A link pointing to the page we are on: stop instead of looping.
                cls.log("got same url %s" % url)
                break
            cls.log("about to get %s (%s)" % (url, str(next_comic)))
            soup = get_soup_at_url(url)
            comic = cls.get_comic_info(soup, next_comic)
            if comic is not None:
                assert 'url' not in comic
                comic['url'] = url
                yield comic
            next_comic = cls.get_next_link(soup)
            cls.log("next comic will be %s" % str(next_comic))

    @classmethod
    def check_first_link(cls):
        """Check that navigation to first comic seems to be working - for dev purposes.

        Returns True when a first link exists and its URL can be fetched
        without `urllib.error.HTTPError`, False otherwise.
        """
        cls.log("about to check first link")
        ok = True
        firstlink = cls.get_first_comic_link()
        if firstlink is None:
            print("From %s : no first link" % cls.url)
            ok = False
        else:
            firsturl = cls.get_url_from_link(firstlink)
            try:
                get_soup_at_url(firsturl)
            except urllib.error.HTTPError:
                print("From %s : invalid first url" % cls.url)
                ok = False
        cls.log("checked first link -> returned %d" % ok)
        return ok

    @classmethod
    def check_prev_next_links(cls, url):
        """Check that navigation to prev/next from a given URL seems to be working - for dev purposes.

        Going to the previous comic then forward (and to the next comic then
        backward) is expected to lead back to `url`. A neighbour page without
        the expected return link is reported as leading to None instead of
        raising. Returns True when every check passed, False otherwise.
        """
        cls.log("about to check prev/next from %s" % url)
        ok = True
        if url is None:
            prevlink, nextlink = None, None
        else:
            soup = get_soup_at_url(url)
            prevlink, nextlink = cls.get_prev_link(soup), cls.get_next_link(soup)
        if prevlink is None and nextlink is None:
            print("From %s : no previous nor next" % url)
            ok = False
        else:
            if prevlink:
                prevurl = cls.get_url_from_link(prevlink)
                prevsoup = get_soup_at_url(prevurl)
                backlink = cls.get_next_link(prevsoup)
                # The previous page may lack a "next" link altogether.
                prevnext = cls.get_url_from_link(backlink) if backlink else None
                if prevnext != url:
                    print("From %s, going backward then forward leads to %s" % (url, prevnext))
                    ok = False
            if nextlink:
                nexturl = cls.get_url_from_link(nextlink)
                if nexturl != url:
                    nextsoup = get_soup_at_url(nexturl)
                    backlink = cls.get_prev_link(nextsoup)
                    # The next page may lack a "previous" link altogether.
                    nextprev = cls.get_url_from_link(backlink) if backlink else None
                    if nextprev != url:
                        print("From %s, going forward then backward leads to %s" % (url, nextprev))
                        ok = False
        cls.log("checked prev/next from %s -> returned %d" % (url, ok))
        return ok

    @classmethod
    def check_navigation(cls, url):
        """Check that navigation functions seem to be working - for dev purposes.

        Runs both `check_first_link` and `check_prev_next_links(url)` and
        returns True only when both succeeded.
        """
        cls.log("about to check navigation from %s" % url)
        first = cls.check_first_link()
        prevnext = cls.check_prev_next_links(url)
        ok = first and prevnext
        cls.log("checked navigation from %s -> returned %d" % (url, ok))
        return ok
199
200
201
class GenericListableComic(GenericComic):
    """Generic class for "listable" comics : with a list of comics (aka 'archive')

    The `get_next_comic` method is implemented in terms of new
    more specialized methods to be implemented/overridden:
        - get_archive_elements
        - get_url_from_archive_element
        - get_comic_info
    """
    _categories = ('LISTABLE', )

    @classmethod
    def get_archive_elements(cls):
        """Get the archive elements (iterable)."""
        raise NotImplementedError

    @classmethod
    def get_url_from_archive_element(cls, archive_elt):
        """Get url corresponding to an archive element."""
        raise NotImplementedError

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Get information about a particular comics."""
        raise NotImplementedError

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generic implementation of get_next_comic for listable comics.

        Walks through the archive elements. When `last_comic` is given,
        elements are ignored up to and including the one whose URL is
        `last_comic['url']`; every following element is fetched and yielded
        (unless `get_comic_info` returns None) with its 'url' set. A message
        is printed when the URL of the last comic was never encountered.
        """
        waiting_for_url = last_comic['url'] if last_comic else None
        for archive_elt in cls.get_archive_elements():
            url = cls.get_url_from_archive_element(archive_elt)
            cls.log("considering %s" % url)
            if waiting_for_url and waiting_for_url == url:
                # Found the last retrieved comic: start yielding from the next one.
                waiting_for_url = None
            elif waiting_for_url is None:
                cls.log("about to get %s (%s)" % (url, str(archive_elt)))
                soup = get_soup_at_url(url)
                comic = cls.get_comic_info(soup, archive_elt)
                if comic is not None:
                    assert 'url' not in comic
                    comic['url'] = url
                    yield comic
        if waiting_for_url is not None:
            print("Did not find %s : there might be a problem" % waiting_for_url)
246
247
# Helper functions corresponding to get_first_comic_link/get_navi_link
248
249
250
@classmethod
def get_link_rel_next(cls, last_soup, next_):
    """Implementation of get_navi_link.

    Looks for a 'link' tag whose rel is 'next' (when `next_` is true)
    or 'prev' (otherwise) and returns the result of `last_soup.find`.
    """
    rel = 'next' if next_ else 'prev'
    return last_soup.find('link', rel=rel)
254
255
256
@classmethod
def get_a_rel_next(cls, last_soup, next_):
    """Implementation of get_navi_link.

    Looks for an 'a' tag whose rel is 'next' (when `next_` is true)
    or 'prev' (otherwise) and returns the result of `last_soup.find`.
    """
    rel = 'next' if next_ else 'prev'
    return last_soup.find('a', rel=rel)
260
261
262
@classmethod
def get_a_navi_navinext(cls, last_soup, next_):
    """Implementation of get_navi_link.

    Looks for an 'a' tag with class 'navi navi-next' (when `next_` is
    true) or 'navi navi-prev' (otherwise).
    """
    css_class = 'navi navi-next' if next_ else 'navi navi-prev'
    return last_soup.find('a', class_=css_class)
266
267
268
@classmethod
def get_a_navi_comicnavnext_navinext(cls, last_soup, next_):
    """Implementation of get_navi_link.

    Looks for an 'a' tag with class 'navi comic-nav-next navi-next' (when
    `next_` is true) or 'navi comic-nav-previous navi-prev' (otherwise).
    """
    if next_:
        css_class = 'navi comic-nav-next navi-next'
    else:
        css_class = 'navi comic-nav-previous navi-prev'
    return last_soup.find('a', class_=css_class)
272
273
274
@classmethod
def get_a_comicnavbase_comicnavnext(cls, last_soup, next_):
    """Implementation of get_navi_link.

    Looks for an 'a' tag with class 'comic-nav-base comic-nav-next' (when
    `next_` is true) or 'comic-nav-base comic-nav-previous' (otherwise).
    """
    if next_:
        css_class = 'comic-nav-base comic-nav-next'
    else:
        css_class = 'comic-nav-base comic-nav-previous'
    return last_soup.find('a', class_=css_class)
278
279
280
@classmethod
def get_a_navi_navifirst(cls):
    """Implementation of get_first_comic_link.

    Fetches the page at `cls.url` and looks for an 'a' tag with class
    'navi navi-first'.
    """
    soup = get_soup_at_url(cls.url)
    return soup.find('a', class_='navi navi-first')
284
285
286
@classmethod
def get_div_navfirst_a(cls):
    """Implementation of get_first_comic_link.

    Fetches the page at `cls.url` and returns the 'a' tag found in the
    'div' with class "nav-first", or None when that 'div' is not found
    (instead of failing on the missing tag).
    """
    div = get_soup_at_url(cls.url).find('div', class_="nav-first")
    return div.find('a') if div else None
290
291
292
@classmethod
def get_a_comicnavbase_comicnavfirst(cls):
    """Implementation of get_first_comic_link.

    Fetches the page at `cls.url` and looks for an 'a' tag with class
    'comic-nav-base comic-nav-first'.
    """
    soup = get_soup_at_url(cls.url)
    return soup.find('a', class_='comic-nav-base comic-nav-first')
296
297
298
@classmethod
def simulate_first_link(cls):
    """Implementation of get_first_comic_link creating a link-like object from
    an URL provided by the class.

    The returned dictionary has a single 'href' entry set to `cls.first_url`.
    """
    return {'href': cls.first_url}
303
304
305
@classmethod
def navigate_to_first_comic(cls):
    """Implementation of get_first_comic_link navigating from a user provided
    URL to the first comic.

    Sometimes, the first comic cannot be reached directly so to start
    from the first comic one has to go to the previous comic until
    there is no previous comics. Once this URL is reached, it
    is better to hardcode it but for development purposes, it
    is convenient to have an automatic way to find it.

    The starting URL is read from standard input and every visited URL
    is printed. Navigation also stops when a "previous" link leads to the
    page it was found on, so that such a page cannot cause an endless loop.
    """
    url = input("Get starting URL: ")
    print(url)
    comic = cls.get_prev_link(get_soup_at_url(url))
    while comic:
        prev_url = cls.get_url_from_link(comic)
        if prev_url == url:
            # Link to the current page: this is as far back as we can go.
            break
        url = prev_url
        print(url)
        comic = cls.get_prev_link(get_soup_at_url(url))
    return {'href': url}
324
325
326
class GenericEmptyComic(GenericComic):
    """Generic class for comics where nothing is to be done.

    It can be useful to deactivate temporarily comics that do not work
    properly by replacing `class MyComic(GenericWhateverComic)` with
    `class MyComic(GenericEmptyComic, GenericWhateverComic)`."""
    _categories = ('EMPTY', )

    @classmethod
    def get_next_comic(cls, last_comic):
        """Implementation of get_next_comic returning no comics."""
        cls.log("comic is considered as empty - returning no comic")
        return []
339
340
341
class ExtraFabulousComics(GenericNavigableComic):
    """Class to retrieve Extra Fabulous Comics."""
    name = 'efc'
    long_name = 'Extra Fabulous Comics'
    url = 'http://extrafabulouscomics.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Images are the 'img' tags whose source starts with the site's
        '/wp-content/uploads/' address; title and date come from the
        'og:title' and 'article:published_time' meta tags.
        """
        # The URL is escaped so that its dots only match literal dots.
        img_src_re = re.compile('^%s/wp-content/uploads/' % re.escape(cls.url))
        imgs = soup.find_all('img', src=img_src_re)
        title = soup.find('meta', property='og:title')['content']
        # Only the first 10 characters (the "%Y-%m-%d" part) are parsed.
        date_str = soup.find('meta', property='article:published_time')['content'][:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        return {
            'title': title,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'prefix': title + '-'
        }
365
366
367
class GenericLeMondeBlog(GenericNavigableComic):
    """Generic class to retrieve comics from Le Monde blogs.

    Subclasses are expected to provide `first_url`, which is used by
    `simulate_first_link` as the starting point of the navigation.
    """
    _categories = ('LEMONDE', 'FRANCAIS')
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The date is read from the "entry-date" span and parsed as
        "%d %B %Y" with the "fr_FR.utf8" locale; images come from the
        'og:image' meta tags.
        """
        url2 = soup.find('link', rel='shortlink')['href']
        title = soup.find('meta', property='og:title')['content']
        date_str = soup.find("span", class_="entry-date").string
        day = string_to_date(date_str, "%d %B %Y", "fr_FR.utf8")
        imgs = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'url2': url2,
            'img': [convert_iri_to_plain_ascii_uri(i['content']) for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
390
391
392
class ZepWorld(GenericLeMondeBlog):
    """Class to retrieve Zep World comics."""
    name = "zep"
    long_name = "Zep World"
    url = "http://zepworld.blog.lemonde.fr"
    first_url = "http://zepworld.blog.lemonde.fr/2014/10/31/bientot-le-blog-de-zep/"
398
399
400
class Vidberg(GenericLeMondeBlog):
    """Class to retrieve Vidberg comics."""
    name = 'vidberg'
    long_name = "Vidberg - l'actu en patates"
    url = "http://vidberg.blog.lemonde.fr"
    # Not the first but I didn't find an efficient way to retrieve it
    first_url = "http://vidberg.blog.lemonde.fr/2012/02/09/revue-de-campagne-la-campagne-du-modem-semballe/"
407
408
409
class Plantu(GenericLeMondeBlog):
    """Class to retrieve Plantu comics."""
    name = 'plantu'
    long_name = "Plantu"
    url = "http://plantu.blog.lemonde.fr"
    first_url = "http://plantu.blog.lemonde.fr/2014/10/28/stress-test-a-bruxelles/"
415
416
417
class XavierGorce(GenericLeMondeBlog):
    """Class to retrieve Xavier Gorce comics."""
    name = 'gorce'
    long_name = "Xavier Gorce"
    url = "http://xaviergorce.blog.lemonde.fr"
    first_url = "http://xaviergorce.blog.lemonde.fr/2015/01/09/distinction/"
423
424
425
class CartooningForPeace(GenericLeMondeBlog):
    """Class to retrieve Cartooning For Peace comics."""
    name = 'forpeace'
    long_name = "Cartooning For Peace"
    url = "http://cartooningforpeace.blog.lemonde.fr"
    first_url = "http://cartooningforpeace.blog.lemonde.fr/2014/12/15/bado/"
431
432
433
class Aurel(GenericLeMondeBlog):
    """Class to retrieve Aurel comics."""
    name = 'aurel'
    long_name = "Aurel"
    url = "http://aurel.blog.lemonde.fr"
    first_url = "http://aurel.blog.lemonde.fr/2014/09/29/le-senat-repasse-a-droite/"
439
440
441
class LesCulottees(GenericLeMondeBlog):
    """Class to retrieve Les Culottees comics."""
    name = 'culottees'
    long_name = 'Les Culottees'
    url = "http://lesculottees.blog.lemonde.fr"
    first_url = "http://lesculottees.blog.lemonde.fr/2016/01/11/clementine-delait-femme-a-barbe/"
447
448
449
class UneAnneeAuLycee(GenericLeMondeBlog):
    """Class to retrieve Une Annee Au Lycee comics."""
    name = 'lycee'
    long_name = 'Une Annee au Lycee'
    url = 'http://uneanneeaulycee.blog.lemonde.fr'
    first_url = "http://uneanneeaulycee.blog.lemonde.fr/2016/06/13/la-semaine-du-bac-est-arrivee/"
455
456
457
class Rall(GenericNavigableComic):
    """Class to retrieve Ted Rall comics."""
    # Also on http://www.gocomics.com/tedrall
    name = 'rall'
    long_name = "Ted Rall"
    url = "http://rall.com/comic"
    _categories = ('RALL', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    # Not the first but I didn't find an efficient way to retrieve it
    first_url = "http://rall.com/2014/01/30/los-angeles-times-cartoon-well-miss-those-california-flowers"

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Title and description come from the 'og:' meta tags, author and
        date from the "author vcard" and "entry-date" spans, images from
        the 'entry-content' div.
        """
        title = soup.find('meta', property='og:title')['content']
        author = soup.find("span", class_="author vcard").find("a").string
        date_str = soup.find("span", class_="entry-date").string
        day = string_to_date(date_str, "%B %d, %Y")
        desc = soup.find('meta', property='og:description')['content']
        imgs = soup.find('div', class_='entry-content').find_all('img')
        imgs = imgs[:-7]  # remove social media buttons
        return {
            'title': title,
            'author': author,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'description': desc,
            'img': [i['src'] for i in imgs],
        }
488
489
490
class Dilem(GenericNavigableComic):
    """Class to retrieve Ali Dilem comics."""
    name = 'dilem'
    long_name = 'Ali Dilem'
    url = 'http://information.tv5monde.com/dilem'
    _categories = ('FRANCAIS', )
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = "http://information.tv5monde.com/dilem/2004-06-26"

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns the 'a' tag found in the relevant 'li' tag, or None when
        there is no such 'li'.
        """
        # prev is next / next is prev
        li = last_soup.find('li', class_='prev' if next_ else 'next')
        return li.find('a') if li else None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The date is taken from the first 10 characters of the 'dc:date'
        span content, parsed as "%Y-%m-%d".
        """
        short_url = soup.find('link', rel='shortlink')['href']
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        imgs = soup.find_all('meta', property='og:image')
        date_str = soup.find('span', property='dc:date')['content'][:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        return {
            'short_url': short_url,
            'title': title,
            'img': [i['content'] for i in imgs],
            'day': day.day,
            'month': day.month,
            'year': day.year,
        }
524
525
526
class SpaceAvalanche(GenericNavigableComic):
    """Class to retrieve Space Avalanche comics."""
    name = 'avalanche'
    long_name = 'Space Avalanche'
    url = 'http://www.spaceavalanche.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics (hardcoded link-like dictionary)."""
        return {'href': "http://www.spaceavalanche.com/2009/02/02/irish-sea/", 'title': "Irish Sea"}

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The title is the 'title' of `link`; year, month and day are the
        three numeric path components extracted from the URL of `link`.
        """
        url_date_re = re.compile('.*/([0-9]*)/([0-9]*)/([0-9]*)/.*$')
        title = link['title']
        url = cls.get_url_from_link(link)
        year, month, day = [int(s)
                            for s in url_date_re.match(url).groups()]
        imgs = soup.find("div", class_="entry").find_all("img")
        return {
            'title': title,
            'day': day,
            'month': month,
            'year': year,
            'img': [i['src'] for i in imgs],
        }
554
555
556
class ZenPencils(GenericNavigableComic):
    """Class to retrieve ZenPencils comics."""
    # Also on http://zenpencils.tumblr.com
    # Also on http://www.gocomics.com/zen-pencils
    name = 'zenpencils'
    long_name = 'Zen Pencils'
    url = 'http://zenpencils.com'
    _categories = ('ZENPENCILS', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = "http://zenpencils.com/comic/1-ralph-waldo-emerson-make-them-cry/"

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Images are the 'img' tags of the div with id 'comic'; their source
        is joined to `cls.url`. Assertions check that at least one image was
        found and that each image's 'alt' equals its 'title' and is either
        the comic title or empty.
        """
        imgs = soup.find('div', id='comic').find_all('img')
        post = soup.find('div', class_='post-content')
        author = post.find("span", class_="post-author").find("a").string
        title = soup.find('meta', property='og:title')['content']
        date_str = post.find('span', class_='post-date').string
        day = string_to_date(date_str, "%B %d, %Y")
        assert imgs
        assert all(i['alt'] == i['title'] for i in imgs)
        assert all(i['alt'] in (title, "") for i in imgs)
        desc = soup.find('meta', property='og:description')['content']
        return {
            'title': title,
            'description': desc,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
        }
591
592
593
class ItsTheTie(GenericNavigableComic):
    """Class to retrieve It's the tie comics."""
    # Also on http://itsthetie.tumblr.com
    # Also on https://tapastic.com/series/itsthetie
    name = 'tie'
    long_name = "It's the tie"
    url = "http://itsthetie.com"
    _categories = ('TIE', )
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Images are the 'og:image' meta contents followed by the
        'data-oversrc' bonus images not already listed. 'tags' is the
        content of the 'article:tag' meta, or "" when there is none.
        """
        title = soup.find('h1', class_='comic-title').find('a').string
        date_str = soup.find('header', class_='comic-meta entry-meta').find('a').string
        day = string_to_date(date_str, "%B %d, %Y")
        # Bonus images may or may not be in meta og:image.
        imgs = soup.find_all('meta', property='og:image')
        imgs_src = [i['content'] for i in imgs]
        bonus = soup.find_all('img', attrs={'data-oversrc': True})
        bonus_src = [b['data-oversrc'] for b in bonus]
        all_imgs_src = imgs_src + [s for s in bonus_src if s not in imgs_src]
        # This specific image is always excluded - presumably a generic
        # placeholder rather than comic content (TODO confirm).
        all_imgs_src = [s for s in all_imgs_src if not s.endswith("/2016/01/bonus-panel.png")]
        tag_meta = soup.find('meta', property='article:tag')
        tags = tag_meta['content'] if tag_meta else ""
        return {
            'title': title,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': all_imgs_src,
            'tags': tags,
        }
627
628
629
class PenelopeBagieu(GenericNavigableComic):
    """Class to retrieve comics from Penelope Bagieu's blog."""
    name = 'bagieu'
    long_name = 'Ma vie est tout a fait fascinante (Bagieu)'
    url = 'http://www.penelope-jolicoeur.com'
    _categories = ('FRANCAIS', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.penelope-jolicoeur.com/2007/02/ma-vie-mon-oeuv.html'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The date is read from the 'date-header' h2 and parsed as
        "%A %d %B %Y" with the "fr_FR.utf8" locale; images are the 'img'
        tags of the 'entry-body' div.
        """
        date_str = soup.find('h2', class_='date-header').string
        day = string_to_date(date_str, "%A %d %B %Y", "fr_FR.utf8")
        imgs = soup.find('div', class_='entry-body').find_all('img')
        title = soup.find('h3', class_='entry-header').string
        return {
            'title': title,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
653
654
655
class OneOneOneOneComic(GenericNavigableComic):
    """Class to retrieve 1111 Comics."""
    # Also on http://comics1111.tumblr.com
    # Also on https://tapastic.com/series/1111-Comics
    name = '1111'
    long_name = '1111 Comics'
    url = 'http://www.1111comics.me'
    _categories = ('ONEONEONEONE', )
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Title and date come from the 'comic-title' h1 and the
        'comic-meta entry-meta' header; images from the 'og:image' metas.
        """
        title = soup.find('h1', class_='comic-title').find('a').string
        date_str = soup.find('header', class_='comic-meta entry-meta').find('a').string
        day = string_to_date(date_str, "%B %d, %Y")
        imgs = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [i['content'] for i in imgs],
        }
680
681
682
class AngryAtNothing(GenericNavigableComic):
    """Class to retrieve Angry at Nothing comics."""
    # Also on http://tapastic.com/series/Comics-yeah-definitely-comics-
    name = 'angry'
    long_name = 'Angry At Nothing'
    url = 'http://www.angryatnothing.net'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Title and date come from the 'comic-title' h1 and the
        'comic-meta entry-meta' header; images from the 'og:image' metas.
        """
        title = soup.find('h1', class_='comic-title').find('a').string
        date_str = soup.find('header', class_='comic-meta entry-meta').find('a').string
        day = string_to_date(date_str, "%B %d, %Y")
        imgs = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [i['content'] for i in imgs],
        }
705
706
707
class NeDroid(GenericNavigableComic):
    """Class to retrieve NeDroid comics."""
    name = 'nedroid'
    long_name = 'NeDroid'
    url = 'http://nedroid.com'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The comic number is extracted from the shortlink ("?p=<num>") and
        the date from the file name of the single image found in the div
        with id 'comic'.
        """
        # Literal parts of the patterns are escaped so that dots only
        # match dots.
        short_url_re = re.compile('^%s/\\?p=([0-9]*)' % re.escape(cls.url))
        comic_url_re = re.compile(r'//nedroid\.com/comics/([0-9]*)-([0-9]*)-([0-9]*).*')
        short_url = cls.get_url_from_link(soup.find('link', rel='shortlink'))
        num = int(short_url_re.match(short_url).groups()[0])
        imgs = soup.find('div', id='comic').find_all('img')
        # Checked before indexing so that an unexpected page layout is
        # reported by the assertion rather than by an IndexError.
        assert len(imgs) == 1
        img = imgs[0]
        year, month, day = [int(s) for s in comic_url_re.match(img['src']).groups()]
        title = img['alt']
        title2 = img['title']
        return {
            'short_url': short_url,
            'title': title,
            'title2': title2,
            'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
            'day': day,
            'month': month,
            'year': year,
            'num': num,
        }
738
739
740
class Garfield(GenericNavigableComic):
    """Class to retrieve Garfield comics."""
    # Also on http://www.gocomics.com/garfield
    name = 'garfield'
    long_name = 'Garfield'
    url = 'https://garfield.com'
    _categories = ('GARFIELD', )
    get_first_comic_link = simulate_first_link
    first_url = 'https://garfield.com/comic/1978/06/19'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic."""
        return last_soup.find('a', class_='comic-arrow-right' if next_ else 'comic-arrow-left')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Year, month and day are extracted from the URL of `link`
        ("<url>/comic/<year>/<month>/<day>"); images are the
        'img-responsive' images of the 'comic-display' div.
        """
        url = cls.get_url_from_link(link)
        # The URL is escaped so that its dots only match literal dots.
        date_re = re.compile('^%s/comic/([0-9]*)/([0-9]*)/([0-9]*)' % re.escape(cls.url))
        year, month, day = [int(s) for s in date_re.match(url).groups()]
        imgs = soup.find('div', class_='comic-display').find_all('img', class_='img-responsive')
        return {
            'month': month,
            'year': year,
            'day': day,
            'img': [i['src'] for i in imgs],
        }
768
769
770
class Dilbert(GenericNavigableComic):
    """Retriever for the Dilbert strips published on dilbert.com."""
    # Also on http://www.gocomics.com/dilbert-classics
    name = 'dilbert'
    long_name = 'Dilbert'
    url = 'http://dilbert.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://dilbert.com/strip/1989-04-16'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor inside the right (next) or left (previous) nav div.

        Returns None when the navigation div is not found.
        """
        nav_class = 'nav-comic nav-right' if next_ else 'nav-comic nav-left'
        nav_div = last_soup.find('div', class_=nav_class)
        if not nav_div:
            return None
        return nav_div.find('a')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for one strip from the page's <meta> tags.

        Title, description, author and tags are read from the first matching
        tag; every og:image tag contributes one image.  The publication date
        is parsed from 'article:publish_date' with the "%B %d, %Y" format.
        """
        def first_meta(prop):
            """Return the content of the first <meta> tag with this property."""
            return soup.find('meta', property=prop)['content']

        title = first_meta('og:title')
        image_metas = soup.find_all('meta', property='og:image')
        desc = first_meta('og:description')
        pub_date = string_to_date(first_meta('article:publish_date'), "%B %d, %Y")
        author = first_meta('article:author')
        tags = first_meta('article:tag')
        return {
            'title': title,
            'description': desc,
            'img': [meta['content'] for meta in image_metas],
            'author': author,
            'tags': tags,
            'day': pub_date.day,
            'month': pub_date.month,
            'year': pub_date.year
        }
806
807
808
class VictimsOfCircumsolar(GenericNavigableComic):
    """Retriever for the Victims Of Circumsolar comics."""
    name = 'circumsolar'
    long_name = 'Victims Of Circumsolar'
    url = 'http://www.victimsofcircumsolar.com'
    get_navi_link = get_a_navi_comicnavnext_navinext
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.victimsofcircumsolar.com/comic/modern-addiction'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title, description, images) for one comic.

        When several og:title / og:description tags are present, the last
        one of each is used.  No date is returned.
        """
        # Date is on the archive page
        title_meta = soup.find_all('meta', property='og:title')[-1]
        title = title_meta['content']
        desc_meta = soup.find_all('meta', property='og:description')[-1]
        desc = desc_meta['content']
        pictures = soup.find('div', id='comic').find_all('img')
        # Each image must carry the comic title as both its title and its alt.
        assert all(pic['title'] == pic['alt'] == title for pic in pictures)
        return {
            'title': title,
            'description': desc,
            'img': [pic['src'] for pic in pictures],
        }
830
831
832
class ThreeWordPhrase(GenericNavigableComic):
    """Class to retrieve Three Word Phrase comics."""
    # Also on http://www.threewordphrase.tumblr.com
    name = 'threeword'
    long_name = 'Three Word Phrase'
    url = 'http://threewordphrase.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        The link is the parent element of the '/firstlink.gif' image found
        on the home page.
        """
        return get_soup_at_url(cls.url).find('img', src='/firstlink.gif').parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns the parent element of the next/previous button image, or
        None when the button image is missing or its parent has no 'href'.
        """
        button = last_soup.find('img', src='/nextlink.gif' if next_ else '/prevlink.gif')
        # find() yields None when the button is absent; treat that like a
        # button without a target instead of failing on `.parent`.
        if button is None:
            return None
        link = button.parent
        if link is None or link.get('href') is None:
            return None
        return link

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Every <img> of the page is kept except those whose 'src' ends with
        one of the known site-decoration file names.  'title' is the page
        <title> string (None when there is no <title>), 'title2' joins the
        non-empty 'alt' texts of the kept images.
        """
        title = soup.find('title')
        imgs = [img for img in soup.find_all('img')
                if not img['src'].endswith(
                    ('link.gif', '32.png', 'twpbookad.jpg',
                     'merchad.jpg', 'header.gif', 'tipjar.jpg'))]
        return {
            'title': title.string if title else None,
            'title2': '  '.join(img.get('alt') for img in imgs if img.get('alt')),
            'img': [urljoin_wrapper(cls.url, img['src']) for img in imgs],
        }
864
865
866
class DeadlyPanel(GenericEmptyComic, GenericNavigableComic):
    """Retriever for the Deadly Panel comics."""
    # Also on https://tapastic.com/series/deadlypanel
    name = 'deadly'
    long_name = 'Deadly Panel'
    url = 'http://www.deadlypanel.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for one comic: only the image sources.

        Images are taken from the div with id 'comic'; each one is expected
        to have identical 'alt' and 'title' attributes.
        """
        pictures = soup.find('div', id='comic').find_all('img')
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        sources = [pic['src'] for pic in pictures]
        return {
            'img': sources,
        }
883
884
885
class TheGentlemanArmchair(GenericNavigableComic):
    """Retriever for The Gentleman Armchair comics."""
    name = 'gentlemanarmchair'
    long_name = 'The Gentleman Armchair'
    url = 'http://thegentlemansarmchair.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (images, title, author, date) for one comic.

        Title, author and date come from the post header elements; the date
        text is parsed with the "%B %d, %Y" format.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        raw_date = soup.find('span', class_='post-date').string
        pub_date = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find('div', id='comic').find_all('img')
        return {
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'author': author,
            'month': pub_date.month,
            'year': pub_date.year,
            'day': pub_date.day,
        }
909
910
911
class MyExtraLife(GenericNavigableComic):
    """Retriever for the My Extra Life comics."""
    name = 'extralife'
    long_name = 'My Extra Life'
    url = 'http://www.myextralife.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'first comic' navigation anchor of the home page."""
        home_soup = get_soup_at_url(cls.url)
        return home_soup.find('a', class_='comic_nav_link first_comic_link')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title, images, date) for one comic.

        Each comic image is expected to have 'alt' and 'title' equal to the
        comic title; images with an empty 'src' are left out.
        """
        title = soup.find("h1", class_="comic_title").string
        raw_date = soup.find("span", class_="comic_date").string
        pub_date = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find_all("img", class_="comic")
        assert all(pic['alt'] == pic['title'] == title for pic in pictures)
        sources = [pic['src'] for pic in pictures if pic["src"]]
        return {
            'title': title,
            'img': sources,
            'day': pub_date.day,
            'month': pub_date.month,
            'year': pub_date.year
        }
938
939
940
class SaturdayMorningBreakfastCereal(GenericNavigableComic):
    """Retriever for the Saturday Morning Breakfast Cereal comics."""
    # Also on http://www.gocomics.com/saturday-morning-breakfast-cereal
    # Also on http://smbc-comics.tumblr.com
    name = 'smbc'
    long_name = 'Saturday Morning Breakfast Cereal'
    url = 'http://www.smbc-comics.com'
    _categories = ('SMBC', )
    get_navi_link = get_a_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the rel='start' anchor of the home page."""
        home_soup = get_soup_at_url(cls.url)
        return home_soup.find('a', rel='start')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title, images, date) for one comic.

        The main image is the one with id 'cc-comic'; when an 'aftercomic'
        div exists, the source of its first image is appended if non-empty.
        """
        main_img = soup.find('img', id='cc-comic')
        sources = [main_img['src']]
        after_div = soup.find('div', id='aftercomic')
        if after_div:
            bonus_src = after_div.find('img')['src']
            if bonus_src:
                sources.append(bonus_src)
        raw_date = soup.find('div', class_='cc-publishtime').contents[0]
        pub_date = string_to_date(raw_date, "%B %d, %Y")
        return {
            'title': main_img['title'],
            'img': [urljoin_wrapper(cls.url, src) for src in sources],
            'day': pub_date.day,
            'month': pub_date.month,
            'year': pub_date.year
        }
972
973
974
class PerryBibleFellowship(GenericListableComic):
    """Retriever for the Perry Bible Fellowship comics."""
    name = 'pbf'
    long_name = 'Perry Bible Fellowship'
    url = 'http://pbfcomics.com'
    get_url_from_archive_element = join_cls_url_to_href

    @classmethod
    def get_archive_elements(cls):
        """Return the home page anchors whose href is '/<digits>/', in reverse page order."""
        numbered_href_re = re.compile('^/[0-9]*/$')
        anchors = get_soup_at_url(cls.url).find_all('a', href=numbered_href_re)
        return reversed(anchors)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (number, name, image, prefix) for one comic.

        The number is the anchor's 'name' attribute and must agree with its
        href; exactly one image under '/archive_b/PBF' is expected and its
        'alt' must equal the anchor text.
        """
        comic_url = cls.get_url_from_archive_element(link)
        picture_src_re = re.compile('^/archive_b/PBF.*')
        name = link.string
        num = int(link['name'])
        assert link['href'] == '/%d/' % num
        pictures = soup.find_all('img', src=picture_src_re)
        assert len(pictures) == 1
        assert pictures[0]['alt'] == name
        return {
            'num': num,
            'name': name,
            'img': [urljoin_wrapper(comic_url, pic['src']) for pic in pictures],
            'prefix': '%d-' % num,
        }
1004
1005
1006
class Mercworks(GenericNavigableComic):
    """Retriever for the Mercworks comics."""
    # Also on http://mercworks.tumblr.com
    name = 'mercworks'
    long_name = 'Mercworks'
    url = 'http://mercworks.net'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for one comic from the page's <meta> tags.

        The description defaults to an empty string when there is no
        og:description tag.  Only the first 10 characters of the published
        time are used, parsed as "%Y-%m-%d".
        """
        title = soup.find('meta', property='og:title')['content']
        desc_meta = soup.find('meta', property='og:description')
        if desc_meta:
            desc = desc_meta['content']
        else:
            desc = ""
        author_meta = soup.find('meta', attrs={'name': 'shareaholic:article_author_name'})
        author = author_meta['content']
        time_meta = soup.find('meta', attrs={'name': 'shareaholic:article_published_time'})
        # Keep only the leading date part of the timestamp.
        raw_date = time_meta['content'][:10]
        pub_date = string_to_date(raw_date, "%Y-%m-%d")
        image_metas = soup.find_all('meta', property='og:image')
        return {
            'img': [meta['content'] for meta in image_metas],
            'title': title,
            'author': author,
            'desc': desc,
            'day': pub_date.day,
            'month': pub_date.month,
            'year': pub_date.year
        }
1035
1036
1037
class BerkeleyMews(GenericListableComic):
    """Retriever for the Berkeley Mews comics."""
    # Also on http://mews.tumblr.com
    # Also on http://www.gocomics.com/berkeley-mews
    name = 'berkeley'
    long_name = 'Berkeley Mews'
    url = 'http://www.berkeleymews.com'
    _categories = ('BERKELEY', )
    get_url_from_archive_element = get_href
    comic_num_re = re.compile('%s/\\?p=([0-9]*)$' % url)

    @classmethod
    def get_archive_elements(cls):
        """Return the comic anchors of the archive page, in reverse page order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, "?page_id=2"))
        anchors = archive_soup.find_all('a', href=cls.comic_num_re)
        return reversed(anchors)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (number, titles, image, date) for one comic.

        The number comes from the '?p=<num>' part of the comic URL and the
        date from the '<y>-<m>-<d>-' part of the image URL.
        """
        date_in_src_re = re.compile('.*/([0-9]*)-([0-9]*)-([0-9]*)-.*')
        comic_url = cls.get_url_from_archive_element(link)
        num = int(cls.comic_num_re.match(comic_url).group(1))
        picture = soup.find('div', id='comic').find('img')
        assert picture['alt'] == picture['title']
        title2 = picture['title']
        picture_src = picture['src']
        year, month, day = map(int, date_in_src_re.match(picture_src).groups())
        return {
            'num': num,
            'title': link.string,
            'title2': title2,
            'img': [picture_src],
            'year': year,
            'month': month,
            'day': day,
        }
1073
1074
1075
class GenericBouletCorp(GenericNavigableComic):
    """Shared retriever logic for the BouletCorp comics in their various languages."""
    # Also on http://bouletcorp.tumblr.com
    _categories = ('BOULET', )
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the first anchor of the 'centered_nav' div of the home page."""
        nav_div = get_soup_at_url(cls.url).find('div', id='centered_nav')
        return nav_div.find_all('a')[0]

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (images, title, texts, date) for one comic.

        The date is parsed from the comic URL (<cls.url>/<y>/<m>/<d>/).
        'texts' joins the non-empty 'title' attributes of the images; images
        without a 'src' attribute are left out of 'img'.
        """
        comic_url = cls.get_url_from_link(link)
        date_match = re.match('^%s/([0-9]*)/([0-9]*)/([0-9]*)/' % cls.url, comic_url)
        year, month, day = map(int, date_match.groups())
        story_div = soup.find('div', id='notes').find('div', class_='storycontent')
        pictures = story_div.find_all('img')
        picture_titles = (pic.get('title') for pic in pictures)
        texts = '  '.join(text for text in picture_titles if text)
        title = soup.find('title').string
        return {
            'img': [convert_iri_to_plain_ascii_uri(pic['src'])
                    for pic in pictures if pic.get('src') is not None],
            'title': title,
            'texts': texts,
            'year': year,
            'month': month,
            'day': day,
        }
1103
1104
1105
class BouletCorp(GenericBouletCorp):
    """Retriever for the BouletCorp comics at www.bouletcorp.com (category 'FRANCAIS')."""
    name = 'boulet'
    long_name = 'Boulet Corp'
    url = 'http://www.bouletcorp.com'
    _categories = ('FRANCAIS', )
1111
1112
1113
class BouletCorpEn(GenericBouletCorp):
    """Retriever for the English BouletCorp comics at english.bouletcorp.com."""
    name = 'boulet_en'
    long_name = 'Boulet Corp English'
    url = 'http://english.bouletcorp.com'
1118
1119
1120
class AmazingSuperPowers(GenericNavigableComic):
    """Retriever for the Amazing Super Powers comics."""
    name = 'asp'
    long_name = 'Amazing Super Powers'
    url = 'http://www.amazingsuperpowers.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title, author, images, date) for one comic.

        The title is the space-joined 'title' attributes of the comic
        images, each of which must equal the image's 'alt'.
        """
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        raw_date = soup.find('span', class_='post-date').string
        pub_date = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find('div', id='comic').find_all('img')
        title = ' '.join(pic['title'] for pic in pictures)
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        return {
            'title': title,
            'author': author,
            'img': [pic['src'] for pic in pictures],
            'day': pub_date.day,
            'month': pub_date.month,
            'year': pub_date.year
        }
1145
1146
1147
class ToonHole(GenericListableComic):
    """Retriever for the Toon Hole comics."""
    # Also on http://tapastic.com/series/TOONHOLE
    name = 'toonhole'
    long_name = 'Toon Hole'
    url = 'http://www.toonhole.com'
    get_url_from_archive_element = get_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title, date, images) for one comic.

        The title is the archive anchor text; every comic image must have
        'alt' and 'title' equal to it.  The 'comicdate' text is stripped and
        passed through remove_st_nd_rd_th_from_date before being parsed
        with "%B %d, %Y".
        """
        title = link.string
        raw_date = soup.find('div', class_='comicdate').string.strip()
        pub_date = string_to_date(remove_st_nd_rd_th_from_date(raw_date), "%B %d, %Y")
        pictures = soup.find('div', id='comic').find_all('img')
        assert all(pic['alt'] == pic['title'] == title for pic in pictures)
        return {
            'title': title,
            'month': pub_date.month,
            'year': pub_date.year,
            'day': pub_date.day,
            'img': [convert_iri_to_plain_ascii_uri(pic['src']) for pic in pictures],
        }

    @classmethod
    def get_archive_elements(cls):
        """Return the rel='bookmark' anchors of the archive page, in reverse page order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive/'))
        bookmarks = archive_soup.find_all('a', rel='bookmark')
        return reversed(bookmarks)
1175
1176
1177
class Channelate(GenericNavigableComic):
    """Retriever for the Channelate comics."""
    name = 'channelate'
    long_name = 'Channelate'
    url = 'http://www.channelate.com'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for one comic, including its extra panel if any.

        When the page has an 'extrapanelbutton' div, the page it links to is
        fetched and its 'extrapanelimage' images are appended after the main
        comic images.  'url_extra' is None when there is no extra panel.
        """
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        raw_date = soup.find('span', class_='post-date').string
        pub_date = string_to_date(raw_date, '%Y/%m/%d')
        title = soup.find('meta', property='og:title')['content']
        comic_div = soup.find('div', id='comic')
        if comic_div:
            pictures = comic_div.find_all('img')
        else:
            pictures = []
        extra_url = None
        button_div = soup.find('div', id='extrapanelbutton')
        if button_div:
            extra_url = button_div.find('a')['href']
            extra_page = get_soup_at_url(extra_url)
            pictures.extend(extra_page.find_all('img', class_='extrapanelimage'))
        return {
            'url_extra': extra_url,
            'title': title,
            'author': author,
            'month': pub_date.month,
            'year': pub_date.year,
            'day': pub_date.day,
            'img': [urljoin_wrapper(cls.url, pic['src']) for pic in pictures],
        }
1211
1212
1213
class CyanideAndHappiness(GenericNavigableComic):
    """Class to retrieve Cyanide And Happiness comics."""
    name = 'cyanide'
    long_name = 'Cyanide and Happiness'
    url = 'http://explosm.net'
    _categories = ('NSFW', )
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics (the anchor titled 'Oldest comic' on the home page)."""
        return get_soup_at_url(cls.url).find('a', title='Oldest comic')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns None when the navigation anchor is missing or has no 'href'.
        """
        # NOTE(review): the trailing space in 'previous-comic ' is kept exactly
        # as written; presumably it mirrors the site's markup - confirm.
        link = last_soup.find('a', class_='next-comic' if next_ else 'previous-comic ')
        # find() yields None when no anchor matches; treat that like an anchor
        # without target instead of failing on `.get`.
        if link is None or link.get('href') is None:
            return None
        return link

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The comic number is the second-to-last '/'-separated part of the
        og:url content.  The author credit must start with 'by ', which is
        removed.  The date text is parsed with the '%Y.%m.%d' format.
        """
        url2 = soup.find('meta', property='og:url')['content']
        num = int(url2.split('/')[-2])
        date_str = soup.find('h3').find('a').string
        day = string_to_date(date_str, '%Y.%m.%d')
        author = soup.find('small', class_="author-credit-name").string
        assert author.startswith('by ')
        author = author[3:]
        imgs = soup.find_all('img', id='main-comic')
        return {
            'num': num,
            'author': author,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'prefix': '%d-' % num,
            'img': [convert_iri_to_plain_ascii_uri(urljoin_wrapper(cls.url, i['src'])) for i in imgs]
        }
1252
1253
1254
class MrLovenstein(GenericComic):
    """Retriever for the Mr. Lovenstein comics."""
    # Also on https://tapastic.com/series/MrLovenstein
    name = 'mrlovenstein'
    long_name = 'Mr. Lovenstein'
    url = 'http://www.mrlovenstein.com'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Yield info dicts for the comics following last_comic.

        The range of comic numbers is derived from the '/comic/<num>' links
        of the home page; when last_comic is given, iteration starts right
        after its 'num'.  Images of each page are listed in reverse page
        order.  Implementation of GenericComic's abstract method.
        """
        # TODO: more info from http://www.mrlovenstein.com/archive
        number_href_re = re.compile('^/comic/([0-9]*)$')
        picture_src_re = re.compile('^/images/comics/')
        home_links = get_soup_at_url(cls.url).find_all('a', href=number_href_re)
        known_nums = [int(number_href_re.match(anchor['href']).group(1))
                      for anchor in home_links]
        first, last = min(known_nums), max(known_nums)
        if last_comic:
            first = last_comic['num'] + 1
        for num in range(first, last + 1):
            comic_url = urljoin_wrapper(cls.url, '/comic/%d' % num)
            page = get_soup_at_url(comic_url)
            pictures = page.find_all('img', src=picture_src_re)[::-1]
            description = page.find('meta', attrs={'name': 'description'})['content']
            picture_titles = (pic.get('title') for pic in pictures)
            yield {
                'url': comic_url,
                'num': num,
                'texts': '  '.join(text for text in picture_titles if text),
                'img': [urljoin_wrapper(comic_url, pic['src']) for pic in pictures],
                'description': description,
            }
1284
1285
1286
class DinosaurComics(GenericListableComic):
    """Retriever for the Dinosaur Comics comics."""
    name = 'dinosaur'
    long_name = 'Dinosaur Comics'
    url = 'http://www.qwantz.com'
    get_url_from_archive_element = get_href
    comic_link_re = re.compile('^%s/index.php\\?comic=([0-9]*)$' % url)

    @classmethod
    def get_archive_elements(cls):
        """Return the comic anchors of the archive page, in reverse page order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive.php'))
        anchors = archive_soup.find_all('a', href=cls.comic_link_re)
        # first link is random -> skip it
        return reversed(anchors[1:])

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (date, image, title, text, number) for one comic.

        The archive anchor supplies the number (from its URL), the date (its
        text) and the accompanying text (its next sibling); the page supplies
        the first image whose src is under <cls.url>/comics/.
        """
        comic_url = cls.get_url_from_archive_element(link)
        num = int(cls.comic_link_re.match(comic_url).group(1))
        raw_date = link.string
        text = link.next_sibling.string
        pub_date = string_to_date(remove_st_nd_rd_th_from_date(raw_date), "%B %d, %Y")
        picture = soup.find('img', src=re.compile('^%s/comics/' % cls.url))
        return {
            'month': pub_date.month,
            'year': pub_date.year,
            'day': pub_date.day,
            'img': [picture.get('src')],
            'title': picture.get('title'),
            'text': text,
            'num': num,
        }
1319
1320
1321
class ButterSafe(GenericListableComic):
    """Retriever for the ButterSafe comics."""
    name = 'butter'
    long_name = 'ButterSafe'
    url = 'http://buttersafe.com'
    get_url_from_archive_element = get_href
    comic_link_re = re.compile('^%s/([0-9]*)/([0-9]*)/([0-9]*)/.*' % url)

    @classmethod
    def get_archive_elements(cls):
        """Return the comic anchors of the archive page, in reverse page order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive/'))
        anchors = archive_soup.find_all('a', href=cls.comic_link_re)
        return reversed(anchors)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title, date, image) for one comic.

        The date comes from the <y>/<m>/<d> part of the comic URL; the title
        is the archive anchor text and must equal the image's 'alt'.
        """
        comic_url = cls.get_url_from_archive_element(link)
        title = link.string
        year, month, day = map(int, cls.comic_link_re.match(comic_url).groups())
        picture = soup.find('div', id='comic').find('img')
        assert picture['alt'] == title
        return {
            'title': title,
            'day': day,
            'month': month,
            'year': year,
            'img': [picture['src']],
        }
1349
1350
1351
class CalvinAndHobbes(GenericComic):
    """Class to retrieve Calvin and Hobbes comics."""
    # Also on http://www.gocomics.com/calvinandhobbes/
    name = 'calvin'
    long_name = 'Calvin and Hobbes'
    # This is not through any official webpage but eh...
    url = 'http://marcel-oehler.marcellosendos.ch/comics/ch/'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generator to get the next comic. Implementation of GenericComic's abstract method.

        Walks the '<year>/<month>/' links of the index page, opens every
        month page that may still hold comics newer than the last retrieved
        one, and yields an info dict for each image dated after it.  Without
        last_comic, retrieval starts after 1985-11-01.
        """
        last_date = get_date_for_comic(
            last_comic) if last_comic else date(1985, 11, 1)
        link_re = re.compile('^([0-9]*)/([0-9]*)/')
        for link in get_soup_at_url(cls.url).find_all('a', href=link_re):
            url = link['href']
            # Keep the textual forms too: they are reused verbatim in the
            # image pattern and in the image URL.
            year_str, month_str = link_re.match(url).groups()
            year, month = int(year_str), int(month_str)
            # Skip months that ended before the last retrieved comic.
            if date(year, month, 1) + timedelta(days=31) < last_date:
                continue
            img_re = re.compile('^%s%s([0-9]*)' % (year_str, month_str))
            month_url = urljoin_wrapper(cls.url, url)
            for img in get_soup_at_url(month_url).find_all('img', src=img_re):
                img_src = img['src']
                day = int(img_re.match(img_src).group(1))
                comic_date = date(year, month, day)
                if comic_date > last_date:
                    yield {
                        'url': month_url,
                        'year': year,
                        'month': month,
                        'day': day,
                        'img': ['%s%s/%s/%s' % (cls.url, year_str, month_str, img_src)],
                    }
                    last_date = comic_date
1385
1386
1387
class AbstruseGoose(GenericListableComic):
    """Retriever for the Abstruse Goose comics."""
    name = 'abstruse'
    long_name = 'Abstruse Goose'
    url = 'http://abstrusegoose.com'
    get_url_from_archive_element = get_href
    comic_url_re = re.compile('^%s/([0-9]*)$' % url)
    comic_img_re = re.compile('^%s/strips/.*' % url)

    @classmethod
    def get_archive_elements(cls):
        """Return the comic anchors of the archive page, in page order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive'))
        return archive_soup.find_all('a', href=cls.comic_url_re)

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Build the info dict (number, title, image) for one comic.

        The number is parsed from the comic URL, the title is the archive
        anchor text and the image is the first one under <url>/strips/.
        """
        target_url = cls.get_url_from_archive_element(archive_elt)
        num = int(cls.comic_url_re.match(target_url).group(1))
        title = archive_elt.string
        strip_img = soup.find('img', src=cls.comic_img_re)
        return {
            'num': num,
            'title': title,
            'img': [strip_img['src']]
        }
1410
1411
1412
class PhDComics(GenericNavigableComic):
    """Retriever for the PhD Comics."""
    name = 'phd'
    long_name = 'PhD Comics'
    url = 'http://phdcomics.com/comics/archive.php'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the parent element of the 'first' button image of the archive page."""
        first_button = get_soup_at_url(cls.url).find('img', src='images/first_button.gif')
        return first_button.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the parent of the next/previous button image, or None if there is no button."""
        button_src = 'images/next_button.gif' if next_ else 'images/prev_button.gif'
        button = last_soup.find('img', src=button_src)
        if button is None:
            return None
        return button.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (date, image, title) for one comic.

        The date text is parsed with '%m/%d/%Y'; when it cannot be parsed, a
        message is printed and today's date is used instead.
        """
        date_font = soup.find('font', face='Arial,Helvetica,Geneva,Swiss,SunSans-Regular', color='white')
        raw_date = date_font.string.strip()
        try:
            pub_date = string_to_date(raw_date, '%m/%d/%Y')
        except ValueError:
            print("Invalid date %s" % raw_date)
            pub_date = date.today()
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        return {
            'year': pub_date.year,
            'month': pub_date.month,
            'day': pub_date.day,
            'img': [soup.find('img', id='comic')['src']],
            'title': title,
        }
1447
1448
1449
class Octopuns(GenericNavigableComic):
    """Class to retrieve Octopuns comics."""
    # Also on http://octopuns.tumblr.com
    name = 'octopuns'
    long_name = 'Octopuns'
    url = 'http://www.octopuns.net'

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        The link is the parent element of the image whose src matches
        '.*/First.png' on the home page.
        """
        return get_soup_at_url(cls.url).find('img', src=re.compile('.*/First.png')).parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns the parent element of the Next/Back button image, or None
        when the button image is missing or its parent has no 'href'.
        """
        button = last_soup.find('img', src=re.compile('.*/Next.png' if next_ else '.*/Back.png'))
        # find() yields None when the button is absent; treat that like a
        # button without a target instead of failing on `.parent`.
        if button is None:
            return None
        link = button.parent
        if link is None or link.get('href') is None:
            return None
        return link

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Title and date come from the post header elements (the date text is
        parsed with "%A, %B %d, %Y"); images are the hrefs of the
        <link rel="image_src"> tags.
        """
        title = soup.find('h3', class_='post-title entry-title').string
        date_str = soup.find('h2', class_='date-header').string
        day = string_to_date(date_str, "%A, %B %d, %Y")
        imgs = soup.find_all('link', rel='image_src')
        return {
            'img': [i['href'] for i in imgs],
            'title': title,
            'day': day.day,
            'month': day.month,
            'year': day.year,
        }
1481
1482
1483
class Quarktees(GenericNavigableComic):
    """Retriever for the Quarktees comics."""
    name = 'quarktees'
    long_name = 'Quarktees'
    url = 'http://www.quarktees.com/blogs/news'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.quarktees.com/blogs/news/12486621-coming-soon'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the 'article-next' (or 'article-prev') anchor of the page."""
        anchor_id = 'article-next' if next_ else 'article-prev'
        return last_soup.find('a', id=anchor_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title, images) for one article.

        The title is the og:title content; images are all the <img> tags of
        the 'single-article' div, resolved against cls.url.
        """
        title = soup.find('meta', property='og:title')['content']
        article_div = soup.find('div', class_='single-article')
        pictures = article_div.find_all('img')
        return {
            'title': title,
            'img': [urljoin_wrapper(cls.url, pic['src']) for pic in pictures],
        }
1507
1508
1509
class OverCompensating(GenericNavigableComic):
    """Retriever for the Over Compensating comics."""
    name = 'compensating'
    long_name = 'Over Compensating'
    url = 'http://www.overcompensating.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the link from the home page whose href ends with 'comic=1'."""
        home = get_soup_at_url(cls.url)
        return home.find('a', href=re.compile('comic=1$'))

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the link to the next or previous comic, found by its title."""
        wanted_title = 'next comic' if next_ else 'go back already'
        return last_soup.find('a', title=wanted_title)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the number, image url and title of a comic.

        The number is read from the 'comic=<n>' suffix of the comic url.
        """
        comic_url = cls.get_url_from_link(link)
        num_match = re.compile('.*comic=([0-9]*)$').match(comic_url)
        num = int(num_match.group(1))
        picture = soup.find('img', src=re.compile('^/oc/comics/.*'))
        sources = [urljoin_wrapper(comic_url, picture['src'])]
        return {
            'num': num,
            'img': sources,
            'title': picture.get('title')
        }
1539
1540
1541
class Oglaf(GenericNavigableComic):
    """Retriever for the Oglaf comics."""
    name = 'oglaf'
    long_name = 'Oglaf [NSFW]'
    url = 'http://oglaf.com'
    _categories = ('NSFW', )
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the element enclosing the 'st' div of the home page."""
        home = get_soup_at_url(cls.url)
        return home.find("div", id="st").parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the element enclosing the next/previous div, or None."""
        marker = last_soup.find("div", id="nx" if next_ else "pvs")
        if not marker:
            return None
        return marker.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, image urls and description of a comic page.

        Exactly one title image and one strip image are expected; the
        description joins the 'title' attributes of both.
        """
        page_title = soup.find('title').string
        heading_pics = soup.find('div', id='tt').find_all('img')
        assert len(heading_pics) == 1
        strip_pics = soup.find_all('img', id='strip')
        assert len(strip_pics) == 1
        pictures = heading_pics + strip_pics
        description = ' '.join(pic['title'] for pic in pictures)
        sources = [pic['src'] for pic in pictures]
        return {
            'title': page_title,
            'img': sources,
            'description': description,
        }
1575
1576
1577
class ScandinaviaAndTheWorld(GenericNavigableComic):
    """Retriever for the Scandinavia And The World comics."""
    name = 'satw'
    long_name = 'Scandinavia And The World'
    url = 'http://satwcomic.com'
    get_first_comic_link = simulate_first_link
    first_url = 'http://satwcomic.com/sweden-denmark-and-norway'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the link whose accesskey is 'n' (next) or 'p' (previous)."""
        key = 'n' if next_ else 'p'
        return last_soup.find('a', accesskey=key)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, description and image urls read from the page."""
        label = soup.find('meta', attrs={'name': 'twitter:label1'})
        title = label['content']
        og_desc = soup.find('meta', property='og:description')
        description = og_desc['content']
        pictures = soup.find_all('img', itemprop="image")
        sources = [pic['src'] for pic in pictures]
        return {
            'title': title,
            'description': description,
            'img': sources,
        }
1601
1602
1603
class SomethingOfThatIlk(GenericEmptyComic):  # Does not exist anymore
    """Placeholder for the Something Of That Ilk comics.

    Only the identification attributes are declared here.
    """
    name = 'somethingofthatilk'
    long_name = 'Something Of That Ilk'
    url = 'http://www.somethingofthatilk.com'
1608
1609
1610
class InfiniteMonkeyBusiness(GenericNavigableComic):
    """Retriever for the Infinite Monkey Business comics."""
    name = 'monkey'
    long_name = 'Infinite Monkey Business'
    url = 'http://infinitemonkeybusiness.net'
    get_navi_link = get_a_navi_comicnavnext_navinext
    get_first_comic_link = simulate_first_link
    first_url = 'http://infinitemonkeybusiness.net/comic/pillory/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the title and the urls of the images in the 'comic' div."""
        og_title = soup.find('meta', property='og:title')
        title = og_title['content']
        pictures = soup.find('div', id='comic').find_all('img')
        sources = [pic['src'] for pic in pictures]
        return {
            'title': title,
            'img': sources,
        }
1628
1629
1630
class Wondermark(GenericListableComic):
    """Retriever for the Wondermark comics."""
    name = 'wondermark'
    long_name = 'Wondermark'
    url = 'http://wondermark.com'
    get_url_from_archive_element = get_href

    @classmethod
    def get_archive_elements(cls):
        """Return the 'bookmark' links of the archive page, in reversed order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive/'))
        bookmarks = archive_soup.find_all('a', rel='bookmark')
        return reversed(bookmarks)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return date, image url, title, alt text and tags of a page.

        When the page has no 'comic' div, the image list is empty and the
        title and alt text are empty strings.
        """
        raw_date = soup.find('div', class_='postdate').find('em').string
        published = string_to_date(remove_st_nd_rd_th_from_date(raw_date), "%B %d, %Y")
        # Defaults used for pages without a comic.
        img_src, alt, title = [], '', ''
        comic_div = soup.find('div', id='comic')
        if comic_div:
            picture = comic_div.find('img')
            img_src = [picture['src']]
            alt = picture['alt']
            assert alt == picture['title']
            title = soup.find('meta', property='og:title')['content']
        tag_links = soup.find('div', class_='postmeta').find_all('a', rel='tag')
        tags = ' '.join(t.string for t in tag_links)
        return {
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': img_src,
            'title': title,
            'alt': alt,
            'tags': tags,
        }
1667
1668
1669
class WarehouseComic(GenericNavigableComic):
    """Retriever for the Warehouse Comic comics."""
    name = 'warehouse'
    long_name = 'Warehouse Comic'
    url = 'http://warehousecomic.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return image urls, title and publication date of a comic page."""
        title = soup.find('h2', class_='post-title').string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find('div', id='comic').find_all('img')
        sources = [pic['src'] for pic in pictures]
        return {
            'img': sources,
            'title': title,
            'day': published.day,
            'month': published.month,
            'year': published.year,
        }
1691
1692
1693
class JustSayEh(GenericNavigableComic):
    """Retriever for the Just Say Eh comics."""
    # Also on http://tapastic.com/series/Just-Say-Eh
    name = 'justsayeh'
    long_name = 'Just Say Eh'
    url = 'http://www.justsayeh.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return image urls, title and alt text of a comic page.

        The alt text is taken from the first image of the 'comic' div.
        """
        title = soup.find('h2', class_='post-title').string
        pictures = soup.find("div", id="comic").find_all("img")
        # Every image is expected to carry identical alt and title texts.
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        first_alt = pictures[0]['alt']
        sources = [pic['src'] for pic in pictures]
        return {
            'img': sources,
            'title': title,
            'alt': first_alt,
        }
1714
1715
1716
class MouseBearComedy(GenericNavigableComic):
    """Retriever for the Mouse Bear Comedy comics."""
    # Also on http://mousebearcomedy.tumblr.com
    name = 'mousebear'
    long_name = 'Mouse Bear Comedy'
    url = 'http://www.mousebearcomedy.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return date, image urls, title and author of a comic page."""
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, '%B %d, %Y')
        pictures = soup.find("div", id="comic").find_all("img")
        # Alt and title of each image are expected to equal the post title.
        assert all(pic['alt'] == pic['title'] == title for pic in pictures)
        sources = [pic['src'] for pic in pictures]
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': sources,
            'title': title,
            'author': author,
        }
1742
1743
1744
class BigFootJustice(GenericNavigableComic):
    """Retriever for the Big Foot Justice comics."""
    # Also on http://tapastic.com/series/bigfoot-justice
    name = 'bigfoot'
    long_name = 'Big Foot Justice'
    url = 'http://bigfootjustice.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return image urls and a title built from the images' title texts."""
        pictures = soup.find('div', id='comic').find_all('img')
        assert all(pic['title'] == pic['alt'] for pic in pictures)
        joined_title = ' '.join(pic['title'] for pic in pictures)
        sources = [pic['src'] for pic in pictures]
        return {
            'img': sources,
            'title': joined_title,
        }
1763
1764
1765
class RespawnComic(GenericNavigableComic):
    """Class to retrieve Respawn Comic."""
    # Also on http://respawncomic.tumblr.com
    name = 'respawn'
    long_name = 'Respawn Comic'
    # No trailing whitespace here: a space is not a valid url character.
    url = 'http://respawncomic.com'
    _categories = ('RESPAWN', )
    get_navi_link = get_a_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://respawncomic.com/comic/c0001/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with the title, author, publication date
        (day/month/year) and image urls read from the page's meta tags.
        Images listed in `skip_imgs` are left out of the result.
        """
        title = soup.find('meta', property='og:title')['content']
        author = soup.find('meta', attrs={'name': 'shareaholic:article_author_name'})['content']
        date_str = soup.find('meta', attrs={'name': 'shareaholic:article_published_time'})['content']
        # Only the first 10 characters are parsed, as a "%Y-%m-%d" date.
        date_str = date_str[:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        imgs = soup.find_all('meta', property='og:image')
        # 'og:image' values that are excluded from the comic's images.
        skip_imgs = {
            'http://respawncomic.com/wp-content/uploads/2016/03/site/HAROLD2.png',
            'http://respawncomic.com/wp-content/uploads/2016/03/site/DEVA.png'
        }
        return {
            'title': title,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [i['content'] for i in imgs if i['content'] not in skip_imgs],
        }
1797
1798
1799
class SafelyEndangered(GenericNavigableComic):
    """Retriever for the Safely Endangered comics."""
    # Also on http://tumblr.safelyendangered.com
    name = 'endangered'
    long_name = 'Safely Endangered'
    url = 'http://www.safelyendangered.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.safelyendangered.com/comic/ignored/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return date, image urls, title and alt text of a comic page.

        The alt text is taken from the first image of the 'comic' div.
        """
        title = soup.find('h2', class_='post-title').string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, '%B %d, %Y')
        pictures = soup.find('div', id='comic').find_all('img')
        first_alt = pictures[0]['alt']
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        sources = [pic['src'] for pic in pictures]
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': sources,
            'title': title,
            'alt': first_alt,
        }
1826
1827
1828
class PicturesInBoxes(GenericNavigableComic):
    """Retriever for the Pictures In Boxes comics."""
    # Also on http://picturesinboxescomic.tumblr.com
    name = 'picturesinboxes'
    long_name = 'Pictures in Boxes'
    url = 'http://www.picturesinboxes.com'
    get_navi_link = get_a_navi_navinext
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.picturesinboxes.com/2013/10/26/tetris/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return date, image urls, title and author of a comic page."""
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, '%B %d, %Y')
        pictures = soup.find('div', class_='comicpane').find_all('img')
        # At least one image is expected, each one labelled with the post title.
        assert pictures
        assert all(pic['title'] == pic['alt'] == title for pic in pictures)
        sources = [pic['src'] for pic in pictures]
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': sources,
            'title': title,
            'author': author,
        }
1856
1857
1858
class Penmen(GenericEmptyComic):
    """Placeholder for the Penmen comics.

    Only the identification attributes are declared here.
    """
    name = 'penmen'
    long_name = 'Penmen'
    url = 'http://penmen.com'
1863
1864
1865
class TheDoghouseDiaries(GenericNavigableComic):
    """Retriever for The Dog House Diaries comics."""
    name = 'doghouse'
    long_name = 'The Dog House Diaries'
    url = 'http://thedoghousediaries.com'

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'firstlink' anchor of the home page."""
        home = get_soup_at_url(cls.url)
        return home.find('a', id='firstlink')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the 'nextlink' or 'previouslink' anchor of the page."""
        wanted_id = 'nextlink' if next_ else 'previouslink'
        return last_soup.find('a', id=wanted_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return titles, alt text, image url and number of a comic.

        The number is the last '/'-separated component of the comic url.
        """
        picture = soup.find('img', src=re.compile('^dhdcomics/.*'))
        comic_url = cls.get_url_from_link(link)
        title = soup.find('h2', id='titleheader').string
        subtitle = soup.find('div', id='subtext').string
        alt = picture.get('title')
        # The src attribute is stripped of surrounding whitespace before joining.
        sources = [urljoin_wrapper(comic_url, picture['src'].strip())]
        num = int(comic_url.split('/')[-1])
        return {
            'title': title,
            'title2': subtitle,
            'alt': alt,
            'img': sources,
            'num': num,
        }
1894
1895
1896
class InvisibleBread(GenericListableComic):
    """Retriever for the Invisible Bread comics."""
    # Also on http://www.gocomics.com/invisible-bread
    name = 'invisiblebread'
    long_name = 'Invisible Bread'
    url = 'http://invisiblebread.com'

    @classmethod
    def get_archive_elements(cls):
        """Return the 'archive-title' cells of the archive page, reversed."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archives/'))
        cells = archive_soup.find_all('td', class_='archive-title')
        return reversed(cells)

    @classmethod
    def get_url_from_archive_element(cls, td):
        """Return the href of the link contained in an archive cell."""
        anchor = td.find('a')
        return anchor['href']

    @classmethod
    def get_comic_info(cls, soup, td):
        """Return date, image url and title of a comic.

        Month and day come from the cell preceding `td`; the year is read
        from the first numeric path component of the comic url.
        """
        comic_url = cls.get_url_from_archive_element(td)
        title = td.find('a').string
        month_and_day = td.previous_sibling.string
        # NOTE(review): cls.url is interpolated without re.escape.
        year = re.compile('^%s/([0-9]+)/' % cls.url).match(comic_url).group(1)
        published = string_to_date(month_and_day + ' ' + year, '%b %d %Y')
        pictures = [soup.find('div', id='comic').find('img')]
        # NOTE(review): always true, the list is built with a single element.
        assert len(pictures) == 1
        assert all(pic['title'] == pic['alt'] == title for pic in pictures)
        sources = [urljoin_wrapper(cls.url, pic['src']) for pic in pictures]
        return {
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': sources,
            'title': title,
        }
1932 View Code Duplication
0 ignored issues
show
This code seems to be duplicated in your project.
Loading history...
1933
1934
class DiscoBleach(GenericEmptyComic):  # Does not work anymore
    """Placeholder for the Disco Bleach comics.

    Only the identification attributes are declared here.
    """
    name = 'discobleach'
    long_name = 'Disco Bleach'
    url = 'http://discobleach.com'
1939
1940
1941
class TubeyToons(GenericEmptyComic):  # Does not work anymore
    """Placeholder for the TubeyToons comics.

    Only the identification attributes are declared here.
    """
    # Also on http://tapastic.com/series/Tubey-Toons
    # Also on http://tubeytoons.tumblr.com
    name = 'tubeytoons'
    long_name = 'Tubey Toons'
    url = 'http://tubeytoons.com'
    # NOTE(review): 'TUNEYTOONS' looks like a typo for 'TUBEYTOONS' - confirm
    # before renaming, the value may be relied upon elsewhere.
    _categories = ('TUNEYTOONS', )
1949
1950
1951
class CompletelySeriousComics(GenericNavigableComic):
    """Retriever for the Completely Serious comics."""
    name = 'completelyserious'
    long_name = 'Completely Serious Comics'
    url = 'http://completelyseriouscomics.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return date, image urls, title, alt text and author of a page."""
        title = soup.find('h2', class_='post-title').string
        # The author is read from the second child of the 'post-author' span.
        author = soup.find('span', class_='post-author').contents[1].string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, '%B %d, %Y')
        pictures = soup.find('div', class_='comicpane').find_all('img')
        assert pictures
        alt = pictures[0]['title']
        # All images are expected to share the same title and alt texts.
        assert all(pic['title'] == pic['alt'] == alt for pic in pictures)
        sources = [pic['src'] for pic in pictures]
        return {
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': sources,
            'title': title,
            'alt': alt,
            'author': author,
        }
1979
1980
1981
class PoorlyDrawnLines(GenericListableComic):
    """Retriever for the Poorly Drawn Lines comics."""
    # Also on http://pdlcomics.tumblr.com
    name = 'poorlydrawn'
    long_name = 'Poorly Drawn Lines'
    url = 'http://poorlydrawnlines.com'
    _categories = ('POORLYDRAWN', )
    get_url_from_archive_element = get_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the image url (if any) and its title text.

        At most one image is expected in the 'post' div; without image the
        title is an empty string.
        """
        pictures = soup.find('div', class_='post').find_all('img')
        assert len(pictures) <= 1
        sources = [pic['src'] for pic in pictures]
        if pictures:
            title = pictures[0].get('title', "")
        else:
            title = ""
        return {
            'img': sources,
            'title': title,
        }

    @classmethod
    def get_archive_elements(cls):
        """Return the archive links pointing to '<url>/comic/...', reversed."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive'))
        comic_href_re = re.compile('^%s/comic/.' % cls.url)
        anchors = archive_soup.find_all('a', href=comic_href_re)
        return reversed(anchors)
2005
2006
2007
class LoadingComics(GenericNavigableComic):
    """Retriever for the Loading Artist comics."""
    name = 'loadingartist'
    long_name = 'Loading Artist'
    url = 'http://www.loadingartist.com/latest'

    @classmethod
    def get_first_comic_link(cls):
        """Return the anchor titled 'First' on the latest-comic page."""
        latest = get_soup_at_url(cls.url)
        return latest.find('a', title="First")

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor titled 'Next' or 'Previous'."""
        wanted_title = 'Next' if next_ else 'Previous'
        return last_soup.find('a', title=wanted_title)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, image urls and publication date of a comic page."""
        title = soup.find('h1').string
        raw_date = soup.find('span', class_='date').string.strip()
        published = string_to_date(raw_date, "%B %d, %Y")
        # Only images with empty alt and title attributes are selected.
        pictures = soup.find('div', class_='comic').find_all('img', alt='', title='')
        sources = [pic['src'] for pic in pictures]
        return {
            'title': title,
            'img': sources,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2037
2038
2039
class ChuckleADuck(GenericNavigableComic):
    """Retriever for the Chuckle-A-Duck comics."""
    name = 'chuckleaduck'
    long_name = 'Chuckle-A-duck'
    url = 'http://chuckleaduck.com'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return date, image urls, title and author of a comic page.

        A page without 'comic' div yields no image and an empty title.
        """
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(remove_st_nd_rd_th_from_date(raw_date), "%B %d, %Y")
        author = soup.find('span', class_='post-author').string
        comic_div = soup.find('div', id='comic')
        if comic_div:
            pictures = comic_div.find_all('img')
        else:
            pictures = []
        title = pictures[0]['title'] if pictures else ""
        # Every image is expected to repeat the first image's title.
        assert all(pic['title'] == pic['alt'] == title for pic in pictures)
        sources = [pic['src'] for pic in pictures]
        return {
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': sources,
            'title': title,
            'author': author,
        }
2065
2066
2067
class DepressedAlien(GenericNavigableComic):
    """Class to retrieve Depressed Alien Comics."""
    name = 'depressedalien'
    long_name = 'Depressed Alien'
    url = 'http://depressedalien.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        The link is the element enclosing the image named 'beginArrow'
        on the home page.
        """
        return get_soup_at_url(cls.url).find('img', attrs={'name': 'beginArrow'}).parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        The link is the element enclosing the image named 'rightArrow'
        (next) or 'leftArrow' (previous). Returns None when the page has
        no such image.
        """
        arrow = last_soup.find('img', attrs={'name': 'rightArrow' if next_ else 'leftArrow'})
        # find() yields None when nothing matches: report "no link" instead
        # of failing on the attribute access.
        return None if arrow is None else arrow.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with the title ('twitter:title' meta tag) and the
        image urls ('og:image' meta tags).
        """
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        imgs = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'img': [i['content'] for i in imgs],
        }
2093
2094
2095
class ThingsInSquares(GenericListableComic):
    """Retriever for the Things In Squares comics."""
    # This can be retrieved in other languages
    # Also on https://tapastic.com/series/Things-in-Squares
    name = 'squares'
    long_name = 'Things in squares'
    url = 'http://www.thingsinsquares.com'

    @classmethod
    def get_comic_info(cls, soup, tr):
        """Return date, titles, description, tags, image urls and alt texts.

        Date and first title come from the archive row `tr` (exactly three
        cells expected); the remaining values come from the comic page.
        """
        _, link_cell, date_cell = tr.find_all('td')
        anchor = link_cell.find('a')
        published = string_to_date(date_cell.string, "%m.%d.%y")
        title = anchor.string
        title2 = soup.find('meta', property='og:title')['content']
        og_desc = soup.find('meta', property='og:description')
        if og_desc:
            description = og_desc['content']
        else:
            description = ''
        tag_metas = soup.find_all('meta', property='article:tag')
        tags = ' '.join(meta['content'] for meta in tag_metas)
        pictures = soup.find('div', class_='entry-content').find_all('img')
        sources = [pic['src'] for pic in pictures]
        alt = ' '.join(pic['alt'] for pic in pictures)
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'title': title,
            'title2': title2,
            'description': description,
            'tags': tags,
            'img': sources,
            'alt': alt,
        }

    @classmethod
    def get_url_from_archive_element(cls, tr):
        """Return the href of the link in the second cell of an archive row."""
        _, link_cell, _ = tr.find_all('td')
        return link_cell.find('a')['href']

    @classmethod
    def get_archive_elements(cls):
        """Return the rows of the archive table body, in reversed order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive-2'))
        rows = archive_soup.find('tbody').find_all('tr')
        return reversed(rows)
2137
2138
2139
class HappleTea(GenericNavigableComic):
    """Retriever for the Happle Tea comics."""
    name = 'happletea'
    long_name = 'Happle Tea'
    url = 'http://www.happletea.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, image urls, alt text, date and author of a page."""
        pictures = soup.find('div', id='comic').find_all('img')
        post = soup.find('div', class_='post-content')
        title = post.find('h2', class_='post-title').string
        author = post.find('a', rel='author').string
        raw_date = post.find('span', class_='post-date').string
        published = string_to_date(raw_date, "%B %d, %Y")
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        sources = [pic['src'] for pic in pictures]
        # Alt texts are concatenated without separator.
        alt = ''.join(pic['alt'] for pic in pictures)
        return {
            'title': title,
            'img': sources,
            'alt': alt,
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'author': author,
        }
2166
2167
2168
class FatAwesomeComics(GenericNavigableComic):
    """Retriever for the Fat Awesome comics."""
    # Also on http://fatawesomecomedy.tumblr.com
    name = 'fatawesome'
    long_name = 'Fat Awesome'
    url = 'http://fatawesome.com/comics'
    get_navi_link = get_a_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://fatawesome.com/shortbus/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, description, tags, alt, image url and date.

        Exactly one image flagged with data-recalc-dims="1" is expected;
        its url is returned without the query string.
        """
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        description = soup.find('meta', attrs={'name': 'description'})['content']
        tag_meta = soup.find('meta', property='article:tag')
        if tag_meta:
            tags = tag_meta['content']
        else:
            tags = ""
        published_meta = soup.find('meta', property='article:published_time')
        # Only the first 10 characters are parsed, as a "%Y-%m-%d" date.
        published = string_to_date(published_meta['content'][:10], "%Y-%m-%d")
        pictures = soup.find_all('img', attrs={'data-recalc-dims': "1"})
        assert len(pictures) == 1
        alt = "".join(pic['alt'] for pic in pictures)
        sources = [pic['src'].rsplit('?', 1)[0] for pic in pictures]
        return {
            'title': title,
            'description': description,
            'tags': tags,
            'alt': alt,
            'img': sources,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2199
2200
2201
class AnythingComic(GenericListableComic):
    """Retriever for the Anything Comic comics."""
    # Also on http://tapastic.com/series/anything
    name = 'anythingcomic'
    long_name = 'Anything Comic'
    url = 'http://www.anythingcomic.com'

    @classmethod
    def get_archive_elements(cls):
        """Return the comic rows of the 'chapter_table' archive table."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive/'))
        rows = archive_soup.find('table', id='chapter_table').find_all('tr')
        # The first 2 <tr>'s do not correspond to comics
        return rows[2:]

    @classmethod
    def get_url_from_archive_element(cls, tr):
        """Return the absolute url of the comic listed in an archive row."""
        # Exactly four cells are expected; the second one holds the link.
        _, comic_cell, _, _ = tr.find_all('td')
        anchor = comic_cell.find('a')
        return urljoin_wrapper(cls.url, anchor['href'])

    @classmethod
    def get_comic_info(cls, soup, tr):
        """Return number, title, alt text, image url and date of a comic.

        Number, title and date come from the archive row `tr`; the image
        comes from the comic page, where exactly one is expected.
        """
        num_cell, comic_cell, date_cell, _ = tr.find_all('td')
        num = int(num_cell.string)
        anchor = comic_cell.find('a')
        title = anchor.string
        pictures = soup.find_all('img', id='comic_image')
        published = string_to_date(date_cell.string, '%d %b %Y %I:%M %p')
        assert len(pictures) == 1
        assert all(pic.get('alt') == pic.get('title') for pic in pictures)
        alt = pictures[0].get('alt', '')
        sources = [pic['src'] for pic in pictures]
        return {
            'num': num,
            'title': title,
            'alt': alt,
            'img': sources,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2241
2242
2243
class LonnieMillsap(GenericNavigableComic):
    """Retriever for Lonnie Millsap's comics."""
    name = 'millsap'
    long_name = 'Lonnie Millsap'
    url = 'http://www.lonniemillsap.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.lonniemillsap.com/?p=42'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, author, image urls and date of a comic page."""
        title = soup.find('h2', class_='post-title').string
        post = soup.find('div', class_='post-content')
        author_span = post.find("span", class_="post-author")
        author = author_span.find("a").string
        raw_date = post.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        pictures = post.find("div", class_="entry").find_all("img")
        sources = [pic['src'] for pic in pictures]
        return {
            'title': title,
            'author': author,
            'img': sources,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2269
2270
2271
class LinsEditions(GenericNavigableComic):
    """Retrieve the comics of L.I.N.S. Editions."""
    # Also on http://linscomics.tumblr.com
    name = 'lins'
    long_name = 'L.I.N.S. Editions'
    url = 'https://linsedition.com'
    _categories = ('LINS', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'https://linsedition.com/2011/09/07/l-i-n-s/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, images and publication date read from meta tags."""
        comic_title = soup.find('meta', property='og:title')['content']
        image_metas = soup.find_all('meta', property='og:image')
        timestamp = soup.find('meta', property='article:published_time')['content']
        # Only the first ten characters (YYYY-MM-DD) of the timestamp are parsed.
        published = string_to_date(timestamp[:10], "%Y-%m-%d")
        return {
            'title': comic_title,
            'img': [meta['content'] for meta in image_metas],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2296
2297
2298
class ThorsThundershack(GenericNavigableComic):
    """Retrieve the Thor's Thundershack comics."""
    # Also on http://tapastic.com/series/Thors-Thundershac
    name = 'thor'
    long_name = 'Thor\'s Thundershack'
    url = 'http://www.thorsthundershack.com'
    _categories = ('THOR', )
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'first navlink' anchor found on the main page."""
        home = get_soup_at_url(cls.url)
        return home.find('a', class_='first navlink')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the link to the next or previous comic, None if there is none."""
        direction = 'next' if next_ else 'prev'
        # Anchors whose href is exactly '/comic' are not usable navigation links.
        usable = (anchor for anchor in last_soup.find_all('a', rel=direction)
                  if anchor['href'] != '/comic')
        return next(usable, None)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the metadata and absolute image URLs of a comic page."""
        comic_title = soup.find('meta', attrs={'name': 'description'})["content"]
        body_text = soup.find('div', itemprop='articleBody').text
        author_name = soup.find('span', itemprop='author copyrightHolder').string
        pictures = soup.find_all('img', itemprop='image')
        # Every image must carry the same text as title and alt.
        assert all(pic['title'] == pic['alt'] for pic in pictures)
        alt_text = pictures[0]['alt'] if pictures else ""
        raw_date = soup.find('time', itemprop='datePublished')["datetime"]
        published = string_to_date(raw_date, "%Y-%m-%d %H:%M:%S")
        # Image sources are joined to the site URL.
        image_urls = [urljoin_wrapper(cls.url, pic['src']) for pic in pictures]
        return {
            'img': image_urls,
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'author': author_name,
            'title': comic_title,
            'alt': alt_text,
            'description': body_text,
        }
2341
2342
2343
class GerbilWithAJetpack(GenericNavigableComic):
    """Retrieve the Gerbil With A Jetpack comics."""
    name = 'gerbil'
    long_name = 'Gerbil With A Jetpack'
    url = 'http://gerbilwithajetpack.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, author, alt text, images and date of a comic page."""
        comic_title = soup.find('h2', class_='post-title').string
        author_name = soup.find("span", class_="post-author").find("a").string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find("div", id="comic").find_all("img")
        # The first image provides the reference text; all images must agree.
        alt_text = pictures[0]['alt']
        assert all(pic['alt'] == pic['title'] == alt_text for pic in pictures)
        return {
            'img': [pic['src'] for pic in pictures],
            'title': comic_title,
            'alt': alt_text,
            'author': author_name,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2370
2371
2372
class EveryDayBlues(GenericNavigableComic):
    """Retrieve the Every Day Blues comics."""
    name = "blues"
    long_name = "Every Day Blues"
    url = "http://everydayblues.net"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, author, images and publication date of a comic page."""
        comic_title = soup.find("h2", class_="post-title").string
        author_name = soup.find("span", class_="post-author").find("a").string
        raw_date = soup.find("span", class_="post-date").string
        # Dates are parsed with "de_DE.utf8" passed as extra argument
        # (presumably the locale to use - confirm against string_to_date).
        published = string_to_date(raw_date, "%d. %B %Y", "de_DE.utf8")
        pictures = soup.find("div", id="comic").find_all("img")
        # Alt and title of each image must both equal the post title.
        assert all(pic['alt'] == pic['title'] == comic_title for pic in pictures)
        assert len(pictures) <= 1
        return {
            'img': [pic['src'] for pic in pictures],
            'title': comic_title,
            'author': author_name,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2398
2399
2400
class BiterComics(GenericNavigableComic):
    """Retrieve the Biter Comics."""
    name = "biter"
    long_name = "Biter Comics"
    url = "http://www.bitercomics.com"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, author, alt text, image and date of a comic page."""
        comic_title = soup.find("h1", class_="entry-title").string
        author_name = soup.find("span", class_="author vcard").find("a").string
        raw_date = soup.find("span", class_="entry-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find("div", id="comic").find_all("img")
        # A page must hold exactly one image whose alt equals its title.
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        assert len(pictures) == 1
        alt_text = pictures[0]['alt']
        return {
            'img': [pic['src'] for pic in pictures],
            'title': comic_title,
            'alt': alt_text,
            'author': author_name,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2428
2429
2430
class TheAwkwardYeti(GenericNavigableComic):
    """Retrieve The Awkward Yeti comics."""
    # Also on http://www.gocomics.com/the-awkward-yeti
    # Also on http://larstheyeti.tumblr.com
    # Also on https://tapastic.com/series/TheAwkwardYeti
    name = 'yeti'
    long_name = 'The Awkward Yeti'
    url = 'http://theawkwardyeti.com'
    _categories = ('YETI', )
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, images and publication date of a comic page."""
        comic_title = soup.find('h2', class_='post-title').string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find("div", id="comic").find_all("img")
        # Only the first image is required to have identical alt and title.
        assert all(pic['alt'] == pic['title'] for pic in pictures[:1])
        return {
            'img': [pic['src'] for pic in pictures],
            'title': comic_title,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2457
2458
2459
class PleasantThoughts(GenericNavigableComic):
    """Retrieve the Pleasant Thoughts comics."""
    name = 'pleasant'
    long_name = 'Pleasant Thoughts'
    url = 'http://pleasant-thoughts.com'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the title and image sources found in the post content."""
        content = soup.find('div', class_='post-content')
        comic_title = content.find('h2', class_='post-title').string
        entry = content.find("div", class_="entry")
        sources = [picture['src'] for picture in entry.find_all("img")]
        return {
            'title': comic_title,
            'img': sources,
        }
2477
2478
2479
class MisterAndMe(GenericNavigableComic):
    """Retrieve the Mister & Me comics."""
    # Also on http://www.gocomics.com/mister-and-me
    # Also on https://tapastic.com/series/Mister-and-Me
    name = 'mister'
    long_name = 'Mister & Me'
    url = 'http://www.mister-and-me.com'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, author, alt text, image and date of a comic page."""
        comic_title = soup.find('h2', class_='post-title').string
        author_name = soup.find("span", class_="post-author").find("a").string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find("div", id="comic").find_all("img")
        # At most one image per page, with identical alt and title.
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        assert len(pictures) <= 1
        if pictures:
            alt_text = pictures[0]['alt']
        else:
            alt_text = ""
        return {
            'img': [pic['src'] for pic in pictures],
            'title': comic_title,
            'alt': alt_text,
            'author': author_name,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2509
2510
2511
class LastPlaceComics(GenericNavigableComic):
    """Retrieve the Last Place Comics."""
    name = 'lastplace'
    long_name = 'Last Place Comics'
    url = "http://lastplacecomics.com"
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, author, alt text, image and date of a comic page."""
        heading = soup.find('h2', class_='post-title').string
        writer = soup.find("span", class_="post-author").find("a").string
        when = string_to_date(
            soup.find("span", class_="post-date").string, "%B %d, %Y")
        images = soup.find("div", id="comic").find_all("img")
        # Each image must have matching alt/title; no more than one image.
        assert all(image['alt'] == image['title'] for image in images)
        assert len(images) <= 1
        alt_text = images[0]['alt'] if images else ""
        return {
            'img': [image['src'] for image in images],
            'title': heading,
            'alt': alt_text,
            'author': writer,
            'day': when.day,
            'month': when.month,
            'year': when.year
        }
2539
2540
2541
class TalesOfAbsurdity(GenericNavigableComic):
    """Retrieve the Tales Of Absurdity comics."""
    # Also on http://tapastic.com/series/Tales-Of-Absurdity
    # Also on http://talesofabsurdity.tumblr.com
    name = 'absurdity'
    long_name = 'Tales of Absurdity'
    url = 'http://talesofabsurdity.com'
    _categories = ('ABSURDITY', )
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, author, alt text, images and date of a comic page."""
        comic_title = soup.find('h2', class_='post-title').string
        author_name = soup.find("span", class_="post-author").find("a").string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find("div", id="comic").find_all("img")
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        # The alt text of the first image, if any, describes the comic.
        alt_text = pictures[0]['alt'] if pictures else ""
        return {
            'img': [pic['src'] for pic in pictures],
            'title': comic_title,
            'alt': alt_text,
            'author': author_name,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2571
2572
2573
class EndlessOrigami(GenericNavigableComic):
    """Retrieve the Endless Origami comics."""
    name = "origami"
    long_name = "Endless Origami"
    url = "http://endlessorigami.com"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, author, alt text, images and date of a comic page."""
        heading = soup.find('h2', class_='post-title').string
        writer = soup.find("span", class_="post-author").find("a").string
        when = string_to_date(
            soup.find("span", class_="post-date").string, "%B %d, %Y")
        images = soup.find("div", id="comic").find_all("img")
        # Alt and title must agree on every image.
        assert all(image['alt'] == image['title'] for image in images)
        if images:
            alt_text = images[0]['alt']
        else:
            alt_text = ""
        return {
            'img': [image['src'] for image in images],
            'title': heading,
            'alt': alt_text,
            'author': writer,
            'day': when.day,
            'month': when.month,
            'year': when.year
        }
2600
2601
2602
class PlanC(GenericNavigableComic):
    """Retrieve the Plan C comics."""
    name = 'planc'
    long_name = 'Plan C'
    url = 'http://www.plancomic.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, images and publication date of a comic page."""
        comic_title = soup.find('h2', class_='post-title').string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        comic_div = soup.find('div', id='comic')
        sources = [picture['src'] for picture in comic_div.find_all('img')]
        return {
            'title': comic_title,
            'img': sources,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2624
2625
2626
class BuniComic(GenericNavigableComic):
    """Retrieve the Buni comics."""
    name = 'buni'
    long_name = 'BuniComics'
    url = 'http://www.bunicomic.com'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the image of a comic page and the title carried by that image."""
        pictures = soup.find('div', id='comic').find_all('img')
        # Exactly one image is expected, with identical alt and title.
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        assert len(pictures) == 1
        picture = pictures[0]
        return {
            'img': [picture['src']],
            'title': picture['title'],
        }
2644
2645
2646
class GenericCommitStrip(GenericNavigableComic):
    """Base class shared by the Commit Strip comics of each language."""
    get_navi_link = get_a_rel_next
    get_first_comic_link = simulate_first_link
    # Placeholder: each concrete subclass provides its own first URL.
    first_url = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return titles, description and image URLs of a comic page."""
        description = soup.find('meta', property='og:description')['content']
        main_title = soup.find('meta', property='og:title')['content']
        pictures = soup.find('div', class_='entry-content').find_all('img')
        # Secondary title: the image titles (empty when missing) joined by spaces.
        image_titles = ' '.join(pic.get('title', '') for pic in pictures)
        # Sources are converted to plain ASCII URIs, then joined to the site URL.
        image_urls = [
            urljoin_wrapper(cls.url, convert_iri_to_plain_ascii_uri(pic['src']))
            for pic in pictures
        ]
        return {
            'title': main_title,
            'title2': image_titles,
            'description': description,
            'img': image_urls,
        }
2665
2666
2667
class CommitStripFr(GenericCommitStrip):
    """Retrieve the French edition of Commit Strip."""
    name = 'commit_fr'
    long_name = 'Commit Strip (Fr)'
    url = 'http://www.commitstrip.com/fr'
    first_url = 'http://www.commitstrip.com/fr/2012/02/22/interview/'
    _categories = ('FRANCAIS', )
2674
2675
2676
class CommitStripEn(GenericCommitStrip):
    """Retrieve the English edition of Commit Strip."""
    name = 'commit_en'
    long_name = 'Commit Strip (En)'
    url = 'http://www.commitstrip.com/en'
    # Overrides the NotImplemented placeholder of the generic class.
    first_url = 'http://www.commitstrip.com/en/2012/02/22/interview/'
2682
2683
2684
class GenericBoumerie(GenericNavigableComic):
    """Base class shared by the Boumeries comics of each language."""
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next
    # Placeholders: concrete subclasses provide the date format and language.
    date_format = NotImplemented
    lang = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return short URL, images, title, author and date of a comic page."""
        comic_title = soup.find('h2', class_='post-title').string
        shortlink = soup.find('link', rel='shortlink')['href']
        author_name = soup.find("span", class_="post-author").find("a").string
        raw_date = soup.find('span', class_='post-date').string
        # Format and language of the date come from the concrete subclass.
        published = string_to_date(raw_date, cls.date_format, cls.lang)
        pictures = soup.find('div', id='comic').find_all('img')
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        return {
            'short_url': shortlink,
            'img': [pic['src'] for pic in pictures],
            'title': comic_title,
            'author': author_name,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2710
2711
2712
class BoumerieEn(GenericBoumerie):
    """Retrieve the English edition of the Boumeries comics."""
    name = 'boumeries_en'
    long_name = 'Boumeries (En)'
    url = 'http://comics.boumerie.com'
    # Values handed to string_to_date by the generic get_comic_info.
    lang = 'en_GB.UTF-8'
    date_format = "%B %d, %Y"
2719
2720
2721
class BoumerieFr(GenericBoumerie):
    """Retrieve the French edition of the Boumeries comics."""
    name = 'boumeries_fr'
    long_name = 'Boumeries (Fr)'
    url = 'http://bd.boumerie.com'
    _categories = ('FRANCAIS', )
    # Values handed to string_to_date by the generic get_comic_info.
    lang = "fr_FR.utf8"
    date_format = "%A, %d %B %Y"
2729
2730
2731
class UnearthedComics(GenericNavigableComic):
    """Class to retrieve Unearthed comics."""
    # Also on http://tapastic.com/series/UnearthedComics
    # Also on http://unearthedcomics.tumblr.com
    name = 'unearthed'
    long_name = 'Unearthed Comics'
    url = 'http://unearthedcomics.com'
    _categories = ('UNEARTHED', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://unearthedcomics.com/comics/world-with-turn-signals/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        Returns a dictionary holding the title, description, short URL,
        image sources and publication date found in `soup`. `link` is not
        used.
        """
        short_url = soup.find('link', rel='shortlink')['href']
        # The title is read from the first <h1>, or else from the first <h2>.
        title_elt = soup.find('h1') or soup.find('h2')
        title = title_elt.string if title_elt else ""
        # Keep the text of the og:description meta tag, not the tag object
        # itself; the value stays None when the page has no such tag.
        desc_elt = soup.find('meta', property='og:description')
        desc = desc_elt.get('content') if desc_elt is not None else None
        date_str = soup.find('time', class_='published updated hidden')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        post = soup.find('div', class_="entry content entry-content type-portfolio")
        imgs = post.find_all('img')
        return {
            'title': title,
            'description': desc,
            'url2': short_url,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
2763
2764
2765
class Optipess(GenericNavigableComic):
    """Retrieve the Optipess comics."""
    name = 'optipess'
    long_name = 'Optipess'
    url = 'http://www.optipess.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, alt text, author, images and date of a comic page."""
        comic_title = soup.find('h2', class_='post-title').string
        author_name = soup.find("span", class_="post-author").find("a").string
        # A page without a 'comic' div simply has no images.
        comic_div = soup.find('div', id='comic')
        if comic_div:
            pictures = comic_div.find_all('img')
        else:
            pictures = []
        alt_text = pictures[0]['title'] if pictures else ""
        assert all(pic['alt'] == pic['title'] == alt_text for pic in pictures)
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, "%B %d, %Y")
        return {
            'title': comic_title,
            'alt': alt_text,
            'author': author_name,
            'img': [pic['src'] for pic in pictures],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2793
2794
2795
class PainTrainComic(GenericNavigableComic):
    """Class to retrieve Pain Train Comics."""
    name = 'paintrain'
    long_name = 'Pain Train Comics'
    url = 'http://paintraincomic.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        Returns a dictionary holding the short URL, the number extracted
        from it, the image sources, the publication date, the alt text and
        the title found in `soup`. `link` is not used.
        """
        title = soup.find('h2', class_='post-title').string
        short_url = soup.find('link', rel='shortlink')['href']
        # The comic number is the 'p' parameter of the short link. The site
        # URL is escaped so that its dots only match literal dots.
        short_url_re = re.compile(r'^%s/\?p=([0-9]*)' % re.escape(cls.url))
        num = int(short_url_re.match(short_url).group(1))
        imgs = soup.find('div', id='comic').find_all('img')
        # The first image gives the reference text; all images must agree.
        alt = imgs[0]['title']
        assert all(i['alt'] == i['title'] == alt for i in imgs)
        date_str = soup.find('span', class_='post-date').string
        day = string_to_date(date_str, "%d/%m/%Y")
        return {
            'short_url': short_url,
            'num': num,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'alt': alt,
            'title': title,
        }
2825
2826
2827
class MoonBeard(GenericNavigableComic):
    """Class to retrieve MoonBeard comics."""
    # Also on http://blog.squiresjam.es/moonbeard
    # Also on http://www.webtoons.com/en/comedy/moon-beard/list?title_no=471
    name = 'moonbeard'
    long_name = 'Moon Beard'
    url = 'http://moonbeard.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        Returns a dictionary holding the short URL, the number extracted
        from it, the image sources, the publication date, the title, the
        tags, the alt text and the author found in `soup`. `link` is not
        used.
        """
        title = soup.find('h2', class_='post-title').string
        short_url = soup.find('link', rel='shortlink')['href']
        # The comic number is the 'p' parameter of the short link. The site
        # URL is escaped so that its dots only match literal dots.
        short_url_re = re.compile(r'^%s/\?p=([0-9]*)' % re.escape(cls.url))
        num = int(short_url_re.match(short_url).group(1))
        imgs = soup.find('div', id='comic').find_all('img')
        # The first image gives the reference text; all images must agree.
        alt = imgs[0]['title']
        assert all(i['alt'] == i['title'] == alt for i in imgs)
        date_str = soup.find('span', class_='post-date').string
        day = string_to_date(date_str, "%B %d, %Y")
        # Tags are the contents of the article:tag meta tags joined by spaces.
        tags = ' '.join(t['content'] for t in soup.find_all('meta', property='article:tag'))
        author = soup.find('span', class_='post-author').string
        return {
            'short_url': short_url,
            'num': num,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'title': title,
            'tags': tags,
            'alt': alt,
            'author': author,
        }
2863
2864
2865
class AHamADay(GenericNavigableComic):
    """Class to retrieve A Ham A Day comics."""
    name = 'ham'
    long_name = 'A Ham A Day'
    url = 'http://www.ahammaday.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.ahammaday.com/today/3/6/french'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns None when the page has no matching navigation item.
        """
        # prev is next / next is prev
        li = last_soup.find('li', class_='previous' if next_ else 'next')
        # The navigation item may be missing: do not dereference None.
        return li.find('a') if li else None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        Returns a dictionary holding the image URLs, the title, the author
        and the publication date found in `soup`. `link` is not used.
        """
        date_str = soup.find('time', class_='published')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        author = soup.find('span', class_='blog-author').find('a').string
        title = soup.find('meta', property='og:title')['content']
        imgs = soup.find_all('meta', itemprop='image')
        return {
            'img': [i['content'] for i in imgs],
            'title': title,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
        }
2896
2897
2898
class LittleLifeLines(GenericNavigableComic):
    """Retrieve the Little Life Lines comics."""
    name = 'life'
    long_name = 'Little Life Lines'
    url = 'http://www.littlelifelines.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.littlelifelines.com/comics/well-done'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the link to the next or previous comic, None if there is none."""
        # prev is next / next is prev
        wanted = 'prev' if next_ else 'next'
        item = last_soup.find('li', class_=wanted)
        if item:
            return item.find('a')
        return None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the metadata and image sources of a comic page."""
        comic_title = soup.find('meta', property='og:title')['content']
        description = soup.find('meta', property='og:description')['content']
        raw_date = soup.find('time', class_='published')['datetime']
        published = string_to_date(raw_date, "%Y-%m-%d")
        author_name = soup.find('a', rel='author').string
        body = soup.find('div', class_="body entry-content")
        # Images without a 'src' attribute are left out.
        pictures = [pic for pic in body.find_all('img')
                    if pic.get('src') is not None]
        alt_text = pictures[0]['alt']
        return {
            'title': comic_title,
            'alt': alt_text,
            'description': description,
            'author': author_name,
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [pic['src'] for pic in pictures],
        }
2936
2937
2938
class GenericWordPressInkblot(GenericNavigableComic):
    """Base class for comics published with WordPress and Inkblot."""
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'first webcomic' anchor found on the main page."""
        home = get_soup_at_url(cls.url)
        return home.find('a', class_='webcomic-link webcomic1-link first-webcomic-link first-webcomic1-link')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, publication date and image sources of a comic page."""
        comic_title = soup.find('meta', property='og:title')['content']
        pictures = soup.find('div', class_='webcomic-image').find_all('img')
        timestamp = soup.find('meta', property='article:published_time')['content']
        # Only the first ten characters (YYYY-MM-DD) of the timestamp are parsed.
        published = string_to_date(timestamp[:10], "%Y-%m-%d")
        return {
            'title': comic_title,
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [pic['src'] for pic in pictures],
        }
2961
2962
2963
class EverythingsStupid(GenericWordPressInkblot):
    """Retrieve the Everything's Stupid comics."""
    # Also on http://tapastic.com/series/EverythingsStupid
    # Also on http://www.webtoons.com/en/challenge/everythings-stupid/list?title_no=14591
    # Also on http://everythingsstupidcomics.tumblr.com
    url = 'http://everythingsstupid.net'
    name = 'stupid'
    long_name = "Everything's Stupid"
2971
2972
2973
class TheIsmComics(GenericWordPressInkblot):
    """Retrieve The Ism comics."""
    # Also on https://tapastic.com/series/TheIsm (?)
    url = 'http://www.theism-comics.com'
    name = 'theism'
    long_name = "The Ism"
2979
2980
2981
class WoodenPlankStudios(GenericWordPressInkblot):
    """Retrieve the Wooden Plank Studios comics."""
    url = 'http://woodenplankstudios.com'
    name = 'woodenplank'
    long_name = 'Wooden Plank Studios'
2986
2987
2988
class ElectricBunnyComic(GenericNavigableComic):
    """Retrieve the Electric Bunny comics."""
    # Also on http://electricbunnycomics.tumblr.com
    name = 'bunny'
    long_name = 'Electric Bunny Comic'
    url = 'http://www.electricbunnycomics.com/View/Comic/153/Welcome+to+Hell'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the parent element of the image whose alt text is 'First'."""
        first_img = get_soup_at_url(cls.url).find('img', alt='First')
        return first_img.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the link to the next or previous comic, None if there is none."""
        # Navigation images are identified by their alt text.
        wanted_alt = 'Next' if next_ else 'Back'
        nav_img = last_soup.find('img', alt=wanted_alt)
        if nav_img:
            return nav_img.parent
        return None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the title and image URLs read from the page's meta tags."""
        comic_title = soup.find('meta', property='og:title')['content']
        image_metas = soup.find_all('meta', property='og:image')
        return {
            'title': comic_title,
            'img': [meta['content'] for meta in image_metas],
        }
3016
3017
3018
class SheldonComics(GenericNavigableComic):
    """Sheldon comics, browsed through the 'nav-*' anchors of each page."""
    # Also on http://www.gocomics.com/sheldon
    name = 'sheldon'
    long_name = 'Sheldon Comics'
    url = 'http://www.sheldoncomics.com'

    @classmethod
    def get_first_comic_link(cls):
        """Return the anchor with id 'nav-first' found on the page at cls.url."""
        home_page = get_soup_at_url(cls.url)
        return home_page.find("a", id="nav-first")

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the link to the next (next_ true) or previous comic.

        Anchors pointing back to the bare site address are skipped; None is
        returned when no other candidate is left.
        """
        anchor_id = "nav-next" if next_ else "nav-prev"
        candidates = last_soup.find_all("a", id=anchor_id)
        return next(
            (a for a in candidates
             if a['href'] != 'http://www.sheldoncomics.com'),
            None)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic description from the images in div 'comic-foot'.

        Exactly one image is expected, with identical 'alt' and 'title'
        attributes; the title attribute is used as the comic title.
        """
        footer = soup.find("div", id="comic-foot")
        pictures = footer.find_all("img")
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        assert len(pictures) == 1
        return {
            'title': pictures[0]['title'],
            'img': [pic['src'] for pic in pictures],
        }
3049
3050
3051
class CubeDrone(GenericNavigableComic):
    """Class to retrieve Cube Drone comics.

    Navigation relies on the glyphicon spans of each page: the element
    wrapping the relevant span is used as the link.
    """
    name = 'cubedrone'
    long_name = 'Cube Drone'
    url = 'http://cube-drone.com/comics'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        Returns the parent of the 'glyphicon-backward' span found on the
        page at cls.url.
        """
        return get_soup_at_url(cls.url).find('span', class_='glyphicon glyphicon-backward').parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns the parent of the chevron span ('right' for next, 'left' for
        previous), or None when the page has no such span.
        """
        class_ = 'glyphicon glyphicon-chevron-' + ('right' if next_ else 'left')
        chevron = last_soup.find('span', class_=class_)
        # A missing chevron means there is nothing to navigate to: report it as
        # None instead of failing on `.parent`.
        return chevron.parent if chevron is not None else None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Title and secondary url come from the 'twitter:*' meta tags; 'title2'
        and 'alt' come from the first image with class
        'comic img-responsive' (at least one such image is required).
        """
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        url2 = soup.find('meta', attrs={'name': 'twitter:url'})['content']
        # The publication date is not extracted (yet):
        # date_str = soup.find('h2', class_='comic_title').find('small').string
        # day = string_to_date(date_str, "%B %d, %Y, %I:%M %p")
        imgs = soup.find_all('img', class_='comic img-responsive')
        title2 = imgs[0]['title']
        alt = imgs[0]['alt']
        return {
            'url2': url2,
            'title': title,
            'title2': title2,
            'alt': alt,
            'img': [i['src'] for i in imgs],
        }
3086
3087
3088
class MakeItStoopid(GenericNavigableComic):
    """Make It Stoopid comics, browsed through the 'cnav' navigation bar."""
    name = 'stoopid'
    long_name = 'Make it stoopid'
    url = 'http://makeitstoopid.com/comic.php'

    @classmethod
    def get_nav(cls, soup):
        """Return the navigation elements of a page.

        The elements with class 'cnav' are expected to come as the same run
        of elements twice: the first five and the remainder must be equal.
        The first run is returned, in the order
        begin, prev, archive, next, end, with None replacing any element that
        has no 'href'.
        """
        all_nav = soup.find_all(class_='cnav')
        first_bar = all_nav[:5]
        second_bar = all_nav[5:]
        assert first_bar == second_bar
        links = []
        for element in first_bar:
            has_target = element.get('href') is not None
            links.append(element if has_target else None)
        return links

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'begin' navigation element of the page at cls.url."""
        nav = cls.get_nav(get_soup_at_url(cls.url))
        return nav[0]

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the 'next' (next_ true) or 'prev' navigation element."""
        position = 3 if next_ else 1
        return cls.get_nav(last_soup)[position]

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic description.

        The title is the 'title' attribute of the link that led to the page;
        the images are the img elements with id 'comicimg'.
        """
        pictures = soup.find_all('img', id='comicimg')
        return {
            'title': link['title'],
            'img': [pic['src'] for pic in pictures],
        }
3122
3123
3124 View Code Duplication
class TuMourrasMoinsBete(GenericNavigableComic):
    """Tu Mourras Moins Bete comics, browsed through the blog pager links.

    The first comic is not looked up on the site: simulate_first_link is
    used together with first_url.
    """
    name = 'mourrasmoinsbete'
    long_name = 'Tu Mourras Moins Bete'
    url = 'http://tumourrasmoinsbete.blogspot.fr'
    _categories = ('FRANCAIS', )
    get_first_comic_link = simulate_first_link
    first_url = 'http://tumourrasmoinsbete.blogspot.fr/2008/06/essai.html'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the 'newer' (next_ true) or 'older' pager anchor of the page."""
        if next_:
            pager_id = 'Blog1_blog-pager-newer-link'
        else:
            pager_id = 'Blog1_blog-pager-older-link'
        return last_soup.find('a', id=pager_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic description from a post page.

        Uses the page <title>, the images of the article body div and the
        text of the span marked as author.
        """
        page_title = soup.find('title').string
        article = soup.find('div', itemprop='description articleBody')
        pictures = article.find_all('img')
        writer = soup.find('span', itemprop='author').string
        return {
            'img': [pic['src'] for pic in pictures],
            'author': writer,
            'title': page_title,
        }
3149
3150
3151
class GeekAndPoke(GenericNavigableComic):
    """Class to retrieve Geek And Poke comics.

    The first comic is not looked up on the site: simulate_first_link is
    used together with first_url. Other comics are reached through the
    'prev-item' / 'next-item' anchors of each post.
    """
    name = 'geek'
    long_name = 'Geek And Poke'
    url = 'http://geek-and-poke.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://geek-and-poke.com/geekandpoke/2006/8/27/a-new-place-for-a-not-so-old-blog.html'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic."""
        # NOTE(review): 'prev-item' is used to move forward and 'next-item' to
        # move backward; presumably the site names them after its
        # newest-first ordering - confirm.
        return last_soup.find('a', class_='prev-item' if next_ else 'next-item')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dictionary with the keys 'title', 'alt', 'description',
        'author', 'day', 'month', 'year' and 'img'. 'alt' is the alt text of
        the first image, or an empty string when the post has no image or
        the image has no alt text.
        """
        title = soup.find('meta', property='og:title')['content']
        desc = soup.find('meta', property='og:description')['content']
        date_str = soup.find('time', class_='published')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        author = soup.find('a', rel='author').string
        # Posts use one of two containers for their content.
        div_content = (soup.find('div', class_="body entry-content") or
                       soup.find('div', class_="special-content"))
        # Images without a source cannot be downloaded: ignore them.
        imgs = [i for i in div_content.find_all('img') if i.get('src') is not None]
        # NOTE(review): if these are BeautifulSoup tags, `in` looks at the
        # children of the tag rather than its attributes, so this check may
        # never fire - confirm before relying on it.
        assert all('title' not in i or i['alt'] == i['title'] for i in imgs)
        # Always a string, including when there is no image at all.
        alt = imgs[0].get('alt', "") if imgs else ""
        return {
            'title': title,
            'alt': alt,
            'description': desc,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
        }
3189
3190
3191 View Code Duplication
class GloryOwlComix(GenericNavigableComic):
    """Glory Owl comics, browsed through the blog pager links.

    The first comic is not looked up on the site: simulate_first_link is
    used together with first_url.
    """
    name = 'gloryowl'
    long_name = 'Glory Owl'
    url = 'http://gloryowlcomix.blogspot.fr'
    _categories = ('NSFW', 'FRANCAIS')
    get_first_comic_link = simulate_first_link
    first_url = 'http://gloryowlcomix.blogspot.fr/2013/02/1_7.html'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the 'newer' (next_ true) or 'older' pager anchor of the page."""
        if next_:
            pager_id = 'Blog1_blog-pager-newer-link'
        else:
            pager_id = 'Blog1_blog-pager-older-link'
        return last_soup.find('a', id=pager_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic description from a post page.

        Uses the page <title>, every <link rel="image_src"> as image and the
        text of the anchor marked rel="author".
        """
        page_title = soup.find('title').string
        image_links = soup.find_all('link', rel='image_src')
        writer = soup.find('a', rel='author').string
        return {
            'img': [tag['href'] for tag in image_links],
            'author': writer,
            'title': page_title,
        }
3216
3217
3218
class GenericTumblrV1(GenericComic):
    """Generic class to retrieve comics from Tumblr using the V1 API.

    Posts are listed through `<url>/api/read/`; every post of type 'photo'
    becomes one comic, other posts are ignored.
    """
    _categories = ('TUMBLR', )

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generic implementation of get_next_comic for Tumblr comics.

        Yields the comic dictionaries built from the posts returned by
        get_posts, skipping the posts for which get_comic_info returns None.
        """
        for p in cls.get_posts(last_comic):
            comic = cls.get_comic_info(p)
            if comic is not None:
                yield comic

    @classmethod
    def get_url_from_post(cls, post):
        """Get the url of a post: its 'url' attribute."""
        return post['url']

    @classmethod
    def get_api_url(cls):
        """Get the url of the API entry point: '/api/read/' joined to cls.url."""
        return urljoin_wrapper(cls.url, '/api/read/')

    @classmethod
    def get_comic_info(cls, post):
        """Get information about a particular comics.

        Returns None when the post is not of type 'photo'. Otherwise returns
        a dictionary with the keys 'url', 'url2', 'day', 'month', 'year',
        'title', 'tags', 'img', 'tumblr-id' and 'api_url'.
        """
        type_ = post['type']
        if type_ != 'photo':
            return None
        tumblr_id = int(post['id'])
        api_url = cls.get_api_url() + '?id=%d' % (tumblr_id)
        day = datetime.datetime.fromtimestamp(int(post['unix-timestamp'])).date()
        caption = post.find('photo-caption')
        title = caption.string if caption else ""
        tags = ' '.join(t.string for t in post.find_all('tag'))
        # Photos may appear in 'photo' tags and/or straight in the post
        photo_tags = post.find_all('photo')
        if not photo_tags:
            photo_tags = [post]
        # Images are in multiple resolutions - taking the first one
        imgs = [photo.find('photo-url') for photo in photo_tags]
        return {
            'url': cls.get_url_from_post(post),
            'url2': post['url-with-slug'],
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'title': title,
            'tags': tags,
            'img': [i.string for i in imgs],
            'tumblr-id': tumblr_id,
            'api_url': api_url,  # for debug purposes
        }

    @classmethod
    def get_posts(cls, last_comic, nb_post_per_call=10):
        """Get posts using API. nb_post_per_call is max 50.

        Posts are retrieved from newer to older as per the tumblr v1 api
        but are returned in chronological order.

        last_comic is the last comic already retrieved (or None): only the
        posts listed before it by the API are returned. nb_post_per_call is
        brought back into the 1..50 range when needed.

        An empty iterable is returned when the post of last_comic can no
        longer be fetched, or when it is not met while listing the posts; a
        message is printed in both cases.
        """
        # `import urllib` alone does not guarantee that the `urllib.error`
        # submodule is loaded: import it explicitly.
        import urllib.error

        # A page size above the documented maximum of 50 would presumably be
        # capped by the API, and the pagination below would then skip posts.
        nb_post_per_call = max(1, min(nb_post_per_call, 50))
        waiting_for_url = last_comic['url'] if last_comic else None
        posts_acc = []
        if last_comic is not None:
            # Sometimes, tumblr posts are deleted. If the previous post is
            # deleted, we might end up spending a lot of time looking for
            # something that doesn't exist. Failing early and clearly might
            # be a better option.
            last_api_url = last_comic['api_url']
            try:
                get_soup_at_url(last_api_url)
            except urllib.error.HTTPError:
                try:
                    get_soup_at_url(cls.url)
                except urllib.error.HTTPError:
                    print("Did not find previous post nor main url %s" % cls.url)
                else:
                    print("Did not find previous post %s : it might have been deleted" % last_api_url)
                return reversed(posts_acc)
        api_url = cls.get_api_url()
        posts = get_soup_at_url(api_url).find('posts')
        start, total = int(posts['start']), int(posts['total'])
        assert start == 0
        for starting_num in range(0, total, nb_post_per_call):
            api_url2 = api_url + '?start=%d&num=%d' % (starting_num, nb_post_per_call)
            posts2 = get_soup_at_url(api_url2).find('posts')
            start2, total2 = int(posts2['start']), int(posts2['total'])
            assert starting_num == start2, "%d != %d" % (starting_num, start2)
            # This may happen and should be handled in the future
            assert total == total2, "%d != %d" % (total, total2)
            for p in posts2.find_all('post'):
                # Everything listed before the last known post is new.
                if waiting_for_url and waiting_for_url == cls.get_url_from_post(p):
                    return reversed(posts_acc)
                posts_acc.append(p)
        if waiting_for_url is None:
            return reversed(posts_acc)
        print("Did not find %s : there might be a problem" % waiting_for_url)
        return []
3314
3315
3316
class SaturdayMorningBreakfastCerealTumblr(GenericEmptyComic, GenericTumblrV1):
    """Saturday Morning Breakfast Cereal comics on Tumblr.

    GenericEmptyComic is listed first, so it takes precedence over
    GenericTumblrV1 (presumably to switch retrieval off - confirm).
    """
    # Also on http://www.gocomics.com/saturday-morning-breakfast-cereal
    # Also on http://www.smbc-comics.com
    name = 'smbc-tumblr'
    long_name = 'Saturday Morning Breakfast Cereal (from Tumblr)'
    url = 'http://smbc-comics.tumblr.com'
    _categories = ('SMBC', )
3324
3325
3326
class IrwinCardozo(GenericTumblrV1):
    """Irwin Cardozo comics on Tumblr.

    Settings only: retrieval is inherited from GenericTumblrV1.
    """
    name = 'irwinc'
    long_name = 'Irwin Cardozo'
    url = 'http://irwincardozocomics.tumblr.com'
3331
3332
3333
class AccordingToDevin(GenericTumblrV1):
    """According To Devin comics on Tumblr.

    Settings only: retrieval is inherited from GenericTumblrV1.
    """
    name = 'devin'
    long_name = 'According To Devin'
    url = 'http://accordingtodevin.tumblr.com'
3338
3339
3340
class ItsTheTieTumblr(GenericTumblrV1):
    """It's the tie comics on Tumblr.

    Settings only: retrieval is inherited from GenericTumblrV1.
    """
    # Also on http://itsthetie.com
    # Also on https://tapastic.com/series/itsthetie
    name = 'tie-tumblr'
    long_name = "It's the tie (from Tumblr)"
    url = "http://itsthetie.tumblr.com"
    _categories = ('TIE', )
3348
3349
3350
class OctopunsTumblr(GenericTumblrV1):
    """Octopuns comics on Tumblr.

    Settings only: retrieval is inherited from GenericTumblrV1.
    """
    # Also on http://www.octopuns.net
    name = 'octopuns-tumblr'
    long_name = 'Octopuns (from Tumblr)'
    url = 'http://octopuns.tumblr.com'
3356
3357
3358
class PicturesInBoxesTumblr(GenericTumblrV1):
    """Pictures In Boxes comics on Tumblr.

    Settings only: retrieval is inherited from GenericTumblrV1.
    """
    # Also on http://www.picturesinboxes.com
    name = 'picturesinboxes-tumblr'
    long_name = 'Pictures in Boxes (from Tumblr)'
    url = 'http://picturesinboxescomic.tumblr.com'
3364
3365
3366
class TubeyToonsTumblr(GenericTumblrV1):
    """Tubey Toons comics on Tumblr.

    Settings only: retrieval is inherited from GenericTumblrV1.
    """
    # Also on http://tapastic.com/series/Tubey-Toons
    # Also on http://tubeytoons.com
    name = 'tubeytoons-tumblr'
    long_name = 'Tubey Toons (from Tumblr)'
    url = 'http://tubeytoons.tumblr.com'
    # NOTE(review): spelled 'TUNEY', not 'TUBEY'; left untouched because other
    # classes may share this category key - confirm before renaming.
    _categories = ('TUNEYTOONS', )
3374
3375
3376
class UnearthedComicsTumblr(GenericTumblrV1):
    """Unearthed Comics on Tumblr.

    Settings only: retrieval is inherited from GenericTumblrV1.
    """
    # Also on http://tapastic.com/series/UnearthedComics
    # Also on http://unearthedcomics.com
    name = 'unearthed-tumblr'
    long_name = 'Unearthed Comics (from Tumblr)'
    url = 'http://unearthedcomics.tumblr.com'
    _categories = ('UNEARTHED', )
3384
3385
3386
class PieComic(GenericTumblrV1):
    """Pie Comic comics on Tumblr.

    Settings only: retrieval is inherited from GenericTumblrV1.
    """
    name = 'pie'
    long_name = 'Pie Comic'
    url = "http://piecomic.tumblr.com"
3391
3392
3393
class MrEthanDiamond(GenericTumblrV1):
    """Mr Ethan Diamond comics on Tumblr.

    Settings only: retrieval is inherited from GenericTumblrV1.
    """
    name = 'diamond'
    long_name = 'Mr Ethan Diamond'
    url = 'http://mrethandiamond.tumblr.com'
3398
3399
3400
class Flocci(GenericTumblrV1):
    """floccinaucinihilipilification comics on Tumblr.

    Settings only: retrieval is inherited from GenericTumblrV1.
    """
    name = 'flocci'
    long_name = 'floccinaucinihilipilification'
    url = "http://floccinaucinihilipilificationa.tumblr.com"
3405
3406
3407
class UpAndOut(GenericTumblrV1):
    """Up & Out comics on Tumblr.

    Settings only: retrieval is inherited from GenericTumblrV1.
    """
    # Also on http://tapastic.com/series/UP-and-OUT
    name = 'upandout'
    long_name = 'Up And Out (from Tumblr)'
    url = 'http://upandoutcomic.tumblr.com'
3413
3414
3415
class Pundemonium(GenericTumblrV1):
    """Pundemonium comics on Tumblr.

    Settings only: retrieval is inherited from GenericTumblrV1.
    """
    name = 'pundemonium'
    long_name = 'Pundemonium'
    url = 'http://monstika.tumblr.com'
3420
3421
3422
class PoorlyDrawnLinesTumblr(GenericEmptyComic, GenericTumblrV1):
    """Poorly Drawn Lines comics on Tumblr.

    GenericEmptyComic is listed first, so it takes precedence over
    GenericTumblrV1 (presumably to switch retrieval off - confirm).
    """
    # Also on http://poorlydrawnlines.com
    name = 'poorlydrawn-tumblr'
    long_name = 'Poorly Drawn Lines (from Tumblr)'
    url = 'http://pdlcomics.tumblr.com'
    _categories = ('POORLYDRAWN', )
3429
3430
3431
class PearShapedComics(GenericTumblrV1):
    """Pear-Shaped Comics, read through the Tumblr V1 API of their site.

    Settings only: retrieval is inherited from GenericTumblrV1.
    """
    name = 'pearshaped'
    long_name = 'Pear-Shaped Comics'
    url = 'http://pearshapedcomics.com'
3436
3437
3438
class PondScumComics(GenericTumblrV1):
    """Pond Scum comics on Tumblr.

    Settings only: retrieval is inherited from GenericTumblrV1.
    """
    name = 'pond'
    long_name = 'Pond Scum'
    url = 'http://pondscumcomic.tumblr.com'
3443
3444
3445
class MercworksTumblr(GenericTumblrV1):
    """Mercworks comics on Tumblr.

    Settings only: retrieval is inherited from GenericTumblrV1.
    """
    # Also on http://mercworks.net
    name = 'mercworks-tumblr'
    long_name = 'Mercworks (from Tumblr)'
    url = 'http://mercworks.tumblr.com'
3451
3452
3453
class OwlTurdTumblr(GenericEmptyComic, GenericTumblrV1):
    """Owl Turd comics, declared as a Tumblr comic.

    GenericEmptyComic is listed first, so it takes precedence over
    GenericTumblrV1 (presumably to switch retrieval off - confirm).
    """
    # Also on http://tapastic.com/series/Owl-Turd-Comix
    name = 'owlturd-tumblr'
    long_name = 'Owl Turd (from Tumblr)'
    url = 'http://owlturd.com'
    _categories = ('OWLTURD', )
3460
3461
3462
class VectorBelly(GenericTumblrV1):
    """Vector Belly comics on Tumblr.

    Settings only: retrieval is inherited from GenericTumblrV1.
    """
    # Also on http://vectorbelly.com
    name = 'vector'
    long_name = 'Vector Belly'
    url = 'http://vectorbelly.tumblr.com'
3468
3469
3470
class GoneIntoRapture(GenericTumblrV1):
    """Gone Into Rapture comics, read through the Tumblr V1 API of their site.

    Settings only: retrieval is inherited from GenericTumblrV1.
    """
    # Also on http://goneintorapture.tumblr.com
    # Also on http://tapastic.com/series/Goneintorapture
    name = 'rapture'
    long_name = 'Gone Into Rapture'
    url = 'http://www.goneintorapture.com'
3477
3478
3479
class TheOatmealTumblr(GenericTumblrV1):
    """The Oatmeal comics on Tumblr.

    Settings only: retrieval is inherited from GenericTumblrV1.
    """
    # Also on http://theoatmeal.com
    name = 'oatmeal-tumblr'
    long_name = 'The Oatmeal (from Tumblr)'
    url = 'http://oatmeal.tumblr.com'
3485
3486
3487
class HeckIfIKnowComicsTumblr(GenericTumblrV1):
    """Heck If I Know comics, read through the Tumblr V1 API of their site.

    Settings only: retrieval is inherited from GenericTumblrV1.
    """
    # Also on http://tapastic.com/series/Regular
    name = 'heck-tumblr'
    long_name = 'Heck if I Know comics (from Tumblr)'
    url = 'http://heckifiknowcomics.com'
3493
3494
3495
class MyJetPack(GenericTumblrV1):
    """My Jet Pack comics on Tumblr.

    Settings only: retrieval is inherited from GenericTumblrV1.
    """
    name = 'jetpack'
    long_name = 'My Jet Pack'
    url = 'http://myjetpack.tumblr.com'
3500
3501
3502
class CheerUpEmoKidTumblr(GenericTumblrV1):
    """Cheer Up Emo Kid comics on Tumblr.

    Settings only: retrieval is inherited from GenericTumblrV1.
    """
    # Also on http://www.cheerupemokid.com
    # Also on http://tapastic.com/series/CUEK
    name = 'cuek-tumblr'
    long_name = 'Cheer Up Emo Kid (from Tumblr)'
    url = 'http://enzocomics.tumblr.com'
3509
3510
3511
class ForLackOfABetterComic(GenericEmptyComic, GenericTumblrV1):
    """For Lack Of A Better Comic on Tumblr.

    GenericEmptyComic is listed first, so it takes precedence over
    GenericTumblrV1 (presumably to switch retrieval off - confirm).
    """
    # Also on http://forlackofabettercomic.com
    name = 'lack'
    long_name = 'For Lack Of A Better Comic'
    url = 'http://forlackofabettercomic.tumblr.com'
3517
3518
3519
class ZenPencilsTumblr(GenericTumblrV1):
    """Zen Pencils comics on Tumblr.

    Settings only: retrieval is inherited from GenericTumblrV1.
    """
    # Also on http://zenpencils.com
    # Also on http://www.gocomics.com/zen-pencils
    name = 'zenpencils-tumblr'
    long_name = 'Zen Pencils (from Tumblr)'
    url = 'http://zenpencils.tumblr.com'
    _categories = ('ZENPENCILS', )
3527
3528
3529
class ThreeWordPhraseTumblr(GenericTumblrV1):
    """Three Word Phrase comics on Tumblr.

    Settings only: retrieval is inherited from GenericTumblrV1.
    """
    # Also on http://threewordphrase.com
    name = 'threeword-tumblr'
    long_name = 'Three Word Phrase (from Tumblr)'
    url = 'http://www.threewordphrase.tumblr.com'
3535
3536
3537
class TimeTrabbleTumblr(GenericTumblrV1):
    """Time Trabble comics on Tumblr.

    Settings only: retrieval is inherited from GenericTumblrV1.
    """
    # Also on http://timetrabble.com
    name = 'timetrabble-tumblr'
    long_name = 'Time Trabble (from Tumblr)'
    url = 'http://timetrabble.tumblr.com'
3543
3544
3545
class SafelyEndangeredTumblr(GenericTumblrV1):
    """Safely Endangered comics on Tumblr.

    Settings only: retrieval is inherited from GenericTumblrV1.
    """
    # Also on http://www.safelyendangered.com
    name = 'endangered-tumblr'
    long_name = 'Safely Endangered (from Tumblr)'
    url = 'http://tumblr.safelyendangered.com'
3551
3552
3553
class MouseBearComedyTumblr(GenericTumblrV1):
    """Mouse Bear Comedy comics on Tumblr.

    Settings only: retrieval is inherited from GenericTumblrV1.
    """
    # Also on http://www.mousebearcomedy.com
    name = 'mousebear-tumblr'
    long_name = 'Mouse Bear Comedy (from Tumblr)'
    url = 'http://mousebearcomedy.tumblr.com'
3559
3560
3561
class BouletCorpTumblr(GenericTumblrV1):
    """Boulet Corp comics on Tumblr.

    Settings only: retrieval is inherited from GenericTumblrV1.
    """
    # Also on http://www.bouletcorp.com
    name = 'boulet-tumblr'
    long_name = 'Boulet Corp (from Tumblr)'
    url = 'http://bouletcorp.tumblr.com'
    _categories = ('BOULET', )
3568
3569
3570
class TheAwkwardYetiTumblr(GenericEmptyComic, GenericTumblrV1):
    """The Awkward Yeti comics on Tumblr.

    GenericEmptyComic is listed first, so it takes precedence over
    GenericTumblrV1 (presumably to switch retrieval off - confirm).
    """
    # Also on http://www.gocomics.com/the-awkward-yeti
    # Also on http://theawkwardyeti.com
    # Also on https://tapastic.com/series/TheAwkwardYeti
    name = 'yeti-tumblr'
    long_name = 'The Awkward Yeti (from Tumblr)'
    url = 'http://larstheyeti.tumblr.com'
    _categories = ('YETI', )
3579
3580
3581
class NellucNhoj(GenericTumblrV1):
    """Nelluc Nhoj comics, read through the Tumblr V1 API of their site.

    Settings only: retrieval is inherited from GenericTumblrV1.
    """
    name = 'nhoj'
    long_name = 'Nelluc Nhoj'
    url = 'http://nellucnhoj.com'
3586
3587
3588
class DownTheUpwardSpiralTumblr(GenericTumblrV1):
    """Down The Upward Spiral comics on Tumblr.

    Settings only: retrieval is inherited from GenericTumblrV1.
    """
    # Also on http://www.downtheupwardspiral.com
    name = 'spiral-tumblr'
    long_name = 'Down the Upward Spiral (from Tumblr)'
    url = 'http://downtheupwardspiral.tumblr.com'
3594
3595
3596
class AsPerUsualTumblr(GenericTumblrV1):
    """As Per Usual comics on Tumblr.

    Settings only: retrieval is inherited from GenericTumblrV1.
    """
    # Also on https://tapastic.com/series/AsPerUsual
    name = 'usual-tumblr'
    long_name = 'As Per Usual (from Tumblr)'
    url = 'http://as-per-usual.tumblr.com'
3602
3603
3604
class OneOneOneOneComicTumblr(GenericTumblrV1):
    """1111 Comics on Tumblr.

    Settings only: retrieval is inherited from GenericTumblrV1.
    """
    # Also on http://www.1111comics.me
    # Also on https://tapastic.com/series/1111-Comics
    name = '1111-tumblr'
    long_name = '1111 Comics (from Tumblr)'
    url = 'http://comics1111.tumblr.com'
    _categories = ('ONEONEONEONE', )
3612
3613
3614
class JhallComicsTumblr(GenericTumblrV1):
    """Jhall Comics on Tumblr.

    Settings only: retrieval is inherited from GenericTumblrV1.
    """
    # Also on http://jhallcomics.com
    name = 'jhall-tumblr'
    long_name = 'Jhall Comics (from Tumblr)'
    url = 'http://jhallcomics.tumblr.com'
3620
3621
3622
class BerkeleyMewsTumblr(GenericTumblrV1):
    """Berkeley Mews comics on Tumblr.

    Settings only: retrieval is inherited from GenericTumblrV1.
    """
    # Also on http://www.gocomics.com/berkeley-mews
    # Also on http://www.berkeleymews.com
    name = 'berkeley-tumblr'
    long_name = 'Berkeley Mews (from Tumblr)'
    url = 'http://mews.tumblr.com'
    _categories = ('BERKELEY', )
3630
3631
3632
class JoanCornellaTumblr(GenericTumblrV1):
    """Joan Cornella comics on Tumblr.

    Settings only: retrieval is inherited from GenericTumblrV1.
    """
    # Also on http://joancornella.net
    name = 'cornella-tumblr'
    long_name = 'Joan Cornella (from Tumblr)'
    url = 'http://cornellajoan.tumblr.com'
3638
3639
3640
class RespawnComicTumblr(GenericTumblrV1):
    """Respawn Comic on Tumblr.

    Settings only: retrieval is inherited from GenericTumblrV1.
    """
    # Also on http://respawncomic.com
    name = 'respawn-tumblr'
    long_name = 'Respawn Comic (from Tumblr)'
    url = 'http://respawncomic.tumblr.com'
3646
3647
3648
class ChrisHallbeckTumblr(GenericEmptyComic, GenericTumblrV1):
    """Chris Hallbeck comics on Tumblr.

    GenericEmptyComic is listed first, so it takes precedence over
    GenericTumblrV1 (presumably to switch retrieval off - confirm).
    """
    # Also on https://tapastic.com/ChrisHallbeck
    # Also on http://maximumble.com
    # Also on http://minimumble.com
    # Also on http://thebookofbiff.com
    name = 'hallbeck-tumblr'
    # NOTE(review): 'Hallback' / 'HALLBACK' below differ from the 'Hallbeck'
    # spelling used everywhere else; left untouched because these values may
    # be shared with other classes or stored data - confirm before renaming.
    long_name = 'Chris Hallback (from Tumblr)'
    url = 'http://chrishallbeck.tumblr.com'
    _categories = ('HALLBACK', )
3658
3659
3660
class ComicNuggets(GenericTumblrV1):
    """Comic Nuggets, read through the Tumblr V1 API of their site.

    Settings only: retrieval is inherited from GenericTumblrV1.
    """
    name = 'nuggets'
    long_name = 'Comic Nuggets'
    url = 'http://comicnuggets.com'
3665
3666
3667
class PigeonGazetteTumblr(GenericTumblrV1):
    """The Pigeon Gazette comics on Tumblr.

    Settings only: retrieval is inherited from GenericTumblrV1.
    """
    # Also on https://tapastic.com/series/The-Pigeon-Gazette
    name = 'pigeon-tumblr'
    long_name = 'The Pigeon Gazette (from Tumblr)'
    url = 'http://thepigeongazette.tumblr.com'
3673
3674
3675
class CancerOwl(GenericTumblrV1):
    """Cancer Owl comics on Tumblr.

    Settings only: retrieval is inherited from GenericTumblrV1.
    """
    # Also on http://cancerowl.com
    name = 'cancerowl-tumblr'
    long_name = 'Cancer Owl (from Tumblr)'
    url = 'http://cancerowl.tumblr.com'
3681
3682
3683
class FowlLanguageTumblr(GenericTumblrV1):
    """Fowl Language comics on Tumblr.

    Settings only: retrieval is inherited from GenericTumblrV1.
    """
    # Also on http://www.fowllanguagecomics.com
    # Also on http://tapastic.com/series/Fowl-Language-Comics
    # Also on http://www.gocomics.com/fowl-language
    name = 'fowllanguage-tumblr'
    long_name = 'Fowl Language Comics (from Tumblr)'
    url = 'http://fowllanguagecomics.tumblr.com'
    _categories = ('FOWLLANGUAGE', )
3692
3693
3694
class TheOdd1sOutTumblr(GenericTumblrV1):
    """The Odd 1s Out comics on Tumblr.

    Settings only: retrieval is inherited from GenericTumblrV1.
    """
    # Also on http://theodd1sout.com
    # Also on https://tapastic.com/series/Theodd1sout
    name = 'theodd-tumblr'
    long_name = 'The Odd 1s Out (from Tumblr)'
    url = 'http://theodd1sout.tumblr.com'
3701
3702
3703
class TheUnderfoldTumblr(GenericTumblrV1):
    """The Underfold comics on Tumblr.

    Settings only: retrieval is inherited from GenericTumblrV1.
    """
    # Also on http://theunderfold.com
    name = 'underfold-tumblr'
    long_name = 'The Underfold (from Tumblr)'
    url = 'http://theunderfold.tumblr.com'
3709
3710
3711
class LolNeinTumblr(GenericTumblrV1):
    """Lol Nein comics on Tumblr.

    Settings only: retrieval is inherited from GenericTumblrV1.
    """
    # Also on http://lolnein.com
    name = 'lolnein-tumblr'
    long_name = 'Lol Nein (from Tumblr)'
    url = 'http://lolneincom.tumblr.com'
3717
3718
3719
class FatAwesomeComicsTumblr(GenericTumblrV1):
    """Fat Awesome comics on Tumblr.

    Settings only: retrieval is inherited from GenericTumblrV1.
    """
    # Also on http://fatawesome.com/comics
    name = 'fatawesome-tumblr'
    long_name = 'Fat Awesome (from Tumblr)'
    url = 'http://fatawesomecomedy.tumblr.com'
3725
3726
3727
class TheWorldIsFlatTumblr(GenericTumblrV1):
    """The World Is Flat comics on Tumblr.

    Settings only: retrieval is inherited from GenericTumblrV1.
    """
    # Also on https://tapastic.com/series/The-World-is-Flat
    name = 'flatworld-tumblr'
    long_name = 'The World Is Flat (from Tumblr)'
    url = 'http://theworldisflatcomics.tumblr.com'
3733
3734
3735
class DorrisMc(GenericEmptyComic, GenericTumblrV1):
    """Dorris Mc comics, declared as a Tumblr comic.

    GenericEmptyComic is listed first, so it takes precedence over
    GenericTumblrV1 (presumably to switch retrieval off - confirm).
    """
    # Also on http://www.gocomics.com/dorris-mccomics
    name = 'dorrismc'
    long_name = 'Dorris Mc'
    url = 'http://dorrismccomics.com'
3741
3742
3743
class LeleozTumblr(GenericEmptyComic, GenericTumblrV1):
    """Leleoz comics on Tumblr.

    GenericEmptyComic is listed first, so it takes precedence over
    GenericTumblrV1 (presumably to switch retrieval off - confirm).
    """
    # Also on https://tapastic.com/series/Leleoz
    name = 'leleoz-tumblr'
    long_name = 'Leleoz (from Tumblr)'
    url = 'http://leleozcomics.tumblr.com'
3749
3750
3751
class MoonBeardTumblr(GenericTumblrV1):
    """Moon Beard comics, declared as a Tumblr comic.

    Settings only: retrieval is inherited from GenericTumblrV1.
    """
    # Also on http://moonbeard.com
    # Also on http://www.webtoons.com/en/comedy/moon-beard/list?title_no=471
    name = 'moonbeard-tumblr'
    long_name = 'Moon Beard (from Tumblr)'
    url = 'http://blog.squiresjam.es/moonbeard'
3758
3759
3760
class AComik(GenericTumblrV1):
    """A Comik comics, read through the Tumblr V1 API of their site.

    Settings only: retrieval is inherited from GenericTumblrV1.
    """
    name = 'comik'
    long_name = 'A Comik'
    url = 'http://acomik.com'
3765
3766
3767
class ClassicRandy(GenericTumblrV1):
    """Classic Randy comics on Tumblr.

    Settings only: retrieval is inherited from GenericTumblrV1.
    """
    name = 'randy'
    long_name = 'Classic Randy'
    url = 'http://classicrandy.tumblr.com'
3772
3773
3774
class DagssonTumblr(GenericTumblrV1):
    """Dagsson comics on Tumblr.

    Settings only: retrieval is inherited from GenericTumblrV1.
    """
    # Also on http://www.dagsson.com
    name = 'dagsson-tumblr'
    long_name = 'Dagsson Hugleikur (from Tumblr)'
    url = 'http://hugleikurdagsson.tumblr.com'
3780
3781
3782
class LinsEditionsTumblr(GenericTumblrV1):
    """L.I.N.S. Editions comics on Tumblr.

    Settings only: retrieval is inherited from GenericTumblrV1.
    """
    # Also on https://linsedition.com
    name = 'lins-tumblr'
    long_name = 'L.I.N.S. Editions (from Tumblr)'
    url = 'http://linscomics.tumblr.com'
    _categories = ('LINS', )
3789
3790
3791
class OrigamiHotDish(GenericTumblrV1):
    """Origami Hot Dish comics, read through the Tumblr V1 API of their site.

    Settings only: retrieval is inherited from GenericTumblrV1.
    """
    name = 'origamihotdish'
    long_name = 'Origami Hot Dish'
    url = 'http://origamihotdish.com'
3796
3797
3798
class HitAndMissComicsTumblr(GenericTumblrV1):
    """Hit and Miss Comics on Tumblr.

    Settings only: retrieval is inherited from GenericTumblrV1.
    """
    name = 'hitandmiss'
    long_name = 'Hit and Miss Comics'
    url = 'http://hitandmisscomics.tumblr.com'
3803
3804
3805
class HMBlanc(GenericTumblrV1):
    """HM Blanc comics on Tumblr.

    Settings only: retrieval is inherited from GenericTumblrV1.
    """
    name = 'hmblanc'
    long_name = 'HM Blanc'
    url = 'http://hmblanc.tumblr.com'
3810
3811
3812
class TalesOfAbsurdityTumblr(GenericTumblrV1):
    """Tales Of Absurdity comics on Tumblr.

    Settings only: retrieval is inherited from GenericTumblrV1.
    """
    # Also on http://talesofabsurdity.com
    # Also on http://tapastic.com/series/Tales-Of-Absurdity
    name = 'absurdity-tumblr'
    long_name = 'Tales of Absurdity (from Tumblr)'
    url = 'http://talesofabsurdity.tumblr.com'
    _categories = ('ABSURDITY', )
3820
3821
3822
class RobbieAndBobby(GenericTumblrV1):
    """Robbie And Bobby comics on Tumblr.

    Settings only: retrieval is inherited from GenericTumblrV1.
    """
    # Also on http://robbieandbobby.com
    name = 'robbie-tumblr'
    long_name = 'Robbie And Bobby (from Tumblr)'
    url = 'http://robbieandbobby.tumblr.com'
3828
3829
3830
class ElectricBunnyComicTumblr(GenericTumblrV1):
    """Class to retrieve Electric Bunny Comics from Tumblr."""
    # Also on http://www.electricbunnycomics.com/View/Comic/153/Welcome+to+Hell
    name = 'bunny-tumblr'
    long_name = 'Electric Bunny Comic (from Tumblr)'
    url = 'http://electricbunnycomics.tumblr.com'
3836
3837
3838
class Hoomph(GenericTumblrV1):
    """Class to retrieve Hoomph comics."""
    # Served from its own domain but handled through GenericTumblrV1.
    name = 'hoomph'
    long_name = 'Hoomph'
    url = 'http://hoom.ph'
3843
3844
3845
class BFGFSTumblr(GenericTumblrV1):
    """Class to retrieve BFGFS comics from Tumblr."""
    # Also on https://tapastic.com/series/BFGFS
    # Also on http://bfgfs.com
    name = 'bfgfs-tumblr'
    long_name = 'BFGFS (from Tumblr)'
    url = 'http://bfgfs.tumblr.com'
3852
3853
3854
class DoodleForFood(GenericTumblrV1):
    """Class to retrieve Doodle For Food comics."""
    # Also on https://tapastic.com/series/Doodle-for-Food
    name = 'doodle'
    long_name = 'Doodle For Food'
    url = 'http://doodleforfood.com'
3860
3861
3862
class CassandraCalinTumblr(GenericEmptyComic, GenericTumblrV1):
    """Class to retrieve C. Cassandra comics from Tumblr."""
    # GenericEmptyComic is listed first, so it takes precedence over
    # GenericTumblrV1 in the method resolution order.
    # Also on http://cassandracalin.com
    # Also on https://tapastic.com/series/C-Cassandra-comics
    name = 'cassandra-tumblr'
    long_name = 'Cassandra Calin (from Tumblr)'
    url = 'http://c-cassandra.tumblr.com'
3869
3870
3871
class DougWasTaken(GenericTumblrV1):
    """Class to retrieve Doug Was Taken comics from Tumblr."""
    # NOTE(review): short name is spelled 'doog', not 'doug' - confirm this is intended.
    name = 'doog'
    long_name = 'Doug Was Taken'
    url = 'http://dougwastaken.tumblr.com'
3876
3877
3878
class MandatoryRollerCoaster(GenericEmptyComic, GenericTumblrV1):
    """Class to retrieve Mandatory Roller Coaster comics."""
    # GenericEmptyComic is listed first, so it takes precedence over
    # GenericTumblrV1 in the method resolution order.
    name = 'rollercoaster'
    long_name = 'Mandatory Roller Coaster'
    url = 'http://mandatoryrollercoaster.com'
3883
3884
3885
class CEstPasEnRegardantSesPompes(GenericEmptyComic, GenericTumblrV1):
    """Class to retrieve C'Est Pas En Regardant Ses Pompes (...) comics."""
    # GenericEmptyComic is listed first, so it takes precedence over
    # GenericTumblrV1 in the method resolution order.
    name = 'cperspqccltt'
    long_name = 'C Est Pas En Regardant Ses Pompes (...)'
    url = 'http://cperspqccltt.tumblr.com'
3890
3891
3892
class TheGrohlTroll(GenericEmptyComic, GenericTumblrV1):
    """Class to retrieve The Grohl Troll comics."""
    # GenericEmptyComic is listed first, so it takes precedence over
    # GenericTumblrV1 in the method resolution order.
    name = 'grohltroll'
    long_name = 'The Grohl Troll'
    url = 'http://thegrohltroll.com'
3897
3898
3899
class WebcomicName(GenericEmptyComic, GenericTumblrV1):
    """Class to retrieve Webcomic Name comics."""
    # GenericEmptyComic is listed first, so it takes precedence over
    # GenericTumblrV1 in the method resolution order.
    name = 'webcomicname'
    long_name = 'Webcomic Name'
    url = 'http://webcomicname.com'
3904
3905
3906
class BooksOfAdam(GenericEmptyComic, GenericTumblrV1):
    """Class to retrieve Books of Adam comics from Tumblr."""
    # GenericEmptyComic is listed first, so it takes precedence over
    # GenericTumblrV1 in the method resolution order.
    # Also on http://www.booksofadam.com
    name = 'booksofadam'
    long_name = 'Books of Adam'
    url = 'http://booksofadam.tumblr.com'
3912
3913
3914
class HarkAVagrant(GenericEmptyComic, GenericTumblrV1):
    """Class to retrieve Hark A Vagrant comics from Tumblr."""
    # GenericEmptyComic is listed first, so it takes precedence over
    # GenericTumblrV1 in the method resolution order.
    # Also on http://www.harkavagrant.com
    name = 'hark-tumblr'
    long_name = 'Hark A Vagrant (from Tumblr)'
    url = 'http://beatonna.tumblr.com'
3920
3921
3922
class OurSuperAdventureTumblr(GenericEmptyComic, GenericTumblrV1):
    """Class to retrieve Our Super Adventure comics from Tumblr."""
    # GenericEmptyComic is listed first, so it takes precedence over
    # GenericTumblrV1 in the method resolution order.
    # Also on https://tapastic.com/series/Our-Super-Adventure
    # Also on http://www.oursuperadventure.com
    # Also on http://sarahgraley.com
    name = 'superadventure-tumblr'
    long_name = 'Our Super Adventure (from Tumblr)'
    url = 'http://sarahssketchbook.tumblr.com'
3930
3931
3932
class JakeLikesOnions(GenericTumblrV1):
    """Class to retrieve Jake Likes Onions comics."""
    # Served from its own domain but handled through GenericTumblrV1.
    name = 'jake'
    long_name = 'Jake Likes Onions'
    url = 'http://jakelikesonions.com'
3937
3938
3939
class InYourFaceCake(GenericEmptyComic, GenericTumblrV1):
    """Class to retrieve In Your Face Cake comics from Tumblr."""
    # GenericEmptyComic is listed first, so it takes precedence over
    # GenericTumblrV1 in the method resolution order.
    name = 'inyourfacecake-tumblr'
    long_name = 'In Your Face Cake (from Tumblr)'
    url = 'http://in-your-face-cake.tumblr.com'
3944
3945
3946
class Robospunk(GenericTumblrV1):
    """Class to retrieve Robospunk comics."""
    # Served from its own domain but handled through GenericTumblrV1.
    name = 'robospunk'
    long_name = 'Robospunk'
    url = 'http://robospunk.com'
3951
3952
3953
class BananaTwinky(GenericTumblrV1):
    """Class to retrieve Banana Twinky comics from Tumblr."""
    name = 'banana'
    long_name = 'Banana Twinky'
    url = 'http://bananatwinky.tumblr.com'
3958
3959
3960
class YesterdaysPopcornTumblr(GenericTumblrV1):
    """Class to retrieve Yesterday's Popcorn comics from Tumblr."""
    # Also on http://www.yesterdayspopcorn.com
    # Also on https://tapastic.com/series/Yesterdays-Popcorn
    name = 'popcorn-tumblr'
    long_name = 'Yesterday\'s Popcorn (from Tumblr)'
    url = 'http://yesterdayspopcorn.tumblr.com'
3967
3968
3969
class TwistedDoodles(GenericEmptyComic, GenericTumblrV1):
    """Class to retrieve Twisted Doodles comics."""
    # GenericEmptyComic is listed first, so it takes precedence over
    # GenericTumblrV1 in the method resolution order.
    name = 'twisted'
    long_name = 'Twisted Doodles'
    url = 'http://www.twisteddoodles.com'
3974
3975
3976
class HorovitzComics(GenericListableComic):
    """Generic class to handle the logic common to the different comics from Horovitz.

    Subclasses must override `link_re` with a compiled regex matching the
    hrefs of their comics in the archive page; its first group is read as
    the comic number.
    """
    url = 'http://www.horovitzcomics.com'
    _categories = ('HOROVITZ', )
    # The three numeric groups of the image path are read as year/month/day.
    img_re = re.compile('.*comics/([0-9]*)/([0-9]*)/([0-9]*)/.*$')
    # Placeholder: concrete subclasses provide the actual pattern.
    link_re = NotImplemented
    get_url_from_archive_element = join_cls_url_to_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        `soup` is the parsed comic page and `link` the archive link leading
        to it. Exactly one <img id="comic"> is expected in the page.
        """
        href = link['href']
        num = int(cls.link_re.match(href).groups()[0])
        title = link.string
        imgs = soup.find_all('img', id='comic')
        assert len(imgs) == 1
        # The publication date is extracted from the image URL.
        year, month, day = [int(s)
                            for s in cls.img_re.match(imgs[0]['src']).groups()]
        return {
            'title': title,
            'day': day,
            'month': month,
            'year': year,
            'img': [i['src'] for i in imgs],
            'num': num,
        }

    @classmethod
    def get_archive_elements(cls):
        """Get the archive links matching `link_re`, in reversed page order."""
        archive_url = 'http://www.horovitzcomics.com/comics/archive/'
        return reversed(get_soup_at_url(archive_url).find_all('a', href=cls.link_re))
4007
4008
4009
class HorovitzNew(HorovitzComics):
    """Class to retrieve Horovitz new comics."""
    name = 'horovitznew'
    long_name = 'Horovitz New'
    # The captured group is the comic number.
    link_re = re.compile('^/comics/new/([0-9]+)$')
4014
4015
4016
class HorovitzClassic(HorovitzComics):
    """Class to retrieve Horovitz classic comics."""
    name = 'horovitzclassic'
    long_name = 'Horovitz Classic'
    # The captured group is the comic number.
    link_re = re.compile('^/comics/classic/([0-9]+)$')
4021
4022
4023
class GenericGoComic(GenericNavigableComic):
    """Generic class to handle the logic common to comics from gocomics.com."""
    _categories = ('GOCOMIC', )
    # Matches URLs ending with three '/'-separated numeric groups, which
    # get_comic_info reads as year/month/day.
    url_date_re = re.compile('.*/([0-9]*)/([0-9]*)/([0-9]*)$')

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics."""
        return get_soup_at_url(cls.url).find('a', class_='beginning')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic."""
        return last_soup.find('a', class_='next' if next_ else 'prev', href=cls.url_date_re)

    @classmethod
    def get_url_from_link(cls, link):
        """Get the comic url by joining the link's href to the gocomics.com root."""
        gocomics = 'http://www.gocomics.com'
        return urljoin_wrapper(gocomics, link['href'])

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics."""
        url = cls.get_url_from_link(link)
        # The date is taken from the trailing components of the comic url.
        year, month, day = [int(s)
                            for s in cls.url_date_re.match(url).groups()]
        return {
            'day': day,
            'month': month,
            'year': year,
            # Only the last image with class 'strip' is kept.
            'img': [soup.find_all('img', class_='strip')[-1]['src']],
            'author': soup.find('meta', attrs={'name': 'author'})['content']
        }
4056
4057
4058
class PearlsBeforeSwine(GenericGoComic):
    """Class to retrieve Pearls Before Swine comics from gocomics.com."""
    name = 'pearls'
    long_name = 'Pearls Before Swine'
    url = 'http://www.gocomics.com/pearlsbeforeswine'
4063
4064
4065
class Peanuts(GenericGoComic):
    """Class to retrieve Peanuts comics from gocomics.com."""
    name = 'peanuts'
    long_name = 'Peanuts'
    url = 'http://www.gocomics.com/peanuts'
4070
4071
4072
class MattWuerker(GenericGoComic):
    """Class to retrieve Matt Wuerker comics from gocomics.com."""
    name = 'wuerker'
    long_name = 'Matt Wuerker'
    url = 'http://www.gocomics.com/mattwuerker'
4077
4078
4079
class TomToles(GenericGoComic):
    """Class to retrieve Tom Toles comics from gocomics.com."""
    name = 'toles'
    long_name = 'Tom Toles'
    url = 'http://www.gocomics.com/tomtoles'
4084
4085
4086
class BreakOfDay(GenericGoComic):
    """Class to retrieve Break Of Day comics from gocomics.com."""
    name = 'breakofday'
    long_name = 'Break Of Day'
    url = 'http://www.gocomics.com/break-of-day'
4091
4092
4093
class Brevity(GenericGoComic):
    """Class to retrieve Brevity comics from gocomics.com."""
    name = 'brevity'
    long_name = 'Brevity'
    url = 'http://www.gocomics.com/brevity'
4098
4099
4100
class MichaelRamirez(GenericGoComic):
    """Class to retrieve Michael Ramirez comics from gocomics.com."""
    name = 'ramirez'
    long_name = 'Michael Ramirez'
    url = 'http://www.gocomics.com/michaelramirez'
4105
4106
4107
class MikeLuckovich(GenericGoComic):
    """Class to retrieve Mike Luckovich comics from gocomics.com."""
    name = 'luckovich'
    long_name = 'Mike Luckovich'
    url = 'http://www.gocomics.com/mikeluckovich'
4112
4113
4114
class JimBenton(GenericGoComic):
    """Class to retrieve Jim Benton comics from gocomics.com."""
    # Also on http://jimbenton.tumblr.com
    name = 'benton'
    long_name = 'Jim Benton'
    url = 'http://www.gocomics.com/jim-benton-cartoons'
4120
4121
4122
class TheArgyleSweater(GenericGoComic):
    """Class to retrieve the Argyle Sweater comics from gocomics.com."""
    name = 'argyle'
    long_name = 'Argyle Sweater'
    url = 'http://www.gocomics.com/theargylesweater'
4127
4128
4129
class SunnyStreet(GenericGoComic):
    """Class to retrieve Sunny Street comics from gocomics.com."""
    # Also on http://www.sunnystreetcomics.com
    name = 'sunny'
    long_name = 'Sunny Street'
    url = 'http://www.gocomics.com/sunny-street'
4135
4136
4137
class OffTheMark(GenericGoComic):
    """Class to retrieve Off The Mark comics from gocomics.com."""
    # Also on https://www.offthemark.com
    name = 'offthemark'
    long_name = 'Off The Mark'
    url = 'http://www.gocomics.com/offthemark'
4143
4144
4145
class WuMo(GenericGoComic):
    """Class to retrieve WuMo comics from gocomics.com."""
    # Also on http://wumo.com
    name = 'wumo'
    long_name = 'WuMo'
    url = 'http://www.gocomics.com/wumo'
4151
4152
4153
class LunarBaboon(GenericGoComic):
    """Class to retrieve Lunar Baboon comics from gocomics.com."""
    # Also on http://www.lunarbaboon.com
    # Also on https://tapastic.com/series/Lunarbaboon
    name = 'lunarbaboon'
    long_name = 'Lunar Baboon'
    url = 'http://www.gocomics.com/lunarbaboon'
4160
4161
4162
class SandersenGocomic(GenericGoComic):
    """Class to retrieve Sarah Andersen comics from gocomics.com."""
    # Also on http://sarahcandersen.com
    # Also on http://tapastic.com/series/Doodle-Time
    name = 'sandersen-goc'
    long_name = 'Sarah Andersen (from GoComics)'
    url = 'http://www.gocomics.com/sarahs-scribbles'
4169
4170
4171
class SaturdayMorningBreakfastCerealGoComic(GenericGoComic):
    """Class to retrieve Saturday Morning Breakfast Cereal comics from gocomics.com."""
    # Also on http://smbc-comics.tumblr.com
    # Also on http://www.smbc-comics.com
    name = 'smbc-goc'
    long_name = 'Saturday Morning Breakfast Cereal (from GoComics)'
    url = 'http://www.gocomics.com/saturday-morning-breakfast-cereal'
    _categories = ('SMBC', )
4179
4180
4181
class CalvinAndHobbesGoComic(GenericGoComic):
    """Class to retrieve Calvin and Hobbes comics from gocomics.com."""
    # From gocomics, not http://marcel-oehler.marcellosendos.ch/comics/ch/
    name = 'calvin-goc'
    long_name = 'Calvin and Hobbes (from GoComics)'
    url = 'http://www.gocomics.com/calvinandhobbes'
4187
4188
4189
class RallGoComic(GenericGoComic):
    """Class to retrieve Ted Rall comics from gocomics.com."""
    # Also on http://rall.com/comic
    name = 'rall-goc'
    long_name = "Ted Rall (from GoComics)"
    url = "http://www.gocomics.com/tedrall"
    _categories = ('RALL', )
4196
4197
4198
class TheAwkwardYetiGoComic(GenericGoComic):
    """Class to retrieve The Awkward Yeti comics from gocomics.com."""
    # Also on http://larstheyeti.tumblr.com
    # Also on http://theawkwardyeti.com
    # Also on https://tapastic.com/series/TheAwkwardYeti
    name = 'yeti-goc'
    long_name = 'The Awkward Yeti (from GoComics)'
    url = 'http://www.gocomics.com/the-awkward-yeti'
    _categories = ('YETI', )
4207
4208
4209
class BerkeleyMewsGoComics(GenericGoComic):
    """Class to retrieve Berkeley Mews comics from gocomics.com."""
    # Also on http://mews.tumblr.com
    # Also on http://www.berkeleymews.com
    name = 'berkeley-goc'
    long_name = 'Berkeley Mews (from GoComics)'
    url = 'http://www.gocomics.com/berkeley-mews'
    _categories = ('BERKELEY', )
4217
4218
4219
class SheldonGoComics(GenericGoComic):
    """Class to retrieve Sheldon comics from gocomics.com."""
    # Also on http://www.sheldoncomics.com
    name = 'sheldon-goc'
    long_name = 'Sheldon Comics (from GoComics)'
    url = 'http://www.gocomics.com/sheldon'
4225
4226
4227
class FowlLanguageGoComics(GenericGoComic):
    """Class to retrieve Fowl Language comics from gocomics.com."""
    # Also on http://www.fowllanguagecomics.com
    # Also on http://tapastic.com/series/Fowl-Language-Comics
    # Also on http://fowllanguagecomics.tumblr.com
    name = 'fowllanguage-goc'
    long_name = 'Fowl Language Comics (from GoComics)'
    url = 'http://www.gocomics.com/fowl-language'
    _categories = ('FOWLLANGUAGE', )
4236
4237
4238
class NickAnderson(GenericGoComic):
    """Class to retrieve Nick Anderson comics from gocomics.com."""
    name = 'nickanderson'
    long_name = 'Nick Anderson'
    url = 'http://www.gocomics.com/nickanderson'
4243
4244
4245
class GarfieldGoComics(GenericGoComic):
    """Class to retrieve Garfield comics from gocomics.com."""
    # Also on http://garfield.com
    name = 'garfield-goc'
    long_name = 'Garfield (from GoComics)'
    url = 'http://www.gocomics.com/garfield'
    _categories = ('GARFIELD', )
4252
4253
4254
class DorrisMcGoComics(GenericGoComic):
    """Class to retrieve Dorris Mc Comics from gocomics.com."""
    # Also on http://dorrismccomics.com
    name = 'dorrismc-goc'
    long_name = 'Dorris Mc (from GoComics)'
    url = 'http://www.gocomics.com/dorris-mccomics'
4260
4261
4262
class FoxTrot(GenericGoComic):
    """Class to retrieve FoxTrot comics from gocomics.com."""
    name = 'foxtrot'
    long_name = 'FoxTrot'
    url = 'http://www.gocomics.com/foxtrot'
4267
4268
4269
class FoxTrotClassics(GenericGoComic):
    """Class to retrieve FoxTrot Classics comics from gocomics.com."""
    name = 'foxtrot-classics'
    long_name = 'FoxTrot Classics'
    url = 'http://www.gocomics.com/foxtrotclassics'
4274
4275
4276
class MisterAndMeGoComics(GenericGoComic):
    """Class to retrieve Mister & Me Comics from gocomics.com."""
    # Also on http://www.mister-and-me.com
    # Also on https://tapastic.com/series/Mister-and-Me
    name = 'mister-goc'
    long_name = 'Mister & Me (from GoComics)'
    url = 'http://www.gocomics.com/mister-and-me'
4283
4284
4285
class NonSequitur(GenericGoComic):
    """Class to retrieve Non Sequitur (Wiley Miller) comics from gocomics.com."""
    name = 'nonsequitur'
    long_name = 'Non Sequitur'
    url = 'http://www.gocomics.com/nonsequitur'
4290
4291
4292
class GenericTapasticComic(GenericListableComic):
    """Generic class to handle the logic common to comics from tapastic.com."""
    _categories = ('TAPASTIC', )

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Get information about a particular comics.

        `archive_elt` is one of the dicts returned by get_archive_elements.
        Returns None (after printing a message) when the page contains no
        image with class 'art-image'.
        """
        # 'publishDate' is divided by 1000 before being used as a timestamp
        # (presumably it is expressed in milliseconds - TODO confirm).
        timestamp = int(archive_elt['publishDate']) / 1000.0
        day = datetime.datetime.fromtimestamp(timestamp).date()
        imgs = soup.find_all('img', class_='art-image')
        if not imgs:
            print("Comic %s is being uploaded, retry later" % cls.get_url_from_archive_element(archive_elt))
            return None
        return {
            'day': day.day,
            'year': day.year,
            'month': day.month,
            'img': [i['src'] for i in imgs],
            'title': archive_elt['title'],
        }

    @classmethod
    def get_url_from_archive_element(cls, archive_elt):
        """Get the episode url built from the 'id' of the archive element."""
        return 'http://tapastic.com/episode/' + str(archive_elt['id'])

    @classmethod
    def get_archive_elements(cls):
        """Get the episode list embedded in the javascript of the series page.

        Raises IndexError when no line of the form 'episodeList : ...,' is found.
        """
        pref, suff = 'episodeList : ', ','
        # Information is stored in the javascript part
        # I don't know the clean way to get it so this is the ugly way.
        for raw_line in urlopen_wrapper(cls.url).readlines():
            line = raw_line.decode('utf-8').strip()
            if line.startswith(pref) and line.endswith(suff):
                # Drop the prefix and the trailing comma to keep the JSON only.
                return json.loads(line[len(pref):-len(suff)])
        raise IndexError("No line starting with %r found at %s" % (pref, cls.url))
4325
4326
4327
class VegetablesForDessert(GenericTapasticComic):
    """Class to retrieve Vegetables For Dessert comics from tapastic.com."""
    # Also on http://vegetablesfordessert.tumblr.com
    name = 'vegetables'
    long_name = 'Vegetables For Dessert'
    url = 'http://tapastic.com/series/vegetablesfordessert'
4333
4334
4335
class FowlLanguageTapa(GenericTapasticComic):
    """Class to retrieve Fowl Language comics from tapastic.com."""
    # Also on http://www.fowllanguagecomics.com
    # Also on http://fowllanguagecomics.tumblr.com
    # Also on http://www.gocomics.com/fowl-language
    name = 'fowllanguage-tapa'
    long_name = 'Fowl Language Comics (from Tapastic)'
    url = 'http://tapastic.com/series/Fowl-Language-Comics'
    _categories = ('FOWLLANGUAGE', )
4344
4345
4346
class OscillatingProfundities(GenericTapasticComic):
    """Class to retrieve Oscillating Profundities comics from tapastic.com."""
    name = 'oscillating'
    long_name = 'Oscillating Profundities'
    url = 'http://tapastic.com/series/oscillatingprofundities'
4351
4352
4353
class ZnoflatsComics(GenericTapasticComic):
    """Class to retrieve Znoflats comics from tapastic.com."""
    name = 'znoflats'
    long_name = 'Znoflats Comics'
    url = 'http://tapastic.com/series/Znoflats-Comics'
4358
4359
4360
class SandersenTapastic(GenericTapasticComic):
    """Class to retrieve Sarah Andersen comics from tapastic.com."""
    # Also on http://sarahcandersen.com
    # Also on http://www.gocomics.com/sarahs-scribbles
    name = 'sandersen-tapa'
    long_name = 'Sarah Andersen (from Tapastic)'
    url = 'http://tapastic.com/series/Doodle-Time'
4367
4368
4369
class TubeyToonsTapastic(GenericTapasticComic):
    """Class to retrieve TubeyToons comics from tapastic.com."""
    # Also on http://tubeytoons.com
    # Also on http://tubeytoons.tumblr.com
    name = 'tubeytoons-tapa'
    long_name = 'Tubey Toons (from Tapastic)'
    url = 'http://tapastic.com/series/Tubey-Toons'
    # NOTE(review): category is spelled 'TUNEYTOONS' (not 'TUBEYTOONS'); left
    # untouched as other classes may share this exact value - confirm.
    _categories = ('TUNEYTOONS', )
4377
4378
4379
class AnythingComicTapastic(GenericTapasticComic):
    """Class to retrieve Anything Comics from tapastic.com."""
    # Also on http://www.anythingcomic.com
    name = 'anythingcomic-tapa'
    long_name = 'Anything Comic (from Tapastic)'
    url = 'http://tapastic.com/series/anything'
4385
4386
4387
class UnearthedComicsTapastic(GenericTapasticComic):
    """Class to retrieve Unearthed comics from tapastic.com."""
    # Also on http://unearthedcomics.com
    # Also on http://unearthedcomics.tumblr.com
    name = 'unearthed-tapa'
    long_name = 'Unearthed Comics (from Tapastic)'
    url = 'http://tapastic.com/series/UnearthedComics'
    _categories = ('UNEARTHED', )
4395
4396
4397
class EverythingsStupidTapastic(GenericTapasticComic):
    """Class to retrieve Everything's stupid Comics from tapastic.com."""
    # Also on http://www.webtoons.com/en/challenge/everythings-stupid/list?title_no=14591
    # Also on http://everythingsstupid.net
    name = 'stupid-tapa'
    long_name = "Everything's Stupid (from Tapastic)"
    url = 'http://tapastic.com/series/EverythingsStupid'
4404
4405
4406
class JustSayEhTapastic(GenericTapasticComic):
    """Class to retrieve Just Say Eh comics from tapastic.com."""
    # Also on http://www.justsayeh.com
    name = 'justsayeh-tapa'
    long_name = 'Just Say Eh (from Tapastic)'
    url = 'http://tapastic.com/series/Just-Say-Eh'
4412
4413
4414
class ThorsThundershackTapastic(GenericTapasticComic):
    """Class to retrieve Thor's Thundershack comics from tapastic.com."""
    # Also on http://www.thorsthundershack.com
    name = 'thor-tapa'
    long_name = 'Thor\'s Thundershack (from Tapastic)'
    # NOTE(review): the series slug ends in 'Thundershac' (no final 'k');
    # presumably this is the actual Tapastic slug - confirm.
    url = 'http://tapastic.com/series/Thors-Thundershac'
    _categories = ('THOR', )
4421
4422
4423
class OwlTurdTapastic(GenericTapasticComic):
    """Class to retrieve Owl Turd comics from tapastic.com."""
    # Also on http://owlturd.com
    name = 'owlturd-tapa'
    long_name = 'Owl Turd (from Tapastic)'
    url = 'http://tapastic.com/series/Owl-Turd-Comix'
    _categories = ('OWLTURD', )
4430
4431
4432
class GoneIntoRaptureTapastic(GenericTapasticComic):
    """Class to retrieve Gone Into Rapture comics from tapastic.com."""
    # Also on http://goneintorapture.tumblr.com
    # Also on http://www.goneintorapture.com
    name = 'rapture-tapa'
    long_name = 'Gone Into Rapture (from Tapastic)'
    url = 'http://tapastic.com/series/Goneintorapture'
4439
4440
4441
class HeckIfIKnowComicsTapa(GenericTapasticComic):
    """Class to retrieve Heck If I Know Comics from tapastic.com."""
    # Also on http://heckifiknowcomics.com
    name = 'heck-tapa'
    long_name = 'Heck if I Know comics (from Tapastic)'
    url = 'http://tapastic.com/series/Regular'
4447
4448
4449
class CheerUpEmoKidTapa(GenericTapasticComic):
    """Class to retrieve CheerUpEmoKid comics from tapastic.com."""
    # Also on http://www.cheerupemokid.com
    # Also on http://enzocomics.tumblr.com
    name = 'cuek-tapa'
    long_name = 'Cheer Up Emo Kid (from Tapastic)'
    url = 'http://tapastic.com/series/CUEK'
4456
4457
4458
class BigFootJusticeTapa(GenericTapasticComic):
    """Class to retrieve Big Foot Justice comics from tapastic.com."""
    # Also on http://bigfootjustice.com
    name = 'bigfoot-tapa'
    long_name = 'Big Foot Justice (from Tapastic)'
    url = 'http://tapastic.com/series/bigfoot-justice'
4464
4465
4466
class UpAndOutTapa(GenericTapasticComic):
    """Class to retrieve Up & Out comics from tapastic.com."""
    # Also on http://upandoutcomic.tumblr.com
    name = 'upandout-tapa'
    long_name = 'Up And Out (from Tapastic)'
    url = 'http://tapastic.com/series/UP-and-OUT'
4472
4473
4474
class ToonHoleTapa(GenericTapasticComic):
    """Class to retrieve Toon Holes comics from tapastic.com."""
    # Also on http://www.toonhole.com
    name = 'toonhole-tapa'
    long_name = 'Toon Hole (from Tapastic)'
    url = 'http://tapastic.com/series/TOONHOLE'
4480
4481
4482
class AngryAtNothingTapa(GenericTapasticComic):
    """Class to retrieve Angry at Nothing comics from tapastic.com."""
    # Also on http://www.angryatnothing.net
    name = 'angry-tapa'
    long_name = 'Angry At Nothing (from Tapastic)'
    url = 'http://tapastic.com/series/Comics-yeah-definitely-comics-'
4488
4489
4490
class LeleozTapa(GenericTapasticComic):
    """Class to retrieve Leleoz comics from tapastic.com."""
    # Also on http://leleozcomics.tumblr.com
    name = 'leleoz-tapa'
    long_name = 'Leleoz (from Tapastic)'
    url = 'https://tapastic.com/series/Leleoz'
4496
4497
4498
class TheAwkwardYetiTapa(GenericTapasticComic):
    """Class to retrieve The Awkward Yeti comics from tapastic.com."""
    # Also on http://www.gocomics.com/the-awkward-yeti
    # Also on http://theawkwardyeti.com
    # Also on http://larstheyeti.tumblr.com
    name = 'yeti-tapa'
    long_name = 'The Awkward Yeti (from Tapastic)'
    url = 'https://tapastic.com/series/TheAwkwardYeti'
    _categories = ('YETI', )
4507
4508
4509
class AsPerUsualTapa(GenericTapasticComic):
    """Class to retrieve As Per Usual comics from tapastic.com."""
    # Also on http://as-per-usual.tumblr.com
    name = 'usual-tapa'
    long_name = 'As Per Usual (from Tapastic)'
    url = 'https://tapastic.com/series/AsPerUsual'
4515
4516
4517
class OneOneOneOneComicTapa(GenericTapasticComic):
    """Class to retrieve 1111 Comics from tapastic.com."""
    # Also on http://www.1111comics.me
    # Also on http://comics1111.tumblr.com
    name = '1111-tapa'
    long_name = '1111 Comics (from Tapastic)'
    url = 'https://tapastic.com/series/1111-Comics'
    _categories = ('ONEONEONEONE', )
4525
4526
4527
class TumbleDryTapa(GenericTapasticComic):
    """Class to retrieve Tumble Dry comics from tapastic.com."""
    # Also on http://tumbledrycomics.com
    name = 'tumbledry-tapa'
    # Display name fixed: the comic is 'Tumble Dry', not 'Tumblr Dry'.
    long_name = 'Tumble Dry (from Tapastic)'
    url = 'https://tapastic.com/series/TumbleDryComics'
4533
4534
4535
class DeadlyPanelTapa(GenericTapasticComic):
    """Class to retrieve Deadly Panel comics from tapastic.com."""
    # Also on http://www.deadlypanel.com
    name = 'deadly-tapa'
    long_name = 'Deadly Panel (from Tapastic)'
    url = 'https://tapastic.com/series/deadlypanel'
4541
4542
4543
class ChrisHallbeckMaxiTapa(GenericTapasticComic):
    """Class to retrieve Chris Hallbeck's Maximumble comics from tapastic.com."""
    # Also on http://chrishallbeck.tumblr.com
    # Also on http://maximumble.com
    name = 'hallbeckmaxi-tapa'
    # NOTE(review): 'Hallback' below differs from the 'Hallbeck' spelling used
    # in the class name and URLs; left untouched as the values may be shared.
    long_name = 'Chris Hallback - Maximumble (from Tapastic)'
    url = 'https://tapastic.com/series/Maximumble'
    _categories = ('HALLBACK', )
4551
4552
4553
class ChrisHallbeckMiniTapa(GenericTapasticComic):
    """Class to retrieve Chris Hallbeck's Minimumble comics from tapastic.com."""
    # Also on http://chrishallbeck.tumblr.com
    # Also on http://minimumble.com
    name = 'hallbeckmini-tapa'
    # NOTE(review): 'Hallback' below differs from the 'Hallbeck' spelling used
    # in the class name and URLs; left untouched as the values may be shared.
    long_name = 'Chris Hallback - Minimumble (from Tapastic)'
    url = 'https://tapastic.com/series/Minimumble'
    _categories = ('HALLBACK', )
4561
4562
4563
class ChrisHallbeckBiffTapa(GenericTapasticComic):
    """Class to retrieve Chris Hallbeck's The Book of Biff comics from tapastic.com."""
    # Also on http://chrishallbeck.tumblr.com
    # Also on http://thebookofbiff.com
    name = 'hallbeckbiff-tapa'
    # NOTE(review): 'Hallback' below differs from the 'Hallbeck' spelling used
    # in the class name and URLs; left untouched as the values may be shared.
    long_name = 'Chris Hallback - The Book of Biff (from Tapastic)'
    url = 'https://tapastic.com/series/Biff'
    _categories = ('HALLBACK', )
4571
4572
4573
class RandoWisTapa(GenericTapasticComic):
    """Class to retrieve RandoWis comics from tapastic.com."""
    # Also on https://randowis.com
    name = 'randowis-tapa'
    long_name = 'RandoWis (from Tapastic)'
    url = 'https://tapastic.com/series/RandoWis'
4579
4580
4581
class PigeonGazetteTapa(GenericTapasticComic):
    """Class to retrieve The Pigeon Gazette comics from tapastic.com."""
    # Also on http://thepigeongazette.tumblr.com
    name = 'pigeon-tapa'
    long_name = 'The Pigeon Gazette (from Tapastic)'
    url = 'https://tapastic.com/series/The-Pigeon-Gazette'
4587
4588
4589
class TheOdd1sOutTapa(GenericTapasticComic):
    """Class to retrieve The Odd 1s Out comics from tapastic.com."""
    # Also on http://theodd1sout.com
    # Also on http://theodd1sout.tumblr.com
    name = 'theodd-tapa'
    long_name = 'The Odd 1s Out (from Tapastic)'
    url = 'https://tapastic.com/series/Theodd1sout'
4596
4597
4598
class TheWorldIsFlatTapa(GenericTapasticComic):
    """Class to retrieve The World Is Flat Comics from tapastic.com."""
    # Also on http://theworldisflatcomics.tumblr.com
    name = 'flatworld-tapa'
    long_name = 'The World Is Flat (from Tapastic)'
    url = 'https://tapastic.com/series/The-World-is-Flat'
4604
4605
4606
class MisterAndMeTapa(GenericTapasticComic):
    """Class to retrieve Mister & Me Comics from tapastic.com."""
    # Also on http://www.mister-and-me.com
    # Also on http://www.gocomics.com/mister-and-me
    name = 'mister-tapa'
    long_name = 'Mister & Me (from Tapastic)'
    url = 'https://tapastic.com/series/Mister-and-Me'
4613
4614
4615
class TalesOfAbsurdityTapa(GenericTapasticComic):
    """Class to retrieve Tales Of Absurdity comics from tapastic.com."""
    # Also on http://talesofabsurdity.com
    # Also on http://talesofabsurdity.tumblr.com
    name = 'absurdity-tapa'
    long_name = 'Tales of Absurdity (from Tapastic)'
    url = 'http://tapastic.com/series/Tales-Of-Absurdity'
    _categories = ('ABSURDITY', )
4623
4624
4625
class BFGFSTapa(GenericTapasticComic):
    """Class to retrieve BFGFS comics from tapastic.com."""
    # Also on http://bfgfs.com
    # Also on http://bfgfs.tumblr.com
    name = 'bfgfs-tapa'
    long_name = 'BFGFS (from Tapastic)'
    url = 'https://tapastic.com/series/BFGFS'
4632
4633
4634
class DoodleForFoodTapa(GenericTapasticComic):
    """Class to retrieve Doodle For Food comics from tapastic.com."""
    # Also on http://doodleforfood.com
    name = 'doodle-tapa'
    long_name = 'Doodle For Food (from Tapastic)'
    url = 'https://tapastic.com/series/Doodle-for-Food'
4640
4641
4642
class MrLovensteinTapa(GenericTapasticComic):
    """Class to retrieve Mr Lovenstein comics from tapastic.com."""
    # NOTE(review): the original "Also on" comment pointed back to this very
    # Tapastic series; presumably the comic's own site was meant - confirm.
    name = 'mrlovenstein-tapa'
    long_name = 'Mr. Lovenstein (from Tapastic)'
    url = 'https://tapastic.com/series/MrLovenstein'
4648
4649
4650
class CassandraCalinTapa(GenericTapasticComic):
    """Class to retrieve C. Cassandra comics from tapastic.com."""
    # Also on http://cassandracalin.com
    # Also on http://c-cassandra.tumblr.com
    name = 'cassandra-tapa'
    long_name = 'Cassandra Calin (from Tapastic)'
    url = 'https://tapastic.com/series/C-Cassandra-comics'
4657
4658
4659
class WafflesAndPancakes(GenericTapasticComic):
    """Class to retrieve Waffles And Pancakes comics from tapastic.com."""
    # Also on http://wandpcomic.com
    name = 'waffles'
    long_name = 'Waffles And Pancakes'
    url = 'https://tapastic.com/series/Waffles-and-Pancakes'
4665
4666
4667
class YesterdaysPopcornTapastic(GenericTapasticComic):
    """Yesterday's Popcorn comics, retrieved from their Tapastic series page."""
    # Also on:
    #   http://www.yesterdayspopcorn.com
    #   http://yesterdayspopcorn.tumblr.com
    name = 'popcorn-tapa'
    long_name = "Yesterday's Popcorn (from Tapastic)"
    url = 'https://tapastic.com/series/Yesterdays-Popcorn'
4674
4675
4676
class OurSuperAdventureTapastic(GenericTapasticComic):
    """Our Super Adventure comics, retrieved from their Tapastic series page."""
    # Also on:
    #   http://www.oursuperadventure.com
    #   http://sarahssketchbook.tumblr.com
    #   http://sarahgraley.com
    name = 'superadventure-tapastic'
    long_name = 'Our Super Adventure (from Tapastic)'
    url = 'https://tapastic.com/series/Our-Super-Adventure'
4684
4685
4686
class NamelessPCs(GenericTapasticComic):
    """Nameless PCs comics, retrieved from their Tapastic series page."""
    # Also on:
    #   http://namelesspcs.com
    name = 'namelesspcs-tapa'
    long_name = 'NamelessPCs (from Tapastic)'
    url = 'https://tapastic.com/series/NamelessPC'
4692
4693
4694
def get_subclasses(klass):
    """Return every class deriving from `klass`, directly or indirectly.

    Direct subclasses come first, followed by the descendants of each direct
    subclass in turn. A class reachable through several inheritance paths
    appears once per path.
    """
    children = klass.__subclasses__()
    descendants = list(children)
    for child in children:
        descendants += get_subclasses(child)
    return descendants
4700
4701
4702
def remove_st_nd_rd_th_from_date(string):
    """Strip the ordinal suffix from day numbers ("1st" -> "1", "22nd" -> "22").

    The result can then be parsed with a plain "%d" directive.

    Only a lowercase "st"/"nd"/"rd"/"th" directly following a digit is
    removed. Words that merely contain those letters ("August", "Monday",
    "Saturday", "Sunday", "month", ...) are left untouched, which a blanket
    string replacement would mangle.

    string: date text possibly containing ordinal day numbers.
    Returns the text with the ordinal suffixes removed.
    """
    # Look-behind keeps the digit itself out of the match so only the
    # suffix is dropped.
    return re.sub(r'(?<=\d)(?:st|nd|rd|th)', '', string)
4710
4711
4712
def string_to_date(string, date_format, local=DEFAULT_LOCAL):
    """Function to convert string to date object.

    Wrapper around datetime.datetime.strptime which switches the process
    locale (LC_ALL) to `local` for the duration of the parsing and then
    puts the previous locale back, even when the parsing fails.

    string: text to parse.
    date_format: strptime format describing `string`.
    local: name of the locale to parse with.

    Returns the parsed value as a datetime.date.
    Exceptions raised by strptime (a string not matching the format) or by
    locale.setlocale (a locale that cannot be set) are propagated.
    """
    # format described in https://docs.python.org/2/library/datetime.html#strftime-and-strptime-behavior
    prev_locale = locale.setlocale(locale.LC_ALL)
    must_switch = local != prev_locale
    if must_switch:
        locale.setlocale(locale.LC_ALL, local)
    try:
        return datetime.datetime.strptime(string, date_format).date()
    finally:
        # Restore in all cases: without this, one unparsable date would
        # leave the whole process running under `local`.
        if must_switch:
            locale.setlocale(locale.LC_ALL, prev_locale)
4723
4724
4725
# Every direct or indirect subclass of GenericComic, without duplicates.
COMICS = set(get_subclasses(GenericComic))
# Classes whose `name` is None are left out of the registry.
VALID_COMICS = [comic for comic in COMICS if comic.name is not None]
# Lookup table from comic name to comic class.
COMIC_NAMES = {comic.name: comic for comic in VALID_COMICS}
# Two classes sharing a name would collapse into one dict entry.
assert len(COMIC_NAMES) == len(VALID_COMICS)
CLASS_NAMES = {comic.__name__ for comic in VALID_COMICS}
# Likewise, two classes sharing a class name would collapse in the set.
assert len(CLASS_NAMES) == len(VALID_COMICS)
4731