Completed
Push — master ( fcff78...37f97a )
by De
01:03
created

comics.py (6 issues)

1
#! /usr/bin/python3
2
# vim: set expandtab tabstop=4 shiftwidth=4 :
3
"""Module to retrieve webcomics"""
4
5
from comic_abstract import GenericComic, get_date_for_comic
6
import re
7
from datetime import date, timedelta
8
import datetime
9
from urlfunctions import get_soup_at_url, urljoin_wrapper,\
10
    convert_iri_to_plain_ascii_uri, load_json_at_url, urlopen_wrapper
11
import json
12
import locale
13
import urllib
14
15
DEFAULT_LOCAL = 'en_GB.UTF-8'
16
17
18
class Xkcd(GenericComic):
    """Retriever for xkcd comics, driven by the site's JSON endpoints."""
    name = 'xkcd'
    long_name = 'xkcd'
    url = 'http://xkcd.com'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Yield the comics following `last_comic`, oldest first.

        Implementation of GenericComic's abstract method.

        `last_comic` is the last comic already retrieved (a dict holding
        at least a 'num' key) or a false value to start from number 1.
        """
        if last_comic:
            previous_num = last_comic['num']
        else:
            previous_num = 0
        newest_url = urljoin_wrapper(cls.url, 'info.0.json')
        newest_num = load_json_at_url(newest_url)['num']

        for num in range(previous_num + 1, newest_num + 1):
            if num == 404:
                # Number 404 is never requested.
                continue
            json_url = urljoin_wrapper(cls.url, '%d/info.0.json' % num)
            comic = load_json_at_url(json_url)
            # The single image url is wrapped in a list.
            comic['img'] = [comic['img']]
            comic['prefix'] = '%d-' % num
            comic['json_url'] = json_url
            comic['url'] = urljoin_wrapper(cls.url, str(num))
            # Date components are converted to integers.
            for field in ('day', 'month', 'year'):
                comic[field] = int(comic[field])
            assert comic['num'] == num
            yield comic
44
45
46
# Helper functions corresponding to get_url_from_link/get_url_from_archive_element
47
48
49
@classmethod
def get_href(cls, link):
    """Return the 'href' entry of `link`.

    Usable as an implementation of get_url_from_link or
    get_url_from_archive_element.
    """
    href = link['href']
    return href
53
54
55
@classmethod
def join_cls_url_to_href(cls, link):
    """Return the 'href' entry of `link` joined to the class url.

    Usable as an implementation of get_url_from_link or
    get_url_from_archive_element.
    """
    href = link['href']
    return urljoin_wrapper(cls.url, href)
59
60
61
class GenericNavigableComic(GenericComic):
    """Generic class for "navigable" comics: with first/next arrows.

    This class applies to comics where the previous and next comics can
    be accessed from a given comic. Once given a starting point (either
    the first comic or the last comic retrieved), it will handle the
    navigation, the retrieval of the soup object and the setting of
    the 'url' attribute on retrieved comics. This greatly limits the
    amount of boilerplate code in the different implementation classes.

    The `get_next_comic` method is implemented in terms of more
    specialized methods to be implemented/overridden:
        - get_first_comic_link
        - get_navi_link
        - get_comic_info
        - get_url_from_link
    """
    _categories = ('NAVIGABLE', )

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comic.

        Sometimes this can be retrieved from any comic page, sometimes from
        the archive page, sometimes it doesn't exist at all and one has
        to iterate backward to find it before hardcoding the result found.
        """
        raise NotImplementedError

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next (or previous - for dev purposes) comic.

        `next_` is true to get the next link, false to get the previous one.
        """
        raise NotImplementedError

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        The result is either a dict without a 'url' key or None (in which
        case `get_next_comic` does not yield anything for that page).
        """
        raise NotImplementedError

    @classmethod
    def get_url_from_link(cls, link):
        """Get url corresponding to a link. Default implementation is similar to get_href."""
        return link['href']

    @classmethod
    def get_next_link(cls, last_soup):
        """Get link to next comic."""
        return cls.get_navi_link(last_soup, True)

    @classmethod
    def get_prev_link(cls, last_soup):
        """Get link to previous comic."""
        return cls.get_navi_link(last_soup, False)

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generic implementation of get_next_comic for navigable comics.

        Starts from the link following `last_comic` (or from the first
        comic link when `last_comic` is false) and follows the "next"
        links until there is none or until a link leads to the url
        that was just visited.
        """
        url = last_comic['url'] if last_comic else None
        if url:
            next_comic = cls.get_next_link(get_soup_at_url(url))
        else:
            next_comic = cls.get_first_comic_link()
        cls.log("next/first comic will be %s (url is %s)" % (str(next_comic), url))
        while next_comic:
            prev_url, url = url, cls.get_url_from_link(next_comic)
            if prev_url == url:
                # A "next" link pointing to the current page would loop forever.
                cls.log("got same url %s" % url)
                break
            cls.log("about to get %s (%s)" % (url, str(next_comic)))
            soup = get_soup_at_url(url)
            comic = cls.get_comic_info(soup, next_comic)
            if comic is not None:
                assert 'url' not in comic
                comic['url'] = url
                yield comic
            next_comic = cls.get_next_link(soup)
            cls.log("next comic will be %s" % str(next_comic))

    @classmethod
    def check_first_link(cls):
        """Check that navigation to first comic seems to be working - for dev purposes.

        Returns True when a first link is found and its url can be
        retrieved, False otherwise.
        """
        # Imported explicitly: a bare `import urllib` does not guarantee
        # that the `urllib.error` submodule is loaded.
        from urllib.error import HTTPError

        cls.log("about to check first link")
        ok = True
        firstlink = cls.get_first_comic_link()
        if firstlink is None:
            print("From %s : no first link" % cls.url)
            ok = False
        else:
            firsturl = cls.get_url_from_link(firstlink)
            try:
                get_soup_at_url(firsturl)
            except HTTPError:
                print("From %s : invalid first url" % cls.url)
                ok = False
        cls.log("checked first link -> returned %d" % ok)
        return ok

    @classmethod
    def check_prev_next_links(cls, url):
        """Check that navigation to prev/next from a given URL seems to be working - for dev purposes.

        Going backward then forward (and forward then backward) from
        `url` is expected to lead back to `url`. Returns True when the
        checks pass, False otherwise.
        """
        cls.log("about to check prev/next from %s" % url)
        ok = True
        if url is None:
            prevlink, nextlink = None, None
        else:
            soup = get_soup_at_url(url)
            prevlink, nextlink = cls.get_prev_link(soup), cls.get_next_link(soup)
        if prevlink is None and nextlink is None:
            print("From %s : no previous nor next" % url)
            ok = False
        else:
            if prevlink:
                prevurl = cls.get_url_from_link(prevlink)
                prevsoup = get_soup_at_url(prevurl)
                # The previous page may have no "next" link at all: report
                # it as a navigation problem instead of crashing.
                backlink = cls.get_next_link(prevsoup)
                prevnext = cls.get_url_from_link(backlink) if backlink else None
                if prevnext != url:
                    print("From %s, going backward then forward leads to %s" % (url, prevnext))
                    ok = False
            if nextlink:
                nexturl = cls.get_url_from_link(nextlink)
                if nexturl != url:
                    nextsoup = get_soup_at_url(nexturl)
                    # Same protection for a next page without "previous" link.
                    backlink = cls.get_prev_link(nextsoup)
                    nextprev = cls.get_url_from_link(backlink) if backlink else None
                    if nextprev != url:
                        print("From %s, going forward then backward leads to %s" % (url, nextprev))
                        ok = False
        cls.log("checked prev/next from %s -> returned %d" % (url, ok))
        return ok

    @classmethod
    def check_navigation(cls, url):
        """Check that navigation functions seem to be working - for dev purposes.

        Runs both `check_first_link` and `check_prev_next_links` and
        returns True only when both succeed.
        """
        cls.log("about to check navigation from %s" % url)
        first = cls.check_first_link()
        prevnext = cls.check_prev_next_links(url)
        ok = first and prevnext
        cls.log("checked navigation from %s -> returned %d" % (url, ok))
        return ok
199
200
201
class GenericListableComic(GenericComic):
    """Generic class for "listable" comics: with a list of comics (aka 'archive').

    The `get_next_comic` method is implemented in terms of more
    specialized methods to be implemented/overridden:
        - get_archive_elements
        - get_url_from_archive_element
        - get_comic_info
    """
    _categories = ('LISTABLE', )

    @classmethod
    def get_archive_elements(cls):
        """Return an iterable over the archive elements."""
        raise NotImplementedError

    @classmethod
    def get_url_from_archive_element(cls, archive_elt):
        """Return the url corresponding to an archive element."""
        raise NotImplementedError

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Return information about a particular comic (a dict or None)."""
        raise NotImplementedError

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generic implementation of get_next_comic for listable comics.

        Archive elements are skipped until the url of `last_comic` has
        been seen; the elements after it are retrieved and yielded.
        When `last_comic` is false, every element is retrieved.
        """
        if last_comic:
            pending_url = last_comic['url']
        else:
            pending_url = None
        for element in cls.get_archive_elements():
            element_url = cls.get_url_from_archive_element(element)
            cls.log("considering %s" % element_url)
            if pending_url is not None:
                # Still looking for the last retrieved comic: nothing is
                # fetched, the element matching it only ends the search.
                if pending_url and pending_url == element_url:
                    pending_url = None
                continue
            cls.log("about to get %s (%s)" % (element_url, str(element)))
            page = get_soup_at_url(element_url)
            info = cls.get_comic_info(page, element)
            if info is None:
                continue
            assert 'url' not in info
            info['url'] = element_url
            yield info
        if pending_url is not None:
            print("Did not find %s : there might be a problem" % pending_url)
246
247
# Helper functions corresponding to get_first_comic_link/get_navi_link
248
249
250
@classmethod
def get_link_rel_next(cls, last_soup, next_):
    """Implementation of get_navi_link based on <link rel="next|prev">."""
    if next_:
        relation = 'next'
    else:
        relation = 'prev'
    return last_soup.find('link', rel=relation)
254
255
256
@classmethod
def get_a_rel_next(cls, last_soup, next_):
    """Implementation of get_navi_link based on <a rel="next|prev">."""
    if next_:
        relation = 'next'
    else:
        relation = 'prev'
    return last_soup.find('a', rel=relation)
260
261
262
@classmethod
def get_a_navi_navinext(cls, last_soup, next_):
    """Implementation of get_navi_link based on <a class="navi navi-next|navi-prev">."""
    if next_:
        css_class = 'navi navi-next'
    else:
        css_class = 'navi navi-prev'
    return last_soup.find('a', class_=css_class)
266
267
268
@classmethod
def get_a_navi_comicnavnext_navinext(cls, last_soup, next_):
    """Implementation of get_navi_link based on the 'navi comic-nav-* navi-*' classes of <a>."""
    if next_:
        css_class = 'navi comic-nav-next navi-next'
    else:
        css_class = 'navi comic-nav-previous navi-prev'
    return last_soup.find('a', class_=css_class)
272
273
274
@classmethod
def get_a_comicnavbase_comicnavnext(cls, last_soup, next_):
    """Implementation of get_navi_link based on the 'comic-nav-base comic-nav-*' classes of <a>."""
    if next_:
        css_class = 'comic-nav-base comic-nav-next'
    else:
        css_class = 'comic-nav-base comic-nav-previous'
    return last_soup.find('a', class_=css_class)
278
279
280
@classmethod
def get_a_navi_navifirst(cls):
    """Implementation of get_first_comic_link: the <a class="navi navi-first"> of the class url page."""
    home_page = get_soup_at_url(cls.url)
    return home_page.find('a', class_='navi navi-first')
284
285
286
@classmethod
def get_div_navfirst_a(cls):
    """Implementation of get_first_comic_link.

    Returns the <a> found inside the <div class="nav-first"> of the page
    at the class url, or None when that div (or the link in it) is
    missing, so that callers get a "no first link" result instead of an
    AttributeError.
    """
    div = get_soup_at_url(cls.url).find('div', class_="nav-first")
    if div is None:
        return None
    return div.find('a')
290
291
292
@classmethod
def get_a_comicnavbase_comicnavfirst(cls):
    """Implementation of get_first_comic_link: the <a class="comic-nav-base comic-nav-first"> of the class url page."""
    home_page = get_soup_at_url(cls.url)
    return home_page.find('a', class_='comic-nav-base comic-nav-first')
296
297
298
@classmethod
def simulate_first_link(cls):
    """Implementation of get_first_comic_link building a link-like object.

    The result is a dict whose 'href' entry is the `first_url`
    attribute provided by the class.
    """
    fake_link = dict(href=cls.first_url)
    return fake_link
303
304
305
@classmethod
def navigate_to_first_comic(cls):
    """Implementation of get_first_comic_link walking back to the first comic.

    The starting URL is asked to the user, then the "previous" links
    are followed (each visited URL being printed) until a page without
    previous link is reached. The result is a link-like dict pointing
    to that page.

    This is convenient during development when the first comic cannot
    be reached directly; once the URL is known, hardcoding it is
    preferable.
    """
    url = input("Get starting URL: ")
    while True:
        print(url)
        prev_link = cls.get_prev_link(get_soup_at_url(url))
        if not prev_link:
            break
        url = cls.get_url_from_link(prev_link)
    return {'href': url}
324
325
326
class GenericEmptyComic(GenericComic):
    """Generic class for comics where nothing is to be done.

    It can be useful to temporarily deactivate comics that do not work
    properly by replacing `class MyComic(GenericWhateverComic)` with
    `class MyComic(GenericEmptyComic, GenericWhateverComic)`.
    """
    _categories = ('EMPTY', )

    @classmethod
    def get_next_comic(cls, last_comic):
        """Implementation of get_next_comic returning no comics (an empty list)."""
        cls.log("comic is considered as empty - returning no comic")
        no_comics = []
        return no_comics
339
340
341
class ExtraFabulousComics(GenericNavigableComic):
    """Class to retrieve Extra Fabulous Comics."""
    name = 'efc'
    long_name = 'Extra Fabulous Comics'
    url = 'http://extrafabulouscomics.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        Returns a dict with the title, the image urls, the publication
        date (day/month/year) and a file prefix built from the title.
        """
        # The site url is escaped so that its dots only match literal dots.
        img_src_re = re.compile('^%s/wp-content/uploads/' % re.escape(cls.url))
        imgs = soup.find_all('img', src=img_src_re)
        title = soup.find('meta', property='og:title')['content']
        # Only the first 10 characters (YYYY-MM-DD) of the timestamp are used.
        date_str = soup.find('meta', property='article:published_time')['content'][:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        return {
            'title': title,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'prefix': title + '-'
        }
365
366
367
class GenericLeMondeBlog(GenericNavigableComic):
    """Generic class to retrieve comics from Le Monde blogs.

    Subclasses provide `first_url`, the page navigation starts from.
    """
    _categories = ('LEMONDE', 'FRANCAIS')
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, short url, image urls and date of the comic in `soup`."""
        short_url = soup.find('link', rel='shortlink')['href']
        og_title = soup.find('meta', property='og:title')['content']
        date_text = soup.find("span", class_="entry-date").string
        # Dates are parsed with the "fr_FR.utf8" locale.
        published = string_to_date(date_text, "%d %B %Y", "fr_FR.utf8")
        image_metas = soup.find_all('meta', property='og:image')
        image_urls = []
        for meta in image_metas:
            image_urls.append(convert_iri_to_plain_ascii_uri(meta['content']))
        info = {}
        info['title'] = og_title
        info['url2'] = short_url
        info['img'] = image_urls
        info['month'] = published.month
        info['year'] = published.year
        info['day'] = published.day
        return info
390
391
392
class ZepWorld(GenericLeMondeBlog):
    """Retriever for the Zep World comics (Le Monde blog)."""
    # Short identifier and display name.
    name = 'zep'
    long_name = 'Zep World'
    # Blog address and page used as the starting point for navigation.
    url = 'http://zepworld.blog.lemonde.fr'
    first_url = 'http://zepworld.blog.lemonde.fr/2014/10/31/bientot-le-blog-de-zep/'
398
399
400
class Vidberg(GenericLeMondeBlog):
    """Retriever for the Vidberg comics (Le Monde blog)."""
    # Short identifier and display name.
    name = "vidberg"
    long_name = "Vidberg - l'actu en patates"
    # Blog address.
    url = 'http://vidberg.blog.lemonde.fr'
    # Starting point for navigation: not the actual first comic, for which
    # no efficient retrieval was found.
    first_url = 'http://vidberg.blog.lemonde.fr/2012/02/09/revue-de-campagne-la-campagne-du-modem-semballe/'
407
408
409
class Plantu(GenericLeMondeBlog):
    """Retriever for the Plantu comics (Le Monde blog)."""
    # Short identifier and display name.
    name = "plantu"
    long_name = 'Plantu'
    # Blog address and page used as the starting point for navigation.
    url = 'http://plantu.blog.lemonde.fr'
    first_url = 'http://plantu.blog.lemonde.fr/2014/10/28/stress-test-a-bruxelles/'
415
416
417
class XavierGorce(GenericLeMondeBlog):
    """Retriever for the Xavier Gorce comics (Le Monde blog)."""
    # Short identifier and display name.
    name = "gorce"
    long_name = 'Xavier Gorce'
    # Blog address and page used as the starting point for navigation.
    url = 'http://xaviergorce.blog.lemonde.fr'
    first_url = 'http://xaviergorce.blog.lemonde.fr/2015/01/09/distinction/'
423
424
425
class CartooningForPeace(GenericLeMondeBlog):
    """Retriever for the Cartooning For Peace comics (Le Monde blog)."""
    # Short identifier and display name.
    name = "forpeace"
    long_name = 'Cartooning For Peace'
    # Blog address and page used as the starting point for navigation.
    url = 'http://cartooningforpeace.blog.lemonde.fr'
    first_url = 'http://cartooningforpeace.blog.lemonde.fr/2014/12/15/bado/'
431
432
433
class Aurel(GenericLeMondeBlog):
    """Retriever for the Aurel comics (Le Monde blog)."""
    # Short identifier and display name.
    name = "aurel"
    long_name = 'Aurel'
    # Blog address and page used as the starting point for navigation.
    url = 'http://aurel.blog.lemonde.fr'
    first_url = 'http://aurel.blog.lemonde.fr/2014/09/29/le-senat-repasse-a-droite/'
439
440
441
class LesCulottees(GenericLeMondeBlog):
    """Retriever for the Les Culottees comics (Le Monde blog)."""
    # Short identifier and display name.
    name = "culottees"
    long_name = "Les Culottees"
    # Blog address and page used as the starting point for navigation.
    url = 'http://lesculottees.blog.lemonde.fr'
    first_url = 'http://lesculottees.blog.lemonde.fr/2016/01/11/clementine-delait-femme-a-barbe/'
447
448
449
class UneAnneeAuLycee(GenericLeMondeBlog):
    """Retriever for the Une Annee Au Lycee comics (Le Monde blog)."""
    # Short identifier and display name.
    name = "lycee"
    long_name = "Une Annee au Lycee"
    # Blog address and page used as the starting point for navigation.
    url = "http://uneanneeaulycee.blog.lemonde.fr"
    first_url = 'http://uneanneeaulycee.blog.lemonde.fr/2016/06/13/la-semaine-du-bac-est-arrivee/'
455
456
457
class Rall(GenericNavigableComic):
    """Retriever for the Ted Rall comics."""
    # Also on http://www.gocomics.com/tedrall
    name = "rall"
    long_name = 'Ted Rall'
    url = 'http://rall.com/comic'
    _categories = ('RALL', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    # Starting point for navigation: not the actual first comic, for which
    # no efficient retrieval was found.
    first_url = 'http://rall.com/2014/01/30/los-angeles-times-cartoon-well-miss-those-california-flowers'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, author, date, description and image urls of the comic in `soup`."""
        og_title = soup.find('meta', property='og:title')['content']
        author_span = soup.find("span", class_="author vcard")
        author_name = author_span.find("a").string
        date_text = soup.find("span", class_="entry-date").string
        published = string_to_date(date_text, "%B %d, %Y")
        og_description = soup.find('meta', property='og:description')['content']
        content_imgs = soup.find('div', class_='entry-content').find_all('img')
        # The last 7 images are dropped (social media buttons).
        comic_imgs = content_imgs[:-7]
        info = {}
        info['title'] = og_title
        info['author'] = author_name
        info['month'] = published.month
        info['year'] = published.year
        info['day'] = published.day
        info['description'] = og_description
        info['img'] = [tag['src'] for tag in comic_imgs]
        return info
488
489
490
class Dilem(GenericNavigableComic):
    """Retriever for the Ali Dilem comics."""
    name = "dilem"
    long_name = "Ali Dilem"
    url = "http://information.tv5monde.com/dilem"
    _categories = ('FRANCAIS', )
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://information.tv5monde.com/dilem/2004-06-26'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the link to the next or previous comic (None when there is none)."""
        # The classes are swapped on purpose: the 'prev' item leads to the
        # next comic and the 'next' item to the previous one.
        if next_:
            item_class = 'prev'
        else:
            item_class = 'next'
        item = last_soup.find('li', class_=item_class)
        if not item:
            return None
        return item.find('a')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return short url, title, image urls and date of the comic in `soup`."""
        shortlink = soup.find('link', rel='shortlink')['href']
        twitter_title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        image_metas = soup.find_all('meta', property='og:image')
        # Only the first 10 characters (YYYY-MM-DD) of the date are used.
        date_text = soup.find('span', property='dc:date')['content'][:10]
        published = string_to_date(date_text, "%Y-%m-%d")
        info = {}
        info['short_url'] = shortlink
        info['title'] = twitter_title
        info['img'] = [meta['content'] for meta in image_metas]
        info['day'] = published.day
        info['month'] = published.month
        info['year'] = published.year
        return info
524
525
526
class SpaceAvalanche(GenericNavigableComic):
    """Retriever for the Space Avalanche comics."""
    name = "avalanche"
    long_name = "Space Avalanche"
    url = "http://www.spaceavalanche.com"
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return a hardcoded link-like dict pointing to the first comic."""
        first_link = {}
        first_link['href'] = "http://www.spaceavalanche.com/2009/02/02/irish-sea/"
        first_link['title'] = "Irish Sea"
        return first_link

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, date and image urls of the comic in `soup`.

        The title comes from `link` and the date from the
        year/month/day components of the comic url.
        """
        url_date_re = re.compile('.*/([0-9]*)/([0-9]*)/([0-9]*)/.*$')
        link_title = link['title']
        comic_url = cls.get_url_from_link(link)
        date_parts = url_date_re.match(comic_url).groups()
        year, month, day = map(int, date_parts)
        entry = soup.find("div", class_="entry")
        image_tags = entry.find_all("img")
        info = {}
        info['title'] = link_title
        info['day'] = day
        info['month'] = month
        info['year'] = year
        info['img'] = [tag['src'] for tag in image_tags]
        return info
554
555
556
class ZenPencils(GenericNavigableComic):
    """Retriever for the ZenPencils comics."""
    # Also on http://zenpencils.tumblr.com
    # Also on http://www.gocomics.com/zen-pencils
    name = "zenpencils"
    long_name = "Zen Pencils"
    url = "http://zenpencils.com"
    _categories = ('ZENPENCILS', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://zenpencils.com/comic/1-ralph-waldo-emerson-make-them-cry/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, description, author, date and image urls of the comic in `soup`."""
        comic_imgs = soup.find('div', id='comic').find_all('img')
        post_content = soup.find('div', class_='post-content')
        author_name = post_content.find("span", class_="post-author").find("a").string
        og_title = soup.find('meta', property='og:title')['content']
        date_text = post_content.find('span', class_='post-date').string
        published = string_to_date(date_text, "%B %d, %Y")
        # Sanity checks on the page structure: at least one image, each with
        # identical alt/title texts being either the page title or empty.
        assert comic_imgs
        assert all(tag['alt'] == tag['title'] for tag in comic_imgs)
        assert all(tag['alt'] in (og_title, "") for tag in comic_imgs)
        og_description = soup.find('meta', property='og:description')['content']
        info = {}
        info['title'] = og_title
        info['description'] = og_description
        info['author'] = author_name
        info['day'] = published.day
        info['month'] = published.month
        info['year'] = published.year
        info['img'] = [urljoin_wrapper(cls.url, tag['src']) for tag in comic_imgs]
        return info
591
592
593
class ItsTheTie(GenericNavigableComic):
    """Retriever for the It's the tie comics."""
    # Also on http://itsthetie.tumblr.com
    # Also on https://tapastic.com/series/itsthetie
    name = "tie"
    long_name = "It's the tie"
    url = 'http://itsthetie.com'
    _categories = ('TIE', )
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, date, image urls and tags of the comic in `soup`."""
        comic_title = soup.find('h1', class_='comic-title').find('a').string
        date_text = soup.find('header', class_='comic-meta entry-meta').find('a').string
        published = string_to_date(date_text, "%B %d, %Y")
        # Bonus images may or may not be in meta og:image.
        og_srcs = [meta['content'] for meta in soup.find_all('meta', property='og:image')]
        bonus_srcs = [tag['data-oversrc'] for tag in soup.find_all('img', attrs={'data-oversrc': True})]
        # Bonus images are added after the og:image ones unless already listed.
        merged_srcs = list(og_srcs)
        for src in bonus_srcs:
            if src not in og_srcs:
                merged_srcs.append(src)
        # The generic "bonus panel" picture is filtered out.
        kept_srcs = []
        for src in merged_srcs:
            if not src.endswith("/2016/01/bonus-panel.png"):
                kept_srcs.append(src)
        tag_meta = soup.find('meta', property='article:tag')
        if tag_meta:
            tags = tag_meta['content']
        else:
            tags = ""
        info = {}
        info['title'] = comic_title
        info['month'] = published.month
        info['year'] = published.year
        info['day'] = published.day
        info['img'] = kept_srcs
        info['tags'] = tags
        return info
627
628
629
class PenelopeBagieu(GenericNavigableComic):
    """Retriever for the comics from Penelope Bagieu's blog."""
    name = "bagieu"
    long_name = "Ma vie est tout a fait fascinante (Bagieu)"
    url = "http://www.penelope-jolicoeur.com"
    _categories = ('FRANCAIS', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = "http://www.penelope-jolicoeur.com/2007/02/ma-vie-mon-oeuv.html"

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, image urls and date of the comic in `soup`."""
        date_text = soup.find('h2', class_='date-header').string
        # Dates are parsed with the "fr_FR.utf8" locale.
        published = string_to_date(date_text, "%A %d %B %Y", "fr_FR.utf8")
        body_imgs = soup.find('div', class_='entry-body').find_all('img')
        entry_title = soup.find('h3', class_='entry-header').string
        info = {}
        info['title'] = entry_title
        info['img'] = [tag['src'] for tag in body_imgs]
        info['month'] = published.month
        info['year'] = published.year
        info['day'] = published.day
        return info
653
654
655
class OneOneOneOneComic(GenericNavigableComic):
    """Retriever for the 1111 Comics."""
    # Also on http://comics1111.tumblr.com
    # Also on https://tapastic.com/series/1111-Comics
    name = "1111"
    long_name = "1111 Comics"
    url = "http://www.1111comics.me"
    _categories = ('ONEONEONEONE', )
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, date and image urls of the comic in `soup`."""
        comic_title = soup.find('h1', class_='comic-title').find('a').string
        meta_header = soup.find('header', class_='comic-meta entry-meta')
        date_text = meta_header.find('a').string
        published = string_to_date(date_text, "%B %d, %Y")
        image_metas = soup.find_all('meta', property='og:image')
        info = {}
        info['title'] = comic_title
        info['month'] = published.month
        info['year'] = published.year
        info['day'] = published.day
        info['img'] = [meta['content'] for meta in image_metas]
        return info
680
681
682
class AngryAtNothing(GenericNavigableComic):
    """Retriever for the Angry at Nothing comics."""
    # Also on http://tapastic.com/series/Comics-yeah-definitely-comics-
    name = "angry"
    long_name = "Angry At Nothing"
    url = "http://www.angryatnothing.net"
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, date and image urls of the comic in `soup`."""
        comic_title = soup.find('h1', class_='comic-title').find('a').string
        meta_header = soup.find('header', class_='comic-meta entry-meta')
        date_text = meta_header.find('a').string
        published = string_to_date(date_text, "%B %d, %Y")
        image_metas = soup.find_all('meta', property='og:image')
        info = {}
        info['title'] = comic_title
        info['month'] = published.month
        info['year'] = published.year
        info['day'] = published.day
        info['img'] = [meta['content'] for meta in image_metas]
        return info
705
706
707
class NeDroid(GenericNavigableComic):
    """Class to retrieve NeDroid comics."""
    name = 'nedroid'
    long_name = 'NeDroid'
    url = 'http://nedroid.com'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        Returns a dict with the short url, the comic number extracted
        from it, both titles of the image (alt and title attributes),
        the image urls and the date found in the image file name.
        """
        # Literal parts (site url, domain) are escaped so that their dots
        # only match literal dots.
        short_url_re = re.compile(r'^%s/\?p=([0-9]*)' % re.escape(cls.url))
        comic_url_re = re.compile(r'//nedroid\.com/comics/([0-9]*)-([0-9]*)-([0-9]*).*')
        short_url = cls.get_url_from_link(soup.find('link', rel='shortlink'))
        num = int(short_url_re.match(short_url).groups()[0])
        imgs = soup.find('div', id='comic').find_all('img')
        # Checked before indexing so that an unexpected page layout is
        # reported by the assertion rather than by an IndexError.
        assert len(imgs) == 1
        img = imgs[0]
        year, month, day = [int(s) for s in comic_url_re.match(img['src']).groups()]
        title = img['alt']
        title2 = img['title']
        return {
            'short_url': short_url,
            'title': title,
            'title2': title2,
            'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
            'day': day,
            'month': month,
            'year': year,
            'num': num,
        }
738
739
740
class Garfield(GenericNavigableComic):
    """Class to retrieve Garfield comics."""
    # Also on http://www.gocomics.com/garfield
    name = 'garfield'
    long_name = 'Garfield'
    url = 'https://garfield.com'
    _categories = ('GARFIELD', )
    get_first_comic_link = simulate_first_link
    first_url = 'https://garfield.com/comic/1978/06/19'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic."""
        return last_soup.find('a', class_='comic-arrow-right' if next_ else 'comic-arrow-left')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        Returns a dict with the image urls and the date, the latter
        being extracted from the year/month/day components of the url.
        """
        url = cls.get_url_from_link(link)
        # The site url is escaped so that its dots only match literal dots.
        date_re = re.compile('^%s/comic/([0-9]*)/([0-9]*)/([0-9]*)' % re.escape(cls.url))
        year, month, day = [int(s) for s in date_re.match(url).groups()]
        imgs = soup.find('div', class_='comic-display').find_all('img', class_='img-responsive')
        return {
            'month': month,
            'year': year,
            'day': day,
            'img': [i['src'] for i in imgs],
        }
768
769
770
class Dilbert(GenericNavigableComic):
    """Retriever for the Dilbert comic strips."""
    # Also on http://www.gocomics.com/dilbert-classics
    name = 'dilbert'
    long_name = 'Dilbert'
    url = 'http://dilbert.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://dilbert.com/strip/1989-04-16'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor to the next (or previous) strip, None if absent."""
        css_class = 'nav-comic nav-right' if next_ else 'nav-comic nav-left'
        nav_div = last_soup.find('div', class_=css_class)
        if not nav_div:
            return None
        return nav_div.find('a')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic description from the <meta> tags of the page."""
        def meta_content(prop):
            """Content of the first <meta> tag having the given property."""
            return soup.find('meta', property=prop)['content']

        title = meta_content('og:title')
        image_metas = soup.find_all('meta', property='og:image')
        description = meta_content('og:description')
        published = string_to_date(meta_content('article:publish_date'), "%B %d, %Y")
        author = meta_content('article:author')
        tags = meta_content('article:tag')
        return {
            'title': title,
            'description': description,
            'img': [meta['content'] for meta in image_metas],
            'author': author,
            'tags': tags,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
806
807
808
class VictimsOfCircumsolar(GenericNavigableComic):
    """Retriever for the Victims Of Circumsolar comics."""
    name = 'circumsolar'
    long_name = 'Victims Of Circumsolar'
    url = 'http://www.victimsofcircumsolar.com'
    get_navi_link = get_a_navi_comicnavnext_navinext
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.victimsofcircumsolar.com/comic/modern-addiction'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, description and image URLs for the comic page."""
        # Date is on the archive page
        def last_meta_content(prop):
            """Content of the last <meta> tag having the given property."""
            return soup.find_all('meta', property=prop)[-1]['content']

        title = last_meta_content('og:title')
        description = last_meta_content('og:description')
        pictures = soup.find('div', id='comic').find_all('img')
        # Sanity check: every image is expected to be labelled with the title.
        assert all(pic['title'] == pic['alt'] == title for pic in pictures)
        return {
            'title': title,
            'description': description,
            'img': [pic['src'] for pic in pictures],
        }
830
831
832
class ThreeWordPhrase(GenericNavigableComic):
    """Class to retrieve Three Word Phrase comics."""
    # Also on http://www.threewordphrase.tumblr.com
    name = 'threeword'
    long_name = 'Three Word Phrase'
    url = 'http://threewordphrase.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        The link is the parent element of the '/firstlink.gif' button image
        found on the home page.
        """
        return get_soup_at_url(cls.url).find('img', src='/firstlink.gif').parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        The link is the parent element of the navigation button image.
        Returns None when the button image is missing or when its parent
        carries no 'href' attribute.
        """
        img = last_soup.find('img', src='/nextlink.gif' if next_ else '/prevlink.gif')
        if img is None:
            # No navigation button on this page: report "no link" instead of
            # failing with an AttributeError on None.
            return None
        link = img.parent
        return None if link.get('href') is None else link

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Every image of the page is kept except the ones whose source ends
        with one of the known site-decoration file names.
        """
        title = soup.find('title')
        imgs = [img for img in soup.find_all('img')
                if not img['src'].endswith(
                    ('link.gif', '32.png', 'twpbookad.jpg',
                     'merchad.jpg', 'header.gif', 'tipjar.jpg'))]
        return {
            'title': title.string if title else None,
            'title2': '  '.join(img.get('alt') for img in imgs if img.get('alt')),
            'img': [urljoin_wrapper(cls.url, img['src']) for img in imgs],
        }
864
865
866
class DeadlyPanel(GenericEmptyComic, GenericNavigableComic):
    """Retriever for the Deadly Panel comics."""
    # Also on https://tapastic.com/series/deadlypanel
    name = 'deadly'
    long_name = 'Deadly Panel'
    url = 'http://www.deadlypanel.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the image URLs found in the 'comic' div of the page."""
        pictures = soup.find('div', id='comic').find_all('img')
        # Sanity check: alt text and title are expected to be identical.
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        return {'img': [pic['src'] for pic in pictures]}
883
884
885
class TheGentlemanArmchair(GenericNavigableComic):
    """Retriever for The Gentleman Armchair comics."""
    name = 'gentlemanarmchair'
    long_name = 'The Gentleman Armchair'
    url = 'http://thegentlemansarmchair.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return images, title, author and post date of the comic page."""
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        posted = string_to_date(
            soup.find('span', class_='post-date').string, "%B %d, %Y")
        pictures = soup.find('div', id='comic').find_all('img')
        return {
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'author': author,
            'month': posted.month,
            'year': posted.year,
            'day': posted.day,
        }
909
910
911
class MyExtraLife(GenericNavigableComic):
    """Retriever for the My Extra Life comics."""
    name = 'extralife'
    long_name = 'My Extra Life'
    url = 'http://www.myextralife.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'first comic' navigation anchor of the home page."""
        home = get_soup_at_url(cls.url)
        return home.find('a', class_='comic_nav_link first_comic_link')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, image URLs and date of the comic page."""
        title = soup.find("h1", class_="comic_title").string
        posted = string_to_date(
            soup.find("span", class_="comic_date").string, "%B %d, %Y")
        pictures = soup.find_all("img", class_="comic")
        # Sanity check: images are expected to be labelled with the title.
        assert all(pic['alt'] == pic['title'] == title for pic in pictures)
        return {
            'title': title,
            # Images with an empty source are left out.
            'img': [pic['src'] for pic in pictures if pic["src"]],
            'day': posted.day,
            'month': posted.month,
            'year': posted.year
        }
938
939
940
class SaturdayMorningBreakfastCereal(GenericNavigableComic):
    """Retriever for the Saturday Morning Breakfast Cereal comics."""
    # Also on http://www.gocomics.com/saturday-morning-breakfast-cereal
    # Also on http://smbc-comics.tumblr.com
    name = 'smbc'
    long_name = 'Saturday Morning Breakfast Cereal'
    url = 'http://www.smbc-comics.com'
    _categories = ('SMBC', )
    get_navi_link = get_a_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the rel='start' anchor of the home page."""
        home = get_soup_at_url(cls.url)
        return home.find('a', rel='start')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, image URLs and date of the comic page.

        The image list holds the main comic image followed, when present
        and non-empty, by the image of the 'aftercomic' div.
        """
        main_img = soup.find('img', id='cc-comic')
        img_urls = [main_img['src']]
        bonus_div = soup.find('div', id='aftercomic')
        if bonus_div:
            bonus_url = bonus_div.find('img')['src']
            if bonus_url:
                img_urls.append(bonus_url)
        publish_div = soup.find('div', class_='cc-publishtime')
        posted = string_to_date(publish_div.contents[0], "%B %d, %Y")
        return {
            'title': main_img['title'],
            'img': [urljoin_wrapper(cls.url, img_url) for img_url in img_urls],
            'day': posted.day,
            'month': posted.month,
            'year': posted.year
        }
972
973
974
class PerryBibleFellowship(GenericListableComic):
    """Retriever for the Perry Bible Fellowship comics."""
    name = 'pbf'
    long_name = 'Perry Bible Fellowship'
    url = 'http://pbfcomics.com'
    get_url_from_archive_element = join_cls_url_to_href

    @classmethod
    def get_archive_elements(cls):
        """Return the home page anchors pointing to a comic, in reverse page order."""
        comic_link_re = re.compile('^/[0-9]*/$')
        anchors = get_soup_at_url(cls.url).find_all('a', href=comic_link_re)
        return reversed(anchors)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return number, name and image URL of the comic.

        Number and name are read from the archive link; the page is
        expected to hold exactly one comic image, labelled with the name.
        """
        comic_url = cls.get_url_from_archive_element(link)
        comic_img_re = re.compile('^/archive_b/PBF.*')
        name = link.string
        num = int(link['name'])
        # Sanity check: the link target is expected to be the comic number.
        assert link['href'] == '/%d/' % num
        pictures = soup.find_all('img', src=comic_img_re)
        assert len(pictures) == 1
        assert pictures[0]['alt'] == name
        return {
            'num': num,
            'name': name,
            'img': [urljoin_wrapper(comic_url, pic['src']) for pic in pictures],
            'prefix': '%d-' % num,
        }
1004
1005
1006
class Mercworks(GenericNavigableComic):
    """Retriever for the Mercworks comics."""
    # Also on http://mercworks.tumblr.com
    name = 'mercworks'
    long_name = 'Mercworks'
    url = 'http://mercworks.net'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic description from the <meta> tags of the page."""
        def named_meta_content(meta_name):
            """Content of the first <meta> tag having the given name."""
            return soup.find('meta', attrs={'name': meta_name})['content']

        title = soup.find('meta', property='og:title')['content']
        desc_meta = soup.find('meta', property='og:description')
        if desc_meta:
            description = desc_meta['content']
        else:
            description = ""
        author = named_meta_content('shareaholic:article_author_name')
        # Only the first 10 characters (the YYYY-MM-DD part) are parsed.
        published_str = named_meta_content('shareaholic:article_published_time')[:10]
        published = string_to_date(published_str, "%Y-%m-%d")
        image_metas = soup.find_all('meta', property='og:image')
        return {
            'img': [meta['content'] for meta in image_metas],
            'title': title,
            'author': author,
            'desc': description,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
1035
1036
1037
class BerkeleyMews(GenericListableComic):
    """Class to retrieve Berkeley Mews comics."""
    # Also on http://mews.tumblr.com
    # Also on http://www.gocomics.com/berkeley-mews
    name = 'berkeley'
    long_name = 'Berkeley Mews'
    url = 'http://www.berkeleymews.com'
    _categories = ('BERKELEY', )
    get_url_from_archive_element = get_href
    # Matches comic URLs (<url>/?p=<num>); the site URL is escaped so that
    # its dots only match literal dots.
    comic_num_re = re.compile(r'%s/\?p=([0-9]*)$' % re.escape(url))

    @classmethod
    def get_archive_elements(cls):
        """Get the archive page links pointing to a comic, in reverse page order."""
        archive_url = urljoin_wrapper(cls.url, "?page_id=2")
        return reversed(get_soup_at_url(archive_url).find_all('a', href=cls.comic_num_re))

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The number comes from the comic URL, the title from the archive
        link and the date from the image URL, which is expected to contain
        a <year>-<month>-<day>- sequence.
        """
        comic_date_re = re.compile('.*/([0-9]*)-([0-9]*)-([0-9]*)-.*')
        url = cls.get_url_from_archive_element(link)
        num = int(cls.comic_num_re.match(url).groups()[0])
        img = soup.find('div', id='comic').find('img')
        # Sanity check: alt text and title are expected to be identical.
        assert all(i['alt'] == i['title'] for i in [img])
        title2 = img['title']
        img_url = img['src']
        year, month, day = [int(s) for s in comic_date_re.match(img_url).groups()]
        return {
            'num': num,
            'title': link.string,
            'title2': title2,
            'img': [img_url],
            'year': year,
            'month': month,
            'day': day,
        }
1073
1074
1075
class GenericBouletCorp(GenericNavigableComic):
    """Generic class to retrieve BouletCorp comics in different languages."""
    # Also on http://bouletcorp.tumblr.com
    _categories = ('BOULET', )
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        This is the first anchor of the 'centered_nav' div of the home page.
        """
        return get_soup_at_url(cls.url).find('div', id='centered_nav').find_all('a')[0]

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The date is parsed from the comic URL, which is expected to look
        like <url>/<year>/<month>/<day>/...; images without a 'src'
        attribute are left out of the result.
        """
        url = cls.get_url_from_link(link)
        # The site URL is escaped so that its dots only match literal dots.
        date_re = re.compile('^%s/([0-9]*)/([0-9]*)/([0-9]*)/' % re.escape(cls.url))
        year, month, day = [int(s) for s in date_re.match(url).groups()]
        imgs = soup.find('div', id='notes').find('div', class_='storycontent').find_all('img')
        texts = '  '.join(t for t in (i.get('title') for i in imgs) if t)
        title = soup.find('title').string
        return {
            'img': [convert_iri_to_plain_ascii_uri(i['src']) for i in imgs if i.get('src') is not None],
            'title': title,
            'texts': texts,
            'year': year,
            'month': month,
            'day': day,
        }
1103
1104
1105
class BouletCorp(GenericBouletCorp):
    """Retriever for the BouletCorp comics (site at www.bouletcorp.com)."""
    url = 'http://www.bouletcorp.com'
    name = 'boulet'
    long_name = 'Boulet Corp'
    _categories = ('FRANCAIS', )
1111
1112
1113
class BouletCorpEn(GenericBouletCorp):
    """Retriever for the BouletCorp comics (site at english.bouletcorp.com)."""
    url = 'http://english.bouletcorp.com'
    name = 'boulet_en'
    long_name = 'Boulet Corp English'
1118
1119
1120
class AmazingSuperPowers(GenericNavigableComic):
    """Retriever for the Amazing Super Powers comics."""
    name = 'asp'
    long_name = 'Amazing Super Powers'
    url = 'http://www.amazingsuperpowers.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, author, image URLs and date of the comic page.

        The title is the space-joined titles of the comic images.
        """
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        posted = string_to_date(
            soup.find('span', class_='post-date').string, "%B %d, %Y")
        pictures = soup.find('div', id='comic').find_all('img')
        title = ' '.join(pic['title'] for pic in pictures)
        # Sanity check: alt text and title are expected to be identical.
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        return {
            'title': title,
            'author': author,
            'img': [pic['src'] for pic in pictures],
            'day': posted.day,
            'month': posted.month,
            'year': posted.year
        }
1145
1146
1147
class ToonHole(GenericListableComic):
    """Retriever for the Toon Hole comics."""
    # Also on http://tapastic.com/series/TOONHOLE
    name = 'toonhole'
    long_name = 'Toon Hole'
    url = 'http://www.toonhole.com'
    get_url_from_archive_element = get_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, date and image URLs of the comic.

        The title is the text of the archive link; the date is read from
        the 'comicdate' div of the comic page.
        """
        title = link.string
        raw_date = soup.find('div', class_='comicdate').string.strip()
        posted = string_to_date(remove_st_nd_rd_th_from_date(raw_date), "%B %d, %Y")
        pictures = soup.find('div', id='comic').find_all('img')
        # Sanity check: images are expected to be labelled with the title.
        assert all(pic['alt'] == pic['title'] == title for pic in pictures)
        return {
            'title': title,
            'month': posted.month,
            'year': posted.year,
            'day': posted.day,
            'img': [convert_iri_to_plain_ascii_uri(pic['src']) for pic in pictures],
        }

    @classmethod
    def get_archive_elements(cls):
        """Return the rel='bookmark' anchors of the archive page, in reverse page order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive/'))
        return reversed(archive_soup.find_all('a', rel='bookmark'))
1175
1176
1177
class Channelate(GenericNavigableComic):
    """Retriever for the Channelate comics."""
    name = 'channelate'
    long_name = 'Channelate'
    url = 'http://www.channelate.com'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return author, date, title and image URLs of the comic page.

        When the page has an 'extrapanelbutton' div, the page it links to
        is fetched and its 'extrapanelimage' images are appended to the
        comic images.
        """
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        posted = string_to_date(
            soup.find('span', class_='post-date').string, '%Y/%m/%d')
        title = soup.find('meta', property='og:title')['content']
        comic_div = soup.find('div', id='comic')
        pictures = comic_div.find_all('img') if comic_div else []
        bonus_url = None
        bonus_div = soup.find('div', id='extrapanelbutton')
        if bonus_div:
            bonus_url = bonus_div.find('a')['href']
            bonus_soup = get_soup_at_url(bonus_url)
            pictures.extend(bonus_soup.find_all('img', class_='extrapanelimage'))
        return {
            'url_extra': bonus_url,
            'title': title,
            'author': author,
            'month': posted.month,
            'year': posted.year,
            'day': posted.day,
            'img': [urljoin_wrapper(cls.url, pic['src']) for pic in pictures],
        }
1211
1212
1213
class CyanideAndHappiness(GenericNavigableComic):
    """Class to retrieve Cyanide And Happiness comics."""
    name = 'cyanide'
    long_name = 'Cyanide and Happiness'
    url = 'http://explosm.net'
    _categories = ('NSFW', )
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        This is the home page anchor titled 'Oldest comic'.
        """
        return get_soup_at_url(cls.url).find('a', title='Oldest comic')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns None when the navigation anchor is missing or carries no
        'href' attribute.
        """
        # NOTE(review): 'previous-comic ' ends with a space; presumably this
        # mirrors the site's markup - confirm it still matches.
        link = last_soup.find('a', class_='next-comic' if next_ else 'previous-comic ')
        if link is None:
            # No navigation anchor on this page: report "no link" instead
            # of failing with an AttributeError on None.
            return None
        return None if link.get('href') is None else link

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The number is the second-to-last '/'-separated component of the
        'og:url' meta content; the author credit is expected to start with
        'by ', which is stripped.
        """
        url2 = soup.find('meta', property='og:url')['content']
        num = int(url2.split('/')[-2])
        date_str = soup.find('h3').find('a').string
        day = string_to_date(date_str, '%Y.%m.%d')
        author = soup.find('small', class_="author-credit-name").string
        assert author.startswith('by ')
        author = author[3:]
        imgs = soup.find_all('img', id='main-comic')
        return {
            'num': num,
            'author': author,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'prefix': '%d-' % num,
            'img': [convert_iri_to_plain_ascii_uri(urljoin_wrapper(cls.url, i['src'])) for i in imgs]
        }
1252
1253
1254
class MrLovenstein(GenericComic):
    """Class to retrieve Mr Lovenstein comics."""
    # Also on https://tapastic.com/series/MrLovenstein
    name = 'mrlovenstein'
    long_name = 'Mr. Lovenstein'
    url = 'http://www.mrlovenstein.com'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generator to get the next comic. Implementation of GenericComic's abstract method.

        The range of comic numbers is derived from the '/comic/<num>' links
        of the home page; when last_comic is provided, retrieval starts
        right after its number. Each comic page is then fetched in turn.
        """
        # TODO: more info from http://www.mrlovenstein.com/archive
        comic_num_re = re.compile('^/comic/([0-9]*)$')
        # Compiled once here rather than on every iteration of the loop below.
        comic_img_re = re.compile('^/images/comics/')
        nums = [int(comic_num_re.match(link['href']).groups()[0])
                for link in get_soup_at_url(cls.url).find_all('a', href=comic_num_re)]
        first, last = min(nums), max(nums)
        if last_comic:
            first = last_comic['num'] + 1
        for num in range(first, last + 1):
            url = urljoin_wrapper(cls.url, '/comic/%d' % num)
            soup = get_soup_at_url(url)
            # Images are stored in the reverse of their order in the page.
            imgs = list(reversed(soup.find_all('img', src=comic_img_re)))
            description = soup.find('meta', attrs={'name': 'description'})['content']
            yield {
                'url': url,
                'num': num,
                'texts': '  '.join(t for t in (i.get('title') for i in imgs) if t),
                'img': [urljoin_wrapper(url, i['src']) for i in imgs],
                'description': description,
            }
1284
1285
1286
class DinosaurComics(GenericListableComic):
    """Class to retrieve Dinosaur Comics comics."""
    name = 'dinosaur'
    long_name = 'Dinosaur Comics'
    url = 'http://www.qwantz.com'
    get_url_from_archive_element = get_href
    # Matches comic URLs (<url>/index.php?comic=<num>); the site URL and the
    # dot of 'index.php' are escaped so that dots only match literal dots.
    comic_link_re = re.compile(r'^%s/index\.php\?comic=([0-9]*)$' % re.escape(url))

    @classmethod
    def get_archive_elements(cls):
        """Get the archive page links pointing to a comic, in reverse page order."""
        archive_url = urljoin_wrapper(cls.url, 'archive.php')
        # first link is random -> skip it
        return reversed(get_soup_at_url(archive_url).find_all('a', href=cls.comic_link_re)[1:])

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The number comes from the comic URL, the date from the text of the
        archive link and the text from the element following that link.
        """
        url = cls.get_url_from_archive_element(link)
        num = int(cls.comic_link_re.match(url).groups()[0])
        date_str = link.string
        text = link.next_sibling.string
        day = string_to_date(remove_st_nd_rd_th_from_date(date_str), "%B %d, %Y")
        comic_img_re = re.compile('^%s/comics/' % re.escape(cls.url))
        img = soup.find('img', src=comic_img_re)
        return {
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [img.get('src')],
            'title': img.get('title'),
            'text': text,
            'num': num,
        }
1319
1320
1321
class ButterSafe(GenericListableComic):
    """Class to retrieve Butter Safe comics."""
    name = 'butter'
    long_name = 'ButterSafe'
    url = 'http://buttersafe.com'
    get_url_from_archive_element = get_href
    # Matches comic URLs (<url>/<year>/<month>/<day>/...); the site URL is
    # escaped so that its dots only match literal dots.
    comic_link_re = re.compile('^%s/([0-9]*)/([0-9]*)/([0-9]*)/.*' % re.escape(url))

    @classmethod
    def get_archive_elements(cls):
        """Get the archive page links pointing to a comic, in reverse page order."""
        archive_url = urljoin_wrapper(cls.url, 'archive/')
        return reversed(get_soup_at_url(archive_url).find_all('a', href=cls.comic_link_re))

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The title is the text of the archive link and the date is parsed
        from the comic URL.
        """
        url = cls.get_url_from_archive_element(link)
        title = link.string
        year, month, day = [int(s) for s in cls.comic_link_re.match(url).groups()]
        img = soup.find('div', id='comic').find('img')
        # Sanity check: the image is expected to be labelled with the title.
        assert img['alt'] == title
        return {
            'title': title,
            'day': day,
            'month': month,
            'year': year,
            'img': [img['src']],
        }
1349
1350
1351
class CalvinAndHobbes(GenericComic):
    """Class to retrieve Calvin and Hobbes comics."""
    # Also on http://www.gocomics.com/calvinandhobbes/
    name = 'calvin'
    long_name = 'Calvin and Hobbes'
    # This is not through any official webpage but eh...
    url = 'http://marcel-oehler.marcellosendos.ch/comics/ch/'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generator to get the next comic. Implementation of GenericComic's abstract method.

        The index page lists one '<year>/<month>/' link per month; each
        month page holds images whose source starts with
        '<year><month><day>'. Only comics dated strictly after the last
        retrieved one (1985-11-01 when there is none) are yielded.
        """
        last_date = get_date_for_comic(
            last_comic) if last_comic else date(1985, 11, 1)
        link_re = re.compile('^([0-9]*)/([0-9]*)/')
        for link in get_soup_at_url(cls.url).find_all('a', href=link_re):
            url = link['href']
            # The string forms are kept as found in the link for building the
            # image regexp and URL; the int forms are used for dates.
            year_str, month_str = link_re.match(url).groups()
            year, month = int(year_str), int(month_str)
            # Skip the months that end before the last retrieved comic.
            if date(year, month, 1) + timedelta(days=31) < last_date:
                continue
            img_re = re.compile('^%s%s([0-9]*)' % (year_str, month_str))
            month_url = urljoin_wrapper(cls.url, url)
            for img in get_soup_at_url(month_url).find_all('img', src=img_re):
                img_src = img['src']
                day = int(img_re.match(img_src).groups()[0])
                comic_date = date(year, month, day)
                if comic_date > last_date:
                    yield {
                        'url': month_url,
                        'year': year,
                        'month': month,
                        'day': day,
                        'img': ['%s%s/%s/%s' % (cls.url, year_str, month_str, img_src)],
                    }
                    last_date = comic_date
1385
1386
1387
class AbstruseGoose(GenericListableComic):
    """Class to retrieve AbstruseGoose Comics."""
    name = 'abstruse'
    long_name = 'Abstruse Goose'
    url = 'http://abstrusegoose.com'
    get_url_from_archive_element = get_href
    # The site URL is escaped so that its dots only match literal dots.
    comic_url_re = re.compile('^%s/([0-9]*)$' % re.escape(url))
    comic_img_re = re.compile('^%s/strips/.*' % re.escape(url))

    @classmethod
    def get_archive_elements(cls):
        """Get the archive page links pointing to a comic, in page order."""
        archive_url = urljoin_wrapper(cls.url, 'archive')
        return get_soup_at_url(archive_url).find_all('a', href=cls.comic_url_re)

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Get information about a particular comics.

        The number comes from the comic URL, the title from the text of
        the archive link and the image is the first one of the page whose
        source is under <url>/strips/.
        """
        comic_url = cls.get_url_from_archive_element(archive_elt)
        num = int(cls.comic_url_re.match(comic_url).groups()[0])
        return {
            'num': num,
            'title': archive_elt.string,
            'img': [soup.find('img', src=cls.comic_img_re)['src']]
        }
1410
1411
1412
class PhDComics(GenericNavigableComic):
    """Retriever for the PHD Comics."""
    name = 'phd'
    long_name = 'PhD Comics'
    url = 'http://phdcomics.com/comics/archive.php'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the parent element of the 'first' button image."""
        first_button = get_soup_at_url(cls.url).find('img', src='images/first_button.gif')
        return first_button.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the parent of the next (or previous) button image, None if absent."""
        button_src = 'images/next_button.gif' if next_ else 'images/prev_button.gif'
        button = last_soup.find('img', src=button_src)
        if button is None:
            return None
        return button.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return date, image URL and title of the comic page.

        When the date found in the page cannot be parsed, a message is
        printed and today's date is used instead.
        """
        date_font = soup.find('font', face='Arial,Helvetica,Geneva,Swiss,SunSans-Regular', color='white')
        date_str = date_font.string.strip()
        try:
            posted = string_to_date(date_str, '%m/%d/%Y')
        except ValueError:
            print("Invalid date %s" % date_str)
            posted = date.today()
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        img_url = soup.find('img', id='comic')['src']
        return {
            'year': posted.year,
            'month': posted.month,
            'day': posted.day,
            'img': [img_url],
            'title': title,
        }
1447
1448
1449
class Octopuns(GenericNavigableComic):
    """Class to retrieve Octopuns comics."""
    # Also on http://octopuns.tumblr.com
    name = 'octopuns'
    long_name = 'Octopuns'
    url = 'http://www.octopuns.net'

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        The link is the parent element of the 'First.png' button image
        found on the home page.
        """
        return get_soup_at_url(cls.url).find('img', src=re.compile('.*/First.png')).parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        The link is the parent element of the navigation button image.
        Returns None when the button image is missing or when its parent
        carries no 'href' attribute.
        """
        img = last_soup.find('img', src=re.compile('.*/Next.png' if next_ else '.*/Back.png'))
        if img is None:
            # No navigation button on this page: report "no link" instead of
            # failing with an AttributeError on None.
            return None
        link = img.parent
        return None if link.get('href') is None else link

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Images are taken from the <link rel='image_src'> elements of the
        page; the date comes from the 'date-header' heading.
        """
        title = soup.find('h3', class_='post-title entry-title').string
        date_str = soup.find('h2', class_='date-header').string
        day = string_to_date(date_str, "%A, %B %d, %Y")
        imgs = soup.find_all('link', rel='image_src')
        return {
            'img': [i['href'] for i in imgs],
            'title': title,
            'day': day.day,
            'month': day.month,
            'year': day.year,
        }
1481
1482
1483
class Quarktees(GenericNavigableComic):
    """Retriever for the Quarktees comics."""
    name = 'quarktees'
    long_name = 'Quarktees'
    url = 'http://www.quarktees.com/blogs/news'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.quarktees.com/blogs/news/12486621-coming-soon'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the 'article-next' (or 'article-prev') anchor of the page."""
        anchor_id = 'article-next' if next_ else 'article-prev'
        return last_soup.find('a', id=anchor_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the title and the image URLs of the article."""
        title = soup.find('meta', property='og:title')['content']
        pictures = soup.find('div', class_='single-article').find_all('img')
        return {
            'title': title,
            'img': [urljoin_wrapper(cls.url, pic['src']) for pic in pictures],
        }
1507
1508
1509
class OverCompensating(GenericNavigableComic):
    """Retriever for the Over Compensating comics."""
    name = 'compensating'
    long_name = 'Over Compensating'
    url = 'http://www.overcompensating.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the home page anchor whose href ends with 'comic=1'."""
        first_re = re.compile('comic=1$')
        return get_soup_at_url(cls.url).find('a', href=first_re)

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the navigation anchor, identified by its title attribute."""
        wanted_title = 'next comic' if next_ else 'go back already'
        return last_soup.find('a', title=wanted_title)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic description: number, image url and title."""
        page_url = cls.get_url_from_link(link)
        # The comic number is the trailing 'comic=<digits>' part of the url.
        number_match = re.compile('.*comic=([0-9]*)$').match(page_url)
        comic_number = int(number_match.group(1))
        strip = soup.find('img', src=re.compile('^/oc/comics/.*'))
        return {
            'num': comic_number,
            'img': [urljoin_wrapper(page_url, strip['src'])],
            'title': strip.get('title')
        }
1539
1540
1541
class Oglaf(GenericNavigableComic):
    """Retriever for the Oglaf comics (flagged NSFW)."""
    name = 'oglaf'
    long_name = 'Oglaf [NSFW]'
    url = 'http://oglaf.com'
    _categories = ('NSFW', )
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the parent of the div with id 'st' on the home page."""
        home = get_soup_at_url(cls.url)
        return home.find("div", id="st").parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the parent of the next/previous div, or None if missing."""
        marker = last_soup.find("div", id="nx" if next_ else "pvs")
        if not marker:
            return None
        return marker.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Gather the title, the image urls and their hover texts."""
        page_title = soup.find('title').string
        header_imgs = soup.find('div', id='tt').find_all('img')
        assert len(header_imgs) == 1
        strips = soup.find_all('img', id='strip')
        assert len(strips) == 1
        # Title image first, then the strip itself.
        pictures = header_imgs + strips
        hover_texts = ' '.join(pic['title'] for pic in pictures)
        return {
            'title': page_title,
            'img': [pic['src'] for pic in pictures],
            'description': hover_texts,
        }
1575
1576
1577
class ScandinaviaAndTheWorld(GenericNavigableComic):
    """Retriever for the Scandinavia And The World comics."""
    name = 'satw'
    long_name = 'Scandinavia And The World'
    url = 'http://satwcomic.com'
    first_url = 'http://satwcomic.com/sweden-denmark-and-norway'
    get_first_comic_link = simulate_first_link

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor whose access key is 'n' (next) or 'p' (previous)."""
        key = 'n' if next_ else 'p'
        return last_soup.find('a', accesskey=key)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Read the title, description and image urls from the page."""
        label = soup.find('meta', attrs={'name': 'twitter:label1'})['content']
        summary = soup.find('meta', property='og:description')['content']
        pictures = soup.find_all('img', itemprop="image")
        return {
            'title': label,
            'description': summary,
            'img': [pic['src'] for pic in pictures],
        }
1601
1602
1603
class SomethingOfThatIlk(GenericEmptyComic):  # Does not exist anymore
    """Placeholder for the Something Of That Ilk comics (site is gone)."""
    name = 'somethingofthatilk'
    long_name = 'Something Of That Ilk'
    url = 'http://www.somethingofthatilk.com'
1608
1609
1610
class InfiniteMonkeyBusiness(GenericNavigableComic):
    """Retriever for the Infinite Monkey Business comics."""
    name = 'monkey'
    long_name = 'Infinite Monkey Business'
    url = 'http://infinitemonkeybusiness.net'
    first_url = 'http://infinitemonkeybusiness.net/comic/pillory/'
    get_first_comic_link = simulate_first_link
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the title and the image urls found in the comic div."""
        og_title = soup.find('meta', property='og:title')['content']
        comic_div = soup.find('div', id='comic')
        pictures = comic_div.find_all('img')
        return {
            'title': og_title,
            'img': [pic['src'] for pic in pictures],
        }
1628
1629
1630
class Wondermark(GenericListableComic):
    """Retriever for the Wondermark comics, driven by the archive page."""
    name = 'wondermark'
    long_name = 'Wondermark'
    url = 'http://wondermark.com'
    get_url_from_archive_element = get_href

    @classmethod
    def get_archive_elements(cls):
        """Return the bookmark anchors of the archive page in reverse order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive/'))
        bookmarks = archive_soup.find_all('a', rel='bookmark')
        return reversed(bookmarks)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return date, image, title, alt text and tags for a comic page.

        Pages without a comic div yield an empty image list and empty
        title and alt text.
        """
        raw_date = soup.find('div', class_='postdate').find('em').string
        published = string_to_date(remove_st_nd_rd_th_from_date(raw_date), "%B %d, %Y")
        images, hover_text, og_title = [], '', ''
        comic_div = soup.find('div', id='comic')
        if comic_div:
            strip = comic_div.find('img')
            images = [strip['src']]
            hover_text = strip['alt']
            assert hover_text == strip['title']
            og_title = soup.find('meta', property='og:title')['content']
        tag_links = soup.find('div', class_='postmeta').find_all('a', rel='tag')
        return {
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': images,
            'title': og_title,
            'alt': hover_text,
            'tags': ' '.join(anchor.string for anchor in tag_links),
        }
1667
1668
1669
class WarehouseComic(GenericNavigableComic):
    """Retriever for the Warehouse Comic comics."""
    name = 'warehouse'
    long_name = 'Warehouse Comic'
    url = 'http://warehousecomic.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = get_a_navi_navifirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the title, publication date and image urls of a comic."""
        post_title = soup.find('h2', class_='post-title').string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find('div', id='comic').find_all('img')
        return {
            'img': [pic['src'] for pic in pictures],
            'title': post_title,
            'day': published.day,
            'month': published.month,
            'year': published.year,
        }
1691
1692
1693
class JustSayEh(GenericNavigableComic):
    """Retriever for the Just Say Eh comics."""
    # Also on http://tapastic.com/series/Just-Say-Eh
    name = 'justsayeh'
    long_name = 'Just Say Eh'
    url = 'http://www.justsayeh.com'
    get_navi_link = get_a_navi_comicnavnext_navinext
    get_first_comic_link = get_a_navi_navifirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the title, image urls and alt text of a comic."""
        post_title = soup.find('h2', class_='post-title').string
        pictures = soup.find("div", id="comic").find_all("img")
        # Every image is expected to carry the same text in alt and title.
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        first_alt = pictures[0]['alt']
        return {
            'img': [pic['src'] for pic in pictures],
            'title': post_title,
            'alt': first_alt,
        }
1714
1715
1716
class MouseBearComedy(GenericNavigableComic):
    """Retriever for the Mouse Bear Comedy comics."""
    # Also on http://mousebearcomedy.tumblr.com
    name = 'mousebear'
    long_name = 'Mouse Bear Comedy'
    url = 'http://www.mousebearcomedy.com'
    get_navi_link = get_a_navi_comicnavnext_navinext
    get_first_comic_link = get_a_navi_navifirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the date, image urls, title and author of a comic."""
        post_title = soup.find('h2', class_='post-title').string
        writer = soup.find("span", class_="post-author").find("a").string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, '%B %d, %Y')
        pictures = soup.find("div", id="comic").find_all("img")
        # alt, title and the post title are expected to be the same text.
        assert all(pic['alt'] == pic['title'] == post_title for pic in pictures)
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [pic['src'] for pic in pictures],
            'title': post_title,
            'author': writer,
        }
1742
1743
class BigFootJustice(GenericNavigableComic):
    """Retriever for the Big Foot Justice comics."""
    # Also on http://tapastic.com/series/bigfoot-justice
    name = 'bigfoot'
    long_name = 'Big Foot Justice'
    url = 'http://bigfootjustice.com'
    get_navi_link = get_a_navi_comicnavnext_navinext
    get_first_comic_link = get_a_navi_navifirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the image urls and a title built from the image titles."""
        pictures = soup.find('div', id='comic').find_all('img')
        assert all(pic['title'] == pic['alt'] for pic in pictures)
        joined_titles = ' '.join(pic['title'] for pic in pictures)
        return {
            'img': [pic['src'] for pic in pictures],
            'title': joined_titles,
        }
1763
1764
1765
class RespawnComic(GenericNavigableComic):
    """Class to retrieve Respawn Comic."""
    # Also on http://respawncomic.tumblr.com
    name = 'respawn'
    long_name = 'Respawn Comic'
    # The address must not carry trailing whitespace.
    url = 'http://respawncomic.com'
    _categories = ('RESPAWN', )
    get_navi_link = get_a_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://respawncomic.com/comic/c0001/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Title, author and publication date come from the page meta tags.
        The images are the og:image entries, minus two fixed site images.
        """
        title = soup.find('meta', property='og:title')['content']
        author = soup.find('meta', attrs={'name': 'shareaholic:article_author_name'})['content']
        date_str = soup.find('meta', attrs={'name': 'shareaholic:article_published_time'})['content']
        # Only the first 10 characters (the "%Y-%m-%d" part) are parsed.
        date_str = date_str[:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        imgs = soup.find_all('meta', property='og:image')
        # og:image urls that are excluded from the result.
        skip_imgs = {
            'http://respawncomic.com/wp-content/uploads/2016/03/site/HAROLD2.png',
            'http://respawncomic.com/wp-content/uploads/2016/03/site/DEVA.png'
        }
        return {
            'title': title,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [i['content'] for i in imgs if i['content'] not in skip_imgs],
        }
1797
1798
1799
class SafelyEndangered(GenericNavigableComic):
    """Retriever for the Safely Endangered comics."""
    # Also on http://tumblr.safelyendangered.com
    name = 'endangered'
    long_name = 'Safely Endangered'
    url = 'http://www.safelyendangered.com'
    first_url = 'http://www.safelyendangered.com/comic/ignored/'
    get_first_comic_link = simulate_first_link
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the date, image urls, title and alt text of a comic."""
        post_title = soup.find('h2', class_='post-title').string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, '%B %d, %Y')
        pictures = soup.find('div', id='comic').find_all('img')
        first_alt = pictures[0]['alt']
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [pic['src'] for pic in pictures],
            'title': post_title,
            'alt': first_alt,
        }
1826
1827
1828
class PicturesInBoxes(GenericNavigableComic):
    """Retriever for the Pictures In Boxes comics."""
    # Also on http://picturesinboxescomic.tumblr.com
    name = 'picturesinboxes'
    long_name = 'Pictures in Boxes'
    url = 'http://www.picturesinboxes.com'
    first_url = 'http://www.picturesinboxes.com/2013/10/26/tetris/'
    get_first_comic_link = simulate_first_link
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the date, image urls, title and author of a comic."""
        post_title = soup.find('h2', class_='post-title').string
        writer = soup.find("span", class_="post-author").find("a").string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, '%B %d, %Y')
        pictures = soup.find('div', class_='comicpane').find_all('img')
        assert pictures
        # title, alt and the post title are expected to be the same text.
        assert all(pic['title'] == pic['alt'] == post_title for pic in pictures)
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [pic['src'] for pic in pictures],
            'title': post_title,
            'author': writer,
        }
1856
1857
1858
class Penmen(GenericEmptyComic):
    """Placeholder for the Penmen comics."""
    name = 'penmen'
    long_name = 'Penmen'
    url = 'http://penmen.com'
1863
1864
1865
class TheDoghouseDiaries(GenericNavigableComic):
    """Retriever for The Dog House Diaries comics."""
    name = 'doghouse'
    long_name = 'The Dog House Diaries'
    url = 'http://thedoghousediaries.com'

    @classmethod
    def get_first_comic_link(cls):
        """Return the anchor with id 'firstlink' from the home page."""
        home = get_soup_at_url(cls.url)
        return home.find('a', id='firstlink')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor with id 'nextlink' or 'previouslink'."""
        anchor_id = 'nextlink' if next_ else 'previouslink'
        return last_soup.find('a', id=anchor_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the titles, alt text, image url and number of a comic."""
        strip = soup.find('img', src=re.compile('^dhdcomics/.*'))
        page_url = cls.get_url_from_link(link)
        main_title = soup.find('h2', id='titleheader').string
        sub_title = soup.find('div', id='subtext').string
        hover_text = strip.get('title')
        image_url = urljoin_wrapper(page_url, strip['src'].strip())
        # The comic number is the last path segment of the page url.
        number = int(page_url.split('/')[-1])
        return {
            'title': main_title,
            'title2': sub_title,
            'alt': hover_text,
            'img': [image_url],
            'num': number,
        }
1894
1895
1896
class InvisibleBread(GenericListableComic):
    """Class to retrieve Invisible Bread comics."""
    # Also on http://www.gocomics.com/invisible-bread
    name = 'invisiblebread'
    long_name = 'Invisible Bread'
    url = 'http://invisiblebread.com'

    @classmethod
    def get_archive_elements(cls):
        """Get the archive title cells, in reverse page order."""
        archive_url = urljoin_wrapper(cls.url, 'archives/')
        return reversed(get_soup_at_url(archive_url).find_all('td', class_='archive-title'))

    @classmethod
    def get_url_from_archive_element(cls, td):
        """Get url corresponding to an archive element."""
        return td.find('a')['href']

    @classmethod
    def get_comic_info(cls, soup, td):
        """Get information about a particular comics.

        The archive cell gives the title and, via its previous sibling, the
        month and day. The year is read from the comic url.
        """
        url = cls.get_url_from_archive_element(td)
        title = td.find('a').string
        month_and_day = td.previous_sibling.string
        # Escape the base url so that its dots only match literal dots.
        link_re = re.compile('^%s/([0-9]+)/' % re.escape(cls.url))
        year = link_re.match(url).groups()[0]
        date_str = month_and_day + ' ' + year
        day = string_to_date(date_str, '%b %d %Y')
        # Only the first image of the comic div is used.
        img = soup.find('div', id='comic').find('img')
        assert img['title'] == img['alt'] == title
        return {
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [urljoin_wrapper(cls.url, img['src'])],
            'title': title,
        }
1933
1934
class DiscoBleach(GenericEmptyComic):  # Does not work anymore
    """Placeholder for the Disco Bleach comics (retrieval is disabled)."""
    name = 'discobleach'
    long_name = 'Disco Bleach'
    url = 'http://discobleach.com'
1939
1940
1941
class TubeyToons(GenericEmptyComic):  # Does not work anymore
    """Placeholder for the Tubey Toons comics (retrieval is disabled)."""
    # Also on http://tapastic.com/series/Tubey-Toons
    # Also on http://tubeytoons.tumblr.com
    name = 'tubeytoons'
    long_name = 'Tubey Toons'
    url = 'http://tubeytoons.com'
    _categories = ('TUNEYTOONS', )
1949
1950
1951
class CompletelySeriousComics(GenericNavigableComic):
    """Retriever for the Completely Serious comics."""
    name = 'completelyserious'
    long_name = 'Completely Serious Comics'
    url = 'http://completelyseriouscomics.com'
    get_navi_link = get_a_navi_navinext
    get_first_comic_link = get_a_navi_navifirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the date, image urls, title, alt text and author."""
        post_title = soup.find('h2', class_='post-title').string
        # The author is the second child node of the post-author span.
        writer = soup.find('span', class_='post-author').contents[1].string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, '%B %d, %Y')
        pictures = soup.find('div', class_='comicpane').find_all('img')
        assert pictures
        hover_text = pictures[0]['title']
        assert all(pic['title'] == pic['alt'] == hover_text for pic in pictures)
        return {
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [pic['src'] for pic in pictures],
            'title': post_title,
            'alt': hover_text,
            'author': writer,
        }
1979
1980
1981
class PoorlyDrawnLines(GenericListableComic):
    """Class to retrieve Poorly Drawn Lines comics."""
    # Also on http://pdlcomics.tumblr.com
    name = 'poorlydrawn'
    long_name = 'Poorly Drawn Lines'
    url = 'http://poorlydrawnlines.com'
    _categories = ('POORLYDRAWN', )
    get_url_from_archive_element = get_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        A post holds at most one image; a post without image yields an empty
        image list and an empty title.
        """
        imgs = soup.find('div', class_='post').find_all('img')
        assert len(imgs) <= 1
        return {
            'img': [i['src'] for i in imgs],
            'title': imgs[0].get('title', "") if imgs else "",
        }

    @classmethod
    def get_archive_elements(cls):
        """Get the archive anchors pointing to comic pages, in reverse order."""
        archive_url = urljoin_wrapper(cls.url, 'archive')
        # Escape the base url so that its dots only match literal dots.
        url_re = re.compile('^%s/comic/.' % re.escape(cls.url))
        return reversed(get_soup_at_url(archive_url).find_all('a', href=url_re))
2005
2006
2007
class LoadingComics(GenericNavigableComic):
    """Retriever for the Loading Artist comics."""
    name = 'loadingartist'
    long_name = 'Loading Artist'
    url = 'http://www.loadingartist.com/latest'

    @classmethod
    def get_first_comic_link(cls):
        """Return the anchor titled "First" on the latest-comic page."""
        latest = get_soup_at_url(cls.url)
        return latest.find('a', title="First")

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor titled 'Next' or 'Previous'."""
        wanted_title = 'Next' if next_ else 'Previous'
        return last_soup.find('a', title=wanted_title)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the title, image urls and publication date of a comic."""
        heading = soup.find('h1').string
        raw_date = soup.find('span', class_='date').string.strip()
        published = string_to_date(raw_date, "%B %d, %Y")
        # Only images with empty alt and title attributes are kept.
        pictures = soup.find('div', class_='comic').find_all('img', alt='', title='')
        return {
            'title': heading,
            'img': [pic['src'] for pic in pictures],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2037
2038
2039
class ChuckleADuck(GenericNavigableComic):
    """Retriever for the Chuckle-A-Duck comics."""
    name = 'chuckleaduck'
    long_name = 'Chuckle-A-duck'
    url = 'http://chuckleaduck.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = get_div_navfirst_a

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the date, image urls, title and author of a comic.

        A page without a comic div yields no image and an empty title.
        """
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(remove_st_nd_rd_th_from_date(raw_date), "%B %d, %Y")
        writer = soup.find('span', class_='post-author').string
        comic_div = soup.find('div', id='comic')
        if comic_div:
            pictures = comic_div.find_all('img')
        else:
            pictures = []
        if pictures:
            hover_text = pictures[0]['title']
        else:
            hover_text = ""
        assert all(pic['title'] == pic['alt'] == hover_text for pic in pictures)
        return {
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [pic['src'] for pic in pictures],
            'title': hover_text,
            'author': writer,
        }
2065
2066
2067
class DepressedAlien(GenericNavigableComic):
    """Retriever for the Depressed Alien comics."""
    name = 'depressedalien'
    long_name = 'Depressed Alien'
    url = 'http://depressedalien.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the parent of the image named 'beginArrow' on the home page."""
        home = get_soup_at_url(cls.url)
        return home.find('img', attrs={'name': 'beginArrow'}).parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the parent of the image named 'rightArrow' or 'leftArrow'."""
        arrow_name = 'rightArrow' if next_ else 'leftArrow'
        return last_soup.find('img', attrs={'name': arrow_name}).parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the title and the og:image urls of a comic."""
        twitter_title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        image_metas = soup.find_all('meta', property='og:image')
        return {
            'title': twitter_title,
            'img': [meta['content'] for meta in image_metas],
        }
2093
2094
2095
class ThingsInSquares(GenericListableComic):
    """Retriever for the Things In Squares comics, driven by the archive."""
    # This can be retrieved in other languages
    # Also on https://tapastic.com/series/Things-in-Squares
    name = 'squares'
    long_name = 'Things in squares'
    url = 'http://www.thingsinsquares.com'

    @classmethod
    def get_comic_info(cls, soup, tr):
        """Return the description of a comic from its page and archive row.

        The archive row must hold exactly three cells: the second contains
        the link and the third the date.
        """
        _, link_cell, date_cell = tr.find_all('td')
        anchor = link_cell.find('a')
        raw_date = date_cell.string
        published = string_to_date(raw_date, "%m.%d.%y")
        archive_title = anchor.string
        og_title = soup.find('meta', property='og:title')['content']
        desc_meta = soup.find('meta', property='og:description')
        if desc_meta:
            summary = desc_meta['content']
        else:
            summary = ''
        tag_metas = soup.find_all('meta', property='article:tag')
        joined_tags = ' '.join(meta['content'] for meta in tag_metas)
        pictures = soup.find('div', class_='entry-content').find_all('img')
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'title': archive_title,
            'title2': og_title,
            'description': summary,
            'tags': joined_tags,
            'img': [pic['src'] for pic in pictures],
            'alt': ' '.join(pic['alt'] for pic in pictures),
        }

    @classmethod
    def get_url_from_archive_element(cls, tr):
        """Return the href of the link held in the second cell of the row."""
        _, link_cell, _ = tr.find_all('td')
        return link_cell.find('a')['href']

    @classmethod
    def get_archive_elements(cls):
        """Return the rows of the archive table body in reverse order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive-2'))
        rows = archive_soup.find('tbody').find_all('tr')
        return reversed(rows)
2137
2138
2139
class HappleTea(GenericNavigableComic):
    """Retriever for the Happle Tea comics."""
    name = 'happletea'
    long_name = 'Happle Tea'
    url = 'http://www.happletea.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = get_a_navi_navifirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the title, image urls, alt text, date and author."""
        pictures = soup.find('div', id='comic').find_all('img')
        post_div = soup.find('div', class_='post-content')
        post_title = post_div.find('h2', class_='post-title').string
        writer = post_div.find('a', rel='author').string
        raw_date = post_div.find('span', class_='post-date').string
        published = string_to_date(raw_date, "%B %d, %Y")
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        # The alt texts are concatenated without any separator.
        joined_alt = ''.join(pic['alt'] for pic in pictures)
        return {
            'title': post_title,
            'img': [pic['src'] for pic in pictures],
            'alt': joined_alt,
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'author': writer,
        }
2166
2167
2168
class FatAwesomeComics(GenericNavigableComic):
    """Retriever for the Fat Awesome comics."""
    # Also on http://fatawesomecomedy.tumblr.com
    name = 'fatawesome'
    long_name = 'Fat Awesome'
    url = 'http://fatawesome.com/comics'
    first_url = 'http://fatawesome.com/shortbus/'
    get_first_comic_link = simulate_first_link
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the title, description, tags, alt text, image and date."""
        twitter_title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        summary = soup.find('meta', attrs={'name': 'description'})['content']
        tag_meta = soup.find('meta', property='article:tag')
        if tag_meta:
            tag_text = tag_meta['content']
        else:
            tag_text = ""
        # Only the first 10 characters (the "%Y-%m-%d" part) are parsed.
        raw_date = soup.find('meta', property='article:published_time')['content'][:10]
        published = string_to_date(raw_date, "%Y-%m-%d")
        pictures = soup.find_all('img', attrs={'data-recalc-dims': "1"})
        assert len(pictures) == 1
        return {
            'title': twitter_title,
            'description': summary,
            'tags': tag_text,
            'alt': "".join(pic['alt'] for pic in pictures),
            # Anything after the last '?' is dropped from the image url.
            'img': [pic['src'].rsplit('?', 1)[0] for pic in pictures],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2199
2200
2201
class AnythingComic(GenericListableComic):
    """Retriever for the Anything Comic comics, driven by the archive table."""
    # Also on http://tapastic.com/series/anything
    name = 'anythingcomic'
    long_name = 'Anything Comic'
    url = 'http://www.anythingcomic.com'

    @classmethod
    def get_archive_elements(cls):
        """Return the rows of the chapter table that correspond to comics."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive/'))
        rows = archive_soup.find('table', id='chapter_table').find_all('tr')
        # The first 2 <tr>'s do not correspond to comics
        return rows[2:]

    @classmethod
    def get_url_from_archive_element(cls, tr):
        """Return the absolute url of the comic linked from an archive row."""
        _, comic_cell, _, _ = tr.find_all('td')
        anchor = comic_cell.find('a')
        return urljoin_wrapper(cls.url, anchor['href'])

    @classmethod
    def get_comic_info(cls, soup, tr):
        """Return number, title, alt text, image url and date of a comic.

        The archive row must hold exactly four cells: number, link, date and
        one cell that is not used.
        """
        num_cell, comic_cell, date_cell, _ = tr.find_all('td')
        number = int(num_cell.string)
        anchor = comic_cell.find('a')
        link_text = anchor.string
        pictures = soup.find_all('img', id='comic_image')
        published = string_to_date(date_cell.string, '%d %b %Y %I:%M %p')
        assert len(pictures) == 1
        assert all(pic.get('alt') == pic.get('title') for pic in pictures)
        return {
            'num': number,
            'title': link_text,
            'alt': pictures[0].get('alt', ''),
            'img': [pic['src'] for pic in pictures],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2241
2242
2243
class LonnieMillsap(GenericNavigableComic):
    """Retriever for Lonnie Millsap's comics."""
    name = 'millsap'
    long_name = 'Lonnie Millsap'
    url = 'http://www.lonniemillsap.com'
    first_url = 'http://www.lonniemillsap.com/?p=42'
    get_first_comic_link = simulate_first_link
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the title, author, image urls and date of a comic."""
        post_title = soup.find('h2', class_='post-title').string
        post_div = soup.find('div', class_='post-content')
        writer = post_div.find("span", class_="post-author").find("a").string
        raw_date = post_div.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        pictures = post_div.find("div", class_="entry").find_all("img")
        return {
            'title': post_title,
            'author': writer,
            'img': [pic['src'] for pic in pictures],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2269
2270
2271
class LinsEditions(GenericNavigableComic):
    """Scraper for L.I.N.S. Editions comics (linsedition.com)."""
    # Also on http://linscomics.tumblr.com
    name = 'lins'
    long_name = 'L.I.N.S. Editions'
    url = 'https://linsedition.com'
    _categories = ('LINS', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'https://linsedition.com/2011/09/07/l-i-n-s/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Read title, images and publication date from the page's meta tags.

        `link` is not used by this implementation.
        """
        title_meta = soup.find('meta', property='og:title')
        title = title_meta['content']
        image_metas = soup.find_all('meta', property='og:image')
        published_meta = soup.find('meta', property='article:published_time')
        # Only the first 10 characters (YYYY-MM-DD) of the timestamp are parsed.
        published = string_to_date(published_meta['content'][:10], "%Y-%m-%d")
        return {
            'title': title,
            'img': [meta['content'] for meta in image_metas],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2296
2297
2298
class ThorsThundershack(GenericNavigableComic):
    """Scraper for Thor's Thundershack comics (thorsthundershack.com)."""
    # Also on http://tapastic.com/series/Thors-Thundershac
    name = 'thor'
    long_name = 'Thor\'s Thundershack'
    url = 'http://www.thorsthundershack.com'
    _categories = ('THOR', )
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'first' navigation anchor found on the home page."""
        home = get_soup_at_url(cls.url)
        return home.find('a', class_='first navlink')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the next (or previous) anchor, or None when there is none.

        Anchors whose href is exactly '/comic' are skipped.
        """
        rel = 'next' if next_ else 'prev'
        candidates = (
            anchor for anchor in last_soup.find_all('a', rel=rel)
            if anchor['href'] != '/comic'
        )
        return next(candidates, None)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, description, author, date and images from a page.

        `link` is not used by this implementation.
        """
        description_meta = soup.find('meta', attrs={'name': 'description'})
        title = description_meta["content"]
        description = soup.find('div', itemprop='articleBody').text
        author = soup.find('span', itemprop='author copyrightHolder').string
        images = soup.find_all('img', itemprop='image')
        assert all(img['title'] == img['alt'] for img in images)
        alt_text = images[0]['alt'] if images else ""
        published_text = soup.find('time', itemprop='datePublished')["datetime"]
        published = string_to_date(published_text, "%Y-%m-%d %H:%M:%S")
        return {
            # Image sources are resolved against the site URL.
            'img': [urljoin_wrapper(cls.url, img['src']) for img in images],
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'author': author,
            'title': title,
            'alt': alt_text,
            'description': description,
        }
2341
2342
2343
class GerbilWithAJetpack(GenericNavigableComic):
    """Scraper for Gerbil With A Jetpack comics (gerbilwithajetpack.com)."""
    name = 'gerbil'
    long_name = 'Gerbil With A Jetpack'
    url = 'http://gerbilwithajetpack.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, author, date, alt text and images from a page.

        At least one image is required in the `comic` div, and every image
        must carry the same alt and title. `link` is unused.
        """
        heading = soup.find('h2', class_='post-title')
        title = heading.string
        author_link = soup.find("span", class_="post-author").find("a")
        author = author_link.string
        date_text = soup.find("span", class_="post-date").string
        published = string_to_date(date_text, "%B %d, %Y")
        images = soup.find("div", id="comic").find_all("img")
        alt_text = images[0]['alt']
        assert all(img['alt'] == img['title'] == alt_text for img in images)
        return {
            'img': [img['src'] for img in images],
            'title': title,
            'alt': alt_text,
            'author': author,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2370
2371
2372
class EveryDayBlues(GenericNavigableComic):
    """Scraper for Every Day Blues comics (everydayblues.net)."""
    name = "blues"
    long_name = "Every Day Blues"
    url = "http://everydayblues.net"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, author, date and (at most one) image from a page.

        The date is parsed with the German locale. `link` is unused.
        """
        heading = soup.find("h2", class_="post-title")
        title = heading.string
        author_link = soup.find("span", class_="post-author").find("a")
        author = author_link.string
        date_text = soup.find("span", class_="post-date").string
        published = string_to_date(date_text, "%d. %B %Y", "de_DE.utf8")
        images = soup.find("div", id="comic").find_all("img")
        # Alt and title of every image must both equal the post title.
        assert all(img['alt'] == img['title'] == title for img in images)
        assert len(images) <= 1
        return {
            'img': [img['src'] for img in images],
            'title': title,
            'author': author,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2398
2399
2400
class BiterComics(GenericNavigableComic):
    """Scraper for Biter Comics (bitercomics.com)."""
    name = "biter"
    long_name = "Biter Comics"
    url = "http://www.bitercomics.com"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, author, date, alt text and image from a page.

        Exactly one image is expected in the `comic` div. `link` is unused.
        """
        heading = soup.find("h1", class_="entry-title")
        title = heading.string
        author_link = soup.find("span", class_="author vcard").find("a")
        author = author_link.string
        date_text = soup.find("span", class_="entry-date").string
        published = string_to_date(date_text, "%B %d, %Y")
        images = soup.find("div", id="comic").find_all("img")
        assert all(img['alt'] == img['title'] for img in images)
        assert len(images) == 1
        alt_text = images[0]['alt']
        return {
            'img': [img['src'] for img in images],
            'title': title,
            'alt': alt_text,
            'author': author,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2428
2429
2430
class TheAwkwardYeti(GenericNavigableComic):
    """Scraper for The Awkward Yeti comics (theawkwardyeti.com)."""
    # Also on http://www.gocomics.com/the-awkward-yeti
    # Also on http://larstheyeti.tumblr.com
    # Also on https://tapastic.com/series/TheAwkwardYeti
    name = 'yeti'
    long_name = 'The Awkward Yeti'
    url = 'http://theawkwardyeti.com'
    _categories = ('YETI', )
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, publication date and images from a comic page.

        `link` is not used by this implementation.
        """
        heading = soup.find('h2', class_='post-title')
        title = heading.string
        date_text = soup.find("span", class_="post-date").string
        published = string_to_date(date_text, "%B %d, %Y")
        images = soup.find("div", id="comic").find_all("img")
        # Only the first image is required to have matching alt and title.
        assert all(img['alt'] == img['title'] for img in images[:1])
        return {
            'img': [img['src'] for img in images],
            'title': title,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2457
2458
2459
class PleasantThoughts(GenericNavigableComic):
    """Scraper for Pleasant Thoughts comics (pleasant-thoughts.com)."""
    name = 'pleasant'
    long_name = 'Pleasant Thoughts'
    url = 'http://pleasant-thoughts.com'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract the title and images from the post content of a page.

        `link` is not used by this implementation.
        """
        content = soup.find('div', class_='post-content')
        heading = content.find('h2', class_='post-title')
        title = heading.string
        images = content.find("div", class_="entry").find_all("img")
        return {
            'title': title,
            'img': [img['src'] for img in images],
        }
2477
2478
2479
class MisterAndMe(GenericNavigableComic):
    """Scraper for Mister & Me comics (mister-and-me.com)."""
    # Also on http://www.gocomics.com/mister-and-me
    # Also on https://tapastic.com/series/Mister-and-Me
    name = 'mister'
    long_name = 'Mister & Me'
    url = 'http://www.mister-and-me.com'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, author, date, alt text and image from a page.

        At most one image is expected; the alt text is empty when there is
        none. `link` is unused.
        """
        heading = soup.find('h2', class_='post-title')
        title = heading.string
        author_link = soup.find("span", class_="post-author").find("a")
        author = author_link.string
        date_text = soup.find("span", class_="post-date").string
        published = string_to_date(date_text, "%B %d, %Y")
        images = soup.find("div", id="comic").find_all("img")
        assert all(img['alt'] == img['title'] for img in images)
        assert len(images) <= 1
        alt_text = images[0]['alt'] if images else ""
        return {
            'img': [img['src'] for img in images],
            'title': title,
            'alt': alt_text,
            'author': author,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2509
2510
2511
class LastPlaceComics(GenericNavigableComic):
    """Scraper for Last Place Comics (lastplacecomics.com)."""
    name = 'lastplace'
    long_name = 'Last Place Comics'
    url = "http://lastplacecomics.com"
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, author, date, alt text and image from a page.

        At most one image is expected; the alt text is empty when there is
        none. `link` is unused.
        """
        heading = soup.find('h2', class_='post-title')
        title = heading.string
        author_link = soup.find("span", class_="post-author").find("a")
        author = author_link.string
        date_text = soup.find("span", class_="post-date").string
        published = string_to_date(date_text, "%B %d, %Y")
        images = soup.find("div", id="comic").find_all("img")
        assert all(img['alt'] == img['title'] for img in images)
        assert len(images) <= 1
        alt_text = images[0]['alt'] if images else ""
        return {
            'img': [img['src'] for img in images],
            'title': title,
            'alt': alt_text,
            'author': author,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2539
2540
2541
class TalesOfAbsurdity(GenericNavigableComic):
    """Scraper for Tales of Absurdity comics (talesofabsurdity.com)."""
    # Also on http://tapastic.com/series/Tales-Of-Absurdity
    # Also on http://talesofabsurdity.tumblr.com
    name = 'absurdity'
    long_name = 'Tales of Absurdity'
    url = 'http://talesofabsurdity.com'
    _categories = ('ABSURDITY', )
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, author, date, alt text and images from a page.

        The alt text comes from the first image and is empty when the page
        has none. `link` is unused.
        """
        heading = soup.find('h2', class_='post-title')
        title = heading.string
        author_link = soup.find("span", class_="post-author").find("a")
        author = author_link.string
        date_text = soup.find("span", class_="post-date").string
        published = string_to_date(date_text, "%B %d, %Y")
        images = soup.find("div", id="comic").find_all("img")
        assert all(img['alt'] == img['title'] for img in images)
        alt_text = images[0]['alt'] if images else ""
        return {
            'img': [img['src'] for img in images],
            'title': title,
            'alt': alt_text,
            'author': author,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2571
2572
2573
class EndlessOrigami(GenericNavigableComic):
    """Scraper for Endless Origami comics (endlessorigami.com)."""
    name = "origami"
    long_name = "Endless Origami"
    url = "http://endlessorigami.com"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, author, date, alt text and images from a page.

        The alt text comes from the first image and is empty when the page
        has none. `link` is unused.
        """
        heading = soup.find('h2', class_='post-title')
        title = heading.string
        author_link = soup.find("span", class_="post-author").find("a")
        author = author_link.string
        date_text = soup.find("span", class_="post-date").string
        published = string_to_date(date_text, "%B %d, %Y")
        images = soup.find("div", id="comic").find_all("img")
        assert all(img['alt'] == img['title'] for img in images)
        alt_text = images[0]['alt'] if images else ""
        return {
            'img': [img['src'] for img in images],
            'title': title,
            'alt': alt_text,
            'author': author,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2600
2601
2602
class PlanC(GenericNavigableComic):
    """Scraper for Plan C comics (plancomic.com)."""
    name = 'planc'
    long_name = 'Plan C'
    url = 'http://www.plancomic.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, publication date and images from a comic page.

        `link` is not used by this implementation.
        """
        heading = soup.find('h2', class_='post-title')
        title = heading.string
        date_text = soup.find("span", class_="post-date").string
        published = string_to_date(date_text, "%B %d, %Y")
        images = soup.find('div', id='comic').find_all('img')
        return {
            'title': title,
            'img': [img['src'] for img in images],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2624
2625
2626 View Code Duplication
class BuniComic(GenericNavigableComic):
    """Scraper for Buni comics (bunicomic.com)."""
    name = 'buni'
    long_name = 'BuniComics'
    url = 'http://www.bunicomic.com'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract the single comic image and use its title as comic title.

        `link` is not used by this implementation.
        """
        comic_div = soup.find('div', id='comic')
        images = comic_div.find_all('img')
        # Exactly one image is expected, with matching alt and title.
        assert all(img['alt'] == img['title'] for img in images)
        assert len(images) == 1
        return {
            'img': [img['src'] for img in images],
            'title': images[0]['title'],
        }
2644
2645
2646
class GenericCommitStrip(GenericNavigableComic):
    """Shared scraper for the Commit Strip sites in different languages.

    Subclasses are expected to provide `first_url` (left as NotImplemented
    here) along with the usual name/url attributes.
    """
    get_navi_link = get_a_rel_next
    get_first_comic_link = simulate_first_link
    first_url = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, image titles, description and images from a page.

        `link` is not used by this implementation.
        """
        description = soup.find('meta', property='og:description')['content']
        title = soup.find('meta', property='og:title')['content']
        images = soup.find('div', class_='entry-content').find_all('img')
        # Secondary title: the images' title attributes joined by spaces
        # (an image without a title contributes an empty string).
        joined_titles = ' '.join(img.get('title', '') for img in images)
        return {
            'title': title,
            'title2': joined_titles,
            'description': description,
            'img': [
                urljoin_wrapper(cls.url, convert_iri_to_plain_ascii_uri(img['src']))
                for img in images
            ],
        }
2665
2666
2667
class CommitStripFr(GenericCommitStrip):
    """Commit Strip, French edition."""
    name = "commit_fr"
    long_name = "Commit Strip (Fr)"
    url = "http://www.commitstrip.com/fr"
    _categories = ("FRANCAIS", )
    # Overrides the NotImplemented placeholder of the generic class.
    first_url = "http://www.commitstrip.com/fr/2012/02/22/interview/"
2674
2675
2676
class CommitStripEn(GenericCommitStrip):
    """Commit Strip, English edition."""
    name = "commit_en"
    long_name = "Commit Strip (En)"
    url = "http://www.commitstrip.com/en"
    # Overrides the NotImplemented placeholder of the generic class.
    first_url = "http://www.commitstrip.com/en/2012/02/22/interview/"
2682
2683
2684
class GenericBoumerie(GenericNavigableComic):
    """Shared scraper for the Boumeries comics in different languages.

    Subclasses are expected to provide `date_format` and `lang`, both left
    as NotImplemented here and passed to `string_to_date`.
    """
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next
    date_format = NotImplemented
    lang = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract short URL, images, title, author and date from a page.

        `link` is not used by this implementation.
        """
        heading = soup.find('h2', class_='post-title')
        title = heading.string
        short_url = soup.find('link', rel='shortlink')['href']
        author_link = soup.find("span", class_="post-author").find("a")
        author = author_link.string
        date_text = soup.find('span', class_='post-date').string
        published = string_to_date(date_text, cls.date_format, cls.lang)
        images = soup.find('div', id='comic').find_all('img')
        assert all(img['alt'] == img['title'] for img in images)
        return {
            'short_url': short_url,
            'img': [img['src'] for img in images],
            'title': title,
            'author': author,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2710
2711
2712
class BoumerieEn(GenericBoumerie):
    """Boumeries comics, English edition."""
    name = "boumeries_en"
    long_name = "Boumeries (En)"
    url = "http://comics.boumerie.com"
    # Date format and locale used to parse the post date.
    date_format = "%B %d, %Y"
    lang = "en_GB.UTF-8"
2719
2720
2721
class BoumerieFr(GenericBoumerie):
    """Boumeries comics, French edition."""
    name = "boumeries_fr"
    long_name = "Boumeries (Fr)"
    url = "http://bd.boumerie.com"
    _categories = ("FRANCAIS", )
    # Date format and locale used to parse the post date.
    date_format = "%A, %d %B %Y"
    lang = "fr_FR.utf8"
2729
2730
2731
class UnearthedComics(GenericNavigableComic):
    """Class to retrieve Unearthed comics."""
    # Also on http://tapastic.com/series/UnearthedComics
    # Also on http://unearthedcomics.tumblr.com
    name = 'unearthed'
    long_name = 'Unearthed Comics'
    url = 'http://unearthedcomics.com'
    _categories = ('UNEARTHED', )
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://unearthedcomics.com/comics/world-with-turn-signals/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with the title (empty string when the page has no
        h1/h2), the og:description text (None when the meta tag is missing),
        the short URL, the image sources and the publication date.
        `link` is not used by this implementation.
        """
        short_url = soup.find('link', rel='shortlink')['href']
        title_elt = soup.find('h1') or soup.find('h2')
        title = title_elt.string if title_elt else ""
        desc_elt = soup.find('meta', property='og:description')
        # Keep the text of the meta tag rather than the Tag object itself, so
        # 'description' is a plain string like in the other comic classes.
        desc = desc_elt.get('content') if desc_elt is not None else None
        date_str = soup.find('time', class_='published updated hidden')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        post = soup.find('div', class_="entry content entry-content type-portfolio")
        imgs = post.find_all('img')
        return {
            'title': title,
            'description': desc,
            'url2': short_url,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
2763
2764
2765
class Optipess(GenericNavigableComic):
    """Scraper for Optipess comics (optipess.com)."""
    name = 'optipess'
    long_name = 'Optipess'
    url = 'http://www.optipess.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, alt text, author, images and date from a page.

        A page without a `comic` div yields no images and an empty alt text.
        `link` is unused.
        """
        heading = soup.find('h2', class_='post-title')
        title = heading.string
        author_link = soup.find("span", class_="post-author").find("a")
        author = author_link.string
        comic_div = soup.find('div', id='comic')
        images = comic_div.find_all('img') if comic_div else []
        # The alt text is taken from the title attribute of the first image.
        alt_text = images[0]['title'] if images else ""
        assert all(img['alt'] == img['title'] == alt_text for img in images)
        date_text = soup.find('span', class_='post-date').string
        published = string_to_date(date_text, "%B %d, %Y")
        return {
            'title': title,
            'alt': alt_text,
            'author': author,
            'img': [img['src'] for img in images],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2793
2794
2795
class PainTrainComic(GenericNavigableComic):
    """Class to retrieve Pain Train Comics."""
    name = 'paintrain'
    long_name = 'Pain Train Comics'
    url = 'http://paintraincomic.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with the short URL, the comic number parsed from it,
        the image sources, the publication date, the alt text (title of the
        first image) and the post title. `link` is unused.
        """
        title = soup.find('h2', class_='post-title').string
        short_url = soup.find('link', rel='shortlink')['href']
        # The site URL is escaped so that its dots only match literal dots.
        short_url_re = re.compile(r'^%s/\?p=([0-9]*)' % re.escape(cls.url))
        num = int(short_url_re.match(short_url).group(1))
        imgs = soup.find('div', id='comic').find_all('img')
        alt = imgs[0]['title']
        assert all(i['alt'] == i['title'] == alt for i in imgs)
        date_str = soup.find('span', class_='post-date').string
        day = string_to_date(date_str, "%d/%m/%Y")
        return {
            'short_url': short_url,
            'num': num,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'alt': alt,
            'title': title,
        }
2825
2826
2827
class MoonBeard(GenericNavigableComic):
    """Class to retrieve MoonBeard comics."""
    # Also on http://blog.squiresjam.es/moonbeard
    # Also on http://www.webtoons.com/en/comedy/moon-beard/list?title_no=471
    name = 'moonbeard'
    long_name = 'Moon Beard'
    url = 'http://moonbeard.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with the short URL, the comic number parsed from it,
        the image sources, the publication date, the post title, the article
        tags joined by spaces, the alt text (title of the first image) and
        the author. `link` is unused.
        """
        title = soup.find('h2', class_='post-title').string
        short_url = soup.find('link', rel='shortlink')['href']
        # The site URL is escaped so that its dots only match literal dots.
        short_url_re = re.compile(r'^%s/\?p=([0-9]*)' % re.escape(cls.url))
        num = int(short_url_re.match(short_url).group(1))
        imgs = soup.find('div', id='comic').find_all('img')
        alt = imgs[0]['title']
        assert all(i['alt'] == i['title'] == alt for i in imgs)
        date_str = soup.find('span', class_='post-date').string
        day = string_to_date(date_str, "%B %d, %Y")
        tags = ' '.join(t['content'] for t in soup.find_all('meta', property='article:tag'))
        author = soup.find('span', class_='post-author').string
        return {
            'short_url': short_url,
            'num': num,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'title': title,
            'tags': tags,
            'alt': alt,
            'author': author,
        }
2863
2864
2865
class AHamADay(GenericNavigableComic):
    """Class to retrieve class A Ham A Day comics."""
    name = 'ham'
    long_name = 'A Ham A Day'
    url = 'http://www.ahammaday.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.ahammaday.com/today/3/6/french'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns None when the page has no matching navigation item, instead
        of failing with an AttributeError.
        """
        # prev is next / next is prev
        li = last_soup.find('li', class_='previous' if next_ else 'next')
        return li.find('a') if li is not None else None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with the image URLs (from the `image` meta tags), the
        og:title, the author and the publication date. `link` is unused.
        """
        date_str = soup.find('time', class_='published')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        author = soup.find('span', class_='blog-author').find('a').string
        title = soup.find('meta', property='og:title')['content']
        imgs = soup.find_all('meta', itemprop='image')
        return {
            'img': [i['content'] for i in imgs],
            'title': title,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
        }
2896
2897
2898
class LittleLifeLines(GenericNavigableComic):
    """Scraper for Little Life Lines comics (littlelifelines.com)."""
    # Also on https://little-life-lines.tumblr.com
    name = 'life'
    long_name = 'Little Life Lines'
    url = 'http://www.littlelifelines.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.littlelifelines.com/comics/well-done'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor to the next (or previous) comic, or None."""
        # prev is next / next is prev
        wanted_class = 'prev' if next_ else 'next'
        item = last_soup.find('li', class_=wanted_class)
        if not item:
            return None
        return item.find('a')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, alt text, description, author, date and images.

        Images without a `src` attribute are ignored; the alt text is taken
        from the first remaining image. `link` is unused.
        """
        title = soup.find('meta', property='og:title')['content']
        description = soup.find('meta', property='og:description')['content']
        date_text = soup.find('time', class_='published')['datetime']
        published = string_to_date(date_text, "%Y-%m-%d")
        author = soup.find('a', rel='author').string
        body = soup.find('div', class_="body entry-content")
        images = [img for img in body.find_all('img') if img.get('src') is not None]
        alt_text = images[0]['alt']
        return {
            'title': title,
            'alt': alt_text,
            'description': description,
            'author': author,
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [img['src'] for img in images],
        }
2937
2938
2939
class GenericWordPressInkblot(GenericNavigableComic):
    """Shared scraper for comic sites using WordPress with Inkblot."""
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'first webcomic' anchor found on the home page."""
        first_link_classes = 'webcomic-link webcomic1-link first-webcomic-link first-webcomic1-link'
        home = get_soup_at_url(cls.url)
        return home.find('a', class_=first_link_classes)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, publication date and images from a comic page.

        `link` is not used by this implementation.
        """
        title = soup.find('meta', property='og:title')['content']
        images = soup.find('div', class_='webcomic-image').find_all('img')
        published_meta = soup.find('meta', property='article:published_time')
        # Only the first 10 characters (YYYY-MM-DD) of the timestamp are parsed.
        published = string_to_date(published_meta['content'][:10], "%Y-%m-%d")
        return {
            'title': title,
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [img['src'] for img in images],
        }
2962
2963
2964
class EverythingsStupid(GenericWordPressInkblot):
    """Everything's Stupid comics, scraped via the generic Inkblot class."""
    # Also on http://tapastic.com/series/EverythingsStupid
    # Also on http://www.webtoons.com/en/challenge/everythings-stupid/list?title_no=14591
    # Also on http://everythingsstupidcomics.tumblr.com
    name = "stupid"
    long_name = "Everything's Stupid"
    url = "http://everythingsstupid.net"
2972
2973
2974
class TheIsmComics(GenericWordPressInkblot):
    """The Ism comics, scraped via the generic Inkblot class."""
    # Also on https://tapastic.com/series/TheIsm (?)
    name = "theism"
    long_name = "The Ism"
    url = "http://www.theism-comics.com"
2980
2981
2982
class WoodenPlankStudios(GenericWordPressInkblot):
    """Wooden Plank Studios comics, scraped via the generic Inkblot class."""
    name = "woodenplank"
    long_name = "Wooden Plank Studios"
    url = "http://woodenplankstudios.com"
2987
2988
2989
class ElectricBunnyComic(GenericNavigableComic):
    """Scraper for Electric Bunny Comics (electricbunnycomics.com)."""
    # Also on http://electricbunnycomics.tumblr.com
    name = 'bunny'
    long_name = 'Electric Bunny Comic'
    url = 'http://www.electricbunnycomics.com/View/Comic/153/Welcome+to+Hell'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the parent element of the 'First' navigation image."""
        first_img = get_soup_at_url(cls.url).find('img', alt='First')
        return first_img.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the parent of the 'Next' (or 'Back') image, or None."""
        wanted_alt = 'Next' if next_ else 'Back'
        nav_img = last_soup.find('img', alt=wanted_alt)
        if not nav_img:
            return None
        return nav_img.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Read the title and images from the page's Open Graph meta tags.

        `link` is not used by this implementation.
        """
        title_meta = soup.find('meta', property='og:title')
        title = title_meta['content']
        image_metas = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'img': [meta['content'] for meta in image_metas],
        }
3017
3018
3019
class SheldonComics(GenericNavigableComic):
    """Sheldon comics, browsed through the nav-first/nav-next/nav-prev anchors."""
    # Also published at http://www.gocomics.com/sheldon
    name = 'sheldon'
    long_name = 'Sheldon Comics'
    url = 'http://www.sheldoncomics.com'

    @classmethod
    def get_first_comic_link(cls):
        """Return the anchor whose id is 'nav-first' on the main page."""
        main_page = get_soup_at_url(cls.url)
        return main_page.find("a", id="nav-first")

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the first next/previous anchor that is not the bare site url.

        None is returned when every candidate points at the home page."""
        nav_id = "nav-next" if next_ else "nav-prev"
        candidates = (
            anchor for anchor in last_soup.find_all("a", id=nav_id)
            if anchor['href'] != 'http://www.sheldoncomics.com'
        )
        return next(candidates, None)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Describe the comic from the single image in the 'comic-foot' div."""
        foot_imgs = soup.find("div", id="comic-foot").find_all("img")
        # The alt text doubles as the title, and exactly one image is expected.
        assert all(img['alt'] == img['title'] for img in foot_imgs)
        assert len(foot_imgs) == 1
        only_img = foot_imgs[0]
        return {
            'title': only_img['title'],
            'img': [img['src'] for img in foot_imgs],
        }
3050
3051
3052
class Ubertool(GenericNavigableComic):
    """Ubertool comics, browsed with the shared comicnavbase navigation helpers."""
    # Also published at http://ubertool.tumblr.com
    # Also published at https://tapastic.com/series/ubertool
    name = 'ubertool'
    long_name = 'Ubertool'
    url = 'http://ubertoolcomic.com'
    _categories = ('UBERTOOL', )
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_a_comicnavbase_comicnavnext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Describe the comic: title, publication date and images of the 'comic' div."""
        post_title = soup.find('h2', class_='post-title').string
        post_date = soup.find('span', class_='post-date').string
        # The date text is parsed with the "%B %d, %Y" pattern.
        published = string_to_date(post_date, "%B %d, %Y")
        comic_imgs = soup.find('div', id='comic').find_all('img')
        return {
            'img': [img['src'] for img in comic_imgs],
            'title': post_title,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
3077
3078
3079
class CubeDrone(GenericNavigableComic):
    """Class to retrieve Cube Drone comics.

    Navigation links are located through the glyphicon <span> elements they
    contain; comic metadata is read from the page's twitter:* meta tags."""
    name = 'cubedrone'
    long_name = 'Cube Drone'
    url = 'http://cube-drone.com/comics'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics (parent of the 'backward' glyphicon)."""
        return get_soup_at_url(cls.url).find('span', class_='glyphicon glyphicon-backward').parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns the parent element of the right/left chevron icon, or None
        when the page has no such icon, as the other navigable comics do,
        instead of failing with an AttributeError on `None.parent`."""
        class_ = 'glyphicon glyphicon-chevron-' + ('right' if next_ else 'left')
        icon = last_soup.find('span', class_=class_)
        return icon.parent if icon is not None else None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The first image supplies the secondary title and the alt text."""
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        url2 = soup.find('meta', attrs={'name': 'twitter:url'})['content']
        # The publication date is not extracted for now; the disabled code read it from:
        # date_str = soup.find('h2', class_='comic_title').find('small').string
        # day = string_to_date(date_str, "%B %d, %Y, %I:%M %p")
        imgs = soup.find_all('img', class_='comic img-responsive')
        title2 = imgs[0]['title']
        alt = imgs[0]['alt']
        return {
            'url2': url2,
            'title': title,
            'title2': title2,
            'alt': alt,
            'img': [i['src'] for i in imgs],
        }
3114
3115
3116
class MakeItStoopid(GenericNavigableComic):
    """Make It Stoopid comics, browsed through the page's 'cnav' elements."""
    name = 'stoopid'
    long_name = 'Make it stoopid'
    url = 'http://makeitstoopid.com/comic.php'

    @classmethod
    def get_nav(cls, soup):
        """Return the first five 'cnav' elements, with None for those lacking an href.

        The elements after the fifth are asserted to be equal to the first
        five."""
        nav_elts = soup.find_all(class_='cnav')
        first_bar, second_bar = nav_elts[:5], nav_elts[5:]
        assert first_bar == second_bar
        # Expected order: begin, prev, archive, next_, end
        return [elt if elt.get('href') is not None else None for elt in first_bar]

    @classmethod
    def get_first_comic_link(cls):
        """Return the navigation element at index 0 ('begin') of the main page."""
        main_page = get_soup_at_url(cls.url)
        return cls.get_nav(main_page)[0]

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the element at index 3 (next) or index 1 (previous)."""
        position = 3 if next_ else 1
        return cls.get_nav(last_soup)[position]

    @classmethod
    def get_comic_info(cls, soup, link):
        """Describe the comic: title from the link, images with id 'comicimg'."""
        link_title = link['title']
        comic_imgs = soup.find_all('img', id='comicimg')
        return {
            'title': link_title,
            'img': [img['src'] for img in comic_imgs],
        }
3150
3151
3152
class TuMourrasMoinsBete(GenericNavigableComic):
    """Tu Mourras Moins Bete comics, browsed through the blog pager links."""
    name = 'mourrasmoinsbete'
    long_name = 'Tu Mourras Moins Bete'
    url = 'http://tumourrasmoinsbete.blogspot.fr'
    _categories = ('FRANCAIS', )
    get_first_comic_link = simulate_first_link
    first_url = 'http://tumourrasmoinsbete.blogspot.fr/2008/06/essai.html'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the 'newer' pager anchor when next_ is true, else the 'older' one."""
        if next_:
            pager_id = 'Blog1_blog-pager-newer-link'
        else:
            pager_id = 'Blog1_blog-pager-older-link'
        return last_soup.find('a', id=pager_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Describe the comic: page title, author and images of the article body."""
        page_title = soup.find('title').string
        body_imgs = soup.find('div', itemprop='description articleBody').find_all('img')
        author_name = soup.find('span', itemprop='author').string
        return {
            'img': [img['src'] for img in body_imgs],
            'author': author_name,
            'title': page_title,
        }
3177
3178
3179
class GeekAndPoke(GenericNavigableComic):
    """Class to retrieve Geek And Poke comics.

    The first comic is reached through first_url; later ones are found by
    following the 'prev-item' / 'next-item' anchors of each page."""
    name = 'geek'
    long_name = 'Geek And Poke'
    url = 'http://geek-and-poke.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://geek-and-poke.com/geekandpoke/2006/8/27/a-new-place-for-a-not-so-old-blog.html'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        The anchor with class 'prev-item' is used for the next comic and
        'next-item' for the previous one."""
        # NOTE(review): the swapped class names look deliberate (the site
        # presumably labels newer posts 'prev') - confirm against a live page.
        return last_soup.find('a', class_='prev-item' if next_ else 'next-item')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns title, alt text, description, author, publication date and
        the absolute urls of the images found in the post body."""
        title = soup.find('meta', property='og:title')['content']
        desc = soup.find('meta', property='og:description')['content']
        date_str = soup.find('time', class_='published')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        author = soup.find('a', rel='author').string
        # Posts use one of two containers for their content.
        div_content = (soup.find('div', class_="body entry-content") or
                       soup.find('div', class_="special-content"))
        imgs = div_content.find_all('img')
        # Images without a 'src' attribute cannot be downloaded: drop them.
        imgs = [i for i in imgs if i.get('src') is not None]
        # NOTE(review): `in` on a BeautifulSoup Tag tests its children, not its
        # attributes, so this check looks vacuous for <img> tags - confirm and
        # use has_attr('title') if a real alt/title comparison is wanted.
        assert all('title' not in i or i['alt'] == i['title'] for i in imgs)
        # 'alt' is always a string: empty when there is no image or no alt text
        # (the fallback used to be an empty list, unlike the normal value).
        alt = imgs[0].get('alt', "") if imgs else ""
        return {
            'title': title,
            'alt': alt,
            'description': desc,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
        }
3217
3218
3219
class GloryOwlComix(GenericNavigableComic):
    """Glory Owl comics, browsed through the blog pager links."""
    name = 'gloryowl'
    long_name = 'Glory Owl'
    url = 'http://gloryowlcomix.blogspot.fr'
    _categories = ('NSFW', 'FRANCAIS')
    get_first_comic_link = simulate_first_link
    first_url = 'http://gloryowlcomix.blogspot.fr/2013/02/1_7.html'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the 'newer' pager anchor when next_ is true, else the 'older' one."""
        if next_:
            pager_id = 'Blog1_blog-pager-newer-link'
        else:
            pager_id = 'Blog1_blog-pager-older-link'
        return last_soup.find('a', id=pager_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Describe the comic: page title, author and 'image_src' link targets."""
        page_title = soup.find('title').string
        image_links = soup.find_all('link', rel='image_src')
        author_name = soup.find('a', rel='author').string
        return {
            'img': [lnk['href'] for lnk in image_links],
            'author': author_name,
            'title': page_title,
        }
3244
3245
3246
class GenericTumblrV1(GenericComic):
    """Base class for comics read from Tumblr through its V1 API."""
    _categories = ('TUMBLR', )

    @classmethod
    def get_next_comic(cls, last_comic):
        """Yield the comics built from the posts returned by get_posts.

        Posts for which get_comic_info returns None are skipped."""
        for post in cls.get_posts(last_comic):
            info = cls.get_comic_info(post)
            if info is None:
                continue
            yield info

    @classmethod
    def get_url_from_post(cls, post):
        """Return the 'url' attribute of a post element."""
        return post['url']

    @classmethod
    def get_api_url(cls):
        """Return the '/api/read/' endpoint joined to the comic url."""
        return urljoin_wrapper(cls.url, '/api/read/')

    @classmethod
    def get_comic_info(cls, post):
        """Build the comic description for a post, or None if it is not a photo post."""
        if post['type'] != 'photo':
            return None
        post_id = int(post['id'])
        post_api_url = cls.get_api_url() + '?id=%d' % (post_id)
        posted_on = datetime.datetime.fromtimestamp(int(post['unix-timestamp'])).date()
        caption_tag = post.find('photo-caption')
        caption_text = caption_tag.string if caption_tag else ""
        joined_tags = ' '.join(tag.string for tag in post.find_all('tag'))
        # Photos may sit in dedicated 'photo' tags or directly in the post itself.
        photo_holders = post.find_all('photo') or [post]
        # Several resolutions are listed for each photo: keep the first one.
        photo_urls = [holder.find('photo-url') for holder in photo_holders]
        return {
            'url': cls.get_url_from_post(post),
            'url2': post['url-with-slug'],
            'day': posted_on.day,
            'month': posted_on.month,
            'year': posted_on.year,
            'title': caption_text,
            'tags': joined_tags,
            'img': [photo_url.string for photo_url in photo_urls],
            'tumblr-id': post_id,
            'api_url': post_api_url,
        }

    @classmethod
    def get_posts(cls, last_comic, nb_post_per_call=10):
        """Get posts using API. nb_post_per_call is max 50.

        The API lists posts from newer to older; they are accumulated until
        the post matching last_comic's url is met, then handed back in
        chronological order. An empty result is returned when the previous
        post cannot be found."""
        stop_url = last_comic['url'] if last_comic else None
        collected = []
        if last_comic is not None:
            # A deleted previous post would make us walk through the whole
            # history looking for something that no longer exists: probe it
            # first to fail early and clearly.
            previous_api_url = last_comic['api_url']
            try:
                get_soup_at_url(previous_api_url)
            except urllib.error.HTTPError:
                try:
                    get_soup_at_url(cls.url)
                except urllib.error.HTTPError:
                    print("Did not find previous post nor main url %s" % cls.url)
                else:
                    print("Did not find previous post %s : it might have been deleted" % previous_api_url)
                return reversed(collected)
        base_api_url = cls.get_api_url()
        listing = get_soup_at_url(base_api_url).find('posts')
        first_index, expected_total = int(listing['start']), int(listing['total'])
        assert first_index == 0
        for offset in range(0, expected_total, nb_post_per_call):
            page_url = base_api_url + '?start=%d&num=%d' % (offset, nb_post_per_call)
            page = get_soup_at_url(page_url).find('posts')
            page_start, page_total = int(page['start']), int(page['total'])
            assert offset == page_start, "%d != %d" % (offset, page_start)
            # A total changing between calls may happen and should be handled in the future
            assert expected_total == page_total, "%d != %d" % (expected_total, page_total)
            for post in page.find_all('post'):
                if stop_url and stop_url == cls.get_url_from_post(post):
                    return reversed(collected)
                collected.append(post)
        if stop_url is not None:
            print("Did not find %s : there might be a problem" % stop_url)
            return []
        return reversed(collected)
3339
3340
3341
class GenericTumblrV1Empty(GenericEmptyComic, GenericTumblrV1):
    """Variant of GenericTumblrV1 used for Tumblr comics with a problem.

    GenericEmptyComic is listed first among the bases, so its attributes
    take precedence over those of GenericTumblrV1."""
3344
3345
3346
class SaturdayMorningBreakfastCerealTumblr(GenericTumblrV1Empty):
    """Saturday Morning Breakfast Cereal comics, as posted on Tumblr.

    Built on GenericTumblrV1Empty, the variant for Tumblr comics with a problem."""
    # Also published at http://www.gocomics.com/saturday-morning-breakfast-cereal
    # Also published at http://www.smbc-comics.com
    name = 'smbc-tumblr'
    long_name = 'Saturday Morning Breakfast Cereal (from Tumblr)'
    url = 'http://smbc-comics.tumblr.com'
    _categories = ('SMBC', )
3354
3355
3356
class IrwinCardozo(GenericTumblrV1):
    """Irwin Cardozo comics, read through the Tumblr V1 API."""
    name = 'irwinc'
    long_name = 'Irwin Cardozo'
    url = 'http://irwincardozocomics.tumblr.com'
3361
3362
3363
class AccordingToDevin(GenericTumblrV1):
    """According To Devin comics, read through the Tumblr V1 API."""
    name = 'devin'
    long_name = 'According To Devin'
    url = 'http://accordingtodevin.tumblr.com'
3368
3369
3370
class ItsTheTieTumblr(GenericTumblrV1):
    """It's the tie comics, as posted on Tumblr."""
    # Also published at http://itsthetie.com
    # Also published at https://tapastic.com/series/itsthetie
    name = 'tie-tumblr'
    long_name = "It's the tie (from Tumblr)"
    url = 'http://itsthetie.tumblr.com'
    _categories = ('TIE', )
3378
3379
3380
class OctopunsTumblr(GenericTumblrV1):
    """Octopuns comics, as posted on Tumblr."""
    # Also published at http://www.octopuns.net
    name = 'octopuns-tumblr'
    long_name = 'Octopuns (from Tumblr)'
    url = 'http://octopuns.tumblr.com'
3386
3387
3388
class PicturesInBoxesTumblr(GenericTumblrV1):
    """Pictures In Boxes comics, as posted on Tumblr."""
    # Also published at http://www.picturesinboxes.com
    name = 'picturesinboxes-tumblr'
    long_name = 'Pictures in Boxes (from Tumblr)'
    url = 'http://picturesinboxescomic.tumblr.com'
3394
3395
3396
class TubeyToonsTumblr(GenericTumblrV1):
    """Tubey Toons comics, as posted on Tumblr."""
    # Also published at http://tapastic.com/series/Tubey-Toons
    # Also published at http://tubeytoons.com
    name = 'tubeytoons-tumblr'
    long_name = 'Tubey Toons (from Tumblr)'
    url = 'http://tubeytoons.tumblr.com'
    # NOTE(review): 'TUNEYTOONS' looks like a typo for 'TUBEYTOONS'; kept
    # unchanged because the tag presumably has to match the one used by the
    # other Tubey Toons class - confirm before renaming.
    _categories = ('TUNEYTOONS', )
3404
3405
3406
class UnearthedComicsTumblr(GenericTumblrV1):
    """Unearthed Comics, as posted on Tumblr."""
    # Also published at http://tapastic.com/series/UnearthedComics
    # Also published at http://unearthedcomics.com
    name = 'unearthed-tumblr'
    long_name = 'Unearthed Comics (from Tumblr)'
    url = 'http://unearthedcomics.tumblr.com'
    _categories = ('UNEARTHED', )
3414
3415
3416
class PieComic(GenericTumblrV1):
    """Pie Comic comics, read through the Tumblr V1 API."""
    name = 'pie'
    long_name = 'Pie Comic'
    url = 'http://piecomic.tumblr.com'
3421
3422
3423
class MrEthanDiamond(GenericTumblrV1):
    """Mr Ethan Diamond comics, read through the Tumblr V1 API."""
    name = 'diamond'
    long_name = 'Mr Ethan Diamond'
    url = 'http://mrethandiamond.tumblr.com'
3428
3429
3430
class Flocci(GenericTumblrV1):
    """floccinaucinihilipilification comics, read through the Tumblr V1 API."""
    name = 'flocci'
    long_name = 'floccinaucinihilipilification'
    url = 'http://floccinaucinihilipilificationa.tumblr.com'
3435
3436
3437
class UpAndOut(GenericTumblrV1):
    """Up & Out comics, as posted on Tumblr."""
    # Also published at http://tapastic.com/series/UP-and-OUT
    name = 'upandout'
    long_name = 'Up And Out (from Tumblr)'
    url = 'http://upandoutcomic.tumblr.com'
3443
3444
3445
class Pundemonium(GenericTumblrV1):
    """Pundemonium comics, read through the Tumblr V1 API."""
    name = 'pundemonium'
    long_name = 'Pundemonium'
    url = 'http://monstika.tumblr.com'
3450
3451
3452
class PoorlyDrawnLinesTumblr(GenericTumblrV1Empty):
    """Poorly Drawn Lines comics, as posted on Tumblr.

    Built on GenericTumblrV1Empty, the variant for Tumblr comics with a problem."""
    # Also published at http://poorlydrawnlines.com
    name = 'poorlydrawn-tumblr'
    long_name = 'Poorly Drawn Lines (from Tumblr)'
    url = 'http://pdlcomics.tumblr.com'
    _categories = ('POORLYDRAWN', )
3459
3460
3461
class PearShapedComics(GenericTumblrV1):
    """Pear-Shaped Comics, read through the Tumblr V1 API."""
    name = 'pearshaped'
    long_name = 'Pear-Shaped Comics'
    url = 'http://pearshapedcomics.com'
3466
3467
3468
class PondScumComics(GenericTumblrV1):
    """Pond Scum comics, read through the Tumblr V1 API."""
    name = 'pond'
    long_name = 'Pond Scum'
    url = 'http://pondscumcomic.tumblr.com'
3473
3474
3475
class MercworksTumblr(GenericTumblrV1):
    """Mercworks comics, as posted on Tumblr."""
    # Also published at http://mercworks.net
    name = 'mercworks-tumblr'
    long_name = 'Mercworks (from Tumblr)'
    url = 'http://mercworks.tumblr.com'
3481
3482
3483
class OwlTurdTumblr(GenericTumblrV1Empty):
    """Owl Turd comics, as posted on Tumblr.

    Built on GenericTumblrV1Empty, the variant for Tumblr comics with a problem."""
    # Also published at http://tapastic.com/series/Owl-Turd-Comix
    name = 'owlturd-tumblr'
    long_name = 'Owl Turd (from Tumblr)'
    url = 'http://owlturd.com'
    _categories = ('OWLTURD', )
3490
3491
3492
class VectorBelly(GenericTumblrV1):
    """Vector Belly comics, read through the Tumblr V1 API."""
    # Also published at http://vectorbelly.com
    name = 'vector'
    long_name = 'Vector Belly'
    url = 'http://vectorbelly.tumblr.com'
3498
3499
3500
class GoneIntoRapture(GenericTumblrV1):
    """Gone Into Rapture comics, read through the Tumblr V1 API."""
    # Also published at http://goneintorapture.tumblr.com
    # Also published at http://tapastic.com/series/Goneintorapture
    name = 'rapture'
    long_name = 'Gone Into Rapture'
    url = 'http://www.goneintorapture.com'
3507
3508
3509
class TheOatmealTumblr(GenericTumblrV1):
    """The Oatmeal comics, as posted on Tumblr."""
    # Also published at http://theoatmeal.com
    name = 'oatmeal-tumblr'
    long_name = 'The Oatmeal (from Tumblr)'
    url = 'http://oatmeal.tumblr.com'
3515
3516
3517
class HeckIfIKnowComicsTumblr(GenericTumblrV1):
    """Heck If I Know comics, read through the Tumblr V1 API."""
    # Also published at http://tapastic.com/series/Regular
    name = 'heck-tumblr'
    long_name = 'Heck if I Know comics (from Tumblr)'
    url = 'http://heckifiknowcomics.com'
3523
3524
3525
class MyJetPack(GenericTumblrV1):
    """My Jet Pack comics, read through the Tumblr V1 API."""
    name = 'jetpack'
    long_name = 'My Jet Pack'
    url = 'http://myjetpack.tumblr.com'
3530
3531
3532
class CheerUpEmoKidTumblr(GenericTumblrV1):
    """Cheer Up Emo Kid comics, as posted on Tumblr."""
    # Also published at http://www.cheerupemokid.com
    # Also published at http://tapastic.com/series/CUEK
    name = 'cuek-tumblr'
    long_name = 'Cheer Up Emo Kid (from Tumblr)'
    url = 'http://enzocomics.tumblr.com'
3539
3540
3541
class ForLackOfABetterComic(GenericTumblrV1Empty):
    """For Lack Of A Better Comic, as posted on Tumblr.

    Built on GenericTumblrV1Empty, the variant for Tumblr comics with a problem."""
    # Also published at http://forlackofabettercomic.com
    name = 'lack'
    long_name = 'For Lack Of A Better Comic'
    url = 'http://forlackofabettercomic.tumblr.com'
3547
3548
3549
class ZenPencilsTumblr(GenericTumblrV1):
    """Zen Pencils comics, as posted on Tumblr."""
    # Also published at http://zenpencils.com
    # Also published at http://www.gocomics.com/zen-pencils
    name = 'zenpencils-tumblr'
    long_name = 'Zen Pencils (from Tumblr)'
    url = 'http://zenpencils.tumblr.com'
    _categories = ('ZENPENCILS', )
3557
3558
3559
class ThreeWordPhraseTumblr(GenericTumblrV1):
    """Three Word Phrase comics, as posted on Tumblr."""
    # Also published at http://threewordphrase.com
    name = 'threeword-tumblr'
    long_name = 'Three Word Phrase (from Tumblr)'
    url = 'http://www.threewordphrase.tumblr.com'
3565
3566
3567
class TimeTrabbleTumblr(GenericTumblrV1):
    """Time Trabble comics, as posted on Tumblr."""
    # Also published at http://timetrabble.com
    name = 'timetrabble-tumblr'
    long_name = 'Time Trabble (from Tumblr)'
    url = 'http://timetrabble.tumblr.com'
3573
3574
3575
class SafelyEndangeredTumblr(GenericTumblrV1):
    """Safely Endangered comics, as posted on Tumblr."""
    # Also published at http://www.safelyendangered.com
    name = 'endangered-tumblr'
    long_name = 'Safely Endangered (from Tumblr)'
    url = 'http://tumblr.safelyendangered.com'
3581
3582
3583
class MouseBearComedyTumblr(GenericTumblrV1):
    """Mouse Bear Comedy comics, as posted on Tumblr."""
    # Also published at http://www.mousebearcomedy.com
    name = 'mousebear-tumblr'
    long_name = 'Mouse Bear Comedy (from Tumblr)'
    url = 'http://mousebearcomedy.tumblr.com'
3589
3590
3591
class BouletCorpTumblr(GenericTumblrV1):
    """BouletCorp comics, as posted on Tumblr."""
    # Also published at http://www.bouletcorp.com
    name = 'boulet-tumblr'
    long_name = 'Boulet Corp (from Tumblr)'
    url = 'http://bouletcorp.tumblr.com'
    _categories = ('BOULET', )
3598
3599
3600
class TheAwkwardYetiTumblr(GenericTumblrV1Empty):
    """The Awkward Yeti comics, as posted on Tumblr.

    Built on GenericTumblrV1Empty, the variant for Tumblr comics with a problem."""
    # Also published at http://www.gocomics.com/the-awkward-yeti
    # Also published at http://theawkwardyeti.com
    # Also published at https://tapastic.com/series/TheAwkwardYeti
    name = 'yeti-tumblr'
    long_name = 'The Awkward Yeti (from Tumblr)'
    url = 'http://larstheyeti.tumblr.com'
    _categories = ('YETI', )
3609
3610
3611
class NellucNhoj(GenericTumblrV1):
    """Nelluc Nhoj comics, read through the Tumblr V1 API."""
    name = 'nhoj'
    long_name = 'Nelluc Nhoj'
    url = 'http://nellucnhoj.com'
3616
3617
3618
class DownTheUpwardSpiralTumblr(GenericTumblrV1):
    """Down The Upward Spiral comics, as posted on Tumblr."""
    # Also published at http://www.downtheupwardspiral.com
    name = 'spiral-tumblr'
    long_name = 'Down the Upward Spiral (from Tumblr)'
    url = 'http://downtheupwardspiral.tumblr.com'
3624
3625
3626
class AsPerUsualTumblr(GenericTumblrV1Empty):
    """Class to retrieve As Per Usual comics.

    Built on GenericTumblrV1Empty, the variant for Tumblr comics with a problem."""
    # Also on https://tapastic.com/series/AsPerUsual
    name = 'usual-tumblr'
    long_name = 'As Per Usual (from Tumblr)'
    url = 'http://as-per-usual.tumblr.com'
    # Every other comic class spells this attribute '_categories'; the
    # original 'categories' spelling presumably went unnoticed by whatever
    # reads the category tags - confirm against GenericComic.
    _categories = ('DAMILEE', )
    # Former spelling, kept as an alias so existing readers keep working.
    categories = _categories
3633
3634
3635
class HotComicsForCoolPeopleTumblr(GenericTumblrV1):
    """Class to retrieve Hot Comics For Cool People."""
    # Also on https://tapastic.com/series/Hot-Comics-For-Cool-People
    # Also on http://hotcomics.biz (links to tumblr)
    # Also on http://hcfcp.com (links to tumblr)
    name = 'hotcomics-tumblr'
    long_name = 'Hot Comics For Cool People (from Tumblr)'
    url = 'http://hotcomicsforcoolpeople.tumblr.com'
    # Every other comic class spells this attribute '_categories'; the
    # original 'categories' spelling presumably went unnoticed by whatever
    # reads the category tags - confirm against GenericComic.
    _categories = ('DAMILEE', )
    # Former spelling, kept as an alias so existing readers keep working.
    categories = _categories
3644
3645
3646
class OneOneOneOneComicTumblr(GenericTumblrV1):
    """1111 Comics, as posted on Tumblr."""
    # Also published at http://www.1111comics.me
    # Also published at https://tapastic.com/series/1111-Comics
    name = '1111-tumblr'
    long_name = '1111 Comics (from Tumblr)'
    url = 'http://comics1111.tumblr.com'
    _categories = ('ONEONEONEONE', )
3654
3655
3656
class JhallComicsTumblr(GenericTumblrV1):
    """Jhall Comics, as posted on Tumblr."""
    # Also published at http://jhallcomics.com
    name = 'jhall-tumblr'
    long_name = 'Jhall Comics (from Tumblr)'
    url = 'http://jhallcomics.tumblr.com'
3662
3663
3664
class BerkeleyMewsTumblr(GenericTumblrV1):
    """Berkeley Mews comics, as posted on Tumblr."""
    # Also published at http://www.gocomics.com/berkeley-mews
    # Also published at http://www.berkeleymews.com
    name = 'berkeley-tumblr'
    long_name = 'Berkeley Mews (from Tumblr)'
    url = 'http://mews.tumblr.com'
    _categories = ('BERKELEY', )
3672
3673
3674
class JoanCornellaTumblr(GenericTumblrV1):
    """Joan Cornella comics, as posted on Tumblr."""
    # Also published at http://joancornella.net
    name = 'cornella-tumblr'
    long_name = 'Joan Cornella (from Tumblr)'
    url = 'http://cornellajoan.tumblr.com'
3680
3681
3682
class RespawnComicTumblr(GenericTumblrV1):
    """Respawn Comic, as posted on Tumblr."""
    # Also published at http://respawncomic.com
    name = 'respawn-tumblr'
    long_name = 'Respawn Comic (from Tumblr)'
    url = 'http://respawncomic.tumblr.com'
3688
3689
3690
class ChrisHallbeckTumblr(GenericTumblrV1Empty):
    """Class to retrieve Chris Hallbeck comics.

    Built on GenericTumblrV1Empty, the variant for Tumblr comics with a problem."""
    # Also on https://tapastic.com/ChrisHallbeck
    # Also on http://maximumble.com
    # Also on http://minimumble.com
    # Also on http://thebookofbiff.com
    name = 'hallbeck-tumblr'
    # Display name: the author's name is spelled 'Hallbeck', as in the url.
    long_name = 'Chris Hallbeck (from Tumblr)'
    url = 'http://chrishallbeck.tumblr.com'
    # NOTE(review): 'HALLBACK' carries the same misspelling but is left
    # untouched: the tag presumably has to match the one used by the other
    # Chris Hallbeck classes - rename them all together if desired.
    _categories = ('HALLBACK', )
3700
3701
3702
class ComicNuggets(GenericTumblrV1):
    """Comic Nuggets comics, read through the Tumblr V1 API."""
    name = 'nuggets'
    long_name = 'Comic Nuggets'
    url = 'http://comicnuggets.com'
3707
3708
3709
class PigeonGazetteTumblr(GenericTumblrV1):
    """The Pigeon Gazette comics, as posted on Tumblr."""
    # Also published at https://tapastic.com/series/The-Pigeon-Gazette
    name = 'pigeon-tumblr'
    long_name = 'The Pigeon Gazette (from Tumblr)'
    url = 'http://thepigeongazette.tumblr.com'
3715
3716
3717
class CancerOwl(GenericTumblrV1):
    """Cancer Owl comics, as posted on Tumblr."""
    # Also published at http://cancerowl.com
    name = 'cancerowl-tumblr'
    long_name = 'Cancer Owl (from Tumblr)'
    url = 'http://cancerowl.tumblr.com'
3723
3724
3725
class FowlLanguageTumblr(GenericTumblrV1):
    """Fowl Language comics, as posted on Tumblr."""
    # Also published at http://www.fowllanguagecomics.com
    # Also published at http://tapastic.com/series/Fowl-Language-Comics
    # Also published at http://www.gocomics.com/fowl-language
    name = 'fowllanguage-tumblr'
    long_name = 'Fowl Language Comics (from Tumblr)'
    url = 'http://fowllanguagecomics.tumblr.com'
    _categories = ('FOWLLANGUAGE', )
3734
3735
3736
class TheOdd1sOutTumblr(GenericTumblrV1):
    """The Odd 1s Out comics, as posted on Tumblr."""
    # Also published at http://theodd1sout.com
    # Also published at https://tapastic.com/series/Theodd1sout
    name = 'theodd-tumblr'
    long_name = 'The Odd 1s Out (from Tumblr)'
    url = 'http://theodd1sout.tumblr.com'
3743
3744
3745
class TheUnderfoldTumblr(GenericTumblrV1):
    """The Underfold comics, as posted on Tumblr."""
    # Also published at http://theunderfold.com
    name = 'underfold-tumblr'
    long_name = 'The Underfold (from Tumblr)'
    url = 'http://theunderfold.tumblr.com'
3751
3752
3753
class LolNeinTumblr(GenericTumblrV1Empty):
    """Lol Nein comics, as posted on Tumblr.

    Built on GenericTumblrV1Empty, the variant for Tumblr comics with a problem."""
    # Also published at http://lolnein.com
    name = 'lolnein-tumblr'
    long_name = 'Lol Nein (from Tumblr)'
    url = 'http://lolneincom.tumblr.com'
3759
3760
3761
class FatAwesomeComicsTumblr(GenericTumblrV1):
    """Fat Awesome Comics, as posted on Tumblr."""
    # Also published at http://fatawesome.com/comics
    name = 'fatawesome-tumblr'
    long_name = 'Fat Awesome (from Tumblr)'
    url = 'http://fatawesomecomedy.tumblr.com'
3767
3768
3769
class TheWorldIsFlatTumblr(GenericTumblrV1):
    """The World Is Flat comics, as posted on Tumblr."""
    # Also published at https://tapastic.com/series/The-World-is-Flat
    name = 'flatworld-tumblr'
    long_name = 'The World Is Flat (from Tumblr)'
    url = 'http://theworldisflatcomics.tumblr.com'
3775
3776
3777
class DorrisMc(GenericTumblrV1Empty):
    """Dorris Mc comics, read through the Tumblr V1 API.

    Built on GenericTumblrV1Empty, the variant for Tumblr comics with a problem."""
    # Also published at http://www.gocomics.com/dorris-mccomics
    name = 'dorrismc'
    long_name = 'Dorris Mc'
    url = 'http://dorrismccomics.com'
3783
3784
3785
class LeleozTumblr(GenericTumblrV1Empty):
    """Leleoz comics, as posted on Tumblr.

    Built on GenericTumblrV1Empty, the variant for Tumblr comics with a problem."""
    # Also published at https://tapastic.com/series/Leleoz
    name = 'leleoz-tumblr'
    long_name = 'Leleoz (from Tumblr)'
    url = 'http://leleozcomics.tumblr.com'
3791
3792
3793
class MoonBeardTumblr(GenericTumblrV1):
    """Moon Beard comics, read through the Tumblr V1 API."""
    # Also published at http://moonbeard.com
    # Also published at http://www.webtoons.com/en/comedy/moon-beard/list?title_no=471
    name = 'moonbeard-tumblr'
    long_name = 'Moon Beard (from Tumblr)'
    url = 'http://blog.squiresjam.es/moonbeard'
3800
3801
3802
class AComik(GenericTumblrV1):
    """A Comik comics, read through the Tumblr V1 API."""
    name = 'comik'
    long_name = 'A Comik'
    url = 'http://acomik.com'
3807
3808
3809
class ClassicRandy(GenericTumblrV1):
    """Classic Randy comics, read through the Tumblr V1 API."""
    name = 'randy'
    long_name = 'Classic Randy'
    url = 'http://classicrandy.tumblr.com'
3814
3815
3816
class DagssonTumblr(GenericTumblrV1):
    """Dagsson comics, as posted on Tumblr."""
    # Also published at http://www.dagsson.com
    name = 'dagsson-tumblr'
    long_name = 'Dagsson Hugleikur (from Tumblr)'
    url = 'http://hugleikurdagsson.tumblr.com'
3822
3823
3824
class LinsEditionsTumblr(GenericTumblrV1):
    """L.I.N.S. Editions comics, as posted on Tumblr."""
    # Also published at https://linsedition.com
    name = 'lins-tumblr'
    long_name = 'L.I.N.S. Editions (from Tumblr)'
    url = 'http://linscomics.tumblr.com'
    _categories = ('LINS', )
3831
3832
3833
class OrigamiHotDish(GenericTumblrV1):
    """Retriever for Origami Hot Dish comics, built on GenericTumblrV1."""
    name = 'origamihotdish'
    long_name = 'Origami Hot Dish'
    url = 'http://origamihotdish.com'
3838
3839
3840
class HitAndMissComicsTumblr(GenericTumblrV1):
    """Retriever for Hit and Miss Comics, built on GenericTumblrV1."""
    name = 'hitandmiss'
    long_name = 'Hit and Miss Comics'
    url = 'http://hitandmisscomics.tumblr.com'
3845
3846
3847
class HMBlanc(GenericTumblrV1):
    """Retriever for HM Blanc comics, built on GenericTumblrV1."""
    name = 'hmblanc'
    long_name = 'HM Blanc'
    url = 'http://hmblanc.tumblr.com'
3852
3853
3854
class TalesOfAbsurdityTumblr(GenericTumblrV1):
    """Retriever for Tales Of Absurdity comics, built on GenericTumblrV1."""
    # Also published at:
    #  - http://talesofabsurdity.com
    #  - http://tapastic.com/series/Tales-Of-Absurdity
    name = 'absurdity-tumblr'
    long_name = 'Tales of Absurdity (from Tumblr)'
    url = 'http://talesofabsurdity.tumblr.com'
    _categories = ('ABSURDITY',)
3862
3863
3864
class RobbieAndBobby(GenericTumblrV1):
    """Retriever for Robbie And Bobby comics, built on GenericTumblrV1."""
    # Also published at http://robbieandbobby.com
    name = 'robbie-tumblr'
    long_name = 'Robbie And Bobby (from Tumblr)'
    url = 'http://robbieandbobby.tumblr.com'
3870
3871
3872
class ElectricBunnyComicTumblr(GenericTumblrV1):
    """Retriever for Electric Bunny Comics, built on GenericTumblrV1."""
    # Also published at
    # http://www.electricbunnycomics.com/View/Comic/153/Welcome+to+Hell
    name = 'bunny-tumblr'
    long_name = 'Electric Bunny Comic (from Tumblr)'
    url = 'http://electricbunnycomics.tumblr.com'
3878
3879
3880
class Hoomph(GenericTumblrV1):
    """Retriever for Hoomph comics, built on GenericTumblrV1."""
    name = 'hoomph'
    long_name = 'Hoomph'
    url = 'http://hoom.ph'
3885
3886
3887
class BFGFSTumblr(GenericTumblrV1):
    """Retriever for BFGFS comics, built on GenericTumblrV1."""
    # Also published at:
    #  - https://tapastic.com/series/BFGFS
    #  - http://bfgfs.com
    name = 'bfgfs-tumblr'
    long_name = 'BFGFS (from Tumblr)'
    url = 'http://bfgfs.tumblr.com'
3894
3895
3896
class DoodleForFood(GenericTumblrV1Empty):
    """Retriever for Doodle For Food comics, built on GenericTumblrV1Empty."""
    # Also published at http://doodleforfood.com
    name = 'doodle'
    long_name = 'Doodle For Food'
    url = 'http://doodleforfood.com'
3902
3903
3904
class CassandraCalinTumblr(GenericTumblrV1Empty):
    """Retriever for C. Cassandra comics, built on GenericTumblrV1Empty."""
    # Also published at:
    #  - http://cassandracalin.com
    #  - https://tapastic.com/series/C-Cassandra-comics
    name = 'cassandra-tumblr'
    long_name = 'Cassandra Calin (from Tumblr)'
    url = 'http://c-cassandra.tumblr.com'
3911
3912
3913
class DougWasTaken(GenericTumblrV1):
    """Retriever for Doug Was Taken comics, built on GenericTumblrV1."""
    name = 'doog'
    long_name = 'Doug Was Taken'
    url = 'http://dougwastaken.tumblr.com'
3918
3919
3920
class MandatoryRollerCoaster(GenericTumblrV1Empty):
    """Retriever for Mandatory Roller Coaster comics, built on GenericTumblrV1Empty."""
    name = 'rollercoaster'
    long_name = 'Mandatory Roller Coaster'
    url = 'http://mandatoryrollercoaster.com'
3925
3926
3927
class CEstPasEnRegardantSesPompes(GenericTumblrV1Empty):
    """Retriever for C'Est Pas En Regardant Ses Pompes (...) comics.

    Built on GenericTumblrV1Empty.
    """
    name = 'cperspqccltt'
    long_name = 'C Est Pas En Regardant Ses Pompes (...)'
    url = 'http://cperspqccltt.tumblr.com'
3932
3933
3934
class TheGrohlTroll(GenericTumblrV1Empty):
    """Retriever for The Grohl Troll comics, built on GenericTumblrV1Empty."""
    name = 'grohltroll'
    long_name = 'The Grohl Troll'
    url = 'http://thegrohltroll.com'
3939
3940
3941
class WebcomicName(GenericTumblrV1Empty):
    """Retriever for Webcomic Name comics, built on GenericTumblrV1Empty."""
    name = 'webcomicname'
    long_name = 'Webcomic Name'
    url = 'http://webcomicname.com'
3946
3947
3948
class BooksOfAdam(GenericTumblrV1Empty):
    """Retriever for Books of Adam comics, built on GenericTumblrV1Empty."""
    # Also published at http://www.booksofadam.com
    name = 'booksofadam'
    long_name = 'Books of Adam'
    url = 'http://booksofadam.tumblr.com'
3954
3955
3956
class HarkAVagrant(GenericTumblrV1Empty):
    """Retriever for Hark A Vagrant comics, built on GenericTumblrV1Empty."""
    # Also published at http://www.harkavagrant.com
    name = 'hark-tumblr'
    long_name = 'Hark A Vagrant (from Tumblr)'
    url = 'http://beatonna.tumblr.com'
3962
3963
3964
class OurSuperAdventureTumblr(GenericTumblrV1Empty):
    """Retriever for Our Super Adventure comics, built on GenericTumblrV1Empty."""
    # Also published at:
    #  - https://tapastic.com/series/Our-Super-Adventure
    #  - http://www.oursuperadventure.com
    # Related site: http://sarahgraley.com
    name = 'superadventure-tumblr'
    long_name = 'Our Super Adventure (from Tumblr)'
    url = 'http://sarahssketchbook.tumblr.com'
3972
3973
3974
class JakeLikesOnions(GenericTumblrV1):
    """Retriever for Jake Likes Onions comics, built on GenericTumblrV1."""
    name = 'jake'
    long_name = 'Jake Likes Onions'
    url = 'http://jakelikesonions.com'
3979
3980
3981
class InYourFaceCake(GenericTumblrV1Empty):
    """Retriever for In Your Face Cake comics, built on GenericTumblrV1Empty."""
    name = 'inyourfacecake-tumblr'
    long_name = 'In Your Face Cake (from Tumblr)'
    url = 'http://in-your-face-cake.tumblr.com'
3986
3987
3988
class Robospunk(GenericTumblrV1):
    """Retriever for Robospunk comics, built on GenericTumblrV1."""
    name = 'robospunk'
    long_name = 'Robospunk'
    url = 'http://robospunk.com'
3993
3994
3995
class BananaTwinky(GenericTumblrV1):
    """Retriever for Banana Twinky comics, built on GenericTumblrV1."""
    name = 'banana'
    long_name = 'Banana Twinky'
    url = 'http://bananatwinky.tumblr.com'
4000
4001
4002
class YesterdaysPopcornTumblr(GenericTumblrV1):
    """Retriever for Yesterday's Popcorn comics, built on GenericTumblrV1."""
    # Also published at:
    #  - http://www.yesterdayspopcorn.com
    #  - https://tapastic.com/series/Yesterdays-Popcorn
    name = 'popcorn-tumblr'
    long_name = "Yesterday's Popcorn (from Tumblr)"
    url = 'http://yesterdayspopcorn.tumblr.com'
4009
4010
4011
class TwistedDoodles(GenericTumblrV1Empty):
    """Retriever for Twisted Doodles comics, built on GenericTumblrV1Empty."""
    name = 'twisted'
    long_name = 'Twisted Doodles'
    url = 'http://www.twisteddoodles.com'
4016
4017
4018
class UbertoolTumblr(GenericTumblrV1Empty):
    """Retriever for Ubertool comics, built on GenericTumblrV1Empty."""
    # Also published at:
    #  - http://ubertoolcomic.com
    #  - https://tapastic.com/series/ubertool
    name = 'ubertool-tumblr'
    long_name = 'Ubertool (from Tumblr)'
    url = 'http://ubertool.tumblr.com'
    _categories = ('UBERTOOL',)
4026
4027
4028
class LittleLifeLinesTumblr(GenericTumblrV1):
    """Retriever for Little Life Lines comics, built on GenericTumblrV1."""
    # Also published at http://www.littlelifelines.com
    name = 'life-tumblr'
    long_name = 'Little Life Lines (from Tumblr)'
    url = 'https://little-life-lines.tumblr.com'
4034
4035
4036
class TheyCanTalk(GenericTumblrV1Empty):
    """Retriever for They Can Talk comics, built on GenericTumblrV1Empty."""
    name = 'theycantalk'
    long_name = 'They Can Talk'
    url = 'http://theycantalk.com'
4041
4042
4043
class Will5NeverCome(GenericTumblrV1Empty):
    """Retriever for Will 5:00 Never Come comics, built on GenericTumblrV1Empty."""
    name = 'will5'
    long_name = 'Will 5:00 Never Come ?'
    url = 'http://will5nevercome.com'
4048
4049
4050
class Sephko(GenericTumblrV1):
    """Retriever for Sephko comics, built on GenericTumblrV1."""
    # Also published at http://www.sephko.com
    name = 'sephko'
    long_name = 'Sephko'
    url = 'http://sephko.tumblr.com'
4056
4057
4058
class BlazersAtDawn(GenericTumblrV1):
    """Retriever for Blazers At Dawn comics, built on GenericTumblrV1."""
    name = 'blazers'
    long_name = 'Blazers At Dawn'
    url = 'http://blazersatdawn.tumblr.com'
4063
4064
4065
class ArtByMoga(GenericTumblrV1Empty):
    """Retriever for Art By Moga comics, built on GenericTumblrV1Empty."""
    name = 'moga'
    long_name = 'Art By Moga'
    url = 'http://artbymoga.tumblr.com'
4070
4071
4072
class HorovitzComics(GenericListableComic):
    """Shared retrieval logic for the comic series hosted on horovitzcomics.com.

    Concrete series override `link_re` with a compiled pattern whose first
    group captures the comic number found in the archive links.
    """
    url = 'http://www.horovitzcomics.com'
    _categories = ('HOROVITZ',)
    # Captures three numeric path components of the image URL;
    # get_comic_info reads them as year, month and day, in that order.
    img_re = re.compile('.*comics/([0-9]*)/([0-9]*)/([0-9]*)/.*$')
    # Placeholder: every concrete series supplies its own pattern.
    link_re = NotImplemented
    get_url_from_archive_element = join_cls_url_to_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the dictionary describing one comic.

        `link` is the archive anchor of the comic: its href holds the comic
        number and its text is used as title. `soup` is searched for the
        single <img id="comic"> whose src provides the image and the date.
        """
        comic_num = int(cls.link_re.match(link['href']).group(1))
        comic_title = link.string
        images = soup.find_all('img', id='comic')
        assert len(images) == 1
        date_parts = cls.img_re.match(images[0]['src']).groups()
        year, month, day = (int(part) for part in date_parts)
        return {
            'title': comic_title,
            'day': day,
            'month': month,
            'year': year,
            'img': [image['src'] for image in images],
            'num': comic_num,
        }

    @classmethod
    def get_archive_elements(cls):
        """Return the archive anchors matching `link_re`, in reversed page order."""
        archive_soup = get_soup_at_url('http://www.horovitzcomics.com/comics/archive/')
        matching_links = archive_soup.find_all('a', href=cls.link_re)
        return reversed(matching_links)
4103
4104
4105
class HorovitzNew(HorovitzComics):
    """Retriever for the 'new' Horovitz comics, built on HorovitzComics."""
    name = 'horovitznew'
    long_name = 'Horovitz New'
    # The captured group is the comic number read by get_comic_info.
    link_re = re.compile('^/comics/new/([0-9]+)$')
4110
4111
4112
class HorovitzClassic(HorovitzComics):
    """Retriever for the 'classic' Horovitz comics, built on HorovitzComics."""
    name = 'horovitzclassic'
    long_name = 'Horovitz Classic'
    # The captured group is the comic number read by get_comic_info.
    link_re = re.compile('^/comics/classic/([0-9]+)$')
4117
4118
4119
class GenericGoComic(GenericNavigableComic):
    """Shared retrieval logic for the comics published on gocomics.com."""
    _categories = ('GOCOMIC',)
    # Captures the last three path components of a comic URL;
    # get_comic_info reads them as year, month and day, in that order.
    url_date_re = re.compile('.*/([0-9]*)/([0-9]*)/([0-9]*)$')

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'beginning' anchor found on the comic's main page."""
        main_page = get_soup_at_url(cls.url)
        return main_page.find('a', class_='beginning')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the dated 'next' (when next_ is true) or 'prev' anchor."""
        if next_:
            css_class = 'next'
        else:
            css_class = 'prev'
        return last_soup.find('a', class_=css_class, href=cls.url_date_re)

    @classmethod
    def get_url_from_link(cls, link):
        """Resolve the href of `link` against the gocomics.com root URL."""
        return urljoin_wrapper('http://www.gocomics.com', link['href'])

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the dictionary describing one comic.

        The date comes from the URL of `link`; the image is the last
        <img class="strip"> of `soup` and the author its 'author' meta tag.
        """
        comic_url = cls.get_url_from_link(link)
        year, month, day = map(int, cls.url_date_re.match(comic_url).groups())
        strips = soup.find_all('img', class_='strip')
        author_tag = soup.find('meta', attrs={'name': 'author'})
        return {
            'day': day,
            'month': month,
            'year': year,
            'img': [strips[-1]['src']],
            'author': author_tag['content'],
        }
4152
4153
4154
class PearlsBeforeSwine(GenericGoComic):
    """Retriever for Pearls Before Swine comics, built on GenericGoComic."""
    name = 'pearls'
    long_name = 'Pearls Before Swine'
    url = 'http://www.gocomics.com/pearlsbeforeswine'
4159
4160
4161
class Peanuts(GenericGoComic):
    """Retriever for Peanuts comics, built on GenericGoComic."""
    name = 'peanuts'
    long_name = 'Peanuts'
    url = 'http://www.gocomics.com/peanuts'
4166
4167
4168
class MattWuerker(GenericGoComic):
    """Retriever for Matt Wuerker comics, built on GenericGoComic."""
    name = 'wuerker'
    long_name = 'Matt Wuerker'
    url = 'http://www.gocomics.com/mattwuerker'
4173
4174
4175
class TomToles(GenericGoComic):
    """Retriever for Tom Toles comics, built on GenericGoComic."""
    name = 'toles'
    long_name = 'Tom Toles'
    url = 'http://www.gocomics.com/tomtoles'
4180
4181
4182
class BreakOfDay(GenericGoComic):
    """Retriever for Break Of Day comics, built on GenericGoComic."""
    name = 'breakofday'
    long_name = 'Break Of Day'
    url = 'http://www.gocomics.com/break-of-day'
4187
4188
4189
class Brevity(GenericGoComic):
    """Retriever for Brevity comics, built on GenericGoComic."""
    name = 'brevity'
    long_name = 'Brevity'
    url = 'http://www.gocomics.com/brevity'
4194
4195
4196
class MichaelRamirez(GenericGoComic):
    """Retriever for Michael Ramirez comics, built on GenericGoComic."""
    name = 'ramirez'
    long_name = 'Michael Ramirez'
    url = 'http://www.gocomics.com/michaelramirez'
4201
4202
4203
class MikeLuckovich(GenericGoComic):
    """Retriever for Mike Luckovich comics, built on GenericGoComic."""
    name = 'luckovich'
    long_name = 'Mike Luckovich'
    url = 'http://www.gocomics.com/mikeluckovich'
4208
4209
4210
class JimBenton(GenericGoComic):
    """Retriever for Jim Benton comics, built on GenericGoComic."""
    # Also published at http://jimbenton.tumblr.com
    name = 'benton'
    long_name = 'Jim Benton'
    url = 'http://www.gocomics.com/jim-benton-cartoons'
4216
4217
4218
class TheArgyleSweater(GenericGoComic):
    """Retriever for The Argyle Sweater comics, built on GenericGoComic."""
    name = 'argyle'
    long_name = 'Argyle Sweater'
    url = 'http://www.gocomics.com/theargylesweater'
4223
4224
4225
class SunnyStreet(GenericGoComic):
    """Retriever for Sunny Street comics, built on GenericGoComic."""
    # Also published at http://www.sunnystreetcomics.com
    name = 'sunny'
    long_name = 'Sunny Street'
    url = 'http://www.gocomics.com/sunny-street'
4231
4232
4233
class OffTheMark(GenericGoComic):
    """Retriever for Off The Mark comics, built on GenericGoComic."""
    # Also published at https://www.offthemark.com
    name = 'offthemark'
    long_name = 'Off The Mark'
    url = 'http://www.gocomics.com/offthemark'
4239
4240
4241
class WuMo(GenericGoComic):
    """Retriever for WuMo comics, built on GenericGoComic."""
    # Also published at http://wumo.com
    name = 'wumo'
    long_name = 'WuMo'
    url = 'http://www.gocomics.com/wumo'
4247
4248
4249
class LunarBaboon(GenericGoComic):
    """Retriever for Lunar Baboon comics, built on GenericGoComic."""
    # Also published at:
    #  - http://www.lunarbaboon.com
    #  - https://tapastic.com/series/Lunarbaboon
    name = 'lunarbaboon'
    long_name = 'Lunar Baboon'
    url = 'http://www.gocomics.com/lunarbaboon'
4256
4257
4258
class SandersenGocomic(GenericGoComic):
    """Retriever for Sarah Andersen comics, built on GenericGoComic."""
    # Also published at:
    #  - http://sarahcandersen.com
    #  - http://tapastic.com/series/Doodle-Time
    name = 'sandersen-goc'
    long_name = 'Sarah Andersen (from GoComics)'
    url = 'http://www.gocomics.com/sarahs-scribbles'
4265
4266
4267
class SaturdayMorningBreakfastCerealGoComic(GenericGoComic):
    """Retriever for Saturday Morning Breakfast Cereal comics.

    Built on GenericGoComic.
    """
    # Also published at:
    #  - http://smbc-comics.tumblr.com
    #  - http://www.smbc-comics.com
    name = 'smbc-goc'
    long_name = 'Saturday Morning Breakfast Cereal (from GoComics)'
    url = 'http://www.gocomics.com/saturday-morning-breakfast-cereal'
    _categories = ('SMBC',)
4275
4276
4277
class CalvinAndHobbesGoComic(GenericGoComic):
    """Retriever for Calvin and Hobbes comics, built on GenericGoComic."""
    # Taken from gocomics, not from
    # http://marcel-oehler.marcellosendos.ch/comics/ch/
    name = 'calvin-goc'
    long_name = 'Calvin and Hobbes (from GoComics)'
    url = 'http://www.gocomics.com/calvinandhobbes'
4283
4284
4285
class RallGoComic(GenericGoComic):
    """Retriever for Ted Rall comics, built on GenericGoComic."""
    # Also published at http://rall.com/comic
    name = 'rall-goc'
    long_name = 'Ted Rall (from GoComics)'
    url = 'http://www.gocomics.com/tedrall'
    _categories = ('RALL',)
4292
4293
4294
class TheAwkwardYetiGoComic(GenericGoComic):
    """Retriever for The Awkward Yeti comics, built on GenericGoComic."""
    # Also published at:
    #  - http://larstheyeti.tumblr.com
    #  - http://theawkwardyeti.com
    #  - https://tapastic.com/series/TheAwkwardYeti
    name = 'yeti-goc'
    long_name = 'The Awkward Yeti (from GoComics)'
    url = 'http://www.gocomics.com/the-awkward-yeti'
    _categories = ('YETI',)
4303
4304
4305
class BerkeleyMewsGoComics(GenericGoComic):
    """Retriever for Berkeley Mews comics, built on GenericGoComic."""
    # Also published at:
    #  - http://mews.tumblr.com
    #  - http://www.berkeleymews.com
    name = 'berkeley-goc'
    long_name = 'Berkeley Mews (from GoComics)'
    url = 'http://www.gocomics.com/berkeley-mews'
    _categories = ('BERKELEY',)
4313
4314
4315
class SheldonGoComics(GenericGoComic):
    """Retriever for Sheldon comics, built on GenericGoComic."""
    # Also published at http://www.sheldoncomics.com
    name = 'sheldon-goc'
    long_name = 'Sheldon Comics (from GoComics)'
    url = 'http://www.gocomics.com/sheldon'
4321
4322
4323
class FowlLanguageGoComics(GenericGoComic):
    """Retriever for Fowl Language comics, built on GenericGoComic."""
    # Also published at:
    #  - http://www.fowllanguagecomics.com
    #  - http://tapastic.com/series/Fowl-Language-Comics
    #  - http://fowllanguagecomics.tumblr.com
    name = 'fowllanguage-goc'
    long_name = 'Fowl Language Comics (from GoComics)'
    url = 'http://www.gocomics.com/fowl-language'
    _categories = ('FOWLLANGUAGE',)
4332
4333
4334
class NickAnderson(GenericGoComic):
    """Retriever for Nick Anderson comics, built on GenericGoComic."""
    name = 'nickanderson'
    long_name = 'Nick Anderson'
    url = 'http://www.gocomics.com/nickanderson'
4339
4340
4341
class GarfieldGoComics(GenericGoComic):
    """Retriever for Garfield comics, built on GenericGoComic."""
    # Also published at http://garfield.com
    name = 'garfield-goc'
    long_name = 'Garfield (from GoComics)'
    url = 'http://www.gocomics.com/garfield'
    _categories = ('GARFIELD',)
4348
4349
4350
class DorrisMcGoComics(GenericGoComic):
    """Retriever for Dorris Mc comics, built on GenericGoComic."""
    # Also published at http://dorrismccomics.com
    name = 'dorrismc-goc'
    long_name = 'Dorris Mc (from GoComics)'
    url = 'http://www.gocomics.com/dorris-mccomics'
4356
4357
4358
class FoxTrot(GenericGoComic):
    """Retriever for FoxTrot comics, built on GenericGoComic."""
    name = 'foxtrot'
    long_name = 'FoxTrot'
    url = 'http://www.gocomics.com/foxtrot'
4363
4364
4365
class FoxTrotClassics(GenericGoComic):
    """Retriever for FoxTrot Classics comics, built on GenericGoComic."""
    name = 'foxtrot-classics'
    long_name = 'FoxTrot Classics'
    url = 'http://www.gocomics.com/foxtrotclassics'
4370
4371
4372
class MisterAndMeGoComics(GenericGoComic):
    """Retriever for Mister & Me comics, built on GenericGoComic."""
    # Also published at:
    #  - http://www.mister-and-me.com
    #  - https://tapastic.com/series/Mister-and-Me
    name = 'mister-goc'
    long_name = 'Mister & Me (from GoComics)'
    url = 'http://www.gocomics.com/mister-and-me'
4379
4380
4381
class NonSequitur(GenericGoComic):
    """Retriever for Non Sequitur (Wiley Miller) comics, built on GenericGoComic."""
    name = 'nonsequitur'
    long_name = 'Non Sequitur'
    url = 'http://www.gocomics.com/nonsequitur'
4386
4387
4388
class GenericTapasticComic(GenericListableComic):
    """Shared retrieval logic for the comics published on tapastic.com."""
    _categories = ('TAPASTIC',)

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Build the dictionary describing one episode.

        `archive_elt` is one entry of the list returned by
        get_archive_elements; its 'publishDate', 'title' and 'id' keys are
        read. Returns None (after printing a notice) when `soup` holds no
        <img class="art-image"> yet.
        """
        # 'publishDate' is divided by 1000 before being read as a timestamp.
        # fromtimestamp() converts to local time, so the resulting date
        # depends on the timezone of the machine.
        published = datetime.datetime.fromtimestamp(
            int(archive_elt['publishDate']) / 1000.0).date()
        images = soup.find_all('img', class_='art-image')
        if not images:
            episode_url = cls.get_url_from_archive_element(archive_elt)
            print("Comic %s is being uploaded, retry later" % episode_url)
            return None
        return {
            'day': published.day,
            'year': published.year,
            'month': published.month,
            'img': [image['src'] for image in images],
            'title': archive_elt['title'],
        }

    @classmethod
    def get_url_from_archive_element(cls, archive_elt):
        """Return the episode URL built from the entry's 'id'."""
        return 'http://tapastic.com/episode/' + str(archive_elt['id'])

    @classmethod
    def get_archive_elements(cls):
        """Return the episode list embedded in the javascript of the series page.

        The first line of the page that starts with 'episodeList : ' and
        ends with ',' is stripped of both markers and parsed as JSON. An
        IndexError is raised when no line matches.
        """
        prefix, suffix = 'episodeList : ', ','
        # The information only exists in the javascript part of the page;
        # lacking a cleaner way to reach it, scan the raw lines for it.
        raw_lines = urlopen_wrapper(cls.url).readlines()
        stripped_lines = (raw.decode('utf-8').strip() for raw in raw_lines)
        payloads = [line[len(prefix):-len(suffix)]
                    for line in stripped_lines
                    if line.startswith(prefix) and line.endswith(suffix)]
        return json.loads(payloads[0])
4421
4422
4423
class VegetablesForDessert(GenericTapasticComic):
    """Retriever for Vegetables For Dessert comics, built on GenericTapasticComic."""
    # Also published at http://vegetablesfordessert.tumblr.com
    name = 'vegetables'
    long_name = 'Vegetables For Dessert'
    url = 'http://tapastic.com/series/vegetablesfordessert'
4429
4430
4431
class FowlLanguageTapa(GenericTapasticComic):
    """Retriever for Fowl Language comics, built on GenericTapasticComic."""
    # Also published at:
    #  - http://www.fowllanguagecomics.com
    #  - http://fowllanguagecomics.tumblr.com
    #  - http://www.gocomics.com/fowl-language
    name = 'fowllanguage-tapa'
    long_name = 'Fowl Language Comics (from Tapastic)'
    url = 'http://tapastic.com/series/Fowl-Language-Comics'
    _categories = ('FOWLLANGUAGE',)
4440
4441
4442
class OscillatingProfundities(GenericTapasticComic):
    """Retriever for Oscillating Profundities comics, built on GenericTapasticComic."""
    name = 'oscillating'
    long_name = 'Oscillating Profundities'
    url = 'http://tapastic.com/series/oscillatingprofundities'
4447
4448
4449
class ZnoflatsComics(GenericTapasticComic):
    """Retriever for Znoflats comics, built on GenericTapasticComic."""
    name = 'znoflats'
    long_name = 'Znoflats Comics'
    url = 'http://tapastic.com/series/Znoflats-Comics'
4454
4455
4456
class SandersenTapastic(GenericTapasticComic):
    """Retriever for Sarah Andersen comics, built on GenericTapasticComic."""
    # Also published at:
    #  - http://sarahcandersen.com
    #  - http://www.gocomics.com/sarahs-scribbles
    name = 'sandersen-tapa'
    long_name = 'Sarah Andersen (from Tapastic)'
    url = 'http://tapastic.com/series/Doodle-Time'
4463
4464
4465
class TubeyToonsTapastic(GenericTapasticComic):
    """Retriever for TubeyToons comics, built on GenericTapasticComic."""
    # Also published at:
    #  - http://tubeytoons.com
    #  - http://tubeytoons.tumblr.com
    name = 'tubeytoons-tapa'
    long_name = 'Tubey Toons (from Tapastic)'
    url = 'http://tapastic.com/series/Tubey-Toons'
    # NOTE(review): key reads 'TUNEYTOONS', not 'TUBEYTOONS' - left as is,
    # confirm against the other Tubey Toons classes before renaming.
    _categories = ('TUNEYTOONS',)
4473
4474
4475
class AnythingComicTapastic(GenericTapasticComic):
    """Retriever for Anything Comic comics, built on GenericTapasticComic."""
    # Also published at http://www.anythingcomic.com
    name = 'anythingcomic-tapa'
    long_name = 'Anything Comic (from Tapastic)'
    url = 'http://tapastic.com/series/anything'
4481
4482
4483
class UnearthedComicsTapastic(GenericTapasticComic):
    """Retriever for Unearthed comics, built on GenericTapasticComic."""
    # Also published at:
    #  - http://unearthedcomics.com
    #  - http://unearthedcomics.tumblr.com
    name = 'unearthed-tapa'
    long_name = 'Unearthed Comics (from Tapastic)'
    url = 'http://tapastic.com/series/UnearthedComics'
    _categories = ('UNEARTHED',)
4491
4492
4493
class EverythingsStupidTapastic(GenericTapasticComic):
    """Retriever for Everything's Stupid comics, built on GenericTapasticComic."""
    # Also published at:
    #  - http://www.webtoons.com/en/challenge/everythings-stupid/list?title_no=14591
    #  - http://everythingsstupid.net
    name = 'stupid-tapa'
    long_name = "Everything's Stupid (from Tapastic)"
    url = 'http://tapastic.com/series/EverythingsStupid'
4500
4501
4502
class JustSayEhTapastic(GenericTapasticComic):
    """Retriever for Just Say Eh comics, built on GenericTapasticComic."""
    # Also published at http://www.justsayeh.com
    name = 'justsayeh-tapa'
    long_name = 'Just Say Eh (from Tapastic)'
    url = 'http://tapastic.com/series/Just-Say-Eh'
4508
4509
4510
class ThorsThundershackTapastic(GenericTapasticComic):
    """Retriever for Thor's Thundershack comics, built on GenericTapasticComic."""
    # Also published at http://www.thorsthundershack.com
    name = 'thor-tapa'
    long_name = "Thor's Thundershack (from Tapastic)"
    # NOTE(review): the slug ends in 'Thundershac' - confirm this is the
    # series' real address and not a truncated copy.
    url = 'http://tapastic.com/series/Thors-Thundershac'
    _categories = ('THOR',)
4517
4518
4519
class OwlTurdTapastic(GenericTapasticComic):
    """Retriever for Owl Turd comics, built on GenericTapasticComic."""
    # Also published at http://owlturd.com
    name = 'owlturd-tapa'
    long_name = 'Owl Turd (from Tapastic)'
    url = 'http://tapastic.com/series/Owl-Turd-Comix'
    _categories = ('OWLTURD',)
4526
4527
4528
class GoneIntoRaptureTapastic(GenericTapasticComic):
    """Retriever for Gone Into Rapture comics, built on GenericTapasticComic."""
    # Also published at:
    #  - http://goneintorapture.tumblr.com
    #  - http://www.goneintorapture.com
    name = 'rapture-tapa'
    long_name = 'Gone Into Rapture (from Tapastic)'
    url = 'http://tapastic.com/series/Goneintorapture'
4535
4536
4537
class HeckIfIKnowComicsTapa(GenericTapasticComic):
    """Retriever for Heck If I Know comics, built on GenericTapasticComic."""
    # Also published at http://heckifiknowcomics.com
    name = 'heck-tapa'
    long_name = 'Heck if I Know comics (from Tapastic)'
    url = 'http://tapastic.com/series/Regular'
4543
4544
4545
class CheerUpEmoKidTapa(GenericTapasticComic):
    """Retriever for Cheer Up Emo Kid comics, built on GenericTapasticComic."""
    # Also published at:
    #  - http://www.cheerupemokid.com
    #  - http://enzocomics.tumblr.com
    name = 'cuek-tapa'
    long_name = 'Cheer Up Emo Kid (from Tapastic)'
    url = 'http://tapastic.com/series/CUEK'
4552
4553
4554
class BigFootJusticeTapa(GenericTapasticComic):
    """Retriever for Big Foot Justice comics, built on GenericTapasticComic."""
    # Also published at http://bigfootjustice.com
    name = 'bigfoot-tapa'
    long_name = 'Big Foot Justice (from Tapastic)'
    url = 'http://tapastic.com/series/bigfoot-justice'
4560
4561
4562
class UpAndOutTapa(GenericTapasticComic):
    """Retriever for Up & Out comics, built on GenericTapasticComic."""
    # Also published at http://upandoutcomic.tumblr.com
    name = 'upandout-tapa'
    long_name = 'Up And Out (from Tapastic)'
    url = 'http://tapastic.com/series/UP-and-OUT'
4568
4569
4570
class ToonHoleTapa(GenericTapasticComic):
    """Retriever for Toon Hole comics, built on GenericTapasticComic."""
    # Also published at http://www.toonhole.com
    name = 'toonhole-tapa'
    long_name = 'Toon Hole (from Tapastic)'
    url = 'http://tapastic.com/series/TOONHOLE'
4576
4577
4578
class AngryAtNothingTapa(GenericTapasticComic):
    """Retriever for Angry At Nothing comics, built on GenericTapasticComic."""
    # Also published at http://www.angryatnothing.net
    name = 'angry-tapa'
    long_name = 'Angry At Nothing (from Tapastic)'
    url = 'http://tapastic.com/series/Comics-yeah-definitely-comics-'
4584
4585
4586
class LeleozTapa(GenericTapasticComic):
    """Retriever for Leleoz comics, built on GenericTapasticComic."""
    # Also published at http://leleozcomics.tumblr.com
    name = 'leleoz-tapa'
    long_name = 'Leleoz (from Tapastic)'
    url = 'https://tapastic.com/series/Leleoz'
4592
4593
4594
class TheAwkwardYetiTapa(GenericTapasticComic):
    """Retriever for The Awkward Yeti comics, built on GenericTapasticComic."""
    # Also published at:
    #  - http://www.gocomics.com/the-awkward-yeti
    #  - http://theawkwardyeti.com
    #  - http://larstheyeti.tumblr.com
    name = 'yeti-tapa'
    long_name = 'The Awkward Yeti (from Tapastic)'
    url = 'https://tapastic.com/series/TheAwkwardYeti'
    _categories = ('YETI',)
4603
4604
4605
class AsPerUsualTapa(GenericTapasticComic):
    """Retriever for As Per Usual comics, built on GenericTapasticComic."""
    # Also published at http://as-per-usual.tumblr.com
    name = 'usual-tapa'
    long_name = 'As Per Usual (from Tapastic)'
    url = 'https://tapastic.com/series/AsPerUsual'
    # Spelled `_categories` like in every other comic class of this module;
    # the previous bare `categories` did not match that attribute name.
    _categories = ('DAMILEE', )
4612
4613
4614
class HotComicsForCoolPeopleTapa(GenericTapasticComic):
    """Retriever for Hot Comics For Cool People, built on GenericTapasticComic."""
    # Also published at:
    #  - http://hotcomicsforcoolpeople.tumblr.com
    #  - http://hotcomics.biz (links to tumblr)
    #  - http://hcfcp.com (links to tumblr)
    name = 'hotcomics-tapa'
    long_name = 'Hot Comics For Cool People (from Tapastic)'
    url = 'https://tapastic.com/series/Hot-Comics-For-Cool-People'
    # Spelled `_categories` like in every other comic class of this module;
    # the previous bare `categories` did not match that attribute name.
    _categories = ('DAMILEE', )
4623
4624
4625
class OneOneOneOneComicTapa(GenericTapasticComic):
    """Retriever for 1111 Comics, built on GenericTapasticComic."""
    # Also published at:
    #  - http://www.1111comics.me
    #  - http://comics1111.tumblr.com
    name = '1111-tapa'
    long_name = '1111 Comics (from Tapastic)'
    url = 'https://tapastic.com/series/1111-Comics'
    _categories = ('ONEONEONEONE',)
4633
4634
4635
class TumbleDryTapa(GenericTapasticComic):
    """Retriever for Tumble Dry comics, built on GenericTapasticComic."""
    # Also published at http://tumbledrycomics.com
    name = 'tumbledry-tapa'
    # Display name fixed: the comic is 'Tumble Dry', not 'Tumblr Dry'.
    long_name = 'Tumble Dry (from Tapastic)'
    url = 'https://tapastic.com/series/TumbleDryComics'
4641
4642
4643
class DeadlyPanelTapa(GenericTapasticComic):
    """Retriever for Deadly Panel comics, built on GenericTapasticComic."""
    # Also published at http://www.deadlypanel.com
    name = 'deadly-tapa'
    long_name = 'Deadly Panel (from Tapastic)'
    url = 'https://tapastic.com/series/deadlypanel'
4649
4650
4651
class ChrisHallbeckMaxiTapa(GenericTapasticComic):
    """Retriever for Chris Hallbeck's Maximumble, built on GenericTapasticComic."""
    # Also published at:
    #  - http://chrishallbeck.tumblr.com
    #  - http://maximumble.com
    name = 'hallbeckmaxi-tapa'
    # Display name fixed: the author is 'Hallbeck', not 'Hallback'.
    long_name = 'Chris Hallbeck - Maximumble (from Tapastic)'
    url = 'https://tapastic.com/series/Maximumble'
    # NOTE(review): the category key keeps its 'HALLBACK' spelling because
    # it may be shared with classes elsewhere in the file - rename together.
    _categories = ('HALLBACK', )
4659
4660
4661
class ChrisHallbeckMiniTapa(GenericTapasticComic):
    """Retriever for Chris Hallbeck's Minimumble, built on GenericTapasticComic."""
    # Also published at:
    #  - http://chrishallbeck.tumblr.com
    #  - http://minimumble.com
    name = 'hallbeckmini-tapa'
    # Display name fixed: the author is 'Hallbeck', not 'Hallback'.
    long_name = 'Chris Hallbeck - Minimumble (from Tapastic)'
    url = 'https://tapastic.com/series/Minimumble'
    # NOTE(review): the category key keeps its 'HALLBACK' spelling because
    # it may be shared with classes elsewhere in the file - rename together.
    _categories = ('HALLBACK', )
4669
4670
4671
class ChrisHallbeckBiffTapa(GenericTapasticComic):
    """Retriever for Chris Hallbeck's The Book of Biff, built on GenericTapasticComic."""
    # Also published at:
    #  - http://chrishallbeck.tumblr.com
    #  - http://thebookofbiff.com
    name = 'hallbeckbiff-tapa'
    # Display name fixed: the author is 'Hallbeck', not 'Hallback'.
    long_name = 'Chris Hallbeck - The Book of Biff (from Tapastic)'
    url = 'https://tapastic.com/series/Biff'
    # NOTE(review): the category key keeps its 'HALLBACK' spelling because
    # it may be shared with classes elsewhere in the file - rename together.
    _categories = ('HALLBACK', )
4679
4680
4681
class RandoWisTapa(GenericTapasticComic):
    """Retriever for RandoWis comics, built on GenericTapasticComic."""
    # Also published at https://randowis.com
    name = 'randowis-tapa'
    long_name = 'RandoWis (from Tapastic)'
    url = 'https://tapastic.com/series/RandoWis'
4687
4688
4689
class PigeonGazetteTapa(GenericTapasticComic):
    """Class to retrieve The Pigeon Gazette comics from Tapastic."""
    # Also published on http://thepigeongazette.tumblr.com
    name = 'pigeon-tapa'
    long_name = 'The Pigeon Gazette (from Tapastic)'
    url = 'https://tapastic.com/series/The-Pigeon-Gazette'
4695
4696
4697
class TheOdd1sOutTapa(GenericTapasticComic):
    """Class to retrieve The Odd 1s Out comics from Tapastic."""
    # Also published on http://theodd1sout.com
    # Also published on http://theodd1sout.tumblr.com
    name = 'theodd-tapa'
    long_name = 'The Odd 1s Out (from Tapastic)'
    url = 'https://tapastic.com/series/Theodd1sout'
4704
4705
4706
class TheWorldIsFlatTapa(GenericTapasticComic):
    """Class to retrieve The World Is Flat comics from Tapastic."""
    # Also published on http://theworldisflatcomics.tumblr.com
    name = 'flatworld-tapa'
    long_name = 'The World Is Flat (from Tapastic)'
    url = 'https://tapastic.com/series/The-World-is-Flat'
4712
4713
4714
class MisterAndMeTapa(GenericTapasticComic):
    """Class to retrieve Mister & Me comics from Tapastic."""
    # Also published on http://www.mister-and-me.com
    # Also published on http://www.gocomics.com/mister-and-me
    name = 'mister-tapa'
    long_name = 'Mister & Me (from Tapastic)'
    url = 'https://tapastic.com/series/Mister-and-Me'
4721
4722
4723
class TalesOfAbsurdityTapa(GenericTapasticComic):
    """Class to retrieve Tales Of Absurdity comics from Tapastic."""
    # Also published on http://talesofabsurdity.com
    # Also published on http://talesofabsurdity.tumblr.com
    name = 'absurdity-tapa'
    long_name = 'Tales of Absurdity (from Tapastic)'
    # NOTE(review): plain http:// here, whereas the neighbouring Tapastic
    # entries use https:// - confirm whether this is intentional.
    url = 'http://tapastic.com/series/Tales-Of-Absurdity'
    _categories = ('ABSURDITY',)
4731
4732
4733
class BFGFSTapa(GenericTapasticComic):
    """Class to retrieve BFGFS comics from Tapastic."""
    # Also published on http://bfgfs.com
    # Also published on http://bfgfs.tumblr.com
    name = 'bfgfs-tapa'
    long_name = 'BFGFS (from Tapastic)'
    url = 'https://tapastic.com/series/BFGFS'
4740
4741
4742
class DoodleForFoodTapa(GenericTapasticComic):
    """Class to retrieve Doodle For Food comics from Tapastic."""
    # Also published on http://doodleforfood.com
    name = 'doodle-tapa'
    long_name = 'Doodle For Food (from Tapastic)'
    url = 'https://tapastic.com/series/Doodle-for-Food'
4748
4749
4750
class MrLovensteinTapa(GenericTapasticComic):
    """Class to retrieve Mr. Lovenstein comics from Tapastic."""
    # NOTE(review): the previous "Also on" comment pointed back at this very
    # Tapastic URL; the comic's own site is presumably
    # http://www.mrlovenstein.com - confirm and update.
    name = 'mrlovenstein-tapa'
    long_name = 'Mr. Lovenstein (from Tapastic)'
    url = 'https://tapastic.com/series/MrLovenstein'
4756
4757
4758
class CassandraCalinTapa(GenericTapasticComic):
    """Class to retrieve C. Cassandra (Cassandra Calin) comics from Tapastic."""
    # Also published on http://cassandracalin.com
    # Also published on http://c-cassandra.tumblr.com
    name = 'cassandra-tapa'
    long_name = 'Cassandra Calin (from Tapastic)'
    url = 'https://tapastic.com/series/C-Cassandra-comics'
4765
4766
4767
class WafflesAndPancakes(GenericTapasticComic):
    """Class to retrieve Waffles And Pancakes comics from Tapastic."""
    # Also published on http://wandpcomic.com
    name = 'waffles'
    long_name = 'Waffles And Pancakes'
    url = 'https://tapastic.com/series/Waffles-and-Pancakes'
4773
4774
4775
class YesterdaysPopcornTapastic(GenericTapasticComic):
    """Class to retrieve Yesterday's Popcorn comics from Tapastic."""
    # Also published on http://www.yesterdayspopcorn.com
    # Also published on http://yesterdayspopcorn.tumblr.com
    name = 'popcorn-tapa'
    long_name = "Yesterday's Popcorn (from Tapastic)"
    url = 'https://tapastic.com/series/Yesterdays-Popcorn'
4782
4783
4784
class OurSuperAdventureTapastic(GenericTapasticComic):
    """Class to retrieve Our Super Adventure comics from Tapastic."""
    # Also published on:
    #   http://www.oursuperadventure.com
    #   http://sarahssketchbook.tumblr.com
    #   http://sarahgraley.com
    name = 'superadventure-tapastic'
    long_name = 'Our Super Adventure (from Tapastic)'
    url = 'https://tapastic.com/series/Our-Super-Adventure'
4792
4793
4794
class NamelessPCs(GenericTapasticComic):
    """Class to retrieve Nameless PCs comics from Tapastic."""
    # Also published on http://namelesspcs.com
    name = 'namelesspcs-tapa'
    long_name = 'NamelessPCs (from Tapastic)'
    url = 'https://tapastic.com/series/NamelessPC'
4800
4801
4802
class UbertoolTapa(GenericTapasticComic):
    """Class to retrieve Ubertool comics from Tapastic."""
    # Also published on http://ubertoolcomic.com
    # Also published on http://ubertool.tumblr.com
    name = 'ubertool-tapa'
    long_name = 'Ubertool (from Tapastic)'
    url = 'https://tapastic.com/series/ubertool'
    _categories = ('UBERTOOL',)
4810
4811
4812
class SmallBlueYonderTapa(GenericTapasticComic):
    """Class to retrieve Small Blue Yonder comics from Tapastic."""
    # Also published on http://www.smallblueyonder.com
    name = 'smallblue-tapa'
    long_name = 'Small Blue Yonder (from Tapastic)'
    url = 'https://tapastic.com/series/Small-Blue-Yonder'
4818
4819
4820
def get_subclasses(klass):
    """Return the list of direct and indirect subclasses of a class.

    Direct subclasses come first, followed by the descendants of each direct
    subclass in turn. A class reachable through several inheritance paths
    appears once per path (no de-duplication is performed).
    """
    direct = klass.__subclasses__()
    found = list(direct)
    for child in direct:
        found += get_subclasses(child)
    return found
4826
4827
4828
def remove_st_nd_rd_th_from_date(string):
    """Strip ordinal suffixes (1st/2nd/3rd/4th) to get a parsable date string.

    Only a 'st', 'nd', 'rd' or 'th' that immediately follows a digit is
    removed, so the rest of the text (e.g. 'August', 'Monday', 'Saturday')
    is left untouched.

    Example: 'Monday 2nd August 2015' -> 'Monday 2 August 2015'.
    """
    # The look-behind keeps the digit and drops only the suffix after it.
    return re.sub(r'(?<=\d)(?:st|nd|rd|th)', '', string)
4836
4837
4838
def string_to_date(string, date_format, local=DEFAULT_LOCAL):
    """Convert a string to a date object.

    Wrapper around datetime.datetime.strptime which temporarily switches the
    process locale (LC_ALL) to `local` so that locale-dependent directives
    in `date_format` are parsed with that locale.

    The previous locale is always restored, even when parsing fails.

    Raises ValueError (from strptime) when `string` does not match
    `date_format`, and locale.Error when `local` cannot be set.
    """
    # format described in https://docs.python.org/2/library/datetime.html#strftime-and-strptime-behavior
    prev_locale = locale.setlocale(locale.LC_ALL)
    must_switch = local != prev_locale
    if must_switch:
        locale.setlocale(locale.LC_ALL, local)
    try:
        return datetime.datetime.strptime(string, date_format).date()
    finally:
        # The locale is process-wide state: do not leak the temporary
        # setting to the rest of the program if strptime raised.
        if must_switch:
            locale.setlocale(locale.LC_ALL, prev_locale)
4849
4850
4851
# Registry of comic classes, built once every class above has been defined.
# Every direct/indirect subclass of GenericComic (set() drops duplicates):
COMICS = set(get_subclasses(GenericComic))
# Only the classes that define a `name` are kept as valid comics.
VALID_COMICS = [comic for comic in COMICS if comic.name is not None]
# Lookup by `name`; the assert checks that no two comics share a name.
COMIC_NAMES = {comic.name: comic for comic in VALID_COMICS}
assert len(COMIC_NAMES) == len(VALID_COMICS)
# Python class names must be unique among valid comics as well.
CLASS_NAMES = {comic.__name__ for comic in VALID_COMICS}
assert len(CLASS_NAMES) == len(VALID_COMICS)
4857