Completed
Push — master ( e6ff53...bb76ae )
by De
01:09
created

comics.py (11 issues)

1
#! /usr/bin/python3
2
# vim: set expandtab tabstop=4 shiftwidth=4 :
3
"""Module to retrieve webcomics"""
4
5
from comic_abstract import GenericComic, get_date_for_comic
6
import re
7
from datetime import date, timedelta
8
import datetime
9
from urlfunctions import get_soup_at_url, urljoin_wrapper,\
10
    convert_iri_to_plain_ascii_uri, load_json_at_url, urlopen_wrapper
11
import json
12
import locale
13
import urllib
14
15
DEFAULT_LOCAL = 'en_GB.UTF-8'
16
17
18
class Xkcd(GenericComic):
    """Retriever for xkcd comics, driven by the site's JSON endpoints."""
    name = 'xkcd'
    long_name = 'xkcd'
    url = 'http://xkcd.com'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Yield the comics that follow `last_comic`, in increasing number.

        `last_comic` is the previously retrieved comic (a dict with a
        'num' key) or a falsy value when nothing was retrieved yet.
        Each yielded dict is the JSON description of the comic enriched
        with 'prefix', 'json_url' and 'url', with 'img' wrapped in a list
        and 'day'/'month'/'year' converted to integers.
        """
        start = (last_comic['num'] if last_comic else 0) + 1
        newest = load_json_at_url(
            urljoin_wrapper(cls.url, 'info.0.json'))['num']

        for num in range(start, newest + 1):
            if num == 404:
                # number 404 is deliberately never requested
                continue
            json_url = urljoin_wrapper(cls.url, '%d/info.0.json' % num)
            comic = load_json_at_url(json_url)
            comic['img'] = [comic['img']]
            comic['prefix'] = '%d-' % num
            comic['json_url'] = json_url
            comic['url'] = urljoin_wrapper(cls.url, str(num))
            for date_part in ('day', 'month', 'year'):
                comic[date_part] = int(comic[date_part])
            assert comic['num'] == num
            yield comic
43
44
45
# Helper functions corresponding to get_url_from_link/get_url_from_archive_element
46
47
48
@classmethod
def get_href(cls, link):
    """Return the 'href' entry of `link`.

    Meant to be assigned in a class body as the implementation of
    get_url_from_link/get_url_from_archive_element.
    """
    href = link['href']
    return href
52
53
54
@classmethod
def join_cls_url_to_href(cls, link):
    """Return the 'href' entry of `link` joined to the class url.

    Meant to be assigned in a class body as the implementation of
    get_url_from_link/get_url_from_archive_element.
    """
    href = link['href']
    return urljoin_wrapper(cls.url, href)
58
59
60
class GenericNavigableComic(GenericComic):
    """Generic class for "navigable" comics: with first/next arrows.

    The `get_next_comic` method is implemented in terms of new, more
    specialized methods to be implemented/overridden:
        - get_first_comic_link
        - get_navi_link
        - get_comic_info
        - get_url_from_link
    """

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        Sometimes this can be retrieved of any comic page, sometimes on
        the archive page, sometimes it doesn't exist at all and one has
        to iterate backward to find it before hardcoding the result found.
        """
        raise NotImplementedError

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next (or previous - for dev purposes) comic.

        `next_` is truthy to request the next link, falsy for the previous.
        """
        raise NotImplementedError

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The result is a dict (without a 'url' key, which is added by
        `get_next_comic`) or None when the page must be skipped.
        """
        raise NotImplementedError

    @classmethod
    def get_url_from_link(cls, link):
        """Get url corresponding to a link. Default implementation is similar to get_href."""
        return link['href']

    @classmethod
    def get_next_link(cls, last_soup):
        """Get link to next comic."""
        return cls.get_navi_link(last_soup, True)

    @classmethod
    def get_prev_link(cls, last_soup):
        """Get link to previous comic."""
        return cls.get_navi_link(last_soup, False)

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generic implementation of get_next_comic for navigable comics.

        Starts from the page after `last_comic` (or from the first comic
        when `last_comic` is falsy) and follows the "next" links, yielding
        the dict built by `get_comic_info` for each page, with 'url' added.
        """
        url = last_comic['url'] if last_comic else None
        next_comic = \
            cls.get_next_link(get_soup_at_url(url)) \
            if url else \
            cls.get_first_comic_link()
        cls.log("next/first comic will be %s (url is %s)" % (str(next_comic), url))
        while next_comic:
            prev_url, url = url, cls.get_url_from_link(next_comic)
            if prev_url == url:
                # a "next" link pointing to the current page would loop forever
                cls.log("got same url %s" % url)
                break
            cls.log("about to get %s (%s)" % (url, str(next_comic)))
            soup = get_soup_at_url(url)
            comic = cls.get_comic_info(soup, next_comic)
            if comic is not None:
                assert 'url' not in comic
                comic['url'] = url
                yield comic
            next_comic = cls.get_next_link(soup)
            cls.log("next comic will be %s" % str(next_comic))

    @classmethod
    def check_first_link(cls):
        """Check that navigation to first comic seems to be working - for dev purposes.

        Returns True when a first link exists and its page can be fetched.
        """
        # make sure the `urllib.error` submodule is actually loaded: a bare
        # `import urllib` does not guarantee it
        import urllib.error
        cls.log("about to check first link")
        ok = True
        firstlink = cls.get_first_comic_link()
        if firstlink is None:
            print("From %s : no first link" % cls.url)
            ok = False
        else:
            firsturl = cls.get_url_from_link(firstlink)
            try:
                get_soup_at_url(firsturl)
            except urllib.error.HTTPError:
                print("From %s : invalid first url" % cls.url)
                ok = False
        cls.log("checked first link -> returned %d" % ok)
        return ok

    @classmethod
    def check_prev_next_links(cls, url):
        """Check that navigation to prev/next from a given URL seems to be working - for dev purposes.

        Going to the previous page then forward (and to the next page then
        backward) is expected to lead back to `url`. Returns True when all
        the checks that could be performed succeeded.
        """
        cls.log("about to check prev/next from %s" % url)
        ok = True
        if url is None:
            prevlink, nextlink = None, None
        else:
            soup = get_soup_at_url(url)
            prevlink, nextlink = cls.get_prev_link(soup), cls.get_next_link(soup)
        if prevlink is None and nextlink is None:
            print("From %s : no previous nor next" % url)
            ok = False
        else:
            if prevlink:
                prevurl = cls.get_url_from_link(prevlink)
                prevsoup = get_soup_at_url(prevurl)
                # the previous page may have no "next" link at all: report
                # it as a mismatch instead of crashing on a None link
                prevnextlink = cls.get_next_link(prevsoup)
                prevnext = cls.get_url_from_link(prevnextlink) if prevnextlink else None
                if prevnext != url:
                    print("From %s, going backward then forward leads to %s" % (url, prevnext))
                    ok = False
            if nextlink:
                nexturl = cls.get_url_from_link(nextlink)
                if nexturl != url:
                    nextsoup = get_soup_at_url(nexturl)
                    # same precaution for a next page without "previous" link
                    nextprevlink = cls.get_prev_link(nextsoup)
                    nextprev = cls.get_url_from_link(nextprevlink) if nextprevlink else None
                    if nextprev != url:
                        print("From %s, going forward then backward leads to %s" % (url, nextprev))
                        ok = False
        cls.log("checked prev/next from %s -> returned %d" % (url, ok))
        return ok

    @classmethod
    def check_navigation(cls, url):
        """Check that navigation functions seem to be working - for dev purposes.

        Runs both `check_first_link` and `check_prev_next_links` (neither
        is short-circuited) and returns True only when both succeed.
        """
        cls.log("about to check navigation from %s" % url)
        first = cls.check_first_link()
        prevnext = cls.check_prev_next_links(url)
        ok = first and prevnext
        cls.log("checked navigation from %s -> returned %d" % (url, ok))
        return ok

    # This method is not defined by default and is not part of this class'API.
    # It is only used:
    # - during development
    # - in subclasses implementing it correctly
    if False:
        @classmethod
        def get_first_comic_url(cls):
            """Get first comic url

            Sometimes, the first comic cannot be reached directly so to start
            from the first comic one has to go to the previous comic until
            there is no previous comics. Once this URL is reached, it
            is better to hardcode it but for development purposes, it
            is convenient to have an automatic way to find it.
            """
            url = input("Get starting URL: ")
            print(url)
            comic = cls.get_prev_link(get_soup_at_url(url))
            while comic:
                url = cls.get_url_from_link(comic)
                print(url)
                comic = cls.get_prev_link(get_soup_at_url(url))
            return url
214
215
216
class GenericListableComic(GenericComic):
    """Generic class for "listable" comics : with a list of comics (aka 'archive')

    The `get_next_comic` method is implemented in terms of new, more
    specialized methods to be implemented/overridden:
        - get_archive_elements
        - get_url_from_archive_element
        - get_comic_info
    """

    @classmethod
    def get_archive_elements(cls):
        """Return an iterable over the elements of the archive."""
        raise NotImplementedError

    @classmethod
    def get_url_from_archive_element(cls, archive_elt):
        """Return the url of the comic designated by an archive element."""
        raise NotImplementedError

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Return the dict describing a comic, or None to skip it."""
        raise NotImplementedError

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generic implementation of get_next_comic for listable comics.

        Archive elements are walked in order. When `last_comic` is given,
        everything up to and including its url is ignored; the following
        elements are fetched and yielded with their 'url' added.
        """
        pending_url = last_comic['url'] if last_comic else None
        for elt in cls.get_archive_elements():
            elt_url = cls.get_url_from_archive_element(elt)
            cls.log("considering %s" % elt_url)
            if pending_url is not None:
                # still looking for the last comic already retrieved
                if pending_url and pending_url == elt_url:
                    pending_url = None
                continue
            cls.log("about to get %s (%s)" % (elt_url, str(elt)))
            page = get_soup_at_url(elt_url)
            info = cls.get_comic_info(page, elt)
            if info is None:
                continue
            assert 'url' not in info
            info['url'] = elt_url
            yield info
        if pending_url is not None:
            print("Did not find %s : there might be a problem" % pending_url)
260
261
# Helper functions corresponding to get_first_comic_link/get_navi_link
262
263
264
@classmethod
def get_link_rel_next(cls, last_soup, next_):
    """Implementation of get_navi_link: <link> tag with rel 'next' or 'prev'."""
    wanted_rel = 'next' if next_ else 'prev'
    return last_soup.find('link', rel=wanted_rel)
268
269
270
@classmethod
def get_a_rel_next(cls, last_soup, next_):
    """Implementation of get_navi_link: <a> tag with rel 'next' or 'prev'."""
    wanted_rel = 'next' if next_ else 'prev'
    return last_soup.find('a', rel=wanted_rel)
274
275
276
@classmethod
def get_a_navi_navinext(cls, last_soup, next_):
    """Implementation of get_navi_link: <a> tag with class 'navi navi-next' or 'navi navi-prev'."""
    if next_:
        css_class = 'navi navi-next'
    else:
        css_class = 'navi navi-prev'
    return last_soup.find('a', class_=css_class)
280
281
282
@classmethod
def get_a_navi_comicnavnext_navinext(cls, last_soup, next_):
    """Implementation of get_navi_link: <a> tag with the 'navi comic-nav-* navi-*' classes."""
    if next_:
        css_class = 'navi comic-nav-next navi-next'
    else:
        css_class = 'navi comic-nav-previous navi-prev'
    return last_soup.find('a', class_=css_class)
286
287
288
@classmethod
def get_a_comicnavbase_comicnavnext(cls, last_soup, next_):
    """Implementation of get_navi_link: <a> tag with the 'comic-nav-base comic-nav-*' classes."""
    if next_:
        css_class = 'comic-nav-base comic-nav-next'
    else:
        css_class = 'comic-nav-base comic-nav-previous'
    return last_soup.find('a', class_=css_class)
292
293
294
@classmethod
def get_a_navi_navifirst(cls):
    """Implementation of get_first_comic_link: <a class="navi navi-first"> on the page at cls.url."""
    home_page = get_soup_at_url(cls.url)
    return home_page.find('a', class_='navi navi-first')
298
299
300
@classmethod
def get_div_navfirst_a(cls):
    """Implementation of get_first_comic_link: <a> inside <div class="nav-first"> on the page at cls.url."""
    home_page = get_soup_at_url(cls.url)
    nav_first = home_page.find('div', class_="nav-first")
    return nav_first.find('a')
304
305
306
@classmethod
def get_a_comicnavbase_comicnavfirst(cls):
    """Implementation of get_first_comic_link: <a class="comic-nav-base comic-nav-first"> on the page at cls.url."""
    home_page = get_soup_at_url(cls.url)
    return home_page.find('a', class_='comic-nav-base comic-nav-first')
310
311
312
class GenericEmptyComic(GenericComic):
    """Generic class for comics where nothing is to be done.

    It can be useful to temporarily deactivate comics that do not work
    properly by replacing `class MyComic(GenericWhateverComic)` with
    `class MyComic(GenericEmptyComic, GenericWhateverComic)`.
    """

    @classmethod
    def get_next_comic(cls, last_comic):
        """Log that the comic is deactivated and return an empty list."""
        cls.log("comic is considered as empty - returning no comic")
        no_comic = []
        return no_comic
323
324
325
class ExtraFabulousComics(GenericNavigableComic):
    """Class to retrieve Extra Fabulous Comics."""
    name = 'efc'
    long_name = 'Extra Fabulous Comics'
    url = 'http://extrafabulouscomics.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics: the <a title="FIRST"> of the page at cls.url."""
        return get_soup_at_url(cls.url).find('a', title='FIRST')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with the post title, the sources of the images
        located under the site's wp-content/uploads/ and a file prefix
        derived from the title.
        """
        # the url is escaped so that its dots only match literal dots
        img_src_re = re.compile('^%s/wp-content/uploads/' % re.escape(cls.url))
        imgs = soup.find_all('img', src=img_src_re)
        title = soup.find('h2', class_='post-title').string
        return {
            'title': title,
            'img': [i['src'] for i in imgs],
            'prefix': title + '-'
        }
348
349
350
class GenericLeMondeBlog(GenericNavigableComic):
    """Base class shared by the comics hosted on Le Monde blogs."""
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_url(cls):
        """Return the url of the first post - to be provided by subclasses."""
        raise NotImplementedError

    @classmethod
    def get_first_comic_link(cls):
        """Wrap the first post url in a link-like dict."""
        first_url = cls.get_first_comic_url()
        return {'href': first_url}

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the dict describing the post found in `soup`.

        Short link, title and images are read from the page's <link>/<meta>
        tags; the date is parsed from the 'entry-date' span using the
        "fr_FR.utf8" locale.
        """
        shortlink = soup.find('link', rel='shortlink')['href']
        og_title = soup.find('meta', property='og:title')['content']
        entry_date = soup.find("span", class_="entry-date").string
        post_day = string_to_date(entry_date, "%d %B %Y", "fr_FR.utf8")
        og_images = soup.find_all('meta', property='og:image')
        img_urls = [convert_iri_to_plain_ascii_uri(meta['content']) for meta in og_images]
        return {
            'title': og_title,
            'url2': shortlink,
            'img': img_urls,
            'month': post_day.month,
            'year': post_day.year,
            'day': post_day.day,
        }
380
381
382
class ZepWorld(GenericLeMondeBlog):
    """Retriever for the Zep World blog."""
    name = "zep"
    long_name = "Zep World"
    url = "http://zepworld.blog.lemonde.fr"

    @classmethod
    def get_first_comic_url(cls):
        """Return the hard-coded url of the first post to retrieve."""
        first_post = "http://zepworld.blog.lemonde.fr/2014/10/31/bientot-le-blog-de-zep/"
        return first_post
391
392
393
class Vidberg(GenericLeMondeBlog):
    """Retriever for the Vidberg blog."""
    name = 'vidberg'
    long_name = "Vidberg - l'actu en patates"
    url = "http://vidberg.blog.lemonde.fr"

    @classmethod
    def get_first_comic_url(cls):
        """Return the hard-coded url of the post to start from.

        This is not the first post of the blog: no efficient way to
        retrieve the actual first one was found.
        """
        start_post = "http://vidberg.blog.lemonde.fr/2012/02/09/revue-de-campagne-la-campagne-du-modem-semballe/"
        return start_post
403
404
405
class Plantu(GenericLeMondeBlog):
    """Retriever for the Plantu blog."""
    name = 'plantu'
    long_name = "Plantu"
    url = "http://plantu.blog.lemonde.fr"

    @classmethod
    def get_first_comic_url(cls):
        """Return the hard-coded url of the first post to retrieve."""
        first_post = "http://plantu.blog.lemonde.fr/2014/10/28/stress-test-a-bruxelles/"
        return first_post
414
415
416
class XavierGorce(GenericLeMondeBlog):
    """Retriever for the Xavier Gorce blog."""
    name = 'gorce'
    long_name = "Xavier Gorce"
    url = "http://xaviergorce.blog.lemonde.fr"

    @classmethod
    def get_first_comic_url(cls):
        """Return the hard-coded url of the first post to retrieve."""
        first_post = "http://xaviergorce.blog.lemonde.fr/2015/01/09/distinction/"
        return first_post
425
426
427
class CartooningForPeace(GenericLeMondeBlog):
    """Retriever for the Cartooning For Peace blog."""
    name = 'forpeace'
    long_name = "Cartooning For Peace"
    url = "http://cartooningforpeace.blog.lemonde.fr"

    @classmethod
    def get_first_comic_url(cls):
        """Return the hard-coded url of the first post to retrieve."""
        first_post = "http://cartooningforpeace.blog.lemonde.fr/2014/12/15/bado/"
        return first_post
436
437
438
class Aurel(GenericLeMondeBlog):
    """Retriever for the Aurel blog."""
    name = 'aurel'
    long_name = "Aurel"
    url = "http://aurel.blog.lemonde.fr"

    @classmethod
    def get_first_comic_url(cls):
        """Return the hard-coded url of the first post to retrieve."""
        first_post = "http://aurel.blog.lemonde.fr/2014/09/29/le-senat-repasse-a-droite/"
        return first_post
447
448
449
class LesCulottees(GenericLeMondeBlog):
    """Retriever for the Les Culottees blog."""
    name = 'culottees'
    long_name = 'Les Culottees'
    url = "http://lesculottees.blog.lemonde.fr"

    @classmethod
    def get_first_comic_url(cls):
        """Return the hard-coded url of the first post to retrieve."""
        first_post = "http://lesculottees.blog.lemonde.fr/2016/01/11/clementine-delait-femme-a-barbe/"
        return first_post
458
459
460
class UneAnneeAuLycee(GenericLeMondeBlog):
    """Retriever for the Une Annee Au Lycee blog."""
    name = 'lycee'
    long_name = 'Une Annee au Lycee'
    url = 'http://uneanneeaulycee.blog.lemonde.fr'

    @classmethod
    def get_first_comic_url(cls):
        """Return the hard-coded url of the first post to retrieve."""
        first_post = "http://uneanneeaulycee.blog.lemonde.fr/2016/06/13/la-semaine-du-bac-est-arrivee/"
        return first_post
469
470
471
class Rall(GenericNavigableComic):
    """Retriever for the Ted Rall comics."""
    # Also on http://www.gocomics.com/tedrall
    name = 'rall'
    long_name = "Ted Rall"
    url = "http://rall.com/comic"
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return a link-like dict pointing to the comic to start from."""
        # Not the first but I didn't find an efficient way to retrieve it
        start_url = "http://rall.com/2014/01/30/los-angeles-times-cartoon-well-miss-those-california-flowers"
        return {'href': start_url}

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the dict describing the comic found in `soup`.

        Title and description come from the og: <meta> tags, author and
        date from the corresponding spans, images from the entry content.
        """
        og_title = soup.find('meta', property='og:title')['content']
        author_span = soup.find("span", class_="author vcard")
        author_name = author_span.find("a").string
        entry_date = soup.find("span", class_="entry-date").string
        comic_day = string_to_date(entry_date, "%B %d, %Y")
        og_desc = soup.find('meta', property='og:description')['content']
        content_imgs = soup.find('div', class_='entry-content').find_all('img')
        # the last 7 images are dropped: social media buttons
        comic_imgs = content_imgs[:-7]
        return {
            'title': og_title,
            'author': author_name,
            'month': comic_day.month,
            'year': comic_day.year,
            'day': comic_day.day,
            'description': og_desc,
            'img': [img['src'] for img in comic_imgs],
        }
504
505
506
class Dilem(GenericNavigableComic):
    """Retriever for the Ali Dilem comics."""
    name = 'dilem'
    long_name = 'Ali Dilem'
    url = 'http://information.tv5monde.com/dilem'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return a link-like dict pointing to the first comic."""
        first_url = "http://information.tv5monde.com/dilem/2004-06-26"
        return {'href': first_url}

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the <a> of the navigation <li>, or None when it is missing.

        The classes are swapped on purpose: the 'prev' item is used for
        the next comic and the 'next' item for the previous one.
        """
        # prev is next / next is prev
        li_class = 'prev' if next_ else 'next'
        item = last_soup.find('li', class_=li_class)
        if not item:
            return None
        return item.find('a')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the dict describing the comic found in `soup`."""
        shortlink = soup.find('link', rel='shortlink')['href']
        twitter_title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        og_images = soup.find_all('meta', property='og:image')
        dc_date = soup.find('span', property='dc:date')['content']
        # only the first 10 characters are parsed as the "%Y-%m-%d" date
        comic_day = string_to_date(dc_date[:10], "%Y-%m-%d")
        return {
            'short_url': shortlink,
            'title': twitter_title,
            'img': [meta['content'] for meta in og_images],
            'day': comic_day.day,
            'month': comic_day.month,
            'year': comic_day.year,
        }
541
542
543
class SpaceAvalanche(GenericNavigableComic):
    """Retriever for the Space Avalanche comics."""
    name = 'avalanche'
    long_name = 'Space Avalanche'
    url = 'http://www.spaceavalanche.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return a link-like dict (url and title) for the first comic."""
        first_link = {
            'href': "http://www.spaceavalanche.com/2009/02/02/irish-sea/",
            'title': "Irish Sea",
        }
        return first_link

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the dict describing the comic found in `soup`.

        The title is taken from the link and the date from the
        year/month/day components of the comic url.
        """
        date_in_url = re.compile('.*/([0-9]*)/([0-9]*)/([0-9]*)/.*$')
        comic_title = link['title']
        comic_url = cls.get_url_from_link(link)
        date_parts = date_in_url.match(comic_url).groups()
        year, month, day = map(int, date_parts)
        entry_imgs = soup.find("div", class_="entry").find_all("img")
        return {
            'title': comic_title,
            'day': day,
            'month': month,
            'year': year,
            'img': [img['src'] for img in entry_imgs],
        }
571
572
573
class ZenPencils(GenericNavigableComic):
    """Retriever for the Zen Pencils comics."""
    # Also on http://zenpencils.tumblr.com
    # Also on http://www.gocomics.com/zen-pencils
    name = 'zenpencils'
    long_name = 'Zen Pencils'
    url = 'http://zenpencils.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return a link-like dict pointing to the first comic."""
        first_url = "http://zenpencils.com/comic/1-ralph-waldo-emerson-make-them-cry/"
        return {'href': first_url}

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the dict describing the comic found in `soup`.

        Sanity checks ensure that there is at least one image and that
        each image's alt text equals its title and is either empty or the
        post title.
        """
        comic_imgs = soup.find('div', id='comic').find_all('img')
        post_content = soup.find('div', class_='post-content')
        author_name = post_content.find("span", class_="post-author").find("a").string
        post_title = post_content.find('h2', class_='post-title').string
        post_date = post_content.find('span', class_='post-date').string
        comic_day = string_to_date(post_date, "%B %d, %Y")
        assert comic_imgs
        assert all(img['alt'] == img['title'] for img in comic_imgs)
        assert all(img['alt'] in (post_title, "") for img in comic_imgs)
        og_desc = soup.find('meta', property='og:description')['content']
        return {
            'title': post_title,
            'description': og_desc,
            'author': author_name,
            'day': comic_day.day,
            'month': comic_day.month,
            'year': comic_day.year,
            'img': [img['src'] for img in comic_imgs],
        }
609
610
611
class ItsTheTie(GenericNavigableComic):
    """Retriever for the It's the tie comics."""
    # Also on http://itsthetie.tumblr.com
    # Also on https://tapastic.com/series/itsthetie
    name = 'tie'
    long_name = "It's the tie"
    url = "http://itsthetie.com"
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the dict describing the comic found in `soup`.

        The images are the og:image entries followed by the bonus images
        ('data-oversrc' attributes) not already listed in og:image, minus
        the generic "bonus-panel.png" picture.
        """
        comic_title = soup.find('h1', class_='comic-title').find('a').string
        meta_header = soup.find('header', class_='comic-meta entry-meta')
        comic_day = string_to_date(meta_header.find('a').string, "%B %d, %Y")
        # Bonus images may or may not be in meta og:image.
        main_srcs = [meta['content'] for meta in soup.find_all('meta', property='og:image')]
        bonus_srcs = [img['data-oversrc'] for img in soup.find_all('img', attrs={'data-oversrc': True})]
        candidates = list(main_srcs)
        for src in bonus_srcs:
            if src not in main_srcs:
                candidates.append(src)
        kept_srcs = []
        for src in candidates:
            if src.endswith("/2016/01/bonus-panel.png"):
                continue
            kept_srcs.append(src)
        tag_meta = soup.find('meta', property='article:tag')
        if tag_meta:
            comic_tags = tag_meta['content']
        else:
            comic_tags = ""
        return {
            'title': comic_title,
            'month': comic_day.month,
            'year': comic_day.year,
            'day': comic_day.day,
            'img': kept_srcs,
            'tags': comic_tags,
        }
644
645
646
class PenelopeBagieu(GenericNavigableComic):
    """Retriever for the comics of Penelope Bagieu's blog."""
    name = 'bagieu'
    long_name = 'Ma vie est tout a fait fascinante (Bagieu)'
    url = 'http://www.penelope-jolicoeur.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return a link-like dict pointing to the first post."""
        first_url = 'http://www.penelope-jolicoeur.com/2007/02/ma-vie-mon-oeuv.html'
        return {'href': first_url}

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the dict describing the post found in `soup`.

        The date header is parsed using the "fr_FR.utf8" locale.
        """
        date_header = soup.find('h2', class_='date-header').string
        post_day = string_to_date(date_header, "%A %d %B %Y", "fr_FR.utf8")
        body_imgs = soup.find('div', class_='entry-body').find_all('img')
        post_title = soup.find('h3', class_='entry-header').string
        return {
            'title': post_title,
            'img': [img['src'] for img in body_imgs],
            'month': post_day.month,
            'year': post_day.year,
            'day': post_day.day,
        }
672
673
674
class OneOneOneOneComic(GenericNavigableComic):
    """Retriever for the 1111 Comics."""
    # Also on http://comics1111.tumblr.com
    # Also on https://tapastic.com/series/1111-Comics
    name = '1111'
    long_name = '1111 Comics'
    url = 'http://www.1111comics.me'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the dict (title, date, og:image urls) for the comic in `soup`."""
        comic_title = soup.find('h1', class_='comic-title').find('a').string
        meta_header = soup.find('header', class_='comic-meta entry-meta')
        comic_day = string_to_date(meta_header.find('a').string, "%B %d, %Y")
        og_images = soup.find_all('meta', property='og:image')
        return {
            'title': comic_title,
            'month': comic_day.month,
            'year': comic_day.year,
            'day': comic_day.day,
            'img': [meta['content'] for meta in og_images],
        }
698
699
700
class AngryAtNothing(GenericNavigableComic):
    """Retriever for the Angry at Nothing comics."""
    # Also on http://tapastic.com/series/Comics-yeah-definitely-comics-
    name = 'angry'
    long_name = 'Angry At Nothing'
    url = 'http://www.angryatnothing.net'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the dict (title, date, og:image urls) for the comic in `soup`."""
        comic_title = soup.find('h1', class_='comic-title').find('a').string
        meta_header = soup.find('header', class_='comic-meta entry-meta')
        comic_day = string_to_date(meta_header.find('a').string, "%B %d, %Y")
        og_images = soup.find_all('meta', property='og:image')
        return {
            'title': comic_title,
            'month': comic_day.month,
            'year': comic_day.year,
            'day': comic_day.day,
            'img': [meta['content'] for meta in og_images],
        }
723
724
725
class NeDroid(GenericNavigableComic):
    """Class to retrieve NeDroid comics."""
    name = 'nedroid'
    long_name = 'NeDroid'
    url = 'http://nedroid.com'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The comic number is read from the short link ('?p=<num>'), the
        date from the file name of the single comic image and both titles
        from that image's 'alt' and 'title' attributes.
        """
        # host names are escaped so that their dots only match literal dots
        short_url_re = re.compile(r'^%s/\?p=([0-9]*)' % re.escape(cls.url))
        comic_url_re = re.compile(r'//nedroid\.com/comics/([0-9]*)-([0-9]*)-([0-9]*).*')
        short_url = cls.get_url_from_link(soup.find('link', rel='shortlink'))
        num = int(short_url_re.match(short_url).groups()[0])
        imgs = soup.find('div', id='comic').find_all('img')
        # checked before the image is used so that it actually guards imgs[0]
        assert len(imgs) == 1
        img = imgs[0]
        year, month, day = [int(s) for s in comic_url_re.match(img['src']).groups()]
        title = img['alt']
        title2 = img['title']
        return {
            'short_url': short_url,
            'title': title,
            'title2': title2,
            'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
            'day': day,
            'month': month,
            'year': year,
            'num': num,
        }
756
757
758
class Garfield(GenericNavigableComic):
    """Class to retrieve Garfield comics."""
    # Also on http://www.gocomics.com/garfield
    name = 'garfield'
    long_name = 'Garfield'
    url = 'https://garfield.com'

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics."""
        return {'href': 'https://garfield.com/comic/1978/06/19'}

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next (or previous) comic: the right (or left) arrow."""
        return last_soup.find('a', class_='comic-arrow-right' if next_ else 'comic-arrow-left')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The date is read from the year/month/day components of the comic
        url; the images are the 'img-responsive' ones of the comic display.
        """
        url = cls.get_url_from_link(link)
        # the url is escaped so that its dots only match literal dots
        date_re = re.compile('^%s/comic/([0-9]*)/([0-9]*)/([0-9]*)' % re.escape(cls.url))
        year, month, day = [int(s) for s in date_re.match(url).groups()]
        imgs = soup.find('div', class_='comic-display').find_all('img', class_='img-responsive')
        return {
            'month': month,
            'year': year,
            'day': day,
            'img': [i['src'] for i in imgs],
        }
787
788
789
class Dilbert(GenericNavigableComic):
    """Retrieve the Dilbert strips from dilbert.com."""
    # Also on http://www.gocomics.com/dilbert-classics
    name = 'dilbert'
    long_name = 'Dilbert'
    url = 'http://dilbert.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return a link-like dict pointing at the first strip."""
        first_strip = 'http://dilbert.com/strip/1989-04-16'
        return {'href': first_strip}

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor inside the navigation div, or None without div."""
        nav_class = 'nav-comic nav-right' if next_ else 'nav-comic nav-left'
        nav_div = last_soup.find('div', class_=nav_class)
        if not nav_div:
            return None
        return nav_div.find('a')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the strip details read from the page's meta tags."""
        def meta_content(prop):
            """Content of the first meta tag having the given property."""
            return soup.find('meta', property=prop)['content']

        title = meta_content('og:title')
        image_metas = soup.find_all('meta', property='og:image')
        description = meta_content('og:description')
        published = string_to_date(meta_content('article:publish_date'), "%B %d, %Y")
        author = meta_content('article:author')
        tags = meta_content('article:tag')
        return {
            'title': title,
            'description': description,
            'img': [meta['content'] for meta in image_metas],
            'author': author,
            'tags': tags,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
827
828
829
class VictimsOfCircumsolar(GenericNavigableComic):
    """Retrieve the Victims Of Circumsolar comics."""
    name = 'circumsolar'
    long_name = 'Victims Of Circumsolar'
    url = 'http://www.victimsofcircumsolar.com'
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_first_comic_link(cls):
        """Return a link-like dict pointing at the first comic."""
        first_comic = 'http://www.victimsofcircumsolar.com/comic/modern-addiction'
        return {'href': first_comic}

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, description and image urls of a comic page."""
        # Date is on the archive page
        # When several og: tags are present, the last one is used.
        title_metas = soup.find_all('meta', property='og:title')
        title = title_metas[-1]['content']
        desc_metas = soup.find_all('meta', property='og:description')
        description = desc_metas[-1]['content']
        pictures = soup.find('div', id='comic').find_all('img')
        assert all(pic['title'] == pic['alt'] == title for pic in pictures)
        return {
            'title': title,
            'description': description,
            'img': [pic['src'] for pic in pictures],
        }
854
855
856
class ThreeWordPhrase(GenericNavigableComic):
    """Retrieve the Three Word Phrase comics."""
    # Also on http://www.threewordphrase.tumblr.com
    name = 'threeword'
    long_name = 'Three Word Phrase'
    url = 'http://threewordphrase.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the parent element of the 'first' button image."""
        home = get_soup_at_url(cls.url)
        first_button = home.find('img', src='/firstlink.gif')
        return first_button.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the parent of the next/prev button, or None if it has no href."""
        button_src = '/nextlink.gif' if next_ else '/prevlink.gif'
        wrapper = last_soup.find('img', src=button_src).parent
        if wrapper.get('href') is None:
            return None
        return wrapper

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the page title, the images' alt texts and the image urls."""
        # Images whose src ends with one of these are site decoration.
        ignored_endings = ('link.gif', '32.png', 'twpbookad.jpg',
                           'merchad.jpg', 'header.gif', 'tipjar.jpg')
        title_tag = soup.find('title')
        pictures = []
        for picture in soup.find_all('img'):
            if picture['src'].endswith(ignored_endings):
                continue
            pictures.append(picture)
        alts = (picture.get('alt') for picture in pictures)
        return {
            'title': title_tag.string if title_tag else None,
            'title2': '  '.join(alt for alt in alts if alt),
            'img': [urljoin_wrapper(cls.url, picture['src']) for picture in pictures],
        }
887
888
889
class DeadlyPanel(GenericEmptyComic, GenericNavigableComic):
    """Retrieve the Deadly Panel comics."""
    # Also on https://tapastic.com/series/deadlypanel
    name = 'deadly'
    long_name = 'Deadly Panel'
    url = 'http://www.deadlypanel.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the urls of the images found in the 'comic' div."""
        comic_div = soup.find('div', id='comic')
        pictures = comic_div.find_all('img')
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        return {
            'img': [pic['src'] for pic in pictures],
        }
906
907
908
class TheGentlemanArmchair(GenericNavigableComic):
    """Retrieve The Gentleman's Armchair comics."""
    name = 'gentlemanarmchair'
    long_name = 'The Gentleman Armchair'
    url = 'http://thegentlemansarmchair.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, author, publication date and image urls of a post."""
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find('div', id='comic').find_all('img')
        return {
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'author': author,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
932
933
934
class MyExtraLife(GenericNavigableComic):
    """Retrieve the My Extra Life comics."""
    name = 'extralife'
    long_name = 'My Extra Life'
    url = 'http://www.myextralife.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'first comic' navigation anchor of the home page."""
        home = get_soup_at_url(cls.url)
        return home.find('a', class_='comic_nav_link first_comic_link')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, publication date and image urls of a comic page."""
        title = soup.find("h1", class_="comic_title").string
        raw_date = soup.find("span", class_="comic_date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find_all("img", class_="comic")
        assert all(pic['alt'] == pic['title'] == title for pic in pictures)
        # Images with an empty src are left out.
        sources = [pic['src'] for pic in pictures if pic["src"]]
        return {
            'title': title,
            'img': sources,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
961
962
963
class SaturdayMorningBreakfastCereal(GenericNavigableComic):
    """Retrieve the Saturday Morning Breakfast Cereal comics."""
    # Also on http://www.gocomics.com/saturday-morning-breakfast-cereal
    # Also on http://smbc-comics.tumblr.com
    name = 'smbc'
    long_name = 'Saturday Morning Breakfast Cereal'
    url = 'http://www.smbc-comics.com'
    get_navi_link = get_a_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the rel='start' anchor of the home page."""
        home = get_soup_at_url(cls.url)
        return home.find('a', rel='start')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, date and image urls (main image plus optional extra)."""
        main_img = soup.find('img', id='cc-comic')
        sources = [main_img['src']]
        # The image of the 'aftercomic' div is added when present and non-empty.
        after_div = soup.find('div', id='aftercomic')
        if after_div:
            after_src = after_div.find('img')['src']
            if after_src:
                sources.append(after_src)
        raw_date = soup.find('div', class_='cc-publishtime').contents[0]
        published = string_to_date(raw_date, "%B %d, %Y")
        return {
            'title': main_img['title'],
            'img': [urljoin_wrapper(cls.url, src) for src in sources],
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
994
995
996
class PerryBibleFellowship(GenericListableComic):
    """Retrieve the Perry Bible Fellowship comics."""
    name = 'pbf'
    long_name = 'Perry Bible Fellowship'
    url = 'http://pbfcomics.com'
    get_url_from_archive_element = join_cls_url_to_href

    @classmethod
    def get_archive_elements(cls):
        """Return the home page's comic anchors in reverse page order."""
        numbered_href_re = re.compile('^/[0-9]*/$')
        anchors = get_soup_at_url(cls.url).find_all('a', href=numbered_href_re)
        return reversed(anchors)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return number, name and image url of the comic behind `link`."""
        comic_url = cls.get_url_from_archive_element(link)
        strip_src_re = re.compile('^/archive_b/PBF.*')
        comic_name = link.string
        number = int(link['name'])
        # The anchor's href is expected to be its number between slashes.
        assert link['href'] == '/%d/' % number
        pictures = soup.find_all('img', src=strip_src_re)
        assert len(pictures) == 1
        assert pictures[0]['alt'] == comic_name
        return {
            'num': number,
            'name': comic_name,
            'img': [urljoin_wrapper(comic_url, pic['src']) for pic in pictures],
            'prefix': '%d-' % number,
        }
1026
1027
1028
class Mercworks(GenericNavigableComic):
    """Retrieve the Mercworks comics."""
    # Also on http://mercworks.tumblr.com
    name = 'mercworks'
    long_name = 'Mercworks'
    url = 'http://mercworks.net'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the comic details read from the page's meta tags."""
        def named_meta_content(meta_name):
            """Content of the first meta tag having the given name."""
            return soup.find('meta', attrs={'name': meta_name})['content']

        title = soup.find('meta', property='og:title')['content']
        # The description is optional: fall back on an empty string.
        desc_meta = soup.find('meta', property='og:description')
        if desc_meta:
            description = desc_meta['content']
        else:
            description = ""
        author = named_meta_content('shareaholic:article_author_name')
        timestamp = named_meta_content('shareaholic:article_published_time')
        # Only the first 10 characters (the YYYY-MM-DD part) are parsed.
        published = string_to_date(timestamp[:10], "%Y-%m-%d")
        image_metas = soup.find_all('meta', property='og:image')
        return {
            'img': [meta['content'] for meta in image_metas],
            'title': title,
            'author': author,
            'desc': description,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
1057
1058
1059
class BerkeleyMews(GenericListableComic):
    """Retrieve the Berkeley Mews comics."""
    # Also on http://mews.tumblr.com
    # Also on http://www.gocomics.com/berkeley-mews
    name = 'berkeley'
    long_name = 'Berkeley Mews'
    url = 'http://www.berkeleymews.com'
    get_url_from_archive_element = get_href
    # Captures the comic number from urls ending with '?p=<number>'.
    comic_num_re = re.compile('%s/\\?p=([0-9]*)$' % url)

    @classmethod
    def get_archive_elements(cls):
        """Return the archive page's comic anchors in reverse page order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, "?page_id=2"))
        anchors = archive_soup.find_all('a', href=cls.comic_num_re)
        return reversed(anchors)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return number, titles, image url and date (from the image url)."""
        img_date_re = re.compile('.*/([0-9]*)-([0-9]*)-([0-9]*)-.*')
        comic_url = cls.get_url_from_archive_element(link)
        number = int(cls.comic_num_re.match(comic_url).group(1))
        picture = soup.find('div', id='comic').find('img')
        assert picture['alt'] == picture['title']
        hover_text = picture['title']
        picture_url = picture['src']
        year, month, day = map(int, img_date_re.match(picture_url).groups())
        return {
            'num': number,
            'title': link.string,
            'title2': hover_text,
            'img': [picture_url],
            'year': year,
            'month': month,
            'day': day,
        }
1094
1095
1096
class GenericBouletCorp(GenericNavigableComic):
    """Shared logic to retrieve BouletCorp comics, whatever the language."""
    # Also on http://bouletcorp.tumblr.com
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the first anchor of the home page's 'centered_nav' div."""
        navigation = get_soup_at_url(cls.url).find('div', id='centered_nav')
        return navigation.find_all('a')[0]

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return date (from the url), title, image urls and image titles."""
        comic_url = cls.get_url_from_link(link)
        url_date_re = re.compile('^%s/([0-9]*)/([0-9]*)/([0-9]*)/' % cls.url)
        year, month, day = map(int, url_date_re.match(comic_url).groups())
        story = soup.find('div', id='notes').find('div', class_='storycontent')
        pictures = story.find_all('img')
        hover_texts = (pic.get('title') for pic in pictures)
        texts = '  '.join(text for text in hover_texts if text)
        title = soup.find('title').string
        # Images without a src attribute are ignored.
        sources = [convert_iri_to_plain_ascii_uri(pic['src'])
                   for pic in pictures
                   if pic.get('src') is not None]
        return {
            'img': sources,
            'title': title,
            'texts': texts,
            'year': year,
            'month': month,
            'day': day,
        }
1123
1124
1125
class BouletCorp(GenericBouletCorp):
    """Retrieve the BouletCorp comics from www.bouletcorp.com."""
    # All the retrieval logic is inherited from GenericBouletCorp.
    name = 'boulet'
    long_name = 'Boulet Corp'
    url = 'http://www.bouletcorp.com'
1130
1131
1132
class BouletCorpEn(GenericBouletCorp):
    """Retrieve the BouletCorp comics from english.bouletcorp.com."""
    # All the retrieval logic is inherited from GenericBouletCorp.
    name = 'boulet_en'
    long_name = 'Boulet Corp English'
    url = 'http://english.bouletcorp.com'
1137
1138
1139
class AmazingSuperPowers(GenericNavigableComic):
    """Retrieve the Amazing Super Powers comics."""
    name = 'asp'
    long_name = 'Amazing Super Powers'
    url = 'http://www.amazingsuperpowers.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title (joined image titles), author, date and image urls."""
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find('div', id='comic').find_all('img')
        title = ' '.join(pic['title'] for pic in pictures)
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        return {
            'title': title,
            'author': author,
            'img': [pic['src'] for pic in pictures],
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
1164
1165
1166
class ToonHole(GenericListableComic):
    """Retrieve the Toon Hole comics."""
    # Also on http://tapastic.com/series/TOONHOLE
    name = 'toonhole'
    long_name = 'Toon Hole'
    url = 'http://www.toonhole.com'
    get_url_from_archive_element = get_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title (from the archive link), date and image urls."""
        title = link.string
        raw_date = soup.find('div', class_='comicdate').string.strip()
        published = string_to_date(remove_st_nd_rd_th_from_date(raw_date), "%B %d, %Y")
        pictures = soup.find('div', id='comic').find_all('img')
        assert all(pic['alt'] == pic['title'] == title for pic in pictures)
        return {
            'title': title,
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [convert_iri_to_plain_ascii_uri(pic['src']) for pic in pictures],
        }

    @classmethod
    def get_archive_elements(cls):
        """Return the archive page's rel='bookmark' anchors in reverse page order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive/'))
        bookmarks = archive_soup.find_all('a', rel='bookmark')
        return reversed(bookmarks)
1194
1195
1196
class Channelate(GenericNavigableComic):
    """Retrieve the Channelate comics."""
    name = 'channelate'
    long_name = 'Channelate'
    url = 'http://www.channelate.com'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return author, date, title and image urls (extra panel included).

        When the page has an 'extrapanelbutton' div, the page it links to is
        fetched as well and its 'extrapanelimage' images are appended.
        """
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, '%Y/%m/%d')
        title = soup.find('meta', property='og:title')['content']
        comic_div = soup.find('div', id='comic')
        if comic_div:
            pictures = comic_div.find_all('img')
        else:
            pictures = []
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        bonus_url = None
        bonus_button = soup.find('div', id='extrapanelbutton')
        if bonus_button:
            bonus_url = bonus_button.find('a')['href']
            bonus_soup = get_soup_at_url(bonus_url)
            pictures += bonus_soup.find_all('img', class_='extrapanelimage')
        return {
            'url_extra': bonus_url,
            'title': title,
            'author': author,
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [pic['src'] for pic in pictures],
        }
1230
1231
1232
class CyanideAndHappiness(GenericNavigableComic):
    """Class to retrieve Cyanide And Happiness comics."""
    name = 'cyanide'
    long_name = 'Cyanide and Happiness'
    url = 'http://explosm.net'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics (anchor titled 'Oldest comic')."""
        return get_soup_at_url(cls.url).find('a', title='Oldest comic')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to the next (or previous) comic.

        Returns None when the navigation anchor is missing from the page or
        when it carries no href attribute.
        """
        # NOTE(review): the trailing space in 'previous-comic ' is kept as
        # found; confirm it is what the site's markup requires.
        link = last_soup.find('a', class_='next-comic' if next_ else 'previous-comic ')
        # find() returns None when nothing matches: do not call .get() on it.
        if link is None or link.get('href') is None:
            return None
        return link

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with the comic number (second to last component of
        the og:url meta content), the author (without its 'by ' prefix),
        the publication date, a file prefix and the image urls.
        """
        url2 = soup.find('meta', property='og:url')['content']
        num = int(url2.split('/')[-2])
        date_str = soup.find('h3').find('a').string
        day = string_to_date(date_str, '%Y.%m.%d')
        author = soup.find('small', class_="author-credit-name").string
        assert author.startswith('by ')
        author = author[3:]
        imgs = soup.find_all('img', id='main-comic')
        return {
            'num': num,
            'author': author,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'prefix': '%d-' % num,
            'img': [convert_iri_to_plain_ascii_uri(urljoin_wrapper(cls.url, i['src'])) for i in imgs]
        }
1269
1270
1271
class MrLovenstein(GenericComic):
    """Retrieve the Mr. Lovenstein comics."""
    # Also on https://tapastic.com/series/MrLovenstein
    name = 'mrlovenstein'
    long_name = 'Mr. Lovenstein'
    url = 'http://www.mrlovenstein.com'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generate the comics following `last_comic` (all of them if falsy).

        The range of comic numbers is taken from the '/comic/<number>' links
        of the home page; each page in the range is then fetched in turn.
        """
        # TODO: more info from http://www.mrlovenstein.com/archive
        number_href_re = re.compile('^/comic/([0-9]*)$')
        home_links = get_soup_at_url(cls.url).find_all('a', href=number_href_re)
        numbers = [int(number_href_re.match(anchor['href']).group(1))
                   for anchor in home_links]
        start, stop = min(numbers), max(numbers)
        if last_comic:
            start = last_comic['num'] + 1
        for number in range(start, stop + 1):
            comic_url = urljoin_wrapper(cls.url, '/comic/%d' % number)
            page = get_soup_at_url(comic_url)
            # Images are kept in reverse page order.
            pictures = page.find_all('img', src=re.compile('^/images/comics/'))[::-1]
            description = page.find('meta', attrs={'name': 'description'})['content']
            hover_texts = (pic.get('title') for pic in pictures)
            yield {
                'url': comic_url,
                'num': number,
                'texts': '  '.join(text for text in hover_texts if text),
                'img': [urljoin_wrapper(comic_url, pic['src']) for pic in pictures],
                'description': description,
            }
1300
1301
1302
class DinosaurComics(GenericListableComic):
    """Retrieve the Dinosaur Comics."""
    name = 'dinosaur'
    long_name = 'Dinosaur Comics'
    url = 'http://www.qwantz.com'
    get_url_from_archive_element = get_href
    # Captures the comic number from 'index.php?comic=<number>' urls.
    comic_link_re = re.compile('^%s/index.php\\?comic=([0-9]*)$' % url)

    @classmethod
    def get_archive_elements(cls):
        """Return the archive page's comic anchors in reverse page order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive.php'))
        anchors = archive_soup.find_all('a', href=cls.comic_link_re)
        # first link is random -> skip it
        return reversed(anchors[1:])

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return number, date and text (from the archive entry) and image."""
        comic_url = cls.get_url_from_archive_element(link)
        number = int(cls.comic_link_re.match(comic_url).group(1))
        raw_date = link.string
        # The text is read from the node right after the archive anchor.
        text = link.next_sibling.string
        published = string_to_date(remove_st_nd_rd_th_from_date(raw_date), "%B %d, %Y")
        strip_src_re = re.compile('^%s/comics/' % cls.url)
        picture = soup.find('img', src=strip_src_re)
        return {
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [picture.get('src')],
            'title': picture.get('title'),
            'text': text,
            'num': number,
        }
1335
1336
1337
class ButterSafe(GenericListableComic):
    """Retrieve the ButterSafe comics."""
    name = 'butter'
    long_name = 'ButterSafe'
    url = 'http://buttersafe.com'
    get_url_from_archive_element = get_href
    # Captures the three numeric url components read as year, month, day.
    comic_link_re = re.compile('^%s/([0-9]*)/([0-9]*)/([0-9]*)/.*' % url)

    @classmethod
    def get_archive_elements(cls):
        """Return the archive page's comic anchors in reverse page order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive/'))
        anchors = archive_soup.find_all('a', href=cls.comic_link_re)
        return reversed(anchors)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title (archive link text), date (from the url) and image."""
        comic_url = cls.get_url_from_archive_element(link)
        title = link.string
        year, month, day = map(int, cls.comic_link_re.match(comic_url).groups())
        picture = soup.find('div', id='comic').find('img')
        assert picture['alt'] == title
        return {
            'title': title,
            'day': day,
            'month': month,
            'year': year,
            'img': [picture['src']],
        }
1365
1366
1367
class CalvinAndHobbes(GenericComic):
    """Class to retrieve Calvin and Hobbes comics."""
    # Also on http://www.gocomics.com/calvinandhobbes/
    name = 'calvin'
    long_name = 'Calvin and Hobbes'
    # This is not through any official webpage but eh...
    url = 'http://marcel-oehler.marcellosendos.ch/comics/ch/'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generate the comics dated strictly after `last_comic`.

        The index page links to one page per month ('<year>/<month>/');
        each relevant month page is fetched and its images, whose file name
        starts with '<year><month><day>', are yielded as comics.

        Args:
            last_comic: the last comic already retrieved, or a falsy value
                to start from the beginning (1985-11-01).

        Yields:
            dict with keys 'url', 'year', 'month', 'day' and 'img'.
        """
        last_date = get_date_for_comic(
            last_comic) if last_comic else date(1985, 11, 1)
        link_re = re.compile('^([0-9]*)/([0-9]*)/')
        for link in get_soup_at_url(cls.url).find_all('a', href=link_re):
            url = link['href']
            # The string forms are kept for building the image pattern and
            # urls; the int forms are used for dates.
            year_str, month_str = link_re.match(url).groups()
            year, month = int(year_str), int(month_str)
            # Skip months that cannot contain anything newer than last_date
            # (31 days past the 1st reaches beyond the end of any month).
            if date(year, month, 1) + timedelta(days=31) < last_date:
                continue
            img_re = re.compile('^%s%s([0-9]*)' % (year_str, month_str))
            month_url = urljoin_wrapper(cls.url, url)
            for img in get_soup_at_url(month_url).find_all('img', src=img_re):
                img_src = img['src']
                day = int(img_re.match(img_src).groups()[0])
                comic_date = date(year, month, day)
                if comic_date > last_date:
                    yield {
                        'url': month_url,
                        'year': year,
                        'month': month,
                        'day': day,
                        'img': ['%s%s/%s/%s' % (cls.url, year_str, month_str, img_src)],
                    }
                    # Only strictly increasing dates are yielded.
                    last_date = comic_date
1400
1401
1402
class AbstruseGoose(GenericListableComic):
    """Retrieve the Abstruse Goose comics."""
    name = 'abstruse'
    long_name = 'Abstruse Goose'
    url = 'http://abstrusegoose.com'
    get_url_from_archive_element = get_href
    # Captures the comic number from a comic page url.
    comic_url_re = re.compile('^%s/([0-9]*)$' % url)
    # Matches the src of the images located under 'strips/'.
    comic_img_re = re.compile('^%s/strips/.*' % url)

    @classmethod
    def get_archive_elements(cls):
        """Return the archive page's comic anchors, in page order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive'))
        return archive_soup.find_all('a', href=cls.comic_url_re)

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Return number (from the url), title (link text) and image url."""
        page_url = cls.get_url_from_archive_element(archive_elt)
        number = int(cls.comic_url_re.match(page_url).group(1))
        return {
            'num': number,
            'title': archive_elt.string,
            'img': [soup.find('img', src=cls.comic_img_re)['src']]
        }
1425
1426
1427
class PhDComics(GenericNavigableComic):
    """Retrieve the PhD Comics."""
    name = 'phd'
    long_name = 'PhD Comics'
    url = 'http://phdcomics.com/comics/archive.php'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the parent element of the 'first' button image."""
        archive_soup = get_soup_at_url(cls.url)
        first_button = archive_soup.find('img', src='images/first_button.gif')
        return first_button.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the parent of the next/prev button image, or None without it."""
        button_src = 'images/next_button.gif' if next_ else 'images/prev_button.gif'
        button = last_soup.find('img', src=button_src)
        if button is None:
            return None
        return button.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return date, image url and title of a comic page.

        When the date found on the page cannot be parsed, a message is
        printed and today's date is used instead.
        """
        date_font = soup.find('font', face='Arial,Helvetica,Geneva,Swiss,SunSans-Regular', color='white')
        raw_date = date_font.string.strip()
        try:
            published = string_to_date(raw_date, '%m/%d/%Y')
        except ValueError:
            print("Invalid date %s" % raw_date)
            published = date.today()
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        picture_url = soup.find('img', id='comic')['src']
        return {
            'year': published.year,
            'month': published.month,
            'day': published.day,
            'img': [picture_url],
            'title': title,
        }
1461
1462
1463
class Octopuns(GenericNavigableComic):
    """Retrieve the Octopuns comics."""
    # Also on http://octopuns.tumblr.com
    name = 'octopuns'
    long_name = 'Octopuns'
    url = 'http://www.octopuns.net'

    @classmethod
    def get_first_comic_link(cls):
        """Return the parent element of the 'First.png' button image."""
        home = get_soup_at_url(cls.url)
        first_button = home.find('img', src=re.compile('.*/First.png'))
        return first_button.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the parent of the Next/Back button, or None if it has no href."""
        button_pattern = '.*/Next.png' if next_ else '.*/Back.png'
        wrapper = last_soup.find('img', src=re.compile(button_pattern)).parent
        if wrapper.get('href') is None:
            return None
        return wrapper

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, publication date and image urls of a post."""
        title = soup.find('h3', class_='post-title entry-title').string
        raw_date = soup.find('h2', class_='date-header').string
        published = string_to_date(raw_date, "%A, %B %d, %Y")
        image_links = soup.find_all('link', rel='image_src')
        return {
            'img': [image_link['href'] for image_link in image_links],
            'title': title,
            'day': published.day,
            'month': published.month,
            'year': published.year,
        }
1494
1495
1496
class Quarktees(GenericNavigableComic):
    """Retrieve the comics from the Quarktees news blog."""
    name = 'quarktees'
    long_name = 'Quarktees'
    url = 'http://www.quarktees.com/blogs/news'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return a link-like dict pointing at the first article."""
        first_article = 'http://www.quarktees.com/blogs/news/12486621-coming-soon'
        return {'href': first_article}

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor towards the next (or previous) article."""
        anchor_id = 'article-next' if next_ else 'article-prev'
        return last_soup.find('a', id=anchor_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the title and the urls of the images of the article."""
        title = soup.find('meta', property='og:title')['content']
        pictures = soup.find('div', class_='single-article').find_all('img')
        return {
            'title': title,
            'img': [urljoin_wrapper(cls.url, pic['src']) for pic in pictures],
        }
1522
1523
1524
class OverCompensating(GenericNavigableComic):
    """Retrieve the Over Compensating comics."""
    name = 'compensating'
    long_name = 'Over Compensating'
    url = 'http://www.overcompensating.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the home page anchor whose href ends with 'comic=1'."""
        home = get_soup_at_url(cls.url)
        return home.find('a', href=re.compile('comic=1$'))

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor towards the next (or previous) comic."""
        anchor_title = 'next comic' if next_ else 'go back already'
        return last_soup.find('a', title=anchor_title)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return number (from the url), image url and image title."""
        strip_src_re = re.compile('^/oc/comics/.*')
        number_re = re.compile('.*comic=([0-9]*)$')
        page_url = cls.get_url_from_link(link)
        number = int(number_re.match(page_url).group(1))
        picture = soup.find('img', src=strip_src_re)
        return {
            'num': number,
            'img': [urljoin_wrapper(page_url, picture['src'])],
            'title': picture.get('title')
        }
1553
1554
1555
class Oglaf(GenericNavigableComic):
    """Retriever for the Oglaf comics."""
    url = 'http://oglaf.com'
    name = 'oglaf'
    long_name = 'Oglaf [NSFW]'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics."""
        home_soup = get_soup_at_url(cls.url)
        # The link is the parent of the div with id 'st'.
        return home_soup.find("div", id="st").parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to the next (or previous) comic, None if there is none."""
        marker = last_soup.find("div", id="nx" if next_ else "pvs")
        if not marker:
            return None
        return marker.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics."""
        title = soup.find('title').string
        header_pics = soup.find('div', id='tt').find_all('img')
        assert len(header_pics) == 1
        strip_pics = soup.find_all('img', id='strip')
        assert len(strip_pics) == 1
        # The title image comes first, followed by the strip itself.
        pictures = header_pics + strip_pics
        hover_texts = [pic['title'] for pic in pictures]
        return {
            'title': title,
            'img': [pic['src'] for pic in pictures],
            'description': ' '.join(hover_texts),
        }
1587
1588
1589
class ScandinaviaAndTheWorld(GenericNavigableComic):
    """Retriever for the Scandinavia And The World comics."""
    url = 'http://satwcomic.com'
    name = 'satw'
    long_name = 'Scandinavia And The World'

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics."""
        first_url = 'http://satwcomic.com/sweden-denmark-and-norway'
        return {'href': first_url}

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to the next (or previous) comic via its access key."""
        key = 'n' if next_ else 'p'
        return last_soup.find('a', accesskey=key)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics."""
        label = soup.find('meta', attrs={'name': 'twitter:label1'})
        title = label['content']
        desc = soup.find('meta', property='og:description')['content']
        img_urls = []
        for picture in soup.find_all('img', itemprop="image"):
            img_urls.append(picture['src'])
        return {'title': title, 'description': desc, 'img': img_urls}
1615
1616
1617
class SomethingOfThatIlk(GenericEmptyComic):
    """Placeholder for the Something Of That Ilk comics.

    The comic does not exist anymore.
    """
    url = 'http://www.somethingofthatilk.com'
    long_name = 'Something Of That Ilk'
    name = 'somethingofthatilk'
1622
1623
1624
class InfiniteMonkeyBusiness(GenericNavigableComic):
    """Retriever for the Infinite Monkey Business comics."""
    url = 'http://infinitemonkeybusiness.net'
    name = 'monkey'
    long_name = 'Infinite Monkey Business'
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics."""
        first_url = 'http://infinitemonkeybusiness.net/comic/pillory/'
        return {'href': first_url}

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics."""
        title = soup.find('meta', property='og:title')['content']
        comic_div = soup.find('div', id='comic')
        img_urls = [pic['src'] for pic in comic_div.find_all('img')]
        return {'title': title, 'img': img_urls}
1645
1646
1647
class Wondermark(GenericListableComic):
    """Retriever for the Wondermark comics."""
    url = 'http://wondermark.com'
    name = 'wondermark'
    long_name = 'Wondermark'
    get_url_from_archive_element = get_href

    @classmethod
    def get_archive_elements(cls):
        """Get the archive links, reversed from their order in the page."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive/'))
        bookmarks = archive_soup.find_all('a', rel='bookmark')
        return reversed(bookmarks)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics."""
        posted = soup.find('div', class_='postdate').find('em').string
        day = string_to_date(remove_st_nd_rd_th_from_date(posted), "%B %d, %Y")
        # Values used when the page has no comic div.
        img_src, alt, title = [], '', ''
        comic_div = soup.find('div', id='comic')
        if comic_div:
            picture = comic_div.find('img')
            img_src = [picture['src']]
            alt = picture['alt']
            assert alt == picture['title']
            title = soup.find('meta', property='og:title')['content']
        tag_links = soup.find('div', class_='postmeta').find_all('a', rel='tag')
        return {
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': img_src,
            'title': title,
            'alt': alt,
            'tags': ' '.join(t.string for t in tag_links),
        }
1684
1685
1686
class WarehouseComic(GenericNavigableComic):
    """Retriever for the Warehouse Comic comics."""
    url = 'http://warehousecomic.com'
    name = 'warehouse'
    long_name = 'Warehouse Comic'
    get_navi_link = get_link_rel_next
    get_first_comic_link = get_a_navi_navifirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics."""
        title = soup.find('h2', class_='post-title').string
        posted = soup.find('span', class_='post-date').string
        day = string_to_date(posted, "%B %d, %Y")
        comic_div = soup.find('div', id='comic')
        img_urls = [pic['src'] for pic in comic_div.find_all('img')]
        return {
            'img': img_urls,
            'title': title,
            'day': day.day,
            'month': day.month,
            'year': day.year,
        }
1708
1709
1710
class JustSayEh(GenericNavigableComic):
    """Class to retrieve Just Say Eh comics."""
    # Also on http://tapastic.com/series/Just-Say-Eh
    name = 'justsayeh'
    long_name = 'Just Say Eh'
    url = 'http://www.justsayeh.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with the image urls, the post title and the alt text
        of the first image (empty string when the page has no image).
        """
        title = soup.find('h2', class_='post-title').string
        imgs = soup.find("div", id="comic").find_all("img")
        assert all(i['alt'] == i['title'] for i in imgs)
        # Do not index into an empty list: a page without image yields an
        # empty alt text instead of an IndexError.
        alt = imgs[0]['alt'] if imgs else ""
        return {
            'img': [i['src'] for i in imgs],
            'title': title,
            'alt': alt,
        }
1731
1732
1733
class MouseBearComedy(GenericNavigableComic):
    """Retriever for the Mouse Bear Comedy comics."""
    # Also on http://mousebearcomedy.tumblr.com
    url = 'http://www.mousebearcomedy.com'
    name = 'mousebear'
    long_name = 'Mouse Bear Comedy'
    get_navi_link = get_a_navi_comicnavnext_navinext
    get_first_comic_link = get_a_navi_navifirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics."""
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        posted = soup.find("span", class_="post-date").string
        day = string_to_date(posted, '%B %d, %Y')
        pictures = soup.find("div", id="comic").find_all("img")
        # Alt and hover texts are both expected to repeat the post title.
        assert all(pic['alt'] == pic['title'] == title for pic in pictures)
        return {
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'author': author,
        }
1759
1760
1761
class BigFootJustice(GenericNavigableComic):
    """Retriever for the Big Foot Justice comics."""
    # Also on http://tapastic.com/series/bigfoot-justice
    url = 'http://bigfootjustice.com'
    name = 'bigfoot'
    long_name = 'Big Foot Justice'
    get_navi_link = get_a_navi_comicnavnext_navinext
    get_first_comic_link = get_a_navi_navifirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics."""
        pictures = soup.find('div', id='comic').find_all('img')
        assert all(pic['title'] == pic['alt'] for pic in pictures)
        # The title is built from the hover texts of the images.
        hover_texts = [pic['title'] for pic in pictures]
        return {
            'img': [pic['src'] for pic in pictures],
            'title': ' '.join(hover_texts),
        }
1780
1781
1782
class RespawnComic(GenericNavigableComic):
    """Class to retrieve Respawn Comic."""
    # Also on http://respawncomic.tumblr.com
    name = 'respawn'
    long_name = 'Respawn Comic'
    # The url used to end with a stray space, which is not a valid url.
    url = 'http://respawncomic.com'
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics."""
        return {'href': 'http://respawncomic.com/comic/c0001/'}

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Title, author, date and images are all read from <meta> tags.
        """
        title = soup.find('meta', property='og:title')['content']
        author = soup.find('meta', attrs={'name': 'shareaholic:article_author_name'})['content']
        date_str = soup.find('meta', attrs={'name': 'shareaholic:article_published_time'})['content']
        # Only the leading 'YYYY-MM-DD' part of the timestamp is parsed.
        date_str = date_str[:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        imgs = soup.find_all('meta', property='og:image')
        # og:image urls that are excluded from the comic images.
        skip_imgs = {
            'http://respawncomic.com/wp-content/uploads/2016/03/site/HAROLD2.png',
            'http://respawncomic.com/wp-content/uploads/2016/03/site/DEVA.png'
        }
        return {
            'title': title,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [i['content'] for i in imgs if i['content'] not in skip_imgs],
        }
1816
1817
1818
class SafelyEndangered(GenericNavigableComic):
    """Class to retrieve Safely Endangered comics."""
    # Also on http://tumblr.safelyendangered.com
    name = 'endangered'
    long_name = 'Safely Endangered'
    url = 'http://www.safelyendangered.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics."""
        return {'href': 'http://www.safelyendangered.com/comic/ignored/'}

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with the publication date, the image urls, the post
        title and the alt text of the first image (empty string when the
        page has no image).
        """
        title = soup.find('h2', class_='post-title').string
        date_str = soup.find('span', class_='post-date').string
        day = string_to_date(date_str, '%B %d, %Y')
        imgs = soup.find('div', id='comic').find_all('img')
        assert all(i['alt'] == i['title'] for i in imgs)
        # Do not index into an empty list: a page without image yields an
        # empty alt text instead of an IndexError.
        alt = imgs[0]['alt'] if imgs else ""
        return {
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [i['src'] for i in imgs],
            'title': title,
            'alt': alt,
        }
1848
1849
1850
class PicturesInBoxes(GenericNavigableComic):
    """Retriever for the Pictures In Boxes comics."""
    # Also on http://picturesinboxescomic.tumblr.com
    url = 'http://www.picturesinboxes.com'
    name = 'picturesinboxes'
    long_name = 'Pictures in Boxes'
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics."""
        first_url = 'http://www.picturesinboxes.com/2013/10/26/tetris/'
        return {'href': first_url}

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics."""
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        posted = soup.find('span', class_='post-date').string
        day = string_to_date(posted, '%B %d, %Y')
        pictures = soup.find('div', class_='comicpane').find_all('img')
        assert pictures
        # Alt and hover texts are both expected to repeat the post title.
        assert all(pic['title'] == pic['alt'] == title for pic in pictures)
        return {
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'author': author,
        }
1881
1882
1883
class Penmen(GenericEmptyComic):
    """Placeholder for the Penmen comics."""
    url = 'http://penmen.com'
    long_name = 'Penmen'
    name = 'penmen'
1888
1889
1890
class TheDoghouseDiaries(GenericNavigableComic):
    """Retriever for The Dog House Diaries comics."""
    url = 'http://thedoghousediaries.com'
    name = 'doghouse'
    long_name = 'The Dog House Diaries'

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics."""
        home_soup = get_soup_at_url(cls.url)
        return home_soup.find('a', id='firstlink')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to the next (or previous) comic, found by anchor id."""
        if next_:
            anchor_id = 'nextlink'
        else:
            anchor_id = 'previouslink'
        return last_soup.find('a', id=anchor_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics."""
        picture = soup.find('img', src=re.compile('^dhdcomics/.*'))
        page_url = cls.get_url_from_link(link)
        title = soup.find('h2', id='titleheader').string
        subtitle = soup.find('div', id='subtext').string
        hover_text = picture.get('title')
        img_url = urljoin_wrapper(page_url, picture['src'].strip())
        # The comic number is the last path component of the url.
        num = int(page_url.rsplit('/', 1)[-1])
        return {
            'title': title,
            'title2': subtitle,
            'alt': hover_text,
            'img': [img_url],
            'num': num,
        }
1918
1919
1920
class InvisibleBread(GenericListableComic):
    """Class to retrieve Invisible Bread comics.

    Comics are listed from the archive page. The title comes from the
    archive cell, the month and day from the cell before it and the year
    from the comic url.
    """
    # Also on http://www.gocomics.com/invisible-bread
    name = 'invisiblebread'
    long_name = 'Invisible Bread'
    url = 'http://invisiblebread.com'

    @classmethod
    def get_archive_elements(cls):
        """Get the archive title cells, reversed from their page order."""
        archive_url = urljoin_wrapper(cls.url, 'archives/')
        return reversed(get_soup_at_url(archive_url).find_all('td', class_='archive-title'))

    @classmethod
    def get_url_from_archive_element(cls, td):
        """Get url corresponding to an archive element."""
        return td.find('a')['href']

    @classmethod
    def get_comic_info(cls, soup, td):
        """Get information about a particular comics."""
        url = cls.get_url_from_archive_element(td)
        title = td.find('a').string
        month_and_day = td.previous_sibling.string
        # The site url is escaped so that its dots only match literal dots;
        # the digits right after it are used as the year.
        link_re = re.compile('^%s/([0-9]+)/' % re.escape(cls.url))
        year = link_re.match(url).groups()[0]
        date_str = month_and_day + ' ' + year
        day = string_to_date(date_str, '%b %d %Y')
        img = soup.find('div', id='comic').find('img')
        # Alt and hover texts are both expected to repeat the archive title.
        assert img['title'] == img['alt'] == title
        return {
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [urljoin_wrapper(cls.url, img['src'])],
            'title': title,
        }
1956
1957
1958
class DiscoBleach(GenericEmptyComic):
    """Placeholder for the Disco Bleach comics.

    Retrieval does not work anymore.
    """
    url = 'http://discobleach.com'
    long_name = 'Disco Bleach'
    name = 'discobleach'
1963
1964
1965
class TubeyToons(GenericEmptyComic):
    """Placeholder for the TubeyToons comics.

    Retrieval does not work anymore.
    """
    # Also on http://tapastic.com/series/Tubey-Toons
    # Also on http://tubeytoons.tumblr.com
    url = 'http://tubeytoons.com'
    long_name = 'Tubey Toons'
    name = 'tubeytoons'
1972
1973
1974
class CompletelySeriousComics(GenericNavigableComic):
    """Retriever for the Completely Serious comics."""
    url = 'http://completelyseriouscomics.com'
    name = 'completelyserious'
    long_name = 'Completely Serious Comics'
    get_navi_link = get_a_navi_navinext
    get_first_comic_link = get_a_navi_navifirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics."""
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find('span', class_='post-author')
        # The author is read from the second child of the author span.
        author = author_span.contents[1].string
        posted = soup.find('span', class_='post-date').string
        day = string_to_date(posted, '%B %d, %Y')
        pictures = soup.find('div', class_='comicpane').find_all('img')
        assert pictures
        alt = pictures[0]['title']
        assert all(pic['title'] == pic['alt'] == alt for pic in pictures)
        return {
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'alt': alt,
            'author': author,
        }
2002
2003 View Code Duplication
0 ignored issues
show
This code seems to be duplicated in your project.
Loading history...
2004
class PoorlyDrawnLines(GenericListableComic):
    """Class to retrieve Poorly Drawn Lines comics."""
    # Also on http://pdlcomics.tumblr.com
    name = 'poorlydrawn'
    long_name = 'Poorly Drawn Lines'
    url = 'http://poorlydrawnlines.com'
    get_url_from_archive_element = get_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        A post is expected to hold at most one image; the title is the hover
        text of that image, or an empty string when there is none.
        """
        imgs = soup.find('div', class_='post').find_all('img')
        assert len(imgs) <= 1
        return {
            'img': [i['src'] for i in imgs],
            'title': imgs[0].get('title', "") if imgs else "",
        }

    @classmethod
    def get_archive_elements(cls):
        """Get the archive links to comic pages, reversed from page order."""
        archive_url = urljoin_wrapper(cls.url, 'archive')
        # The site url is escaped so that its dots only match literal dots.
        url_re = re.compile('^%s/comic/.' % re.escape(cls.url))
        return reversed(get_soup_at_url(archive_url).find_all('a', href=url_re))
2027
2028
2029
class LoadingComics(GenericNavigableComic):
    """Retriever for the Loading Artist comics."""
    url = 'http://www.loadingartist.com/latest'
    name = 'loadingartist'
    long_name = 'Loading Artist'

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics."""
        latest_soup = get_soup_at_url(cls.url)
        return latest_soup.find('a', title="First")

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to the next (or previous) comic, found by link title."""
        if next_:
            link_title = 'Next'
        else:
            link_title = 'Previous'
        return last_soup.find('a', title=link_title)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics."""
        title = soup.find('h1').string
        posted = soup.find('span', class_='date').string.strip()
        day = string_to_date(posted, "%B %d, %Y")
        comic_div = soup.find('div', class_='comic')
        # Only images with empty alt and title attributes are kept.
        pictures = comic_div.find_all('img', alt='', title='')
        return {
            'title': title,
            'img': [pic['src'] for pic in pictures],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
2058
2059
2060
class ChuckleADuck(GenericNavigableComic):
    """Retriever for the Chuckle-A-Duck comics."""
    url = 'http://chuckleaduck.com'
    name = 'chuckleaduck'
    long_name = 'Chuckle-A-duck'
    get_navi_link = get_link_rel_next
    get_first_comic_link = get_div_navfirst_a

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics."""
        posted = soup.find('span', class_='post-date').string
        day = string_to_date(remove_st_nd_rd_th_from_date(posted), "%B %d, %Y")
        author = soup.find('span', class_='post-author').string
        comic_div = soup.find('div', id='comic')
        if comic_div:
            pictures = comic_div.find_all('img')
        else:
            # Pages without a comic div yield no image.
            pictures = []
        title = pictures[0]['title'] if pictures else ""
        assert all(pic['title'] == pic['alt'] == title for pic in pictures)
        return {
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'author': author,
        }
2086
2087
2088
class DepressedAlien(GenericNavigableComic):
    """Class to retrieve Depressed Alien Comics.

    Navigation links are the parents of arrow images identified by their
    'name' attribute.
    """
    name = 'depressedalien'
    long_name = 'Depressed Alien'
    url = 'http://depressedalien.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics."""
        return get_soup_at_url(cls.url).find('img', attrs={'name': 'beginArrow'}).parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to the next (or previous) comic, None if there is none."""
        arrow = last_soup.find('img', attrs={'name': 'rightArrow' if next_ else 'leftArrow'})
        # A page without the arrow image has no link in that direction:
        # return None instead of failing on None.parent.
        return arrow.parent if arrow is not None else None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics."""
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        imgs = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'img': [i['content'] for i in imgs],
        }
2113
2114
2115
class ThingsInSquares(GenericListableComic):
    """Retriever for the Things In Squares comics."""
    # This can be retrieved in other languages
    # Also on https://tapastic.com/series/Things-in-Squares
    url = 'http://www.thingsinsquares.com'
    name = 'squares'
    long_name = 'Things in squares'

    @classmethod
    def get_comic_info(cls, soup, tr):
        """Get information about a particular comics."""
        # An archive row is expected to hold exactly three cells.
        _, title_cell, date_cell = tr.find_all('td')
        anchor = title_cell.find('a')
        day = string_to_date(date_cell.string, "%m.%d.%y")
        title = anchor.string
        title2 = soup.find('meta', property='og:title')['content']
        desc = soup.find('meta', property='og:description')
        if desc:
            description = desc['content']
        else:
            description = ''
        tag_metas = soup.find_all('meta', property='article:tag')
        tags = ' '.join(meta['content'] for meta in tag_metas)
        pictures = soup.find('div', class_='entry-content').find_all('img')
        return {
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'title': title,
            'title2': title2,
            'description': description,
            'tags': tags,
            'img': [pic['src'] for pic in pictures],
            'alt': ' '.join(pic['alt'] for pic in pictures),
        }

    @classmethod
    def get_url_from_archive_element(cls, tr):
        """Get url corresponding to an archive element."""
        _, title_cell, _ = tr.find_all('td')
        return title_cell.find('a')['href']

    @classmethod
    def get_archive_elements(cls):
        """Get the archive table rows, reversed from their page order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive-2'))
        rows = archive_soup.find('tbody').find_all('tr')
        return reversed(rows)
2157
2158
2159
class HappleTea(GenericNavigableComic):
    """Retriever for the Happle Tea comics."""
    url = 'http://www.happletea.com'
    name = 'happletea'
    long_name = 'Happle Tea'
    get_navi_link = get_link_rel_next
    get_first_comic_link = get_a_navi_navifirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics."""
        pictures = soup.find('div', id='comic').find_all('img')
        # Title, author and date are all read from the post content div.
        post = soup.find('div', class_='post-content')
        title = post.find('h2', class_='post-title').string
        author = post.find('a', rel='author').string
        posted = post.find('span', class_='post-date').string
        day = string_to_date(posted, "%B %d, %Y")
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        alt_texts = [pic['alt'] for pic in pictures]
        return {
            'title': title,
            'img': [pic['src'] for pic in pictures],
            'alt': ''.join(alt_texts),
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'author': author,
        }
2186
2187
2188
class FatAwesomeComics(GenericNavigableComic):
    """Retriever for the Fat Awesome comics."""
    # Also on http://fatawesomecomedy.tumblr.com
    url = 'http://fatawesome.com/comics'
    name = 'fatawesome'
    long_name = 'Fat Awesome'
    get_navi_link = get_a_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics."""
        first_url = 'http://fatawesome.com/shortbus/'
        return {'href': first_url}

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics."""
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        description = soup.find('meta', attrs={'name': 'description'})['content']
        tag_meta = soup.find('meta', property='article:tag')
        if tag_meta:
            tags = tag_meta['content']
        else:
            tags = ""
        published = soup.find('meta', property='article:published_time')['content']
        # Only the leading 'YYYY-MM-DD' part of the timestamp is parsed.
        day = string_to_date(published[:10], "%Y-%m-%d")
        pictures = soup.find_all('img', attrs={'data-recalc-dims': "1"})
        assert len(pictures) == 1
        # The query string is dropped from the image urls.
        img_urls = [pic['src'].rsplit('?', 1)[0] for pic in pictures]
        return {
            'title': title,
            'description': description,
            'tags': tags,
            'alt': "".join(pic['alt'] for pic in pictures),
            'img': img_urls,
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
2222
2223
2224
class AnythingComic(GenericListableComic):
    """Retriever for the Anything Comic comics."""
    # Also on http://tapastic.com/series/anything
    url = 'http://www.anythingcomic.com'
    name = 'anythingcomic'
    long_name = 'Anything Comic'

    @classmethod
    def get_archive_elements(cls):
        """Get the rows of the archive table that correspond to comics."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive/'))
        rows = archive_soup.find('table', id='chapter_table').find_all('tr')
        # The first 2 <tr>'s do not correspond to comics
        return rows[2:]

    @classmethod
    def get_url_from_archive_element(cls, tr):
        """Get url corresponding to an archive element."""
        # An archive row is expected to hold exactly four cells.
        _, comic_cell, _, _ = tr.find_all('td')
        anchor = comic_cell.find('a')
        return urljoin_wrapper(cls.url, anchor['href'])

    @classmethod
    def get_comic_info(cls, soup, tr):
        """Get information about a particular comics."""
        num_cell, comic_cell, date_cell, _ = tr.find_all('td')
        num = int(num_cell.string)
        title = comic_cell.find('a').string
        pictures = soup.find_all('img', id='comic_image')
        day = string_to_date(date_cell.string, '%d %b %Y %I:%M %p')
        assert len(pictures) == 1
        assert all(pic.get('alt') == pic.get('title') for pic in pictures)
        return {
            'num': num,
            'title': title,
            'alt': pictures[0].get('alt', ''),
            'img': [pic['src'] for pic in pictures],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
2264
2265
2266
class LonnieMillsap(GenericNavigableComic):
    """Retriever for Lonnie Millsap's comics."""
    url = 'http://www.lonniemillsap.com'
    name = 'millsap'
    long_name = 'Lonnie Millsap'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics."""
        first_url = 'http://www.lonniemillsap.com/?p=42'
        return {'href': first_url}

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics."""
        title = soup.find('h2', class_='post-title').string
        # Author, date and images are all read from the post content div.
        post = soup.find('div', class_='post-content')
        author_span = post.find("span", class_="post-author")
        author = author_span.find("a").string
        posted = post.find("span", class_="post-date").string
        day = string_to_date(posted, "%B %d, %Y")
        pictures = post.find("div", class_="entry").find_all("img")
        return {
            'title': title,
            'author': author,
            'img': [pic['src'] for pic in pictures],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
2295
2296
2297
class LinsEditions(GenericNavigableComic):
    """Retriever for the L.I.N.S. Editions comics."""
    # Also on http://linscomics.tumblr.com
    url = 'https://linsedition.com'
    name = 'lins'
    long_name = 'L.I.N.S. Editions'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics."""
        first_url = 'https://linsedition.com/2011/09/07/l-i-n-s/'
        return {'href': first_url}

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics."""
        title = soup.find('meta', property='og:title')['content']
        image_metas = soup.find_all('meta', property='og:image')
        published = soup.find('meta', property='article:published_time')['content']
        # Only the leading 'YYYY-MM-DD' part of the timestamp is parsed.
        day = string_to_date(published[:10], "%Y-%m-%d")
        return {
            'title': title,
            'img': [meta['content'] for meta in image_metas],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
2324
2325
2326
class ThorsThundershack(GenericNavigableComic):
    """Retriever for the Thor's Thundershack webcomic."""
    # Also on http://tapastic.com/series/Thors-Thundershac
    name = 'thor'
    long_name = 'Thor\'s Thundershack'
    url = 'http://www.thorsthundershack.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the "first navlink" anchor found on the home page."""
        home = get_soup_at_url(cls.url)
        return home.find('a', class_='first navlink')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the first rel=next/prev anchor not pointing at '/comic'.

        Returns None when every candidate anchor targets '/comic' or when
        there is no candidate at all.
        """
        relation = 'next' if next_ else 'prev'
        candidates = last_soup.find_all('a', rel=relation)
        return next(
            (anchor for anchor in candidates if anchor['href'] != '/comic'),
            None)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect title, description, author, images and date of a comic.

        The title comes from the "description" meta tag and the alt text
        from the first image (empty string when there is no image).
        """
        title = soup.find('meta', attrs={'name': 'description'})["content"]
        description = soup.find('div', itemprop='articleBody').text
        author = soup.find('span', itemprop='author copyrightHolder').string
        images = soup.find_all('img', itemprop='image')
        # Sanity check: each image's title must equal its alt text.
        for image in images:
            assert image['title'] == image['alt']
        if images:
            alt = images[0]['alt']
        else:
            alt = ""
        raw_date = soup.find('time', itemprop='datePublished')["datetime"]
        published = string_to_date(raw_date, "%Y-%m-%d %H:%M:%S")
        return {
            'img': [urljoin_wrapper(cls.url, image['src']) for image in images],
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'author': author,
            'title': title,
            'alt': alt,
            'description': description,
        }
2367
2368
2369
class GerbilWithAJetpack(GenericNavigableComic):
    """Class to retrieve Gerbil With A Jetpack comics."""
    name = 'gerbil'
    long_name = 'Gerbil With A Jetpack'
    url = 'http://gerbilwithajetpack.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get title, author, date, images and alt text of a comic page.

        The alt text is taken from the first image inside the "comic" div;
        it is the empty string when that div contains no image, instead of
        failing with an IndexError.
        """
        title = soup.find('h2', class_='post-title').string
        author = soup.find("span", class_="post-author").find("a").string
        date_str = soup.find("span", class_="post-date").string
        day = string_to_date(date_str, "%B %d, %Y")
        imgs = soup.find("div", id="comic").find_all("img")
        # Guard against pages without any image, as sibling classes do.
        alt = imgs[0]['alt'] if imgs else ""
        # Sanity check: every image shares the same alt and title text.
        assert all(i['alt'] == i['title'] == alt for i in imgs)
        return {
            'img': [i['src'] for i in imgs],
            'title': title,
            'alt': alt,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
2396
2397
2398
class EveryDayBlues(GenericNavigableComic):
    """Retriever for the Every Day Blues webcomic."""
    name = "blues"
    long_name = "Every Day Blues"
    url = "http://everydayblues.net"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect title, author, date and image of a comic page.

        At most one image is expected, and its alt and title attributes
        must both equal the page title.
        """
        title = soup.find("h2", class_="post-title").string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        raw_date = soup.find("span", class_="post-date").string
        # NOTE(review): the third argument is presumably the locale used to
        # parse the month name - confirm against string_to_date.
        published = string_to_date(raw_date, "%d. %B %Y", "de_DE.utf8")
        images = soup.find("div", id="comic").find_all("img")
        for image in images:
            assert image['alt'] == image['title'] == title
        assert len(images) <= 1
        sources = [image['src'] for image in images]
        return {
            'img': sources,
            'title': title,
            'author': author,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2424
2425
2426
class BiterComics(GenericNavigableComic):
    """Retriever for the Biter Comics webcomic."""
    name = "biter"
    long_name = "Biter Comics"
    url = "http://www.bitercomics.com"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect title, author, date, image and alt text of a comic page.

        Exactly one image is expected inside the "comic" div, with
        identical alt and title attributes.
        """
        title = soup.find("h1", class_="entry-title").string
        author_span = soup.find("span", class_="author vcard")
        author = author_span.find("a").string
        raw_date = soup.find("span", class_="entry-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        images = soup.find("div", id="comic").find_all("img")
        for image in images:
            assert image['alt'] == image['title']
        assert len(images) == 1
        alt = images[0]['alt']
        sources = [image['src'] for image in images]
        return {
            'img': sources,
            'title': title,
            'alt': alt,
            'author': author,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2454
2455
2456
class TheAwkwardYeti(GenericNavigableComic):
    """Retriever for The Awkward Yeti webcomic."""
    # Also on http://www.gocomics.com/the-awkward-yeti
    # Also on http://larstheyeti.tumblr.com
    # Also on https://tapastic.com/series/TheAwkwardYeti
    name = 'yeti'
    long_name = 'The Awkward Yeti'
    url = 'http://theawkwardyeti.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect title, date and images of a comic page.

        Only the first image is checked for matching alt and title
        attributes; any further image is accepted as is.
        """
        title = soup.find('h2', class_='post-title').string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        images = soup.find("div", id="comic").find_all("img")
        for image in images[:1]:
            assert image['alt'] == image['title']
        sources = [image['src'] for image in images]
        return {
            'img': sources,
            'title': title,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2482
2483
2484
class PleasantThoughts(GenericNavigableComic):
    """Retriever for the Pleasant Thoughts webcomic."""
    name = 'pleasant'
    long_name = 'Pleasant Thoughts'
    url = 'http://pleasant-thoughts.com'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the title and image URLs found in the "post-content" div."""
        content = soup.find('div', class_='post-content')
        title = content.find('h2', class_='post-title').string
        entry = content.find("div", class_="entry")
        sources = [image['src'] for image in entry.find_all("img")]
        return {
            'title': title,
            'img': sources,
        }
2502
2503
2504
class MisterAndMe(GenericNavigableComic):
    """Retriever for the Mister & Me webcomic."""
    # Also on http://www.gocomics.com/mister-and-me
    # Also on https://tapastic.com/series/Mister-and-Me
    name = 'mister'
    long_name = 'Mister & Me'
    url = 'http://www.mister-and-me.com'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect title, author, date, image and alt text of a comic page.

        At most one image is expected; the alt text is the empty string
        when the "comic" div holds no image.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        images = soup.find("div", id="comic").find_all("img")
        for image in images:
            assert image['alt'] == image['title']
        assert len(images) <= 1
        if images:
            alt = images[0]['alt']
        else:
            alt = ""
        sources = [image['src'] for image in images]
        return {
            'img': sources,
            'title': title,
            'alt': alt,
            'author': author,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2534
2535
2536
class LastPlaceComics(GenericNavigableComic):
    """Retriever for the Last Place Comics webcomic."""
    name = 'lastplace'
    long_name = 'LastPlaceComics'
    url = "http://lastplacecomics.com"
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect title, author, date, image and alt text of a comic page.

        At most one image is expected; the alt text is the empty string
        when the "comic" div holds no image.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        images = soup.find("div", id="comic").find_all("img")
        for image in images:
            assert image['alt'] == image['title']
        assert len(images) <= 1
        if images:
            alt = images[0]['alt']
        else:
            alt = ""
        sources = [image['src'] for image in images]
        return {
            'img': sources,
            'title': title,
            'alt': alt,
            'author': author,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2564
2565
2566
class TalesOfAbsurdity(GenericNavigableComic):
    """Retriever for the Tales of Absurdity webcomic."""
    # Also on http://tapastic.com/series/Tales-Of-Absurdity
    # Also on http://talesofabsurdity.tumblr.com
    name = 'absurdity'
    long_name = 'Tales of Absurdity'
    url = 'http://talesofabsurdity.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect title, author, date, images and alt text of a comic page.

        The alt text comes from the first image, or is the empty string
        when the "comic" div holds no image.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        images = soup.find("div", id="comic").find_all("img")
        for image in images:
            assert image['alt'] == image['title']
        if images:
            alt = images[0]['alt']
        else:
            alt = ""
        sources = [image['src'] for image in images]
        return {
            'img': sources,
            'title': title,
            'alt': alt,
            'author': author,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2595
2596
2597
class EndlessOrigami(GenericNavigableComic):
    """Retriever for the Endless Origami webcomic."""
    name = "origami"
    long_name = "Endless Origami"
    url = "http://endlessorigami.com"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect title, author, date, images and alt text of a comic page.

        The alt text comes from the first image, or is the empty string
        when the "comic" div holds no image.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        images = soup.find("div", id="comic").find_all("img")
        for image in images:
            assert image['alt'] == image['title']
        if images:
            alt = images[0]['alt']
        else:
            alt = ""
        sources = [image['src'] for image in images]
        return {
            'img': sources,
            'title': title,
            'alt': alt,
            'author': author,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2624
2625
2626
class PlanC(GenericNavigableComic):
    """Retriever for the Plan C webcomic."""
    name = 'planc'
    long_name = 'Plan C'
    url = 'http://www.plancomic.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, image URLs and publication date of a comic page."""
        title = soup.find('h2', class_='post-title').string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        comic_div = soup.find('div', id='comic')
        sources = [image['src'] for image in comic_div.find_all('img')]
        return {
            'title': title,
            'img': sources,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2648
2649
2650
class BuniComic(GenericNavigableComic):
    """Retriever for the Buni webcomic."""
    name = 'buni'
    long_name = 'BuniComics'
    url = 'http://www.bunicomic.com'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the image URL and title of a comic page.

        Exactly one image is expected in the "comic" div; its title
        attribute (which must equal its alt text) is used as comic title.
        """
        images = soup.find('div', id='comic').find_all('img')
        for image in images:
            assert image['alt'] == image['title']
        assert len(images) == 1
        sources = [image['src'] for image in images]
        return {
            'img': sources,
            'title': images[0]['title'],
        }
2668
2669
2670
class GenericCommitStrip(GenericNavigableComic):
    """Shared retrieval logic for the Commit Strip language editions."""
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return titles, description and image URLs of a strip.

        'title2' is the space-joined title attributes of the images in
        the "entry-content" div (a missing attribute counts as ''). Each
        image source is converted with convert_iri_to_plain_ascii_uri and
        joined to the class url.
        """
        description = soup.find('meta', property='og:description')['content']
        title = soup.find('meta', property='og:title')['content']
        images = soup.find('div', class_='entry-content').find_all('img')
        image_titles = [image.get('title', '') for image in images]
        title2 = ' '.join(image_titles)
        sources = []
        for image in images:
            ascii_src = convert_iri_to_plain_ascii_uri(image['src'])
            sources.append(urljoin_wrapper(cls.url, ascii_src))
        return {
            'title': title,
            'title2': title2,
            'description': description,
            'img': sources,
        }
2687
2688
2689
class CommitStripFr(GenericCommitStrip):
    """Retriever for the French edition of Commit Strip."""
    name = 'commit_fr'
    long_name = 'Commit Strip (Fr)'
    url = 'http://www.commitstrip.com/fr'

    @classmethod
    def get_first_comic_link(cls):
        """Return a link-like dict pointing at the hard-coded first strip."""
        first_href = 'http://www.commitstrip.com/fr/2012/02/22/interview/'
        return {'href': first_href}
2699
2700
2701
class CommitStripEn(GenericCommitStrip):
    """Retriever for the English edition of Commit Strip."""
    name = 'commit_en'
    long_name = 'Commit Strip (En)'
    url = 'http://www.commitstrip.com/en'

    @classmethod
    def get_first_comic_link(cls):
        """Return a link-like dict pointing at the hard-coded first strip."""
        first_href = 'http://www.commitstrip.com/en/2012/02/22/interview/'
        return {'href': first_href}
2711
2712
2713
class GenericBoumerie(GenericNavigableComic):
    """Shared retrieval logic for the Boumeries language editions.

    Subclasses must override `date_format` and `lang`, which are passed
    to string_to_date when parsing the publication date.
    """
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next
    date_format = NotImplemented
    lang = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect short link, images, title, author and date of a comic.

        Every image in the "comic" div must have identical alt and title
        attributes.
        """
        title = soup.find('h2', class_='post-title').string
        short_url = soup.find('link', rel='shortlink')['href']
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, cls.date_format, cls.lang)
        images = soup.find('div', id='comic').find_all('img')
        for image in images:
            assert image['alt'] == image['title']
        sources = [image['src'] for image in images]
        return {
            'short_url': short_url,
            'img': sources,
            'title': title,
            'author': author,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2739
2740
2741
class BoumerieEn(GenericBoumerie):
    """Retriever for the English edition of Boumeries."""
    name = 'boumeries_en'
    long_name = 'Boumeries (En)'
    url = 'http://comics.boumerie.com'
    # Date parsing settings consumed by GenericBoumerie.get_comic_info.
    lang = 'en_GB.UTF-8'
    date_format = "%B %d, %Y"
2748
2749
2750
class BoumerieFr(GenericBoumerie):
    """Retriever for the French edition of Boumeries."""
    name = 'boumeries_fr'
    long_name = 'Boumeries (Fr)'
    url = 'http://bd.boumerie.com'
    # Date parsing settings consumed by GenericBoumerie.get_comic_info.
    lang = "fr_FR.utf8"
    date_format = "%A, %d %B %Y"
2757
2758
2759
class UnearthedComics(GenericNavigableComic):
    """Class to retrieve Unearthed comics."""
    # Also on http://tapastic.com/series/UnearthedComics
    # Also on http://unearthedcomics.tumblr.com
    name = 'unearthed'
    long_name = 'Unearthed Comics'
    url = 'http://unearthedcomics.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Get link to the first comic (hard-coded URL)."""
        return {'href': 'http://unearthedcomics.com/comics/world-with-turn-signals/'}

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get title, description, short link, images and date of a comic.

        The title comes from the first <h1> (or else <h2>) and the
        description from the "og:description" meta tag; both fall back to
        the empty string when the element is missing.
        """
        short_url = soup.find('link', rel='shortlink')['href']
        title_elt = soup.find('h1') or soup.find('h2')
        title = title_elt.string if title_elt else ""
        # Store the meta tag's text, not the Tag object itself.
        desc_elt = soup.find('meta', property='og:description')
        desc = desc_elt['content'] if desc_elt is not None else ""
        date_str = soup.find('time', class_='published updated hidden')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        post = soup.find('div', class_="entry content entry-content type-portfolio")
        imgs = post.find_all('img')
        return {
            'title': title,
            'description': desc,
            'url2': short_url,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
2793
2794
2795
class Optipess(GenericNavigableComic):
    """Retriever for the Optipess webcomic."""
    name = 'optipess'
    long_name = 'Optipess'
    url = 'http://www.optipess.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect title, alt text, author, images and date of a comic page.

        A page without a "comic" div yields an empty image list and an
        empty alt text.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        comic_div = soup.find('div', id='comic')
        if comic_div:
            images = comic_div.find_all('img')
        else:
            images = []
        if images:
            alt = images[0]['title']
        else:
            alt = ""
        for image in images:
            assert image['alt'] == image['title'] == alt
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, "%B %d, %Y")
        sources = [image['src'] for image in images]
        return {
            'title': title,
            'alt': alt,
            'author': author,
            'img': sources,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2823
2824
2825
class PainTrainComic(GenericNavigableComic):
    """Class to retrieve Pain Train Comics."""
    name = 'paintrain'
    long_name = 'Pain Train Comics'
    url = 'http://paintraincomic.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get short link, number, images, date, alt text and title.

        The comic number is the "p" query parameter of the page's
        shortlink. The alt text is the title attribute of the first image,
        or the empty string when the "comic" div holds no image.
        """
        title = soup.find('h2', class_='post-title').string
        short_url = soup.find('link', rel='shortlink')['href']
        # Escape the site URL so that its dots only match literal dots.
        short_url_re = re.compile(r'^%s/\?p=([0-9]*)' % re.escape(cls.url))
        num = int(short_url_re.match(short_url).group(1))
        imgs = soup.find('div', id='comic').find_all('img')
        # Guard against pages without any image, as sibling classes do.
        alt = imgs[0]['title'] if imgs else ""
        # Sanity check: every image shares the same alt and title text.
        assert all(i['alt'] == i['title'] == alt for i in imgs)
        date_str = soup.find('span', class_='post-date').string
        day = string_to_date(date_str, "%d/%m/%Y")
        return {
            'short_url': short_url,
            'num': num,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'alt': alt,
            'title': title,
        }
2855
2856
2857
class MoonBeard(GenericNavigableComic):
    """Class to retrieve MoonBeard comics."""
    # Also on http://blog.squiresjam.es/moonbeard
    # Also on http://www.webtoons.com/en/comedy/moon-beard/list?title_no=471
    name = 'moonbeard'
    long_name = 'Moon Beard'
    url = 'http://moonbeard.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get short link, number, images, date, title, tags, alt and author.

        The comic number is the "p" query parameter of the page's
        shortlink. Tags are the space-joined "article:tag" meta contents.
        The alt text is the title attribute of the first image, or the
        empty string when the "comic" div holds no image.
        """
        title = soup.find('h2', class_='post-title').string
        short_url = soup.find('link', rel='shortlink')['href']
        # Escape the site URL so that its dots only match literal dots.
        short_url_re = re.compile(r'^%s/\?p=([0-9]*)' % re.escape(cls.url))
        num = int(short_url_re.match(short_url).group(1))
        imgs = soup.find('div', id='comic').find_all('img')
        # Guard against pages without any image, as sibling classes do.
        alt = imgs[0]['title'] if imgs else ""
        # Sanity check: every image shares the same alt and title text.
        assert all(i['alt'] == i['title'] == alt for i in imgs)
        date_str = soup.find('span', class_='post-date').string
        day = string_to_date(date_str, "%B %d, %Y")
        tags = ' '.join(t['content'] for t in soup.find_all('meta', property='article:tag'))
        author = soup.find('span', class_='post-author').string
        return {
            'short_url': short_url,
            'num': num,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'title': title,
            'tags': tags,
            'alt': alt,
            'author': author,
        }
2893
2894
2895
class AHamADay(GenericNavigableComic):
    """Class to retrieve A Ham A Day comics."""
    name = 'ham'
    long_name = 'A Ham A Day'
    url = 'http://www.ahammaday.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to the first comic (hard-coded URL)."""
        return {'href': 'http://www.ahammaday.com/today/3/6/french'}

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get the anchor leading to the next (or previous) comic.

        Returns None when the page has no matching navigation item,
        instead of failing with an AttributeError.
        """
        # prev is next / next is prev
        li = last_soup.find('li', class_='previous' if next_ else 'next')
        return li.find('a') if li is not None else None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get images, title, author and publication date of a comic page."""
        date_str = soup.find('time', class_='published')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        author = soup.find('span', class_='blog-author').find('a').string
        title = soup.find('meta', property='og:title')['content']
        imgs = soup.find_all('meta', itemprop='image')
        return {
            'img': [i['content'] for i in imgs],
            'title': title,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
        }
2928
2929
2930
class LittleLifeLines(GenericNavigableComic):
    """Class to retrieve Little Life Lines comics."""
    name = 'life'
    long_name = 'Little Life Lines'
    url = 'http://www.littlelifelines.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to the first comic (hard-coded URL)."""
        return {'href': 'http://www.littlelifelines.com/comics/well-done'}

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get the anchor leading to the next (or previous) comic, if any."""
        # prev is next / next is prev
        li = last_soup.find('li', class_='prev' if next_ else 'next')
        return li.find('a') if li else None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get title, alt text, description, author, date and images.

        Images without a "src" attribute are ignored. The alt text comes
        from the first remaining image, or is the empty string when none
        remains.
        """
        title = soup.find('meta', property='og:title')['content']
        desc = soup.find('meta', property='og:description')['content']
        date_str = soup.find('time', class_='published')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        author = soup.find('a', rel='author').string
        div_content = soup.find('div', class_="body entry-content")
        imgs = [i for i in div_content.find_all('img') if i.get('src') is not None]
        # The filter above may leave nothing: do not index an empty list.
        alt = imgs[0]['alt'] if imgs else ""
        return {
            'title': title,
            'alt': alt,
            'description': desc,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [i['src'] for i in imgs],
        }
2970
2971
2972
class GenericWordPressInkblot(GenericNavigableComic):
    """Shared retrieval logic for comics using WordPress with Inkblot."""
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the "first webcomic" anchor found on the home page."""
        first_link_class = 'webcomic-link webcomic1-link first-webcomic-link first-webcomic1-link'
        home = get_soup_at_url(cls.url)
        return home.find('a', class_=first_link_class)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, publication date and image URLs of a comic page.

        Only the first ten characters of the "article:published_time"
        value are parsed (with "%Y-%m-%d") to obtain the date.
        """
        title = soup.find('meta', property='og:title')['content']
        images = soup.find('div', class_='webcomic-image').find_all('img')
        published_meta = soup.find('meta', property='article:published_time')
        raw_date = published_meta['content'][:10]
        published = string_to_date(raw_date, "%Y-%m-%d")
        return {
            'title': title,
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [image['src'] for image in images],
        }
2995
2996
2997
class EverythingsStupid(GenericWordPressInkblot):
    """Retriever for the Everything's Stupid webcomic."""
    # Also on http://tapastic.com/series/EverythingsStupid
    # Also on http://www.webtoons.com/en/challenge/everythings-stupid/list?title_no=14591
    # Also on http://everythingsstupidcomics.tumblr.com
    url = 'http://everythingsstupid.net'
    name = 'stupid'
    long_name = "Everything's Stupid"
3005
3006
3007
class TheIsmComics(GenericWordPressInkblot):
    """Retriever for The Ism webcomic."""
    # Also on https://tapastic.com/series/TheIsm (?)
    url = 'http://www.theism-comics.com'
    name = 'theism'
    long_name = "The Ism"
3013
3014
3015
class WoodenPlankStudios(GenericWordPressInkblot):
    """Retriever for the Wooden Plank Studios webcomic."""
    url = 'http://woodenplankstudios.com'
    name = 'woodenplank'
    long_name = 'Wooden Plank Studios'
3020
3021
3022
class ElectricBunnyComic(GenericNavigableComic):
    """Retriever for the Electric Bunny webcomic."""
    # Also on http://electricbunnycomics.tumblr.com
    name = 'bunny'
    long_name = 'Electric Bunny Comic'
    url = 'http://www.electricbunnycomics.com/View/Comic/153/Welcome+to+Hell'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the parent element of the image whose alt text is 'First'."""
        page = get_soup_at_url(cls.url)
        first_button = page.find('img', alt='First')
        return first_button.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the parent of the 'Next'/'Back' image, or None if absent."""
        label = 'Next' if next_ else 'Back'
        button = last_soup.find('img', alt=label)
        if not button:
            return None
        return button.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the title and image URLs read from Open Graph meta tags."""
        title = soup.find('meta', property='og:title')['content']
        image_metas = soup.find_all('meta', property='og:image')
        sources = [meta['content'] for meta in image_metas]
        return {
            'title': title,
            'img': sources,
        }
3049
3050
3051
class SheldonComics(GenericNavigableComic):
    """Retriever for the Sheldon webcomic."""
    # Also on http://www.gocomics.com/sheldon
    name = 'sheldon'
    long_name = 'Sheldon Comics'
    url = 'http://www.sheldoncomics.com'

    @classmethod
    def get_first_comic_link(cls):
        """Return the "nav-first" anchor found on the home page."""
        home = get_soup_at_url(cls.url)
        return home.find("a", id="nav-first")

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the first nav-next/nav-prev anchor not targeting the home page.

        Returns None when no such anchor exists.
        """
        anchor_id = "nav-next" if next_ else "nav-prev"
        candidates = last_soup.find_all("a", id=anchor_id)
        return next(
            (anchor for anchor in candidates
             if anchor['href'] != 'http://www.sheldoncomics.com'),
            None)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the title and image URL of a comic page.

        Exactly one image is expected in the "comic-foot" div; its title
        attribute (which must equal its alt text) is used as comic title.
        """
        images = soup.find("div", id="comic-foot").find_all("img")
        for image in images:
            assert image['alt'] == image['title']
        assert len(images) == 1
        title = images[0]['title']
        sources = [image['src'] for image in images]
        return {
            'title': title,
            'img': sources,
        }
3081
3082
3083
class CubeDrone(GenericNavigableComic):
    """Class to retrieve Cube Drone comics."""
    name = 'cubedrone'
    long_name = 'Cube Drone'
    url = 'http://cube-drone.com/comics'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to the first comic (parent of the "backward" glyph)."""
        return get_soup_at_url(cls.url).find('span', class_='glyphicon glyphicon-backward').parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get the element wrapping the right/left chevron glyph.

        Returns None when the page has no such glyph, instead of failing
        with an AttributeError.
        """
        class_ = 'glyphicon glyphicon-chevron-' + ('right' if next_ else 'left')
        span = last_soup.find('span', class_=class_)
        return span.parent if span is not None else None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get URL, titles, alt text and images of a comic page.

        'title2' and 'alt' come from the first image, or are empty strings
        when the page holds no comic image.
        """
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        url2 = soup.find('meta', attrs={'name': 'twitter:url'})['content']
        # date_str = soup.find('h2', class_='comic_title').find('small').string
        # day = string_to_date(date_str, "%B %d, %Y, %I:%M %p")
        imgs = soup.find_all('img', class_='comic img-responsive')
        # Do not index an empty result list.
        title2 = imgs[0]['title'] if imgs else ""
        alt = imgs[0]['alt'] if imgs else ""
        return {
            'url2': url2,
            'title': title,
            'title2': title2,
            'alt': alt,
            'img': [i['src'] for i in imgs],
        }
3117
3118
3119
class MakeItStoopid(GenericNavigableComic):
    """Make It Stoopid comics, navigated through the 'cnav' elements of each page."""
    name = "stoopid"
    long_name = "Make it stoopid"
    url = "http://makeitstoopid.com/comic.php"

    @classmethod
    def get_nav(cls, soup):
        """Return the navigation elements of a page.

        The first five elements of class 'cnav' are asserted to be equal to
        the remaining ones. Elements without an 'href' are replaced by None.
        Expected order: begin, prev, archive, next, end.
        """
        all_nav = soup.find_all(class_='cnav')
        first_bar = all_nav[:5]
        assert first_bar == all_nav[5:]
        return [elt if elt.get('href') is not None else None
                for elt in first_bar]

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'begin' navigation element of the main page."""
        nav = cls.get_nav(get_soup_at_url(cls.url))
        return nav[0]

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the 'next' (index 3) or 'prev' (index 1) navigation element."""
        index = 3 if next_ else 1
        return cls.get_nav(last_soup)[index]

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the title (taken from the link) and the images of a comic."""
        title = link['title']
        comic_imgs = soup.find_all('img', id='comicimg')
        return {
            'title': title,
            'img': [img['src'] for img in comic_imgs],
        }
3151
3152
3153
class GeekAndPoke(GenericNavigableComic):
    """Class to retrieve Geek And Poke comics."""
    name = 'geek'
    long_name = 'Geek And Poke'
    url = 'http://geek-and-poke.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics (hard-coded url of the first post)."""
        return {'href': 'http://geek-and-poke.com/geekandpoke/2006/8/27/a-new-place-for-a-not-so-old-blog.html'}

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        The anchor of class 'prev-item' is used to go forward and the one of
        class 'next-item' to go backward.
        """
        return last_soup.find('a', class_='prev-item' if next_ else 'next-item')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with the title, description, author, publication date
        (day/month/year), alt text of the first image and the list of image
        urls (joined with the site url).
        """
        title = soup.find('meta', property='og:title')['content']
        desc = soup.find('meta', property='og:description')['content']
        date_str = soup.find('time', class_='published')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        author = soup.find('a', rel='author').string
        div_content = (soup.find('div', class_="body entry-content") or
                       soup.find('div', class_="special-content"))
        imgs = div_content.find_all('img')
        # Images without a 'src' attribute cannot be downloaded: ignore them.
        imgs = [i for i in imgs if i.get('src') is not None]
        # NOTE(review): if `i` is a BeautifulSoup Tag, `'title' not in i` tests
        # the tag's children rather than its attributes, which would make this
        # check always pass - confirm before relying on it.
        assert all('title' not in i or i['alt'] == i['title'] for i in imgs)
        # Always a string: the empty string when there is no image or no alt.
        alt = imgs[0].get('alt', "") if imgs else ""
        return {
            'title': title,
            'alt': alt,
            'description': desc,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
        }
3193
3194
3195
class GenericTumblrV1(GenericComic):
    """Generic class to retrieve comics from Tumblr using the V1 API.

    Concrete comics subclass this and provide `url` (along with `name` and
    `long_name`).
    """

    @classmethod
    def get_next_comic(cls, last_comic):
        """Yield the info of each new post that get_comic_info does not skip."""
        for p in cls.get_posts(last_comic):
            comic = cls.get_comic_info(p)
            if comic is not None:
                yield comic

    @classmethod
    def get_url_from_post(cls, post):
        """Return the 'url' attribute of a post."""
        return post['url']

    @classmethod
    def get_api_url(cls):
        """Return the url of the V1 API endpoint ('/api/read/') for this blog."""
        return urljoin_wrapper(cls.url, '/api/read/')

    @classmethod
    def get_comic_info(cls, post):
        """Get information about a particular comics.

        Returns None for posts whose type is not 'photo', otherwise a dict
        describing the post (urls, date, title, tags, images, tumblr id).
        """
        # print(post)
        type_ = post['type']
        if type_ != 'photo':
            # print("Type is %s" % type_)
            return None
        tumblr_id = int(post['id'])
        api_url = cls.get_api_url() + '?id=%d' % (tumblr_id)
        # fromtimestamp() without tz converts to the local time of the machine.
        day = datetime.datetime.fromtimestamp(int(post['unix-timestamp'])).date()
        caption = post.find('photo-caption')
        title = caption.string if caption else ""
        tags = ' '.join(t.string for t in post.find_all('tag'))
        # Photos may appear in 'photo' tags and/or straight in the post
        photo_tags = post.find_all('photo')
        if not photo_tags:
            photo_tags = [post]
        # Images are in multiple resolutions - taking the first one
        imgs = [photo.find('photo-url') for photo in photo_tags]
        return {
            'url': cls.get_url_from_post(post),
            'url2': post['url-with-slug'],
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'title': title,
            'tags': tags,
            'img': [i.string for i in imgs],
            'tumblr-id': tumblr_id,
            'api_url': api_url,  # for debug purposes
        }

    @classmethod
    def get_posts(cls, last_comic, nb_post_per_call=10):
        """Get posts using API. nb_post_per_call is max 50.

        Posts are retrieved from newer to older as per the tumblr v1 api
        but are returned in chronological order.

        Only the posts newer than `last_comic` (matched on its 'url') are
        returned; all posts are returned when `last_comic` is None. An empty
        iterable is returned when the previous post cannot be found.
        """
        # 'import urllib' alone does not guarantee that the 'error' submodule
        # is loaded: import it explicitly before referring to HTTPError.
        import urllib.error
        waiting_for_url = last_comic['url'] if last_comic else None
        posts_acc = []
        if last_comic is not None:
            # Sometimes, tumblr posts are deleted. When the previous post is
            # deleted, we might end up spending a lot of time looking for
            # something that doesn't exist. Failing early and clearly might
            # be a better option.
            last_api_url = last_comic['api_url']
            try:
                get_soup_at_url(last_api_url)
            except urllib.error.HTTPError:
                try:
                    get_soup_at_url(cls.url)
                except urllib.error.HTTPError:
                    print("Did not find previous post nor main url %s" % cls.url)
                else:
                    print("Did not find previous post %s : it might have been deleted" % last_api_url)
                # Nothing has been collected at this point.
                return []
        api_url = cls.get_api_url()
        posts = get_soup_at_url(api_url).find('posts')
        start, total = int(posts['start']), int(posts['total'])
        assert start == 0
        for starting_num in range(0, total, nb_post_per_call):
            api_url2 = api_url + '?start=%d&num=%d' % (starting_num, nb_post_per_call)
            # print(api_url2)
            posts2 = get_soup_at_url(api_url2).find('posts')
            start2, total2 = int(posts2['start']), int(posts2['total'])
            assert starting_num == start2, "%d != %d" % (starting_num, start2)
            # This may happen and should be handled in the future
            assert total == total2, "%d != %d" % (total, total2)
            for p in posts2.find_all('post'):
                if waiting_for_url and waiting_for_url == cls.get_url_from_post(p):
                    # Reached the last known post: everything accumulated is new.
                    return reversed(posts_acc)
                posts_acc.append(p)
        if waiting_for_url is None:
            return reversed(posts_acc)
        print("Did not find %s : there might be a problem" % waiting_for_url)
        return []
3289
3290
3291
class IrwinCardozo(GenericTumblrV1):
    """Irwin Cardozo comics, retrieved via the Tumblr V1 API."""
    name = "irwinc"
    long_name = "Irwin Cardozo"
    url = "http://irwincardozocomics.tumblr.com"
3296
3297
3298
class AccordingToDevin(GenericTumblrV1):
    """According To Devin comics, retrieved via the Tumblr V1 API."""
    name = "devin"
    long_name = "According To Devin"
    url = "http://accordingtodevin.tumblr.com"
3303
3304
3305
class ItsTheTieTumblr(GenericTumblrV1):
    """It's the tie comics, retrieved via the Tumblr V1 API."""
    # Also on http://itsthetie.com
    # Also on https://tapastic.com/series/itsthetie
    name = "tie-tumblr"
    long_name = "It's the tie (from Tumblr)"
    url = 'http://itsthetie.tumblr.com'
3312
3313
3314
class OctopunsTumblr(GenericTumblrV1):
    """Octopuns comics, retrieved via the Tumblr V1 API."""
    # Also on http://www.octopuns.net
    name = "octopuns-tumblr"
    long_name = "Octopuns (from Tumblr)"
    url = "http://octopuns.tumblr.com"
3320
3321
3322
class PicturesInBoxesTumblr(GenericTumblrV1):
    """Pictures In Boxes comics, retrieved via the Tumblr V1 API."""
    # Also on http://www.picturesinboxes.com
    name = "picturesinboxes-tumblr"
    long_name = "Pictures in Boxes (from Tumblr)"
    url = "http://picturesinboxescomic.tumblr.com"
3328
3329
3330
class TubeyToonsTumblr(GenericTumblrV1):
    """TubeyToons comics, retrieved via the Tumblr V1 API."""
    # Also on http://tapastic.com/series/Tubey-Toons
    # Also on http://tubeytoons.com
    name = "tubeytoons-tumblr"
    long_name = "Tubey Toons (from Tumblr)"
    url = "http://tubeytoons.tumblr.com"
3337
3338
3339
class UnearthedComicsTumblr(GenericTumblrV1):
    """Unearthed comics, retrieved via the Tumblr V1 API."""
    # Also on http://tapastic.com/series/UnearthedComics
    # Also on http://unearthedcomics.com
    name = "unearthed-tumblr"
    long_name = "Unearthed Comics (from Tumblr)"
    url = "http://unearthedcomics.tumblr.com"
3346
3347
3348
class PieComic(GenericTumblrV1):
    """Pie Comic comics, retrieved via the Tumblr V1 API."""
    name = "pie"
    long_name = "Pie Comic"
    url = 'http://piecomic.tumblr.com'
3353
3354
3355
class MrEthanDiamond(GenericTumblrV1):
    """Mr Ethan Diamond comics, retrieved via the Tumblr V1 API."""
    name = "diamond"
    long_name = "Mr Ethan Diamond"
    url = "http://mrethandiamond.tumblr.com"
3360
3361
3362
class Flocci(GenericTumblrV1):
    """floccinaucinihilipilification comics, retrieved via the Tumblr V1 API."""
    name = "flocci"
    long_name = "floccinaucinihilipilification"
    url = 'http://floccinaucinihilipilificationa.tumblr.com'
3367
3368
3369
class UpAndOut(GenericTumblrV1):
    """Up & Out comics, retrieved via the Tumblr V1 API."""
    # Also on http://tapastic.com/series/UP-and-OUT
    name = "upandout"
    long_name = "Up And Out (from Tumblr)"
    url = "http://upandoutcomic.tumblr.com"
3375
3376
3377
class Pundemonium(GenericTumblrV1):
    """Pundemonium comics, retrieved via the Tumblr V1 API."""
    name = "pundemonium"
    long_name = "Pundemonium"
    url = "http://monstika.tumblr.com"
3382
3383
3384
class PoorlyDrawnLinesTumblr(GenericTumblrV1):
    """Poorly Drawn Lines comics, retrieved via the Tumblr V1 API."""
    # Also on http://poorlydrawnlines.com
    name = "poorlydrawn-tumblr"
    long_name = "Poorly Drawn Lines (from Tumblr)"
    url = "http://pdlcomics.tumblr.com"
3390
3391
3392
class PearShapedComics(GenericTumblrV1):
    """Pear Shaped Comics, retrieved via the Tumblr V1 API."""
    name = "pearshaped"
    long_name = "Pear-Shaped Comics"
    url = "http://pearshapedcomics.com"
3397
3398
3399
class PondScumComics(GenericTumblrV1):
    """Pond Scum comics, retrieved via the Tumblr V1 API."""
    name = "pond"
    long_name = "Pond Scum"
    url = "http://pondscumcomic.tumblr.com"
3404
3405
3406
class MercworksTumblr(GenericTumblrV1):
    """Mercworks comics, retrieved via the Tumblr V1 API."""
    # Also on http://mercworks.net
    name = "mercworks-tumblr"
    long_name = "Mercworks (from Tumblr)"
    url = "http://mercworks.tumblr.com"
3412
3413
3414
class OwlTurdTumblr(GenericTumblrV1):
    """Owl Turd comics, retrieved via the Tumblr V1 API."""
    # Also on http://tapastic.com/series/Owl-Turd-Comix
    name = "owlturd-tumblr"
    long_name = "Owl Turd (from Tumblr)"
    url = "http://owlturd.com"
3420
3421
3422
class VectorBelly(GenericTumblrV1):
    """Vector Belly comics, retrieved via the Tumblr V1 API."""
    # Also on http://vectorbelly.com
    name = "vector"
    long_name = "Vector Belly"
    url = "http://vectorbelly.tumblr.com"
3428
3429
3430
class GoneIntoRapture(GenericTumblrV1):
    """Gone Into Rapture comics, retrieved via the Tumblr V1 API."""
    # Also on http://goneintorapture.tumblr.com
    # Also on http://tapastic.com/series/Goneintorapture
    name = "rapture"
    long_name = "Gone Into Rapture"
    url = "http://www.goneintorapture.com"
3437
3438
3439
class TheOatmealTumblr(GenericTumblrV1):
    """The Oatmeal comics, retrieved via the Tumblr V1 API."""
    # Also on http://theoatmeal.com
    name = "oatmeal-tumblr"
    long_name = "The Oatmeal (from Tumblr)"
    url = "http://oatmeal.tumblr.com"
3445
3446
3447
class HeckIfIKnowComicsTumblr(GenericTumblrV1):
    """Heck If I Know comics, retrieved via the Tumblr V1 API."""
    # Also on http://tapastic.com/series/Regular
    name = "heck-tumblr"
    long_name = "Heck if I Know comics (from Tumblr)"
    url = "http://heckifiknowcomics.com"
3453
3454
3455
class MyJetPack(GenericTumblrV1):
    """My Jet Pack comics, retrieved via the Tumblr V1 API."""
    name = "jetpack"
    long_name = "My Jet Pack"
    url = "http://myjetpack.tumblr.com"
3460
3461
3462
class CheerUpEmoKidTumblr(GenericTumblrV1):
    """CheerUpEmoKid comics, retrieved via the Tumblr V1 API."""
    # Also on http://www.cheerupemokid.com
    # Also on http://tapastic.com/series/CUEK
    name = "cuek-tumblr"
    long_name = "Cheer Up Emo Kid (from Tumblr)"
    url = "http://enzocomics.tumblr.com"
3469
3470
3471
class ForLackOfABetterComic(GenericTumblrV1):
    """For Lack Of A Better Comic, retrieved via the Tumblr V1 API."""
    # Also on http://forlackofabettercomic.com
    name = "lack"
    long_name = "For Lack Of A Better Comic"
    url = "http://forlackofabettercomic.tumblr.com"
3477
3478
3479
class ZenPencilsTumblr(GenericTumblrV1):
    """ZenPencils comics, retrieved via the Tumblr V1 API."""
    # Also on http://zenpencils.com
    # Also on http://www.gocomics.com/zen-pencils
    name = "zenpencils-tumblr"
    long_name = "Zen Pencils (from Tumblr)"
    url = "http://zenpencils.tumblr.com"
3486
3487
3488
class ThreeWordPhraseTumblr(GenericTumblrV1):
    """Three Word Phrase comics, retrieved via the Tumblr V1 API."""
    # Also on http://threewordphrase.com
    name = "threeword-tumblr"
    long_name = "Three Word Phrase (from Tumblr)"
    url = "http://www.threewordphrase.tumblr.com"
3494
3495
3496
class TimeTrabbleTumblr(GenericTumblrV1):
    """Time Trabble comics, retrieved via the Tumblr V1 API."""
    # Also on http://timetrabble.com
    name = "timetrabble-tumblr"
    long_name = "Time Trabble (from Tumblr)"
    url = "http://timetrabble.tumblr.com"
3502
3503
3504
class SafelyEndangeredTumblr(GenericTumblrV1):
    """Safely Endangered comics, retrieved via the Tumblr V1 API."""
    # Also on http://www.safelyendangered.com
    name = "endangered-tumblr"
    long_name = "Safely Endangered (from Tumblr)"
    url = "http://tumblr.safelyendangered.com"
3510
3511
3512
class MouseBearComedyTumblr(GenericTumblrV1):
    """Mouse Bear Comedy comics, retrieved via the Tumblr V1 API."""
    # Also on http://www.mousebearcomedy.com
    name = "mousebear-tumblr"
    long_name = "Mouse Bear Comedy (from Tumblr)"
    url = "http://mousebearcomedy.tumblr.com"
3518
3519
3520
class BouletCorpTumblr(GenericTumblrV1):
    """BouletCorp comics, retrieved via the Tumblr V1 API."""
    # Also on http://www.bouletcorp.com
    name = "boulet-tumblr"
    long_name = "Boulet Corp (from Tumblr)"
    url = "http://bouletcorp.tumblr.com"
3526
3527
3528
class TheAwkwardYetiTumblr(GenericTumblrV1):
    """The Awkward Yeti comics, retrieved via the Tumblr V1 API."""
    # Also on http://www.gocomics.com/the-awkward-yeti
    # Also on http://theawkwardyeti.com
    # Also on https://tapastic.com/series/TheAwkwardYeti
    name = "yeti-tumblr"
    long_name = "The Awkward Yeti (from Tumblr)"
    url = "http://larstheyeti.tumblr.com"
3536
3537
3538
class NellucNhoj(GenericTumblrV1):
    """NellucNhoj comics, retrieved via the Tumblr V1 API."""
    name = "nhoj"
    long_name = "Nelluc Nhoj"
    url = "http://nellucnhoj.com"
3543
3544
3545
class DownTheUpwardSpiralTumblr(GenericTumblrV1):
    """Down The Upward Spiral comics, retrieved via the Tumblr V1 API."""
    # Also on http://www.downtheupwardspiral.com
    name = "spiral-tumblr"
    long_name = "Down the Upward Spiral (from Tumblr)"
    url = "http://downtheupwardspiral.tumblr.com"
3551
3552
3553
class AsPerUsualTumblr(GenericTumblrV1):
    """As Per Usual comics, retrieved via the Tumblr V1 API."""
    # Also on https://tapastic.com/series/AsPerUsual
    name = "usual-tumblr"
    long_name = "As Per Usual (from Tumblr)"
    url = "http://as-per-usual.tumblr.com"
3559
3560
3561
class OneOneOneOneComicTumblr(GenericTumblrV1):
    """1111 Comics, retrieved via the Tumblr V1 API."""
    # Also on http://www.1111comics.me
    # Also on https://tapastic.com/series/1111-Comics
    name = "1111-tumblr"
    long_name = "1111 Comics (from Tumblr)"
    url = "http://comics1111.tumblr.com"
3568
3569
3570
class JhallComicsTumblr(GenericTumblrV1):
    """Jhall Comics, retrieved via the Tumblr V1 API."""
    # Also on http://jhallcomics.com
    name = "jhall-tumblr"
    long_name = "Jhall Comics (from Tumblr)"
    url = "http://jhallcomics.tumblr.com"
3576
3577
3578
class BerkeleyMewsTumblr(GenericTumblrV1):
    """Berkeley Mews comics, retrieved via the Tumblr V1 API."""
    # Also on http://www.gocomics.com/berkeley-mews
    # Also on http://www.berkeleymews.com
    name = "berkeley-tumblr"
    long_name = "Berkeley Mews (from Tumblr)"
    url = "http://mews.tumblr.com"
3585
3586
3587
class JoanCornellaTumblr(GenericTumblrV1):
    """Joan Cornella comics, retrieved via the Tumblr V1 API."""
    # Also on http://joancornella.net
    name = "cornella-tumblr"
    long_name = "Joan Cornella (from Tumblr)"
    url = "http://cornellajoan.tumblr.com"
3593
3594
3595
class RespawnComicTumblr(GenericTumblrV1):
    """Respawn Comic, retrieved via the Tumblr V1 API."""
    # Also on http://respawncomic.com
    name = "respawn-tumblr"
    long_name = "Respawn Comic (from Tumblr)"
    url = "http://respawncomic.tumblr.com"
3601
3602
3603
class ChrisHallbeckTumblr(GenericTumblrV1):
    """Class to retrieve Chris Hallbeck comics."""
    # Also on https://tapastic.com/ChrisHallbeck
    # Also on http://maximumble.com
    # Also on http://minimumble.com
    # Also on http://thebookofbiff.com
    name = 'hallbeck-tumblr'
    # Spelling fixed ("Hallback" -> "Hallbeck") to match the class name and url.
    long_name = 'Chris Hallbeck (from Tumblr)'
    url = 'http://chrishallbeck.tumblr.com'
3612
3613
3614
class ComicNuggets(GenericTumblrV1):
    """Comic Nuggets, retrieved via the Tumblr V1 API."""
    name = "nuggets"
    long_name = "Comic Nuggets"
    url = "http://comicnuggets.com"
3619
3620
3621
class PigeonGazetteTumblr(GenericTumblrV1):
    """The Pigeon Gazette comics, retrieved via the Tumblr V1 API."""
    # Also on https://tapastic.com/series/The-Pigeon-Gazette
    name = "pigeon-tumblr"
    long_name = "The Pigeon Gazette (from Tumblr)"
    url = "http://thepigeongazette.tumblr.com"
3627
3628
3629
class CancerOwl(GenericTumblrV1):
    """Cancer Owl comics, retrieved via the Tumblr V1 API."""
    # Also on http://cancerowl.com
    name = "cancerowl-tumblr"
    long_name = "Cancer Owl (from Tumblr)"
    url = "http://cancerowl.tumblr.com"
3635
3636
3637
class FowlLanguageTumblr(GenericTumblrV1):
    """Fowl Language comics, retrieved via the Tumblr V1 API."""
    # Also on http://www.fowllanguagecomics.com
    # Also on http://tapastic.com/series/Fowl-Language-Comics
    # Also on http://www.gocomics.com/fowl-language
    name = "fowllanguage-tumblr"
    long_name = "Fowl Language Comics (from Tumblr)"
    url = "http://fowllanguagecomics.tumblr.com"
3645
3646
3647
class TheOdd1sOutTumblr(GenericTumblrV1):
    """The Odd 1s Out comics, retrieved via the Tumblr V1 API."""
    # Also on http://theodd1sout.com
    # Also on https://tapastic.com/series/Theodd1sout
    name = "theodd-tumblr"
    long_name = "The Odd 1s Out (from Tumblr)"
    url = "http://theodd1sout.tumblr.com"
3654
3655
3656
class TheUnderfoldTumblr(GenericTumblrV1):
    """The Underfold comics, retrieved via the Tumblr V1 API."""
    # Also on http://theunderfold.com
    name = "underfold-tumblr"
    long_name = "The Underfold (from Tumblr)"
    url = "http://theunderfold.tumblr.com"
3662
3663
3664
class LolNeinTumblr(GenericTumblrV1):
    """Lol Nein comics, retrieved via the Tumblr V1 API."""
    # Also on http://lolnein.com
    name = "lolnein-tumblr"
    long_name = "Lol Nein (from Tumblr)"
    url = "http://lolneincom.tumblr.com"
3670
3671
3672
class FatAwesomeComicsTumblr(GenericTumblrV1):
    """Fat Awesome Comics, retrieved via the Tumblr V1 API."""
    # Also on http://fatawesome.com/comics
    name = "fatawesome-tumblr"
    long_name = "Fat Awesome (from Tumblr)"
    url = "http://fatawesomecomedy.tumblr.com"
3678
3679
3680
class TheWorldIsFlatTumblr(GenericTumblrV1):
    """The World Is Flat comics, retrieved via the Tumblr V1 API."""
    # Also on https://tapastic.com/series/The-World-is-Flat
    name = "flatworld-tumblr"
    long_name = "The World Is Flat (from Tumblr)"
    url = "http://theworldisflatcomics.tumblr.com"
3686
3687
3688
class DorrisMc(GenericTumblrV1):
    """Dorris Mc comics, retrieved via the Tumblr V1 API."""
    # Also on http://www.gocomics.com/dorris-mccomics
    name = "dorrismc"
    long_name = "Dorris Mc"
    url = "http://dorrismccomics.com"
3694
3695
3696
class LeleozTumblr(GenericEmptyComic, GenericTumblrV1):
    """Leleoz comics from Tumblr."""
    # Also on https://tapastic.com/series/Leleoz
    name = "leleoz-tumblr"
    long_name = "Leleoz (from Tumblr)"
    url = "http://leleozcomics.tumblr.com"
3702
3703
3704
class MoonBeardTumblr(GenericTumblrV1):
    """MoonBeard comics, retrieved via the Tumblr V1 API."""
    # Also on http://moonbeard.com
    # Also on http://www.webtoons.com/en/comedy/moon-beard/list?title_no=471
    name = "moonbeard-tumblr"
    long_name = "Moon Beard (from Tumblr)"
    url = "http://blog.squiresjam.es/moonbeard"
3711
3712
3713
class AComik(GenericTumblrV1):
    """A Comik comics, retrieved via the Tumblr V1 API."""
    name = "comik"
    long_name = "A Comik"
    url = "http://acomik.com"
3718
3719
3720
class ClassicRandy(GenericTumblrV1):
    """Classic Randy comics, retrieved via the Tumblr V1 API."""
    name = "randy"
    long_name = "Classic Randy"
    url = "http://classicrandy.tumblr.com"
3725
3726
3727
class DagssonTumblr(GenericTumblrV1):
    """Dagsson comics, retrieved via the Tumblr V1 API."""
    # Also on http://www.dagsson.com
    name = "dagsson-tumblr"
    long_name = "Dagsson Hugleikur (from Tumblr)"
    url = "http://hugleikurdagsson.tumblr.com"
3733
3734
3735
class LinsEditionsTumblr(GenericTumblrV1):
    """L.I.N.S. Editions comics, retrieved via the Tumblr V1 API."""
    # Also on https://linsedition.com
    name = "lins-tumblr"
    long_name = "L.I.N.S. Editions (from Tumblr)"
    url = "http://linscomics.tumblr.com"
3741
3742
3743
class OrigamiHotDish(GenericTumblrV1):
    """Origami Hot Dish comics, retrieved via the Tumblr V1 API."""
    name = "origamihotdish"
    long_name = "Origami Hot Dish"
    url = "http://origamihotdish.com"
3748
3749
3750
class HitAndMissComicsTumblr(GenericTumblrV1):
    """Hit and Miss Comics, retrieved via the Tumblr V1 API."""
    name = "hitandmiss"
    long_name = "Hit and Miss Comics"
    url = "http://hitandmisscomics.tumblr.com"
3755
3756
3757
class HMBlanc(GenericTumblrV1):
    """HM Blanc comics, retrieved via the Tumblr V1 API."""
    name = "hmblanc"
    long_name = "HM Blanc"
    url = "http://hmblanc.tumblr.com"
3762
3763
3764
class TalesOfAbsurdityTumblr(GenericTumblrV1):
    """Tales Of Absurdity comics, retrieved via the Tumblr V1 API."""
    # Also on http://talesofabsurdity.com
    # Also on http://tapastic.com/series/Tales-Of-Absurdity
    name = "absurdity-tumblr"
    long_name = "Tales of Absurdity (from Tumblr)"
    url = "http://talesofabsurdity.tumblr.com"
3771
3772
3773
class RobbieAndBobby(GenericTumblrV1):
    """Robbie And Bobby comics, retrieved via the Tumblr V1 API."""
    # Also on http://robbieandbobby.com
    name = "robbie-tumblr"
    long_name = "Robbie And Bobby (from Tumblr)"
    url = "http://robbieandbobby.tumblr.com"
3779
3780
3781
class ElectricBunnyComicTumblr(GenericTumblrV1):
    """Electric Bunny Comics, retrieved via the Tumblr V1 API."""
    # Also on http://www.electricbunnycomics.com/View/Comic/153/Welcome+to+Hell
    name = "bunny-tumblr"
    long_name = "Electric Bunny Comic (from Tumblr)"
    url = "http://electricbunnycomics.tumblr.com"
3787
3788
3789
class Hoomph(GenericTumblrV1):
    """Hoomph comics, retrieved via the Tumblr V1 API."""
    name = "hoomph"
    long_name = "Hoomph"
    url = "http://hoom.ph"
3794
3795
3796
class BFGFSTumblr(GenericTumblrV1):
    """BFGFS comics, retrieved via the Tumblr V1 API."""
    # Also on https://tapastic.com/series/BFGFS
    # Also on http://bfgfs.com
    name = "bfgfs-tumblr"
    long_name = "BFGFS (from Tumblr)"
    url = "http://bfgfs.tumblr.com"
3803
3804
3805
class DoodleForFood(GenericTumblrV1):
    """Doodle For Food comics, retrieved via the Tumblr V1 API."""
    # Also on http://doodleforfood.com
    name = "doodle"
    long_name = "Doodle For Food"
    url = "http://doodleforfood.com"
3811
3812
3813
class DougWasTaken(GenericTumblrV1):
    """Doug Was Taken comics, retrieved via the Tumblr V1 API."""
    name = "doog"
    long_name = "Doug Was Taken"
    url = "http://dougwastaken.tumblr.com"
3818
3819
3820
class HorovitzComics(GenericListableComic):
    """Shared logic for the comics hosted on horovitzcomics.com.

    Subclasses must override `link_re` with a regexp matching the hrefs of
    their comics; its first group is used as the comic number.
    """
    url = "http://www.horovitzcomics.com"
    # Year, month and day are read from the path of the comic image.
    img_re = re.compile(".*comics/([0-9]*)/([0-9]*)/([0-9]*)/.*$")
    link_re = NotImplemented
    get_url_from_archive_element = join_cls_url_to_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, date, image and number of the comic behind `link`.

        Exactly one image with id 'comic' is expected (asserted); the date is
        extracted from its 'src' with `img_re`.
        """
        num = int(cls.link_re.match(link['href']).group(1))
        title = link.string
        imgs = soup.find_all('img', id='comic')
        assert len(imgs) == 1
        date_match = cls.img_re.match(imgs[0]['src'])
        year, month, day = (int(part) for part in date_match.groups())
        return {
            'title': title,
            'day': day,
            'month': month,
            'year': year,
            'img': [img['src'] for img in imgs],
            'num': num,
        }

    @classmethod
    def get_archive_elements(cls):
        """Return the archive anchors matching `link_re`, in reversed page order."""
        archive_soup = get_soup_at_url('http://www.horovitzcomics.com/comics/archive/')
        return reversed(archive_soup.find_all('a', href=cls.link_re))
3850
3851
3852
class HorovitzNew(HorovitzComics):
    """Horovitz comics listed under '/comics/new/'."""
    name = "horovitznew"
    long_name = "Horovitz New"
    link_re = re.compile("^/comics/new/([0-9]+)$")
3857
3858
3859
class HorovitzClassic(HorovitzComics):
    """Horovitz comics listed under '/comics/classic/'."""
    name = "horovitzclassic"
    long_name = "Horovitz Classic"
    link_re = re.compile("^/comics/classic/([0-9]+)$")
3864
3865
3866
class GenericGoComic(GenericNavigableComic):
    """Shared logic for the comics hosted on gocomics.com."""
    # Comic urls end with /<year>/<month>/<day>.
    url_date_re = re.compile(".*/([0-9]*)/([0-9]*)/([0-9]*)$")

    @classmethod
    def get_first_comic_link(cls):
        """Return the anchor of class 'beginning' found on the comic's main page."""
        main_soup = get_soup_at_url(cls.url)
        return main_soup.find('a', class_='beginning')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the 'next' or 'prev' anchor whose href ends with a date."""
        wanted_class = 'next' if next_ else 'prev'
        return last_soup.find('a', class_=wanted_class, href=cls.url_date_re)

    @classmethod
    def get_url_from_link(cls, link):
        """Join the href of `link` with the gocomics.com root url."""
        return urljoin_wrapper('http://www.gocomics.com', link['href'])

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return date, image and author of a comic.

        The date comes from the comic url; the image is the last 'img' of
        class 'strip' in the page; the author comes from the 'author' meta tag.
        """
        comic_url = cls.get_url_from_link(link)
        date_parts = cls.url_date_re.match(comic_url).groups()
        year, month, day = (int(part) for part in date_parts)
        strip_src = soup.find_all('img', class_='strip')[-1]['src']
        author = soup.find('meta', attrs={'name': 'author'})['content']
        return {
            'day': day,
            'month': month,
            'year': year,
            'img': [strip_src],
            'author': author
        }
3897
3898
3899
class PearlsBeforeSwine(GenericGoComic):
    """Pearls Before Swine comics, retrieved from gocomics.com."""
    name = "pearls"
    long_name = "Pearls Before Swine"
    url = "http://www.gocomics.com/pearlsbeforeswine"
3904
3905
3906
class Peanuts(GenericGoComic):
    """Peanuts comics, retrieved from gocomics.com."""
    name = "peanuts"
    long_name = "Peanuts"
    url = "http://www.gocomics.com/peanuts"
3911
3912
3913
class MattWuerker(GenericGoComic):
    """Matt Wuerker comics, retrieved from gocomics.com."""
    name = "wuerker"
    long_name = "Matt Wuerker"
    url = "http://www.gocomics.com/mattwuerker"
3918
3919
3920
class TomToles(GenericGoComic):
    """Tom Toles comics, retrieved from gocomics.com."""
    name = "toles"
    long_name = "Tom Toles"
    url = "http://www.gocomics.com/tomtoles"
3925
3926
3927
class BreakOfDay(GenericGoComic):
    """Break Of Day comics, retrieved from gocomics.com."""
    name = "breakofday"
    long_name = "Break Of Day"
    url = "http://www.gocomics.com/break-of-day"
3932
3933
3934
class Brevity(GenericGoComic):
    """Brevity comics, retrieved from gocomics.com."""
    name = "brevity"
    long_name = "Brevity"
    url = "http://www.gocomics.com/brevity"
3939
3940
3941
class MichaelRamirez(GenericGoComic):
    """Michael Ramirez comics, retrieved from gocomics.com."""
    name = "ramirez"
    long_name = "Michael Ramirez"
    url = "http://www.gocomics.com/michaelramirez"
3946
3947
3948
class MikeLuckovich(GenericGoComic):
    """Mike Luckovich comics, retrieved from gocomics.com."""
    name = "luckovich"
    long_name = "Mike Luckovich"
    url = "http://www.gocomics.com/mikeluckovich"
3953
3954
3955
class JimBenton(GenericGoComic):
    """Jim Benton comics, retrieved from gocomics.com."""
    # Also on http://jimbenton.tumblr.com
    name = "benton"
    long_name = "Jim Benton"
    url = "http://www.gocomics.com/jim-benton-cartoons"
3961
3962
3963
class TheArgyleSweater(GenericGoComic):
    """The Argyle Sweater comics, retrieved from gocomics.com."""
    name = "argyle"
    long_name = "Argyle Sweater"
    url = "http://www.gocomics.com/theargylesweater"
3968
3969
3970
class SunnyStreet(GenericGoComic):
    """Sunny Street comics, as hosted on gocomics.com."""

    # Also on http://www.sunnystreetcomics.com
    name = "sunny"
    long_name = "Sunny Street"
    url = "http://www.gocomics.com/sunny-street"
3976
3977
3978
class OffTheMark(GenericGoComic):
    """Off The Mark comics, as hosted on gocomics.com."""

    # Also on https://www.offthemark.com
    name = "offthemark"
    long_name = "Off The Mark"
    url = "http://www.gocomics.com/offthemark"
3984
3985
3986
class WuMo(GenericGoComic):
    """WuMo comics, as hosted on gocomics.com."""

    # Also on http://wumo.com
    name = "wumo"
    long_name = "WuMo"
    url = "http://www.gocomics.com/wumo"
3992
3993
3994
class LunarBaboon(GenericGoComic):
    """Lunar Baboon comics, as hosted on gocomics.com."""

    # Also on http://www.lunarbaboon.com
    # Also on https://tapastic.com/series/Lunarbaboon
    name = "lunarbaboon"
    long_name = "Lunar Baboon"
    url = "http://www.gocomics.com/lunarbaboon"
4001
4002
4003
class SandersenGocomic(GenericGoComic):
    """Sarah Andersen comics, as hosted on gocomics.com."""

    # Also on http://sarahcandersen.com
    # Also on http://tapastic.com/series/Doodle-Time
    name = "sandersen-goc"
    long_name = "Sarah Andersen (from GoComics)"
    url = "http://www.gocomics.com/sarahs-scribbles"
4010
4011
4012
class CalvinAndHobbesGoComic(GenericGoComic):
    """Calvin and Hobbes comics, as hosted on gocomics.com."""

    # From gocomics, not http://marcel-oehler.marcellosendos.ch/comics/ch/
    name = "calvin-goc"
    long_name = "Calvin and Hobbes (from GoComics)"
    url = "http://www.gocomics.com/calvinandhobbes"
4018
4019
4020
class RallGoComic(GenericGoComic):
    """Ted Rall comics, as hosted on gocomics.com."""

    # Also on http://rall.com/comic
    name = "rall-goc"
    long_name = "Ted Rall (from GoComics)"
    url = "http://www.gocomics.com/tedrall"
4026
4027
4028
class TheAwkwardYetiGoComic(GenericGoComic):
    """The Awkward Yeti comics, as hosted on gocomics.com."""

    # Also on http://larstheyeti.tumblr.com
    # Also on http://theawkwardyeti.com
    # Also on https://tapastic.com/series/TheAwkwardYeti
    name = "yeti-goc"
    long_name = "The Awkward Yeti (from GoComics)"
    url = "http://www.gocomics.com/the-awkward-yeti"
4036
4037
4038
class BerkeleyMewsGoComics(GenericGoComic):
    """Berkeley Mews comics, as hosted on gocomics.com."""

    # Also on http://mews.tumblr.com
    # Also on http://www.berkeleymews.com
    name = "berkeley-goc"
    long_name = "Berkeley Mews (from GoComics)"
    url = "http://www.gocomics.com/berkeley-mews"
4045
4046
4047
class SheldonGoComics(GenericGoComic):
    """Sheldon comics, as hosted on gocomics.com."""

    # Also on http://www.sheldoncomics.com
    name = "sheldon-goc"
    long_name = "Sheldon Comics (from GoComics)"
    url = "http://www.gocomics.com/sheldon"
4053
4054
4055
class FowlLanguageGoComics(GenericGoComic):
    """Fowl Language comics, as hosted on gocomics.com."""

    # Also on http://www.fowllanguagecomics.com
    # Also on http://tapastic.com/series/Fowl-Language-Comics
    # Also on http://fowllanguagecomics.tumblr.com
    name = "fowllanguage-goc"
    long_name = "Fowl Language Comics (from GoComics)"
    url = "http://www.gocomics.com/fowl-language"
4063
4064
4065
class NickAnderson(GenericGoComic):
    """Nick Anderson comics, as hosted on gocomics.com."""

    name = "nickanderson"
    long_name = "Nick Anderson"
    url = "http://www.gocomics.com/nickanderson"
4070
4071
4072
class GarfieldGoComics(GenericGoComic):
    """Garfield comics, as hosted on gocomics.com."""

    # Also on http://garfield.com
    name = "garfield-goc"
    long_name = "Garfield (from GoComics)"
    url = "http://www.gocomics.com/garfield"
4078
4079
4080
class DorrisMcGoComics(GenericGoComic):
    """Dorris Mc comics, as hosted on gocomics.com."""

    # Also on http://dorrismccomics.com
    name = "dorrismc-goc"
    long_name = "Dorris Mc (from GoComics)"
    url = "http://www.gocomics.com/dorris-mccomics"
4086
4087
4088
class MisterAndMeGoComics(GenericGoComic):
    """Mister & Me comics, as hosted on gocomics.com."""

    # Also on http://www.mister-and-me.com
    # Also on https://tapastic.com/series/Mister-and-Me
    name = "mister-goc"
    long_name = "Mister & Me (from GoComics)"
    url = "http://www.gocomics.com/mister-and-me"
4095
4096
4097
class GenericTapasticComic(GenericListableComic):
    """Generic class to handle the logic common to comics from tapastic.com.

    Subclasses provide `url` (the page of the series): the list of episodes
    is extracted from the javascript embedded in that page.
    """

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Get information about a particular comics.

        `soup` is searched for the <img class="art-image"> tags (presumably
        it is the parsed page of the episode) and `archive_elt` is one entry
        of the list returned by get_archive_elements.

        Returns a dict with the 'day', 'year', 'month', 'img' and 'title'
        keys, or None when the page does not contain any image (yet).
        """
        # 'publishDate' is divided by 1000 before being given to
        # fromtimestamp (presumably milliseconds since epoch - to confirm).
        timestamp = int(archive_elt['publishDate']) / 1000.0
        day = datetime.datetime.fromtimestamp(timestamp).date()
        imgs = soup.find_all('img', class_='art-image')
        if not imgs:
            print("Comic %s is being uploaded, retry later" % cls.get_url_from_archive_element(archive_elt))
            return None
        # From here, imgs is known to be non-empty: no additional check needed.
        return {
            'day': day.day,
            'year': day.year,
            'month': day.month,
            'img': [i['src'] for i in imgs],
            'title': archive_elt['title'],
        }

    @classmethod
    def get_url_from_archive_element(cls, archive_elt):
        """Get the url of an episode from its entry in the episode list."""
        return 'http://tapastic.com/episode/' + str(archive_elt['id'])

    @classmethod
    def get_archive_elements(cls):
        """Get the list of episodes as described in the page of the series.

        Returns the JSON-decoded content of the first line of the page of
        the form "episodeList : <json>,".
        Raises IndexError when no such line is found.
        """
        pref, suff = 'episodeList : ', ','
        # Information is stored in the javascript part
        # I don't know the clean way to get it so this is the ugly way.
        for raw_line in urlopen_wrapper(cls.url).readlines():
            line = raw_line.decode('utf-8').strip()
            if line.startswith(pref) and line.endswith(suff):
                return json.loads(line[len(pref):-len(suff)])
        # Same exception type as the former "[...][0]" but with an explanation.
        raise IndexError('No line starting with %r found in %s' % (pref, cls.url))
4129
4130
4131
class VegetablesForDessert(GenericTapasticComic):
    """Vegetables For Dessert comics, as hosted on tapastic.com."""

    # Also on http://vegetablesfordessert.tumblr.com
    name = "vegetables"
    long_name = "Vegetables For Dessert"
    url = "http://tapastic.com/series/vegetablesfordessert"
4137
4138
4139
class FowlLanguageTapa(GenericTapasticComic):
    """Fowl Language comics, as hosted on tapastic.com."""

    # Also on http://www.fowllanguagecomics.com
    # Also on http://fowllanguagecomics.tumblr.com
    # Also on http://www.gocomics.com/fowl-language
    name = "fowllanguage-tapa"
    long_name = "Fowl Language Comics (from Tapastic)"
    url = "http://tapastic.com/series/Fowl-Language-Comics"
4147
4148
4149
class OscillatingProfundities(GenericTapasticComic):
    """Oscillating Profundities comics, as hosted on tapastic.com."""

    name = "oscillating"
    long_name = "Oscillating Profundities"
    url = "http://tapastic.com/series/oscillatingprofundities"
4154
4155
4156
class ZnoflatsComics(GenericTapasticComic):
    """Znoflats comics, as hosted on tapastic.com."""

    name = "znoflats"
    long_name = "Znoflats Comics"
    url = "http://tapastic.com/series/Znoflats-Comics"
4161
4162
4163
class SandersenTapastic(GenericTapasticComic):
    """Sarah Andersen comics, as hosted on tapastic.com."""

    # Also on http://sarahcandersen.com
    # Also on http://www.gocomics.com/sarahs-scribbles
    name = "sandersen-tapa"
    long_name = "Sarah Andersen (from Tapastic)"
    url = "http://tapastic.com/series/Doodle-Time"
4170
4171
4172
class TubeyToonsTapastic(GenericTapasticComic):
    """TubeyToons comics, as hosted on tapastic.com."""

    # Also on http://tubeytoons.com
    # Also on http://tubeytoons.tumblr.com
    name = "tubeytoons-tapa"
    long_name = "Tubey Toons (from Tapastic)"
    url = "http://tapastic.com/series/Tubey-Toons"
4179
4180
4181
class AnythingComicTapastic(GenericTapasticComic):
    """Anything Comic comics, as hosted on tapastic.com."""

    # Also on http://www.anythingcomic.com
    name = "anythingcomic-tapa"
    long_name = "Anything Comic (from Tapastic)"
    url = "http://tapastic.com/series/anything"
4187
4188
4189
class UnearthedComicsTapastic(GenericTapasticComic):
    """Unearthed comics, as hosted on tapastic.com."""

    # Also on http://unearthedcomics.com
    # Also on http://unearthedcomics.tumblr.com
    name = "unearthed-tapa"
    long_name = "Unearthed Comics (from Tapastic)"
    url = "http://tapastic.com/series/UnearthedComics"
4196
4197
4198
class EverythingsStupidTapastic(GenericTapasticComic):
    """Everything's Stupid comics, as hosted on tapastic.com."""

    # Also on http://www.webtoons.com/en/challenge/everythings-stupid/list?title_no=14591
    # Also on http://everythingsstupid.net
    name = "stupid-tapa"
    long_name = "Everything's Stupid (from Tapastic)"
    url = "http://tapastic.com/series/EverythingsStupid"
4205
4206
4207
class JustSayEhTapastic(GenericTapasticComic):
    """Just Say Eh comics, as hosted on tapastic.com."""

    # Also on http://www.justsayeh.com
    name = "justsayeh-tapa"
    long_name = "Just Say Eh (from Tapastic)"
    url = "http://tapastic.com/series/Just-Say-Eh"
4213
4214
4215
class ThorsThundershackTapastic(GenericTapasticComic):
    """Thor's Thundershack comics, as hosted on tapastic.com."""

    # Also on http://www.thorsthundershack.com
    name = "thor-tapa"
    long_name = "Thor's Thundershack (from Tapastic)"
    url = "http://tapastic.com/series/Thors-Thundershac"
4221
4222
4223
class OwlTurdTapastic(GenericTapasticComic):
    """Owl Turd comics, as hosted on tapastic.com."""

    # Also on http://owlturd.com
    name = "owlturd-tapa"
    long_name = "Owl Turd (from Tapastic)"
    url = "http://tapastic.com/series/Owl-Turd-Comix"
4229
4230
4231
class GoneIntoRaptureTapastic(GenericTapasticComic):
    """Gone Into Rapture comics, as hosted on tapastic.com."""

    # Also on http://goneintorapture.tumblr.com
    # Also on http://www.goneintorapture.com
    name = "rapture-tapa"
    long_name = "Gone Into Rapture (from Tapastic)"
    url = "http://tapastic.com/series/Goneintorapture"
4238
4239
4240
class HeckIfIKnowComicsTapa(GenericTapasticComic):
    """Heck If I Know comics, as hosted on tapastic.com."""

    # Also on http://heckifiknowcomics.com
    name = "heck-tapa"
    long_name = "Heck if I Know comics (from Tapastic)"
    url = "http://tapastic.com/series/Regular"
4246
4247
4248
class CheerUpEmoKidTapa(GenericTapasticComic):
    """Cheer Up Emo Kid comics, as hosted on tapastic.com."""

    # Also on http://www.cheerupemokid.com
    # Also on http://enzocomics.tumblr.com
    name = "cuek-tapa"
    long_name = "Cheer Up Emo Kid (from Tapastic)"
    url = "http://tapastic.com/series/CUEK"
4255
4256
4257
class BigFootJusticeTapa(GenericTapasticComic):
    """Big Foot Justice comics, as hosted on tapastic.com."""

    # Also on http://bigfootjustice.com
    name = "bigfoot-tapa"
    long_name = "Big Foot Justice (from Tapastic)"
    url = "http://tapastic.com/series/bigfoot-justice"
4263
4264
4265
class UpAndOutTapa(GenericTapasticComic):
    """Up & Out comics, as hosted on tapastic.com."""

    # Also on http://upandoutcomic.tumblr.com
    name = "upandout-tapa"
    long_name = "Up And Out (from Tapastic)"
    url = "http://tapastic.com/series/UP-and-OUT"
4271
4272
4273
class ToonHoleTapa(GenericTapasticComic):
    """Toon Hole comics, as hosted on tapastic.com."""

    # Also on http://www.toonhole.com
    name = "toonhole-tapa"
    long_name = "Toon Hole (from Tapastic)"
    url = "http://tapastic.com/series/TOONHOLE"
4279
4280
4281
class AngryAtNothingTapa(GenericTapasticComic):
    """Angry At Nothing comics, as hosted on tapastic.com."""

    # Also on http://www.angryatnothing.net
    name = "angry-tapa"
    long_name = "Angry At Nothing (from Tapastic)"
    url = "http://tapastic.com/series/Comics-yeah-definitely-comics-"
4287
4288
4289
class LeleozTapa(GenericTapasticComic):
    """Leleoz comics, as hosted on tapastic.com."""

    # Also on http://leleozcomics.tumblr.com
    name = "leleoz-tapa"
    long_name = "Leleoz (from Tapastic)"
    url = "https://tapastic.com/series/Leleoz"
4295
4296
4297
class TheAwkwardYetiTapa(GenericTapasticComic):
    """The Awkward Yeti comics, as hosted on tapastic.com."""

    # Also on http://www.gocomics.com/the-awkward-yeti
    # Also on http://theawkwardyeti.com
    # Also on http://larstheyeti.tumblr.com
    name = "yeti-tapa"
    long_name = "The Awkward Yeti (from Tapastic)"
    url = "https://tapastic.com/series/TheAwkwardYeti"
4305
4306
4307
class AsPerUsualTapa(GenericTapasticComic):
    """As Per Usual comics, as hosted on tapastic.com."""

    # Also on http://as-per-usual.tumblr.com
    name = "usual-tapa"
    long_name = "As Per Usual (from Tapastic)"
    url = "https://tapastic.com/series/AsPerUsual"
4313
4314
4315
class OneOneOneOneComicTapa(GenericTapasticComic):
    """1111 Comics, as hosted on tapastic.com."""

    # Also on http://www.1111comics.me
    # Also on http://comics1111.tumblr.com
    name = "1111-tapa"
    long_name = "1111 Comics (from Tapastic)"
    url = "https://tapastic.com/series/1111-Comics"
4322
4323
4324
class TumbleDryTapa(GenericTapasticComic):
    """Class to retrieve Tumble Dry comics (from tapastic.com)."""
    # Also on http://tumbledrycomics.com
    name = 'tumbledry-tapa'
    # Displayed name: used to read 'Tumblr Dry', a typo for 'Tumble Dry'
    # (see the class name, the url and the other website).
    long_name = 'Tumble Dry (from Tapastic)'
    url = 'https://tapastic.com/series/TumbleDryComics'
4330
4331
4332
class DeadlyPanelTapa(GenericTapasticComic):
    """Deadly Panel comics, as hosted on tapastic.com."""

    # Also on http://www.deadlypanel.com
    name = "deadly-tapa"
    long_name = "Deadly Panel (from Tapastic)"
    url = "https://tapastic.com/series/deadlypanel"
4338
4339
4340
class ChrisHallbeckMaxiTapa(GenericTapasticComic):
    """Class to retrieve Chris Hallbeck's Maximumble comics (from tapastic.com)."""
    # Also on http://chrishallbeck.tumblr.com
    # Also on http://maximumble.com
    name = 'hallbeckmaxi-tapa'
    # Displayed name: author's name used to be misspelled 'Hallback'.
    long_name = 'Chris Hallbeck - Maximumble (from Tapastic)'
    url = 'https://tapastic.com/series/Maximumble'
4347
4348
4349
class ChrisHallbeckMiniTapa(GenericTapasticComic):
    """Class to retrieve Chris Hallbeck's Minimumble comics (from tapastic.com)."""
    # Also on http://chrishallbeck.tumblr.com
    # Also on http://minimumble.com
    name = 'hallbeckmini-tapa'
    # Displayed name: author's name used to be misspelled 'Hallback'.
    long_name = 'Chris Hallbeck - Minimumble (from Tapastic)'
    url = 'https://tapastic.com/series/Minimumble'
4356
4357
4358
class ChrisHallbeckBiffTapa(GenericTapasticComic):
    """Class to retrieve Chris Hallbeck's The Book of Biff comics (from tapastic.com)."""
    # Also on http://chrishallbeck.tumblr.com
    # Also on http://thebookofbiff.com
    name = 'hallbeckbiff-tapa'
    # Displayed name: author's name used to be misspelled 'Hallback'.
    long_name = 'Chris Hallbeck - The Book of Biff (from Tapastic)'
    url = 'https://tapastic.com/series/Biff'
4365
4366
4367
class RandoWisTapa(GenericTapasticComic):
    """RandoWis comics, as hosted on tapastic.com."""

    # Also on https://randowis.com
    name = "randowis-tapa"
    long_name = "RandoWis (from Tapastic)"
    url = "https://tapastic.com/series/RandoWis"
4373
4374
4375
class PigeonGazetteTapa(GenericTapasticComic):
    """The Pigeon Gazette comics, as hosted on tapastic.com."""

    # Also on http://thepigeongazette.tumblr.com
    name = "pigeon-tapa"
    long_name = "The Pigeon Gazette (from Tapastic)"
    url = "https://tapastic.com/series/The-Pigeon-Gazette"
4381
4382
4383
class TheOdd1sOutTapa(GenericTapasticComic):
    """The Odd 1s Out comics, as hosted on tapastic.com."""

    # Also on http://theodd1sout.com
    # Also on http://theodd1sout.tumblr.com
    name = "theodd-tapa"
    long_name = "The Odd 1s Out (from Tapastic)"
    url = "https://tapastic.com/series/Theodd1sout"
4390
4391
4392
class TheWorldIsFlatTapa(GenericTapasticComic):
    """The World Is Flat comics, as hosted on tapastic.com."""

    # Also on http://theworldisflatcomics.tumblr.com
    name = "flatworld-tapa"
    long_name = "The World Is Flat (from Tapastic)"
    url = "https://tapastic.com/series/The-World-is-Flat"
4398
4399
4400
class MisterAndMeTapa(GenericTapasticComic):
    """Mister & Me comics, as hosted on tapastic.com."""

    # Also on http://www.mister-and-me.com
    # Also on http://www.gocomics.com/mister-and-me
    name = "mister-tapa"
    long_name = "Mister & Me (from Tapastic)"
    url = "https://tapastic.com/series/Mister-and-Me"
4407
4408
4409
class TalesOfAbsurdityTapa(GenericTapasticComic):
    """Tales Of Absurdity comics, as hosted on tapastic.com."""

    # Also on http://talesofabsurdity.com
    # Also on http://talesofabsurdity.tumblr.com
    name = "absurdity-tapa"
    long_name = "Tales of Absurdity (from Tapastic)"
    url = "http://tapastic.com/series/Tales-Of-Absurdity"
4416
4417
4418
class BFGFSTapa(GenericTapasticComic):
    """BFGFS comics, as hosted on tapastic.com."""

    # Also on http://bfgfs.com
    # Also on http://bfgfs.tumblr.com
    name = "bfgfs-tapa"
    long_name = "BFGFS (from Tapastic)"
    url = "https://tapastic.com/series/BFGFS"
4425
4426
4427
class DoodleForFoodTapa(GenericTapasticComic):
    """Doodle For Food comics, as hosted on tapastic.com."""

    # Also on http://doodleforfood.com
    name = "doodle-tapa"
    long_name = "Doodle For Food (from Tapastic)"
    url = "https://tapastic.com/series/Doodle-for-Food"
4433
4434
4435
class MrLovensteinTapa(GenericTapasticComic):
    """Mr. Lovenstein comics, as hosted on tapastic.com."""

    # NOTE(review): the former "Also on" comment pointed to this very
    # Tapastic url; the intended cross-reference is presumably the comic's
    # own website (http://www.mrlovenstein.com) - to be confirmed.
    name = "mrlovenstein-tapa"
    long_name = "Mr. Lovenstein (from Tapastic)"
    url = "https://tapastic.com/series/MrLovenstein"
4441
4442
4443
def get_subclasses(klass):
    """Return the list of all the classes deriving from klass.

    Direct subclasses come first, then the descendants of each of them
    (recursively, in the same order). A class reachable through several
    paths (multiple inheritance) appears once per path.
    """
    direct = klass.__subclasses__()
    found = list(direct)
    for child in direct:
        found += get_subclasses(child)
    return found
4449
4450
4451
def remove_st_nd_rd_th_from_date(string):
    """Function to transform 1st/2nd/3rd/4th in a parsable date format.

    The ordinal suffixes ('st', 'nd', 'rd', 'th') are removed only when they
    directly follow a digit, so that day and month names containing these
    letters ("Monday", "Sunday", "Saturday", "August") are left untouched.
    Returns the new string.
    """
    # The lookbehind keeps the digit and drops the suffix following it.
    return re.sub(r'(?<=\d)(?:st|nd|rd|th)', '', string)
4459
4460
4461
def string_to_date(string, date_format, local=DEFAULT_LOCAL):
    """Function to convert string to date object.

    Wrapper around datetime.datetime.strptime: `string` is parsed according
    to `date_format` with the locale temporarily set to `local`.
    Returns a datetime.date. Exceptions raised by strptime (ValueError on
    mismatch) or by setlocale are propagated; the previous locale is
    restored even when the parsing fails.
    """
    # format described in https://docs.python.org/3/library/datetime.html#strftime-and-strptime-behavior
    # Called without a value, setlocale only returns the current setting.
    prev_locale = locale.setlocale(locale.LC_ALL)
    if local == prev_locale:
        return datetime.datetime.strptime(string, date_format).date()
    locale.setlocale(locale.LC_ALL, local)
    try:
        return datetime.datetime.strptime(string, date_format).date()
    finally:
        # Do not leave the whole process with a different locale on failure.
        locale.setlocale(locale.LC_ALL, prev_locale)
4472
4473
4474
# Every direct/indirect subclass of GenericComic (set: no duplicates).
COMICS = set(get_subclasses(GenericComic))
# Only the classes defining a name are kept.
VALID_COMICS = [c for c in COMICS if c.name is not None]
# Mapping from the name of a comic to its class - names must be unique.
COMIC_NAMES = {c.name: c for c in VALID_COMICS}
assert len(VALID_COMICS) == len(COMIC_NAMES), (
    'Comic name used by more than one class: %s'
    % sorted(c.name for c in VALID_COMICS if COMIC_NAMES[c.name] is not c))
# Names of the classes - they must be unique too.
CLASS_NAMES = {c.__name__ for c in VALID_COMICS}
assert len(VALID_COMICS) == len(CLASS_NAMES), (
    'Class name used by more than one comic: %s'
    % sorted(n for n in CLASS_NAMES
             if sum(c.__name__ == n for c in VALID_COMICS) > 1))
4480