Completed
Push — master ( 6f7896...0de88c )
by De
06:41
created

comics.py (16 issues)

1
#! /usr/bin/python3
2
# vim: set expandtab tabstop=4 shiftwidth=4 :
3
"""Module to retrieve webcomics"""
4
5
from comic_abstract import GenericComic, get_date_for_comic
6
import re
7
from datetime import date, timedelta
8
import datetime
9
from urlfunctions import get_soup_at_url, urljoin_wrapper,\
10
    convert_iri_to_plain_ascii_uri, load_json_at_url, urlopen_wrapper
11
import json
12
import locale
13
import urllib
14
15
# Default locale identifier (British English, UTF-8 encoding).
# NOTE(review): presumably the fallback locale used by date-parsing helpers
# defined elsewhere in this file - confirm against its users.
DEFAULT_LOCAL = 'en_GB.UTF-8'
16
17
18
class Xkcd(GenericComic):
    """Retriever for xkcd comics, driven by the site's JSON documents."""
    name = 'xkcd'
    long_name = 'xkcd'
    url = 'http://xkcd.com'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Yield the comics following `last_comic`.

        Implementation of GenericComic's abstract method.

        `last_comic` is the last comic already retrieved (its 'num' entry
        is read) or a falsy value to start from number 1. The number of
        the latest comic is read from 'info.0.json' at the site root.
        """
        first_num = last_comic['num'] if last_comic else 0
        latest_info = load_json_at_url(urljoin_wrapper(cls.url, 'info.0.json'))
        last_num = latest_info['num']

        for num in range(first_num + 1, last_num + 1):
            if num == 404:
                # Number 404 is never requested.
                continue
            json_url = urljoin_wrapper(cls.url, '%d/info.0.json' % num)
            comic = load_json_at_url(json_url)
            comic['img'] = [comic['img']]
            comic['prefix'] = '%d-' % num
            comic['json_url'] = json_url
            comic['url'] = urljoin_wrapper(cls.url, str(num))
            # Date parts are converted to integers.
            for date_part in ('day', 'month', 'year'):
                comic[date_part] = int(comic[date_part])
            assert comic['num'] == num
            yield comic
44
45
46
# Helper functions corresponding to get_url_from_link/get_url_from_archive_element
47
48
49
@classmethod
def get_href(cls, link):
    """Return the 'href' entry of `link`.

    Can be assigned in a class body as an implementation of
    get_url_from_link/get_url_from_archive_element.
    """
    href = link['href']
    return href
53
54
55
@classmethod
def join_cls_url_to_href(cls, link):
    """Return the 'href' entry of `link` joined to the class url.

    Can be assigned in a class body as an implementation of
    get_url_from_link/get_url_from_archive_element.
    """
    base_url, href = cls.url, link['href']
    return urljoin_wrapper(base_url, href)
59
60
61
class GenericNavigableComic(GenericComic):
    """Generic class for "navigable" comics: with first/next arrows.

    This class applies to comics where the previous and next comics can
    be accessed from a given comic. Once given a starting point (either
    the first comic or the last comic retrieved), it handles the
    navigation, the retrieval of the soup object and the setting of
    the 'url' attribute on retrieved comics. This greatly limits the
    amount of boilerplate code in the different implementation classes.

    The `get_next_comic` method is implemented in terms of more
    specialized methods to be implemented/overridden:
        - get_first_comic_link
        - get_navi_link
        - get_comic_info
        - get_url_from_link
    """

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        Sometimes this can be retrieved from any comic page, sometimes on
        the archive page, sometimes it doesn't exist at all and one has
        to iterate backward to find it before hardcoding the result found.
        """
        raise NotImplementedError

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next (or previous - for dev purposes) comic.

        `next_` is truthy for the next comic, falsy for the previous one.
        """
        raise NotImplementedError

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Implementations return a dict without a 'url' key (it is added by
        `get_next_comic`) or None when the page must be skipped.
        """
        raise NotImplementedError

    @classmethod
    def get_url_from_link(cls, link):
        """Get url corresponding to a link. Default implementation is similar to get_href."""
        return link['href']

    @classmethod
    def _get_url_or_none(cls, link):
        """Return the url corresponding to `link`, or None when `link` is None."""
        return None if link is None else cls.get_url_from_link(link)

    @classmethod
    def get_next_link(cls, last_soup):
        """Get link to next comic."""
        return cls.get_navi_link(last_soup, True)

    @classmethod
    def get_prev_link(cls, last_soup):
        """Get link to previous comic."""
        return cls.get_navi_link(last_soup, False)

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generic implementation of get_next_comic for navigable comics.

        Starts after `last_comic` (its 'url' entry is read) when provided,
        from the first comic link otherwise, then follows "next" links
        until there is none or until a link leads to the current url.
        """
        url = last_comic['url'] if last_comic else None
        next_comic = \
            cls.get_next_link(get_soup_at_url(url)) \
            if url else \
            cls.get_first_comic_link()
        cls.log("next/first comic will be %s (url is %s)" % (str(next_comic), url))
        while next_comic:
            prev_url, url = url, cls.get_url_from_link(next_comic)
            if prev_url == url:
                # A link pointing to the page it comes from: stop here.
                cls.log("got same url %s" % url)
                break
            cls.log("about to get %s (%s)" % (url, str(next_comic)))
            soup = get_soup_at_url(url)
            comic = cls.get_comic_info(soup, next_comic)
            if comic is not None:
                assert 'url' not in comic
                comic['url'] = url
                yield comic
            next_comic = cls.get_next_link(soup)
            cls.log("next comic will be %s" % str(next_comic))

    @classmethod
    def check_first_link(cls):
        """Check that navigation to first comic seems to be working - for dev purposes.

        Returns True when a first link exists and its url can be fetched
        without an HTTP error, False otherwise.
        """
        # `import urllib` alone does not guarantee that the `urllib.error`
        # submodule is loaded, hence the explicit import.
        import urllib.error
        cls.log("about to check first link")
        ok = True
        firstlink = cls.get_first_comic_link()
        if firstlink is None:
            print("From %s : no first link" % cls.url)
            ok = False
        else:
            firsturl = cls.get_url_from_link(firstlink)
            try:
                get_soup_at_url(firsturl)
            except urllib.error.HTTPError:
                print("From %s : invalid first url" % cls.url)
                ok = False
        cls.log("checked first link -> returned %d" % ok)
        return ok

    @classmethod
    def check_prev_next_links(cls, url):
        """Check that navigation to prev/next from a given URL seems to be working - for dev purposes.

        Going backward then forward (and forward then backward) from `url`
        is expected to lead back to `url`. A missing link on the
        neighbouring page is reported as a failure instead of raising.
        Returns True when all checks pass, False otherwise.
        """
        cls.log("about to check prev/next from %s" % url)
        ok = True
        if url is None:
            prevlink, nextlink = None, None
        else:
            soup = get_soup_at_url(url)
            prevlink, nextlink = cls.get_prev_link(soup), cls.get_next_link(soup)
        if prevlink is None and nextlink is None:
            print("From %s : no previous nor next" % url)
            ok = False
        else:
            if prevlink:
                prevurl = cls.get_url_from_link(prevlink)
                prevsoup = get_soup_at_url(prevurl)
                # The previous page may have no "next" link at all.
                prevnext = cls._get_url_or_none(cls.get_next_link(prevsoup))
                if prevnext != url:
                    print("From %s, going backward then forward leads to %s" % (url, prevnext))
                    ok = False
            if nextlink:
                nexturl = cls.get_url_from_link(nextlink)
                # A "next" link pointing to the current page is not followed.
                if nexturl != url:
                    nextsoup = get_soup_at_url(nexturl)
                    # The next page may have no "previous" link at all.
                    nextprev = cls._get_url_or_none(cls.get_prev_link(nextsoup))
                    if nextprev != url:
                        print("From %s, going forward then backward leads to %s" % (url, nextprev))
                        ok = False
        cls.log("checked prev/next from %s -> returned %d" % (url, ok))
        return ok

    @classmethod
    def check_navigation(cls, url):
        """Check that navigation functions seem to be working - for dev purposes.

        Runs both `check_first_link` and `check_prev_next_links` and
        returns True only when both succeed.
        """
        cls.log("about to check navigation from %s" % url)
        first = cls.check_first_link()
        prevnext = cls.check_prev_next_links(url)
        ok = first and prevnext
        cls.log("checked navigation from %s -> returned %d" % (url, ok))
        return ok
198
199
200
class GenericListableComic(GenericComic):
    """Generic class for "listable" comics: with a list of comics (aka 'archive').

    The `get_next_comic` method is implemented in terms of more
    specialized methods to be implemented/overridden:
        - get_archive_elements
        - get_url_from_archive_element
        - get_comic_info
    """

    @classmethod
    def get_archive_elements(cls):
        """Return an iterable over the elements of the archive."""
        raise NotImplementedError

    @classmethod
    def get_url_from_archive_element(cls, archive_elt):
        """Return the url an archive element refers to."""
        raise NotImplementedError

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Return the information about one comic (or None to skip it)."""
        raise NotImplementedError

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generic implementation of get_next_comic for listable comics.

        Archive elements are walked in order. When `last_comic` is
        provided, elements are ignored up to and including the one whose
        url equals `last_comic['url']`; the following ones are fetched
        and yielded. A message is printed if that url is never met.
        """
        pending_url = last_comic['url'] if last_comic else None
        for archive_elt in cls.get_archive_elements():
            url = cls.get_url_from_archive_element(archive_elt)
            cls.log("considering %s" % url)
            if pending_url and pending_url == url:
                # Last retrieved comic reached: start yielding after it.
                pending_url = None
                continue
            if pending_url is not None:
                continue
            cls.log("about to get %s (%s)" % (url, str(archive_elt)))
            soup = get_soup_at_url(url)
            comic = cls.get_comic_info(soup, archive_elt)
            if comic is None:
                continue
            assert 'url' not in comic
            comic['url'] = url
            yield comic
        if pending_url is not None:
            print("Did not find %s : there might be a problem" % pending_url)
244
245
# Helper functions corresponding to get_first_comic_link/get_navi_link
246
247
248
@classmethod
def get_link_rel_next(cls, last_soup, next_):
    """Implementation of get_navi_link based on <link rel="next"/"prev">.

    Looks for rel='next' when `next_` is truthy, rel='prev' otherwise.
    """
    rel_value = 'next' if next_ else 'prev'
    return last_soup.find('link', rel=rel_value)
252
253
254
@classmethod
def get_a_rel_next(cls, last_soup, next_):
    """Implementation of get_navi_link based on <a rel="next"/"prev">.

    Looks for rel='next' when `next_` is truthy, rel='prev' otherwise.
    """
    rel_value = 'next' if next_ else 'prev'
    return last_soup.find('a', rel=rel_value)
258
259
260
@classmethod
def get_a_navi_navinext(cls, last_soup, next_):
    """Implementation of get_navi_link based on <a class="navi navi-next"/"navi navi-prev">."""
    if next_:
        css_class = 'navi navi-next'
    else:
        css_class = 'navi navi-prev'
    return last_soup.find('a', class_=css_class)
264
265
266
@classmethod
def get_a_navi_comicnavnext_navinext(cls, last_soup, next_):
    """Implementation of get_navi_link based on <a> tags carrying the
    'navi comic-nav-next navi-next' / 'navi comic-nav-previous navi-prev'
    class value."""
    if next_:
        css_class = 'navi comic-nav-next navi-next'
    else:
        css_class = 'navi comic-nav-previous navi-prev'
    return last_soup.find('a', class_=css_class)
270
271
272
@classmethod
def get_a_comicnavbase_comicnavnext(cls, last_soup, next_):
    """Implementation of get_navi_link based on <a> tags carrying the
    'comic-nav-base comic-nav-next' / 'comic-nav-base comic-nav-previous'
    class value."""
    if next_:
        css_class = 'comic-nav-base comic-nav-next'
    else:
        css_class = 'comic-nav-base comic-nav-previous'
    return last_soup.find('a', class_=css_class)
276
277
278
@classmethod
def get_a_navi_navifirst(cls):
    """Implementation of get_first_comic_link: the <a class="navi navi-first">
    tag found on the page at the class url."""
    home_soup = get_soup_at_url(cls.url)
    return home_soup.find('a', class_='navi navi-first')
282
283
284
@classmethod
def get_div_navfirst_a(cls):
    """Implementation of get_first_comic_link.

    Returns the <a> tag found inside the <div class="nav-first"> of the
    page at the class url, or None when that div is missing (instead of
    failing on the chained lookup), so that callers testing the link for
    None/falsiness keep working.
    """
    div = get_soup_at_url(cls.url).find('div', class_="nav-first")
    return div.find('a') if div else None
288
289
290
@classmethod
def get_a_comicnavbase_comicnavfirst(cls):
    """Implementation of get_first_comic_link: the
    <a class="comic-nav-base comic-nav-first"> tag found on the page at
    the class url."""
    home_soup = get_soup_at_url(cls.url)
    return home_soup.find('a', class_='comic-nav-base comic-nav-first')
294
295
296
@classmethod
def simulate_first_link(cls):
    """Implementation of get_first_comic_link building a link-like dict
    whose 'href' entry is the `first_url` attribute of the class."""
    return dict(href=cls.first_url)
301
302
303
@classmethod
def navigate_to_first_comic(cls):
    """Implementation of get_first_comic_link navigating from a user
    provided URL to the first comic.

    Sometimes, the first comic cannot be reached directly so to start
    from the first comic one has to follow "previous" links until
    there is none left. Once this URL is known, it is better to
    hardcode it but for development purposes, it is convenient to
    have an automatic way to find it.

    The starting URL is read from standard input; every visited URL is
    printed. Returns a link-like dict whose 'href' is the last URL reached.
    """
    url = input("Get starting URL: ")
    print(url)
    while True:
        prev_link = cls.get_prev_link(get_soup_at_url(url))
        if not prev_link:
            # No previous comic: `url` is the first one.
            return {'href': url}
        url = cls.get_url_from_link(prev_link)
        print(url)
322
323
324
class GenericEmptyComic(GenericComic):
    """Generic class for comics where nothing is to be done.

    It can be useful to temporarily deactivate comics that do not work
    properly by replacing `class MyComic(GenericWhateverComic)` with
    `class MyComic(GenericEmptyComic, GenericWhateverComic)`."""

    @classmethod
    def get_next_comic(cls, last_comic):
        """Implementation of get_next_comic: logs a message and returns an empty list."""
        cls.log("comic is considered as empty - returning no comic")
        no_comic = []
        return no_comic
336
337
338
class ExtraFabulousComics(GenericNavigableComic):
    """Class to retrieve Extra Fabulous Comics."""
    name = 'efc'
    long_name = 'Extra Fabulous Comics'
    url = 'http://extrafabulouscomics.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics: the <a title="FIRST"> tag of the home page."""
        return get_soup_at_url(cls.url).find('a', title='FIRST')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with the post title, the sources of the images
        located under the site's 'wp-content/uploads/' directory and a
        prefix built from the title.
        """
        # The site url is escaped so that its dots only match literal dots.
        img_src_re = re.compile('^%s/wp-content/uploads/' % re.escape(cls.url))
        imgs = soup.find_all('img', src=img_src_re)
        title = soup.find('h2', class_='post-title').string
        return {
            'title': title,
            'img': [i['src'] for i in imgs],
            'prefix': title + '-'
        }
361
362
363 View Code Duplication
class GenericLeMondeBlog(GenericNavigableComic):
    """Generic class to retrieve comics from Le Monde blogs.

    Subclasses provide `first_url`, the page navigation starts from.
    """
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Title, short link and images come from the page's <link>/<meta>
        tags; the date is read from the "entry-date" span and parsed with
        the "%d %B %Y" format in the "fr_FR.utf8" locale.
        """
        short_link = soup.find('link', rel='shortlink')['href']
        og_title = soup.find('meta', property='og:title')['content']
        raw_date = soup.find("span", class_="entry-date").string
        published = string_to_date(raw_date, "%d %B %Y", "fr_FR.utf8")
        og_images = soup.find_all('meta', property='og:image')
        img_urls = [convert_iri_to_plain_ascii_uri(meta['content']) for meta in og_images]
        return {
            'title': og_title,
            'url2': short_link,
            'img': img_urls,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
385
386
387
class ZepWorld(GenericLeMondeBlog):
    """Retriever for the Zep World comics (Le Monde blog)."""
    name = 'zep'
    long_name = 'Zep World'
    url = 'http://zepworld.blog.lemonde.fr'
    # Page navigation starts from.
    first_url = 'http://zepworld.blog.lemonde.fr/2014/10/31/bientot-le-blog-de-zep/'
393
394
395
class Vidberg(GenericLeMondeBlog):
    """Retriever for the Vidberg comics (Le Monde blog)."""
    name = 'vidberg'
    long_name = "Vidberg - l'actu en patates"
    url = 'http://vidberg.blog.lemonde.fr'
    # Page navigation starts from. It is not the actual first comic: no
    # efficient way to retrieve that one was found.
    first_url = 'http://vidberg.blog.lemonde.fr/2012/02/09/revue-de-campagne-la-campagne-du-modem-semballe/'
402
403
404
class Plantu(GenericLeMondeBlog):
    """Retriever for the Plantu comics (Le Monde blog)."""
    name = 'plantu'
    long_name = 'Plantu'
    url = 'http://plantu.blog.lemonde.fr'
    # Page navigation starts from.
    first_url = 'http://plantu.blog.lemonde.fr/2014/10/28/stress-test-a-bruxelles/'
410
411
412
class XavierGorce(GenericLeMondeBlog):
    """Retriever for the Xavier Gorce comics (Le Monde blog)."""
    name = 'gorce'
    long_name = 'Xavier Gorce'
    url = 'http://xaviergorce.blog.lemonde.fr'
    # Page navigation starts from.
    first_url = 'http://xaviergorce.blog.lemonde.fr/2015/01/09/distinction/'
418
419
420
class CartooningForPeace(GenericLeMondeBlog):
    """Retriever for the Cartooning For Peace comics (Le Monde blog)."""
    name = 'forpeace'
    long_name = 'Cartooning For Peace'
    url = 'http://cartooningforpeace.blog.lemonde.fr'
    # Page navigation starts from.
    first_url = 'http://cartooningforpeace.blog.lemonde.fr/2014/12/15/bado/'
426
427
428
class Aurel(GenericLeMondeBlog):
    """Retriever for the Aurel comics (Le Monde blog)."""
    name = 'aurel'
    long_name = 'Aurel'
    url = 'http://aurel.blog.lemonde.fr'
    # Page navigation starts from.
    first_url = 'http://aurel.blog.lemonde.fr/2014/09/29/le-senat-repasse-a-droite/'
434
435
436
class LesCulottees(GenericLeMondeBlog):
    """Retriever for the Les Culottees comics (Le Monde blog)."""
    name = 'culottees'
    long_name = 'Les Culottees'
    url = 'http://lesculottees.blog.lemonde.fr'
    # Page navigation starts from.
    first_url = 'http://lesculottees.blog.lemonde.fr/2016/01/11/clementine-delait-femme-a-barbe/'
442
443
444
class UneAnneeAuLycee(GenericLeMondeBlog):
    """Retriever for the Une Annee Au Lycee comics (Le Monde blog)."""
    name = 'lycee'
    long_name = 'Une Annee au Lycee'
    url = 'http://uneanneeaulycee.blog.lemonde.fr'
    # Page navigation starts from.
    first_url = 'http://uneanneeaulycee.blog.lemonde.fr/2016/06/13/la-semaine-du-bac-est-arrivee/'
450
451
452 View Code Duplication
class Rall(GenericNavigableComic):
    """Retriever for the Ted Rall comics."""
    # Also on http://www.gocomics.com/tedrall
    name = 'rall'
    long_name = "Ted Rall"
    url = "http://rall.com/comic"
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    # Not the first but I didn't find an efficient way to retrieve it
    first_url = "http://rall.com/2014/01/30/los-angeles-times-cartoon-well-miss-those-california-flowers"

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Title and description come from the og: <meta> tags, the author
        and the date (format "%B %d, %Y") from the page's spans, the
        images from the "entry-content" div.
        """
        og_title = soup.find('meta', property='og:title')['content']
        author_span = soup.find("span", class_="author vcard")
        author = author_span.find("a").string
        raw_date = soup.find("span", class_="entry-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        og_desc = soup.find('meta', property='og:description')['content']
        content_imgs = soup.find('div', class_='entry-content').find_all('img')
        # The last 7 images are dropped: social media buttons.
        comic_imgs = content_imgs[:-7]
        return {
            'title': og_title,
            'author': author,
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'description': og_desc,
            'img': [img['src'] for img in comic_imgs],
        }
482
483
484 View Code Duplication
class Dilem(GenericNavigableComic):
    """Retriever for the Ali Dilem comics."""
    name = 'dilem'
    long_name = 'Ali Dilem'
    url = 'http://information.tv5monde.com/dilem'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = "http://information.tv5monde.com/dilem/2004-06-26"

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns the <a> tag inside the matching <li>, or None when there
        is no such <li>.
        """
        # prev is next / next is prev
        css_class = 'prev' if next_ else 'next'
        item = last_soup.find('li', class_=css_class)
        if not item:
            return None
        return item.find('a')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The date is taken from the first 10 characters of the 'dc:date'
        span content, parsed with the "%Y-%m-%d" format.
        """
        shortlink = soup.find('link', rel='shortlink')['href']
        twitter_title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        og_images = soup.find_all('meta', property='og:image')
        full_date = soup.find('span', property='dc:date')['content']
        published = string_to_date(full_date[:10], "%Y-%m-%d")
        return {
            'short_url': shortlink,
            'title': twitter_title,
            'img': [meta['content'] for meta in og_images],
            'day': published.day,
            'month': published.month,
            'year': published.year,
        }
517
518
519
class SpaceAvalanche(GenericNavigableComic):
    """Retriever for the Space Avalanche comics."""
    name = 'avalanche'
    long_name = 'Space Avalanche'
    url = 'http://www.spaceavalanche.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics: a hardcoded link-like dict."""
        return dict(
            href="http://www.spaceavalanche.com/2009/02/02/irish-sea/",
            title="Irish Sea")

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The title is the 'title' entry of `link`; year, month and day are
        the three numeric path components extracted from the comic url.
        """
        url_date_re = re.compile('.*/([0-9]*)/([0-9]*)/([0-9]*)/.*$')
        title = link['title']
        comic_url = cls.get_url_from_link(link)
        date_parts = url_date_re.match(comic_url).groups()
        year, month, day = map(int, date_parts)
        entry_imgs = soup.find("div", class_="entry").find_all("img")
        return {
            'title': title,
            'day': day,
            'month': month,
            'year': year,
            'img': [img['src'] for img in entry_imgs],
        }
547
548
549
class ZenPencils(GenericNavigableComic):
    """Retriever for the Zen Pencils comics."""
    # Also on http://zenpencils.tumblr.com
    # Also on http://www.gocomics.com/zen-pencils
    name = 'zenpencils'
    long_name = 'Zen Pencils'
    url = 'http://zenpencils.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = "http://zenpencils.com/comic/1-ralph-waldo-emerson-make-them-cry/"

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Images come from the "comic" div, author/title/date from the
        "post-content" div (date format "%B %d, %Y"), the description
        from the og:description <meta> tag.
        """
        comic_imgs = soup.find('div', id='comic').find_all('img')
        post = soup.find('div', class_='post-content')
        author = post.find("span", class_="post-author").find("a").string
        title = post.find('h2', class_='post-title').string
        raw_date = post.find('span', class_='post-date').string
        published = string_to_date(raw_date, "%B %d, %Y")
        # Sanity checks: at least one image, whose alt text equals its
        # title attribute and is either the post title or empty.
        assert comic_imgs
        assert all(img['alt'] == img['title'] for img in comic_imgs)
        assert all(img['alt'] in (title, "") for img in comic_imgs)
        description = soup.find('meta', property='og:description')['content']
        return {
            'title': title,
            'description': description,
            'author': author,
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [img['src'] for img in comic_imgs],
        }
582
583
584
class ItsTheTie(GenericNavigableComic):
    """Retriever for the It's the tie comics."""
    # Also on http://itsthetie.tumblr.com
    # Also on https://tapastic.com/series/itsthetie
    name = 'tie'
    long_name = "It's the tie"
    url = "http://itsthetie.com"
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Images are the og:image ones followed by the bonus images (the
        'data-oversrc' attribute of <img> tags) not already listed, minus
        any source ending with "/2016/01/bonus-panel.png". Tags are the
        content of the article:tag <meta> tag, or "" without such a tag.
        """
        title = soup.find('h1', class_='comic-title').find('a').string
        raw_date = soup.find('header', class_='comic-meta entry-meta').find('a').string
        published = string_to_date(raw_date, "%B %d, %Y")
        # Bonus images may or may not be in meta og:image.
        og_srcs = [meta['content'] for meta in soup.find_all('meta', property='og:image')]
        bonus_srcs = [img['data-oversrc']
                      for img in soup.find_all('img', attrs={'data-oversrc': True})]
        candidates = list(og_srcs)
        for src in bonus_srcs:
            if src not in og_srcs:
                candidates.append(src)
        kept_srcs = [src for src in candidates
                     if not src.endswith("/2016/01/bonus-panel.png")]
        tag_meta = soup.find('meta', property='article:tag')
        if tag_meta:
            tags = tag_meta['content']
        else:
            tags = ""
        return {
            'title': title,
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': kept_srcs,
            'tags': tags,
        }
617
618
619
class PenelopeBagieu(GenericNavigableComic):
    """Retriever for the comics from Penelope Bagieu's blog."""
    name = 'bagieu'
    long_name = 'Ma vie est tout a fait fascinante (Bagieu)'
    url = 'http://www.penelope-jolicoeur.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.penelope-jolicoeur.com/2007/02/ma-vie-mon-oeuv.html'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The date is read from the "date-header" <h2> and parsed with the
        "%A %d %B %Y" format in the "fr_FR.utf8" locale; images come from
        the "entry-body" div, the title from the "entry-header" <h3>.
        """
        raw_date = soup.find('h2', class_='date-header').string
        published = string_to_date(raw_date, "%A %d %B %Y", "fr_FR.utf8")
        body_imgs = soup.find('div', class_='entry-body').find_all('img')
        title = soup.find('h3', class_='entry-header').string
        return {
            'title': title,
            'img': [img['src'] for img in body_imgs],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
642
643 View Code Duplication
644
class OneOneOneOneComic(GenericNavigableComic):
    """Retriever for the 1111 Comics."""
    # Also on http://comics1111.tumblr.com
    # Also on https://tapastic.com/series/1111-Comics
    name = '1111'
    long_name = '1111 Comics'
    url = 'http://www.1111comics.me'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Title and date (format "%B %d, %Y") are read from the page
        header, images from the og:image <meta> tags.
        """
        title = soup.find('h1', class_='comic-title').find('a').string
        header = soup.find('header', class_='comic-meta entry-meta')
        published = string_to_date(header.find('a').string, "%B %d, %Y")
        og_images = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [meta['content'] for meta in og_images],
        }
668
669
670
class AngryAtNothing(GenericNavigableComic):
    """Retriever for the Angry at Nothing comics."""
    # Also on http://tapastic.com/series/Comics-yeah-definitely-comics-
    name = 'angry'
    long_name = 'Angry At Nothing'
    url = 'http://www.angryatnothing.net'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Title and date (format "%B %d, %Y") are read from the page
        header, images from the og:image <meta> tags.
        """
        title_link = soup.find('h1', class_='comic-title').find('a')
        title = title_link.string
        date_link = soup.find('header', class_='comic-meta entry-meta').find('a')
        published = string_to_date(date_link.string, "%B %d, %Y")
        img_urls = [meta['content']
                    for meta in soup.find_all('meta', property='og:image')]
        return {
            'title': title,
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': img_urls,
        }
693
694 View Code Duplication
695
class NeDroid(GenericNavigableComic):
    """Class to retrieve NeDroid comics."""
    name = 'nedroid'
    long_name = 'NeDroid'
    url = 'http://nedroid.com'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The comic number is extracted from the short link
        ('<url>/?p=<num>'), the date from the image source
        ('//nedroid.com/comics/<year>-<month>-<day>...'). Exactly one
        image is expected in the "comic" div; its 'alt' and 'title'
        attributes give 'title' and 'title2'.
        """
        # Literal dots (and the site url) are escaped so they do not match
        # arbitrary characters.
        short_url_re = re.compile(r'^%s/\?p=([0-9]*)' % re.escape(cls.url))
        comic_url_re = re.compile(r'//nedroid\.com/comics/([0-9]*)-([0-9]*)-([0-9]*).*')
        short_url = cls.get_url_from_link(soup.find('link', rel='shortlink'))
        num = int(short_url_re.match(short_url).groups()[0])
        imgs = soup.find('div', id='comic').find_all('img')
        # Checked before the image is used, so that an unexpected number
        # of images is reported by the assertion itself.
        assert len(imgs) == 1
        img = imgs[0]
        year, month, day = [int(s) for s in comic_url_re.match(img['src']).groups()]
        title = img['alt']
        title2 = img['title']
        return {
            'short_url': short_url,
            'title': title,
            'title2': title2,
            'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
            'day': day,
            'month': month,
            'year': year,
            'num': num,
        }
726
727
728
class Garfield(GenericNavigableComic):
    """Class to retrieve Garfield comics."""
    # Also on http://www.gocomics.com/garfield
    name = 'garfield'
    long_name = 'Garfield'
    url = 'https://garfield.com'
    get_first_comic_link = simulate_first_link
    first_url = 'https://garfield.com/comic/1978/06/19'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Looks for the <a> tag with class 'comic-arrow-right' when `next_`
        is truthy, 'comic-arrow-left' otherwise.
        """
        return last_soup.find('a', class_='comic-arrow-right' if next_ else 'comic-arrow-left')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Year, month and day are extracted from the comic url
        ('<url>/comic/<year>/<month>/<day>'), images from the
        'img-responsive' <img> tags of the "comic-display" div.
        """
        url = cls.get_url_from_link(link)
        # The site url is escaped so that its dots only match literal dots.
        date_re = re.compile('^%s/comic/([0-9]*)/([0-9]*)/([0-9]*)' % re.escape(cls.url))
        year, month, day = [int(s) for s in date_re.match(url).groups()]
        imgs = soup.find('div', class_='comic-display').find_all('img', class_='img-responsive')
        return {
            'month': month,
            'year': year,
            'day': day,
            'img': [i['src'] for i in imgs],
        }
755
756
757
class Dilbert(GenericNavigableComic):
    """Retriever for the Dilbert strips published on dilbert.com."""
    # Also on http://www.gocomics.com/dilbert-classics
    name = 'dilbert'
    long_name = 'Dilbert'
    url = 'http://dilbert.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://dilbert.com/strip/1989-04-16'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor inside the next/previous navigation div, if any."""
        if next_:
            nav_class = 'nav-comic nav-right'
        else:
            nav_class = 'nav-comic nav-left'
        nav_div = last_soup.find('div', class_=nav_class)
        if not nav_div:
            return None
        return nav_div.find('a')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect the strip metadata exposed through the page's meta tags."""
        def meta_content(prop):
            """Return the content of the first meta tag with this property."""
            return soup.find('meta', property=prop)['content']

        title = meta_content('og:title')
        image_metas = soup.find_all('meta', property='og:image')
        description = meta_content('og:description')
        published = string_to_date(meta_content('article:publish_date'), "%B %d, %Y")
        author = meta_content('article:author')
        tags = meta_content('article:tag')
        return {
            'title': title,
            'description': description,
            'img': [meta['content'] for meta in image_metas],
            'author': author,
            'tags': tags,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
793
794
795
class VictimsOfCircumsolar(GenericNavigableComic):
    """Retriever for the Victims Of Circumsolar comics."""
    name = 'circumsolar'
    long_name = 'Victims Of Circumsolar'
    url = 'http://www.victimsofcircumsolar.com'
    get_navi_link = get_a_navi_comicnavnext_navinext
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.victimsofcircumsolar.com/comic/modern-addiction'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, description and image urls for one comic page.

        Title and description come from the last matching og: meta tag.
        """
        # Date is on the archive page
        title_metas = soup.find_all('meta', property='og:title')
        title = title_metas[-1]['content']
        desc_metas = soup.find_all('meta', property='og:description')
        description = desc_metas[-1]['content']
        pictures = soup.find('div', id='comic').find_all('img')
        # Every picture is expected to repeat the page title in alt and title.
        assert all(pic['title'] == pic['alt'] == title for pic in pictures)
        return {
            'title': title,
            'description': description,
            'img': [pic['src'] for pic in pictures],
        }
817
818
819
class ThreeWordPhrase(GenericNavigableComic):
    """Class to retrieve Three Word Phrase comics."""
    # Also on http://www.threewordphrase.tumblr.com
    name = 'threeword'
    long_name = 'Three Word Phrase'
    url = 'http://threewordphrase.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        Returns the parent element of the '/firstlink.gif' button image found
        on the page at cls.url.
        """
        return get_soup_at_url(cls.url).find('img', src='/firstlink.gif').parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        last_soup: parsed page of the comic to navigate from.
        next_: truthy to look for the 'next' button, falsy for 'previous'.

        Returns the element wrapping the button image, or None when the
        button image is missing or its parent carries no href.
        """
        button = last_soup.find('img', src='/nextlink.gif' if next_ else '/prevlink.gif')
        if button is None:
            # find() gives None when the image is absent: report "no link"
            # instead of failing on None.parent.
            return None
        link = button.parent
        return None if link.get('href') is None else link

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with the page title (None without a <title> tag), the
        joined alt texts and the image urls of every image that is not one
        of the known site decorations.
        """
        title = soup.find('title')
        # Images whose src ends with one of these belong to the site layout.
        site_images = ('link.gif', '32.png', 'twpbookad.jpg',
                       'merchad.jpg', 'header.gif', 'tipjar.jpg')
        imgs = [img for img in soup.find_all('img')
                if not img['src'].endswith(site_images)]
        return {
            'title': title.string if title else None,
            'title2': '  '.join(img.get('alt') for img in imgs if img.get('alt')),
            'img': [urljoin_wrapper(cls.url, img['src']) for img in imgs],
        }
851
852
853
class DeadlyPanel(GenericEmptyComic, GenericNavigableComic):
    """Retriever for the Deadly Panel comics."""
    # Also on https://tapastic.com/series/deadlypanel
    name = 'deadly'
    long_name = 'Deadly Panel'
    url = 'http://www.deadlypanel.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the image urls found in the page's 'comic' div."""
        comic_div = soup.find('div', id='comic')
        pictures = comic_div.find_all('img')
        # alt and title are expected to hold the same text on every picture.
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        return {
            'img': [pic['src'] for pic in pictures],
        }
870
871
872
class TheGentlemanArmchair(GenericNavigableComic):
    """Retriever for The Gentleman Armchair comics."""
    name = 'gentlemanarmchair'
    long_name = 'The Gentleman Armchair'
    url = 'http://thegentlemansarmchair.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return images, title, author and post date for one comic page."""
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        posted = string_to_date(
            soup.find('span', class_='post-date').string, "%B %d, %Y")
        pictures = soup.find('div', id='comic').find_all('img')
        return {
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'author': author,
            'month': posted.month,
            'year': posted.year,
            'day': posted.day,
        }
896
897
898 View Code Duplication
class MyExtraLife(GenericNavigableComic):
    """Retriever for the My Extra Life comics."""
    name = 'extralife'
    long_name = 'My Extra Life'
    url = 'http://www.myextralife.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'first comic' navigation anchor of the home page."""
        home = get_soup_at_url(cls.url)
        return home.find('a', class_='comic_nav_link first_comic_link')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, image urls and date for one comic page."""
        title = soup.find("h1", class_="comic_title").string
        posted = string_to_date(
            soup.find("span", class_="comic_date").string, "%B %d, %Y")
        pictures = soup.find_all("img", class_="comic")
        # Each comic image is expected to carry the title as alt and title.
        assert all(pic['alt'] == pic['title'] == title for pic in pictures)
        return {
            'title': title,
            # Images with an empty src are left out.
            'img': [pic['src'] for pic in pictures if pic["src"]],
            'day': posted.day,
            'month': posted.month,
            'year': posted.year
        }
925
926
927
class SaturdayMorningBreakfastCereal(GenericNavigableComic):
    """Retriever for the Saturday Morning Breakfast Cereal comics."""
    # Also on http://www.gocomics.com/saturday-morning-breakfast-cereal
    # Also on http://smbc-comics.tumblr.com
    name = 'smbc'
    long_name = 'Saturday Morning Breakfast Cereal'
    url = 'http://www.smbc-comics.com'
    get_navi_link = get_a_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the rel='start' anchor of the home page."""
        home = get_soup_at_url(cls.url)
        return home.find('a', rel='start')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, image urls and date for one comic page.

        The main image comes first; the image of the 'aftercomic' div is
        appended when that div exists and its image has a non-empty src.
        """
        main_img = soup.find('img', id='cc-comic')
        sources = [main_img['src']]
        bonus_div = soup.find('div', id='aftercomic')
        if bonus_div:
            bonus_src = bonus_div.find('img')['src']
            if bonus_src:
                sources.append(bonus_src)
        publish_div = soup.find('div', class_='cc-publishtime')
        posted = string_to_date(publish_div.contents[0], "%B %d, %Y")
        return {
            'title': main_img['title'],
            'img': [urljoin_wrapper(cls.url, src) for src in sources],
            'day': posted.day,
            'month': posted.month,
            'year': posted.year
        }
958
959
960
class PerryBibleFellowship(GenericListableComic):
    """Retriever for the Perry Bible Fellowship comics."""
    name = 'pbf'
    long_name = 'Perry Bible Fellowship'
    url = 'http://pbfcomics.com'
    get_url_from_archive_element = join_cls_url_to_href

    @classmethod
    def get_archive_elements(cls):
        """Return the home page's numeric comic links, in reversed page order."""
        numeric_href = re.compile('^/[0-9]*/$')
        anchors = get_soup_at_url(cls.url).find_all('a', href=numeric_href)
        return reversed(anchors)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return number, name and image url for one comic.

        The number is read from the link's 'name' attribute and must agree
        with its href; exactly one comic image is expected on the page.
        """
        comic_url = cls.get_url_from_archive_element(link)
        img_src_re = re.compile('^/archive_b/PBF.*')
        name = link.string
        num = int(link['name'])
        assert link['href'] == '/%d/' % num
        pictures = soup.find_all('img', src=img_src_re)
        assert len(pictures) == 1
        assert pictures[0]['alt'] == name
        return {
            'num': num,
            'name': name,
            'img': [urljoin_wrapper(comic_url, pic['src']) for pic in pictures],
            'prefix': '%d-' % num,
        }
990
991
992 View Code Duplication
class Mercworks(GenericNavigableComic):
    """Retriever for the Mercworks comics."""
    # Also on http://mercworks.tumblr.com
    name = 'mercworks'
    long_name = 'Mercworks'
    url = 'http://mercworks.net'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return images, title, author, description and date from meta tags.

        The description defaults to an empty string when the page has no
        og:description meta tag.
        """
        title = soup.find('meta', property='og:title')['content']
        desc_meta = soup.find('meta', property='og:description')
        if desc_meta:
            description = desc_meta['content']
        else:
            description = ""
        author_meta = soup.find('meta', attrs={'name': 'shareaholic:article_author_name'})
        author = author_meta['content']
        time_meta = soup.find('meta', attrs={'name': 'shareaholic:article_published_time'})
        # Only the first 10 characters (the %Y-%m-%d part) are parsed.
        published = string_to_date(time_meta['content'][:10], "%Y-%m-%d")
        image_metas = soup.find_all('meta', property='og:image')
        return {
            'img': [meta['content'] for meta in image_metas],
            'title': title,
            'author': author,
            'desc': description,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
1021
1022
1023
class BerkeleyMews(GenericListableComic):
    """Retriever for the Berkeley Mews comics."""
    # Also on http://mews.tumblr.com
    # Also on http://www.gocomics.com/berkeley-mews
    name = 'berkeley'
    long_name = 'Berkeley Mews'
    url = 'http://www.berkeleymews.com'
    get_url_from_archive_element = get_href
    # Matches comic urls and captures the post number.
    comic_num_re = re.compile('%s/\\?p=([0-9]*)$' % url)

    @classmethod
    def get_archive_elements(cls):
        """Return the archive page's comic links, in reversed page order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, "?page_id=2"))
        anchors = archive_soup.find_all('a', href=cls.comic_num_re)
        return reversed(anchors)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return number, titles, image url and date for one comic.

        The date is parsed from the image url; the number from the comic url.
        """
        date_in_src = re.compile('.*/([0-9]*)-([0-9]*)-([0-9]*)-.*')
        comic_url = cls.get_url_from_archive_element(link)
        num = int(cls.comic_num_re.match(comic_url).group(1))
        picture = soup.find('div', id='comic').find('img')
        # alt and title are expected to hold the same text.
        assert all(pic['alt'] == pic['title'] for pic in [picture])
        hover_text = picture['title']
        src = picture['src']
        year, month, day = map(int, date_in_src.match(src).groups())
        return {
            'num': num,
            'title': link.string,
            'title2': hover_text,
            'img': [src],
            'year': year,
            'month': month,
            'day': day,
        }
1058
1059
1060
class GenericBouletCorp(GenericNavigableComic):
    """Shared retrieval logic for the BouletCorp comics (one subclass per language)."""
    # Also on http://bouletcorp.tumblr.com
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the first anchor of the home page's 'centered_nav' div."""
        nav_div = get_soup_at_url(cls.url).find('div', id='centered_nav')
        return nav_div.find_all('a')[0]

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return images, title, image texts and date for one comic page.

        The date is parsed from the comic url; images without a src
        attribute are left out of the image list.
        """
        comic_url = cls.get_url_from_link(link)
        date_in_url = re.compile('^%s/([0-9]*)/([0-9]*)/([0-9]*)/' % cls.url)
        year, month, day = map(int, date_in_url.match(comic_url).groups())
        story = soup.find('div', id='notes').find('div', class_='storycontent')
        pictures = story.find_all('img')
        hover_texts = [pic.get('title') for pic in pictures]
        texts = '  '.join(text for text in hover_texts if text)
        title = soup.find('title').string
        return {
            'img': [convert_iri_to_plain_ascii_uri(pic['src'])
                    for pic in pictures if pic.get('src') is not None],
            'title': title,
            'texts': texts,
            'year': year,
            'month': month,
            'day': day,
        }
1087
1088
1089
class BouletCorp(GenericBouletCorp):
    """Retriever for the BouletCorp comics served from www.bouletcorp.com."""
    name = 'boulet'
    long_name = 'Boulet Corp'
    url = 'http://www.bouletcorp.com'
1094
1095
1096
class BouletCorpEn(GenericBouletCorp):
    """Retriever for the English BouletCorp comics (english.bouletcorp.com)."""
    name = 'boulet_en'
    long_name = 'Boulet Corp English'
    url = 'http://english.bouletcorp.com'
1101
1102
1103
class AmazingSuperPowers(GenericNavigableComic):
    """Retriever for the Amazing Super Powers comics."""
    name = 'asp'
    long_name = 'Amazing Super Powers'
    url = 'http://www.amazingsuperpowers.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, author, image urls and post date for one comic page.

        The title is the space-joined title attributes of the comic images.
        """
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        posted = string_to_date(
            soup.find('span', class_='post-date').string, "%B %d, %Y")
        pictures = soup.find('div', id='comic').find_all('img')
        title = ' '.join(pic['title'] for pic in pictures)
        # alt and title are expected to hold the same text on every picture.
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        return {
            'title': title,
            'author': author,
            'img': [pic['src'] for pic in pictures],
            'day': posted.day,
            'month': posted.month,
            'year': posted.year
        }
1128
1129
1130
class ToonHole(GenericListableComic):
    """Retriever for the Toon Hole comics."""
    # Also on http://tapastic.com/series/TOONHOLE
    name = 'toonhole'
    long_name = 'Toon Hole'
    url = 'http://www.toonhole.com'
    get_url_from_archive_element = get_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, date and image urls for one comic.

        The title is the archive link's text; every comic image is expected
        to repeat it in its alt and title attributes.
        """
        title = link.string
        raw_date = soup.find('div', class_='comicdate').string.strip()
        posted = string_to_date(remove_st_nd_rd_th_from_date(raw_date), "%B %d, %Y")
        pictures = soup.find('div', id='comic').find_all('img')
        assert all(pic['alt'] == pic['title'] == title for pic in pictures)
        return {
            'title': title,
            'month': posted.month,
            'year': posted.year,
            'day': posted.day,
            'img': [convert_iri_to_plain_ascii_uri(pic['src']) for pic in pictures],
        }

    @classmethod
    def get_archive_elements(cls):
        """Return the archive page's rel='bookmark' links, in reversed page order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive/'))
        bookmarks = archive_soup.find_all('a', rel='bookmark')
        return reversed(bookmarks)
1158
1159
1160
class Channelate(GenericNavigableComic):
    """Retriever for the Channelate comics."""
    name = 'channelate'
    long_name = 'Channelate'
    url = 'http://www.channelate.com'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, author, date and image urls for one comic page.

        When the page has an 'extrapanelbutton' div, the page it links to is
        fetched as well and its 'extrapanelimage' images are appended after
        the regular ones; 'url_extra' is None otherwise.
        """
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        posted = string_to_date(
            soup.find('span', class_='post-date').string, '%Y/%m/%d')
        title = soup.find('meta', property='og:title')['content']
        comic_div = soup.find('div', id='comic')
        if comic_div:
            pictures = comic_div.find_all('img')
        else:
            pictures = []
        bonus_url = None
        bonus_div = soup.find('div', id='extrapanelbutton')
        if bonus_div:
            bonus_url = bonus_div.find('a')['href']
            bonus_soup = get_soup_at_url(bonus_url)
            pictures.extend(bonus_soup.find_all('img', class_='extrapanelimage'))
        return {
            'url_extra': bonus_url,
            'title': title,
            'author': author,
            'month': posted.month,
            'year': posted.year,
            'day': posted.day,
            'img': [pic['src'] for pic in pictures],
        }
1193
1194
1195
class CyanideAndHappiness(GenericNavigableComic):
    """Class to retrieve Cyanide And Happiness comics."""
    name = 'cyanide'
    long_name = 'Cyanide and Happiness'
    url = 'http://explosm.net'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        Returns the anchor titled 'Oldest comic' from the page at cls.url.
        """
        return get_soup_at_url(cls.url).find('a', title='Oldest comic')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        last_soup: parsed page of the comic to navigate from.
        next_: truthy to look for the 'next' anchor, falsy for 'previous'.

        Returns the anchor, or None when it is missing or has no href.
        """
        link = last_soup.find('a', class_='next-comic' if next_ else 'previous-comic ')
        if link is None:
            # find() gives None when no anchor matches: report "no link"
            # instead of failing on None.get.
            return None
        return None if link.get('href') is None else link

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with the comic number (second-to-last path component
        of the og:url meta), the author (credit text without its 'by '
        prefix), the date and the image urls.
        """
        url2 = soup.find('meta', property='og:url')['content']
        num = int(url2.split('/')[-2])
        date_str = soup.find('h3').find('a').string
        day = string_to_date(date_str, '%Y.%m.%d')
        author = soup.find('small', class_="author-credit-name").string
        assert author.startswith('by ')
        author = author[3:]
        imgs = soup.find_all('img', id='main-comic')
        return {
            'num': num,
            'author': author,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'prefix': '%d-' % num,
            'img': [convert_iri_to_plain_ascii_uri(urljoin_wrapper(cls.url, i['src'])) for i in imgs]
        }
1233
1234
1235
class MrLovenstein(GenericComic):
    """Retriever for the Mr. Lovenstein comics."""
    # Also on https://tapastic.com/series/MrLovenstein
    name = 'mrlovenstein'
    long_name = 'Mr. Lovenstein'
    url = 'http://www.mrlovenstein.com'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generator to get the next comic. Implementation of GenericComic's abstract method.

        The range of comic numbers is taken from the '/comic/<num>' links of
        the home page; when a last comic is known, iteration starts right
        after its number.
        """
        # TODO: more info from http://www.mrlovenstein.com/archive
        comic_num_re = re.compile('^/comic/([0-9]*)$')
        home_links = get_soup_at_url(cls.url).find_all('a', href=comic_num_re)
        nums = [int(comic_num_re.match(anchor['href']).group(1))
                for anchor in home_links]
        first, last = min(nums), max(nums)
        if last_comic:
            first = last_comic['num'] + 1
        comic_src_re = re.compile('^/images/comics/')
        for num in range(first, last + 1):
            comic_url = urljoin_wrapper(cls.url, '/comic/%d' % num)
            page = get_soup_at_url(comic_url)
            # Images are kept in the reverse of their order on the page.
            pictures = list(reversed(page.find_all('img', src=comic_src_re)))
            description = page.find('meta', attrs={'name': 'description'})['content']
            hover_texts = [pic.get('title') for pic in pictures]
            yield {
                'url': comic_url,
                'num': num,
                'texts': '  '.join(text for text in hover_texts if text),
                'img': [urljoin_wrapper(comic_url, pic['src']) for pic in pictures],
                'description': description,
            }
1265
1266
1267
class DinosaurComics(GenericListableComic):
    """Retriever for the Dinosaur Comics comics."""
    name = 'dinosaur'
    long_name = 'Dinosaur Comics'
    url = 'http://www.qwantz.com'
    get_url_from_archive_element = get_href
    # Matches comic urls and captures the comic number.
    comic_link_re = re.compile('^%s/index.php\\?comic=([0-9]*)$' % url)

    @classmethod
    def get_archive_elements(cls):
        """Return the archive page's comic links, in reversed page order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive.php'))
        anchors = archive_soup.find_all('a', href=cls.comic_link_re)
        # first link is random -> skip it
        return reversed(anchors[1:])

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return date, image url, title, text and number for one comic.

        The date is the archive link's text and the text is the string of
        the node following the link.
        """
        comic_url = cls.get_url_from_archive_element(link)
        num = int(cls.comic_link_re.match(comic_url).group(1))
        raw_date = link.string
        text = link.next_sibling.string
        posted = string_to_date(remove_st_nd_rd_th_from_date(raw_date), "%B %d, %Y")
        img_src_re = re.compile('^%s/comics/' % cls.url)
        picture = soup.find('img', src=img_src_re)
        return {
            'month': posted.month,
            'year': posted.year,
            'day': posted.day,
            'img': [picture.get('src')],
            'title': picture.get('title'),
            'text': text,
            'num': num,
        }
1300
1301
1302
class ButterSafe(GenericListableComic):
    """Retriever for the ButterSafe comics."""
    name = 'butter'
    long_name = 'ButterSafe'
    url = 'http://buttersafe.com'
    get_url_from_archive_element = get_href
    # Matches comic urls and captures year, month and day.
    comic_link_re = re.compile('^%s/([0-9]*)/([0-9]*)/([0-9]*)/.*' % url)

    @classmethod
    def get_archive_elements(cls):
        """Return the archive page's comic links, in reversed page order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive/'))
        anchors = archive_soup.find_all('a', href=cls.comic_link_re)
        return reversed(anchors)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, date and image url for one comic.

        The date is parsed from the comic url; the image's alt text is
        expected to equal the archive link's text.
        """
        comic_url = cls.get_url_from_archive_element(link)
        title = link.string
        year, month, day = map(int, cls.comic_link_re.match(comic_url).groups())
        picture = soup.find('div', id='comic').find('img')
        assert picture['alt'] == title
        return {
            'title': title,
            'day': day,
            'month': month,
            'year': year,
            'img': [picture['src']],
        }
1330
1331
1332
class CalvinAndHobbes(GenericComic):
    """Class to retrieve Calvin and Hobbes comics."""
    # Also on http://www.gocomics.com/calvinandhobbes/
    name = 'calvin'
    long_name = 'Calvin and Hobbes'
    # This is not through any official webpage but eh...
    url = 'http://marcel-oehler.marcellosendos.ch/comics/ch/'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generator to get the next comic. Implementation of GenericComic's abstract method.

        Walks the 'year/month/' links of the index page, opens each month
        page that may still hold unseen strips and yields one dict per image
        dated strictly after the last known comic.

        last_comic: previously retrieved comic; when falsy, retrieval starts
            after 1985-11-01.
        """
        last_date = get_date_for_comic(
            last_comic) if last_comic else date(1985, 11, 1)
        link_re = re.compile('^([0-9]*)/([0-9]*)/')
        for link in get_soup_at_url(cls.url).find_all('a', href=link_re):
            url = link['href']
            # Keep the raw strings: they are reused verbatim to build the
            # image name pattern and the image url.
            year, month = link_re.match(url).groups()
            year_num, month_num = int(year), int(month)
            # First of the month + 31 days is past the end of any month:
            # skip months that cannot hold anything newer than last_date.
            if date(year_num, month_num, 1) + timedelta(days=31) < last_date:
                continue
            img_re = re.compile('^%s%s([0-9]*)' % (year, month))
            month_url = urljoin_wrapper(cls.url, url)
            for img in get_soup_at_url(month_url).find_all('img', src=img_re):
                img_src = img['src']
                day = int(img_re.match(img_src).groups()[0])
                comic_date = date(year_num, month_num, day)
                if comic_date > last_date:
                    yield {
                        'url': month_url,
                        'year': year_num,
                        'month': month_num,
                        'day': day,
                        'img': ['%s%s/%s/%s' % (cls.url, year, month, img_src)],
                    }
                    last_date = comic_date
1366
1367
1368
class AbstruseGoose(GenericListableComic):
    """Retriever for the Abstruse Goose comics."""
    name = 'abstruse'
    long_name = 'Abstruse Goose'
    url = 'http://abstrusegoose.com'
    get_url_from_archive_element = get_href
    # Matches comic urls (capturing the comic number) and strip image urls.
    comic_url_re = re.compile('^%s/([0-9]*)$' % url)
    comic_img_re = re.compile('^%s/strips/.*' % url)

    @classmethod
    def get_archive_elements(cls):
        """Return the archive page's comic links, in page order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive'))
        return archive_soup.find_all('a', href=cls.comic_url_re)

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Return number, title and image url for one comic.

        The number is parsed from the comic url and the title is the
        archive link's text.
        """
        url_match = cls.comic_url_re.match(cls.get_url_from_archive_element(archive_elt))
        num = int(url_match.group(1))
        return {
            'num': num,
            'title': archive_elt.string,
            'img': [soup.find('img', src=cls.comic_img_re)['src']]
        }
1391
1392
1393 View Code Duplication
class PhDComics(GenericNavigableComic):
    """Retriever for the PhD Comics."""
    name = 'phd'
    long_name = 'PhD Comics'
    url = 'http://phdcomics.com/comics/archive.php'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the parent element of the 'first' button image."""
        first_button = get_soup_at_url(cls.url).find('img', src='images/first_button.gif')
        return first_button.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the parent of the next/previous button image, or None without one."""
        if next_:
            button_src = 'images/next_button.gif'
        else:
            button_src = 'images/prev_button.gif'
        button = last_soup.find('img', src=button_src)
        if button is None:
            return None
        return button.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return date, image url and title for one comic page.

        When the date text cannot be parsed, a message is printed and
        today's date is used instead.
        """
        date_font = soup.find('font', face='Arial,Helvetica,Geneva,Swiss,SunSans-Regular', color='white')
        raw_date = date_font.string.strip()
        try:
            posted = string_to_date(raw_date, '%m/%d/%Y')
        except ValueError:
            print("Invalid date %s" % raw_date)
            posted = date.today()
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        return {
            'year': posted.year,
            'month': posted.month,
            'day': posted.day,
            'img': [soup.find('img', id='comic')['src']],
            'title': title,
        }
1428
1429
1430
class Octopuns(GenericNavigableComic):
    """Class to retrieve Octopuns comics."""
    # Also on http://octopuns.tumblr.com
    name = 'octopuns'
    long_name = 'Octopuns'
    url = 'http://www.octopuns.net'

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        Returns the parent element of the 'First.png' button image found on
        the page at cls.url.
        """
        return get_soup_at_url(cls.url).find('img', src=re.compile('.*/First.png')).parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        last_soup: parsed page of the comic to navigate from.
        next_: truthy to look for the 'Next' button, falsy for 'Back'.

        Returns the element wrapping the button image, or None when the
        button image is missing or its parent carries no href.
        """
        button = last_soup.find('img', src=re.compile('.*/Next.png' if next_ else '.*/Back.png'))
        if button is None:
            # find() gives None when the image is absent: report "no link"
            # instead of failing on None.parent.
            return None
        link = button.parent
        return None if link.get('href') is None else link

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with the image urls (from the rel='image_src' link
        tags), the post title and the date parsed from the date header.
        """
        title = soup.find('h3', class_='post-title entry-title').string
        date_str = soup.find('h2', class_='date-header').string
        day = string_to_date(date_str, "%A, %B %d, %Y")
        imgs = soup.find_all('link', rel='image_src')
        return {
            'img': [i['href'] for i in imgs],
            'title': title,
            'day': day.day,
            'month': day.month,
            'year': day.year,
        }
1462
1463
1464
class Quarktees(GenericNavigableComic):
    """Retriever for the comics posted on the Quarktees news blog."""
    name = 'quarktees'
    long_name = 'Quarktees'
    url = 'http://www.quarktees.com/blogs/news'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.quarktees.com/blogs/news/12486621-coming-soon'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the 'article-next' (or 'article-prev') anchor of the page."""
        if next_:
            anchor_id = 'article-next'
        else:
            anchor_id = 'article-prev'
        return last_soup.find('a', id=anchor_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the title and the image urls of one article."""
        title = soup.find('meta', property='og:title')['content']
        article_div = soup.find('div', class_='single-article')
        pictures = article_div.find_all('img')
        return {
            'title': title,
            'img': [urljoin_wrapper(cls.url, pic['src']) for pic in pictures],
        }
1488
1489
1490
class OverCompensating(GenericNavigableComic):
    """Retrieve the Over Compensating comics."""
    name = 'compensating'
    long_name = 'Over Compensating'
    url = 'http://www.overcompensating.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the home page anchor whose href ends with `comic=1`."""
        home = get_soup_at_url(cls.url)
        return home.find('a', href=re.compile('comic=1$'))

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the navigation anchor, identified by its `title`."""
        wanted_title = 'next comic' if next_ else 'go back already'
        return last_soup.find('a', title=wanted_title)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (number, image, title) for a comic page.

        The comic number is parsed from the trailing `comic=<digits>` part
        of the comic url.
        """
        src_pattern = re.compile('^/oc/comics/.*')
        num_pattern = re.compile('.*comic=([0-9]*)$')
        comic_url = cls.get_url_from_link(link)
        num_match = num_pattern.match(comic_url)
        num = int(num_match.group(1))
        img = soup.find('img', src=src_pattern)
        return {
            'num': num,
            'img': [urljoin_wrapper(comic_url, img['src'])],
            'title': img.get('title'),
        }
1520
1521
1522
class Oglaf(GenericNavigableComic):
    """Retrieve the Oglaf comics."""
    name = 'oglaf'
    long_name = 'Oglaf [NSFW]'
    url = 'http://oglaf.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the parent of the `div#st` element of the home page."""
        home = get_soup_at_url(cls.url)
        start_div = home.find("div", id="st")
        return start_div.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the parent of the `div#nx` / `div#pvs` element, or None."""
        div_id = "nx" if next_ else "pvs"
        div = last_soup.find("div", id=div_id)
        if not div:
            return None
        return div.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title, images, description) for a comic page.

        Exactly one title image (inside `div#tt`) and one strip image
        (`img#strip`) are expected; the description joins their `title`
        attributes.
        """
        title = soup.find('title').string
        header_imgs = soup.find('div', id='tt').find_all('img')
        assert len(header_imgs) == 1
        strip_imgs = soup.find_all('img', id='strip')
        assert len(strip_imgs) == 1
        all_imgs = header_imgs + strip_imgs
        description = ' '.join(img['title'] for img in all_imgs)
        return {
            'title': title,
            'img': [img['src'] for img in all_imgs],
            'description': description,
        }
1555
1556
1557
class ScandinaviaAndTheWorld(GenericNavigableComic):
    """Retrieve the Scandinavia And The World comics."""
    name = 'satw'
    long_name = 'Scandinavia And The World'
    url = 'http://satwcomic.com'
    get_first_comic_link = simulate_first_link
    first_url = 'http://satwcomic.com/sweden-denmark-and-norway'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor whose `accesskey` is 'n' (next) or 'p' (previous)."""
        key = 'n' if next_ else 'p'
        return last_soup.find('a', accesskey=key)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title, description, images) for a comic page."""
        title_meta = soup.find('meta', attrs={'name': 'twitter:label1'})
        title = title_meta['content']
        desc_meta = soup.find('meta', property='og:description')
        description = desc_meta['content']
        img_tags = soup.find_all('img', itemprop="image")
        return {
            'title': title,
            'description': description,
            'img': [tag['src'] for tag in img_tags],
        }
1581
1582
1583
class SomethingOfThatIlk(GenericEmptyComic):  # Does not exist anymore
    """Placeholder for the Something Of That Ilk comics (no retrieval)."""
    name = 'somethingofthatilk'
    long_name = 'Something Of That Ilk'
    url = 'http://www.somethingofthatilk.com'
1588
1589
1590
class InfiniteMonkeyBusiness(GenericNavigableComic):
    """Retrieve the Infinite Monkey Business comics."""
    name = 'monkey'
    long_name = 'Infinite Monkey Business'
    url = 'http://infinitemonkeybusiness.net'
    get_navi_link = get_a_navi_comicnavnext_navinext
    get_first_comic_link = simulate_first_link
    first_url = 'http://infinitemonkeybusiness.net/comic/pillory/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title, images) for a comic page."""
        og_title = soup.find('meta', property='og:title')
        title = og_title['content']
        comic_div = soup.find('div', id='comic')
        img_tags = comic_div.find_all('img')
        return {
            'title': title,
            'img': [tag['src'] for tag in img_tags],
        }
1608
1609
1610
class Wondermark(GenericListableComic):
    """Retrieve the Wondermark comics."""
    name = 'wondermark'
    long_name = 'Wondermark'
    url = 'http://wondermark.com'
    get_url_from_archive_element = get_href

    @classmethod
    def get_archive_elements(cls):
        """Return the `rel="bookmark"` anchors of the archive page, reversed."""
        archive_url = urljoin_wrapper(cls.url, 'archive/')
        archive = get_soup_at_url(archive_url)
        bookmarks = archive.find_all('a', rel='bookmark')
        return reversed(bookmarks)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (date, image, title, alt, tags) for a page.

        Pages without a `div#comic` yield an empty image list and empty
        title/alt strings.
        """
        raw_date = soup.find('div', class_='postdate').find('em').string
        clean_date = remove_st_nd_rd_th_from_date(raw_date)
        day = string_to_date(clean_date, "%B %d, %Y")
        img_src, alt, title = [], '', ''
        comic_div = soup.find('div', id='comic')
        if comic_div:
            img = comic_div.find('img')
            img_src = [img['src']]
            alt = img['alt']
            assert alt == img['title']
            title = soup.find('meta', property='og:title')['content']
        tag_links = soup.find('div', class_='postmeta').find_all('a', rel='tag')
        tags = ' '.join(tag.string for tag in tag_links)
        return {
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': img_src,
            'title': title,
            'alt': alt,
            'tags': tags,
        }
1647
1648
1649
class WarehouseComic(GenericNavigableComic):
    """Retrieve the Warehouse Comic comics."""
    name = 'warehouse'
    long_name = 'Warehouse Comic'
    url = 'http://warehousecomic.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (images, title, date) for a comic page."""
        title = soup.find('h2', class_='post-title').string
        raw_date = soup.find('span', class_='post-date').string
        day = string_to_date(raw_date, "%B %d, %Y")
        comic_div = soup.find('div', id='comic')
        img_tags = comic_div.find_all('img')
        return {
            'img': [tag['src'] for tag in img_tags],
            'title': title,
            'day': day.day,
            'month': day.month,
            'year': day.year,
        }
1671
1672
1673
class JustSayEh(GenericNavigableComic):
    """Retrieve the Just Say Eh comics."""
    # Also on http://tapastic.com/series/Just-Say-Eh
    name = 'justsayeh'
    long_name = 'Just Say Eh'
    url = 'http://www.justsayeh.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (images, title, alt) for a comic page.

        Every image of `div#comic` must have identical `alt` and `title`
        attributes; the alt text of the first image is reported.
        """
        title = soup.find('h2', class_='post-title').string
        img_tags = soup.find("div", id="comic").find_all("img")
        assert all(tag['alt'] == tag['title'] for tag in img_tags)
        first_alt = img_tags[0]['alt']
        return {
            'img': [tag['src'] for tag in img_tags],
            'title': title,
            'alt': first_alt,
        }
1694
1695
1696
class MouseBearComedy(GenericNavigableComic):
    """Retrieve the Mouse Bear Comedy comics."""
    # Also on http://mousebearcomedy.tumblr.com
    name = 'mousebear'
    long_name = 'Mouse Bear Comedy'
    url = 'http://www.mousebearcomedy.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (date, images, title, author) for a page.

        Every image of `div#comic` must have `alt` and `title` attributes
        equal to the post title.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        raw_date = soup.find("span", class_="post-date").string
        day = string_to_date(raw_date, '%B %d, %Y')
        img_tags = soup.find("div", id="comic").find_all("img")
        assert all(tag['alt'] == tag['title'] == title for tag in img_tags)
        return {
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [tag['src'] for tag in img_tags],
            'title': title,
            'author': author,
        }
1722
1723
1724
class BigFootJustice(GenericNavigableComic):
    """Retrieve the Big Foot Justice comics."""
    # Also on http://tapastic.com/series/bigfoot-justice
    name = 'bigfoot'
    long_name = 'Big Foot Justice'
    url = 'http://bigfootjustice.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (images, title) for a comic page.

        The title is the space-joined `title` attributes of the images in
        `div#comic`, each of which must equal the image's `alt`.
        """
        comic_div = soup.find('div', id='comic')
        img_tags = comic_div.find_all('img')
        assert all(tag['title'] == tag['alt'] for tag in img_tags)
        joined_titles = ' '.join(tag['title'] for tag in img_tags)
        return {
            'img': [tag['src'] for tag in img_tags],
            'title': joined_titles,
        }
1743
1744
1745
class RespawnComic(GenericNavigableComic):
    """Class to retrieve Respawn Comic."""
    # Also on http://respawncomic.tumblr.com
    name = 'respawn'
    long_name = 'Respawn Comic'
    url = 'http://respawncomic.com'
    get_navi_link = get_a_navi_comicnavnext_navinext
    get_first_comic_link = simulate_first_link
    first_url = 'http://respawncomic.com/comic/c0001/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with the title, author, publication date and image
        urls, all read from `<meta>` tags of the page.
        """
        title = soup.find('meta', property='og:title')['content']
        author = soup.find('meta', attrs={'name': 'shareaholic:article_author_name'})['content']
        date_str = soup.find('meta', attrs={'name': 'shareaholic:article_published_time'})['content']
        # Only the leading YYYY-MM-DD part of the timestamp is parsed.
        day = string_to_date(date_str[:10], "%Y-%m-%d")
        imgs = soup.find_all('meta', property='og:image')
        # og:image values that are excluded from the returned image list.
        skip_imgs = {
            'http://respawncomic.com/wp-content/uploads/2016/03/site/HAROLD2.png',
            'http://respawncomic.com/wp-content/uploads/2016/03/site/DEVA.png'
        }
        return {
            'title': title,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [i['content'] for i in imgs if i['content'] not in skip_imgs],
        }
1776
1777
1778
class SafelyEndangered(GenericNavigableComic):
    """Retrieve the Safely Endangered comics."""
    # Also on http://tumblr.safelyendangered.com
    name = 'endangered'
    long_name = 'Safely Endangered'
    url = 'http://www.safelyendangered.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.safelyendangered.com/comic/ignored/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (date, images, title, alt) for a comic page.

        The alt text of the first image of `div#comic` is reported; every
        image must have identical `alt` and `title` attributes.
        """
        title = soup.find('h2', class_='post-title').string
        raw_date = soup.find('span', class_='post-date').string
        day = string_to_date(raw_date, '%B %d, %Y')
        img_tags = soup.find('div', id='comic').find_all('img')
        first_alt = img_tags[0]['alt']
        assert all(tag['alt'] == tag['title'] for tag in img_tags)
        return {
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [tag['src'] for tag in img_tags],
            'title': title,
            'alt': first_alt,
        }
1805
1806
1807
class PicturesInBoxes(GenericNavigableComic):
    """Retrieve the Pictures In Boxes comics."""
    # Also on http://picturesinboxescomic.tumblr.com
    name = 'picturesinboxes'
    long_name = 'Pictures in Boxes'
    url = 'http://www.picturesinboxes.com'
    get_navi_link = get_a_navi_navinext
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.picturesinboxes.com/2013/10/26/tetris/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (date, images, title, author) for a page.

        At least one image is expected in `div.comicpane`, and each must
        have `title` and `alt` attributes equal to the post title.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        raw_date = soup.find('span', class_='post-date').string
        day = string_to_date(raw_date, '%B %d, %Y')
        img_tags = soup.find('div', class_='comicpane').find_all('img')
        assert img_tags
        assert all(tag['title'] == tag['alt'] == title for tag in img_tags)
        return {
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [tag['src'] for tag in img_tags],
            'title': title,
            'author': author,
        }
1835
1836
1837
class Penmen(GenericEmptyComic):
    """Placeholder for the Penmen comics (no retrieval)."""
    name = 'penmen'
    long_name = 'Penmen'
    url = 'http://penmen.com'
1842
1843
1844
class TheDoghouseDiaries(GenericNavigableComic):
    """Retrieve The Dog House Diaries comics."""
    name = 'doghouse'
    long_name = 'The Dog House Diaries'
    url = 'http://thedoghousediaries.com'

    @classmethod
    def get_first_comic_link(cls):
        """Return the `a#firstlink` anchor of the home page."""
        home = get_soup_at_url(cls.url)
        return home.find('a', id='firstlink')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the `a#nextlink` or `a#previouslink` anchor of the page."""
        anchor_id = 'nextlink' if next_ else 'previouslink'
        return last_soup.find('a', id=anchor_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (titles, alt, image, number) for a comic page.

        The comic number is the last `/`-separated component of the comic
        url; the image source is stripped before being joined to that url.
        """
        img = soup.find('img', src=re.compile('^dhdcomics/.*'))
        comic_url = cls.get_url_from_link(link)
        title = soup.find('h2', id='titleheader').string
        subtitle = soup.find('div', id='subtext').string
        alt = img.get('title')
        img_url = urljoin_wrapper(comic_url, img['src'].strip())
        num = int(comic_url.rsplit('/', 1)[-1])
        return {
            'title': title,
            'title2': subtitle,
            'alt': alt,
            'img': [img_url],
            'num': num,
        }
1873
1874
1875
class InvisibleBread(GenericListableComic):
    """Class to retrieve Invisible Bread comics."""
    # Also on http://www.gocomics.com/invisible-bread
    name = 'invisiblebread'
    long_name = 'Invisible Bread'
    url = 'http://invisiblebread.com'

    @classmethod
    def get_archive_elements(cls):
        """Get the `td.archive-title` cells of the archive page, reversed."""
        archive_url = urljoin_wrapper(cls.url, 'archives/')
        return reversed(get_soup_at_url(archive_url).find_all('td', class_='archive-title'))

    @classmethod
    def get_url_from_archive_element(cls, td):
        """Get url corresponding to an archive element."""
        return td.find('a')['href']

    @classmethod
    def get_comic_info(cls, soup, td):
        """Get information about a particular comics.

        The title comes from the archive cell, the month and day from the
        cell's previous sibling and the year from the comic url. The comic
        image must have `title` and `alt` attributes equal to the title.
        """
        url = cls.get_url_from_archive_element(td)
        title = td.find('a').string
        month_and_day = td.previous_sibling.string
        # cls.url is escaped so that its '.' only matches a literal dot.
        link_re = re.compile('^%s/([0-9]+)/' % re.escape(cls.url))
        year = link_re.match(url).group(1)
        date_str = month_and_day + ' ' + year
        day = string_to_date(date_str, '%b %d %Y')
        img = soup.find('div', id='comic').find('img')
        assert img['title'] == img['alt'] == title
        return {
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [urljoin_wrapper(cls.url, img['src'])],
            'title': title,
        }
1911
1912
1913
class DiscoBleach(GenericEmptyComic):  # Does not work anymore
    """Placeholder for the Disco Bleach comics (no retrieval)."""
    name = 'discobleach'
    long_name = 'Disco Bleach'
    url = 'http://discobleach.com'
1918
1919
1920
class TubeyToons(GenericEmptyComic):  # Does not work anymore
    """Placeholder for the Tubey Toons comics (no retrieval)."""
    # Also on http://tapastic.com/series/Tubey-Toons
    # Also on http://tubeytoons.tumblr.com
    name = 'tubeytoons'
    long_name = 'Tubey Toons'
    url = 'http://tubeytoons.com'
1927
1928
1929
class CompletelySeriousComics(GenericNavigableComic):
    """Retrieve the Completely Serious comics."""
    name = 'completelyserious'
    long_name = 'Completely Serious Comics'
    url = 'http://completelyseriouscomics.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (date, images, title, alt, author) for a page.

        The author is the second child of the `post-author` span. At least
        one image is expected in `div.comicpane`, and all images must share
        the same `title` and `alt` text.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find('span', class_='post-author')
        author = author_span.contents[1].string
        raw_date = soup.find('span', class_='post-date').string
        day = string_to_date(raw_date, '%B %d, %Y')
        img_tags = soup.find('div', class_='comicpane').find_all('img')
        assert img_tags
        shared_alt = img_tags[0]['title']
        assert all(tag['title'] == tag['alt'] == shared_alt for tag in img_tags)
        return {
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [tag['src'] for tag in img_tags],
            'title': title,
            'alt': shared_alt,
            'author': author,
        }
1957
1958
1959
class PoorlyDrawnLines(GenericListableComic):
    """Class to retrieve Poorly Drawn Lines comics."""
    # Also on http://pdlcomics.tumblr.com
    name = 'poorlydrawn'
    long_name = 'Poorly Drawn Lines'
    url = 'http://poorlydrawnlines.com'
    get_url_from_archive_element = get_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        At most one image is expected in `div.post`; when there is none,
        the image list and the title are empty.
        """
        imgs = soup.find('div', class_='post').find_all('img')
        assert len(imgs) <= 1
        return {
            'img': [i['src'] for i in imgs],
            'title': imgs[0].get('title', "") if imgs else "",
        }

    @classmethod
    def get_archive_elements(cls):
        """Get the archive page anchors pointing to a comic, reversed."""
        archive_url = urljoin_wrapper(cls.url, 'archive')
        # cls.url is escaped so that its '.' only matches a literal dot.
        url_re = re.compile('^%s/comic/.' % re.escape(cls.url))
        return reversed(get_soup_at_url(archive_url).find_all('a', href=url_re))
1982
1983
1984
class LoadingComics(GenericNavigableComic):
    """Retrieve the Loading Artist comics."""
    name = 'loadingartist'
    long_name = 'Loading Artist'
    url = 'http://www.loadingartist.com/latest'

    @classmethod
    def get_first_comic_link(cls):
        """Return the anchor titled "First" on the page at `cls.url`."""
        latest = get_soup_at_url(cls.url)
        return latest.find('a', title="First")

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor titled 'Next' or 'Previous'."""
        wanted_title = 'Next' if next_ else 'Previous'
        return last_soup.find('a', title=wanted_title)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title, images, date) for a comic page.

        Only images of `div.comic` with empty `alt` and `title` attributes
        are kept.
        """
        title = soup.find('h1').string
        raw_date = soup.find('span', class_='date').string.strip()
        day = string_to_date(raw_date, "%B %d, %Y")
        comic_div = soup.find('div', class_='comic')
        img_tags = comic_div.find_all('img', alt='', title='')
        return {
            'title': title,
            'img': [tag['src'] for tag in img_tags],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
2014
2015
2016
class ChuckleADuck(GenericNavigableComic):
    """Retrieve the Chuckle-A-Duck comics."""
    name = 'chuckleaduck'
    long_name = 'Chuckle-A-duck'
    url = 'http://chuckleaduck.com'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (date, images, title, author) for a page.

        Pages without a `div#comic` yield no image and an empty title.
        Otherwise the title is the `title` attribute of the first image,
        which every image must share as both `title` and `alt`.
        """
        raw_date = soup.find('span', class_='post-date').string
        clean_date = remove_st_nd_rd_th_from_date(raw_date)
        day = string_to_date(clean_date, "%B %d, %Y")
        author = soup.find('span', class_='post-author').string
        comic_div = soup.find('div', id='comic')
        if comic_div:
            img_tags = comic_div.find_all('img')
        else:
            img_tags = []
        if img_tags:
            title = img_tags[0]['title']
        else:
            title = ""
        assert all(tag['title'] == tag['alt'] == title for tag in img_tags)
        return {
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [tag['src'] for tag in img_tags],
            'title': title,
            'author': author,
        }
2042
2043
2044
class DepressedAlien(GenericNavigableComic):
    """Retrieve the Depressed Alien comics."""
    name = 'depressedalien'
    long_name = 'Depressed Alien'
    url = 'http://depressedalien.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the parent of the image named `beginArrow` on the home page."""
        home = get_soup_at_url(cls.url)
        arrow = home.find('img', attrs={'name': 'beginArrow'})
        return arrow.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the parent of the `rightArrow` / `leftArrow` image."""
        arrow_name = 'rightArrow' if next_ else 'leftArrow'
        arrow = last_soup.find('img', attrs={'name': arrow_name})
        return arrow.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title, images) from the page's meta tags."""
        title_meta = soup.find('meta', attrs={'name': 'twitter:title'})
        title = title_meta['content']
        image_metas = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'img': [meta['content'] for meta in image_metas],
        }
2070
2071
2072
class ThingsInSquares(GenericListableComic):
    """Retrieve the Things In Squares comics."""
    # This can be retrieved in other languages
    # Also on https://tapastic.com/series/Things-in-Squares
    name = 'squares'
    long_name = 'Things in squares'
    url = 'http://www.thingsinsquares.com'

    @classmethod
    def get_comic_info(cls, soup, tr):
        """Build the info dict for a comic from its archive row and page.

        The archive row must hold exactly three cells: the second provides
        the title (through its anchor) and the third the date, parsed with
        the "%m.%d.%y" format. The remaining fields come from the page.
        """
        _, link_cell, date_cell = tr.find_all('td')
        anchor = link_cell.find('a')
        day = string_to_date(date_cell.string, "%m.%d.%y")
        title = anchor.string
        title2 = soup.find('meta', property='og:title')['content']
        desc_meta = soup.find('meta', property='og:description')
        if desc_meta:
            description = desc_meta['content']
        else:
            description = ''
        tag_metas = soup.find_all('meta', property='article:tag')
        tags = ' '.join(meta['content'] for meta in tag_metas)
        img_tags = soup.find('div', class_='entry-content').find_all('img')
        return {
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'title': title,
            'title2': title2,
            'description': description,
            'tags': tags,
            'img': [tag['src'] for tag in img_tags],
            'alt': ' '.join(tag['alt'] for tag in img_tags),
        }

    @classmethod
    def get_url_from_archive_element(cls, tr):
        """Return the href of the anchor in the second cell of the row."""
        _, link_cell, _ = tr.find_all('td')
        anchor = link_cell.find('a')
        return anchor['href']

    @classmethod
    def get_archive_elements(cls):
        """Return the rows of the archive page's `tbody`, reversed."""
        archive_url = urljoin_wrapper(cls.url, 'archive-2')
        table_body = get_soup_at_url(archive_url).find('tbody')
        return reversed(table_body.find_all('tr'))
2114
2115
2116 View Code Duplication
class HappleTea(GenericNavigableComic):
    """Retrieve the Happle Tea comics."""
    name = 'happletea'
    long_name = 'Happle Tea'
    url = 'http://www.happletea.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title, images, alt, date, author) for a page.

        Every image of `div#comic` must have identical `alt` and `title`
        attributes; the reported alt is their concatenation.
        """
        img_tags = soup.find('div', id='comic').find_all('img')
        post = soup.find('div', class_='post-content')
        title = post.find('h2', class_='post-title').string
        author = post.find('a', rel='author').string
        raw_date = post.find('span', class_='post-date').string
        day = string_to_date(raw_date, "%B %d, %Y")
        assert all(tag['alt'] == tag['title'] for tag in img_tags)
        return {
            'title': title,
            'img': [tag['src'] for tag in img_tags],
            'alt': ''.join(tag['alt'] for tag in img_tags),
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'author': author,
        }
2143
2144
2145
class FatAwesomeComics(GenericNavigableComic):
    """Retrieve the Fat Awesome comics."""
    # Also on http://fatawesomecomedy.tumblr.com
    name = 'fatawesome'
    long_name = 'Fat Awesome'
    url = 'http://fatawesome.com/comics'
    get_navi_link = get_a_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://fatawesome.com/shortbus/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for a comic page.

        Exactly one image with `data-recalc-dims="1"` is expected; anything
        after the last '?' of its source is dropped. Tags default to an
        empty string when the page has no `article:tag` meta.
        """
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        description = soup.find('meta', attrs={'name': 'description'})['content']
        tag_meta = soup.find('meta', property='article:tag')
        if tag_meta:
            tags = tag_meta['content']
        else:
            tags = ""
        published = soup.find('meta', property='article:published_time')['content']
        day = string_to_date(published[:10], "%Y-%m-%d")
        img_tags = soup.find_all('img', attrs={'data-recalc-dims': "1"})
        assert len(img_tags) == 1
        return {
            'title': title,
            'description': description,
            'tags': tags,
            'alt': "".join(tag['alt'] for tag in img_tags),
            'img': [tag['src'].rsplit('?', 1)[0] for tag in img_tags],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
2176
2177
2178
class AnythingComic(GenericListableComic):
    """Retrieve the Anything Comic comics."""
    # Also on http://tapastic.com/series/anything
    name = 'anythingcomic'
    long_name = 'Anything Comic'
    url = 'http://www.anythingcomic.com'

    @classmethod
    def get_archive_elements(cls):
        """Return the comic rows of the archive page's `chapter_table`."""
        archive_url = urljoin_wrapper(cls.url, 'archive/')
        table = get_soup_at_url(archive_url).find('table', id='chapter_table')
        rows = table.find_all('tr')
        # The first 2 <tr>'s do not correspond to comics
        return rows[2:]

    @classmethod
    def get_url_from_archive_element(cls, tr):
        """Return the comic url from the anchor in the row's second cell.

        The row must hold exactly four cells.
        """
        _, comic_cell, _, _ = tr.find_all('td')
        anchor = comic_cell.find('a')
        return urljoin_wrapper(cls.url, anchor['href'])

    @classmethod
    def get_comic_info(cls, soup, tr):
        """Build the info dict for a comic from its archive row and page.

        The four cells of the row give the number, the title (through the
        anchor of the second cell) and the date. Exactly one
        `img#comic_image` is expected on the page.
        """
        num_cell, comic_cell, date_cell, _ = tr.find_all('td')
        num = int(num_cell.string)
        anchor = comic_cell.find('a')
        title = anchor.string
        img_tags = soup.find_all('img', id='comic_image')
        day = string_to_date(date_cell.string, '%d %b %Y %I:%M %p')
        assert len(img_tags) == 1
        assert all(tag.get('alt') == tag.get('title') for tag in img_tags)
        return {
            'num': num,
            'title': title,
            'alt': img_tags[0].get('alt', ''),
            'img': [tag['src'] for tag in img_tags],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
2218
2219
2220
class LonnieMillsap(GenericNavigableComic):
    """Retrieve Lonnie Millsap's comics."""
    name = 'millsap'
    long_name = 'Lonnie Millsap'
    url = 'http://www.lonniemillsap.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.lonniemillsap.com/?p=42'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title, author, images, date) for a page.

        Author, date and images are all looked up inside `div.post-content`.
        """
        title = soup.find('h2', class_='post-title').string
        post = soup.find('div', class_='post-content')
        author_span = post.find("span", class_="post-author")
        author = author_span.find("a").string
        raw_date = post.find("span", class_="post-date").string
        day = string_to_date(raw_date, "%B %d, %Y")
        entry = post.find("div", class_="entry")
        img_tags = entry.find_all("img")
        return {
            'title': title,
            'author': author,
            'img': [tag['src'] for tag in img_tags],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
2246
2247
2248
class LinsEditions(GenericNavigableComic):
    """Retriever for the L.I.N.S. Editions comics."""
    # Also on http://linscomics.tumblr.com
    name = 'lins'
    long_name = 'L.I.N.S. Editions'
    url = 'https://linsedition.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'https://linsedition.com/2011/09/07/l-i-n-s/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, images and date taken from the page's meta tags.

        The `link` argument is not used.
        """
        heading = soup.find('meta', property='og:title')['content']
        image_metas = soup.find_all('meta', property='og:image')
        published_meta = soup.find('meta', property='article:published_time')
        # Only the leading 'YYYY-MM-DD' part of the timestamp is parsed.
        published = string_to_date(published_meta['content'][:10], "%Y-%m-%d")
        info = {
            'title': heading,
            'img': [meta['content'] for meta in image_metas],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
        return info
2272
2273
2274
class ThorsThundershack(GenericNavigableComic):
    """Retriever for the Thor's Thundershack comics."""
    # Also on http://tapastic.com/series/Thors-Thundershac
    name = 'thor'
    long_name = 'Thor\'s Thundershack'
    url = 'http://www.thorsthundershack.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'first' navigation anchor found on the home page."""
        home = get_soup_at_url(cls.url)
        return home.find('a', class_='first navlink')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the next (or previous) comic anchor, or None.

        Anchors whose href is '/comic' are skipped.
        """
        rel = 'next' if next_ else 'prev'
        candidates = (
            anchor for anchor in last_soup.find_all('a', rel=rel)
            if anchor['href'] != '/comic')
        return next(candidates, None)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the metadata and image URLs read from page `soup`.

        The `link` argument is not used.
        """
        heading = soup.find('meta', attrs={'name': 'description'})["content"]
        body_text = soup.find('div', itemprop='articleBody').text
        writer = soup.find('span', itemprop='author copyrightHolder').string
        pictures = soup.find_all('img', itemprop='image')
        assert all(pic['title'] == pic['alt'] for pic in pictures)
        if pictures:
            alt_text = pictures[0]['alt']
        else:
            alt_text = ""
        time_elt = soup.find('time', itemprop='datePublished')
        published = string_to_date(time_elt["datetime"], "%Y-%m-%d %H:%M:%S")
        info = {
            'img': [urljoin_wrapper(cls.url, pic['src']) for pic in pictures],
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'author': writer,
            'title': heading,
            'alt': alt_text,
            'description': body_text,
        }
        return info
2316
2317 View Code Duplication
2318
class GerbilWithAJetpack(GenericNavigableComic):
    """Class to retrieve GerbilWithAJetpack comics."""
    name = 'gerbil'
    long_name = 'Gerbil With A Jetpack'
    url = 'http://gerbilwithajetpack.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        Returns a dict with the image URLs, title, alt text, author and
        publication date read from `soup`.  The `link` argument is not used.
        """
        title = soup.find('h2', class_='post-title').string
        author = soup.find("span", class_="post-author").find("a").string
        date_str = soup.find("span", class_="post-date").string
        day = string_to_date(date_str, "%B %d, %Y")
        imgs = soup.find("div", id="comic").find_all("img")
        # A page without any image yields an empty alt text instead of
        # raising IndexError, as the sibling comic classes do.
        alt = imgs[0]['alt'] if imgs else ""
        assert all(i['alt'] == i['title'] == alt for i in imgs)
        return {
            'img': [i['src'] for i in imgs],
            'title': title,
            'alt': alt,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
2345
2346 View Code Duplication
2347
class EveryDayBlues(GenericNavigableComic):
    """Retriever for the Every Day Blues comics."""
    name = "blues"
    long_name = "Every Day Blues"
    url = "http://everydayblues.net"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return images, title, author and date read from page `soup`.

        The date is parsed with the 'de_DE.utf8' locale.  The `link`
        argument is not used.
        """
        heading = soup.find("h2", class_="post-title").string
        author_span = soup.find("span", class_="post-author")
        writer = author_span.find("a").string
        date_span = soup.find("span", class_="post-date")
        published = string_to_date(date_span.string, "%d. %B %Y", "de_DE.utf8")
        pictures = soup.find("div", id="comic").find_all("img")
        assert all(pic['alt'] == pic['title'] == heading for pic in pictures)
        assert len(pictures) <= 1
        info = {
            'img': [pic['src'] for pic in pictures],
            'title': heading,
            'author': writer,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
        return info
2373
2374 View Code Duplication
2375
class BiterComics(GenericNavigableComic):
    """Retriever for the Biter Comics."""
    name = "biter"
    long_name = "Biter Comics"
    url = "http://www.bitercomics.com"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return image, title, alt text, author and date from page `soup`.

        Exactly one image is expected.  The `link` argument is not used.
        """
        heading = soup.find("h1", class_="entry-title").string
        author_span = soup.find("span", class_="author vcard")
        writer = author_span.find("a").string
        date_span = soup.find("span", class_="entry-date")
        published = string_to_date(date_span.string, "%B %d, %Y")
        pictures = soup.find("div", id="comic").find_all("img")
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        assert len(pictures) == 1
        alt_text = pictures[0]['alt']
        info = {
            'img': [pic['src'] for pic in pictures],
            'title': heading,
            'alt': alt_text,
            'author': writer,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
        return info
2403
2404
2405
class TheAwkwardYeti(GenericNavigableComic):
    """Retriever for The Awkward Yeti comics."""
    # Also on http://www.gocomics.com/the-awkward-yeti
    # Also on http://larstheyeti.tumblr.com
    # Also on https://tapastic.com/series/TheAwkwardYeti
    name = 'yeti'
    long_name = 'The Awkward Yeti'
    url = 'http://theawkwardyeti.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return images, title and date read from page `soup`.

        The `link` argument is not used.
        """
        heading = soup.find('h2', class_='post-title').string
        date_span = soup.find("span", class_="post-date")
        published = string_to_date(date_span.string, "%B %d, %Y")
        pictures = soup.find("div", id="comic").find_all("img")
        # Only the first image is required to have matching alt and title.
        assert all(pic['alt'] == pic['title'] for pic in pictures[:1])
        info = {
            'img': [pic['src'] for pic in pictures],
            'title': heading,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
        return info
2431
2432
2433
class PleasantThoughts(GenericNavigableComic):
    """Retriever for the Pleasant Thoughts comics."""
    name = 'pleasant'
    long_name = 'Pleasant Thoughts'
    url = 'http://pleasant-thoughts.com'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title and image URLs read from page `soup`.

        The `link` argument is not used.
        """
        content = soup.find('div', class_='post-content')
        heading = content.find('h2', class_='post-title').string
        entry = content.find("div", class_="entry")
        pictures = entry.find_all("img")
        info = {
            'title': heading,
            'img': [pic['src'] for pic in pictures],
        }
        return info
2451
2452
2453
class MisterAndMe(GenericNavigableComic):
    """Retriever for the Mister & Me comics."""
    # Also on http://www.gocomics.com/mister-and-me
    # Also on https://tapastic.com/series/Mister-and-Me
    name = 'mister'
    long_name = 'Mister & Me'
    url = 'http://www.mister-and-me.com'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return image, title, alt text, author and date from page `soup`.

        At most one image is expected.  The `link` argument is not used.
        """
        heading = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        writer = author_span.find("a").string
        date_span = soup.find("span", class_="post-date")
        published = string_to_date(date_span.string, "%B %d, %Y")
        pictures = soup.find("div", id="comic").find_all("img")
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        assert len(pictures) <= 1
        if pictures:
            alt_text = pictures[0]['alt']
        else:
            alt_text = ""
        info = {
            'img': [pic['src'] for pic in pictures],
            'title': heading,
            'alt': alt_text,
            'author': writer,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
        return info
2483
2484 View Code Duplication
2485
class LastPlaceComics(GenericNavigableComic):
    """Retriever for the Last Place Comics."""
    name = 'lastplace'
    long_name = 'LastPlaceComics'
    url = "http://lastplacecomics.com"
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return image, title, alt text, author and date from page `soup`.

        At most one image is expected.  The `link` argument is not used.
        """
        heading = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        writer = author_span.find("a").string
        date_span = soup.find("span", class_="post-date")
        published = string_to_date(date_span.string, "%B %d, %Y")
        pictures = soup.find("div", id="comic").find_all("img")
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        assert len(pictures) <= 1
        if pictures:
            alt_text = pictures[0]['alt']
        else:
            alt_text = ""
        info = {
            'img': [pic['src'] for pic in pictures],
            'title': heading,
            'alt': alt_text,
            'author': writer,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
        return info
2513
2514
2515
class TalesOfAbsurdity(GenericNavigableComic):
    """Retriever for the Tales Of Absurdity comics."""
    # Also on http://tapastic.com/series/Tales-Of-Absurdity
    # Also on http://talesofabsurdity.tumblr.com
    name = 'absurdity'
    long_name = 'Tales of Absurdity'
    url = 'http://talesofabsurdity.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return images, title, alt text, author and date from page `soup`.

        The `link` argument is not used.
        """
        heading = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        writer = author_span.find("a").string
        date_span = soup.find("span", class_="post-date")
        published = string_to_date(date_span.string, "%B %d, %Y")
        pictures = soup.find("div", id="comic").find_all("img")
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        if pictures:
            alt_text = pictures[0]['alt']
        else:
            alt_text = ""
        info = {
            'img': [pic['src'] for pic in pictures],
            'title': heading,
            'alt': alt_text,
            'author': writer,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
        return info
2544
2545 View Code Duplication
2546
class EndlessOrigami(GenericNavigableComic):
    """Retriever for the Endless Origami comics."""
    name = "origami"
    long_name = "Endless Origami"
    url = "http://endlessorigami.com"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return images, title, alt text, author and date from page `soup`.

        The `link` argument is not used.
        """
        heading = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        writer = author_span.find("a").string
        date_span = soup.find("span", class_="post-date")
        published = string_to_date(date_span.string, "%B %d, %Y")
        pictures = soup.find("div", id="comic").find_all("img")
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        if pictures:
            alt_text = pictures[0]['alt']
        else:
            alt_text = ""
        info = {
            'img': [pic['src'] for pic in pictures],
            'title': heading,
            'alt': alt_text,
            'author': writer,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
        return info
2573
2574
2575
class PlanC(GenericNavigableComic):
    """Retriever for the Plan C comics."""
    name = 'planc'
    long_name = 'Plan C'
    url = 'http://www.plancomic.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, images and date read from page `soup`.

        The `link` argument is not used.
        """
        heading = soup.find('h2', class_='post-title').string
        date_span = soup.find("span", class_="post-date")
        published = string_to_date(date_span.string, "%B %d, %Y")
        pictures = soup.find('div', id='comic').find_all('img')
        info = {
            'title': heading,
            'img': [pic['src'] for pic in pictures],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
        return info
2597
2598
2599
class BuniComic(GenericNavigableComic):
    """Retriever for the Buni comics."""
    name = 'buni'
    long_name = 'BuniComics'
    url = 'http://www.bunicomic.com'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the image URL and title read from page `soup`.

        Exactly one image is expected; its title attribute is used as the
        comic title.  The `link` argument is not used.
        """
        comic_div = soup.find('div', id='comic')
        pictures = comic_div.find_all('img')
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        assert len(pictures) == 1
        info = {
            'img': [pic['src'] for pic in pictures],
            'title': pictures[0]['title'],
        }
        return info
2617
2618
2619
class GenericCommitStrip(GenericNavigableComic):
    """Base class for the Commit Strip retrievers, one per language.

    Subclasses are expected to override `first_url`.
    """
    get_navi_link = get_a_rel_next
    get_first_comic_link = simulate_first_link
    first_url = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return titles, description and image URLs read from page `soup`.

        'title2' joins the title attributes of the images with spaces.
        The `link` argument is not used.
        """
        description = soup.find('meta', property='og:description')['content']
        heading = soup.find('meta', property='og:title')['content']
        pictures = soup.find('div', class_='entry-content').find_all('img')
        image_titles = [pic.get('title', '') for pic in pictures]
        second_title = ' '.join(image_titles)
        info = {
            'title': heading,
            'title2': second_title,
            'description': description,
            'img': [
                urljoin_wrapper(cls.url, convert_iri_to_plain_ascii_uri(pic['src']))
                for pic in pictures],
        }
        return info
2638
2639
2640
class CommitStripFr(GenericCommitStrip):
    """Retriever for the French edition of Commit Strip."""
    name = 'commit_fr'
    long_name = 'Commit Strip (Fr)'
    url = 'http://www.commitstrip.com/fr'
    first_url = 'http://www.commitstrip.com/fr/2012/02/22/interview/'
2646
2647
2648
class CommitStripEn(GenericCommitStrip):
    """Retriever for the English edition of Commit Strip."""
    name = 'commit_en'
    long_name = 'Commit Strip (En)'
    url = 'http://www.commitstrip.com/en'
    first_url = 'http://www.commitstrip.com/en/2012/02/22/interview/'
2654
2655
2656
class GenericBoumerie(GenericNavigableComic):
    """Base class for the Boumeries retrievers, one per language.

    Subclasses are expected to override `date_format` and `lang`, which
    are both passed to `string_to_date`.
    """
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next
    date_format = NotImplemented
    lang = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return short URL, images, title, author and date from `soup`.

        The `link` argument is not used.
        """
        heading = soup.find('h2', class_='post-title').string
        shortlink = soup.find('link', rel='shortlink')['href']
        author_span = soup.find("span", class_="post-author")
        writer = author_span.find("a").string
        date_span = soup.find('span', class_='post-date')
        published = string_to_date(date_span.string, cls.date_format, cls.lang)
        pictures = soup.find('div', id='comic').find_all('img')
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        info = {
            'short_url': shortlink,
            'img': [pic['src'] for pic in pictures],
            'title': heading,
            'author': writer,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
        return info
2682
2683
2684
class BoumerieEn(GenericBoumerie):
    """Retriever for the English edition of Boumeries."""
    name = 'boumeries_en'
    long_name = 'Boumeries (En)'
    url = 'http://comics.boumerie.com'
    date_format = "%B %d, %Y"
    lang = 'en_GB.UTF-8'
2691
2692
2693
class BoumerieFr(GenericBoumerie):
    """Retriever for the French edition of Boumeries."""
    name = 'boumeries_fr'
    long_name = 'Boumeries (Fr)'
    url = 'http://bd.boumerie.com'
    date_format = "%A, %d %B %Y"
    lang = "fr_FR.utf8"
2700
2701
2702
class UnearthedComics(GenericNavigableComic):
    """Class to retrieve Unearthed comics."""
    # Also on http://tapastic.com/series/UnearthedComics
    # Also on http://unearthedcomics.tumblr.com
    name = 'unearthed'
    long_name = 'Unearthed Comics'
    url = 'http://unearthedcomics.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://unearthedcomics.com/comics/world-with-turn-signals/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        Returns a dict with the title, description, short URL, image URLs
        and publication date read from `soup`.  The `link` argument is not
        used.
        """
        short_url = soup.find('link', rel='shortlink')['href']
        title_elt = soup.find('h1') or soup.find('h2')
        title = title_elt.string if title_elt else ""
        # Store the text of the description, not the <meta> element itself;
        # a missing element gives an empty description, like a missing title.
        desc_elt = soup.find('meta', property='og:description')
        desc = desc_elt['content'] if desc_elt else ""
        date_str = soup.find('time', class_='published updated hidden')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        post = soup.find('div', class_="entry content entry-content type-portfolio")
        imgs = post.find_all('img')
        return {
            'title': title,
            'description': desc,
            'url2': short_url,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
2733
2734 View Code Duplication
2735
class Optipess(GenericNavigableComic):
    """Retriever for the Optipess comics."""
    name = 'optipess'
    long_name = 'Optipess'
    url = 'http://www.optipess.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, alt text, author, images and date from `soup`.

        A page without a 'comic' div yields no image and an empty alt
        text.  The `link` argument is not used.
        """
        heading = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        writer = author_span.find("a").string
        comic_div = soup.find('div', id='comic')
        if comic_div:
            pictures = comic_div.find_all('img')
        else:
            pictures = []
        if pictures:
            alt_text = pictures[0]['title']
        else:
            alt_text = ""
        assert all(pic['alt'] == pic['title'] == alt_text for pic in pictures)
        date_span = soup.find('span', class_='post-date')
        published = string_to_date(date_span.string, "%B %d, %Y")
        info = {
            'title': heading,
            'alt': alt_text,
            'author': writer,
            'img': [pic['src'] for pic in pictures],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
        return info
2763
2764
2765
class PainTrainComic(GenericNavigableComic):
    """Class to retrieve Pain Train Comics."""
    name = 'paintrain'
    long_name = 'Pain Train Comics'
    url = 'http://paintraincomic.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        Returns a dict with the short URL, the comic number taken from it,
        the image URLs, the publication date, the alt text and the title
        read from `soup`.  The `link` argument is not used.
        """
        title = soup.find('h2', class_='post-title').string
        short_url = soup.find('link', rel='shortlink')['href']
        # Escape the URL so that its dots only match literal dots.
        short_url_re = re.compile(r'^%s/\?p=([0-9]*)' % re.escape(cls.url))
        num = int(short_url_re.match(short_url).group(1))
        imgs = soup.find('div', id='comic').find_all('img')
        alt = imgs[0]['title']
        assert all(i['alt'] == i['title'] == alt for i in imgs)
        date_str = soup.find('span', class_='post-date').string
        day = string_to_date(date_str, "%d/%m/%Y")
        return {
            'short_url': short_url,
            'num': num,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'alt': alt,
            'title': title,
        }
2795
2796
2797
class MoonBeard(GenericNavigableComic):
    """Class to retrieve MoonBeard comics."""
    # Also on http://blog.squiresjam.es/moonbeard
    # Also on http://www.webtoons.com/en/comedy/moon-beard/list?title_no=471
    name = 'moonbeard'
    long_name = 'Moon Beard'
    url = 'http://moonbeard.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        Returns a dict with the short URL, the comic number taken from it,
        the image URLs, the publication date, the title, the space-joined
        article tags, the alt text and the author read from `soup`.  The
        `link` argument is not used.
        """
        title = soup.find('h2', class_='post-title').string
        short_url = soup.find('link', rel='shortlink')['href']
        # Escape the URL so that its dots only match literal dots.
        short_url_re = re.compile(r'^%s/\?p=([0-9]*)' % re.escape(cls.url))
        num = int(short_url_re.match(short_url).group(1))
        imgs = soup.find('div', id='comic').find_all('img')
        alt = imgs[0]['title']
        assert all(i['alt'] == i['title'] == alt for i in imgs)
        date_str = soup.find('span', class_='post-date').string
        day = string_to_date(date_str, "%B %d, %Y")
        tags = ' '.join(t['content'] for t in soup.find_all('meta', property='article:tag'))
        author = soup.find('span', class_='post-author').string
        return {
            'short_url': short_url,
            'num': num,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'title': title,
            'tags': tags,
            'alt': alt,
            'author': author,
        }
2833
2834
2835
class AHamADay(GenericNavigableComic):
    """Class to retrieve class A Ham A Day comics."""
    name = 'ham'
    long_name = 'A Ham A Day'
    url = 'http://www.ahammaday.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.ahammaday.com/today/3/6/french'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns None when the page has no matching navigation item,
        instead of failing with AttributeError.
        """
        # prev is next / next is prev
        li = last_soup.find('li', class_='previous' if next_ else 'next')
        return li.find('a') if li else None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        Returns a dict with the image URLs, title, author and publication
        date read from `soup`.  The `link` argument is not used.
        """
        date_str = soup.find('time', class_='published')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        author = soup.find('span', class_='blog-author').find('a').string
        title = soup.find('meta', property='og:title')['content']
        imgs = soup.find_all('meta', itemprop='image')
        return {
            'img': [i['content'] for i in imgs],
            'title': title,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
        }
2866
2867
2868
class LittleLifeLines(GenericNavigableComic):
    """Retriever for the Little Life Lines comics."""
    name = 'life'
    long_name = 'Little Life Lines'
    url = 'http://www.littlelifelines.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.littlelifelines.com/comics/well-done'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the next (or previous) comic anchor, or None."""
        # The site's labels are swapped: 'prev' leads to the next comic.
        item_class = 'prev' if next_ else 'next'
        item = last_soup.find('li', class_=item_class)
        if not item:
            return None
        return item.find('a')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, alt text, description, author, date and images.

        Images without a 'src' attribute are ignored.  The `link`
        argument is not used.
        """
        heading = soup.find('meta', property='og:title')['content']
        description = soup.find('meta', property='og:description')['content']
        time_elt = soup.find('time', class_='published')
        published = string_to_date(time_elt['datetime'], "%Y-%m-%d")
        writer = soup.find('a', rel='author').string
        body = soup.find('div', class_="body entry-content")
        pictures = [
            pic for pic in body.find_all('img')
            if pic.get('src') is not None]
        alt_text = pictures[0]['alt']
        info = {
            'title': heading,
            'alt': alt_text,
            'description': description,
            'author': writer,
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [pic['src'] for pic in pictures],
        }
        return info
2906
2907
2908
class GenericWordPressInkblot(GenericNavigableComic):
    """Base class for comics published with WordPress and Inkblot."""
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'first webcomic' anchor found on the home page."""
        home = get_soup_at_url(cls.url)
        return home.find('a', class_='webcomic-link webcomic1-link first-webcomic-link first-webcomic1-link')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, date and image URLs read from page `soup`.

        The `link` argument is not used.
        """
        heading = soup.find('meta', property='og:title')['content']
        pictures = soup.find('div', class_='webcomic-image').find_all('img')
        published_meta = soup.find('meta', property='article:published_time')
        # Only the leading 'YYYY-MM-DD' part of the timestamp is parsed.
        published = string_to_date(published_meta['content'][:10], "%Y-%m-%d")
        info = {
            'title': heading,
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [pic['src'] for pic in pictures],
        }
        return info
2931
2932
2933
class EverythingsStupid(GenericWordPressInkblot):
    """Retriever for the Everything's Stupid comics."""
    # Also on http://tapastic.com/series/EverythingsStupid
    # Also on http://www.webtoons.com/en/challenge/everythings-stupid/list?title_no=14591
    # Also on http://everythingsstupidcomics.tumblr.com
    name = 'stupid'
    long_name = "Everything's Stupid"
    url = 'http://everythingsstupid.net'
2941
2942 View Code Duplication
2943
class TheIsmComics(GenericWordPressInkblot):
    """Retriever for The Ism comics."""
    # Also on https://tapastic.com/series/TheIsm (?)
    name = 'theism'
    long_name = "The Ism"
    url = 'http://www.theism-comics.com'
2949
2950
2951
class WoodenPlankStudios(GenericWordPressInkblot):
    """Retriever for the Wooden Plank Studios comics."""
    name = 'woodenplank'
    long_name = 'Wooden Plank Studios'
    url = 'http://woodenplankstudios.com'
2956
2957
2958
class ElectricBunnyComic(GenericNavigableComic):
    """Retriever for the Electric Bunny comics."""
    # Also on http://electricbunnycomics.tumblr.com
    name = 'bunny'
    long_name = 'Electric Bunny Comic'
    url = 'http://www.electricbunnycomics.com/View/Comic/153/Welcome+to+Hell'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the parent element of the image whose alt is 'First'."""
        page = get_soup_at_url(cls.url)
        first_button = page.find('img', alt='First')
        return first_button.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the parent of the 'Next' (or 'Back') image, or None."""
        label = 'Next' if next_ else 'Back'
        button = last_soup.find('img', alt=label)
        if not button:
            return None
        return button.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title and image URLs taken from the page's meta tags.

        The `link` argument is not used.
        """
        heading = soup.find('meta', property='og:title')['content']
        image_metas = soup.find_all('meta', property='og:image')
        info = {
            'title': heading,
            'img': [meta['content'] for meta in image_metas],
        }
        return info
2986
2987
2988
class SheldonComics(GenericNavigableComic):
    """Retrieve Sheldon comics by following the nav-first/nav-next/nav-prev anchors."""
    # Also available at http://www.gocomics.com/sheldon
    name = 'sheldon'
    long_name = 'Sheldon Comics'
    url = 'http://www.sheldoncomics.com'

    @classmethod
    def get_first_comic_link(cls):
        """Return the anchor whose id is 'nav-first' on the main page."""
        home = get_soup_at_url(cls.url)
        return home.find("a", id="nav-first")

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the first next/previous anchor not pointing at the home page.

        None is returned when every candidate links to the home page or
        when there is no candidate at all."""
        nav_id = "nav-next" if next_ else "nav-prev"
        candidates = (
            anchor
            for anchor in last_soup.find_all("a", id=nav_id)
            if anchor['href'] != 'http://www.sheldoncomics.com'
        )
        return next(candidates, None)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic description from the single image in div#comic-foot."""
        foot_imgs = soup.find("div", id="comic-foot").find_all("img")
        # Sanity checks: alt and title must agree, and exactly one image is expected.
        assert all(i['alt'] == i['title'] for i in foot_imgs)
        assert len(foot_imgs) == 1
        return {
            'title': foot_imgs[0]['title'],
            'img': [i['src'] for i in foot_imgs],
        }
3019
3020
3021
class CubeDrone(GenericNavigableComic):
    """Class to retrieve Cube Drone comics.

    Navigation relies on the glyphicon <span> elements: the link returned
    is always the parent element of the matching span."""
    name = 'cubedrone'
    long_name = 'Cube Drone'
    url = 'http://cube-drone.com/comics'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics (parent of the 'backward' glyphicon)."""
        return get_soup_at_url(cls.url).find('span', class_='glyphicon glyphicon-backward').parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns None when the page has no matching chevron instead of
        failing with AttributeError on a missing element."""
        class_ = 'glyphicon glyphicon-chevron-' + ('right' if next_ else 'left')
        span = last_soup.find('span', class_=class_)
        return span.parent if span is not None else None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Title and secondary url come from the twitter meta tags; the
        secondary title and alt text come from the first comic image."""
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        url2 = soup.find('meta', attrs={'name': 'twitter:url'})['content']
        # The date is not extracted yet. Kept for reference:
        # date_str = soup.find('h2', class_='comic_title').find('small').string
        # day = string_to_date(date_str, "%B %d, %Y, %I:%M %p")
        imgs = soup.find_all('img', class_='comic img-responsive')
        title2 = imgs[0]['title']
        alt = imgs[0]['alt']
        return {
            'url2': url2,
            'title': title,
            'title2': title2,
            'alt': alt,
            'img': [i['src'] for i in imgs],
        }
3056
3057
3058
class MakeItStoopid(GenericNavigableComic):
    """Retrieve Make It Stoopid comics using the page's 'cnav' navigation bar."""
    name = 'stoopid'
    long_name = 'Make it stoopid'
    url = 'http://makeitstoopid.com/comic.php'

    @classmethod
    def get_nav(cls, soup):
        """Return the five navigation entries, None replacing those without href.

        The 'cnav' elements are expected to appear twice on the page: the
        first five must equal the remaining ones."""
        nav_items = soup.find_all(class_='cnav')
        first_bar = nav_items[:5]
        second_bar = nav_items[5:]
        assert first_bar == second_bar
        # Positions are: begin, prev, archive, next_, end
        links = []
        for item in first_bar:
            links.append(item if item.get('href') is not None else None)
        return links

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'begin' entry of the main page's navigation bar."""
        main_nav = cls.get_nav(get_soup_at_url(cls.url))
        return main_nav[0]

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the 'next_' (index 3) or 'prev' (index 1) navigation entry."""
        position = 3 if next_ else 1
        return cls.get_nav(last_soup)[position]

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic description: title from the link, images from #comicimg."""
        link_title = link['title']
        comic_imgs = soup.find_all('img', id='comicimg')
        return {
            'title': link_title,
            'img': [img['src'] for img in comic_imgs],
        }
3092
3093
3094
class GeekAndPoke(GenericNavigableComic):
    """Class to retrieve Geek And Poke comics."""
    name = 'geek'
    long_name = 'Geek And Poke'
    url = 'http://geek-and-poke.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    # presumably read by simulate_first_link — confirm against its definition
    first_url = 'http://geek-and-poke.com/geekandpoke/2006/8/27/a-new-place-for-a-not-so-old-blog.html'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic."""
        # NOTE(review): the class names look swapped ('prev-item' for next);
        # presumably the site labels newer posts 'prev-item' — confirm.
        return last_soup.find('a', class_='prev-item' if next_ else 'next-item')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Title and description come from the Open Graph meta tags, the date
        from the 'published' <time> element and the images from the post
        body (or the 'special-content' div when there is no regular body).
        'alt' is always a string: the alt text of the first image, or ""
        when the post has no usable image."""
        title = soup.find('meta', property='og:title')['content']
        desc = soup.find('meta', property='og:description')['content']
        date_str = soup.find('time', class_='published')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        author = soup.find('a', rel='author').string
        div_content = (soup.find('div', class_="body entry-content") or
                       soup.find('div', class_="special-content"))
        # Images without a 'src' attribute cannot be downloaded: ignore them.
        imgs = [i for i in div_content.find_all('img') if i.get('src') is not None]
        # NOTE(review): `'title' not in i` looks like it tests the tag's
        # children rather than its attributes, which would make this check
        # always pass — confirm against the Beautiful Soup docs before tightening.
        assert all('title' not in i or i['alt'] == i['title'] for i in imgs)
        # Fall back to an empty string (not a list) to keep the type consistent.
        alt = imgs[0].get('alt', "") if imgs else ""
        return {
            'title': title,
            'alt': alt,
            'description': desc,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
        }
3132
3133
3134
class GenericTumblrV1(GenericComic):
    """Base class for comics retrieved from Tumblr through the V1 API."""

    @classmethod
    def get_next_comic(cls, last_comic):
        """Yield the comics found in the posts returned by get_posts.

        Posts for which get_comic_info returns None are skipped."""
        for post in cls.get_posts(last_comic):
            info = cls.get_comic_info(post)
            if info is None:
                continue
            yield info

    @classmethod
    def get_url_from_post(cls, post):
        """Return the 'url' attribute of a post element."""
        return post['url']

    @classmethod
    def get_api_url(cls):
        """Return the API url: '/api/read/' joined to the class url."""
        return urljoin_wrapper(cls.url, '/api/read/')

    @classmethod
    def get_comic_info(cls, post):
        """Build the comic description for a post, or None if it is not a photo."""
        if post['type'] != 'photo':
            return None
        tumblr_id = int(post['id'])
        api_url = cls.get_api_url() + '?id=%d' % (tumblr_id)
        timestamp = int(post['unix-timestamp'])
        day = datetime.datetime.fromtimestamp(timestamp).date()
        caption = post.find('photo-caption')
        title = caption.string if caption else ""
        tags = ' '.join(tag.string for tag in post.find_all('tag'))
        # Photos may appear in 'photo' tags and/or straight in the post
        photo_tags = post.find_all('photo') or [post]
        # Images come in multiple resolutions: find() keeps the first one
        photo_urls = [photo.find('photo-url') for photo in photo_tags]
        return {
            'url': cls.get_url_from_post(post),
            'url2': post['url-with-slug'],
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'title': title,
            'tags': tags,
            'img': [photo_url.string for photo_url in photo_urls],
            'tumblr-id': tumblr_id,
            'api_url': api_url,  # for debug purposes
        }

    @classmethod
    def get_posts(cls, last_comic, nb_post_per_call=10):
        """Get posts using API. nb_post_per_call is max 50.

        Posts are fetched from newer to older (the order used by the
        tumblr v1 api) until the post matching last_comic is met; they are
        returned in chronological order. Nothing is returned when the last
        known post cannot be found."""
        waiting_for_url = last_comic['url'] if last_comic else None
        collected = []
        if last_comic is not None:
            # Tumblr posts are sometimes deleted. When the previous post has
            # been deleted, looking for it would mean going through every
            # post for nothing: failing early and clearly is preferred.
            last_api_url = last_comic['api_url']
            try:
                get_soup_at_url(last_api_url)
            except urllib.error.HTTPError:
                try:
                    get_soup_at_url(cls.url)
                    message = "Did not find previous post %s : it might have been deleted" % last_api_url
                except urllib.error.HTTPError:
                    message = "Did not find previous post nor main url %s" % cls.url
                print(message)
                return reversed(collected)
        api_url = cls.get_api_url()
        summary = get_soup_at_url(api_url).find('posts')
        start, total = int(summary['start']), int(summary['total'])
        assert start == 0
        for starting_num in range(0, total, nb_post_per_call):
            page_url = '%s?start=%d&num=%d' % (api_url, starting_num, nb_post_per_call)
            page = get_soup_at_url(page_url).find('posts')
            page_start, page_total = int(page['start']), int(page['total'])
            assert starting_num == page_start, "%d != %d" % (starting_num, page_start)
            # This may happen and should be handled in the future
            assert total == page_total, "%d != %d" % (total, page_total)
            for post in page.find_all('post'):
                if waiting_for_url and waiting_for_url == cls.get_url_from_post(post):
                    return reversed(collected)
                collected.append(post)
        if waiting_for_url is not None:
            print("Did not find %s : there might be a problem" % waiting_for_url)
            return []
        return reversed(collected)
3229
3230
3231
class IrwinCardozo(GenericTumblrV1):
    """Retrieve Irwin Cardozo comics through the Tumblr V1 API."""
    name = 'irwinc'
    long_name = 'Irwin Cardozo'
    url = 'http://irwincardozocomics.tumblr.com'
3236
3237
3238
class AccordingToDevin(GenericTumblrV1):
    """Retrieve According To Devin comics through the Tumblr V1 API."""
    name = 'devin'
    long_name = 'According To Devin'
    url = 'http://accordingtodevin.tumblr.com'
3243
3244
3245
class ItsTheTieTumblr(GenericTumblrV1):
    """Retrieve It's the tie comics through the Tumblr V1 API."""
    # Also available at http://itsthetie.com
    # Also available at https://tapastic.com/series/itsthetie
    name = 'tie-tumblr'
    long_name = "It's the tie (from Tumblr)"
    url = "http://itsthetie.tumblr.com"
3252
3253
3254
class OctopunsTumblr(GenericTumblrV1):
    """Retrieve Octopuns comics through the Tumblr V1 API."""
    # Also available at http://www.octopuns.net
    name = 'octopuns-tumblr'
    long_name = 'Octopuns (from Tumblr)'
    url = 'http://octopuns.tumblr.com'
3260
3261
3262
class PicturesInBoxesTumblr(GenericTumblrV1):
    """Retrieve Pictures In Boxes comics through the Tumblr V1 API."""
    # Also available at http://www.picturesinboxes.com
    name = 'picturesinboxes-tumblr'
    long_name = 'Pictures in Boxes (from Tumblr)'
    url = 'http://picturesinboxescomic.tumblr.com'
3268
3269
3270
class TubeyToonsTumblr(GenericTumblrV1):
    """Retrieve TubeyToons comics through the Tumblr V1 API."""
    # Also available at http://tapastic.com/series/Tubey-Toons
    # Also available at http://tubeytoons.com
    name = 'tubeytoons-tumblr'
    long_name = 'Tubey Toons (from Tumblr)'
    url = 'http://tubeytoons.tumblr.com'
3277
3278
3279
class UnearthedComicsTumblr(GenericTumblrV1):
    """Retrieve Unearthed comics through the Tumblr V1 API."""
    # Also available at http://tapastic.com/series/UnearthedComics
    # Also available at http://unearthedcomics.com
    name = 'unearthed-tumblr'
    long_name = 'Unearthed Comics (from Tumblr)'
    url = 'http://unearthedcomics.tumblr.com'
3286
3287
3288
class PieComic(GenericTumblrV1):
    """Retrieve Pie Comic comics through the Tumblr V1 API."""
    name = 'pie'
    long_name = 'Pie Comic'
    url = "http://piecomic.tumblr.com"
3293
3294
3295
class MrEthanDiamond(GenericTumblrV1):
    """Retrieve Mr Ethan Diamond comics through the Tumblr V1 API."""
    name = 'diamond'
    long_name = 'Mr Ethan Diamond'
    url = 'http://mrethandiamond.tumblr.com'
3300
3301
3302
class Flocci(GenericTumblrV1):
    """Retrieve floccinaucinihilipilification comics through the Tumblr V1 API."""
    name = 'flocci'
    long_name = 'floccinaucinihilipilification'
    url = "http://floccinaucinihilipilificationa.tumblr.com"
3307
3308
3309
class UpAndOut(GenericTumblrV1):
    """Retrieve Up & Out comics through the Tumblr V1 API."""
    # Also available at http://tapastic.com/series/UP-and-OUT
    name = 'upandout'
    long_name = 'Up And Out (from Tumblr)'
    url = 'http://upandoutcomic.tumblr.com'
3315
3316
3317
class Pundemonium(GenericTumblrV1):
    """Retrieve Pundemonium comics through the Tumblr V1 API."""
    name = 'pundemonium'
    long_name = 'Pundemonium'
    url = 'http://monstika.tumblr.com'
3322
3323
3324
class PoorlyDrawnLinesTumblr(GenericTumblrV1):
    """Retrieve Poorly Drawn Lines comics through the Tumblr V1 API."""
    # Also available at http://poorlydrawnlines.com
    name = 'poorlydrawn-tumblr'
    long_name = 'Poorly Drawn Lines (from Tumblr)'
    url = 'http://pdlcomics.tumblr.com'
3330
3331
3332
class PearShapedComics(GenericTumblrV1):
    """Retrieve Pear Shaped Comics through the Tumblr V1 API."""
    name = 'pearshaped'
    long_name = 'Pear-Shaped Comics'
    url = 'http://pearshapedcomics.com'
3337
3338
3339
class PondScumComics(GenericTumblrV1):
    """Retrieve Pond Scum comics through the Tumblr V1 API."""
    name = 'pond'
    long_name = 'Pond Scum'
    url = 'http://pondscumcomic.tumblr.com'
3344
3345
3346
class MercworksTumblr(GenericTumblrV1):
    """Retrieve Mercworks comics through the Tumblr V1 API."""
    # Also available at http://mercworks.net
    name = 'mercworks-tumblr'
    long_name = 'Mercworks (from Tumblr)'
    url = 'http://mercworks.tumblr.com'
3352
3353
3354
class OwlTurdTumblr(GenericTumblrV1):
    """Retrieve Owl Turd comics through the Tumblr V1 API."""
    # Also available at http://tapastic.com/series/Owl-Turd-Comix
    name = 'owlturd-tumblr'
    long_name = 'Owl Turd (from Tumblr)'
    url = 'http://owlturd.com'
3360
3361
3362
class VectorBelly(GenericTumblrV1):
    """Retrieve Vector Belly comics through the Tumblr V1 API."""
    # Also available at http://vectorbelly.com
    name = 'vector'
    long_name = 'Vector Belly'
    url = 'http://vectorbelly.tumblr.com'
3368
3369
3370
class GoneIntoRapture(GenericTumblrV1):
    """Retrieve Gone Into Rapture comics through the Tumblr V1 API."""
    # Also available at http://goneintorapture.tumblr.com
    # Also available at http://tapastic.com/series/Goneintorapture
    name = 'rapture'
    long_name = 'Gone Into Rapture'
    url = 'http://www.goneintorapture.com'
3377
3378
3379
class TheOatmealTumblr(GenericTumblrV1):
    """Retrieve The Oatmeal comics through the Tumblr V1 API."""
    # Also available at http://theoatmeal.com
    name = 'oatmeal-tumblr'
    long_name = 'The Oatmeal (from Tumblr)'
    url = 'http://oatmeal.tumblr.com'
3385
3386
3387
class HeckIfIKnowComicsTumblr(GenericTumblrV1):
    """Retrieve Heck If I Know comics through the Tumblr V1 API."""
    # Also available at http://tapastic.com/series/Regular
    name = 'heck-tumblr'
    long_name = 'Heck if I Know comics (from Tumblr)'
    url = 'http://heckifiknowcomics.com'
3393
3394
3395
class MyJetPack(GenericTumblrV1):
    """Retrieve My Jet Pack comics through the Tumblr V1 API."""
    name = 'jetpack'
    long_name = 'My Jet Pack'
    url = 'http://myjetpack.tumblr.com'
3400
3401
3402
class CheerUpEmoKidTumblr(GenericTumblrV1):
    """Retrieve CheerUpEmoKid comics through the Tumblr V1 API."""
    # Also available at http://www.cheerupemokid.com
    # Also available at http://tapastic.com/series/CUEK
    name = 'cuek-tumblr'
    long_name = 'Cheer Up Emo Kid (from Tumblr)'
    url = 'http://enzocomics.tumblr.com'
3409
3410
3411
class ForLackOfABetterComic(GenericTumblrV1):
    """Retrieve For Lack Of A Better Comic through the Tumblr V1 API."""
    # Also available at http://forlackofabettercomic.com
    name = 'lack'
    long_name = 'For Lack Of A Better Comic'
    url = 'http://forlackofabettercomic.tumblr.com'
3417
3418
3419
class ZenPencilsTumblr(GenericTumblrV1):
    """Retrieve ZenPencils comics through the Tumblr V1 API."""
    # Also available at http://zenpencils.com
    # Also available at http://www.gocomics.com/zen-pencils
    name = 'zenpencils-tumblr'
    long_name = 'Zen Pencils (from Tumblr)'
    url = 'http://zenpencils.tumblr.com'
3426
3427
3428
class ThreeWordPhraseTumblr(GenericTumblrV1):
    """Retrieve Three Word Phrase comics through the Tumblr V1 API."""
    # Also available at http://threewordphrase.com
    name = 'threeword-tumblr'
    long_name = 'Three Word Phrase (from Tumblr)'
    url = 'http://www.threewordphrase.tumblr.com'
3434
3435
3436
class TimeTrabbleTumblr(GenericTumblrV1):
    """Retrieve Time Trabble comics through the Tumblr V1 API."""
    # Also available at http://timetrabble.com
    name = 'timetrabble-tumblr'
    long_name = 'Time Trabble (from Tumblr)'
    url = 'http://timetrabble.tumblr.com'
3442
3443
3444
class SafelyEndangeredTumblr(GenericTumblrV1):
    """Retrieve Safely Endangered comics through the Tumblr V1 API."""
    # Also available at http://www.safelyendangered.com
    name = 'endangered-tumblr'
    long_name = 'Safely Endangered (from Tumblr)'
    url = 'http://tumblr.safelyendangered.com'
3450
3451
3452
class MouseBearComedyTumblr(GenericTumblrV1):
    """Retrieve Mouse Bear Comedy comics through the Tumblr V1 API."""
    # Also available at http://www.mousebearcomedy.com
    name = 'mousebear-tumblr'
    long_name = 'Mouse Bear Comedy (from Tumblr)'
    url = 'http://mousebearcomedy.tumblr.com'
3458
3459
3460
class BouletCorpTumblr(GenericTumblrV1):
    """Retrieve BouletCorp comics through the Tumblr V1 API."""
    # Also available at http://www.bouletcorp.com
    name = 'boulet-tumblr'
    long_name = 'Boulet Corp (from Tumblr)'
    url = 'http://bouletcorp.tumblr.com'
3466
3467
3468
class TheAwkwardYetiTumblr(GenericTumblrV1):
    """Retrieve The Awkward Yeti comics through the Tumblr V1 API."""
    # Also available at http://www.gocomics.com/the-awkward-yeti
    # Also available at http://theawkwardyeti.com
    # Also available at https://tapastic.com/series/TheAwkwardYeti
    name = 'yeti-tumblr'
    long_name = 'The Awkward Yeti (from Tumblr)'
    url = 'http://larstheyeti.tumblr.com'
3476
3477
3478
class NellucNhoj(GenericTumblrV1):
    """Retrieve NellucNhoj comics through the Tumblr V1 API."""
    name = 'nhoj'
    long_name = 'Nelluc Nhoj'
    url = 'http://nellucnhoj.com'
3483
3484
3485
class DownTheUpwardSpiralTumblr(GenericTumblrV1):
    """Retrieve Down The Upward Spiral comics through the Tumblr V1 API."""
    # Also available at http://www.downtheupwardspiral.com
    name = 'spiral-tumblr'
    long_name = 'Down the Upward Spiral (from Tumblr)'
    url = 'http://downtheupwardspiral.tumblr.com'
3491
3492
3493
class AsPerUsualTumblr(GenericTumblrV1):
    """Retrieve As Per Usual comics through the Tumblr V1 API."""
    # Also available at https://tapastic.com/series/AsPerUsual
    name = 'usual-tumblr'
    long_name = 'As Per Usual (from Tumblr)'
    url = 'http://as-per-usual.tumblr.com'
3499
3500
3501
class OneOneOneOneComicTumblr(GenericTumblrV1):
    """Retrieve 1111 Comics through the Tumblr V1 API."""
    # Also available at http://www.1111comics.me
    # Also available at https://tapastic.com/series/1111-Comics
    name = '1111-tumblr'
    long_name = '1111 Comics (from Tumblr)'
    url = 'http://comics1111.tumblr.com'
3508
3509
3510
class JhallComicsTumblr(GenericTumblrV1):
    """Retrieve Jhall Comics through the Tumblr V1 API."""
    # Also available at http://jhallcomics.com
    name = 'jhall-tumblr'
    long_name = 'Jhall Comics (from Tumblr)'
    url = 'http://jhallcomics.tumblr.com'
3516
3517
3518
class BerkeleyMewsTumblr(GenericTumblrV1):
    """Retrieve Berkeley Mews comics through the Tumblr V1 API."""
    # Also available at http://www.gocomics.com/berkeley-mews
    # Also available at http://www.berkeleymews.com
    name = 'berkeley-tumblr'
    long_name = 'Berkeley Mews (from Tumblr)'
    url = 'http://mews.tumblr.com'
3525
3526
3527
class JoanCornellaTumblr(GenericTumblrV1):
    """Retrieve Joan Cornella comics through the Tumblr V1 API."""
    # Also available at http://joancornella.net
    name = 'cornella-tumblr'
    long_name = 'Joan Cornella (from Tumblr)'
    url = 'http://cornellajoan.tumblr.com'
3533
3534
3535
class RespawnComicTumblr(GenericTumblrV1):
    """Retrieve Respawn Comic through the Tumblr V1 API."""
    # Also available at http://respawncomic.com
    name = 'respawn-tumblr'
    long_name = 'Respawn Comic (from Tumblr)'
    url = 'http://respawncomic.tumblr.com'
3541
3542
3543
class ChrisHallbeckTumblr(GenericTumblrV1):
    """Retrieve Chris Hallbeck comics through the Tumblr V1 API."""
    # Also available at https://tapastic.com/ChrisHallbeck
    # Also available at http://maximumble.com
    # Also available at http://minimumble.com
    # Also available at http://thebookofbiff.com
    name = 'hallbeck-tumblr'
    # NOTE(review): 'Hallback' differs from the 'Hallbeck' spelling used by
    # the class name and url; probably a typo, but the value is runtime data,
    # so confirm how long_name is used before changing it.
    long_name = 'Chris Hallback (from Tumblr)'
    url = 'http://chrishallbeck.tumblr.com'
3552
3553
3554
class ComicNuggets(GenericTumblrV1):
    """Retrieve Comic Nuggets through the Tumblr V1 API."""
    name = 'nuggets'
    long_name = 'Comic Nuggets'
    url = 'http://comicnuggets.com'
3559
3560
3561
class PigeonGazetteTumblr(GenericTumblrV1):
    """Retrieve The Pigeon Gazette comics through the Tumblr V1 API."""
    # Also available at https://tapastic.com/series/The-Pigeon-Gazette
    name = 'pigeon-tumblr'
    long_name = 'The Pigeon Gazette (from Tumblr)'
    url = 'http://thepigeongazette.tumblr.com'
3567
3568
3569
class CancerOwl(GenericTumblrV1):
    """Retrieve Cancer Owl comics through the Tumblr V1 API."""
    # Also available at http://cancerowl.com
    name = 'cancerowl-tumblr'
    long_name = 'Cancer Owl (from Tumblr)'
    url = 'http://cancerowl.tumblr.com'
3575
3576
3577
class FowlLanguageTumblr(GenericTumblrV1):
    """Retrieve Fowl Language comics through the Tumblr V1 API."""
    # Also available at http://www.fowllanguagecomics.com
    # Also available at http://tapastic.com/series/Fowl-Language-Comics
    # Also available at http://www.gocomics.com/fowl-language
    name = 'fowllanguage-tumblr'
    long_name = 'Fowl Language Comics (from Tumblr)'
    url = 'http://fowllanguagecomics.tumblr.com'
3585
3586
3587
class TheOdd1sOutTumblr(GenericTumblrV1):
    """Retrieve The Odd 1s Out comics through the Tumblr V1 API."""
    # Also available at http://theodd1sout.com
    # Also available at https://tapastic.com/series/Theodd1sout
    name = 'theodd-tumblr'
    long_name = 'The Odd 1s Out (from Tumblr)'
    url = 'http://theodd1sout.tumblr.com'
3594
3595
3596
class TheUnderfoldTumblr(GenericTumblrV1):
    """Retrieve The Underfold comics through the Tumblr V1 API."""
    # Also available at http://theunderfold.com
    name = 'underfold-tumblr'
    long_name = 'The Underfold (from Tumblr)'
    url = 'http://theunderfold.tumblr.com'
3602
3603
3604
class LolNeinTumblr(GenericTumblrV1):
    """Retrieve Lol Nein comics through the Tumblr V1 API."""
    # Also available at http://lolnein.com
    name = 'lolnein-tumblr'
    long_name = 'Lol Nein (from Tumblr)'
    url = 'http://lolneincom.tumblr.com'
3610
3611
3612
class FatAwesomeComicsTumblr(GenericTumblrV1):
    """Retrieve Fat Awesome Comics through the Tumblr V1 API."""
    # Also available at http://fatawesome.com/comics
    name = 'fatawesome-tumblr'
    long_name = 'Fat Awesome (from Tumblr)'
    url = 'http://fatawesomecomedy.tumblr.com'
3618
3619
3620
class TheWorldIsFlatTumblr(GenericTumblrV1):
    """Retrieve The World Is Flat comics through the Tumblr V1 API."""
    # Also available at https://tapastic.com/series/The-World-is-Flat
    name = 'flatworld-tumblr'
    long_name = 'The World Is Flat (from Tumblr)'
    url = 'http://theworldisflatcomics.tumblr.com'
3626
3627
3628
class DorrisMc(GenericEmptyComic, GenericTumblrV1):
    """Retrieve Dorris Mc comics (Tumblr V1 API based)."""
    # NOTE(review): GenericEmptyComic comes first in the bases, so it
    # presumably takes precedence over the Tumblr retrieval — confirm.
    # Also available at http://www.gocomics.com/dorris-mccomics
    name = 'dorrismc'
    long_name = 'Dorris Mc'
    url = 'http://dorrismccomics.com'
3634
3635
3636
class LeleozTumblr(GenericEmptyComic, GenericTumblrV1):
    """Retrieve Leleoz comics (Tumblr V1 API based)."""
    # NOTE(review): GenericEmptyComic comes first in the bases, so it
    # presumably takes precedence over the Tumblr retrieval — confirm.
    # Also available at https://tapastic.com/series/Leleoz
    name = 'leleoz-tumblr'
    long_name = 'Leleoz (from Tumblr)'
    url = 'http://leleozcomics.tumblr.com'
3642
3643
3644
class MoonBeardTumblr(GenericTumblrV1):
    """Retrieve MoonBeard comics through the Tumblr V1 API."""
    # Also available at http://moonbeard.com
    # Also available at http://www.webtoons.com/en/comedy/moon-beard/list?title_no=471
    name = 'moonbeard-tumblr'
    long_name = 'Moon Beard (from Tumblr)'
    url = 'http://blog.squiresjam.es/moonbeard'
3651
3652
3653
class AComik(GenericTumblrV1):
    """Retrieve A Comik comics through the Tumblr V1 API."""
    name = 'comik'
    long_name = 'A Comik'
    url = 'http://acomik.com'
3658
3659
3660
class ClassicRandy(GenericTumblrV1):
    """Retrieve Classic Randy comics through the Tumblr V1 API."""
    name = 'randy'
    long_name = 'Classic Randy'
    url = 'http://classicrandy.tumblr.com'
3665
3666
3667
class DagssonTumblr(GenericTumblrV1):
    """Retrieve Dagsson comics through the Tumblr V1 API."""
    # Also available at http://www.dagsson.com
    name = 'dagsson-tumblr'
    long_name = 'Dagsson Hugleikur (from Tumblr)'
    url = 'http://hugleikurdagsson.tumblr.com'
3673
3674
3675
class LinsEditionsTumblr(GenericTumblrV1):
    """Retrieve L.I.N.S. Editions comics through the Tumblr V1 API."""
    # Also available at https://linsedition.com
    name = 'lins-tumblr'
    long_name = 'L.I.N.S. Editions (from Tumblr)'
    url = 'http://linscomics.tumblr.com'
3681
3682
3683
class OrigamiHotDish(GenericTumblrV1):
    """Retrieve Origami Hot Dish comics through the Tumblr V1 API."""
    name = 'origamihotdish'
    long_name = 'Origami Hot Dish'
    url = 'http://origamihotdish.com'
3688
3689
3690
class HitAndMissComicsTumblr(GenericTumblrV1):
    """Retrieve Hit and Miss Comics through the Tumblr V1 API."""
    name = 'hitandmiss'
    long_name = 'Hit and Miss Comics'
    url = 'http://hitandmisscomics.tumblr.com'
3695
3696
3697
class HMBlanc(GenericTumblrV1):
    """Retrieve HM Blanc comics through the Tumblr V1 API."""
    name = 'hmblanc'
    long_name = 'HM Blanc'
    url = 'http://hmblanc.tumblr.com'
3702
3703
3704
class TalesOfAbsurdityTumblr(GenericTumblrV1):
    """Retrieve Tales Of Absurdity comics through the Tumblr V1 API."""
    # Also available at http://talesofabsurdity.com
    # Also available at http://tapastic.com/series/Tales-Of-Absurdity
    name = 'absurdity-tumblr'
    long_name = 'Tales of Absurdity (from Tumblr)'
    url = 'http://talesofabsurdity.tumblr.com'
3711
3712
3713
class RobbieAndBobby(GenericTumblrV1):
    """Retrieve Robbie And Bobby comics through the Tumblr V1 API."""
    # Also available at http://robbieandbobby.com
    name = 'robbie-tumblr'
    long_name = 'Robbie And Bobby (from Tumblr)'
    url = 'http://robbieandbobby.tumblr.com'
3719
3720
3721
class ElectricBunnyComicTumblr(GenericTumblrV1):
    """Retrieve Electric Bunny Comics through the Tumblr V1 API."""
    # Also available at http://www.electricbunnycomics.com/View/Comic/153/Welcome+to+Hell
    name = 'bunny-tumblr'
    long_name = 'Electric Bunny Comic (from Tumblr)'
    url = 'http://electricbunnycomics.tumblr.com'
3727
3728
3729
class Hoomph(GenericTumblrV1):
    """Retrieve Hoomph comics through the Tumblr V1 API."""
    name = 'hoomph'
    long_name = 'Hoomph'
    url = 'http://hoom.ph'
3734
3735
3736
class BFGFSTumblr(GenericTumblrV1):
    """Retrieve BFGFS comics through the Tumblr V1 API."""
    # Also available at https://tapastic.com/series/BFGFS
    # Also available at http://bfgfs.com
    name = 'bfgfs-tumblr'
    long_name = 'BFGFS (from Tumblr)'
    url = 'http://bfgfs.tumblr.com'
3743
3744
3745
class DoodleForFood(GenericTumblrV1):
    """Retrieve Doodle For Food comics through the Tumblr V1 API."""
    # Also available at http://doodleforfood.com
    name = 'doodle'
    long_name = 'Doodle For Food'
    url = 'http://doodleforfood.com'
3751
3752
3753
class CassandraCalinTumblr(GenericEmptyComic, GenericTumblrV1):
    """Retrieve C. Cassandra comics (Tumblr V1 API based)."""
    # NOTE(review): GenericEmptyComic comes first in the bases, so it
    # presumably takes precedence over the Tumblr retrieval — confirm.
    # Also available at http://cassandracalin.com
    # Also available at https://tapastic.com/series/C-Cassandra-comics
    name = 'cassandra-tumblr'
    long_name = 'Cassandra Calin (from Tumblr)'
    url = 'http://c-cassandra.tumblr.com'
3760
3761
3762
class DougWasTaken(GenericTumblrV1):
    """Retrieve Doug Was Taken comics through the Tumblr V1 API."""
    name = 'doog'
    long_name = 'Doug Was Taken'
    url = 'http://dougwastaken.tumblr.com'
3767
3768
3769
class MandatoryRollerCoaster(GenericEmptyComic, GenericTumblrV1):
    """Retrieve Mandatory Roller Coaster comics (Tumblr V1 API based)."""
    # NOTE(review): GenericEmptyComic comes first in the bases, so it
    # presumably takes precedence over the Tumblr retrieval — confirm.
    name = 'rollercoaster'
    long_name = 'Mandatory Roller Coaster'
    url = 'http://mandatoryrollercoaster.com'
3774
3775
3776
class CEstPasEnRegardantSesPompes(GenericEmptyComic, GenericTumblrV1):
    """Retrieve C'Est Pas En Regardant Ses Pompes (...) comics (Tumblr V1 API based)."""
    # NOTE(review): GenericEmptyComic comes first in the bases, so it
    # presumably takes precedence over the Tumblr retrieval — confirm.
    name = 'cperspqccltt'
    long_name = 'C Est Pas En Regardant Ses Pompes (...)'
    url = 'http://cperspqccltt.tumblr.com'
3781
3782
3783
class HorovitzComics(GenericListableComic):
    """Shared logic for the different comic series published by Horovitz.

    Subclasses must set `link_re`, the pattern selecting their archive links;
    its first group is used as the comic number."""
    url = 'http://www.horovitzcomics.com'
    # Captures year, month and day from the image url.
    img_re = re.compile('.*comics/([0-9]*)/([0-9]*)/([0-9]*)/.*$')
    link_re = NotImplemented
    get_url_from_archive_element = join_cls_url_to_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic description from the archive link and the comic page."""
        num = int(cls.link_re.match(link['href']).group(1))
        title = link.string
        imgs = soup.find_all('img', id='comic')
        assert len(imgs) == 1
        date_match = cls.img_re.match(imgs[0]['src'])
        year, month, day = map(int, date_match.groups())
        return {
            'title': title,
            'day': day,
            'month': month,
            'year': year,
            'img': [img['src'] for img in imgs],
            'num': num,
        }

    @classmethod
    def get_archive_elements(cls):
        """Return the archive links matching link_re, in reversed page order."""
        archive_soup = get_soup_at_url('http://www.horovitzcomics.com/comics/archive/')
        matching_links = archive_soup.find_all('a', href=cls.link_re)
        return reversed(matching_links)
3813
3814
3815
class HorovitzNew(HorovitzComics):
    """Retrieve the Horovitz comics whose archive links are under /comics/new/."""
    name = 'horovitznew'
    long_name = 'Horovitz New'
    link_re = re.compile('^/comics/new/([0-9]+)$')
3820
3821
3822
class HorovitzClassic(HorovitzComics):
    """Class to retrieve Horovitz classic comics."""
    name = 'horovitzclassic'
    long_name = 'Horovitz Classic'
    link_re = re.compile('^/comics/classic/([0-9]+)$')
3827
3828
3829
class GenericGoComic(GenericNavigableComic):
    """Generic class to handle the logic common to comics from gocomics.com."""
    # Captures the three trailing numeric path components of a comic URL;
    # they are read below as year, month and day.
    url_date_re = re.compile('.*/([0-9]*)/([0-9]*)/([0-9]*)$')

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics."""
        return get_soup_at_url(cls.url).find('a', class_='beginning')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic."""
        return last_soup.find('a', class_='next' if next_ else 'prev', href=cls.url_date_re)

    @classmethod
    def get_url_from_link(cls, link):
        """Get the comic URL by joining the link's href to the gocomics.com root."""
        gocomics = 'http://www.gocomics.com'
        return urljoin_wrapper(gocomics, link['href'])

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The date is parsed from the comic URL; the image is the last 'img'
        tag with class 'strip' and the author comes from the 'author' meta tag.
        """
        url = cls.get_url_from_link(link)
        year, month, day = [int(s)
                            for s in cls.url_date_re.match(url).groups()]
        return {
            'day': day,
            'month': month,
            'year': year,
            'img': [soup.find_all('img', class_='strip')[-1]['src']],
            'author': soup.find('meta', attrs={'name': 'author'})['content']
        }
3861
3862
3863
class PearlsBeforeSwine(GenericGoComic):
    """Class to retrieve Pearls Before Swine comics."""
    name = 'pearls'
    long_name = 'Pearls Before Swine'
    url = 'http://www.gocomics.com/pearlsbeforeswine'
3868
3869
3870
class Peanuts(GenericGoComic):
    """Class to retrieve Peanuts comics."""
    name = 'peanuts'
    long_name = 'Peanuts'
    url = 'http://www.gocomics.com/peanuts'
3875
3876
3877
class MattWuerker(GenericGoComic):
    """Class to retrieve Matt Wuerker comics."""
    name = 'wuerker'
    long_name = 'Matt Wuerker'
    url = 'http://www.gocomics.com/mattwuerker'
3882
3883
3884
class TomToles(GenericGoComic):
    """Class to retrieve Tom Toles comics."""
    name = 'toles'
    long_name = 'Tom Toles'
    url = 'http://www.gocomics.com/tomtoles'
3889
3890
3891
class BreakOfDay(GenericGoComic):
    """Class to retrieve Break Of Day comics."""
    name = 'breakofday'
    long_name = 'Break Of Day'
    url = 'http://www.gocomics.com/break-of-day'
3896
3897
3898
class Brevity(GenericGoComic):
    """Class to retrieve Brevity comics."""
    name = 'brevity'
    long_name = 'Brevity'
    url = 'http://www.gocomics.com/brevity'
3903
3904
3905
class MichaelRamirez(GenericGoComic):
    """Class to retrieve Michael Ramirez comics."""
    name = 'ramirez'
    long_name = 'Michael Ramirez'
    url = 'http://www.gocomics.com/michaelramirez'
3910
3911
3912
class MikeLuckovich(GenericGoComic):
    """Class to retrieve Mike Luckovich comics."""
    name = 'luckovich'
    long_name = 'Mike Luckovich'
    url = 'http://www.gocomics.com/mikeluckovich'
3917
3918
3919
class JimBenton(GenericGoComic):
    """Class to retrieve Jim Benton comics."""
    # Also on http://jimbenton.tumblr.com
    name = 'benton'
    long_name = 'Jim Benton'
    url = 'http://www.gocomics.com/jim-benton-cartoons'
3925
3926
3927
class TheArgyleSweater(GenericGoComic):
    """Class to retrieve the Argyle Sweater comics."""
    name = 'argyle'
    long_name = 'Argyle Sweater'
    url = 'http://www.gocomics.com/theargylesweater'
3932
3933
3934
class SunnyStreet(GenericGoComic):
    """Class to retrieve Sunny Street comics."""
    # Also on http://www.sunnystreetcomics.com
    name = 'sunny'
    long_name = 'Sunny Street'
    url = 'http://www.gocomics.com/sunny-street'
3940
3941
3942
class OffTheMark(GenericGoComic):
    """Class to retrieve Off The Mark comics."""
    # Also on https://www.offthemark.com
    name = 'offthemark'
    long_name = 'Off The Mark'
    url = 'http://www.gocomics.com/offthemark'
3948
3949
3950
class WuMo(GenericGoComic):
    """Class to retrieve WuMo comics."""
    # Also on http://wumo.com
    name = 'wumo'
    long_name = 'WuMo'
    url = 'http://www.gocomics.com/wumo'
3956
3957
3958
class LunarBaboon(GenericGoComic):
    """Class to retrieve Lunar Baboon comics."""
    # Also on http://www.lunarbaboon.com
    # Also on https://tapastic.com/series/Lunarbaboon
    name = 'lunarbaboon'
    long_name = 'Lunar Baboon'
    url = 'http://www.gocomics.com/lunarbaboon'
3965
3966
3967
class SandersenGocomic(GenericGoComic):
    """Class to retrieve Sarah Andersen comics."""
    # Also on http://sarahcandersen.com
    # Also on http://tapastic.com/series/Doodle-Time
    name = 'sandersen-goc'
    long_name = 'Sarah Andersen (from GoComics)'
    url = 'http://www.gocomics.com/sarahs-scribbles'
3974
3975
3976
class CalvinAndHobbesGoComic(GenericGoComic):
    """Class to retrieve Calvin and Hobbes comics."""
    # From gocomics, not http://marcel-oehler.marcellosendos.ch/comics/ch/
    name = 'calvin-goc'
    long_name = 'Calvin and Hobbes (from GoComics)'
    url = 'http://www.gocomics.com/calvinandhobbes'
3982
3983
3984
class RallGoComic(GenericGoComic):
    """Class to retrieve Ted Rall comics."""
    # Also on http://rall.com/comic
    name = 'rall-goc'
    long_name = "Ted Rall (from GoComics)"
    url = "http://www.gocomics.com/tedrall"
3990
3991
3992
class TheAwkwardYetiGoComic(GenericGoComic):
    """Class to retrieve The Awkward Yeti comics."""
    # Also on http://larstheyeti.tumblr.com
    # Also on http://theawkwardyeti.com
    # Also on https://tapastic.com/series/TheAwkwardYeti
    name = 'yeti-goc'
    long_name = 'The Awkward Yeti (from GoComics)'
    url = 'http://www.gocomics.com/the-awkward-yeti'
4000
4001
4002
class BerkeleyMewsGoComics(GenericGoComic):
    """Class to retrieve Berkeley Mews comics."""
    # Also on http://mews.tumblr.com
    # Also on http://www.berkeleymews.com
    name = 'berkeley-goc'
    long_name = 'Berkeley Mews (from GoComics)'
    url = 'http://www.gocomics.com/berkeley-mews'
4009
4010
4011
class SheldonGoComics(GenericGoComic):
    """Class to retrieve Sheldon comics."""
    # Also on http://www.sheldoncomics.com
    name = 'sheldon-goc'
    long_name = 'Sheldon Comics (from GoComics)'
    url = 'http://www.gocomics.com/sheldon'
4017
4018
4019
class FowlLanguageGoComics(GenericGoComic):
    """Class to retrieve Fowl Language comics."""
    # Also on http://www.fowllanguagecomics.com
    # Also on http://tapastic.com/series/Fowl-Language-Comics
    # Also on http://fowllanguagecomics.tumblr.com
    name = 'fowllanguage-goc'
    long_name = 'Fowl Language Comics (from GoComics)'
    url = 'http://www.gocomics.com/fowl-language'
4027
4028
4029
class NickAnderson(GenericGoComic):
    """Class to retrieve Nick Anderson comics."""
    name = 'nickanderson'
    long_name = 'Nick Anderson'
    url = 'http://www.gocomics.com/nickanderson'
4034
4035
4036
class GarfieldGoComics(GenericGoComic):
    """Class to retrieve Garfield comics."""
    # Also on http://garfield.com
    name = 'garfield-goc'
    long_name = 'Garfield (from GoComics)'
    url = 'http://www.gocomics.com/garfield'
4042
4043
4044
class DorrisMcGoComics(GenericGoComic):
    """Class to retrieve Dorris Mc Comics."""
    # Also on http://dorrismccomics.com
    name = 'dorrismc-goc'
    long_name = 'Dorris Mc (from GoComics)'
    url = 'http://www.gocomics.com/dorris-mccomics'
4050
4051
4052
class FoxTrot(GenericGoComic):
    """Class to retrieve FoxTrot comics."""
    name = 'foxtrot'
    long_name = 'FoxTrot'
    url = 'http://www.gocomics.com/foxtrot'
4057
4058
4059
class FoxTrotClassics(GenericGoComic):
    """Class to retrieve FoxTrot Classics comics."""
    name = 'foxtrot-classics'
    long_name = 'FoxTrot Classics'
    url = 'http://www.gocomics.com/foxtrotclassics'
4064
4065
4066
class MisterAndMeGoComics(GenericGoComic):
    """Class to retrieve Mister & Me Comics."""
    # Also on http://www.mister-and-me.com
    # Also on https://tapastic.com/series/Mister-and-Me
    name = 'mister-goc'
    long_name = 'Mister & Me (from GoComics)'
    url = 'http://www.gocomics.com/mister-and-me'
4073
4074
4075
class NonSequitur(GenericGoComic):
    """Class to retrieve Non Sequitur (Wiley Miller) comics."""
    name = 'nonsequitur'
    long_name = 'Non Sequitur'
    url = 'http://www.gocomics.com/nonsequitur'
4080
4081
4082
class GenericTapasticComic(GenericListableComic):
    """Generic class to handle the logic common to comics from tapastic.com."""

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Get information about a particular comics.

        `soup` is the parsed episode page and `archive_elt` the episode entry
        taken from the archive list. Returns None (after printing a notice)
        when the page holds no 'art-image' image yet; otherwise returns a dict
        with the date, the image sources and the title.
        """
        # 'publishDate' is divided by 1000 before being used as a timestamp
        # (presumably it is expressed in milliseconds - confirm).
        timestamp = int(archive_elt['publishDate']) / 1000.0
        day = datetime.datetime.fromtimestamp(timestamp).date()
        imgs = soup.find_all('img', class_='art-image')
        if not imgs:
            print("Comic %s is being uploaded, retry later" % cls.get_url_from_archive_element(archive_elt))
            return None
        return {
            'day': day.day,
            'year': day.year,
            'month': day.month,
            'img': [i['src'] for i in imgs],
            'title': archive_elt['title'],
        }

    @classmethod
    def get_url_from_archive_element(cls, archive_elt):
        """Get the episode URL built from the archive element's id."""
        return 'http://tapastic.com/episode/' + str(archive_elt['id'])

    @classmethod
    def get_archive_elements(cls):
        """Get the list of episodes described in the series page.

        Raises IndexError when no 'episodeList' line is found in the page.
        """
        pref, suff = 'episodeList : ', ','
        # Information is stored in the javascript part
        # I don't know the clean way to get it so this is the ugly way.
        for raw_line in urlopen_wrapper(cls.url).readlines():
            line = raw_line.decode('utf-8').strip()
            if line.startswith(pref) and line.endswith(suff):
                # Keep only the JSON payload between the prefix and the
                # trailing comma.
                return json.loads(line[len(pref):-len(suff)])
        raise IndexError("No '%s' line found at %s" % (pref.strip(), cls.url))
4114
4115
4116
class VegetablesForDessert(GenericTapasticComic):
    """Class to retrieve Vegetables For Dessert comics."""
    # Also on http://vegetablesfordessert.tumblr.com
    name = 'vegetables'
    long_name = 'Vegetables For Dessert'
    url = 'http://tapastic.com/series/vegetablesfordessert'
4122
4123
4124
class FowlLanguageTapa(GenericTapasticComic):
    """Class to retrieve Fowl Language comics."""
    # Also on http://www.fowllanguagecomics.com
    # Also on http://fowllanguagecomics.tumblr.com
    # Also on http://www.gocomics.com/fowl-language
    name = 'fowllanguage-tapa'
    long_name = 'Fowl Language Comics (from Tapastic)'
    url = 'http://tapastic.com/series/Fowl-Language-Comics'
4132
4133
4134
class OscillatingProfundities(GenericTapasticComic):
    """Class to retrieve Oscillating Profundities comics."""
    name = 'oscillating'
    long_name = 'Oscillating Profundities'
    url = 'http://tapastic.com/series/oscillatingprofundities'
4139
4140
4141
class ZnoflatsComics(GenericTapasticComic):
    """Class to retrieve Znoflats comics."""
    name = 'znoflats'
    long_name = 'Znoflats Comics'
    url = 'http://tapastic.com/series/Znoflats-Comics'
4146
4147
4148
class SandersenTapastic(GenericTapasticComic):
    """Class to retrieve Sarah Andersen comics."""
    # Also on http://sarahcandersen.com
    # Also on http://www.gocomics.com/sarahs-scribbles
    name = 'sandersen-tapa'
    long_name = 'Sarah Andersen (from Tapastic)'
    url = 'http://tapastic.com/series/Doodle-Time'
4155
4156
4157
class TubeyToonsTapastic(GenericTapasticComic):
    """Class to retrieve TubeyToons comics."""
    # Also on http://tubeytoons.com
    # Also on http://tubeytoons.tumblr.com
    name = 'tubeytoons-tapa'
    long_name = 'Tubey Toons (from Tapastic)'
    url = 'http://tapastic.com/series/Tubey-Toons'
4164
4165
4166
class AnythingComicTapastic(GenericTapasticComic):
    """Class to retrieve Anything Comics."""
    # Also on http://www.anythingcomic.com
    name = 'anythingcomic-tapa'
    long_name = 'Anything Comic (from Tapastic)'
    url = 'http://tapastic.com/series/anything'
4172
4173
4174
class UnearthedComicsTapastic(GenericTapasticComic):
    """Class to retrieve Unearthed comics."""
    # Also on http://unearthedcomics.com
    # Also on http://unearthedcomics.tumblr.com
    name = 'unearthed-tapa'
    long_name = 'Unearthed Comics (from Tapastic)'
    url = 'http://tapastic.com/series/UnearthedComics'
4181
4182
4183
class EverythingsStupidTapastic(GenericTapasticComic):
    """Class to retrieve Everything's stupid Comics."""
    # Also on http://www.webtoons.com/en/challenge/everythings-stupid/list?title_no=14591
    # Also on http://everythingsstupid.net
    name = 'stupid-tapa'
    long_name = "Everything's Stupid (from Tapastic)"
    url = 'http://tapastic.com/series/EverythingsStupid'
4190
4191
4192
class JustSayEhTapastic(GenericTapasticComic):
    """Class to retrieve Just Say Eh comics."""
    # Also on http://www.justsayeh.com
    name = 'justsayeh-tapa'
    long_name = 'Just Say Eh (from Tapastic)'
    url = 'http://tapastic.com/series/Just-Say-Eh'
4198
4199
4200
class ThorsThundershackTapastic(GenericTapasticComic):
    """Class to retrieve Thor's Thundershack comics."""
    # Also on http://www.thorsthundershack.com
    name = 'thor-tapa'
    long_name = 'Thor\'s Thundershack (from Tapastic)'
    url = 'http://tapastic.com/series/Thors-Thundershac'
4206
4207
4208
class OwlTurdTapastic(GenericTapasticComic):
    """Class to retrieve Owl Turd comics."""
    # Also on http://owlturd.com
    name = 'owlturd-tapa'
    long_name = 'Owl Turd (from Tapastic)'
    url = 'http://tapastic.com/series/Owl-Turd-Comix'
4214
4215
4216
class GoneIntoRaptureTapastic(GenericTapasticComic):
    """Class to retrieve Gone Into Rapture comics."""
    # Also on http://goneintorapture.tumblr.com
    # Also on http://www.goneintorapture.com
    name = 'rapture-tapa'
    long_name = 'Gone Into Rapture (from Tapastic)'
    url = 'http://tapastic.com/series/Goneintorapture'
4223
4224
4225
class HeckIfIKnowComicsTapa(GenericTapasticComic):
    """Class to retrieve Heck If I Know Comics."""
    # Also on http://heckifiknowcomics.com
    name = 'heck-tapa'
    long_name = 'Heck if I Know comics (from Tapastic)'
    url = 'http://tapastic.com/series/Regular'
4231
4232
4233
class CheerUpEmoKidTapa(GenericTapasticComic):
    """Class to retrieve CheerUpEmoKid comics."""
    # Also on http://www.cheerupemokid.com
    # Also on http://enzocomics.tumblr.com
    name = 'cuek-tapa'
    long_name = 'Cheer Up Emo Kid (from Tapastic)'
    url = 'http://tapastic.com/series/CUEK'
4240
4241
4242
class BigFootJusticeTapa(GenericTapasticComic):
    """Class to retrieve Big Foot Justice comics."""
    # Also on http://bigfootjustice.com
    name = 'bigfoot-tapa'
    long_name = 'Big Foot Justice (from Tapastic)'
    url = 'http://tapastic.com/series/bigfoot-justice'
4248
4249
4250
class UpAndOutTapa(GenericTapasticComic):
    """Class to retrieve Up & Out comics."""
    # Also on http://upandoutcomic.tumblr.com
    name = 'upandout-tapa'
    long_name = 'Up And Out (from Tapastic)'
    url = 'http://tapastic.com/series/UP-and-OUT'
4256
4257
4258
class ToonHoleTapa(GenericTapasticComic):
    """Class to retrieve Toon Holes comics."""
    # Also on http://www.toonhole.com
    name = 'toonhole-tapa'
    long_name = 'Toon Hole (from Tapastic)'
    url = 'http://tapastic.com/series/TOONHOLE'
4264
4265
4266
class AngryAtNothingTapa(GenericTapasticComic):
    """Class to retrieve Angry at Nothing comics."""
    # Also on http://www.angryatnothing.net
    name = 'angry-tapa'
    long_name = 'Angry At Nothing (from Tapastic)'
    url = 'http://tapastic.com/series/Comics-yeah-definitely-comics-'
4272
4273
4274
class LeleozTapa(GenericTapasticComic):
    """Class to retrieve Leleoz comics."""
    # Also on http://leleozcomics.tumblr.com
    name = 'leleoz-tapa'
    long_name = 'Leleoz (from Tapastic)'
    url = 'https://tapastic.com/series/Leleoz'
4280
4281
4282
class TheAwkwardYetiTapa(GenericTapasticComic):
    """Class to retrieve The Awkward Yeti comics."""
    # Also on http://www.gocomics.com/the-awkward-yeti
    # Also on http://theawkwardyeti.com
    # Also on http://larstheyeti.tumblr.com
    name = 'yeti-tapa'
    long_name = 'The Awkward Yeti (from Tapastic)'
    url = 'https://tapastic.com/series/TheAwkwardYeti'
4290
4291
4292
class AsPerUsualTapa(GenericTapasticComic):
    """Class to retrieve As Per Usual comics."""
    # Also on http://as-per-usual.tumblr.com
    name = 'usual-tapa'
    long_name = 'As Per Usual (from Tapastic)'
    url = 'https://tapastic.com/series/AsPerUsual'
4298
4299
4300
class OneOneOneOneComicTapa(GenericTapasticComic):
    """Class to retrieve 1111 Comics."""
    # Also on http://www.1111comics.me
    # Also on http://comics1111.tumblr.com
    name = '1111-tapa'
    long_name = '1111 Comics (from Tapastic)'
    url = 'https://tapastic.com/series/1111-Comics'
4307
4308
4309
class TumbleDryTapa(GenericTapasticComic):
    """Class to retrieve Tumble Dry comics."""
    # Also on http://tumbledrycomics.com
    name = 'tumbledry-tapa'
    # Displayed name: spelled 'Tumble Dry' to agree with the series URL.
    long_name = 'Tumble Dry (from Tapastic)'
    url = 'https://tapastic.com/series/TumbleDryComics'
4315
4316
4317
class DeadlyPanelTapa(GenericTapasticComic):
    """Class to retrieve Deadly Panel comics."""
    # Also on http://www.deadlypanel.com
    name = 'deadly-tapa'
    long_name = 'Deadly Panel (from Tapastic)'
    url = 'https://tapastic.com/series/deadlypanel'
4323
4324
4325
class ChrisHallbeckMaxiTapa(GenericTapasticComic):
    """Class to retrieve Chris Hallbeck comics (Maximumble series)."""
    # Also on http://chrishallbeck.tumblr.com
    # Also on http://maximumble.com
    name = 'hallbeckmaxi-tapa'
    long_name = 'Chris Hallbeck - Maximumble (from Tapastic)'
    url = 'https://tapastic.com/series/Maximumble'
4332
4333
4334
class ChrisHallbeckMiniTapa(GenericTapasticComic):
    """Class to retrieve Chris Hallbeck comics (Minimumble series)."""
    # Also on http://chrishallbeck.tumblr.com
    # Also on http://minimumble.com
    name = 'hallbeckmini-tapa'
    long_name = 'Chris Hallbeck - Minimumble (from Tapastic)'
    url = 'https://tapastic.com/series/Minimumble'
4341
4342
4343
class ChrisHallbeckBiffTapa(GenericTapasticComic):
    """Class to retrieve Chris Hallbeck comics (The Book of Biff series)."""
    # Also on http://chrishallbeck.tumblr.com
    # Also on http://thebookofbiff.com
    name = 'hallbeckbiff-tapa'
    long_name = 'Chris Hallbeck - The Book of Biff (from Tapastic)'
    url = 'https://tapastic.com/series/Biff'
4350
4351
4352
class RandoWisTapa(GenericTapasticComic):
    """Class to retrieve RandoWis comics."""
    # Also on https://randowis.com
    name = 'randowis-tapa'
    long_name = 'RandoWis (from Tapastic)'
    url = 'https://tapastic.com/series/RandoWis'
4358
4359
4360
class PigeonGazetteTapa(GenericTapasticComic):
    """Class to retrieve The Pigeon Gazette comics."""
    # Also on http://thepigeongazette.tumblr.com
    name = 'pigeon-tapa'
    long_name = 'The Pigeon Gazette (from Tapastic)'
    url = 'https://tapastic.com/series/The-Pigeon-Gazette'
4366
4367
4368
class TheOdd1sOutTapa(GenericTapasticComic):
    """Class to retrieve The Odd 1s Out comics."""
    # Also on http://theodd1sout.com
    # Also on http://theodd1sout.tumblr.com
    name = 'theodd-tapa'
    long_name = 'The Odd 1s Out (from Tapastic)'
    url = 'https://tapastic.com/series/Theodd1sout'
4375
4376
4377
class TheWorldIsFlatTapa(GenericTapasticComic):
    """Class to retrieve The World Is Flat Comics."""
    # Also on http://theworldisflatcomics.tumblr.com
    name = 'flatworld-tapa'
    long_name = 'The World Is Flat (from Tapastic)'
    url = 'https://tapastic.com/series/The-World-is-Flat'
4383
4384
4385
class MisterAndMeTapa(GenericTapasticComic):
    """Class to retrieve Mister & Me Comics."""
    # Also on http://www.mister-and-me.com
    # Also on http://www.gocomics.com/mister-and-me
    name = 'mister-tapa'
    long_name = 'Mister & Me (from Tapastic)'
    url = 'https://tapastic.com/series/Mister-and-Me'
4392
4393
4394
class TalesOfAbsurdityTapa(GenericTapasticComic):
    """Class to retrieve Tales Of Absurdity comics."""
    # Also on http://talesofabsurdity.com
    # Also on http://talesofabsurdity.tumblr.com
    name = 'absurdity-tapa'
    long_name = 'Tales of Absurdity (from Tapastic)'
    url = 'http://tapastic.com/series/Tales-Of-Absurdity'
4401
4402
4403
class BFGFSTapa(GenericTapasticComic):
    """Class to retrieve BFGFS comics."""
    # Also on http://bfgfs.com
    # Also on http://bfgfs.tumblr.com
    name = 'bfgfs-tapa'
    long_name = 'BFGFS (from Tapastic)'
    url = 'https://tapastic.com/series/BFGFS'
4410
4411
4412
class DoodleForFoodTapa(GenericTapasticComic):
    """Class to retrieve Doodle For Food comics."""
    # Also on http://doodleforfood.com
    name = 'doodle-tapa'
    long_name = 'Doodle For Food (from Tapastic)'
    url = 'https://tapastic.com/series/Doodle-for-Food'
4418
4419
4420
class MrLovensteinTapa(GenericTapasticComic):
    """Class to retrieve Mr Lovenstein comics."""
    # NOTE(review): the previous "Also on" comment pointed at this very
    # Tapastic URL; presumably http://www.mrlovenstein.com was meant - confirm.
    name = 'mrlovenstein-tapa'
    long_name = 'Mr. Lovenstein (from Tapastic)'
    url = 'https://tapastic.com/series/MrLovenstein'
4426
4427
4428
class CassandraCalinTapa(GenericTapasticComic):
    """Class to retrieve C. Cassandra comics."""
    # Also on http://cassandracalin.com
    # Also on http://c-cassandra.tumblr.com
    name = 'cassandra-tapa'
    long_name = 'Cassandra Calin (from Tapastic)'
    url = 'https://tapastic.com/series/C-Cassandra-comics'
4435
4436
4437
class WafflesAndPancakes(GenericTapasticComic):
    """Class to retrieve Waffles And Pancakes comics."""
    # Also on http://wandpcomic.com
    name = 'waffles'
    long_name = 'Waffles And Pancakes'
    url = 'https://tapastic.com/series/Waffles-and-Pancakes'
4443
4444
4445
def get_subclasses(klass):
    """Gets the list of direct/indirect subclasses of a class.

    Direct subclasses come first, followed by the descendants of each of
    them in turn. A class reachable through several parents appears once
    per path (no de-duplication is performed here).
    """
    direct = klass.__subclasses__()
    found = list(direct)
    for derived in direct:
        found.extend(get_subclasses(derived))
    return found
4451
4452
4453
def remove_st_nd_rd_th_from_date(string):
    """Function to transform 1st/2nd/3rd/4th in a parsable date format.

    Only an ordinal suffix directly following a digit is removed, so words
    that merely contain 'st', 'nd', 'rd' or 'th' (August, Monday, Saturday,
    month...) are left untouched.
    """
    return re.sub(r'(?<=[0-9])(?:st|nd|rd|th)', '', string)
4461
4462
4463
def string_to_date(string, date_format, local=DEFAULT_LOCAL):
    """Function to convert string to date object.

    Wrapper around datetime.datetime.strptime: the locale `local` is set for
    the duration of the parsing and the previous locale is restored
    afterwards, even when the parsing fails.

    Raises ValueError (from strptime) when `string` does not match
    `date_format`.
    """
    # format described in https://docs.python.org/2/library/datetime.html#strftime-and-strptime-behavior
    prev_locale = locale.setlocale(locale.LC_ALL)
    must_switch = local != prev_locale
    if must_switch:
        locale.setlocale(locale.LC_ALL, local)
    try:
        return datetime.datetime.strptime(string, date_format).date()
    finally:
        # Never leave the process-wide locale changed behind us.
        if must_switch:
            locale.setlocale(locale.LC_ALL, prev_locale)
4474
4475
4476
# Every direct or indirect subclass of GenericComic known at import time.
COMICS = set(get_subclasses(GenericComic))
# Only the classes defining a name are kept as valid comics.
VALID_COMICS = [c for c in COMICS if c.name is not None]
COMIC_NAMES = {c.name: c for c in VALID_COMICS}
# Equal sizes mean that no two valid comics share the same name.
assert len(VALID_COMICS) == len(COMIC_NAMES)
CLASS_NAMES = {c.__name__ for c in VALID_COMICS}
# Equal sizes mean that no two valid comics share the same class name.
assert len(VALID_COMICS) == len(CLASS_NAMES)
4482