Completed
Push — master ( c3d761...46e76e )
by De
01:17
created

comics.py (26 issues)

1
#! /usr/bin/python3
2
# vim: set expandtab tabstop=4 shiftwidth=4 :
3
"""Module to retrieve webcomics"""
4
5
from comic_abstract import GenericComic, get_date_for_comic
6
import re
7
from datetime import date, timedelta
8
import datetime
9
from urlfunctions import get_soup_at_url, urljoin_wrapper,\
10
    convert_iri_to_plain_ascii_uri, load_json_at_url, urlopen_wrapper
11
import json
12
import locale
13
import urllib
14
15
DEFAULT_LOCAL = 'en_GB.UTF-8'
16
17
18
class Xkcd(GenericComic):
    """Retriever for xkcd comics, based on the JSON files served by the site."""
    name = 'xkcd'
    long_name = 'xkcd'
    url = 'http://xkcd.com'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Yield the comics following `last_comic`, oldest first.

        Implementation of GenericComic's abstract method. `last_comic` is
        the previously retrieved comic (a dict with a 'num' key) or a falsy
        value to start from number 1. The number of the most recent comic
        is read from 'info.0.json' at the root of the site.
        """
        start = last_comic['num'] + 1 if last_comic else 1
        latest = load_json_at_url(urljoin_wrapper(cls.url, 'info.0.json'))
        stop = latest['num'] + 1

        for num in range(start, stop):
            if num == 404:
                # Number 404 is never requested - presumably no comic
                # exists under that number (TODO confirm).
                continue
            json_url = urljoin_wrapper(cls.url, '%d/info.0.json' % num)
            comic = load_json_at_url(json_url)
            # Normalise the downloaded record: 'img' becomes a list and
            # the date fields become integers.
            comic.update(
                img=[comic['img']],
                prefix='%d-' % num,
                json_url=json_url,
                url=urljoin_wrapper(cls.url, str(num)),
                day=int(comic['day']),
                month=int(comic['month']),
                year=int(comic['year']),
            )
            assert comic['num'] == num
            yield comic
44
45
46
# Helper functions corresponding to get_url_from_link/get_url_from_archive_element
47
48
49
@classmethod
def get_href(cls, link):
    """Return the 'href' entry of `link` unchanged.

    Usable as an implementation of
    get_url_from_link/get_url_from_archive_element.
    """
    href = link['href']
    return href
53
54
55
@classmethod
def join_cls_url_to_href(cls, link):
    """Return the 'href' entry of `link` joined to the class url.

    Usable as an implementation of
    get_url_from_link/get_url_from_archive_element.
    """
    href = link['href']
    return urljoin_wrapper(cls.url, href)
59
60
61
class GenericNavigableComic(GenericComic):
    """Generic class for "navigable" comics : with first/next arrows.

    This class applies to comics where previous and next comics can be
    accessed from a given comic. Once given a starting point (either
    the first comic or the last comic retrieved), it handles the
    navigation, the retrieval of the soup object and the setting of
    the 'url' attribute on retrieved comics. This limits a lot the
    amount of boilerplate code in the different implementation classes.

    The method `get_next_comic` is implemented in terms of more
    specialized methods to be implemented/overridden:
        - get_first_comic_link
        - get_navi_link
        - get_comic_info
        - get_url_from_link
    """

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        Sometimes this can be retrieved of any comic page, sometimes on
        the archive page, sometimes it doesn't exist at all and one has
        to iterate backward to find it before hardcoding the result found.
        """
        raise NotImplementedError

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next (or previous - for dev purposes) comic.

        `next_` is truthy to get the next link, falsy to get the previous one.
        """
        raise NotImplementedError

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The result is either None (the page is skipped) or a dict which
        must not contain a 'url' key: `get_next_comic` sets it.
        """
        raise NotImplementedError

    @classmethod
    def get_url_from_link(cls, link):
        """Get url corresponding to a link. Default implementation is similar to get_href."""
        return link['href']

    @classmethod
    def get_next_link(cls, last_soup):
        """Get link to next comic."""
        return cls.get_navi_link(last_soup, True)

    @classmethod
    def get_prev_link(cls, last_soup):
        """Get link to previous comic."""
        return cls.get_navi_link(last_soup, False)

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generic implementation of get_next_comic for navigable comics.

        Starts from the page following `last_comic['url']` or, when there
        is no last comic, from the first comic link. Yields the dicts
        returned by `get_comic_info` with the 'url' key added.
        """
        url = last_comic['url'] if last_comic else None
        next_comic = \
            cls.get_next_link(get_soup_at_url(url)) \
            if url else \
            cls.get_first_comic_link()
        cls.log("next/first comic will be %s (url is %s)" % (str(next_comic), url))
        while next_comic:
            prev_url, url = url, cls.get_url_from_link(next_comic)
            if prev_url == url:
                # A "next" link pointing to the current page would loop forever.
                cls.log("got same url %s" % url)
                break
            cls.log("about to get %s (%s)" % (url, str(next_comic)))
            soup = get_soup_at_url(url)
            comic = cls.get_comic_info(soup, next_comic)
            if comic is not None:
                assert 'url' not in comic
                comic['url'] = url
                yield comic
            next_comic = cls.get_next_link(soup)
            cls.log("next comic will be %s" % str(next_comic))

    @classmethod
    def check_first_link(cls):
        """Check that navigation to first comic seems to be working - for dev purposes.

        Returns True when a first link exists and its page can be
        retrieved, False otherwise (a message is printed in that case).
        """
        # `import urllib` alone does not guarantee that the `error`
        # submodule is loaded: import it explicitly.
        import urllib.error

        cls.log("about to check first link")
        ok = True
        firstlink = cls.get_first_comic_link()
        if firstlink is None:
            print("From %s : no first link" % cls.url)
            ok = False
        else:
            firsturl = cls.get_url_from_link(firstlink)
            try:
                get_soup_at_url(firsturl)
            except urllib.error.HTTPError:
                print("From %s : invalid first url" % cls.url)
                ok = False
        cls.log("checked first link -> returned %d" % ok)
        return ok

    @classmethod
    def check_prev_next_links(cls, url):
        """Check that navigation to prev/next from a given URL seems to be working - for dev purposes.

        Going backward then forward (and forward then backward) from `url`
        is expected to lead back to `url`. Returns True when the checks
        pass, False otherwise (a message is printed for each problem).
        """
        cls.log("about to check prev/next from %s" % url)
        ok = True
        if url is None:
            prevlink, nextlink = None, None
        else:
            soup = get_soup_at_url(url)
            prevlink, nextlink = cls.get_prev_link(soup), cls.get_next_link(soup)
        if prevlink is None and nextlink is None:
            print("From %s : no previous nor next" % url)
            ok = False
        else:
            if prevlink:
                prevurl = cls.get_url_from_link(prevlink)
                prevsoup = get_soup_at_url(prevurl)
                # The neighbouring page may have no link back: report it
                # as a navigation problem instead of failing on None.
                backlink = cls.get_next_link(prevsoup)
                prevnext = None if backlink is None else cls.get_url_from_link(backlink)
                if prevnext != url:
                    print("From %s, going backward then forward leads to %s" % (url, prevnext))
                    ok = False
            if nextlink:
                nexturl = cls.get_url_from_link(nextlink)
                if nexturl != url:
                    nextsoup = get_soup_at_url(nexturl)
                    backlink = cls.get_prev_link(nextsoup)
                    nextprev = None if backlink is None else cls.get_url_from_link(backlink)
                    if nextprev != url:
                        print("From %s, going forward then backward leads to %s" % (url, nextprev))
                        ok = False
        cls.log("checked prev/next from %s -> returned %d" % (url, ok))
        return ok

    @classmethod
    def check_navigation(cls, url):
        """Check that navigation functions seem to be working - for dev purposes.

        Runs both `check_first_link` and `check_prev_next_links` and
        returns True only when both succeed.
        """
        cls.log("about to check navigation from %s" % url)
        first = cls.check_first_link()
        prevnext = cls.check_prev_next_links(url)
        ok = first and prevnext
        cls.log("checked navigation from %s -> returned %d" % (url, ok))
        return ok
198
199
200
class GenericListableComic(GenericComic):
    """Generic class for "listable" comics : comics exposing a list (aka 'archive').

    `get_next_comic` is written in terms of more specialized methods
    that subclasses implement/override:
        - get_archive_elements
        - get_url_from_archive_element
        - get_comic_info
    """

    @classmethod
    def get_archive_elements(cls):
        """Return an iterable over the archive elements."""
        raise NotImplementedError

    @classmethod
    def get_url_from_archive_element(cls, archive_elt):
        """Return the url matching an archive element."""
        raise NotImplementedError

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Return the information (dict or None) about one comic."""
        raise NotImplementedError

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generic implementation of get_next_comic for listable comics.

        Archive elements are skipped until the url of `last_comic` has
        been seen; every element after it is fetched and yielded with
        its 'url' key set. Without a last comic, nothing is skipped.
        """
        pending = last_comic['url'] if last_comic else None
        for elt in cls.get_archive_elements():
            elt_url = cls.get_url_from_archive_element(elt)
            cls.log("considering %s" % elt_url)
            if pending and pending == elt_url:
                # Found the last retrieved comic: start yielding after it.
                pending = None
                continue
            if pending is not None:
                continue
            cls.log("about to get %s (%s)" % (elt_url, str(elt)))
            info = cls.get_comic_info(get_soup_at_url(elt_url), elt)
            if info is None:
                continue
            assert 'url' not in info
            info['url'] = elt_url
            yield info
        if pending is not None:
            print("Did not find %s : there might be a problem" % pending)
244
245
# Helper functions corresponding to get_first_comic_link/get_navi_link
246
247
248
@classmethod
def get_link_rel_next(cls, last_soup, next_):
    """Implementation of get_navi_link looking for a 'link' tag with rel next/prev."""
    rel = 'next' if next_ else 'prev'
    return last_soup.find('link', rel=rel)
252
253
254
@classmethod
def get_a_rel_next(cls, last_soup, next_):
    """Implementation of get_navi_link looking for an 'a' tag with rel next/prev."""
    rel = 'next' if next_ else 'prev'
    return last_soup.find('a', rel=rel)
258
259
260
@classmethod
def get_a_navi_navinext(cls, last_soup, next_):
    """Implementation of get_navi_link looking for an 'a' tag by its navi-next/navi-prev class."""
    if next_:
        css_class = 'navi navi-next'
    else:
        css_class = 'navi navi-prev'
    return last_soup.find('a', class_=css_class)
264
265
266
@classmethod
def get_a_navi_comicnavnext_navinext(cls, last_soup, next_):
    """Implementation of get_navi_link looking for an 'a' tag by its comic-nav-next/previous class."""
    if next_:
        css_class = 'navi comic-nav-next navi-next'
    else:
        css_class = 'navi comic-nav-previous navi-prev'
    return last_soup.find('a', class_=css_class)
270
271
272
@classmethod
def get_a_comicnavbase_comicnavnext(cls, last_soup, next_):
    """Implementation of get_navi_link looking for an 'a' tag by its comic-nav-base class."""
    if next_:
        css_class = 'comic-nav-base comic-nav-next'
    else:
        css_class = 'comic-nav-base comic-nav-previous'
    return last_soup.find('a', class_=css_class)
276
277
278
@classmethod
def get_a_navi_navifirst(cls):
    """Implementation of get_first_comic_link: 'a' tag of class 'navi navi-first' on the page at cls.url."""
    home_soup = get_soup_at_url(cls.url)
    return home_soup.find('a', class_='navi navi-first')
282
283
284
@classmethod
def get_div_navfirst_a(cls):
    """Implementation of get_first_comic_link: 'a' tag inside the 'nav-first' div on the page at cls.url."""
    home_soup = get_soup_at_url(cls.url)
    nav_first = home_soup.find('div', class_="nav-first")
    return nav_first.find('a')
288
289
290
@classmethod
def get_a_comicnavbase_comicnavfirst(cls):
    """Implementation of get_first_comic_link: 'a' tag of class 'comic-nav-base comic-nav-first' on the page at cls.url."""
    home_soup = get_soup_at_url(cls.url)
    return home_soup.find('a', class_='comic-nav-base comic-nav-first')
294
295
296
@classmethod
def simulate_first_link(cls):
    """Implementation of get_first_comic_link building a link-like dict.

    The dict has a single 'href' entry holding the `first_url`
    attribute provided by the class.
    """
    first_link = {'href': cls.first_url}
    return first_link
301
302
303
@classmethod
def navigate_to_first_comic(cls):
    """Implementation of get_first_comic_link walking back from a user provided URL.

    Sometimes, the first comic cannot be reached directly, so one has
    to follow the "previous" links until there is none left. Once the
    resulting URL is known, it is better to hardcode it, but for
    development purposes it is convenient to find it automatically.

    The starting URL is asked interactively and every visited URL is
    printed. Returns a link-like dict whose 'href' is the last URL reached.
    """
    url = input("Get starting URL: ")
    while True:
        print(url)
        prev_link = cls.get_prev_link(get_soup_at_url(url))
        if not prev_link:
            # No previous comic: `url` is the first one.
            return {'href': url}
        url = cls.get_url_from_link(prev_link)
322
323
324
class GenericEmptyComic(GenericComic):
    """Generic class for comics where nothing is to be done.

    It can be used to temporarily deactivate a comic that does not work
    properly by replacing `class MyComic(GenericWhateverComic)` with
    `class MyComic(GenericEmptyComic, GenericWhateverComic)`."""

    @classmethod
    def get_next_comic(cls, last_comic):
        """Implementation of get_next_comic yielding nothing: an empty list is returned."""
        cls.log("comic is considered as empty - returning no comic")
        no_comics = []
        return no_comics
336
337
338 View Code Duplication
class ExtraFabulousComics(GenericNavigableComic):
    """Class to retrieve Extra Fabulous Comics."""
    name = 'efc'
    long_name = 'Extra Fabulous Comics'
    url = 'http://extrafabulouscomics.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Images are the 'img' tags whose source is under the site's
        'wp-content/uploads/' directory; title and date are read from
        the 'og:title' and 'article:published_time' meta tags.
        """
        # The url is escaped so that its '.' only matches a literal dot.
        img_src_re = re.compile('^%s/wp-content/uploads/' % re.escape(cls.url))
        imgs = soup.find_all('img', src=img_src_re)
        title = soup.find('meta', property='og:title')['content']
        # Only the leading "YYYY-MM-DD" part of the timestamp is used.
        date_str = soup.find('meta', property='article:published_time')['content'][:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        return {
            'title': title,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'prefix': title + '-'
        }
362
363
364 View Code Duplication
class GenericLeMondeBlog(GenericNavigableComic):
    """Generic retriever for comics hosted on Le Monde blogs.

    Subclasses provide `first_url`, the page navigation starts from.
    """
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, short link, date and images from a blog post page."""
        short_link = soup.find('link', rel='shortlink')
        og_title = soup.find('meta', property='og:title')
        entry_date = soup.find("span", class_="entry-date")
        # Dates are parsed with the French locale.
        day = string_to_date(entry_date.string, "%d %B %Y", "fr_FR.utf8")
        img_urls = [
            convert_iri_to_plain_ascii_uri(meta['content'])
            for meta in soup.find_all('meta', property='og:image')
        ]
        return {
            'title': og_title['content'],
            'url2': short_link['href'],
            'img': img_urls,
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
386
387
388
class ZepWorld(GenericLeMondeBlog):
    """Retriever for the Zep World blog."""
    name = 'zep'
    long_name = 'Zep World'
    url = 'http://zepworld.blog.lemonde.fr'
    first_url = 'http://zepworld.blog.lemonde.fr/2014/10/31/bientot-le-blog-de-zep/'
394
395
396
class Vidberg(GenericLeMondeBlog):
    """Retriever for the Vidberg blog."""
    name = "vidberg"
    long_name = "Vidberg - l'actu en patates"
    url = 'http://vidberg.blog.lemonde.fr'
    # Not the actual first post: no efficient way to retrieve it was found.
    first_url = 'http://vidberg.blog.lemonde.fr/2012/02/09/revue-de-campagne-la-campagne-du-modem-semballe/'
403
404
405
class Plantu(GenericLeMondeBlog):
    """Retriever for the Plantu blog."""
    name = "plantu"
    long_name = 'Plantu'
    url = 'http://plantu.blog.lemonde.fr'
    first_url = 'http://plantu.blog.lemonde.fr/2014/10/28/stress-test-a-bruxelles/'
411
412
413
class XavierGorce(GenericLeMondeBlog):
    """Retriever for the Xavier Gorce blog."""
    name = "gorce"
    long_name = 'Xavier Gorce'
    url = 'http://xaviergorce.blog.lemonde.fr'
    first_url = 'http://xaviergorce.blog.lemonde.fr/2015/01/09/distinction/'
419
420
421
class CartooningForPeace(GenericLeMondeBlog):
    """Retriever for the Cartooning For Peace blog."""
    name = "forpeace"
    long_name = 'Cartooning For Peace'
    url = 'http://cartooningforpeace.blog.lemonde.fr'
    first_url = 'http://cartooningforpeace.blog.lemonde.fr/2014/12/15/bado/'
427
428
429
class Aurel(GenericLeMondeBlog):
    """Retriever for the Aurel blog."""
    name = "aurel"
    long_name = 'Aurel'
    url = 'http://aurel.blog.lemonde.fr'
    first_url = 'http://aurel.blog.lemonde.fr/2014/09/29/le-senat-repasse-a-droite/'
435
436
437
class LesCulottees(GenericLeMondeBlog):
    """Retriever for the Les Culottees blog."""
    name = "culottees"
    long_name = "Les Culottees"
    url = 'http://lesculottees.blog.lemonde.fr'
    first_url = 'http://lesculottees.blog.lemonde.fr/2016/01/11/clementine-delait-femme-a-barbe/'
443
444
445
class UneAnneeAuLycee(GenericLeMondeBlog):
    """Retriever for the Une Annee Au Lycee blog."""
    name = "lycee"
    long_name = "Une Annee au Lycee"
    url = "http://uneanneeaulycee.blog.lemonde.fr"
    first_url = 'http://uneanneeaulycee.blog.lemonde.fr/2016/06/13/la-semaine-du-bac-est-arrivee/'
451
452
453 View Code Duplication
class Rall(GenericNavigableComic):
    """Retriever for Ted Rall comics."""
    # Also on http://www.gocomics.com/tedrall
    name = "rall"
    long_name = 'Ted Rall'
    url = 'http://rall.com/comic'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    # Not the actual first comic: no efficient way to retrieve it was found.
    first_url = 'http://rall.com/2014/01/30/los-angeles-times-cartoon-well-miss-those-california-flowers'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, author, date, description and images from a comic page."""
        og_title = soup.find('meta', property='og:title')
        author_link = soup.find("span", class_="author vcard").find("a")
        entry_date = soup.find("span", class_="entry-date")
        day = string_to_date(entry_date.string, "%B %d, %Y")
        og_desc = soup.find('meta', property='og:description')
        content_imgs = soup.find('div', class_='entry-content').find_all('img')
        # The last 7 images are dropped: they are social media buttons.
        comic_imgs = content_imgs[:-7]
        return {
            'title': og_title['content'],
            'author': author_link.string,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'description': og_desc['content'],
            'img': [img['src'] for img in comic_imgs],
        }
483
484
485 View Code Duplication
class Dilem(GenericNavigableComic):
    """Retriever for Ali Dilem comics."""
    name = "dilem"
    long_name = "Ali Dilem"
    url = "http://information.tv5monde.com/dilem"
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://information.tv5monde.com/dilem/2004-06-26'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the 'a' tag leading to the next or previous comic, or None."""
        # On this site, prev is next / next is prev.
        li_class = 'prev' if next_ else 'next'
        item = last_soup.find('li', class_=li_class)
        if not item:
            return None
        return item.find('a')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract short url, title, images and date from a comic page."""
        shortlink = soup.find('link', rel='shortlink')
        twitter_title = soup.find('meta', attrs={'name': 'twitter:title'})
        og_images = soup.find_all('meta', property='og:image')
        # Only the leading "YYYY-MM-DD" part of the date is used.
        iso_date = soup.find('span', property='dc:date')['content'][:10]
        day = string_to_date(iso_date, "%Y-%m-%d")
        return {
            'short_url': shortlink['href'],
            'title': twitter_title['content'],
            'img': [meta['content'] for meta in og_images],
            'day': day.day,
            'month': day.month,
            'year': day.year,
        }
518
519
520 View Code Duplication
class SpaceAvalanche(GenericNavigableComic):
    """Retriever for Space Avalanche comics."""
    name = "avalanche"
    long_name = "Space Avalanche"
    url = "http://www.spaceavalanche.com"
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return a hardcoded link-like dict for the first comic."""
        first_link = {
            'href': "http://www.spaceavalanche.com/2009/02/02/irish-sea/",
            'title': "Irish Sea",
        }
        return first_link

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title (from the link), date (from the url) and images."""
        url_date_re = re.compile('.*/([0-9]*)/([0-9]*)/([0-9]*)/.*$')
        title = link['title']
        # The url contains the date as .../year/month/day/...
        date_match = url_date_re.match(cls.get_url_from_link(link))
        year, month, day = map(int, date_match.groups())
        entry_imgs = soup.find("div", class_="entry").find_all("img")
        return {
            'title': title,
            'day': day,
            'month': month,
            'year': year,
            'img': [img['src'] for img in entry_imgs],
        }
548
549
550
class ZenPencils(GenericNavigableComic):
    """Retriever for ZenPencils comics."""
    # Also on http://zenpencils.tumblr.com
    # Also on http://www.gocomics.com/zen-pencils
    name = "zenpencils"
    long_name = "Zen Pencils"
    url = "http://zenpencils.com"
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://zenpencils.com/comic/1-ralph-waldo-emerson-make-them-cry/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, description, author, date and images from a comic page."""
        imgs = soup.find('div', id='comic').find_all('img')
        post = soup.find('div', class_='post-content')
        author_link = post.find("span", class_="post-author").find("a")
        author = author_link.string
        title = soup.find('meta', property='og:title')['content']
        post_date = post.find('span', class_='post-date')
        day = string_to_date(post_date.string, "%B %d, %Y")
        # Sanity checks on the images found in the 'comic' div.
        assert imgs
        for img in imgs:
            assert img['alt'] == img['title']
        for img in imgs:
            assert img['alt'] in (title, "")
        desc = soup.find('meta', property='og:description')['content']
        img_urls = [urljoin_wrapper(cls.url, img['src']) for img in imgs]
        return {
            'title': title,
            'description': desc,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': img_urls,
        }
584
585
586
class ItsTheTie(GenericNavigableComic):
    """Retriever for It's the tie comics."""
    # Also on http://itsthetie.tumblr.com
    # Also on https://tapastic.com/series/itsthetie
    name = "tie"
    long_name = "It's the tie"
    url = 'http://itsthetie.com'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, date, images (bonus ones included) and tags from a comic page."""
        title = soup.find('h1', class_='comic-title').find('a').string
        meta_header = soup.find('header', class_='comic-meta entry-meta')
        day = string_to_date(meta_header.find('a').string, "%B %d, %Y")
        # Bonus images may or may not be in meta og:image.
        og_srcs = [meta['content'] for meta in soup.find_all('meta', property='og:image')]
        bonus_srcs = [
            img['data-oversrc']
            for img in soup.find_all('img', attrs={'data-oversrc': True})
        ]
        # Bonus images already listed in og:image are not added twice.
        candidates = og_srcs + [src for src in bonus_srcs if src not in og_srcs]
        kept_srcs = [
            src for src in candidates
            if not src.endswith("/2016/01/bonus-panel.png")
        ]
        tag_meta = soup.find('meta', property='article:tag')
        if tag_meta:
            tags = tag_meta['content']
        else:
            tags = ""
        return {
            'title': title,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': kept_srcs,
            'tags': tags,
        }
619
620 View Code Duplication
0 ignored issues
show
This code seems to be duplicated in your project.
Loading history...
621
class PenelopeBagieu(GenericNavigableComic):
    """Retriever for the comics of Penelope Bagieu's blog."""
    name = "bagieu"
    long_name = "Ma vie est tout a fait fascinante (Bagieu)"
    url = "http://www.penelope-jolicoeur.com"
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = "http://www.penelope-jolicoeur.com/2007/02/ma-vie-mon-oeuv.html"

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, date and images from a blog entry page."""
        date_header = soup.find('h2', class_='date-header')
        # Dates are parsed with the French locale.
        day = string_to_date(date_header.string, "%A %d %B %Y", "fr_FR.utf8")
        body_imgs = soup.find('div', class_='entry-body').find_all('img')
        entry_header = soup.find('h3', class_='entry-header')
        return {
            'title': entry_header.string,
            'img': [img['src'] for img in body_imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
644
645 View Code Duplication
646
class OneOneOneOneComic(GenericNavigableComic):
    """Retriever for 1111 Comics."""
    # Also on http://comics1111.tumblr.com
    # Also on https://tapastic.com/series/1111-Comics
    name = "1111"
    long_name = "1111 Comics"
    url = "http://www.1111comics.me"
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, date and images from a comic page."""
        title_link = soup.find('h1', class_='comic-title').find('a')
        meta_header = soup.find('header', class_='comic-meta entry-meta')
        day = string_to_date(meta_header.find('a').string, "%B %d, %Y")
        og_images = soup.find_all('meta', property='og:image')
        return {
            'title': title_link.string,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [meta['content'] for meta in og_images],
        }
670
671
672
class AngryAtNothing(GenericNavigableComic):
    """Retriever for Angry at Nothing comics."""
    # Also on http://tapastic.com/series/Comics-yeah-definitely-comics-
    name = "angry"
    long_name = "Angry At Nothing"
    url = "http://www.angryatnothing.net"
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, date and images from a comic page."""
        title_link = soup.find('h1', class_='comic-title').find('a')
        meta_header = soup.find('header', class_='comic-meta entry-meta')
        day = string_to_date(meta_header.find('a').string, "%B %d, %Y")
        og_images = soup.find_all('meta', property='og:image')
        return {
            'title': title_link.string,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [meta['content'] for meta in og_images],
        }
695
696
697
class NeDroid(GenericNavigableComic):
    """Class to retrieve NeDroid comics."""
    name = 'nedroid'
    long_name = 'NeDroid'
    url = 'http://nedroid.com'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The comic number is read from the short url ('?p=<num>') and the
        date from the source of the single image of the 'comic' div,
        which contains '<year>-<month>-<day>'.
        """
        # The url is escaped so that its '.' only matches a literal dot.
        short_url_re = re.compile(r'^%s/\?p=([0-9]*)' % re.escape(cls.url))
        comic_url_re = re.compile(r'//nedroid\.com/comics/([0-9]*)-([0-9]*)-([0-9]*).*')
        short_url = cls.get_url_from_link(soup.find('link', rel='shortlink'))
        num = int(short_url_re.match(short_url).groups()[0])
        imgs = soup.find('div', id='comic').find_all('img')
        year, month, day = [int(s) for s in comic_url_re.match(imgs[0]['src']).groups()]
        assert len(imgs) == 1
        title = imgs[0]['alt']
        title2 = imgs[0]['title']
        return {
            'short_url': short_url,
            'title': title,
            'title2': title2,
            'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
            'day': day,
            'month': month,
            'year': year,
            'num': num,
        }
728
729 View Code Duplication
0 ignored issues
show
This code seems to be duplicated in your project.
Loading history...
730
class Garfield(GenericNavigableComic):
    """Class to retrieve Garfield comics."""
    # Also on http://www.gocomics.com/garfield
    name = 'garfield'
    long_name = 'Garfield'
    url = 'https://garfield.com'
    get_first_comic_link = simulate_first_link
    first_url = 'https://garfield.com/comic/1978/06/19'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic."""
        return last_soup.find('a', class_='comic-arrow-right' if next_ else 'comic-arrow-left')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The date is read from the comic url, which is expected to look
        like '<cls.url>/comic/<year>/<month>/<day>'.
        """
        url = cls.get_url_from_link(link)
        # The url is escaped so that its '.' only matches a literal dot.
        date_re = re.compile('^%s/comic/([0-9]*)/([0-9]*)/([0-9]*)' % re.escape(cls.url))
        year, month, day = [int(s) for s in date_re.match(url).groups()]
        imgs = soup.find('div', class_='comic-display').find_all('img', class_='img-responsive')
        return {
            'month': month,
            'year': year,
            'day': day,
            'img': [i['src'] for i in imgs],
        }
757
758 View Code Duplication
759
class Dilbert(GenericNavigableComic):
    """Class to retrieve Dilbert comics."""
    # Also on http://www.gocomics.com/dilbert-classics
    name = 'dilbert'
    long_name = 'Dilbert'
    url = 'http://dilbert.com'
    first_url = 'http://dilbert.com/strip/1989-04-16'
    get_first_comic_link = simulate_first_link
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns the first 'a' tag found in the right (next) or left (previous)
        navigation div, or None when that div is not found.
        """
        nav_class = 'nav-comic nav-right' if next_ else 'nav-comic nav-left'
        nav_div = last_soup.find('div', class_=nav_class)
        if not nav_div:
            return None
        return nav_div.find('a')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        All the values are read from the 'meta' tags of the page
        (og:* and article:* properties).
        """
        def meta_content(prop):
            """Return the 'content' of the first meta tag with that property."""
            return soup.find('meta', property=prop)['content']

        title = meta_content('og:title')
        image_metas = soup.find_all('meta', property='og:image')
        description = meta_content('og:description')
        publish_date = string_to_date(meta_content('article:publish_date'), "%B %d, %Y")
        author = meta_content('article:author')
        tags = meta_content('article:tag')
        return {
            'title': title,
            'description': description,
            'img': [meta['content'] for meta in image_metas],
            'author': author,
            'tags': tags,
            'day': publish_date.day,
            'month': publish_date.month,
            'year': publish_date.year
        }
795
796
797
class VictimsOfCircumsolar(GenericNavigableComic):
    """Class to retrieve VictimsOfCircumsolar comics."""
    name = 'circumsolar'
    long_name = 'Victims Of Circumsolar'
    url = 'http://www.victimsofcircumsolar.com'
    first_url = 'http://www.victimsofcircumsolar.com/comic/modern-addiction'
    get_first_comic_link = simulate_first_link
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Title and description are taken from the last og:title and
        og:description meta tags of the page. No date is returned.
        """
        # Date is on the archive page
        og_titles = soup.find_all('meta', property='og:title')
        title = og_titles[-1]['content']
        og_descriptions = soup.find_all('meta', property='og:description')
        description = og_descriptions[-1]['content']
        comic_div = soup.find('div', id='comic')
        imgs = comic_div.find_all('img')
        # Every image is expected to repeat the page title in 'title' and 'alt'
        assert all(img['title'] == img['alt'] == title for img in imgs)
        return {
            'title': title,
            'description': description,
            'img': [img['src'] for img in imgs],
        }
819
820
821
class ThreeWordPhrase(GenericNavigableComic):
    """Class to retrieve Three Word Phrase comics."""
    # Also on http://www.threewordphrase.tumblr.com
    name = 'threeword'
    long_name = 'Three Word Phrase'
    url = 'http://threewordphrase.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        The link is the parent of the '/firstlink.gif' image on the home page.
        """
        home_soup = get_soup_at_url(cls.url)
        first_button = home_soup.find('img', src='/firstlink.gif')
        return first_button.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        The link is the parent of the next/prev button image; None is
        returned when that parent has no 'href'.
        """
        button_src = '/nextlink.gif' if next_ else '/prevlink.gif'
        parent = last_soup.find('img', src=button_src).parent
        if parent.get('href') is None:
            return None
        return parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Every image of the page is kept except the ones whose source ends
        with one of the known navigation/advertisement file names.
        """
        title_tag = soup.find('title')
        ignored_suffixes = (
            'link.gif', '32.png', 'twpbookad.jpg',
            'merchad.jpg', 'header.gif', 'tipjar.jpg')
        comic_imgs = []
        for img in soup.find_all('img'):
            if img['src'].endswith(ignored_suffixes):
                continue
            comic_imgs.append(img)
        alt_texts = (img.get('alt') for img in comic_imgs if img.get('alt'))
        return {
            'title': title_tag.string if title_tag else None,
            'title2': '  '.join(alt_texts),
            'img': [urljoin_wrapper(cls.url, img['src']) for img in comic_imgs],
        }
853
854
855
class DeadlyPanel(GenericEmptyComic, GenericNavigableComic):
    """Class to retrieve Deadly Panel comics."""
    # Also on https://tapastic.com/series/deadlypanel
    name = 'deadly'
    long_name = 'Deadly Panel'
    url = 'http://www.deadlypanel.com'
    get_navi_link = get_a_navi_comicnavnext_navinext
    get_first_comic_link = get_a_navi_navifirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Only the sources of the images found in the 'comic' div are returned.
        """
        comic_div = soup.find('div', id='comic')
        imgs = comic_div.find_all('img')
        # 'alt' and 'title' are expected to be the same on every image
        assert all(img['alt'] == img['title'] for img in imgs)
        sources = [img['src'] for img in imgs]
        return {
            'img': sources,
        }
872
873
874 View Code Duplication
class TheGentlemanArmchair(GenericNavigableComic):
    """Class to retrieve The Gentleman Armchair comics."""
    name = 'gentlemanarmchair'
    long_name = 'The Gentleman Armchair'
    url = 'http://thegentlemansarmchair.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = get_a_navi_navifirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Title, author and date are read from the post header elements and
        the images from the 'comic' div.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        post_date = string_to_date(
            soup.find('span', class_='post-date').string, "%B %d, %Y")
        comic_div = soup.find('div', id='comic')
        sources = [img['src'] for img in comic_div.find_all('img')]
        return {
            'img': sources,
            'title': title,
            'author': author,
            'month': post_date.month,
            'year': post_date.year,
            'day': post_date.day,
        }
898
899
900 View Code Duplication
class MyExtraLife(GenericNavigableComic):
    """Class to retrieve My Extra Life comics."""
    name = 'extralife'
    long_name = 'My Extra Life'
    url = 'http://www.myextralife.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        Searches the home page for the 'a' tag with the first-comic
        navigation classes.
        """
        home_soup = get_soup_at_url(cls.url)
        return home_soup.find('a', class_='comic_nav_link first_comic_link')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Images with an empty 'src' are left out of the returned list.
        """
        title = soup.find("h1", class_="comic_title").string
        raw_date = soup.find("span", class_="comic_date").string
        comic_date = string_to_date(raw_date, "%B %d, %Y")
        imgs = soup.find_all("img", class_="comic")
        # Every image is expected to repeat the comic title in 'alt' and 'title'
        assert all(img['alt'] == img['title'] == title for img in imgs)
        sources = [img['src'] for img in imgs if img["src"]]
        return {
            'title': title,
            'img': sources,
            'day': comic_date.day,
            'month': comic_date.month,
            'year': comic_date.year
        }
927
928
929
class SaturdayMorningBreakfastCereal(GenericNavigableComic):
    """Class to retrieve Saturday Morning Breakfast Cereal comics."""
    # Also on http://www.gocomics.com/saturday-morning-breakfast-cereal
    # Also on http://smbc-comics.tumblr.com
    name = 'smbc'
    long_name = 'Saturday Morning Breakfast Cereal'
    url = 'http://www.smbc-comics.com'
    get_navi_link = get_a_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics (the 'a' tag with rel='start' on the home page)."""
        return get_soup_at_url(cls.url).find('a', rel='start')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The main image is the 'img' with id 'cc-comic'. An optional second
        image is taken from the 'aftercomic' div: it is skipped when the div
        is missing, contains no image or when the image has no source.
        """
        main_img = soup.find('img', id='cc-comic')
        img_urls = [main_img['src']]
        aftercomic = soup.find('div', id='aftercomic')
        # The extra image is optional: do not fail on an empty 'aftercomic' div
        extra_img = aftercomic.find('img') if aftercomic else None
        extra_src = extra_img.get('src') if extra_img is not None else None
        if extra_src:
            img_urls.append(extra_src)
        date_str = soup.find('div', class_='cc-publishtime').contents[0]
        day = string_to_date(date_str, "%B %d, %Y")
        return {
            'title': main_img['title'],
            'img': [urljoin_wrapper(cls.url, i) for i in img_urls],
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
960
961
962
class PerryBibleFellowship(GenericListableComic):
    """Class to retrieve Perry Bible Fellowship comics."""
    name = 'pbf'
    long_name = 'Perry Bible Fellowship'
    url = 'http://pbfcomics.com'
    get_url_from_archive_element = join_cls_url_to_href

    @classmethod
    def get_archive_elements(cls):
        """Get the comic links of the home page, in reverse page order.

        A comic link is an 'a' tag whose href looks like '/<digits>/'.
        """
        comic_link_re = re.compile('^/[0-9]*/$')
        home_soup = get_soup_at_url(cls.url)
        return reversed(home_soup.find_all('a', href=comic_link_re))

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The comic number is the 'name' attribute of the archive link and is
        expected to match its href; exactly one comic image is expected.
        """
        comic_url = cls.get_url_from_archive_element(link)
        name = link.string
        num = int(link['name'])
        href = link['href']
        assert href == '/%d/' % num
        imgs = soup.find_all('img', src=re.compile('^/archive_b/PBF.*'))
        assert len(imgs) == 1
        assert imgs[0]['alt'] == name
        img_urls = [urljoin_wrapper(comic_url, img['src']) for img in imgs]
        return {
            'num': num,
            'name': name,
            'img': img_urls,
            'prefix': '%d-' % num,
        }
992
993
994 View Code Duplication
class Mercworks(GenericNavigableComic):
    """Class to retrieve Mercworks comics."""
    # Also on http://mercworks.tumblr.com
    name = 'mercworks'
    long_name = 'Mercworks'
    url = 'http://mercworks.net'
    get_navi_link = get_a_rel_next
    get_first_comic_link = get_a_comicnavbase_comicnavfirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Values are read from the og:* and shareaholic:* meta tags. The
        description is optional and defaults to an empty string.
        """
        def named_meta_content(meta_name):
            """Return the 'content' of the first meta tag with that name."""
            return soup.find('meta', attrs={'name': meta_name})['content']

        title = soup.find('meta', property='og:title')['content']
        desc_meta = soup.find('meta', property='og:description')
        if desc_meta:
            desc = desc_meta['content']
        else:
            desc = ""
        author = named_meta_content('shareaholic:article_author_name')
        published = named_meta_content('shareaholic:article_published_time')
        # Only the first 10 characters are parsed, as a year-month-day date
        day = string_to_date(published[:10], "%Y-%m-%d")
        image_metas = soup.find_all('meta', property='og:image')
        return {
            'img': [meta['content'] for meta in image_metas],
            'title': title,
            'author': author,
            'desc': desc,
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
1023
1024
1025
class BerkeleyMews(GenericListableComic):
    """Class to retrieve Berkeley Mews comics."""
    # Also on http://mews.tumblr.com
    # Also on http://www.gocomics.com/berkeley-mews
    name = 'berkeley'
    long_name = 'Berkeley Mews'
    url = 'http://www.berkeleymews.com'
    get_url_from_archive_element = get_href
    # Matches the comic URLs and captures the comic number
    comic_num_re = re.compile('%s/\\?p=([0-9]*)$' % url)

    @classmethod
    def get_archive_elements(cls):
        """Get the comic links of the archive page, in reverse page order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, "?page_id=2"))
        comic_links = archive_soup.find_all('a', href=cls.comic_num_re)
        return reversed(comic_links)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The number comes from the comic URL and the date from the three
        dash-separated numbers found in the image URL.
        """
        comic_date_re = re.compile('.*/([0-9]*)-([0-9]*)-([0-9]*)-.*')
        comic_url = cls.get_url_from_archive_element(link)
        num = int(cls.comic_num_re.match(comic_url).groups()[0])
        img = soup.find('div', id='comic').find('img')
        assert img['alt'] == img['title']
        title2 = img['title']
        img_url = img['src']
        year, month, day = map(int, comic_date_re.match(img_url).groups())
        return {
            'num': num,
            'title': link.string,
            'title2': title2,
            'img': [img_url],
            'year': year,
            'month': month,
            'day': day,
        }
1060
1061
1062
class GenericBouletCorp(GenericNavigableComic):
    """Generic class to retrieve BouletCorp comics in different languages."""
    # Also on http://bouletcorp.tumblr.com
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        It is the first 'a' tag of the 'centered_nav' div of the home page.
        """
        nav_div = get_soup_at_url(cls.url).find('div', id='centered_nav')
        nav_links = nav_div.find_all('a')
        return nav_links[0]

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The date is parsed from the comic URL (<url>/<year>/<month>/<day>/).
        Images without a 'src' attribute are left out of the image list.
        """
        comic_url = cls.get_url_from_link(link)
        date_match = re.match('^%s/([0-9]*)/([0-9]*)/([0-9]*)/' % cls.url, comic_url)
        year, month, day = map(int, date_match.groups())
        notes_div = soup.find('div', id='notes')
        imgs = notes_div.find('div', class_='storycontent').find_all('img')
        img_titles = [img.get('title') for img in imgs]
        texts = '  '.join(text for text in img_titles if text)
        title = soup.find('title').string
        img_urls = [
            convert_iri_to_plain_ascii_uri(img['src'])
            for img in imgs
            if img.get('src') is not None]
        return {
            'img': img_urls,
            'title': title,
            'texts': texts,
            'year': year,
            'month': month,
            'day': day,
        }
1089
1090
1091
class BouletCorp(GenericBouletCorp):
    """Class to retrieve BouletCorp comics from www.bouletcorp.com."""
    url = 'http://www.bouletcorp.com'
    name = 'boulet'
    long_name = 'Boulet Corp'
1096
1097
1098
class BouletCorpEn(GenericBouletCorp):
    """Class to retrieve BouletCorp comics from english.bouletcorp.com."""
    url = 'http://english.bouletcorp.com'
    name = 'boulet_en'
    long_name = 'Boulet Corp English'
1103
1104
1105 View Code Duplication
class AmazingSuperPowers(GenericNavigableComic):
    """Class to retrieve Amazing Super Powers comics."""
    name = 'asp'
    long_name = 'Amazing Super Powers'
    url = 'http://www.amazingsuperpowers.com'
    get_navi_link = get_a_navi_navinext
    get_first_comic_link = get_a_navi_navifirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The title is built by joining the 'title' attributes of the images
        found in the 'comic' div.
        """
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        raw_date = soup.find('span', class_='post-date').string
        post_date = string_to_date(raw_date, "%B %d, %Y")
        comic_imgs = soup.find('div', id='comic').find_all('img')
        title = ' '.join(img['title'] for img in comic_imgs)
        # 'alt' and 'title' are expected to be the same on every image
        assert all(img['alt'] == img['title'] for img in comic_imgs)
        return {
            'title': title,
            'author': author,
            'img': [img['src'] for img in comic_imgs],
            'day': post_date.day,
            'month': post_date.month,
            'year': post_date.year
        }
1130
1131
1132
class ToonHole(GenericListableComic):
    """Class to retrieve Toon Holes comics."""
    # Also on http://tapastic.com/series/TOONHOLE
    name = 'toonhole'
    long_name = 'Toon Hole'
    url = 'http://www.toonhole.com'
    get_url_from_archive_element = get_href

    @classmethod
    def get_archive_elements(cls):
        """Get the rel='bookmark' links of the archive page, in reverse page order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive/'))
        bookmarks = archive_soup.find_all('a', rel='bookmark')
        return reversed(bookmarks)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The title is the text of the archive link and the date is read from
        the 'comicdate' div of the comic page.
        """
        title = link.string
        raw_date = soup.find('div', class_='comicdate').string.strip()
        comic_date = string_to_date(remove_st_nd_rd_th_from_date(raw_date), "%B %d, %Y")
        comic_imgs = soup.find('div', id='comic').find_all('img')
        # Every image is expected to repeat the title in 'alt' and 'title'
        assert all(img['alt'] == img['title'] == title for img in comic_imgs)
        img_urls = [convert_iri_to_plain_ascii_uri(img['src']) for img in comic_imgs]
        return {
            'title': title,
            'month': comic_date.month,
            'year': comic_date.year,
            'day': comic_date.day,
            'img': img_urls,
        }
1160
1161
1162
class Channelate(GenericNavigableComic):
    """Class to retrieve Channelate comics."""
    name = 'channelate'
    long_name = 'Channelate'
    url = 'http://www.channelate.com'
    get_url_from_link = join_cls_url_to_href
    get_navi_link = get_link_rel_next
    get_first_comic_link = get_div_navfirst_a

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        When the page has an 'extrapanelbutton' div, the page it links to is
        fetched and its 'extrapanelimage' images are appended to the comic
        images. 'url_extra' is None when there is no such div.
        """
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        raw_date = soup.find('span', class_='post-date').string
        post_date = string_to_date(raw_date, '%Y/%m/%d')
        title = soup.find('meta', property='og:title')['content']
        comic_div = soup.find('div', id='comic')
        if comic_div:
            imgs = comic_div.find_all('img')
        else:
            imgs = []
        extra_div = soup.find('div', id='extrapanelbutton')
        if extra_div:
            extra_url = extra_div.find('a')['href']
            extra_soup = get_soup_at_url(extra_url)
            imgs.extend(extra_soup.find_all('img', class_='extrapanelimage'))
        else:
            extra_url = None
        return {
            'url_extra': extra_url,
            'title': title,
            'author': author,
            'month': post_date.month,
            'year': post_date.year,
            'day': post_date.day,
            'img': [urljoin_wrapper(cls.url, img['src']) for img in imgs],
        }
1196
1197
1198
class CyanideAndHappiness(GenericNavigableComic):
    """Class to retrieve Cyanide And Happiness comics."""
    name = 'cyanide'
    long_name = 'Cyanide and Happiness'
    url = 'http://explosm.net'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics (the 'a' tag titled 'Oldest comic' on the home page)."""
        return get_soup_at_url(cls.url).find('a', title='Oldest comic')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns None when no navigation link is found or when the link found
        has no 'href' attribute.
        """
        # NOTE(review): the trailing space in 'previous-comic ' is kept as is;
        # confirm it is needed to match the class attribute used by the site.
        link = last_soup.find('a', class_='next-comic' if next_ else 'previous-comic ')
        # find() returns None when nothing matches: report "no link" instead
        # of failing with an AttributeError on link.get
        if link is None or link.get('href') is None:
            return None
        return link

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The comic number is the second to last '/'-separated part of the
        og:url meta tag. The author credit is expected to start with 'by ',
        which is removed from the returned value.
        """
        url2 = soup.find('meta', property='og:url')['content']
        num = int(url2.split('/')[-2])
        date_str = soup.find('h3').find('a').string
        day = string_to_date(date_str, '%Y.%m.%d')
        author = soup.find('small', class_="author-credit-name").string
        assert author.startswith('by ')
        author = author[3:]
        imgs = soup.find_all('img', id='main-comic')
        return {
            'num': num,
            'author': author,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'prefix': '%d-' % num,
            'img': [convert_iri_to_plain_ascii_uri(urljoin_wrapper(cls.url, i['src'])) for i in imgs]
        }
1236
1237
1238
class MrLovenstein(GenericComic):
    """Class to retrieve Mr Lovenstein comics."""
    # Also on https://tapastic.com/series/MrLovenstein
    name = 'mrlovenstein'
    long_name = 'Mr. Lovenstein'
    url = 'http://www.mrlovenstein.com'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generator to get the next comic. Implementation of GenericComic's abstract method.

        The range of comic numbers is deduced from the '/comic/<number>'
        links of the home page; each comic page in that range is then
        fetched, starting after last_comic when one is provided.
        """
        # TODO: more info from http://www.mrlovenstein.com/archive
        comic_num_re = re.compile('^/comic/([0-9]*)$')
        comic_img_re = re.compile('^/images/comics/')
        home_links = get_soup_at_url(cls.url).find_all('a', href=comic_num_re)
        nums = [int(comic_num_re.match(a['href']).group(1)) for a in home_links]
        first = min(nums)
        last = max(nums)
        if last_comic:
            first = last_comic['num'] + 1
        for num in range(first, last + 1):
            comic_url = urljoin_wrapper(cls.url, '/comic/%d' % num)
            soup = get_soup_at_url(comic_url)
            # Images are handled in reverse page order
            imgs = soup.find_all('img', src=comic_img_re)[::-1]
            description = soup.find('meta', attrs={'name': 'description'})['content']
            img_titles = [img.get('title') for img in imgs]
            yield {
                'url': comic_url,
                'num': num,
                'texts': '  '.join(text for text in img_titles if text),
                'img': [urljoin_wrapper(comic_url, img['src']) for img in imgs],
                'description': description,
            }
1268
1269
1270
class DinosaurComics(GenericListableComic):
    """Class to retrieve Dinosaur Comics comics."""
    name = 'dinosaur'
    long_name = 'Dinosaur Comics'
    url = 'http://www.qwantz.com'
    get_url_from_archive_element = get_href
    # Matches the comic URLs and captures the comic number
    comic_link_re = re.compile('^%s/index.php\\?comic=([0-9]*)$' % url)

    @classmethod
    def get_archive_elements(cls):
        """Get the comic links of the archive page, in reverse page order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive.php'))
        comic_links = archive_soup.find_all('a', href=cls.comic_link_re)
        # first link is random -> skip it
        return reversed(comic_links[1:])

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The date is the text of the archive link, the text is the string of
        the element following it and the number comes from the comic URL.
        """
        comic_url = cls.get_url_from_archive_element(link)
        num = int(cls.comic_link_re.match(comic_url).groups()[0])
        raw_date = link.string
        text = link.next_sibling.string
        comic_date = string_to_date(remove_st_nd_rd_th_from_date(raw_date), "%B %d, %Y")
        img = soup.find('img', src=re.compile('^%s/comics/' % cls.url))
        return {
            'month': comic_date.month,
            'year': comic_date.year,
            'day': comic_date.day,
            'img': [img.get('src')],
            'title': img.get('title'),
            'text': text,
            'num': num,
        }
1303
1304
1305
class ButterSafe(GenericListableComic):
    """Class to retrieve Butter Safe comics."""
    name = 'butter'
    long_name = 'ButterSafe'
    url = 'http://buttersafe.com'
    get_url_from_archive_element = get_href
    # Matches the comic URLs and captures year, month and day
    comic_link_re = re.compile('^%s/([0-9]*)/([0-9]*)/([0-9]*)/.*' % url)

    @classmethod
    def get_archive_elements(cls):
        """Get the comic links of the archive page, in reverse page order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive/'))
        comic_links = archive_soup.find_all('a', href=cls.comic_link_re)
        return reversed(comic_links)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The date is parsed from the comic URL and the title is the text of
        the archive link, expected to be the 'alt' of the comic image.
        """
        comic_url = cls.get_url_from_archive_element(link)
        title = link.string
        year, month, day = map(int, cls.comic_link_re.match(comic_url).groups())
        comic_img = soup.find('div', id='comic').find('img')
        assert comic_img['alt'] == title
        return {
            'title': title,
            'day': day,
            'month': month,
            'year': year,
            'img': [comic_img['src']],
        }
1333
1334
1335
class CalvinAndHobbes(GenericComic):
    """Class to retrieve Calvin and Hobbes comics."""
    # Also on http://www.gocomics.com/calvinandhobbes/
    name = 'calvin'
    long_name = 'Calvin and Hobbes'
    # This is not through any official webpage but eh...
    url = 'http://marcel-oehler.marcellosendos.ch/comics/ch/'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generator to get the next comic. Implementation of GenericComic's abstract method.

        The main page links to one page per month ('<year>/<month>/'); each
        month page contains images whose source starts with
        '<year><month><day>'. Only comics dated after the last retrieved
        comic (or after 1985-11-01 when there is none) are yielded.
        """
        last_date = get_date_for_comic(
            last_comic) if last_comic else date(1985, 11, 1)
        link_re = re.compile('^([0-9]*)/([0-9]*)/')
        for link in get_soup_at_url(cls.url).find_all('a', href=link_re):
            url = link['href']
            year, month = link_re.match(url).groups()
            year_num, month_num = int(year), int(month)
            # Do not fetch the pages of months which cannot contain anything
            # more recent than the last comic
            if date(year_num, month_num, 1) + timedelta(days=31) < last_date:
                continue
            img_re = re.compile('^%s%s([0-9]*)' % (year, month))
            month_url = urljoin_wrapper(cls.url, url)
            for img in get_soup_at_url(month_url).find_all('img', src=img_re):
                img_src = img['src']
                day = int(img_re.match(img_src).groups()[0])
                comic_date = date(year_num, month_num, day)
                if comic_date > last_date:
                    yield {
                        'url': month_url,
                        'year': year_num,
                        'month': month_num,
                        'day': day,
                        'img': ['%s%s/%s/%s' % (cls.url, year, month, img_src)],
                    }
                    last_date = comic_date
1369
1370
1371
class AbstruseGoose(GenericListableComic):
    """Class to retrieve AbstruseGoose Comics."""
    name = 'abstruse'
    long_name = 'Abstruse Goose'
    url = 'http://abstrusegoose.com'
    get_url_from_archive_element = get_href
    # Matches the comic URLs and captures the comic number
    comic_url_re = re.compile('^%s/([0-9]*)$' % url)
    # Matches the URLs of the comic images
    comic_img_re = re.compile('^%s/strips/.*' % url)

    @classmethod
    def get_archive_elements(cls):
        """Get the comic links of the archive page, in page order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive'))
        return archive_soup.find_all('a', href=cls.comic_url_re)

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Get information about a particular comics.

        The number comes from the comic URL, the title is the text of the
        archive link and the image is the first one matching comic_img_re.
        """
        comic_url = cls.get_url_from_archive_element(archive_elt)
        url_match = cls.comic_url_re.match(comic_url)
        num = int(url_match.groups()[0])
        info = {
            'num': num,
            'title': archive_elt.string,
            'img': [soup.find('img', src=cls.comic_img_re)['src']]
        }
        return info
0 ignored issues
show
This code seems to be duplicated in your project.
Loading history...
1394
1395
1396
class PhDComics(GenericNavigableComic):
    """Class to retrieve PHD Comics."""
    name = 'phd'
    long_name = 'PhD Comics'
    url = 'http://phdcomics.com/comics/archive.php'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        The link is the parent of the 'first' button image.
        """
        first_button = get_soup_at_url(cls.url).find('img', src='images/first_button.gif')
        return first_button.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        The link is the parent of the next/prev button image, or None when
        the button is not on the page.
        """
        if next_:
            button_src = 'images/next_button.gif'
        else:
            button_src = 'images/prev_button.gif'
        button = last_soup.find('img', src=button_src)
        if button is None:
            return None
        return button.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        When the date found on the page cannot be parsed, a message is
        printed and today's date is used instead.
        """
        date_font = soup.find('font', face='Arial,Helvetica,Geneva,Swiss,SunSans-Regular', color='white')
        date_str = date_font.string.strip()
        try:
            comic_date = string_to_date(date_str, '%m/%d/%Y')
        except ValueError:
            print("Invalid date %s" % date_str)
            comic_date = date.today()
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        img_src = soup.find('img', id='comic')['src']
        return {
            'year': comic_date.year,
            'month': comic_date.month,
            'day': comic_date.day,
            'img': [img_src],
            'title': title,
        }
1431
1432
1433 View Code Duplication
class Octopuns(GenericNavigableComic):
    """Class to retrieve Octopuns comics."""
    # Also on http://octopuns.tumblr.com
    name = 'octopuns'
    long_name = 'Octopuns'
    url = 'http://www.octopuns.net'

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        The link is the parent of the 'First' button image of the home page.
        """
        first_button = get_soup_at_url(cls.url).find('img', src=re.compile('.*/First.png'))
        return first_button.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        The link is the parent of the 'Next'/'Back' button image; None is
        returned when that parent has no 'href'.
        """
        button_re = re.compile('.*/Next.png' if next_ else '.*/Back.png')
        parent = last_soup.find('img', src=button_re).parent
        if parent.get('href') is None:
            return None
        return parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Title and date come from the post headers; the images are the
        'link' tags with rel='image_src'.
        """
        title = soup.find('h3', class_='post-title entry-title').string
        raw_date = soup.find('h2', class_='date-header').string
        post_date = string_to_date(raw_date, "%A, %B %d, %Y")
        image_links = soup.find_all('link', rel='image_src')
        return {
            'img': [image_link['href'] for image_link in image_links],
            'title': title,
            'day': post_date.day,
            'month': post_date.month,
            'year': post_date.year,
        }
1465
1466
1467
class Quarktees(GenericNavigableComic):
    """Class to retrieve the Quarktees comics."""
    name = 'quarktees'
    long_name = 'Quarktees'
    url = 'http://www.quarktees.com/blogs/news'
    first_url = 'http://www.quarktees.com/blogs/news/12486621-coming-soon'
    get_first_comic_link = simulate_first_link
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Looks for the 'a' tag with id 'article-next' or 'article-prev'.
        """
        if next_:
            link_id = 'article-next'
        else:
            link_id = 'article-prev'
        return last_soup.find('a', id=link_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The title comes from the og:title meta tag and the images from the
        'single-article' div.
        """
        title = soup.find('meta', property='og:title')['content']
        article_div = soup.find('div', class_='single-article')
        img_urls = [
            urljoin_wrapper(cls.url, img['src'])
            for img in article_div.find_all('img')]
        return {
            'title': title,
            'img': img_urls,
        }
1491
1492
1493
class OverCompensating(GenericNavigableComic):
    """Class to retrieve the Over Compensating comics."""
    name = 'compensating'
    long_name = 'Over Compensating'
    url = 'http://www.overcompensating.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics (anchor whose href ends with 'comic=1')."""
        home = get_soup_at_url(cls.url)
        return home.find('a', href=re.compile('comic=1$'))

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic (anchor looked up by title)."""
        wanted_title = 'next comic' if next_ else 'go back already'
        return last_soup.find('a', title=wanted_title)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The comic number is the trailing 'comic=<digits>' part of the url.
        """
        comic_url = cls.get_url_from_link(link)
        num_match = re.compile('.*comic=([0-9]*)$').match(comic_url)
        number = int(num_match.group(1))
        picture = soup.find('img', src=re.compile('^/oc/comics/.*'))
        return {
            'num': number,
            'img': [urljoin_wrapper(comic_url, picture['src'])],
            'title': picture.get('title')
        }
1523
1524
1525
class Oglaf(GenericNavigableComic):
    """Class to retrieve Oglaf comics."""
    name = 'oglaf'
    long_name = 'Oglaf [NSFW]'
    url = 'http://oglaf.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics (parent of the div with id 'st')."""
        start_div = get_soup_at_url(cls.url).find("div", id="st")
        return start_div.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns the parent of the 'nx' / 'pvs' div, or None without one.
        """
        marker = last_soup.find("div", id="nx" if next_ else "pvs")
        if not marker:
            return None
        return marker.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Exactly one title image and one strip image are expected; the
        description is made of their 'title' attributes.
        """
        page_title = soup.find('title').string
        header_imgs = soup.find('div', id='tt').find_all('img')
        assert len(header_imgs) == 1
        strip = soup.find_all('img', id='strip')
        assert len(strip) == 1
        pictures = header_imgs + strip
        description = ' '.join(pic['title'] for pic in pictures)
        return {
            'title': page_title,
            'img': [pic['src'] for pic in pictures],
            'description': description,
        }
1558
1559
1560
class ScandinaviaAndTheWorld(GenericNavigableComic):
    """Class to retrieve Scandinavia And The World comics."""
    name = 'satw'
    long_name = 'Scandinavia And The World'
    url = 'http://satwcomic.com'
    first_url = 'http://satwcomic.com/sweden-denmark-and-norway'
    get_first_comic_link = simulate_first_link

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic (anchor with accesskey n / p)."""
        key = 'n' if next_ else 'p'
        return last_soup.find('a', accesskey=key)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Title and description are read from meta tags, images from the
        <img itemprop="image"> elements.
        """
        label = soup.find('meta', attrs={'name': 'twitter:label1'})
        title = label['content']
        og_desc = soup.find('meta', property='og:description')
        description = og_desc['content']
        pictures = soup.find_all('img', itemprop="image")
        return {
            'title': title,
            'description': description,
            'img': [pic['src'] for pic in pictures],
        }
1584
1585
1586
class SomethingOfThatIlk(GenericEmptyComic):
    """Entry for the Something Of That Ilk comics.

    The comic does not exist anymore, hence the GenericEmptyComic base.
    """
    url = 'http://www.somethingofthatilk.com'
    long_name = 'Something Of That Ilk'
    name = 'somethingofthatilk'
1591
1592
1593
class InfiniteMonkeyBusiness(GenericNavigableComic):
    """Generic class to retrieve InfiniteMonkeyBusiness comics."""
    name = 'monkey'
    long_name = 'Infinite Monkey Business'
    url = 'http://infinitemonkeybusiness.net'
    first_url = 'http://infinitemonkeybusiness.net/comic/pillory/'
    get_first_comic_link = simulate_first_link
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Title comes from the og:title meta tag, images from the 'comic' div.
        """
        og_title = soup.find('meta', property='og:title')
        title = og_title['content']
        comic_div = soup.find('div', id='comic')
        pictures = comic_div.find_all('img')
        return {
            'title': title,
            'img': [pic['src'] for pic in pictures],
        }
1611
1612
1613
class Wondermark(GenericListableComic):
    """Class to retrieve the Wondermark comics."""
    name = 'wondermark'
    long_name = 'Wondermark'
    url = 'http://wondermark.com'
    get_url_from_archive_element = get_href

    @classmethod
    def get_archive_elements(cls):
        """Get the archive's rel="bookmark" anchors, in reversed page order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive/'))
        bookmarks = archive_soup.find_all('a', rel='bookmark')
        return reversed(bookmarks)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Pages without a 'comic' div produce an empty image list and empty
        title / alt values; date and tags are always extracted.
        """
        raw_date = soup.find('div', class_='postdate').find('em').string
        published = string_to_date(remove_st_nd_rd_th_from_date(raw_date), "%B %d, %Y")
        comic_div = soup.find('div', id='comic')
        # Defaults used when the page holds no comic div.
        sources, alt_text, title = [], '', ''
        if comic_div:
            picture = comic_div.find('img')
            sources = [picture['src']]
            alt_text = picture['alt']
            assert alt_text == picture['title']
            title = soup.find('meta', property='og:title')['content']
        return {
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': sources,
            'title': title,
            'alt': alt_text,
            'tags': ' '.join(t.string for t in soup.find('div', class_='postmeta').find_all('a', rel='tag')),
        }
1650 View Code Duplication
1651
1652
class WarehouseComic(GenericNavigableComic):
    """Class to retrieve Warehouse Comic comics."""
    name = 'warehouse'
    long_name = 'Warehouse Comic'
    url = 'http://warehousecomic.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = get_a_navi_navifirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Extracts the post title, the post date ("%B %d, %Y" format) and
        the images of the 'comic' div.
        """
        post_title = soup.find('h2', class_='post-title').string
        posted_on = soup.find('span', class_='post-date').string
        published = string_to_date(posted_on, "%B %d, %Y")
        pictures = soup.find('div', id='comic').find_all('img')
        sources = [pic['src'] for pic in pictures]
        return {
            'img': sources,
            'title': post_title,
            'day': published.day,
            'month': published.month,
            'year': published.year,
        }
1674
1675
1676
class JustSayEh(GenericNavigableComic):
    """Class to retrieve Just Say Eh comics."""
    # Also on http://tapastic.com/series/Just-Say-Eh
    name = 'justsayeh'
    long_name = 'Just Say Eh'
    url = 'http://www.justsayeh.com'
    get_navi_link = get_a_navi_comicnavnext_navinext
    get_first_comic_link = get_a_navi_navifirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Every image of the 'comic' div must have matching 'alt' and
        'title' attributes; the first image's 'alt' is reported.
        """
        post_title = soup.find('h2', class_='post-title').string
        pictures = soup.find("div", id="comic").find_all("img")
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        first_alt = pictures[0]['alt']
        return {
            'img': [pic['src'] for pic in pictures],
            'title': post_title,
            'alt': first_alt,
        }
1697
1698
1699 View Code Duplication
class MouseBearComedy(GenericNavigableComic):
    """Class to retrieve Mouse Bear Comedy comics."""
    # Also on http://mousebearcomedy.tumblr.com
    name = 'mousebear'
    long_name = 'Mouse Bear Comedy'
    url = 'http://www.mousebearcomedy.com'
    get_navi_link = get_a_navi_comicnavnext_navinext
    get_first_comic_link = get_a_navi_navifirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Each image's 'alt' and 'title' are expected to equal the post title.
        """
        post_title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        posted_on = soup.find("span", class_="post-date").string
        published = string_to_date(posted_on, '%B %d, %Y')
        pictures = soup.find("div", id="comic").find_all("img")
        assert all(pic['alt'] == pic['title'] == post_title for pic in pictures)
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [pic['src'] for pic in pictures],
            'title': post_title,
            'author': author,
        }
1725 View Code Duplication
0 ignored issues
show
This code seems to be duplicated in your project.
Loading history...
1726
1727
class BigFootJustice(GenericNavigableComic):
    """Class to retrieve Big Foot Justice comics."""
    # Also on http://tapastic.com/series/bigfoot-justice
    name = 'bigfoot'
    long_name = 'Big Foot Justice'
    url = 'http://bigfootjustice.com'
    get_navi_link = get_a_navi_comicnavnext_navinext
    get_first_comic_link = get_a_navi_navifirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The title is the space-joined 'title' attributes of the images in
        the 'comic' div (each must equal the image's 'alt').
        """
        comic_div = soup.find('div', id='comic')
        pictures = comic_div.find_all('img')
        assert all(pic['title'] == pic['alt'] for pic in pictures)
        joined_title = ' '.join(pic['title'] for pic in pictures)
        return {
            'img': [pic['src'] for pic in pictures],
            'title': joined_title,
        }
1746
1747
1748 View Code Duplication
class RespawnComic(GenericNavigableComic):
    """Class to retrieve Respawn Comic."""
    # Also on http://respawncomic.tumblr.com
    name = 'respawn'
    long_name = 'Respawn Comic'
    # Must be a clean URL: no leading/trailing whitespace.
    url = 'http://respawncomic.com'
    get_navi_link = get_a_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://respawncomic.com/comic/c0001/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Title, author and publication date are read from meta tags; the
        images are the og:image meta tags minus the entries of skip_imgs.
        """
        title = soup.find('meta', property='og:title')['content']
        author = soup.find('meta', attrs={'name': 'shareaholic:article_author_name'})['content']
        date_str = soup.find('meta', attrs={'name': 'shareaholic:article_published_time'})['content']
        # Only the first 10 characters (the YYYY-MM-DD part) are parsed.
        date_str = date_str[:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        imgs = soup.find_all('meta', property='og:image')
        # og:image values that are excluded from the returned image list.
        skip_imgs = {
            'http://respawncomic.com/wp-content/uploads/2016/03/site/HAROLD2.png',
            'http://respawncomic.com/wp-content/uploads/2016/03/site/DEVA.png'
        }
        return {
            'title': title,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [i['content'] for i in imgs if i['content'] not in skip_imgs],
        }
1779
1780
1781 View Code Duplication
class SafelyEndangered(GenericNavigableComic):
    """Class to retrieve Safely Endangered comics."""
    # Also on http://tumblr.safelyendangered.com
    name = 'endangered'
    long_name = 'Safely Endangered'
    url = 'http://www.safelyendangered.com'
    first_url = 'http://www.safelyendangered.com/comic/ignored/'
    get_first_comic_link = simulate_first_link
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The reported 'alt' is the one of the first image in the 'comic'
        div; every image must have matching 'alt' and 'title'.
        """
        post_title = soup.find('h2', class_='post-title').string
        posted_on = soup.find('span', class_='post-date').string
        published = string_to_date(posted_on, '%B %d, %Y')
        pictures = soup.find('div', id='comic').find_all('img')
        first_alt = pictures[0]['alt']
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [pic['src'] for pic in pictures],
            'title': post_title,
            'alt': first_alt,
        }
1808
1809
1810 View Code Duplication
class PicturesInBoxes(GenericNavigableComic):
    """Class to retrieve Pictures In Boxes comics."""
    # Also on http://picturesinboxescomic.tumblr.com
    name = 'picturesinboxes'
    long_name = 'Pictures in Boxes'
    url = 'http://www.picturesinboxes.com'
    first_url = 'http://www.picturesinboxes.com/2013/10/26/tetris/'
    get_first_comic_link = simulate_first_link
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        At least one image is expected in the 'comicpane' div, each with
        'title' and 'alt' equal to the post title.
        """
        post_title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        posted_on = soup.find('span', class_='post-date').string
        published = string_to_date(posted_on, '%B %d, %Y')
        pictures = soup.find('div', class_='comicpane').find_all('img')
        assert pictures
        assert all(pic['title'] == pic['alt'] == post_title for pic in pictures)
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [pic['src'] for pic in pictures],
            'title': post_title,
            'author': author,
        }
1838
1839
1840
class Penmen(GenericEmptyComic):
    """Entry for the Penmen comics, built on GenericEmptyComic."""
    url = 'http://penmen.com'
    long_name = 'Penmen'
    name = 'penmen'
1845
1846
1847
class TheDoghouseDiaries(GenericNavigableComic):
    """Class to retrieve The Dog House Diaries comics."""
    name = 'doghouse'
    long_name = 'The Dog House Diaries'
    url = 'http://thedoghousediaries.com'

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics (anchor with id 'firstlink')."""
        home = get_soup_at_url(cls.url)
        return home.find('a', id='firstlink')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic (anchor looked up by its id)."""
        anchor_id = 'nextlink' if next_ else 'previouslink'
        return last_soup.find('a', id=anchor_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The comic number is the last '/'-separated component of the url.
        """
        picture = soup.find('img', src=re.compile('^dhdcomics/.*'))
        comic_url = cls.get_url_from_link(link)
        title = soup.find('h2', id='titleheader').string
        subtitle = soup.find('div', id='subtext').string
        alt_text = picture.get('title')
        # The src attribute is stripped of surrounding whitespace before joining.
        sources = [urljoin_wrapper(comic_url, picture['src'].strip())]
        number = int(comic_url.rsplit('/', 1)[-1])
        return {
            'title': title,
            'title2': subtitle,
            'alt': alt_text,
            'img': sources,
            'num': number,
        }
1876
1877
1878
class InvisibleBread(GenericListableComic):
    """Class to retrieve Invisible Bread comics."""
    # Also on http://www.gocomics.com/invisible-bread
    name = 'invisiblebread'
    long_name = 'Invisible Bread'
    url = 'http://invisiblebread.com'

    @classmethod
    def get_archive_elements(cls):
        """Get the archive's <td class="archive-title"> cells, in reversed page order."""
        archive_url = urljoin_wrapper(cls.url, 'archives/')
        return reversed(get_soup_at_url(archive_url).find_all('td', class_='archive-title'))

    @classmethod
    def get_url_from_archive_element(cls, td):
        """Get url corresponding to an archive element (href of the cell's anchor)."""
        return td.find('a')['href']

    @classmethod
    def get_comic_info(cls, soup, td):
        """Get information about a particular comics.

        The month and day come from the cell preceding td in the archive
        table, the year from the first numeric path component of the
        comic url. The comic image must have 'title' and 'alt' equal to
        the archive title.
        """
        url = cls.get_url_from_archive_element(td)
        title = td.find('a').string
        month_and_day = td.previous_sibling.string
        # cls.url is a literal prefix, not a pattern: escape it so that
        # characters such as '.' only match themselves.
        link_re = re.compile('^%s/([0-9]+)/' % re.escape(cls.url))
        year = link_re.match(url).groups()[0]
        date_str = month_and_day + ' ' + year
        day = string_to_date(date_str, '%b %d %Y')
        # Single comic image, kept in a list to build the 'img' entry.
        imgs = [soup.find('div', id='comic').find('img')]
        assert all(i['title'] == i['alt'] == title for i in imgs)
        return {
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
            'title': title,
        }
1914
1915
1916
class DiscoBleach(GenericEmptyComic):
    """Entry for the Disco Bleach Comics.

    Retrieval does not work anymore, hence the GenericEmptyComic base.
    """
    url = 'http://discobleach.com'
    long_name = 'Disco Bleach'
    name = 'discobleach'
1921
1922
1923
class TubeyToons(GenericEmptyComic):
    """Entry for the TubeyToons comics.

    Retrieval does not work anymore, hence the GenericEmptyComic base.
    """
    # Also on http://tapastic.com/series/Tubey-Toons
    # Also on http://tubeytoons.tumblr.com
    url = 'http://tubeytoons.com'
    long_name = 'Tubey Toons'
    name = 'tubeytoons'
1930
1931
1932 View Code Duplication
class CompletelySeriousComics(GenericNavigableComic):
    """Class to retrieve Completely Serious comics."""
    name = 'completelyserious'
    long_name = 'Completely Serious Comics'
    url = 'http://completelyseriouscomics.com'
    get_navi_link = get_a_navi_navinext
    get_first_comic_link = get_a_navi_navifirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The author is the second child of the 'post-author' span. At least
        one image is expected; all must share the same 'title' / 'alt'.
        """
        post_title = soup.find('h2', class_='post-title').string
        author_span = soup.find('span', class_='post-author')
        author = author_span.contents[1].string
        posted_on = soup.find('span', class_='post-date').string
        published = string_to_date(posted_on, '%B %d, %Y')
        pictures = soup.find('div', class_='comicpane').find_all('img')
        assert pictures
        alt_text = pictures[0]['title']
        assert all(pic['title'] == pic['alt'] == alt_text for pic in pictures)
        return {
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [pic['src'] for pic in pictures],
            'title': post_title,
            'alt': alt_text,
            'author': author,
        }
1960
1961
1962
class PoorlyDrawnLines(GenericListableComic):
    """Class to retrieve Poorly Drawn Lines comics."""
    # Also on http://pdlcomics.tumblr.com
    name = 'poorlydrawn'
    long_name = 'Poorly Drawn Lines'
    url = 'http://poorlydrawnlines.com'
    get_url_from_archive_element = get_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        At most one image is expected in the 'post' div; without any, the
        image list and the title are empty.
        """
        imgs = soup.find('div', class_='post').find_all('img')
        assert len(imgs) <= 1
        return {
            'img': [i['src'] for i in imgs],
            'title': imgs[0].get('title', "") if imgs else "",
        }

    @classmethod
    def get_archive_elements(cls):
        """Get the archive anchors pointing under '<url>/comic/', in reversed page order."""
        archive_url = urljoin_wrapper(cls.url, 'archive')
        # cls.url is a literal prefix, not a pattern: escape it so that
        # characters such as '.' only match themselves.
        url_re = re.compile('^%s/comic/.' % re.escape(cls.url))
        return reversed(get_soup_at_url(archive_url).find_all('a', href=url_re))
1985
1986
1987 View Code Duplication
class LoadingComics(GenericNavigableComic):
    """Class to retrieve Loading Artist comics."""
    name = 'loadingartist'
    long_name = 'Loading Artist'
    url = 'http://www.loadingartist.com/latest'

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics (anchor titled "First")."""
        latest = get_soup_at_url(cls.url)
        return latest.find('a', title="First")

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic (anchor looked up by title)."""
        wanted_title = 'Next' if next_ else 'Previous'
        return last_soup.find('a', title=wanted_title)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Only images of the 'comic' div with empty 'alt' and 'title'
        attributes are collected.
        """
        heading = soup.find('h1').string
        posted_on = soup.find('span', class_='date').string.strip()
        published = string_to_date(posted_on, "%B %d, %Y")
        comic_div = soup.find('div', class_='comic')
        pictures = comic_div.find_all('img', alt='', title='')
        return {
            'title': heading,
            'img': [pic['src'] for pic in pictures],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2017
2018
2019 View Code Duplication
class ChuckleADuck(GenericNavigableComic):
    """Class to retrieve Chuckle-A-Duck comics."""
    name = 'chuckleaduck'
    long_name = 'Chuckle-A-duck'
    url = 'http://chuckleaduck.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = get_div_navfirst_a

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Without a 'comic' div (or without images in it) the image list and
        the title are empty; otherwise the title is the first image's
        'title', which every image must share as 'title' and 'alt'.
        """
        posted_on = soup.find('span', class_='post-date').string
        published = string_to_date(remove_st_nd_rd_th_from_date(posted_on), "%B %d, %Y")
        author = soup.find('span', class_='post-author').string
        comic_div = soup.find('div', id='comic')
        if comic_div:
            pictures = comic_div.find_all('img')
        else:
            pictures = []
        if pictures:
            title = pictures[0]['title']
        else:
            title = ""
        assert all(pic['title'] == pic['alt'] == title for pic in pictures)
        return {
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'author': author,
        }
2045
2046
2047
class DepressedAlien(GenericNavigableComic):
    """Class to retrieve Depressed Alien Comics."""
    name = 'depressedalien'
    long_name = 'Depressed Alien'
    url = 'http://depressedalien.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics (parent of the image named 'beginArrow')."""
        return get_soup_at_url(cls.url).find('img', attrs={'name': 'beginArrow'}).parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns the parent of the image named 'rightArrow' (next) or
        'leftArrow' (previous), or None when the page has no such image.
        """
        arrow_name = 'rightArrow' if next_ else 'leftArrow'
        arrow = last_soup.find('img', attrs={'name': arrow_name})
        # find() yields None when nothing matches: report "no link"
        # rather than failing on None.parent.
        return None if arrow is None else arrow.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Title comes from the twitter:title meta tag, images from the
        og:image meta tags.
        """
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        imgs = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'img': [i['content'] for i in imgs],
        }
2073
2074
2075
class ThingsInSquares(GenericListableComic):
    """Class to retrieve Things In Squares comics."""
    # This can be retrieved in other languages
    # Also on https://tapastic.com/series/Things-in-Squares
    name = 'squares'
    long_name = 'Things in squares'
    url = 'http://www.thingsinsquares.com'

    @classmethod
    def get_archive_elements(cls):
        """Get the <tr> rows of the archive table body, in reversed page order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive-2'))
        rows = archive_soup.find('tbody').find_all('tr')
        return reversed(rows)

    @classmethod
    def get_url_from_archive_element(cls, tr):
        """Get url corresponding to an archive row (href of the second cell's anchor)."""
        # Unpacking requires the row to hold exactly three cells.
        _, title_cell, _ = tr.find_all('td')
        return title_cell.find('a')['href']

    @classmethod
    def get_comic_info(cls, soup, tr):
        """Get information about a particular comics.

        Title and date ("%m.%d.%y" format) come from the archive row; the
        rest is read from the comic page itself.
        """
        # Unpacking requires the row to hold exactly three cells.
        _, title_cell, date_cell = tr.find_all('td')
        anchor = title_cell.find('a')
        published = string_to_date(date_cell.string, "%m.%d.%y")
        title = anchor.string
        og_title = soup.find('meta', property='og:title')['content']
        og_desc = soup.find('meta', property='og:description')
        # The description meta tag is optional.
        description = og_desc['content'] if og_desc else ''
        tag_metas = soup.find_all('meta', property='article:tag')
        tags = ' '.join(meta['content'] for meta in tag_metas)
        pictures = soup.find('div', class_='entry-content').find_all('img')
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'title': title,
            'title2': og_title,
            'description': description,
            'tags': tags,
            'img': [pic['src'] for pic in pictures],
            'alt': ' '.join(pic['alt'] for pic in pictures),
        }
2117
2118
2119 View Code Duplication
class HappleTea(GenericNavigableComic):
    """Class to retrieve Happle Tea Comics."""
    name = 'happletea'
    long_name = 'Happle Tea'
    url = 'http://www.happletea.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = get_a_navi_navifirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Images come from the 'comic' div; title, author and date from the
        'post-content' div. The reported 'alt' concatenates all image alts.
        """
        pictures = soup.find('div', id='comic').find_all('img')
        post_div = soup.find('div', class_='post-content')
        post_title = post_div.find('h2', class_='post-title').string
        author = post_div.find('a', rel='author').string
        posted_on = post_div.find('span', class_='post-date').string
        published = string_to_date(posted_on, "%B %d, %Y")
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        return {
            'title': post_title,
            'img': [pic['src'] for pic in pictures],
            'alt': ''.join(pic['alt'] for pic in pictures),
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'author': author,
        }
2146
2147
2148
class FatAwesomeComics(GenericNavigableComic):
    """Class to retrieve Fat Awesome Comics."""
    # Also on http://fatawesomecomedy.tumblr.com
    name = 'fatawesome'
    long_name = 'Fat Awesome'
    url = 'http://fatawesome.com/comics'
    first_url = 'http://fatawesome.com/shortbus/'
    get_first_comic_link = simulate_first_link
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Exactly one <img data-recalc-dims="1"> is expected; everything
        from the last '?' onwards is dropped from its src.
        """
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        description = soup.find('meta', attrs={'name': 'description'})['content']
        tag_meta = soup.find('meta', property='article:tag')
        # The tag meta element is optional.
        tags = tag_meta['content'] if tag_meta else ""
        published_meta = soup.find('meta', property='article:published_time')
        # Only the first 10 characters (the YYYY-MM-DD part) are parsed.
        published = string_to_date(published_meta['content'][:10], "%Y-%m-%d")
        pictures = soup.find_all('img', attrs={'data-recalc-dims': "1"})
        assert len(pictures) == 1
        return {
            'title': title,
            'description': description,
            'tags': tags,
            'alt': "".join(pic['alt'] for pic in pictures),
            'img': [pic['src'].rsplit('?', 1)[0] for pic in pictures],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2179
2180
2181
class AnythingComic(GenericListableComic):
    """Class to retrieve Anything Comics."""
    # Also on http://tapastic.com/series/anything
    name = 'anythingcomic'
    long_name = 'Anything Comic'
    url = 'http://www.anythingcomic.com'

    @classmethod
    def get_archive_elements(cls):
        """Get the rows of the 'chapter_table' table that correspond to comics."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive/'))
        rows = archive_soup.find('table', id='chapter_table').find_all('tr')
        # The first 2 <tr>'s do not correspond to comics
        return rows[2:]

    @classmethod
    def get_url_from_archive_element(cls, tr):
        """Get url corresponding to an archive element."""
        # Unpacking requires the row to hold exactly four cells.
        _, comic_cell, _, _ = tr.find_all('td')
        anchor = comic_cell.find('a')
        return urljoin_wrapper(cls.url, anchor['href'])

    @classmethod
    def get_comic_info(cls, soup, tr):
        """Get information about a particular comics.

        Number, title and date ('%d %b %Y %I:%M %p' format) come from the
        archive row; the single 'comic_image' image from the comic page.
        """
        # Unpacking requires the row to hold exactly four cells.
        num_cell, comic_cell, date_cell, _ = tr.find_all('td')
        number = int(num_cell.string)
        anchor = comic_cell.find('a')
        title = anchor.string
        pictures = soup.find_all('img', id='comic_image')
        published = string_to_date(date_cell.string, '%d %b %Y %I:%M %p')
        assert len(pictures) == 1
        assert all(pic.get('alt') == pic.get('title') for pic in pictures)
        return {
            'num': number,
            'title': title,
            'alt': pictures[0].get('alt', ''),
            'img': [pic['src'] for pic in pictures],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2221
2222
2223 View Code Duplication
class LonnieMillsap(GenericNavigableComic):
    """Class to retrieve Lonnie Millsap's comics."""
    name = 'millsap'
    long_name = 'Lonnie Millsap'
    url = 'http://www.lonniemillsap.com'
    first_url = 'http://www.lonniemillsap.com/?p=42'
    get_first_comic_link = simulate_first_link
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Author, date and images are looked up inside the 'post-content'
        div; the images are those of its 'entry' div.
        """
        post_title = soup.find('h2', class_='post-title').string
        post_div = soup.find('div', class_='post-content')
        author_span = post_div.find("span", class_="post-author")
        author = author_span.find("a").string
        posted_on = post_div.find("span", class_="post-date").string
        published = string_to_date(posted_on, "%B %d, %Y")
        pictures = post_div.find("div", class_="entry").find_all("img")
        return {
            'title': post_title,
            'author': author,
            'img': [pic['src'] for pic in pictures],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2249
2250
2251 View Code Duplication
class LinsEditions(GenericNavigableComic):
    """Scraper for L.I.N.S. Editions comics."""
    # Also on http://linscomics.tumblr.com
    name = 'lins'
    long_name = 'L.I.N.S. Editions'
    url = 'https://linsedition.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'https://linsedition.com/2011/09/07/l-i-n-s/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, images and date read from the page's <meta> tags."""
        heading = soup.find('meta', property='og:title')['content']
        image_metas = soup.find_all('meta', property='og:image')
        published_meta = soup.find('meta', property='article:published_time')
        # Only the first 10 characters of the timestamp are parsed as a date.
        raw_date = published_meta['content'][:10]
        published = string_to_date(raw_date, "%Y-%m-%d")
        sources = [meta['content'] for meta in image_metas]
        return {
            'title': heading,
            'img': sources,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2275
2276
2277
class ThorsThundershack(GenericNavigableComic):
    """Scraper for Thor's Thundershack comics."""
    # Also on http://tapastic.com/series/Thors-Thundershac
    name = 'thor'
    long_name = 'Thor\'s Thundershack'
    url = 'http://www.thorsthundershack.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'first navlink' anchor found on the home page."""
        home = get_soup_at_url(cls.url)
        return home.find('a', class_='first navlink')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the link to the next (or previous) comic, or None.

        Anchors whose href is '/comic' are ignored.
        """
        wanted_rel = 'next' if next_ else 'prev'
        candidates = (anchor
                      for anchor in last_soup.find_all('a', rel=wanted_rel)
                      if anchor['href'] != '/comic')
        return next(candidates, None)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return a dict describing the comic shown on the page `soup`.

        Image sources are joined to the site url. The alt text is taken
        from the first image, or is empty when the page has no image.
        """
        heading = soup.find('meta', attrs={'name': 'description'})["content"]
        body_text = soup.find('div', itemprop='articleBody').text
        writer = soup.find('span', itemprop='author copyrightHolder').string
        pictures = soup.find_all('img', itemprop='image')
        for picture in pictures:
            assert picture['title'] == picture['alt']
        if pictures:
            alt_text = pictures[0]['alt']
        else:
            alt_text = ""
        raw_date = soup.find('time', itemprop='datePublished')["datetime"]
        published = string_to_date(raw_date, "%Y-%m-%d %H:%M:%S")
        sources = [urljoin_wrapper(cls.url, picture['src'])
                   for picture in pictures]
        return {
            'img': sources,
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'author': writer,
            'title': heading,
            'alt': alt_text,
            'description': body_text,
        }
2319
2320
2321 View Code Duplication
class GerbilWithAJetpack(GenericNavigableComic):
    """Scraper for Gerbil With A Jetpack comics."""
    name = 'gerbil'
    long_name = 'Gerbil With A Jetpack'
    url = 'http://gerbilwithajetpack.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return a dict describing the comic shown on the page `soup`.

        At least one image is required in the 'comic' div, and every image
        must carry the same text in its alt and title attributes.
        """
        heading = soup.find('h2', class_='post-title').string
        writer = soup.find("span", class_="post-author").find("a").string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        comic_div = soup.find("div", id="comic")
        pictures = comic_div.find_all("img")
        # The first image's alt text is the reference for all the others.
        alt_text = pictures[0]['alt']
        for picture in pictures:
            assert picture['alt'] == picture['title'] == alt_text
        info = {'img': [picture['src'] for picture in pictures]}
        info['title'] = heading
        info['alt'] = alt_text
        info['author'] = writer
        info['day'] = published.day
        info['month'] = published.month
        info['year'] = published.year
        return info
2348
2349
2350 View Code Duplication
class EveryDayBlues(GenericNavigableComic):
    """Scraper for Every Day Blues comics."""
    name = "blues"
    long_name = "Every Day Blues"
    url = "http://everydayblues.net"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return a dict describing the comic shown on the page `soup`.

        The date is parsed with the "de_DE.utf8" locale. At most one image
        is accepted, and its alt and title must both equal the post title.
        """
        heading = soup.find("h2", class_="post-title").string
        author_span = soup.find("span", class_="post-author")
        writer = author_span.find("a").string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%d. %B %Y", "de_DE.utf8")
        pictures = soup.find("div", id="comic").find_all("img")
        for picture in pictures:
            assert picture['alt'] == picture['title'] == heading
        assert len(pictures) <= 1
        sources = [picture['src'] for picture in pictures]
        return {
            'img': sources,
            'title': heading,
            'author': writer,
            'day': published.day,
            'month': published.month,
            'year': published.year,
        }
2376
2377
2378 View Code Duplication
class BiterComics(GenericNavigableComic):
    """Scraper for Biter Comics."""
    name = "biter"
    long_name = "Biter Comics"
    url = "http://www.bitercomics.com"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return a dict describing the comic shown on the page `soup`.

        Exactly one image is expected in the 'comic' div, with identical
        alt and title attributes.
        """
        heading = soup.find("h1", class_="entry-title").string
        vcard = soup.find("span", class_="author vcard")
        writer = vcard.find("a").string
        raw_date = soup.find("span", class_="entry-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find("div", id="comic").find_all("img")
        for picture in pictures:
            assert picture['alt'] == picture['title']
        assert len(pictures) == 1
        alt_text = pictures[0]['alt']
        return {
            'img': [picture['src'] for picture in pictures],
            'title': heading,
            'alt': alt_text,
            'author': writer,
            'day': published.day,
            'month': published.month,
            'year': published.year,
        }
2406
2407
2408 View Code Duplication
class TheAwkwardYeti(GenericNavigableComic):
    """Scraper for The Awkward Yeti comics."""
    # Also on http://www.gocomics.com/the-awkward-yeti
    # Also on http://larstheyeti.tumblr.com
    # Also on https://tapastic.com/series/TheAwkwardYeti
    name = 'yeti'
    long_name = 'The Awkward Yeti'
    url = 'http://theawkwardyeti.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, images and date for the comic on the page `soup`."""
        heading = soup.find('h2', class_='post-title').string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find("div", id="comic").find_all("img")
        # Only the first image is required to have matching alt and title.
        for first_picture in pictures[:1]:
            assert first_picture['alt'] == first_picture['title']
        sources = [picture['src'] for picture in pictures]
        return {
            'img': sources,
            'title': heading,
            'day': published.day,
            'month': published.month,
            'year': published.year,
        }
2434
2435
2436
class PleasantThoughts(GenericNavigableComic):
    """Scraper for Pleasant Thoughts comics."""
    name = 'pleasant'
    long_name = 'Pleasant Thoughts'
    url = 'http://pleasant-thoughts.com'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the title and image sources found in the post content."""
        content = soup.find('div', class_='post-content')
        heading = content.find('h2', class_='post-title').string
        entry = content.find("div", class_="entry")
        sources = [picture['src'] for picture in entry.find_all("img")]
        return {'title': heading, 'img': sources}
2454
2455
2456
class MisterAndMe(GenericNavigableComic):
    """Scraper for Mister & Me comics."""
    # Also on http://www.gocomics.com/mister-and-me
    # Also on https://tapastic.com/series/Mister-and-Me
    name = 'mister'
    long_name = 'Mister & Me'
    url = 'http://www.mister-and-me.com'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return a dict describing the comic shown on the page `soup`.

        At most one image is accepted. Its alt text is reported under
        'alt', which is empty when the page has no image.
        """
        heading = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        writer = author_span.find("a").string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find("div", id="comic").find_all("img")
        for picture in pictures:
            assert picture['alt'] == picture['title']
        assert len(pictures) <= 1
        if pictures:
            alt_text = pictures[0]['alt']
        else:
            alt_text = ""
        return {
            'img': [picture['src'] for picture in pictures],
            'title': heading,
            'alt': alt_text,
            'author': writer,
            'day': published.day,
            'month': published.month,
            'year': published.year,
        }
2486
2487
2488 View Code Duplication
class LastPlaceComics(GenericNavigableComic):
    """Scraper for Last Place Comics."""
    name = 'lastplace'
    long_name = 'Last Place Comics'
    url = "http://lastplacecomics.com"
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return a dict describing the comic shown on the page `soup`.

        Zero or one image is accepted; 'alt' is the image's alt text, or
        an empty string when there is no image.
        """
        heading = soup.find('h2', class_='post-title').string
        writer = soup.find("span", class_="post-author").find("a").string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        comic_div = soup.find("div", id="comic")
        pictures = comic_div.find_all("img")
        for picture in pictures:
            assert picture['alt'] == picture['title']
        assert len(pictures) <= 1
        alt_text = ""
        if pictures:
            alt_text = pictures[0]['alt']
        info = {'img': [picture['src'] for picture in pictures]}
        info['title'] = heading
        info['alt'] = alt_text
        info['author'] = writer
        info['day'] = published.day
        info['month'] = published.month
        info['year'] = published.year
        return info
2516
2517
2518 View Code Duplication
class TalesOfAbsurdity(GenericNavigableComic):
    """Scraper for Tales of Absurdity comics."""
    # Also on http://tapastic.com/series/Tales-Of-Absurdity
    # Also on http://talesofabsurdity.tumblr.com
    name = 'absurdity'
    long_name = 'Tales of Absurdity'
    url = 'http://talesofabsurdity.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return a dict describing the comic shown on the page `soup`.

        Every image must have identical alt and title attributes; 'alt' is
        taken from the first image, or is empty when there is none.
        """
        heading = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        writer = author_span.find("a").string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find("div", id="comic").find_all("img")
        for picture in pictures:
            assert picture['alt'] == picture['title']
        if pictures:
            alt_text = pictures[0]['alt']
        else:
            alt_text = ""
        sources = [picture['src'] for picture in pictures]
        return {
            'img': sources,
            'title': heading,
            'alt': alt_text,
            'author': writer,
            'day': published.day,
            'month': published.month,
            'year': published.year,
        }
2547
2548
2549
class EndlessOrigami(GenericNavigableComic):
    """Scraper for Endless Origami comics."""
    name = "origami"
    long_name = "Endless Origami"
    url = "http://endlessorigami.com"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return a dict describing the comic shown on the page `soup`.

        'alt' holds the first image's alt text and is empty when the
        'comic' div contains no image.
        """
        heading = soup.find('h2', class_='post-title').string
        writer = soup.find("span", class_="post-author").find("a").string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        comic_div = soup.find("div", id="comic")
        pictures = comic_div.find_all("img")
        for picture in pictures:
            assert picture['alt'] == picture['title']
        alt_text = ""
        if pictures:
            alt_text = pictures[0]['alt']
        return {
            'img': [picture['src'] for picture in pictures],
            'title': heading,
            'alt': alt_text,
            'author': writer,
            'day': published.day,
            'month': published.month,
            'year': published.year,
        }
2576
2577
2578
class PlanC(GenericNavigableComic):
    """Scraper for Plan C comics."""
    name = 'planc'
    long_name = 'Plan C'
    url = 'http://www.plancomic.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, images and date for the comic on the page `soup`."""
        heading = soup.find('h2', class_='post-title').string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        comic_div = soup.find('div', id='comic')
        sources = [picture['src'] for picture in comic_div.find_all('img')]
        return {
            'title': heading,
            'img': sources,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2600 View Code Duplication
0 ignored issues
show
This code seems to be duplicated in your project.
Loading history...
2601
2602
class BuniComic(GenericNavigableComic):
    """Scraper for Buni comics."""
    name = 'buni'
    long_name = 'BuniComics'
    url = 'http://www.bunicomic.com'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the image source and title of the comic on the page `soup`.

        Exactly one image is expected; its title attribute (which must
        equal its alt attribute) is used as the comic title.
        """
        comic_div = soup.find('div', id='comic')
        pictures = comic_div.find_all('img')
        for picture in pictures:
            assert picture['alt'] == picture['title']
        assert len(pictures) == 1
        sources = [picture['src'] for picture in pictures]
        return {'img': sources, 'title': pictures[0]['title']}
2620
2621
2622
class GenericCommitStrip(GenericNavigableComic):
    """Shared scraper for the Commit Strip editions.

    Subclasses provide name, long_name, url and first_url.
    """
    get_navi_link = get_a_rel_next
    get_first_comic_link = simulate_first_link
    # Placeholder: each concrete edition sets its own first page.
    first_url = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return a dict describing the comic shown on the page `soup`.

        'title2' is the space-joined title attributes of the images (an
        empty string stands in for a missing title). Image sources go
        through convert_iri_to_plain_ascii_uri and are joined to cls.url.
        """
        description = soup.find('meta', property='og:description')['content']
        heading = soup.find('meta', property='og:title')['content']
        entry = soup.find('div', class_='entry-content')
        pictures = entry.find_all('img')
        picture_titles = ' '.join(
            picture.get('title', '') for picture in pictures)
        sources = []
        for picture in pictures:
            ascii_src = convert_iri_to_plain_ascii_uri(picture['src'])
            sources.append(urljoin_wrapper(cls.url, ascii_src))
        return {
            'title': heading,
            'title2': picture_titles,
            'description': description,
            'img': sources,
        }
2641
2642
2643
class CommitStripFr(GenericCommitStrip):
    """French edition of Commit Strip.

    Only site-specific settings live here; the scraping methods are
    inherited from GenericCommitStrip.
    """
    name = 'commit_fr'
    long_name = 'Commit Strip (Fr)'
    url = 'http://www.commitstrip.com/fr'
    first_url = 'http://www.commitstrip.com/fr/2012/02/22/interview/'
2649
2650
2651
class CommitStripEn(GenericCommitStrip):
    """English edition of Commit Strip.

    Only site-specific settings live here; the scraping methods are
    inherited from GenericCommitStrip.
    """
    name = 'commit_en'
    long_name = 'Commit Strip (En)'
    url = 'http://www.commitstrip.com/en'
    first_url = 'http://www.commitstrip.com/en/2012/02/22/interview/'
2657
2658
2659 View Code Duplication
class GenericBoumerie(GenericNavigableComic):
    """Shared scraper for the Boumeries editions.

    Subclasses set date_format and lang, which are passed to
    string_to_date when the post date is parsed.
    """
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next
    # Placeholders overridden by each language-specific subclass.
    date_format = NotImplemented
    lang = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return a dict describing the comic shown on the page `soup`.

        Every image in the 'comic' div must have identical alt and title
        attributes.
        """
        heading = soup.find('h2', class_='post-title').string
        shortlink = soup.find('link', rel='shortlink')['href']
        author_span = soup.find("span", class_="post-author")
        writer = author_span.find("a").string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, cls.date_format, cls.lang)
        pictures = soup.find('div', id='comic').find_all('img')
        for picture in pictures:
            assert picture['alt'] == picture['title']
        return {
            'short_url': shortlink,
            'img': [picture['src'] for picture in pictures],
            'title': heading,
            'author': writer,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2685
2686
2687
class BoumerieEn(GenericBoumerie):
    """English edition of Boumeries; scraping comes from GenericBoumerie."""
    name = 'boumeries_en'
    long_name = 'Boumeries (En)'
    url = 'http://comics.boumerie.com'
    # Date format and locale handed to string_to_date by the parent class.
    date_format = "%B %d, %Y"
    lang = 'en_GB.UTF-8'
2694
2695
2696
class BoumerieFr(GenericBoumerie):
    """French edition of Boumeries; scraping comes from GenericBoumerie."""
    name = 'boumeries_fr'
    long_name = 'Boumeries (Fr)'
    url = 'http://bd.boumerie.com'
    # Date format and locale handed to string_to_date by the parent class.
    date_format = "%A, %d %B %Y"
    lang = "fr_FR.utf8"
2703
2704
2705 View Code Duplication
class UnearthedComics(GenericNavigableComic):
    """Class to retrieve Unearthed comics."""
    # Also on http://tapastic.com/series/UnearthedComics
    # Also on http://unearthedcomics.tumblr.com
    name = 'unearthed'
    long_name = 'Unearthed Comics'
    url = 'http://unearthedcomics.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://unearthedcomics.com/comics/world-with-turn-signals/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        Returns a dict with the title (empty when the page has neither an
        h1 nor an h2), the og:description text (empty when the meta tag is
        missing), the shortlink under 'url2', the image sources and the
        publication date split into month, year and day.
        """
        short_url = soup.find('link', rel='shortlink')['href']
        title_elt = soup.find('h1') or soup.find('h2')
        title = title_elt.string if title_elt else ""
        # Keep the text of the description, not the <meta> element itself:
        # a parsed tag object is not a usable value for 'description'.
        desc_elt = soup.find('meta', property='og:description')
        desc = desc_elt['content'] if desc_elt is not None else ""
        date_str = soup.find('time', class_='published updated hidden')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        post = soup.find('div', class_="entry content entry-content type-portfolio")
        imgs = post.find_all('img')
        return {
            'title': title,
            'description': desc,
            'url2': short_url,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
2736
2737
2738 View Code Duplication
class Optipess(GenericNavigableComic):
    """Scraper for Optipess comics."""
    name = 'optipess'
    long_name = 'Optipess'
    url = 'http://www.optipess.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return a dict describing the comic shown on the page `soup`.

        A page without a 'comic' div yields an empty image list and an
        empty 'alt'. Otherwise every image must have alt and title both
        equal to the first image's title.
        """
        heading = soup.find('h2', class_='post-title').string
        writer = soup.find("span", class_="post-author").find("a").string
        comic_div = soup.find('div', id='comic')
        if comic_div:
            pictures = comic_div.find_all('img')
        else:
            pictures = []
        if pictures:
            alt_text = pictures[0]['title']
        else:
            alt_text = ""
        for picture in pictures:
            assert picture['alt'] == picture['title'] == alt_text
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, "%B %d, %Y")
        return {
            'title': heading,
            'alt': alt_text,
            'author': writer,
            'img': [picture['src'] for picture in pictures],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2766
2767
2768
class PainTrainComic(GenericNavigableComic):
    """Class to retrieve Pain Train Comics."""
    name = 'paintrain'
    long_name = 'Pain Train Comics'
    url = 'http://paintraincomic.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        The comic number is the `p` parameter of the page's shortlink.
        At least one image is required in the 'comic' div, and every image
        must have alt and title both equal to the first image's title.
        """
        title = soup.find('h2', class_='post-title').string
        short_url = soup.find('link', rel='shortlink')['href']
        # Escape the site url so that its dots only match literal dots.
        short_url_re = re.compile(r'^%s/\?p=([0-9]*)' % re.escape(cls.url))
        num = int(short_url_re.match(short_url).group(1))
        imgs = soup.find('div', id='comic').find_all('img')
        alt = imgs[0]['title']
        assert all(i['alt'] == i['title'] == alt for i in imgs)
        date_str = soup.find('span', class_='post-date').string
        day = string_to_date(date_str, "%d/%m/%Y")
        return {
            'short_url': short_url,
            'num': num,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'alt': alt,
            'title': title,
        }
2798
2799
2800
class MoonBeard(GenericNavigableComic):
    """Class to retrieve MoonBeard comics."""
    # Also on http://blog.squiresjam.es/moonbeard
    # Also on http://www.webtoons.com/en/comedy/moon-beard/list?title_no=471
    name = 'moonbeard'
    long_name = 'Moon Beard'
    url = 'http://moonbeard.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        The comic number is the `p` parameter of the page's shortlink and
        'tags' is the space-joined content of the article:tag meta tags.
        At least one image is required in the 'comic' div, and every image
        must have alt and title both equal to the first image's title.
        """
        title = soup.find('h2', class_='post-title').string
        short_url = soup.find('link', rel='shortlink')['href']
        # Escape the site url so that its dots only match literal dots.
        short_url_re = re.compile(r'^%s/\?p=([0-9]*)' % re.escape(cls.url))
        num = int(short_url_re.match(short_url).group(1))
        imgs = soup.find('div', id='comic').find_all('img')
        alt = imgs[0]['title']
        assert all(i['alt'] == i['title'] == alt for i in imgs)
        date_str = soup.find('span', class_='post-date').string
        day = string_to_date(date_str, "%B %d, %Y")
        tags = ' '.join(t['content'] for t in soup.find_all('meta', property='article:tag'))
        author = soup.find('span', class_='post-author').string
        return {
            'short_url': short_url,
            'num': num,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'title': title,
            'tags': tags,
            'alt': alt,
            'author': author,
        }
2836
2837
2838
class AHamADay(GenericNavigableComic):
    """Class to retrieve class A Ham A Day comics."""
    name = 'ham'
    long_name = 'A Ham A Day'
    url = 'http://www.ahammaday.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.ahammaday.com/today/3/6/french'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns None when the page has no matching navigation item.
        """
        # prev is next / next is prev
        li = last_soup.find('li', class_='previous' if next_ else 'next')
        # Without the <li> there is nothing to follow: report "no link"
        # instead of failing with an AttributeError on None.
        return li.find('a') if li else None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        Returns a dict with the image urls (from the itemprop='image' meta
        tags), the og:title, the author and the publication date split
        into day, month and year.
        """
        date_str = soup.find('time', class_='published')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        author = soup.find('span', class_='blog-author').find('a').string
        title = soup.find('meta', property='og:title')['content']
        imgs = soup.find_all('meta', itemprop='image')
        return {
            'img': [i['content'] for i in imgs],
            'title': title,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
        }
2869 View Code Duplication
2870
2871
class LittleLifeLines(GenericNavigableComic):
    """Scraper for Little Life Lines comics."""
    name = 'life'
    long_name = 'Little Life Lines'
    url = 'http://www.littlelifelines.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.littlelifelines.com/comics/well-done'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the link to the next (or previous) comic, or None."""
        # The site's labels are reversed: its 'prev' item leads to the
        # next comic and its 'next' item to the previous one.
        wanted_class = 'prev' if next_ else 'next'
        item = last_soup.find('li', class_=wanted_class)
        if item:
            return item.find('a')
        return None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return a dict describing the comic shown on the page `soup`.

        Images without a src attribute are ignored. 'alt' comes from the
        first remaining image, so at least one is required.
        """
        heading = soup.find('meta', property='og:title')['content']
        description = soup.find('meta', property='og:description')['content']
        raw_date = soup.find('time', class_='published')['datetime']
        published = string_to_date(raw_date, "%Y-%m-%d")
        writer = soup.find('a', rel='author').string
        body = soup.find('div', class_="body entry-content")
        pictures = [picture for picture in body.find_all('img')
                    if picture.get('src') is not None]
        alt_text = pictures[0]['alt']
        return {
            'title': heading,
            'alt': alt_text,
            'description': description,
            'author': writer,
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [picture['src'] for picture in pictures],
        }
2909
2910
2911
class GenericWordPressInkblot(GenericNavigableComic):
    """Shared scraper for comics published with WordPress and Inkblot."""
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'first webcomic' anchor found on the home page."""
        first_link_class = 'webcomic-link webcomic1-link first-webcomic-link first-webcomic1-link'
        home = get_soup_at_url(cls.url)
        return home.find('a', class_=first_link_class)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, date and images for the comic on the page `soup`."""
        heading = soup.find('meta', property='og:title')['content']
        image_div = soup.find('div', class_='webcomic-image')
        pictures = image_div.find_all('img')
        published_meta = soup.find('meta', property='article:published_time')
        # Only the first 10 characters of the timestamp are parsed as a date.
        raw_date = published_meta['content'][:10]
        published = string_to_date(raw_date, "%Y-%m-%d")
        return {
            'title': heading,
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [picture['src'] for picture in pictures],
        }
2934
2935
2936
class EverythingsStupid(GenericWordPressInkblot):
    """Everything's Stupid comics; scraping comes from GenericWordPressInkblot."""
    # Also on http://tapastic.com/series/EverythingsStupid
    # Also on http://www.webtoons.com/en/challenge/everythings-stupid/list?title_no=14591
    # Also on http://everythingsstupidcomics.tumblr.com
    name = 'stupid'
    long_name = "Everything's Stupid"
    url = 'http://everythingsstupid.net'
2944
2945
2946
class TheIsmComics(GenericWordPressInkblot):
    """The Ism comics; scraping comes from GenericWordPressInkblot."""
    # Also on https://tapastic.com/series/TheIsm (?)
    name = 'theism'
    long_name = "The Ism"
    url = 'http://www.theism-comics.com'
2952
2953
2954
class WoodenPlankStudios(GenericWordPressInkblot):
    """Wooden Plank Studios comics; scraping comes from GenericWordPressInkblot."""
    name = 'woodenplank'
    long_name = 'Wooden Plank Studios'
    url = 'http://woodenplankstudios.com'
2959
2960
2961
class ElectricBunnyComic(GenericNavigableComic):
    """Scraper for Electric Bunny comics."""
    # Also on http://electricbunnycomics.tumblr.com
    name = 'bunny'
    long_name = 'Electric Bunny Comic'
    url = 'http://www.electricbunnycomics.com/View/Comic/153/Welcome+to+Hell'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the parent element of the image whose alt is 'First'."""
        page = get_soup_at_url(cls.url)
        first_icon = page.find('img', alt='First')
        return first_icon.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the parent of the 'Next' (or 'Back') image, or None."""
        wanted_alt = 'Next' if next_ else 'Back'
        icon = last_soup.find('img', alt=wanted_alt)
        if icon:
            return icon.parent
        return None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the og:title and og:image values of the page `soup`."""
        heading = soup.find('meta', property='og:title')['content']
        image_metas = soup.find_all('meta', property='og:image')
        sources = [meta['content'] for meta in image_metas]
        return {'title': heading, 'img': sources}
2989
2990
2991
class SheldonComics(GenericNavigableComic):
    """Class to retrieve Sheldon comics."""
    # Also on http://www.gocomics.com/sheldon
    name = 'sheldon'
    long_name = 'Sheldon Comics'
    url = 'http://www.sheldoncomics.com'

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics (the anchor whose id is "nav-first")."""
        return get_soup_at_url(cls.url).find("a", id="nav-first")

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns the first "nav-next" (or "nav-prev") anchor whose target is
        not the site root, or None when no such anchor exists.
        """
        nav_id = "nav-next" if next_ else "nav-prev"
        for link in last_soup.find_all("a", id=nav_id):
            # Anchors pointing back at the site root are skipped; compare with
            # cls.url rather than a second hard-coded copy of the address.
            if link['href'] != cls.url:
                return link
        return None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The comic is expected to be the single image inside the "comic-foot"
        div, with identical 'alt' and 'title' attributes; the assertions fail
        otherwise.
        """
        imgs = soup.find("div", id="comic-foot").find_all("img")
        assert all(i['alt'] == i['title'] for i in imgs)
        assert len(imgs) == 1
        title = imgs[0]['title']
        return {
            'title': title,
            'img': [i['src'] for i in imgs],
        }
3022
3023
3024
class CubeDrone(GenericNavigableComic):
    """Class to retrieve Cube Drone comics."""
    name = 'cubedrone'
    long_name = 'Cube Drone'
    url = 'http://cube-drone.com/comics'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics (the parent of the 'backward' glyph)."""
        return get_soup_at_url(cls.url).find('span', class_='glyphicon glyphicon-backward').parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns the parent of the right (next) or left (previous) chevron
        glyph, or None when the page has no such glyph.
        """
        class_ = 'glyphicon glyphicon-chevron-' + ('right' if next_ else 'left')
        chevron = last_soup.find('span', class_=class_)
        # find() yields None when nothing matches; report "no link" instead of
        # failing with AttributeError on `.parent`.
        return chevron.parent if chevron is not None else None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Title and secondary url come from the twitter meta tags; 'title2'
        and 'alt' are read from the first comic image.
        """
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        url2 = soup.find('meta', attrs={'name': 'twitter:url'})['content']
        # The publication date is not extracted for now:
        # date_str = soup.find('h2', class_='comic_title').find('small').string
        # day = string_to_date(date_str, "%B %d, %Y, %I:%M %p")
        imgs = soup.find_all('img', class_='comic img-responsive')
        title2 = imgs[0]['title']
        alt = imgs[0]['alt']
        return {
            'url2': url2,
            'title': title,
            'title2': title2,
            'alt': alt,
            'img': [i['src'] for i in imgs],
        }
3059
3060
3061
class MakeItStoopid(GenericNavigableComic):
    """Retrieve Make It Stoopid comics through the page's 'cnav' elements."""
    name = 'stoopid'
    long_name = 'Make it stoopid'
    url = 'http://makeitstoopid.com/comic.php'

    @classmethod
    def get_nav(cls, soup):
        """Return the first five 'cnav' elements, with None for those lacking an href.

        The page is expected to carry the same navigation twice; the
        assertion checks that both copies are equal.
        """
        nav_items = soup.find_all(class_='cnav')
        first_bar = nav_items[:5]
        second_bar = nav_items[5:]
        assert first_bar == second_bar
        # Positions in the bar: begin, prev, archive, next, end.
        links = []
        for item in first_bar:
            links.append(item if item.get('href') is not None else None)
        return links

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'begin' navigation entry of the main page."""
        main_page = get_soup_at_url(cls.url)
        return cls.get_nav(main_page)[0]

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the 'next' (index 3) or 'prev' (index 1) navigation entry."""
        position = 3 if next_ else 1
        return cls.get_nav(last_soup)[position]

    @classmethod
    def get_comic_info(cls, soup, link):
        """Describe a comic: title from the link, images with id 'comicimg'."""
        title = link['title']
        comic_imgs = soup.find_all('img', id='comicimg')
        img_urls = [img['src'] for img in comic_imgs]
        return {
            'title': title,
            'img': img_urls,
        }
3095
3096
3097 View Code Duplication
class TuMourrasMoinsBete(GenericNavigableComic):
    """Retrieve Tu Mourras Moins Bete comics by walking the blog pager links."""
    name = 'mourrasmoinsbete'
    long_name = 'Tu Mourras Moins Bete'
    url = 'http://tumourrasmoinsbete.blogspot.fr'
    get_first_comic_link = simulate_first_link
    first_url = 'http://tumourrasmoinsbete.blogspot.fr/2008/06/essai.html'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the 'newer' pager anchor when next_ is true, else the 'older' one."""
        if next_:
            pager_id = 'Blog1_blog-pager-newer-link'
        else:
            pager_id = 'Blog1_blog-pager-older-link'
        return last_soup.find('a', id=pager_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Describe a comic: page title, images of the article body, author."""
        title = soup.find('title').string
        article = soup.find('div', itemprop='description articleBody')
        img_urls = [img['src'] for img in article.find_all('img')]
        author = soup.find('span', itemprop='author').string
        return {
            'img': img_urls,
            'author': author,
            'title': title,
        }
3121
3122
3123
class GeekAndPoke(GenericNavigableComic):
    """Class to retrieve Geek And Poke comics."""
    name = 'geek'
    long_name = 'Geek And Poke'
    url = 'http://geek-and-poke.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://geek-and-poke.com/geekandpoke/2006/8/27/a-new-place-for-a-not-so-old-blog.html'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic."""
        # NOTE(review): 'prev-item' is used for the next comic and 'next-item'
        # for the previous one - presumably the site lists newest first; confirm.
        return last_soup.find('a', class_='prev-item' if next_ else 'next-item')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Title and description come from the Open Graph meta tags, the date
        from the 'published' time element and the images from the entry body
        (or the "special-content" div when there is no entry body).
        """
        title = soup.find('meta', property='og:title')['content']
        desc = soup.find('meta', property='og:description')['content']
        date_str = soup.find('time', class_='published')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        author = soup.find('a', rel='author').string
        div_content = (soup.find('div', class_="body entry-content") or
                       soup.find('div', class_="special-content"))
        imgs = div_content.find_all('img')
        # Images without a source cannot be downloaded: ignore them.
        imgs = [i for i in imgs if i.get('src') is not None]
        # NOTE(review): if `i` is a BeautifulSoup Tag, `'title' not in i` tests
        # the tag's children rather than its attributes, so this check may be
        # vacuous - confirm before relying on it.
        assert all('title' not in i or i['alt'] == i['title'] for i in imgs)
        # Keep 'alt' a string in every case (it used to be a list when no
        # image was found).
        alt = imgs[0].get('alt', "") if imgs else ""
        return {
            'title': title,
            'alt': alt,
            'description': desc,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
        }
3161
3162
3163 View Code Duplication
class GloryOwlComix(GenericNavigableComic):
    """Retrieve Glory Owl comics by walking the blog pager links."""
    name = 'gloryowl'
    long_name = 'Glory Owl'
    url = 'http://gloryowlcomix.blogspot.fr'
    get_first_comic_link = simulate_first_link
    first_url = 'http://gloryowlcomix.blogspot.fr/2013/02/1_7.html'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the 'newer' pager anchor when next_ is true, else the 'older' one."""
        if next_:
            pager_id = 'Blog1_blog-pager-newer-link'
        else:
            pager_id = 'Blog1_blog-pager-older-link'
        return last_soup.find('a', id=pager_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Describe a comic: page title, 'image_src' links, author."""
        title = soup.find('title').string
        image_links = soup.find_all('link', rel='image_src')
        author = soup.find('a', rel='author').string
        img_urls = [tag['href'] for tag in image_links]
        return {
            'img': img_urls,
            'author': author,
            'title': title,
        }
3187
3188
3189
class GenericTumblrV1(GenericComic):
    """Generic class to retrieve comics from Tumblr using the V1 API.

    Posts are listed through the '/api/read/' endpoint of the blog; each
    post of type 'photo' becomes one comic, other post types are ignored.
    """

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generic implementation of get_next_comic for Tumblr comics.

        Yields one comic dictionary per usable post returned by get_posts.
        """
        for p in cls.get_posts(last_comic):
            comic = cls.get_comic_info(p)
            if comic is not None:
                yield comic

    @classmethod
    def get_url_from_post(cls, post):
        """Get the url of a post (its 'url' attribute)."""
        return post['url']

    @classmethod
    def get_api_url(cls):
        """Get the url of the API endpoint for this blog."""
        return urljoin_wrapper(cls.url, '/api/read/')

    @classmethod
    def get_comic_info(cls, post):
        """Get information about a particular comics.

        Returns None when the post type is not 'photo', otherwise a
        dictionary describing the comic.
        """
        # print(post)
        type_ = post['type']
        if type_ != 'photo':
            # print("Type is %s" % type_)
            return None
        tumblr_id = int(post['id'])
        api_url = cls.get_api_url() + '?id=%d' % (tumblr_id)
        day = datetime.datetime.fromtimestamp(int(post['unix-timestamp'])).date()
        caption = post.find('photo-caption')
        title = caption.string if caption else ""
        # A tag element without text has no string: skip it instead of
        # letting str.join fail on None.
        tags = ' '.join(t.string for t in post.find_all('tag') if t.string is not None)
        # Photos may appear in 'photo' tags and/or straight in the post
        photo_tags = post.find_all('photo')
        if not photo_tags:
            photo_tags = [post]
        # Images are in multiple resolutions - taking the first one
        imgs = [photo.find('photo-url') for photo in photo_tags]
        return {
            'url': cls.get_url_from_post(post),
            'url2': post['url-with-slug'],
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'title': title,
            'tags': tags,
            'img': [i.string for i in imgs],
            'tumblr-id': tumblr_id,
            'api_url': api_url,  # for debug purposes
        }

    @classmethod
    def get_posts(cls, last_comic, nb_post_per_call=10):
        """Get posts using API. nb_post_per_call is max 50.

        Posts are retrieved from newer to older as per the tumblr v1 api
        but are returned in chronological order.

        last_comic is the last comic already retrieved (or None); only posts
        found before reaching its url are returned. nb_post_per_call is
        clamped to the 1..50 range. The result is an iterable of posts; it
        is empty when the previous post cannot be found.
        """
        # Requesting more than the API serves per call would make the paging
        # below step over posts that were never returned.
        nb_post_per_call = max(1, min(nb_post_per_call, 50))
        waiting_for_url = last_comic['url'] if last_comic else None
        posts_acc = []
        if last_comic is not None:
            # Sometimes, tumblr posts are deleted. If the previous post is
            # deleted, we might end up spending a lot of time looking for
            # something that doesn't exist. Failing early and clearly might
            # be a better option.
            last_api_url = last_comic['api_url']
            try:
                get_soup_at_url(last_api_url)
            except urllib.error.HTTPError:
                try:
                    get_soup_at_url(cls.url)
                except urllib.error.HTTPError:
                    print("Did not find previous post nor main url %s" % cls.url)
                else:
                    print("Did not find previous post %s : it might have been deleted" % last_api_url)
                return reversed(posts_acc)
        api_url = cls.get_api_url()
        # First call only to learn the total number of posts.
        posts = get_soup_at_url(api_url).find('posts')
        start, total = int(posts['start']), int(posts['total'])
        assert start == 0
        for starting_num in range(0, total, nb_post_per_call):
            api_url2 = api_url + '?start=%d&num=%d' % (starting_num, nb_post_per_call)
            # print(api_url2)
            posts2 = get_soup_at_url(api_url2).find('posts')
            start2, total2 = int(posts2['start']), int(posts2['total'])
            assert starting_num == start2, "%d != %d" % (starting_num, start2)
            # This may happen and should be handled in the future
            assert total == total2, "%d != %d" % (total, total2)
            for p in posts2.find_all('post'):
                # Stop as soon as the last known post is reached.
                if waiting_for_url and waiting_for_url == cls.get_url_from_post(p):
                    return reversed(posts_acc)
                posts_acc.append(p)
        if waiting_for_url is None:
            return reversed(posts_acc)
        print("Did not find %s : there might be a problem" % waiting_for_url)
        return []
3284
3285
3286
class SaturdayMorningBreakfastCerealTumblr(GenericEmptyComic, GenericTumblrV1):
    """Retrieve Saturday Morning Breakfast Cereal comics via the Tumblr V1 API."""
    # NOTE(review): GenericEmptyComic is listed before GenericTumblrV1, so its
    # members take precedence - presumably to switch retrieval off; confirm.
    # Also on http://www.gocomics.com/saturday-morning-breakfast-cereal
    # Also on http://www.smbc-comics.com
    name = 'smbc-tumblr'
    long_name = 'Saturday Morning Breakfast Cereal (from Tumblr)'
    url = 'http://smbc-comics.tumblr.com'
3293
3294
3295
class IrwinCardozo(GenericTumblrV1):
    """Retrieve Irwin Cardozo comics via the Tumblr V1 API."""
    name = 'irwinc'
    long_name = 'Irwin Cardozo'
    url = 'http://irwincardozocomics.tumblr.com'
3300
3301
3302
class AccordingToDevin(GenericTumblrV1):
    """Retrieve According To Devin comics via the Tumblr V1 API."""
    name = 'devin'
    long_name = 'According To Devin'
    url = 'http://accordingtodevin.tumblr.com'
3307
3308
3309
class ItsTheTieTumblr(GenericTumblrV1):
    """Retrieve It's the tie comics via the Tumblr V1 API."""
    # Also on http://itsthetie.com
    # Also on https://tapastic.com/series/itsthetie
    name = 'tie-tumblr'
    long_name = "It's the tie (from Tumblr)"
    url = 'http://itsthetie.tumblr.com'
3316
3317
3318
class OctopunsTumblr(GenericTumblrV1):
    """Retrieve Octopuns comics via the Tumblr V1 API."""
    # Also on http://www.octopuns.net
    name = 'octopuns-tumblr'
    long_name = 'Octopuns (from Tumblr)'
    url = 'http://octopuns.tumblr.com'
3324
3325
3326
class PicturesInBoxesTumblr(GenericTumblrV1):
    """Retrieve Pictures In Boxes comics via the Tumblr V1 API."""
    # Also on http://www.picturesinboxes.com
    name = 'picturesinboxes-tumblr'
    long_name = 'Pictures in Boxes (from Tumblr)'
    url = 'http://picturesinboxescomic.tumblr.com'
3332
3333
3334
class TubeyToonsTumblr(GenericTumblrV1):
    """Retrieve Tubey Toons comics via the Tumblr V1 API."""
    # Also on http://tapastic.com/series/Tubey-Toons
    # Also on http://tubeytoons.com
    name = 'tubeytoons-tumblr'
    long_name = 'Tubey Toons (from Tumblr)'
    url = 'http://tubeytoons.tumblr.com'
3341
3342
3343
class UnearthedComicsTumblr(GenericTumblrV1):
    """Retrieve Unearthed Comics via the Tumblr V1 API."""
    # Also on http://tapastic.com/series/UnearthedComics
    # Also on http://unearthedcomics.com
    name = 'unearthed-tumblr'
    long_name = 'Unearthed Comics (from Tumblr)'
    url = 'http://unearthedcomics.tumblr.com'
3350
3351
3352
class PieComic(GenericTumblrV1):
    """Retrieve Pie Comic comics via the Tumblr V1 API."""
    name = 'pie'
    long_name = 'Pie Comic'
    url = 'http://piecomic.tumblr.com'
3357
3358
3359
class MrEthanDiamond(GenericTumblrV1):
    """Retrieve Mr Ethan Diamond comics via the Tumblr V1 API."""
    name = 'diamond'
    long_name = 'Mr Ethan Diamond'
    url = 'http://mrethandiamond.tumblr.com'
3364
3365
3366
class Flocci(GenericTumblrV1):
    """Retrieve floccinaucinihilipilification comics via the Tumblr V1 API."""
    name = 'flocci'
    long_name = 'floccinaucinihilipilification'
    url = 'http://floccinaucinihilipilificationa.tumblr.com'
3371
3372
3373
class UpAndOut(GenericTumblrV1):
    """Retrieve Up & Out comics via the Tumblr V1 API."""
    # Also on http://tapastic.com/series/UP-and-OUT
    name = 'upandout'
    long_name = 'Up And Out (from Tumblr)'
    url = 'http://upandoutcomic.tumblr.com'
3379
3380
3381
class Pundemonium(GenericTumblrV1):
    """Retrieve Pundemonium comics via the Tumblr V1 API."""
    name = 'pundemonium'
    long_name = 'Pundemonium'
    url = 'http://monstika.tumblr.com'
3386
3387
3388
class PoorlyDrawnLinesTumblr(GenericEmptyComic, GenericTumblrV1):
    """Retrieve Poorly Drawn Lines comics via the Tumblr V1 API."""
    # NOTE(review): GenericEmptyComic is listed before GenericTumblrV1, so its
    # members take precedence - presumably to switch retrieval off; confirm.
    # Also on http://poorlydrawnlines.com
    name = 'poorlydrawn-tumblr'
    long_name = 'Poorly Drawn Lines (from Tumblr)'
    url = 'http://pdlcomics.tumblr.com'
3394
3395
3396
class PearShapedComics(GenericTumblrV1):
    """Retrieve Pear-Shaped Comics via the Tumblr V1 API."""
    name = 'pearshaped'
    long_name = 'Pear-Shaped Comics'
    url = 'http://pearshapedcomics.com'
3401
3402
3403
class PondScumComics(GenericTumblrV1):
    """Retrieve Pond Scum comics via the Tumblr V1 API."""
    name = 'pond'
    long_name = 'Pond Scum'
    url = 'http://pondscumcomic.tumblr.com'
3408
3409
3410
class MercworksTumblr(GenericTumblrV1):
    """Retrieve Mercworks comics via the Tumblr V1 API."""
    # Also on http://mercworks.net
    name = 'mercworks-tumblr'
    long_name = 'Mercworks (from Tumblr)'
    url = 'http://mercworks.tumblr.com'
3416
3417
3418
class OwlTurdTumblr(GenericEmptyComic, GenericTumblrV1):
    """Retrieve Owl Turd comics via the Tumblr V1 API."""
    # NOTE(review): GenericEmptyComic is listed before GenericTumblrV1, so its
    # members take precedence - presumably to switch retrieval off; confirm.
    # Also on http://tapastic.com/series/Owl-Turd-Comix
    name = 'owlturd-tumblr'
    long_name = 'Owl Turd (from Tumblr)'
    url = 'http://owlturd.com'
3424
3425
3426
class VectorBelly(GenericTumblrV1):
    """Retrieve Vector Belly comics via the Tumblr V1 API."""
    # Also on http://vectorbelly.com
    name = 'vector'
    long_name = 'Vector Belly'
    url = 'http://vectorbelly.tumblr.com'
3432
3433
3434
class GoneIntoRapture(GenericTumblrV1):
    """Retrieve Gone Into Rapture comics via the Tumblr V1 API."""
    # Also on http://goneintorapture.tumblr.com
    # Also on http://tapastic.com/series/Goneintorapture
    name = 'rapture'
    long_name = 'Gone Into Rapture'
    url = 'http://www.goneintorapture.com'
3441
3442
3443
class TheOatmealTumblr(GenericTumblrV1):
    """Retrieve The Oatmeal comics via the Tumblr V1 API."""
    # Also on http://theoatmeal.com
    name = 'oatmeal-tumblr'
    long_name = 'The Oatmeal (from Tumblr)'
    url = 'http://oatmeal.tumblr.com'
3449
3450
3451
class HeckIfIKnowComicsTumblr(GenericTumblrV1):
    """Retrieve Heck If I Know comics via the Tumblr V1 API."""
    # Also on http://tapastic.com/series/Regular
    name = 'heck-tumblr'
    long_name = 'Heck if I Know comics (from Tumblr)'
    url = 'http://heckifiknowcomics.com'
3457
3458
3459
class MyJetPack(GenericTumblrV1):
    """Retrieve My Jet Pack comics via the Tumblr V1 API."""
    name = 'jetpack'
    long_name = 'My Jet Pack'
    url = 'http://myjetpack.tumblr.com'
3464
3465
3466
class CheerUpEmoKidTumblr(GenericTumblrV1):
    """Retrieve Cheer Up Emo Kid comics via the Tumblr V1 API."""
    # Also on http://www.cheerupemokid.com
    # Also on http://tapastic.com/series/CUEK
    name = 'cuek-tumblr'
    long_name = 'Cheer Up Emo Kid (from Tumblr)'
    url = 'http://enzocomics.tumblr.com'
3473
3474
3475
class ForLackOfABetterComic(GenericEmptyComic, GenericTumblrV1):
    """Retrieve For Lack Of A Better Comic via the Tumblr V1 API."""
    # NOTE(review): GenericEmptyComic is listed before GenericTumblrV1, so its
    # members take precedence - presumably to switch retrieval off; confirm.
    # Also on http://forlackofabettercomic.com
    name = 'lack'
    long_name = 'For Lack Of A Better Comic'
    url = 'http://forlackofabettercomic.tumblr.com'
3481
3482
3483
class ZenPencilsTumblr(GenericTumblrV1):
    """Retrieve Zen Pencils comics via the Tumblr V1 API."""
    # Also on http://zenpencils.com
    # Also on http://www.gocomics.com/zen-pencils
    name = 'zenpencils-tumblr'
    long_name = 'Zen Pencils (from Tumblr)'
    url = 'http://zenpencils.tumblr.com'
3490
3491
3492
class ThreeWordPhraseTumblr(GenericTumblrV1):
    """Retrieve Three Word Phrase comics via the Tumblr V1 API."""
    # Also on http://threewordphrase.com
    name = 'threeword-tumblr'
    long_name = 'Three Word Phrase (from Tumblr)'
    # NOTE(review): unlike the other Tumblr urls in this file, this one has a
    # 'www.' prefix - confirm it is intended.
    url = 'http://www.threewordphrase.tumblr.com'
3498
3499
3500
class TimeTrabbleTumblr(GenericTumblrV1):
    """Retrieve Time Trabble comics via the Tumblr V1 API."""
    # Also on http://timetrabble.com
    name = 'timetrabble-tumblr'
    long_name = 'Time Trabble (from Tumblr)'
    url = 'http://timetrabble.tumblr.com'
3506
3507
3508
class SafelyEndangeredTumblr(GenericTumblrV1):
    """Retrieve Safely Endangered comics via the Tumblr V1 API."""
    # Also on http://www.safelyendangered.com
    name = 'endangered-tumblr'
    long_name = 'Safely Endangered (from Tumblr)'
    url = 'http://tumblr.safelyendangered.com'
3514
3515
3516
class MouseBearComedyTumblr(GenericTumblrV1):
    """Retrieve Mouse Bear Comedy comics via the Tumblr V1 API."""
    # Also on http://www.mousebearcomedy.com
    name = 'mousebear-tumblr'
    long_name = 'Mouse Bear Comedy (from Tumblr)'
    url = 'http://mousebearcomedy.tumblr.com'
3522
3523
3524
class BouletCorpTumblr(GenericTumblrV1):
    """Retrieve Boulet Corp comics via the Tumblr V1 API."""
    # Also on http://www.bouletcorp.com
    name = 'boulet-tumblr'
    long_name = 'Boulet Corp (from Tumblr)'
    url = 'http://bouletcorp.tumblr.com'
3530
3531
3532
class TheAwkwardYetiTumblr(GenericEmptyComic, GenericTumblrV1):
    """Retrieve The Awkward Yeti comics via the Tumblr V1 API."""
    # NOTE(review): GenericEmptyComic is listed before GenericTumblrV1, so its
    # members take precedence - presumably to switch retrieval off; confirm.
    # Also on http://www.gocomics.com/the-awkward-yeti
    # Also on http://theawkwardyeti.com
    # Also on https://tapastic.com/series/TheAwkwardYeti
    name = 'yeti-tumblr'
    long_name = 'The Awkward Yeti (from Tumblr)'
    url = 'http://larstheyeti.tumblr.com'
3540
3541
3542
class NellucNhoj(GenericTumblrV1):
    """Retrieve Nelluc Nhoj comics via the Tumblr V1 API."""
    name = 'nhoj'
    long_name = 'Nelluc Nhoj'
    url = 'http://nellucnhoj.com'
3547
3548
3549
class DownTheUpwardSpiralTumblr(GenericTumblrV1):
    """Retrieve Down The Upward Spiral comics via the Tumblr V1 API."""
    # Also on http://www.downtheupwardspiral.com
    name = 'spiral-tumblr'
    long_name = 'Down the Upward Spiral (from Tumblr)'
    url = 'http://downtheupwardspiral.tumblr.com'
3555
3556
3557
class AsPerUsualTumblr(GenericTumblrV1):
    """Retrieve As Per Usual comics via the Tumblr V1 API."""
    # Also on https://tapastic.com/series/AsPerUsual
    name = 'usual-tumblr'
    long_name = 'As Per Usual (from Tumblr)'
    url = 'http://as-per-usual.tumblr.com'
3563
3564
3565
class OneOneOneOneComicTumblr(GenericTumblrV1):
    """Retrieve 1111 Comics via the Tumblr V1 API."""
    # Also on http://www.1111comics.me
    # Also on https://tapastic.com/series/1111-Comics
    name = '1111-tumblr'
    long_name = '1111 Comics (from Tumblr)'
    url = 'http://comics1111.tumblr.com'
3572
3573
3574
class JhallComicsTumblr(GenericTumblrV1):
    """Retrieve Jhall Comics via the Tumblr V1 API."""
    # Also on http://jhallcomics.com
    name = 'jhall-tumblr'
    long_name = 'Jhall Comics (from Tumblr)'
    url = 'http://jhallcomics.tumblr.com'
3580
3581
3582
class BerkeleyMewsTumblr(GenericTumblrV1):
    """Retrieve Berkeley Mews comics via the Tumblr V1 API."""
    # Also on http://www.gocomics.com/berkeley-mews
    # Also on http://www.berkeleymews.com
    name = 'berkeley-tumblr'
    long_name = 'Berkeley Mews (from Tumblr)'
    url = 'http://mews.tumblr.com'
3589
3590
3591
class JoanCornellaTumblr(GenericTumblrV1):
    """Retrieve Joan Cornella comics via the Tumblr V1 API."""
    # Also on http://joancornella.net
    name = 'cornella-tumblr'
    long_name = 'Joan Cornella (from Tumblr)'
    url = 'http://cornellajoan.tumblr.com'
3597
3598
3599
class RespawnComicTumblr(GenericTumblrV1):
    """Retrieve Respawn Comic via the Tumblr V1 API."""
    # Also on http://respawncomic.com
    name = 'respawn-tumblr'
    long_name = 'Respawn Comic (from Tumblr)'
    url = 'http://respawncomic.tumblr.com'
3605
3606
3607
class ChrisHallbeckTumblr(GenericEmptyComic, GenericTumblrV1):
    """Class to retrieve Chris Hallbeck comics."""
    # NOTE(review): GenericEmptyComic is listed before GenericTumblrV1, so its
    # members take precedence - presumably to switch retrieval off; confirm.
    # Also on https://tapastic.com/ChrisHallbeck
    # Also on http://maximumble.com
    # Also on http://minimumble.com
    # Also on http://thebookofbiff.com
    name = 'hallbeck-tumblr'
    # Spelling aligned with the class name, the docstring and the url
    # (was 'Hallback').
    long_name = 'Chris Hallbeck (from Tumblr)'
    url = 'http://chrishallbeck.tumblr.com'
3616
3617
3618
class ComicNuggets(GenericTumblrV1):
    """Retrieve Comic Nuggets via the Tumblr V1 API."""
    name = 'nuggets'
    long_name = 'Comic Nuggets'
    url = 'http://comicnuggets.com'
3623
3624
3625
class PigeonGazetteTumblr(GenericTumblrV1):
    """Retrieve The Pigeon Gazette comics via the Tumblr V1 API."""
    # Also on https://tapastic.com/series/The-Pigeon-Gazette
    name = 'pigeon-tumblr'
    long_name = 'The Pigeon Gazette (from Tumblr)'
    url = 'http://thepigeongazette.tumblr.com'
3631
3632
3633
class CancerOwl(GenericTumblrV1):
    """Retrieve Cancer Owl comics via the Tumblr V1 API."""
    # Also on http://cancerowl.com
    name = 'cancerowl-tumblr'
    long_name = 'Cancer Owl (from Tumblr)'
    url = 'http://cancerowl.tumblr.com'
3639
3640
3641
class FowlLanguageTumblr(GenericTumblrV1):
    """Retrieve Fowl Language comics via the Tumblr V1 API."""
    # Also on http://www.fowllanguagecomics.com
    # Also on http://tapastic.com/series/Fowl-Language-Comics
    # Also on http://www.gocomics.com/fowl-language
    name = 'fowllanguage-tumblr'
    long_name = 'Fowl Language Comics (from Tumblr)'
    url = 'http://fowllanguagecomics.tumblr.com'
3649
3650
3651
class TheOdd1sOutTumblr(GenericTumblrV1):
    """Retrieve The Odd 1s Out comics via the Tumblr V1 API."""
    # Also on http://theodd1sout.com
    # Also on https://tapastic.com/series/Theodd1sout
    name = 'theodd-tumblr'
    long_name = 'The Odd 1s Out (from Tumblr)'
    url = 'http://theodd1sout.tumblr.com'
3658
3659
3660
class TheUnderfoldTumblr(GenericTumblrV1):
    """Retrieve The Underfold comics via the Tumblr V1 API."""
    # Also on http://theunderfold.com
    name = 'underfold-tumblr'
    long_name = 'The Underfold (from Tumblr)'
    url = 'http://theunderfold.tumblr.com'
3666
3667
3668
class LolNeinTumblr(GenericTumblrV1):
    """Retrieve Lol Nein comics via the Tumblr V1 API."""
    # Also on http://lolnein.com
    name = 'lolnein-tumblr'
    long_name = 'Lol Nein (from Tumblr)'
    url = 'http://lolneincom.tumblr.com'
3674
3675
3676
class FatAwesomeComicsTumblr(GenericTumblrV1):
    """Retrieve Fat Awesome comics via the Tumblr V1 API."""
    # Also on http://fatawesome.com/comics
    name = 'fatawesome-tumblr'
    long_name = 'Fat Awesome (from Tumblr)'
    url = 'http://fatawesomecomedy.tumblr.com'
3682
3683
3684
class TheWorldIsFlatTumblr(GenericTumblrV1):
    """Retrieve The World Is Flat comics via the Tumblr V1 API."""
    # Also on https://tapastic.com/series/The-World-is-Flat
    name = 'flatworld-tumblr'
    long_name = 'The World Is Flat (from Tumblr)'
    url = 'http://theworldisflatcomics.tumblr.com'
3690
3691
3692
class DorrisMc(GenericEmptyComic, GenericTumblrV1):
    """Retrieve Dorris Mc comics via the Tumblr V1 API."""
    # NOTE(review): GenericEmptyComic is listed before GenericTumblrV1, so its
    # members take precedence - presumably to switch retrieval off; confirm.
    # Also on http://www.gocomics.com/dorris-mccomics
    name = 'dorrismc'
    long_name = 'Dorris Mc'
    url = 'http://dorrismccomics.com'
3698
3699
3700
class LeleozTumblr(GenericEmptyComic, GenericTumblrV1):
    """Retrieve Leleoz comics via the Tumblr V1 API."""
    # NOTE(review): GenericEmptyComic is listed before GenericTumblrV1, so its
    # members take precedence - presumably to switch retrieval off; confirm.
    # Also on https://tapastic.com/series/Leleoz
    name = 'leleoz-tumblr'
    long_name = 'Leleoz (from Tumblr)'
    url = 'http://leleozcomics.tumblr.com'
3706
3707
3708
class MoonBeardTumblr(GenericTumblrV1):
    """Retrieve Moon Beard comics via the Tumblr V1 API."""
    # Also on http://moonbeard.com
    # Also on http://www.webtoons.com/en/comedy/moon-beard/list?title_no=471
    name = 'moonbeard-tumblr'
    long_name = 'Moon Beard (from Tumblr)'
    url = 'http://blog.squiresjam.es/moonbeard'
3715
3716
3717
class AComik(GenericTumblrV1):
    """Retrieve A Comik comics via the Tumblr V1 API."""
    name = 'comik'
    long_name = 'A Comik'
    url = 'http://acomik.com'
3722
3723
3724
class ClassicRandy(GenericTumblrV1):
    """Retrieve Classic Randy comics via the Tumblr V1 API."""
    name = 'randy'
    long_name = 'Classic Randy'
    url = 'http://classicrandy.tumblr.com'
3729
3730
3731
class DagssonTumblr(GenericTumblrV1):
    """Retrieve Dagsson comics via the Tumblr V1 API."""
    # Also on http://www.dagsson.com
    name = 'dagsson-tumblr'
    long_name = 'Dagsson Hugleikur (from Tumblr)'
    url = 'http://hugleikurdagsson.tumblr.com'
3737
3738
3739
class LinsEditionsTumblr(GenericTumblrV1):
    """Retrieve L.I.N.S. Editions comics via the Tumblr V1 API."""
    # Also on https://linsedition.com
    name = 'lins-tumblr'
    long_name = 'L.I.N.S. Editions (from Tumblr)'
    url = 'http://linscomics.tumblr.com'
3745
3746
3747
class OrigamiHotDish(GenericTumblrV1):
    """Retrieve Origami Hot Dish comics via the Tumblr V1 API."""
    name = 'origamihotdish'
    long_name = 'Origami Hot Dish'
    url = 'http://origamihotdish.com'
3752
3753
3754
class HitAndMissComicsTumblr(GenericTumblrV1):
    """Retrieve Hit and Miss Comics via the Tumblr V1 API."""
    name = 'hitandmiss'
    long_name = 'Hit and Miss Comics'
    url = 'http://hitandmisscomics.tumblr.com'
3759
3760
3761
class HMBlanc(GenericTumblrV1):
    """Retrieve HM Blanc comics via the Tumblr V1 API."""
    name = 'hmblanc'
    long_name = 'HM Blanc'
    url = 'http://hmblanc.tumblr.com'
3766
3767
3768
class TalesOfAbsurdityTumblr(GenericTumblrV1):
    """Retrieve Tales Of Absurdity comics via the Tumblr V1 API."""
    # Also on http://talesofabsurdity.com
    # Also on http://tapastic.com/series/Tales-Of-Absurdity
    name = 'absurdity-tumblr'
    long_name = 'Tales of Absurdity (from Tumblr)'
    url = 'http://talesofabsurdity.tumblr.com'
3775
3776
3777
class RobbieAndBobby(GenericTumblrV1):
    """Retrieve Robbie And Bobby comics via the Tumblr V1 API."""
    # Also on http://robbieandbobby.com
    name = 'robbie-tumblr'
    long_name = 'Robbie And Bobby (from Tumblr)'
    url = 'http://robbieandbobby.tumblr.com'
3783
3784
3785
class ElectricBunnyComicTumblr(GenericTumblrV1):
    """Retrieve Electric Bunny Comics via the Tumblr V1 API."""
    # Also on http://www.electricbunnycomics.com/View/Comic/153/Welcome+to+Hell
    name = 'bunny-tumblr'
    long_name = 'Electric Bunny Comic (from Tumblr)'
    url = 'http://electricbunnycomics.tumblr.com'
3791
3792
3793
class Hoomph(GenericTumblrV1):
    """Retrieve Hoomph comics via the Tumblr V1 API."""
    name = 'hoomph'
    long_name = 'Hoomph'
    url = 'http://hoom.ph'
3798
3799
3800
class BFGFSTumblr(GenericTumblrV1):
    """Retrieve BFGFS comics via the Tumblr V1 API."""
    # Also on https://tapastic.com/series/BFGFS
    # Also on http://bfgfs.com
    name = 'bfgfs-tumblr'
    long_name = 'BFGFS (from Tumblr)'
    url = 'http://bfgfs.tumblr.com'
3807
3808
3809
class DoodleForFood(GenericTumblrV1):
    """Doodle For Food comics, handled with the GenericTumblrV1 logic."""

    # Other place publishing the same comic:
    #  - https://tapastic.com/series/Doodle-for-Food
    name = "doodle"
    long_name = "Doodle For Food"
    url = "http://doodleforfood.com"
3815
3816
3817
class CassandraCalinTumblr(GenericEmptyComic, GenericTumblrV1):
    """C. Cassandra comics, as published on Tumblr."""

    # NOTE(review): GenericEmptyComic comes before GenericTumblrV1 in the
    # bases, so it wins in the MRO - presumably this disables retrieval; confirm.
    # Other places publishing the same comic:
    #  - http://cassandracalin.com
    #  - https://tapastic.com/series/C-Cassandra-comics
    name = "cassandra-tumblr"
    long_name = "Cassandra Calin (from Tumblr)"
    url = "http://c-cassandra.tumblr.com"
3824
3825
3826
class DougWasTaken(GenericTumblrV1):
    """Doug Was Taken comics, read from their Tumblr blog."""

    name = "doog"
    long_name = "Doug Was Taken"
    url = "http://dougwastaken.tumblr.com"
3831
3832
3833
class MandatoryRollerCoaster(GenericEmptyComic, GenericTumblrV1):
    """Mandatory Roller Coaster comics."""

    # NOTE(review): GenericEmptyComic comes before GenericTumblrV1 in the
    # bases, so it wins in the MRO - presumably this disables retrieval; confirm.
    name = "rollercoaster"
    long_name = "Mandatory Roller Coaster"
    url = "http://mandatoryrollercoaster.com"
3838
3839
3840
class CEstPasEnRegardantSesPompes(GenericEmptyComic, GenericTumblrV1):
    """C'Est Pas En Regardant Ses Pompes (...) comics, as published on Tumblr."""

    # NOTE(review): GenericEmptyComic comes before GenericTumblrV1 in the
    # bases, so it wins in the MRO - presumably this disables retrieval; confirm.
    name = "cperspqccltt"
    long_name = "C Est Pas En Regardant Ses Pompes (...)"
    url = "http://cperspqccltt.tumblr.com"
3845
3846
3847
class TheGrohlTroll(GenericEmptyComic, GenericTumblrV1):
    """The Grohl Troll comics."""

    # NOTE(review): GenericEmptyComic comes before GenericTumblrV1 in the
    # bases, so it wins in the MRO - presumably this disables retrieval; confirm.
    name = "grohltroll"
    long_name = "The Grohl Troll"
    url = "http://thegrohltroll.com"
3852
3853
3854
class WebcomicName(GenericEmptyComic, GenericTumblrV1):
    """Webcomic Name comics."""

    # NOTE(review): GenericEmptyComic comes before GenericTumblrV1 in the
    # bases, so it wins in the MRO - presumably this disables retrieval; confirm.
    name = "webcomicname"
    long_name = "Webcomic Name"
    url = "http://webcomicname.com"
3859
3860
3861
class BooksOfAdam(GenericEmptyComic, GenericTumblrV1):
    """Books of Adam comics, as published on Tumblr."""

    # NOTE(review): GenericEmptyComic comes before GenericTumblrV1 in the
    # bases, so it wins in the MRO - presumably this disables retrieval; confirm.
    # Other place publishing the same comic: http://www.booksofadam.com
    name = "booksofadam"
    long_name = "Books of Adam"
    url = "http://booksofadam.tumblr.com"
3867
3868
3869
class HarkAVagrant(GenericEmptyComic, GenericTumblrV1):
    """Hark A Vagrant comics, as published on Tumblr."""

    # NOTE(review): GenericEmptyComic comes before GenericTumblrV1 in the
    # bases, so it wins in the MRO - presumably this disables retrieval; confirm.
    # Other place publishing the same comic: http://www.harkavagrant.com
    name = "hark-tumblr"
    long_name = "Hark A Vagrant (from Tumblr)"
    url = "http://beatonna.tumblr.com"
3875
3876
3877
class OurSuperAdventureTumblr(GenericEmptyComic, GenericTumblrV1):
    """Our Super Adventure comics, as published on Tumblr."""

    # NOTE(review): GenericEmptyComic comes before GenericTumblrV1 in the
    # bases, so it wins in the MRO - presumably this disables retrieval; confirm.
    # Other places publishing the same comic:
    #  - https://tapastic.com/series/Our-Super-Adventure
    #  - http://www.oursuperadventure.com
    #  - http://sarahgraley.com
    name = "superadventure-tumblr"
    long_name = "Our Super Adventure (from Tumblr)"
    url = "http://sarahssketchbook.tumblr.com"
3885
3886
3887
class JakeLikesOnions(GenericTumblrV1):
    """Jake Likes Onions comics, handled with the GenericTumblrV1 logic."""

    name = "jake"
    long_name = "Jake Likes Onions"
    url = "http://jakelikesonions.com"
3892
3893
3894
class InYourFaceCake(GenericEmptyComic, GenericTumblrV1):
    """In Your Face Cake comics, as published on Tumblr."""

    # NOTE(review): GenericEmptyComic comes before GenericTumblrV1 in the
    # bases, so it wins in the MRO - presumably this disables retrieval; confirm.
    name = "inyourfacecake-tumblr"
    long_name = "In Your Face Cake (from Tumblr)"
    url = "http://in-your-face-cake.tumblr.com"
3899
3900
3901
class Robospunk(GenericTumblrV1):
    """Robospunk comics, handled with the GenericTumblrV1 logic."""

    name = "robospunk"
    long_name = "Robospunk"
    url = "http://robospunk.com"
3906
3907
3908
class BananaTwinky(GenericTumblrV1):
    """Banana Twinky comics, read from their Tumblr blog."""

    name = "banana"
    long_name = "Banana Twinky"
    url = "http://bananatwinky.tumblr.com"
3913
3914
3915
class YesterdaysPopcornTumblr(GenericTumblrV1):
    """Yesterday's Popcorn comics, read from their Tumblr blog."""

    # Other places publishing the same comic:
    #  - http://www.yesterdayspopcorn.com
    #  - https://tapastic.com/series/Yesterdays-Popcorn
    name = "popcorn-tumblr"
    long_name = "Yesterday's Popcorn (from Tumblr)"
    url = "http://yesterdayspopcorn.tumblr.com"
3922
3923
3924
class TwistedDoodles(GenericEmptyComic, GenericTumblrV1):
    """Twisted Doodles comics."""

    # NOTE(review): GenericEmptyComic comes before GenericTumblrV1 in the
    # bases, so it wins in the MRO - presumably this disables retrieval; confirm.
    name = "twisted"
    long_name = "Twisted Doodles"
    url = "http://www.twisteddoodles.com"
3929
3930
3931
class HorovitzComics(GenericListableComic):
    """Shared logic for the comics published on horovitzcomics.com.

    Subclasses are expected to provide `link_re`, a compiled pattern whose
    first group is the comic number found in the archive hrefs.
    """

    url = "http://www.horovitzcomics.com"
    # The three numeric path components after 'comics/' in the image source
    # are read as year, month and day (in that order) by get_comic_info.
    img_re = re.compile(".*comics/([0-9]*)/([0-9]*)/([0-9]*)/.*$")
    link_re = NotImplemented
    get_url_from_archive_element = join_cls_url_to_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic description from its page and its archive link.

        The number comes from the link href, the title from the link text and
        the date from the source path of the single 'comic' image.
        """
        num = int(cls.link_re.match(link["href"]).group(1))
        title = link.string
        comic_imgs = soup.find_all("img", id="comic")
        assert len(comic_imgs) == 1
        date_parts = cls.img_re.match(comic_imgs[0]["src"]).groups()
        year, month, day = map(int, date_parts)
        return {
            "title": title,
            "day": day,
            "month": month,
            "year": year,
            "img": [img["src"] for img in comic_imgs],
            "num": num,
        }

    @classmethod
    def get_archive_elements(cls):
        """Return the archive anchors matching `link_re`, in reversed page order."""
        archive_soup = get_soup_at_url("http://www.horovitzcomics.com/comics/archive/")
        matching_links = archive_soup.find_all("a", href=cls.link_re)
        return reversed(matching_links)
3961
3962
3963
class HorovitzNew(HorovitzComics):
    """Horovitz comics listed under the '/comics/new/' archive links."""

    name = "horovitznew"
    long_name = "Horovitz New"
    link_re = re.compile("^/comics/new/([0-9]+)$")
3968
3969
3970
class HorovitzClassic(HorovitzComics):
    """Horovitz comics listed under the '/comics/classic/' archive links."""

    name = "horovitzclassic"
    long_name = "Horovitz Classic"
    link_re = re.compile("^/comics/classic/([0-9]+)$")
3975
3976
3977
class GenericGoComic(GenericNavigableComic):
    """Shared navigation and parsing logic for comics hosted on gocomics.com."""

    # Captures the three trailing numeric path components of a comic URL,
    # which get_comic_info reads as year, month and day (in that order).
    url_date_re = re.compile(".*/([0-9]*)/([0-9]*)/([0-9]*)$")

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'beginning' anchor found on the page at `cls.url`."""
        main_page = get_soup_at_url(cls.url)
        return main_page.find("a", class_="beginning")

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the dated 'next' anchor if `next_` is truthy, else the 'prev' one."""
        if next_:
            direction = "next"
        else:
            direction = "prev"
        return last_soup.find("a", class_=direction, href=cls.url_date_re)

    @classmethod
    def get_url_from_link(cls, link):
        """Resolve the link's href against the GoComics site root."""
        return urljoin_wrapper("http://www.gocomics.com", link["href"])

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic description from its page and the link leading to it.

        The date is parsed from the resolved URL; the image is the last
        'strip' image of the page and the author comes from the 'author'
        meta tag.
        """
        comic_url = cls.get_url_from_link(link)
        year, month, day = map(int, cls.url_date_re.match(comic_url).groups())
        strip_src = soup.find_all("img", class_="strip")[-1]["src"]
        author = soup.find("meta", attrs={"name": "author"})["content"]
        return {
            "day": day,
            "month": month,
            "year": year,
            "img": [strip_src],
            "author": author,
        }
4009
4010
4011
class PearlsBeforeSwine(GenericGoComic):
    """Pearls Before Swine strips, read from GoComics."""

    name = "pearls"
    long_name = "Pearls Before Swine"
    url = "http://www.gocomics.com/pearlsbeforeswine"
4016
4017
4018
class Peanuts(GenericGoComic):
    """Peanuts strips, read from GoComics."""

    name = "peanuts"
    long_name = "Peanuts"
    url = "http://www.gocomics.com/peanuts"
4023
4024
4025
class MattWuerker(GenericGoComic):
    """Matt Wuerker cartoons, read from GoComics."""

    name = "wuerker"
    long_name = "Matt Wuerker"
    url = "http://www.gocomics.com/mattwuerker"
4030
4031
4032
class TomToles(GenericGoComic):
    """Tom Toles cartoons, read from GoComics."""

    name = "toles"
    long_name = "Tom Toles"
    url = "http://www.gocomics.com/tomtoles"
4037
4038
4039
class BreakOfDay(GenericGoComic):
    """Break Of Day strips, read from GoComics."""

    name = "breakofday"
    long_name = "Break Of Day"
    url = "http://www.gocomics.com/break-of-day"
4044
4045
4046
class Brevity(GenericGoComic):
    """Brevity strips, read from GoComics."""

    name = "brevity"
    long_name = "Brevity"
    url = "http://www.gocomics.com/brevity"
4051
4052
4053
class MichaelRamirez(GenericGoComic):
    """Michael Ramirez cartoons, read from GoComics."""

    name = "ramirez"
    long_name = "Michael Ramirez"
    url = "http://www.gocomics.com/michaelramirez"
4058
4059
4060
class MikeLuckovich(GenericGoComic):
    """Mike Luckovich cartoons, read from GoComics."""

    name = "luckovich"
    long_name = "Mike Luckovich"
    url = "http://www.gocomics.com/mikeluckovich"
4065
4066
4067
class JimBenton(GenericGoComic):
    """Jim Benton cartoons, read from GoComics."""

    # Other place publishing the same comic: http://jimbenton.tumblr.com
    name = "benton"
    long_name = "Jim Benton"
    url = "http://www.gocomics.com/jim-benton-cartoons"
4073
4074
4075
class TheArgyleSweater(GenericGoComic):
    """The Argyle Sweater strips, read from GoComics."""

    name = "argyle"
    long_name = "Argyle Sweater"
    url = "http://www.gocomics.com/theargylesweater"
4080
4081
4082
class SunnyStreet(GenericGoComic):
    """Sunny Street strips, read from GoComics."""

    # Other place publishing the same comic: http://www.sunnystreetcomics.com
    name = "sunny"
    long_name = "Sunny Street"
    url = "http://www.gocomics.com/sunny-street"
4088
4089
4090
class OffTheMark(GenericGoComic):
    """Off The Mark strips, read from GoComics."""

    # Other place publishing the same comic: https://www.offthemark.com
    name = "offthemark"
    long_name = "Off The Mark"
    url = "http://www.gocomics.com/offthemark"
4096
4097
4098
class WuMo(GenericGoComic):
    """WuMo strips, read from GoComics."""

    # Other place publishing the same comic: http://wumo.com
    name = "wumo"
    long_name = "WuMo"
    url = "http://www.gocomics.com/wumo"
4104
4105
4106
class LunarBaboon(GenericGoComic):
    """Lunar Baboon strips, read from GoComics."""

    # Other places publishing the same comic:
    #  - http://www.lunarbaboon.com
    #  - https://tapastic.com/series/Lunarbaboon
    name = "lunarbaboon"
    long_name = "Lunar Baboon"
    url = "http://www.gocomics.com/lunarbaboon"
4113
4114
4115
class SandersenGocomic(GenericGoComic):
    """Sarah Andersen comics, read from GoComics."""

    # Other places publishing the same comic:
    #  - http://sarahcandersen.com
    #  - http://tapastic.com/series/Doodle-Time
    name = "sandersen-goc"
    long_name = "Sarah Andersen (from GoComics)"
    url = "http://www.gocomics.com/sarahs-scribbles"
4122
4123
4124
class SaturdayMorningBreakfastCerealGoComic(GenericGoComic):
    """Saturday Morning Breakfast Cereal strips, read from GoComics."""

    # Other places publishing the same comic:
    #  - http://smbc-comics.tumblr.com
    #  - http://www.smbc-comics.com
    name = "smbc-goc"
    long_name = "Saturday Morning Breakfast Cereal (from GoComics)"
    url = "http://www.gocomics.com/saturday-morning-breakfast-cereal"
4131
4132
4133
class CalvinAndHobbesGoComic(GenericGoComic):
    """Calvin and Hobbes strips, read from GoComics."""

    # This is the GoComics source, as opposed to
    # http://marcel-oehler.marcellosendos.ch/comics/ch/
    name = "calvin-goc"
    long_name = "Calvin and Hobbes (from GoComics)"
    url = "http://www.gocomics.com/calvinandhobbes"
4139
4140
4141
class RallGoComic(GenericGoComic):
    """Ted Rall cartoons, read from GoComics."""

    # Other place publishing the same comic: http://rall.com/comic
    name = "rall-goc"
    long_name = "Ted Rall (from GoComics)"
    url = "http://www.gocomics.com/tedrall"
4147
4148
4149
class TheAwkwardYetiGoComic(GenericGoComic):
    """The Awkward Yeti strips, read from GoComics."""

    # Other places publishing the same comic:
    #  - http://larstheyeti.tumblr.com
    #  - http://theawkwardyeti.com
    #  - https://tapastic.com/series/TheAwkwardYeti
    name = "yeti-goc"
    long_name = "The Awkward Yeti (from GoComics)"
    url = "http://www.gocomics.com/the-awkward-yeti"
4157
4158
4159
class BerkeleyMewsGoComics(GenericGoComic):
    """Berkeley Mews strips, read from GoComics."""

    # Other places publishing the same comic:
    #  - http://mews.tumblr.com
    #  - http://www.berkeleymews.com
    name = "berkeley-goc"
    long_name = "Berkeley Mews (from GoComics)"
    url = "http://www.gocomics.com/berkeley-mews"
4166
4167
4168
class SheldonGoComics(GenericGoComic):
    """Sheldon strips, read from GoComics."""

    # Other place publishing the same comic: http://www.sheldoncomics.com
    name = "sheldon-goc"
    long_name = "Sheldon Comics (from GoComics)"
    url = "http://www.gocomics.com/sheldon"
4174
4175
4176
class FowlLanguageGoComics(GenericGoComic):
    """Fowl Language strips, read from GoComics."""

    # Other places publishing the same comic:
    #  - http://www.fowllanguagecomics.com
    #  - http://tapastic.com/series/Fowl-Language-Comics
    #  - http://fowllanguagecomics.tumblr.com
    name = "fowllanguage-goc"
    long_name = "Fowl Language Comics (from GoComics)"
    url = "http://www.gocomics.com/fowl-language"
4184
4185
4186
class NickAnderson(GenericGoComic):
    """Nick Anderson cartoons, read from GoComics."""

    name = "nickanderson"
    long_name = "Nick Anderson"
    url = "http://www.gocomics.com/nickanderson"
4191
4192
4193
class GarfieldGoComics(GenericGoComic):
    """Garfield strips, read from GoComics."""

    # Other place publishing the same comic: http://garfield.com
    name = "garfield-goc"
    long_name = "Garfield (from GoComics)"
    url = "http://www.gocomics.com/garfield"
4199
4200
4201
class DorrisMcGoComics(GenericGoComic):
    """Dorris Mc Comics, read from GoComics."""

    # Other place publishing the same comic: http://dorrismccomics.com
    name = "dorrismc-goc"
    long_name = "Dorris Mc (from GoComics)"
    url = "http://www.gocomics.com/dorris-mccomics"
4207
4208
4209
class FoxTrot(GenericGoComic):
    """FoxTrot strips, read from GoComics."""

    name = "foxtrot"
    long_name = "FoxTrot"
    url = "http://www.gocomics.com/foxtrot"
4214
4215
4216
class FoxTrotClassics(GenericGoComic):
    """FoxTrot Classics strips, read from GoComics."""

    name = "foxtrot-classics"
    long_name = "FoxTrot Classics"
    url = "http://www.gocomics.com/foxtrotclassics"
4221
4222
4223
class MisterAndMeGoComics(GenericGoComic):
    """Mister & Me strips, read from GoComics."""

    # Other places publishing the same comic:
    #  - http://www.mister-and-me.com
    #  - https://tapastic.com/series/Mister-and-Me
    name = "mister-goc"
    long_name = "Mister & Me (from GoComics)"
    url = "http://www.gocomics.com/mister-and-me"
4230
4231
4232
class NonSequitur(GenericGoComic):
    """Non Sequitur (Wiley Miller) strips, read from GoComics."""

    name = "nonsequitur"
    long_name = "Non Sequitur"
    url = "http://www.gocomics.com/nonsequitur"
4237
4238
4239
class GenericTapasticComic(GenericListableComic):
    """Generic class to handle the logic common to comics from tapastic.com."""

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Get information about a particular comics.

        `soup` is the parsed episode page and `archive_elt` the matching entry
        of the episode list (see get_archive_elements); its 'publishDate',
        'title' and 'id' keys are read.

        Returns a dict with the date, images and title, or None (after
        printing a message) when the page has no 'art-image' image yet.
        """
        # 'publishDate' is divided by 1000 before being used as a POSIX
        # timestamp (presumably it is expressed in milliseconds).
        # NOTE(review): fromtimestamp() converts to the machine's local time.
        timestamp = int(archive_elt['publishDate']) / 1000.0
        day = datetime.datetime.fromtimestamp(timestamp).date()
        imgs = soup.find_all('img', class_='art-image')
        if not imgs:
            print("Comic %s is being uploaded, retry later" % cls.get_url_from_archive_element(archive_elt))
            return None
        return {
            'day': day.day,
            'year': day.year,
            'month': day.month,
            'img': [i['src'] for i in imgs],
            'title': archive_elt['title'],
        }

    @classmethod
    def get_url_from_archive_element(cls, archive_elt):
        """Get the episode url corresponding to an element of the episode list."""
        return 'http://tapastic.com/episode/' + str(archive_elt['id'])

    @classmethod
    def get_archive_elements(cls):
        """Get the episode list embedded in the javascript of the series page.

        Returns the decoded JSON value of the first line of the page at
        `cls.url` shaped like ``episodeList : <json>,``.
        Raises IndexError when no such line exists.
        """
        pref, suff = 'episodeList : ', ','
        # Information is stored in the javascript part
        # I don't know the clean way to get it so this is the ugly way.
        for raw_line in urlopen_wrapper(cls.url).readlines():
            line = raw_line.decode('utf-8').strip()
            if line.startswith(pref) and line.endswith(suff):
                return json.loads(line[len(pref):-len(suff)])
        # IndexError is what the previous list-indexing implementation raised.
        raise IndexError("No episode list found at %s" % cls.url)
4271
4272
4273
class VegetablesForDessert(GenericTapasticComic):
    """Vegetables For Dessert comics, read from Tapastic."""

    # Other place publishing the same comic: http://vegetablesfordessert.tumblr.com
    name = "vegetables"
    long_name = "Vegetables For Dessert"
    url = "http://tapastic.com/series/vegetablesfordessert"
4279
4280
4281
class FowlLanguageTapa(GenericTapasticComic):
    """Fowl Language comics, read from Tapastic."""

    # Other places publishing the same comic:
    #  - http://www.fowllanguagecomics.com
    #  - http://fowllanguagecomics.tumblr.com
    #  - http://www.gocomics.com/fowl-language
    name = "fowllanguage-tapa"
    long_name = "Fowl Language Comics (from Tapastic)"
    url = "http://tapastic.com/series/Fowl-Language-Comics"
4289
4290
4291
class OscillatingProfundities(GenericTapasticComic):
    """Oscillating Profundities comics, read from Tapastic."""

    name = "oscillating"
    long_name = "Oscillating Profundities"
    url = "http://tapastic.com/series/oscillatingprofundities"
4296
4297
4298
class ZnoflatsComics(GenericTapasticComic):
    """Znoflats comics, read from Tapastic."""

    name = "znoflats"
    long_name = "Znoflats Comics"
    url = "http://tapastic.com/series/Znoflats-Comics"
4303
4304
4305
class SandersenTapastic(GenericTapasticComic):
    """Sarah Andersen comics, read from Tapastic."""

    # Other places publishing the same comic:
    #  - http://sarahcandersen.com
    #  - http://www.gocomics.com/sarahs-scribbles
    name = "sandersen-tapa"
    long_name = "Sarah Andersen (from Tapastic)"
    url = "http://tapastic.com/series/Doodle-Time"
4312
4313
4314
class TubeyToonsTapastic(GenericTapasticComic):
    """TubeyToons comics, read from Tapastic."""

    # Other places publishing the same comic:
    #  - http://tubeytoons.com
    #  - http://tubeytoons.tumblr.com
    name = "tubeytoons-tapa"
    long_name = "Tubey Toons (from Tapastic)"
    url = "http://tapastic.com/series/Tubey-Toons"
4321
4322
4323
class AnythingComicTapastic(GenericTapasticComic):
    """Anything Comics, read from Tapastic."""

    # Other place publishing the same comic: http://www.anythingcomic.com
    name = "anythingcomic-tapa"
    long_name = "Anything Comic (from Tapastic)"
    url = "http://tapastic.com/series/anything"
4329
4330
4331
class UnearthedComicsTapastic(GenericTapasticComic):
    """Unearthed comics, read from Tapastic."""

    # Other places publishing the same comic:
    #  - http://unearthedcomics.com
    #  - http://unearthedcomics.tumblr.com
    name = "unearthed-tapa"
    long_name = "Unearthed Comics (from Tapastic)"
    url = "http://tapastic.com/series/UnearthedComics"
4338
4339
4340
class EverythingsStupidTapastic(GenericTapasticComic):
    """Everything's Stupid comics, read from Tapastic."""

    # Other places publishing the same comic:
    #  - http://www.webtoons.com/en/challenge/everythings-stupid/list?title_no=14591
    #  - http://everythingsstupid.net
    name = "stupid-tapa"
    long_name = "Everything's Stupid (from Tapastic)"
    url = "http://tapastic.com/series/EverythingsStupid"
4347
4348
4349
class JustSayEhTapastic(GenericTapasticComic):
    """Just Say Eh comics, read from Tapastic."""

    # Other place publishing the same comic: http://www.justsayeh.com
    name = "justsayeh-tapa"
    long_name = "Just Say Eh (from Tapastic)"
    url = "http://tapastic.com/series/Just-Say-Eh"
4355
4356
4357
class ThorsThundershackTapastic(GenericTapasticComic):
    """Thor's Thundershack comics, read from Tapastic."""

    # Other place publishing the same comic: http://www.thorsthundershack.com
    name = "thor-tapa"
    long_name = "Thor's Thundershack (from Tapastic)"
    url = "http://tapastic.com/series/Thors-Thundershac"
4363
4364
4365
class OwlTurdTapastic(GenericTapasticComic):
    """Owl Turd comics, read from Tapastic."""

    # Other place publishing the same comic: http://owlturd.com
    name = "owlturd-tapa"
    long_name = "Owl Turd (from Tapastic)"
    url = "http://tapastic.com/series/Owl-Turd-Comix"
4371
4372
4373
class GoneIntoRaptureTapastic(GenericTapasticComic):
    """Gone Into Rapture comics, read from Tapastic."""

    # Other places publishing the same comic:
    #  - http://goneintorapture.tumblr.com
    #  - http://www.goneintorapture.com
    name = "rapture-tapa"
    long_name = "Gone Into Rapture (from Tapastic)"
    url = "http://tapastic.com/series/Goneintorapture"
4380
4381
4382
class HeckIfIKnowComicsTapa(GenericTapasticComic):
    """Heck If I Know Comics, read from Tapastic."""

    # Other place publishing the same comic: http://heckifiknowcomics.com
    name = "heck-tapa"
    long_name = "Heck if I Know comics (from Tapastic)"
    url = "http://tapastic.com/series/Regular"
4388
4389
4390
class CheerUpEmoKidTapa(GenericTapasticComic):
    """CheerUpEmoKid comics, read from Tapastic."""

    # Other places publishing the same comic:
    #  - http://www.cheerupemokid.com
    #  - http://enzocomics.tumblr.com
    name = "cuek-tapa"
    long_name = "Cheer Up Emo Kid (from Tapastic)"
    url = "http://tapastic.com/series/CUEK"
4397
4398
4399
class BigFootJusticeTapa(GenericTapasticComic):
    """Big Foot Justice comics, read from Tapastic."""

    # Other place publishing the same comic: http://bigfootjustice.com
    name = "bigfoot-tapa"
    long_name = "Big Foot Justice (from Tapastic)"
    url = "http://tapastic.com/series/bigfoot-justice"
4405
4406
4407
class UpAndOutTapa(GenericTapasticComic):
    """Up & Out comics, read from Tapastic."""

    # Other place publishing the same comic: http://upandoutcomic.tumblr.com
    name = "upandout-tapa"
    long_name = "Up And Out (from Tapastic)"
    url = "http://tapastic.com/series/UP-and-OUT"
4413
4414
4415
class ToonHoleTapa(GenericTapasticComic):
    """Toon Hole comics, read from Tapastic."""

    # Other place publishing the same comic: http://www.toonhole.com
    name = "toonhole-tapa"
    long_name = "Toon Hole (from Tapastic)"
    url = "http://tapastic.com/series/TOONHOLE"
4421
4422
4423
class AngryAtNothingTapa(GenericTapasticComic):
    """Angry at Nothing comics, read from Tapastic."""

    # Other place publishing the same comic: http://www.angryatnothing.net
    name = "angry-tapa"
    long_name = "Angry At Nothing (from Tapastic)"
    url = "http://tapastic.com/series/Comics-yeah-definitely-comics-"
4429
4430
4431
class LeleozTapa(GenericTapasticComic):
    """Leleoz comics, read from Tapastic."""

    # Other place publishing the same comic: http://leleozcomics.tumblr.com
    name = "leleoz-tapa"
    long_name = "Leleoz (from Tapastic)"
    url = "https://tapastic.com/series/Leleoz"
4437
4438
4439
class TheAwkwardYetiTapa(GenericTapasticComic):
    """The Awkward Yeti comics, read from Tapastic."""

    # Other places publishing the same comic:
    #  - http://www.gocomics.com/the-awkward-yeti
    #  - http://theawkwardyeti.com
    #  - http://larstheyeti.tumblr.com
    name = "yeti-tapa"
    long_name = "The Awkward Yeti (from Tapastic)"
    url = "https://tapastic.com/series/TheAwkwardYeti"
4447
4448
4449
class AsPerUsualTapa(GenericTapasticComic):
    """As Per Usual comics, read from Tapastic."""

    # Other place publishing the same comic: http://as-per-usual.tumblr.com
    name = "usual-tapa"
    long_name = "As Per Usual (from Tapastic)"
    url = "https://tapastic.com/series/AsPerUsual"
4455
4456
4457
class OneOneOneOneComicTapa(GenericTapasticComic):
    """1111 Comics, read from Tapastic."""

    # Other places publishing the same comic:
    #  - http://www.1111comics.me
    #  - http://comics1111.tumblr.com
    name = "1111-tapa"
    long_name = "1111 Comics (from Tapastic)"
    url = "https://tapastic.com/series/1111-Comics"
4464
4465
4466
class TumbleDryTapa(GenericTapasticComic):
    """Class to retrieve Tumble Dry comics."""
    # Also on http://tumbledrycomics.com
    name = 'tumbledry-tapa'
    # The display name used to read 'Tumblr Dry', a typo for the comic's
    # actual title (see the class name, docstring and url).
    long_name = 'Tumble Dry (from Tapastic)'
    url = 'https://tapastic.com/series/TumbleDryComics'
4472
4473
4474
class DeadlyPanelTapa(GenericTapasticComic):
    """Deadly Panel comics, read from Tapastic."""

    # Other place publishing the same comic: http://www.deadlypanel.com
    name = "deadly-tapa"
    long_name = "Deadly Panel (from Tapastic)"
    url = "https://tapastic.com/series/deadlypanel"
4480
4481
4482
class ChrisHallbeckMaxiTapa(GenericTapasticComic):
    """Class to retrieve Chris Hallbeck's Maximumble comics."""
    # Also on http://chrishallbeck.tumblr.com
    # Also on http://maximumble.com
    name = 'hallbeckmaxi-tapa'
    # The author's name used to be misspelled 'Hallback' in the display name.
    long_name = 'Chris Hallbeck - Maximumble (from Tapastic)'
    url = 'https://tapastic.com/series/Maximumble'
4489
4490
4491
class ChrisHallbeckMiniTapa(GenericTapasticComic):
    """Class to retrieve Chris Hallbeck's Minimumble comics."""
    # Also on http://chrishallbeck.tumblr.com
    # Also on http://minimumble.com
    name = 'hallbeckmini-tapa'
    # The author's name used to be misspelled 'Hallback' in the display name.
    long_name = 'Chris Hallbeck - Minimumble (from Tapastic)'
    url = 'https://tapastic.com/series/Minimumble'
4498
4499
4500
class ChrisHallbeckBiffTapa(GenericTapasticComic):
    """Class to retrieve Chris Hallbeck's The Book of Biff comics."""
    # Also on http://chrishallbeck.tumblr.com
    # Also on http://thebookofbiff.com
    name = 'hallbeckbiff-tapa'
    # The author's name used to be misspelled 'Hallback' in the display name.
    long_name = 'Chris Hallbeck - The Book of Biff (from Tapastic)'
    url = 'https://tapastic.com/series/Biff'
4507
4508
4509
class RandoWisTapa(GenericTapasticComic):
    """RandoWis comics, read from Tapastic."""

    # Other place publishing the same comic: https://randowis.com
    name = "randowis-tapa"
    long_name = "RandoWis (from Tapastic)"
    url = "https://tapastic.com/series/RandoWis"
4515
4516
4517
class PigeonGazetteTapa(GenericTapasticComic):
    """The Pigeon Gazette comics, read from Tapastic."""

    # Other place publishing the same comic: http://thepigeongazette.tumblr.com
    name = "pigeon-tapa"
    long_name = "The Pigeon Gazette (from Tapastic)"
    url = "https://tapastic.com/series/The-Pigeon-Gazette"
4523
4524
4525
class TheOdd1sOutTapa(GenericTapasticComic):
    """The Odd 1s Out comics, read from Tapastic."""

    # Other places publishing the same comic:
    #  - http://theodd1sout.com
    #  - http://theodd1sout.tumblr.com
    name = "theodd-tapa"
    long_name = "The Odd 1s Out (from Tapastic)"
    url = "https://tapastic.com/series/Theodd1sout"
4532
4533
4534
class TheWorldIsFlatTapa(GenericTapasticComic):
    """The World Is Flat comics, read from Tapastic."""

    # Other place publishing the same comic: http://theworldisflatcomics.tumblr.com
    name = "flatworld-tapa"
    long_name = "The World Is Flat (from Tapastic)"
    url = "https://tapastic.com/series/The-World-is-Flat"
4540
4541
4542
class MisterAndMeTapa(GenericTapasticComic):
    """Mister & Me comics, read from Tapastic."""

    # Other places publishing the same comic:
    #  - http://www.mister-and-me.com
    #  - http://www.gocomics.com/mister-and-me
    name = "mister-tapa"
    long_name = "Mister & Me (from Tapastic)"
    url = "https://tapastic.com/series/Mister-and-Me"
4549
4550
4551
class TalesOfAbsurdityTapa(GenericTapasticComic):
    """Tales Of Absurdity comics, read from Tapastic."""

    # Other places publishing the same comic:
    #  - http://talesofabsurdity.com
    #  - http://talesofabsurdity.tumblr.com
    name = "absurdity-tapa"
    long_name = "Tales of Absurdity (from Tapastic)"
    url = "http://tapastic.com/series/Tales-Of-Absurdity"
4558
4559
4560
class BFGFSTapa(GenericTapasticComic):
    """BFGFS comics, read from Tapastic."""

    # Other places publishing the same comic:
    #  - http://bfgfs.com
    #  - http://bfgfs.tumblr.com
    name = "bfgfs-tapa"
    long_name = "BFGFS (from Tapastic)"
    url = "https://tapastic.com/series/BFGFS"
4567
4568
4569
class DoodleForFoodTapa(GenericTapasticComic):
    """Doodle For Food comics, read from Tapastic."""

    # Other place publishing the same comic: http://doodleforfood.com
    name = "doodle-tapa"
    long_name = "Doodle For Food (from Tapastic)"
    url = "https://tapastic.com/series/Doodle-for-Food"
4575
4576
4577
class MrLovensteinTapa(GenericTapasticComic):
    """Mr Lovenstein comics, read from Tapastic."""

    # NOTE(review): the previous "Also on" comment pointed back at this very
    # Tapastic url; presumably the comic's own website was meant - confirm.
    name = "mrlovenstein-tapa"
    long_name = "Mr. Lovenstein (from Tapastic)"
    url = "https://tapastic.com/series/MrLovenstein"
4583
4584
4585
class CassandraCalinTapa(GenericTapasticComic):
    """C. Cassandra comics, read from Tapastic."""

    # Other places publishing the same comic:
    #  - http://cassandracalin.com
    #  - http://c-cassandra.tumblr.com
    name = "cassandra-tapa"
    long_name = "Cassandra Calin (from Tapastic)"
    url = "https://tapastic.com/series/C-Cassandra-comics"
4592
4593
4594
class WafflesAndPancakes(GenericTapasticComic):
    """Waffles And Pancakes comics, read from Tapastic."""

    # Other place publishing the same comic: http://wandpcomic.com
    name = "waffles"
    long_name = "Waffles And Pancakes"
    url = "https://tapastic.com/series/Waffles-and-Pancakes"
4600
4601
4602
class YesterdaysPopcornTapastic(GenericTapasticComic):
    """Yesterday's Popcorn comics, read from Tapastic."""

    # Other places publishing the same comic:
    #  - http://www.yesterdayspopcorn.com
    #  - http://yesterdayspopcorn.tumblr.com
    name = "popcorn-tapa"
    long_name = "Yesterday's Popcorn (from Tapastic)"
    url = "https://tapastic.com/series/Yesterdays-Popcorn"
4609
4610
4611
class OurSuperAdventureTapastic(GenericTapasticComic):
    """Our Super Adventure comics, read from Tapastic."""

    # Other places publishing the same comic:
    #  - http://www.oursuperadventure.com
    #  - http://sarahssketchbook.tumblr.com
    #  - http://sarahgraley.com
    name = "superadventure-tapastic"
    long_name = "Our Super Adventure (from Tapastic)"
    url = "https://tapastic.com/series/Our-Super-Adventure"
4619
4620
4621
class NamelessPCs(GenericTapasticComic):
    """Nameless PCs comics, read from Tapastic."""

    # Other place publishing the same comic: http://namelesspcs.com
    name = "namelesspcs-tapa"
    long_name = "NamelessPCs (from Tapastic)"
    url = "https://tapastic.com/series/NamelessPC"
4627
4628
4629
def get_subclasses(klass):
    """Return every class deriving from klass, directly or not.

    Direct subclasses come first, followed by the descendants of each of
    them in turn. A class reachable through several paths is listed once
    per path.
    """
    children = klass.__subclasses__()
    found = list(children)
    for child in children:
        found += get_subclasses(child)
    return found
4635
4636
4637
def remove_st_nd_rd_th_from_date(string):
    """Function to transform 1st/2nd/3rd/4th in a parsable date format.

    Only an ordinal suffix directly following a digit is removed, so words
    that merely contain 'st', 'nd', 'rd' or 'th' (August, Monday, Saturday,
    month...) are left untouched.

    >>> remove_st_nd_rd_th_from_date('Saturday 1st August 2015')
    'Saturday 1 August 2015'
    """
    # The previous implementation blindly removed the four letter pairs
    # anywhere in the string and only patched 'August' back afterwards.
    return re.sub(r'(?<=[0-9])(?:st|nd|rd|th)\b', '', string)
4645
4646
4647
def string_to_date(string, date_format, local=DEFAULT_LOCAL):
    """Function to convert string to date object.

    Wrapper around datetime.datetime.strptime: `string` is parsed according
    to `date_format` while the `local` locale is in effect, and the date part
    of the result is returned.

    The locale that was active on entry is restored before returning, even
    when the parsing fails (the ValueError from strptime is propagated).
    """
    # format described in https://docs.python.org/3/library/datetime.html#strftime-and-strptime-behavior
    prev_locale = locale.setlocale(locale.LC_ALL)
    if local == prev_locale:
        # Nothing to switch, hence nothing to restore.
        return datetime.datetime.strptime(string, date_format).date()
    locale.setlocale(locale.LC_ALL, local)
    try:
        return datetime.datetime.strptime(string, date_format).date()
    finally:
        # Do not leave the process-wide locale changed if strptime raised.
        locale.setlocale(locale.LC_ALL, prev_locale)
4658
4659
4660
# Every direct or indirect subclass of GenericComic, without duplicates.
COMICS = set(get_subclasses(GenericComic))
# Classes whose name is None are left out of the registry.
VALID_COMICS = [comic for comic in COMICS if comic.name is not None]
COMIC_NAMES = {comic.name: comic for comic in VALID_COMICS}
# A smaller mapping would mean that two classes share the same name.
assert len(COMIC_NAMES) == len(VALID_COMICS)
CLASS_NAMES = {comic.__name__ for comic in VALID_COMICS}
# Likewise, Python class names must be unique among the valid comics.
assert len(CLASS_NAMES) == len(VALID_COMICS)
4666