# Completed
# Pull Request — master (#44)
# by De
# 01:06
# created

# comics.py (10 issues)

1
#! /usr/bin/python3
2
# vim: set expandtab tabstop=4 shiftwidth=4 :
3
"""Module to retrieve webcomics"""
4
5
import datetime
import json
import locale
import re
import urllib
import urllib.error
from datetime import date, timedelta

from comic_abstract import GenericComic, get_date_for_comic
from urlfunctions import get_soup_at_url, urljoin_wrapper,\
    convert_iri_to_plain_ascii_uri, load_json_at_url, urlopen_wrapper
14
15
DEFAULT_LOCAL = 'en_GB.UTF-8'
16
17
18
class Xkcd(GenericComic):
    """Class to retrieve Xkcd comics."""
    name = 'xkcd'
    long_name = 'xkcd'
    url = 'http://xkcd.com'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generator to get the next comic. Implementation of GenericComic's abstract method.

        Numbering starts right after last_comic['num'] (or at 1 when
        last_comic is falsy) and stops at the 'num' field of the JSON
        document loaded from 'info.0.json'.

        Each yielded dict is the JSON document of the comic with:
            - 'img' wrapped into a one-element list
            - 'prefix', 'json_url' and 'url' added
            - 'day', 'month' and 'year' converted to int
        """
        first_num = last_comic['num'] if last_comic else 0
        last_num = load_json_at_url(
            urljoin_wrapper(cls.url, 'info.0.json'))['num']

        for num in range(first_num + 1, last_num + 1):
            if num == 404:
                # NOTE(review): number 404 is deliberately skipped -
                # presumably no comic exists with that number; confirm.
                continue
            json_url = urljoin_wrapper(cls.url, '%d/info.0.json' % num)
            comic = load_json_at_url(json_url)
            comic['img'] = [comic['img']]
            comic['prefix'] = '%d-' % num
            comic['json_url'] = json_url
            comic['url'] = urljoin_wrapper(cls.url, str(num))
            comic['day'] = int(comic['day'])
            comic['month'] = int(comic['month'])
            comic['year'] = int(comic['year'])
            # Sanity check: the document must describe the requested comic.
            assert comic['num'] == num
            yield comic
44
45
46
# Helper functions corresponding to get_url_from_link/get_url_from_archive_element
47
48
49
@classmethod
def get_href(cls, link):
    """Implementation of get_url_from_link/get_url_from_archive_element.

    Return the 'href' item of `link` unchanged.
    """
    return link['href']
53
54
55
@classmethod
def join_cls_url_to_href(cls, link):
    """Implementation of get_url_from_link/get_url_from_archive_element.

    Return the 'href' item of `link` joined to cls.url with urljoin_wrapper.
    """
    return urljoin_wrapper(cls.url, link['href'])
59
60
61
class GenericNavigableComic(GenericComic):
    """Generic class for "navigable" comics : with first/next arrows.

    This class applies to comic where previous and next comics can be
    accessed from a given comic. Once given a starting point (either
    the first comic or the last comic retrieved), it will handle the
    navigation, the retrieval of the soup object and the setting of
    the 'url' attribute on retrieved comics. This limits a lot the
    amount of boilerplate code in the different implementation classes.

    The `get_next_comic` method is implemented in terms of new
    more specialized methods to be implemented/overridden:
        - get_first_comic_link
        - get_navi_link
        - get_comic_info
        - get_url_from_link
    """

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        Sometimes this can be retrieved of any comic page, sometimes on
        the archive page, sometimes it doesn't exist at all and one has
        to iterate backward to find it before hardcoding the result found.
        """
        raise NotImplementedError

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next (or previous - for dev purposes) comic.

        `next_` is True to get the next link, False to get the previous one.
        """
        raise NotImplementedError

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The returned dict must not contain a 'url' key (it is added by
        get_next_comic); returning None makes get_next_comic skip the page.
        """
        raise NotImplementedError

    @classmethod
    def get_url_from_link(cls, link):
        """Get url corresponding to a link. Default implementation is similar to get_href."""
        return link['href']

    @classmethod
    def get_next_link(cls, last_soup):
        """Get link to next comic."""
        return cls.get_navi_link(last_soup, True)

    @classmethod
    def get_prev_link(cls, last_soup):
        """Get link to previous comic."""
        return cls.get_navi_link(last_soup, False)

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generic implementation of get_next_comic for navigable comics.

        Starts from the link following last_comic['url'] (or from the first
        comic link when last_comic is falsy) and follows "next" links until
        there is none or until a link leads to the url just visited.
        """
        url = last_comic['url'] if last_comic else None
        next_comic = \
            cls.get_next_link(get_soup_at_url(url)) \
            if url else \
            cls.get_first_comic_link()
        cls.log("next/first comic will be %s (url is %s)" % (str(next_comic), url))
        while next_comic:
            prev_url, url = url, cls.get_url_from_link(next_comic)
            if prev_url == url:
                # A "next" link pointing to the current page: stop here
                # instead of looping forever.
                cls.log("got same url %s" % url)
                break
            cls.log("about to get %s (%s)" % (url, str(next_comic)))
            soup = get_soup_at_url(url)
            comic = cls.get_comic_info(soup, next_comic)
            if comic is not None:
                assert 'url' not in comic
                comic['url'] = url
                yield comic
            next_comic = cls.get_next_link(soup)
            cls.log("next comic will be %s" % str(next_comic))

    @classmethod
    def check_first_link(cls):
        """Check that navigation to first comic seems to be working - for dev purposes.

        Return True when a first link exists and its url can be retrieved
        without HTTP error, False otherwise.
        """
        cls.log("about to check first link")
        ok = True
        firstlink = cls.get_first_comic_link()
        if firstlink is None:
            print("From %s : no first link" % cls.url)
            ok = False
        else:
            firsturl = cls.get_url_from_link(firstlink)
            try:
                get_soup_at_url(firsturl)
            except urllib.error.HTTPError:
                print("From %s : invalid first url" % cls.url)
                ok = False
        cls.log("checked first link -> returned %d" % ok)
        return ok

    @classmethod
    def check_prev_next_links(cls, url):
        """Check that navigation to prev/next from a given URL seems to be working - for dev purposes.

        Going backward then forward (and forward then backward) from `url`
        is expected to lead back to `url`. A neighbouring page without the
        link needed to come back is reported as a failure.
        Return True when all checks passed, False otherwise.
        """
        cls.log("about to check prev/next from %s" % url)
        ok = True
        if url is None:
            prevlink, nextlink = None, None
        else:
            soup = get_soup_at_url(url)
            prevlink, nextlink = cls.get_prev_link(soup), cls.get_next_link(soup)
        if prevlink is None and nextlink is None:
            print("From %s : no previous nor next" % url)
            ok = False
        else:
            if prevlink:
                prevurl = cls.get_url_from_link(prevlink)
                prevsoup = get_soup_at_url(prevurl)
                backlink = cls.get_next_link(prevsoup)
                # A missing link is a failed check, not a crash.
                prevnext = cls.get_url_from_link(backlink) if backlink else None
                if prevnext != url:
                    print("From %s, going backward then forward leads to %s" % (url, prevnext))
                    ok = False
            if nextlink:
                nexturl = cls.get_url_from_link(nextlink)
                # A next link pointing to the page itself is not followed.
                if nexturl != url:
                    nextsoup = get_soup_at_url(nexturl)
                    backlink = cls.get_prev_link(nextsoup)
                    nextprev = cls.get_url_from_link(backlink) if backlink else None
                    if nextprev != url:
                        print("From %s, going forward then backward leads to %s" % (url, nextprev))
                        ok = False
        cls.log("checked prev/next from %s -> returned %d" % (url, ok))
        return ok

    @classmethod
    def check_navigation(cls, url):
        """Check that navigation functions seem to be working - for dev purposes."""
        cls.log("about to check navigation from %s" % url)
        first = cls.check_first_link()
        prevnext = cls.check_prev_next_links(url)
        ok = first and prevnext
        cls.log("checked navigation from %s -> returned %d" % (url, ok))
        return ok

    # This method is not defined by default and is not part of this class'API.
    # It is only used:
    # - during development
    # - in subclasses implementing it correctly
    if False:
        @classmethod
        def get_first_comic_url(cls):
            """Get first comic url

            Sometimes, the first comic cannot be reached directly so to start
            from the first comic one has to go to the previous comic until
            there is no previous comics. Once this URL is reached, it
            is better to hardcode it but for development purposes, it
            is convenient to have an automatic way to find it.
            """
            url = input("Get starting URL: ")
            print(url)
            comic = cls.get_prev_link(get_soup_at_url(url))
            while comic:
                url = cls.get_url_from_link(comic)
                print(url)
                comic = cls.get_prev_link(get_soup_at_url(url))
            return url
222
223
224
class GenericListableComic(GenericComic):
    """Generic class for "listable" comics : with a list of comics (aka 'archive')

    The `get_next_comic` method is implemented in terms of new
    more specialized methods to be implemented/overridden:
        - get_archive_elements
        - get_url_from_archive_element
        - get_comic_info
    """

    @classmethod
    def get_archive_elements(cls):
        """Get the archive elements (iterable)."""
        raise NotImplementedError

    @classmethod
    def get_url_from_archive_element(cls, archive_elt):
        """Get url corresponding to an archive element."""
        raise NotImplementedError

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Get information about a particular comics.

        The returned dict must not contain a 'url' key (it is added by
        get_next_comic); returning None makes get_next_comic skip the element.
        """
        raise NotImplementedError

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generic implementation of get_next_comic for listable comics.

        Archive elements are skipped until the one whose url is
        last_comic['url'] has been seen; the elements after it are
        retrieved and yielded. When last_comic is falsy, every element
        is retrieved.
        """
        waiting_for_url = last_comic['url'] if last_comic else None
        for archive_elt in cls.get_archive_elements():
            url = cls.get_url_from_archive_element(archive_elt)
            cls.log("considering %s" % url)
            if waiting_for_url and waiting_for_url == url:
                # Last retrieved comic found: start yielding from the next one.
                waiting_for_url = None
            elif waiting_for_url is None:
                cls.log("about to get %s (%s)" % (url, str(archive_elt)))
                soup = get_soup_at_url(url)
                comic = cls.get_comic_info(soup, archive_elt)
                if comic is not None:
                    assert 'url' not in comic
                    comic['url'] = url
                    yield comic
        if waiting_for_url is not None:
            # The last retrieved comic was never seen in the archive.
            print("Did not find %s : there might be a problem" % waiting_for_url)
268
269
# Helper functions corresponding to get_first_comic_link/get_navi_link
270
271
272
@classmethod
def get_link_rel_next(cls, last_soup, next_):
    """Implementation of get_navi_link.

    Look for a 'link' tag whose rel is 'next' (when next_ is true) or 'prev'.
    """
    return last_soup.find('link', rel='next' if next_ else 'prev')
276
277
278
@classmethod
def get_a_rel_next(cls, last_soup, next_):
    """Implementation of get_navi_link.

    Look for an 'a' tag whose rel is 'next' (when next_ is true) or 'prev'.
    """
    return last_soup.find('a', rel='next' if next_ else 'prev')
282
283
284
@classmethod
def get_a_navi_navinext(cls, last_soup, next_):
    """Implementation of get_navi_link.

    Look for an 'a' tag of class 'navi navi-next' (when next_ is true)
    or 'navi navi-prev'.
    """
    return last_soup.find('a', class_='navi navi-next' if next_ else 'navi navi-prev')
288
289
290
@classmethod
def get_a_navi_comicnavnext_navinext(cls, last_soup, next_):
    """Implementation of get_navi_link.

    Look for an 'a' tag of class 'navi comic-nav-next navi-next' (when
    next_ is true) or 'navi comic-nav-previous navi-prev'.
    """
    return last_soup.find('a', class_='navi comic-nav-next navi-next' if next_ else 'navi comic-nav-previous navi-prev')
294
295
296
@classmethod
def get_a_comicnavbase_comicnavnext(cls, last_soup, next_):
    """Implementation of get_navi_link.

    Look for an 'a' tag of class 'comic-nav-base comic-nav-next' (when
    next_ is true) or 'comic-nav-base comic-nav-previous'.
    """
    return last_soup.find('a', class_='comic-nav-base comic-nav-next' if next_ else 'comic-nav-base comic-nav-previous')
300
301
302
@classmethod
def get_a_navi_navifirst(cls):
    """Implementation of get_first_comic_link.

    Look on the page at cls.url for an 'a' tag of class 'navi navi-first'.
    """
    return get_soup_at_url(cls.url).find('a', class_='navi navi-first')
306
307
308
@classmethod
def get_div_navfirst_a(cls):
    """Implementation of get_first_comic_link.

    Look on the page at cls.url for the 'div' of class "nav-first" and
    return the first 'a' tag it contains. Return None when that div is
    missing so that callers get "no first link" instead of an
    AttributeError.
    """
    div = get_soup_at_url(cls.url).find('div', class_="nav-first")
    return div.find('a') if div is not None else None
312
313
314
@classmethod
def get_a_comicnavbase_comicnavfirst(cls):
    """Implementation of get_first_comic_link.

    Look on the page at cls.url for an 'a' tag of class
    'comic-nav-base comic-nav-first'.
    """
    return get_soup_at_url(cls.url).find('a', class_='comic-nav-base comic-nav-first')
318
319
320
@classmethod
def simulate_first_link(cls):
    """Implementation of get_first_comic_link creating a link-like object from
    an URL provided by the class.

    The class must define a `first_url` attribute; the result is a dict
    with that value under the 'href' key.
    """
    return {'href': cls.first_url}
325
326
327
@classmethod
def navigate_to_first_comic(cls):
    """Implementation of get_first_comic_link navigating from a user provided
    URL to the first comic.

    Sometimes, the first comic cannot be reached directly so to start
    from the first comic one has to go to the previous comic until
    there is no previous comics. Once this URL is reached, it
    is better to hardcode it but for development purposes, it
    is convenient to have an automatic way to find it.

    The starting URL is read from standard input and every visited URL
    is printed. The result is a link-like dict with the last URL reached
    under the 'href' key.
    """
    url = input("Get starting URL: ")
    print(url)
    comic = cls.get_prev_link(get_soup_at_url(url))
    while comic:
        url = cls.get_url_from_link(comic)
        print(url)
        comic = cls.get_prev_link(get_soup_at_url(url))
    return {'href': url}
346
347
348
class GenericEmptyComic(GenericComic):
    """Generic class for comics where nothing is to be done.

    It can be useful to deactivate temporarily comics that do not work
    properly by replacing `class MyComic(GenericWhateverComic)` with
    `class MyComic(GenericEmptyComic, GenericWhateverComic)`."""

    @classmethod
    def get_next_comic(cls, last_comic):
        """Implementation of get_next_comic returning no comics."""
        cls.log("comic is considered as empty - returning no comic")
        return []
360
361
362
class ExtraFabulousComics(GenericNavigableComic):
    """Class to retrieve Extra Fabulous Comics."""
    name = 'efc'
    long_name = 'Extra Fabulous Comics'
    url = 'http://extrafabulouscomics.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics."""
        return get_soup_at_url(cls.url).find('a', title='FIRST')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Images are the 'img' tags whose src starts with
        '<cls.url>/wp-content/uploads/'.
        """
        # cls.url is escaped so that its dots only match literal dots.
        img_src_re = re.compile('^%s/wp-content/uploads/' % re.escape(cls.url))
        imgs = soup.find_all('img', src=img_src_re)
        title = soup.find('h2', class_='post-title').string
        return {
            'title': title,
            'img': [i['src'] for i in imgs],
            'prefix': title + '-'
        }
385
386
387
class GenericLeMondeBlog(GenericNavigableComic):
    """Generic class to retrieve comics from Le Monde blogs.

    Subclasses are expected to provide `first_url`, the url of the page
    navigation starts from.
    """
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    # Placeholder to be overridden by subclasses.
    first_url = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The date is read from the 'entry-date' span and parsed as
        "%d %B %Y" with the "fr_FR.utf8" locale.
        """
        url2 = soup.find('link', rel='shortlink')['href']
        title = soup.find('meta', property='og:title')['content']
        date_str = soup.find("span", class_="entry-date").string
        day = string_to_date(date_str, "%d %B %Y", "fr_FR.utf8")
        imgs = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'url2': url2,
            'img': [convert_iri_to_plain_ascii_uri(i['content']) for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
409
410
411
class ZepWorld(GenericLeMondeBlog):
    """Class to retrieve Zep World comics."""
    name = "zep"
    long_name = "Zep World"
    url = "http://zepworld.blog.lemonde.fr"
    # Page navigation starts from.
    first_url = "http://zepworld.blog.lemonde.fr/2014/10/31/bientot-le-blog-de-zep/"
417
418
419
class Vidberg(GenericLeMondeBlog):
    """Class to retrieve Vidberg comics."""
    name = 'vidberg'
    long_name = "Vidberg - l'actu en patates"
    url = "http://vidberg.blog.lemonde.fr"
    # Not the first but I didn't find an efficient way to retrieve it
    first_url = "http://vidberg.blog.lemonde.fr/2012/02/09/revue-de-campagne-la-campagne-du-modem-semballe/"
426
427
428
class Plantu(GenericLeMondeBlog):
    """Class to retrieve Plantu comics."""
    name = 'plantu'
    long_name = "Plantu"
    url = "http://plantu.blog.lemonde.fr"
    # Page navigation starts from.
    first_url = "http://plantu.blog.lemonde.fr/2014/10/28/stress-test-a-bruxelles/"
434
435
436
class XavierGorce(GenericLeMondeBlog):
    """Class to retrieve Xavier Gorce comics."""
    name = 'gorce'
    long_name = "Xavier Gorce"
    url = "http://xaviergorce.blog.lemonde.fr"
    # Page navigation starts from.
    first_url = "http://xaviergorce.blog.lemonde.fr/2015/01/09/distinction/"
442
443
444
class CartooningForPeace(GenericLeMondeBlog):
    """Class to retrieve Cartooning For Peace comics."""
    name = 'forpeace'
    long_name = "Cartooning For Peace"
    url = "http://cartooningforpeace.blog.lemonde.fr"
    # Page navigation starts from.
    first_url = "http://cartooningforpeace.blog.lemonde.fr/2014/12/15/bado/"
450
451
452
class Aurel(GenericLeMondeBlog):
    """Class to retrieve Aurel comics."""
    name = 'aurel'
    long_name = "Aurel"
    url = "http://aurel.blog.lemonde.fr"
    # Page navigation starts from.
    first_url = "http://aurel.blog.lemonde.fr/2014/09/29/le-senat-repasse-a-droite/"
458
459
460
class LesCulottees(GenericLeMondeBlog):
    """Class to retrieve Les Culottees comics."""
    name = 'culottees'
    long_name = 'Les Culottees'
    url = "http://lesculottees.blog.lemonde.fr"
    # Page navigation starts from.
    first_url = "http://lesculottees.blog.lemonde.fr/2016/01/11/clementine-delait-femme-a-barbe/"
466
467
468
class UneAnneeAuLycee(GenericLeMondeBlog):
    """Class to retrieve Une Annee Au Lycee comics."""
    name = 'lycee'
    long_name = 'Une Annee au Lycee'
    url = 'http://uneanneeaulycee.blog.lemonde.fr'
    # Page navigation starts from.
    first_url = "http://uneanneeaulycee.blog.lemonde.fr/2016/06/13/la-semaine-du-bac-est-arrivee/"
474
475
476
class Rall(GenericNavigableComic):
    """Class to retrieve Ted Rall comics."""
    # Also on http://www.gocomics.com/tedrall
    name = 'rall'
    long_name = "Ted Rall"
    url = "http://rall.com/comic"
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    # Not the first but I didn't find an efficient way to retrieve it
    first_url = "http://rall.com/2014/01/30/los-angeles-times-cartoon-well-miss-those-california-flowers"

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The date is read from the 'entry-date' span and parsed as "%B %d, %Y".
        """
        title = soup.find('meta', property='og:title')['content']
        author = soup.find("span", class_="author vcard").find("a").string
        date_str = soup.find("span", class_="entry-date").string
        day = string_to_date(date_str, "%B %d, %Y")
        desc = soup.find('meta', property='og:description')['content']
        imgs = soup.find('div', class_='entry-content').find_all('img')
        # NOTE(review): drops the last seven images, assumed to be the
        # social media buttons - confirm the count against the live page.
        imgs = imgs[:-7]  # remove social media buttons
        return {
            'title': title,
            'author': author,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'description': desc,
            'img': [i['src'] for i in imgs],
        }
506
507
508
class Dilem(GenericNavigableComic):
    """Class to retrieve Ali Dilem comics."""
    name = 'dilem'
    long_name = 'Ali Dilem'
    url = 'http://information.tv5monde.com/dilem'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = "http://information.tv5monde.com/dilem/2004-06-26"

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Return the 'a' tag found in the relevant 'li' tag, or None when
        there is no such 'li'.
        """
        # prev is next / next is prev
        li = last_soup.find('li', class_='prev' if next_ else 'next')
        return li.find('a') if li else None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The date comes from the first 10 characters of the content of the
        'dc:date' span, parsed as "%Y-%m-%d".
        """
        short_url = soup.find('link', rel='shortlink')['href']
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        imgs = soup.find_all('meta', property='og:image')
        date_str = soup.find('span', property='dc:date')['content']
        # Keep only the leading "YYYY-MM-DD" part.
        date_str = date_str[:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        return {
            'short_url': short_url,
            'title': title,
            'img': [i['content'] for i in imgs],
            'day': day.day,
            'month': day.month,
            'year': day.year,
        }
541
542
# 543 View Code Duplication
class SpaceAvalanche(GenericNavigableComic):
    """Class to retrieve Space Avalanche comics."""
    name = 'avalanche'
    long_name = 'Space Avalanche'
    url = 'http://www.spaceavalanche.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        The link is hardcoded as a link-like dict with 'href' and 'title'.
        """
        return {'href': "http://www.spaceavalanche.com/2009/02/02/irish-sea/", 'title': "Irish Sea"}

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The title comes from the link and the date from the
        '/<year>/<month>/<day>/' part of its url.
        """
        url_date_re = re.compile('.*/([0-9]*)/([0-9]*)/([0-9]*)/.*$')
        title = link['title']
        url = cls.get_url_from_link(link)
        year, month, day = [int(s)
                            for s in url_date_re.match(url).groups()]
        imgs = soup.find("div", class_="entry").find_all("img")
        return {
            'title': title,
            'day': day,
            'month': month,
            'year': year,
            'img': [i['src'] for i in imgs],
        }
571
572
573
class ZenPencils(GenericNavigableComic):
    """Class to retrieve ZenPencils comics."""
    # Also on http://zenpencils.tumblr.com
    # Also on http://www.gocomics.com/zen-pencils
    name = 'zenpencils'
    long_name = 'Zen Pencils'
    url = 'http://zenpencils.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = "http://zenpencils.com/comic/1-ralph-waldo-emerson-make-them-cry/"

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The date is read from the 'post-date' span and parsed as "%B %d, %Y".
        """
        imgs = soup.find('div', id='comic').find_all('img')
        post = soup.find('div', class_='post-content')
        author = post.find("span", class_="post-author").find("a").string
        title = post.find('h2', class_='post-title').string
        date_str = post.find('span', class_='post-date').string
        day = string_to_date(date_str, "%B %d, %Y")
        # Sanity checks on the page structure: at least one image, and
        # image alt/title texts consistent with the post title.
        assert imgs
        assert all(i['alt'] == i['title'] for i in imgs)
        assert all(i['alt'] in (title, "") for i in imgs)
        desc = soup.find('meta', property='og:description')['content']
        return {
            'title': title,
            'description': desc,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [i['src'] for i in imgs],
        }
606
607
608
class ItsTheTie(GenericNavigableComic):
    """Class to retrieve It's the tie comics."""
    # Also on http://itsthetie.tumblr.com
    # Also on https://tapastic.com/series/itsthetie
    name = 'tie'
    long_name = "It's the tie"
    url = "http://itsthetie.com"
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Images are the 'og:image' meta contents followed by the
        'data-oversrc' values not already listed, minus any url ending
        with "/2016/01/bonus-panel.png". 'tags' is an empty string when
        the page has no 'article:tag' meta.
        """
        title = soup.find('h1', class_='comic-title').find('a').string
        date_str = soup.find('header', class_='comic-meta entry-meta').find('a').string
        day = string_to_date(date_str, "%B %d, %Y")
        # Bonus images may or may not be in meta og:image.
        imgs = soup.find_all('meta', property='og:image')
        imgs_src = [i['content'] for i in imgs]
        bonus = soup.find_all('img', attrs={'data-oversrc': True})
        bonus_src = [b['data-oversrc'] for b in bonus]
        all_imgs_src = imgs_src + [s for s in bonus_src if s not in imgs_src]
        all_imgs_src = [s for s in all_imgs_src if not s.endswith("/2016/01/bonus-panel.png")]
        tag_meta = soup.find('meta', property='article:tag')
        tags = tag_meta['content'] if tag_meta else ""
        return {
            'title': title,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': all_imgs_src,
            'tags': tags,
        }
641
642
643
class PenelopeBagieu(GenericNavigableComic):
    """Class to retrieve comics from Penelope Bagieu's blog."""
    name = 'bagieu'
    long_name = 'Ma vie est tout a fait fascinante (Bagieu)'
    url = 'http://www.penelope-jolicoeur.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.penelope-jolicoeur.com/2007/02/ma-vie-mon-oeuv.html'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The date is read from the 'date-header' h2 and parsed as
        "%A %d %B %Y" with the "fr_FR.utf8" locale.
        """
        date_str = soup.find('h2', class_='date-header').string
        day = string_to_date(date_str, "%A %d %B %Y", "fr_FR.utf8")
        imgs = soup.find('div', class_='entry-body').find_all('img')
        title = soup.find('h3', class_='entry-header').string
        return {
            'title': title,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
666
667
668
class OneOneOneOneComic(GenericNavigableComic):
    """Class to retrieve 1111 Comics."""
    # Also on http://comics1111.tumblr.com
    # Also on https://tapastic.com/series/1111-Comics
    name = '1111'
    long_name = '1111 Comics'
    url = 'http://www.1111comics.me'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The date is read from the first link of the 'comic-meta entry-meta'
        header and parsed as "%B %d, %Y"; images are the 'og:image' metas.
        """
        title = soup.find('h1', class_='comic-title').find('a').string
        date_str = soup.find('header', class_='comic-meta entry-meta').find('a').string
        day = string_to_date(date_str, "%B %d, %Y")
        imgs = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [i['content'] for i in imgs],
        }
692
693
694
class AngryAtNothing(GenericNavigableComic):
    """Class to retrieve Angry at Nothing comics."""
    # Also on http://tapastic.com/series/Comics-yeah-definitely-comics-
    name = 'angry'
    long_name = 'Angry At Nothing'
    url = 'http://www.angryatnothing.net'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The date is read from the first link of the 'comic-meta entry-meta'
        header and parsed as "%B %d, %Y"; images are the 'og:image' metas.
        """
        title = soup.find('h1', class_='comic-title').find('a').string
        date_str = soup.find('header', class_='comic-meta entry-meta').find('a').string
        day = string_to_date(date_str, "%B %d, %Y")
        imgs = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [i['content'] for i in imgs],
        }
717
718
719
class NeDroid(GenericNavigableComic):
    """Class to retrieve NeDroid comics."""
    name = 'nedroid'
    long_name = 'NeDroid'
    url = 'http://nedroid.com'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The comic number comes from the '?p=<num>' shortlink and the date
        from the '<year>-<month>-<day>' prefix of the image file name.
        Exactly one image is expected in the 'comic' div.
        """
        # Literal parts of the patterns are escaped so that dots only
        # match dots.
        short_url_re = re.compile(r'^%s/\?p=([0-9]*)' % re.escape(cls.url))
        comic_url_re = re.compile(r'//nedroid\.com/comics/([0-9]*)-([0-9]*)-([0-9]*).*')
        short_url = cls.get_url_from_link(soup.find('link', rel='shortlink'))
        num = int(short_url_re.match(short_url).groups()[0])
        imgs = soup.find('div', id='comic').find_all('img')
        # Checked before imgs[0] is used so that an unexpected page
        # fails on the assertion rather than on an IndexError.
        assert len(imgs) == 1
        img = imgs[0]
        year, month, day = [int(s) for s in comic_url_re.match(img['src']).groups()]
        title = img['alt']
        title2 = img['title']
        return {
            'short_url': short_url,
            'title': title,
            'title2': title2,
            'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
            'day': day,
            'month': month,
            'year': year,
            'num': num,
        }
750
751
# 752 View Code Duplication
class Garfield(GenericNavigableComic):
    """Class to retrieve Garfield comics."""
    # Also on http://www.gocomics.com/garfield
    name = 'garfield'
    long_name = 'Garfield'
    url = 'https://garfield.com'
    get_first_comic_link = simulate_first_link
    first_url = 'https://garfield.com/comic/1978/06/19'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic."""
        return last_soup.find('a', class_='comic-arrow-right' if next_ else 'comic-arrow-left')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The date comes from the '/comic/<year>/<month>/<day>' part of the
        link url.
        """
        url = cls.get_url_from_link(link)
        # cls.url is escaped so that its dots only match literal dots.
        date_re = re.compile('^%s/comic/([0-9]*)/([0-9]*)/([0-9]*)' % re.escape(cls.url))
        year, month, day = [int(s) for s in date_re.match(url).groups()]
        imgs = soup.find('div', class_='comic-display').find_all('img', class_='img-responsive')
        return {
            'month': month,
            'year': year,
            'day': day,
            'img': [i['src'] for i in imgs],
        }
779
780
# 781 View Code Duplication
class Dilbert(GenericNavigableComic):
    """Retriever for the Dilbert strips hosted on dilbert.com."""
    # Also on http://www.gocomics.com/dilbert-classics
    name = 'dilbert'
    long_name = 'Dilbert'
    url = 'http://dilbert.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://dilbert.com/strip/1989-04-16'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor inside the right (next) or left (previous) nav box, or None."""
        box_class = 'nav-comic nav-right' if next_ else 'nav-comic nav-left'
        nav_box = last_soup.find('div', class_=box_class)
        if not nav_box:
            return None
        return nav_box.find('a')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect title, description, images, author, tags and date from the meta tags."""
        def meta_content(prop):
            """Return the 'content' attribute of the first meta tag with this property."""
            return soup.find('meta', property=prop)['content']

        title = meta_content('og:title')
        image_metas = soup.find_all('meta', property='og:image')
        description = meta_content('og:description')
        published = meta_content('article:publish_date')
        day = string_to_date(published, "%B %d, %Y")
        author = meta_content('article:author')
        tags = meta_content('article:tag')
        return {
            'title': title,
            'description': description,
            'img': [meta['content'] for meta in image_metas],
            'author': author,
            'tags': tags,
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
817
818
819
class VictimsOfCircumsolar(GenericNavigableComic):
    """Retriever for the Victims Of Circumsolar comics."""
    name = 'circumsolar'
    long_name = 'Victims Of Circumsolar'
    url = 'http://www.victimsofcircumsolar.com'
    get_navi_link = get_a_navi_comicnavnext_navinext
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.victimsofcircumsolar.com/comic/modern-addiction'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, description and image URLs of a comic page."""
        # Date is on the archive page
        # The last matching og: meta tag is the one used for each field.
        title_metas = soup.find_all('meta', property='og:title')
        title = title_metas[-1]['content']
        desc_metas = soup.find_all('meta', property='og:description')
        description = desc_metas[-1]['content']
        pictures = soup.find('div', id='comic').find_all('img')
        for picture in pictures:
            assert picture['title'] == picture['alt'] == title
        return {
            'title': title,
            'description': description,
            'img': [picture['src'] for picture in pictures],
        }
841
842
843
class ThreeWordPhrase(GenericNavigableComic):
    """Class to retrieve Three Word Phrase comics."""
    # Also on http://www.threewordphrase.tumblr.com
    name = 'threeword'
    long_name = 'Three Word Phrase'
    url = 'http://threewordphrase.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics (the parent of the '/firstlink.gif' image)."""
        return get_soup_at_url(cls.url).find('img', src='/firstlink.gif').parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns None when the navigation image is missing from the page or
        when its parent element carries no href.
        """
        img = last_soup.find('img', src='/nextlink.gif' if next_ else '/prevlink.gif')
        if img is None:
            # find() yields None when nothing matches; treat it as "no link"
            # instead of failing on `.parent`.
            return None
        link = img.parent
        return None if link.get('href') is None else link

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Every image of the page is kept except those whose source ends with
        one of the known non-comic file names (navigation, ads, header...).
        """
        non_comic_suffixes = (
            'link.gif', '32.png', 'twpbookad.jpg',
            'merchad.jpg', 'header.gif', 'tipjar.jpg')
        title = soup.find('title')
        # Images without a src attribute cannot be downloaded: skip them
        # rather than raising KeyError.
        imgs = [img for img in soup.find_all('img')
                if img.get('src') is not None
                and not img['src'].endswith(non_comic_suffixes)]
        return {
            'title': title.string if title else None,
            'title2': '  '.join(img.get('alt') for img in imgs if img.get('alt')),
            'img': [urljoin_wrapper(cls.url, img['src']) for img in imgs],
        }
875
876
877
class DeadlyPanel(GenericEmptyComic, GenericNavigableComic):
    """Retriever for the Deadly Panel comics."""
    # Also on https://tapastic.com/series/deadlypanel
    name = 'deadly'
    long_name = 'Deadly Panel'
    url = 'http://www.deadlypanel.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the image URLs found in the 'comic' div."""
        comic_div = soup.find('div', id='comic')
        pictures = comic_div.find_all('img')
        for picture in pictures:
            assert picture['alt'] == picture['title']
        return {
            'img': [picture['src'] for picture in pictures],
        }
894
895
896
class TheGentlemanArmchair(GenericNavigableComic):
    """Retriever for The Gentleman Armchair comics."""
    name = 'gentlemanarmchair'
    long_name = 'The Gentleman Armchair'
    url = 'http://thegentlemansarmchair.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, author, publication date and image URLs of a comic page."""
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        posted = soup.find('span', class_='post-date').string
        day = string_to_date(posted, "%B %d, %Y")
        pictures = soup.find('div', id='comic').find_all('img')
        return {
            'img': [picture['src'] for picture in pictures],
            'title': title,
            'author': author,
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
920
921
922
class MyExtraLife(GenericNavigableComic):
    """Retriever for the My Extra Life comics."""
    name = 'extralife'
    long_name = 'My Extra Life'
    url = 'http://www.myextralife.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'first comic' navigation anchor of the home page."""
        home = get_soup_at_url(cls.url)
        return home.find('a', class_='comic_nav_link first_comic_link')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, publication date and image URLs of a comic page."""
        title = soup.find("h1", class_="comic_title").string
        posted = soup.find("span", class_="comic_date").string
        day = string_to_date(posted, "%B %d, %Y")
        pictures = soup.find_all("img", class_="comic")
        for picture in pictures:
            assert picture['alt'] == picture['title'] == title
        return {
            'title': title,
            # images with an empty src are left out
            'img': [picture['src'] for picture in pictures if picture["src"]],
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
949
950
951
class SaturdayMorningBreakfastCereal(GenericNavigableComic):
    """Class to retrieve Saturday Morning Breakfast Cereal comics."""
    # Also on http://www.gocomics.com/saturday-morning-breakfast-cereal
    # Also on http://smbc-comics.tumblr.com
    name = 'smbc'
    long_name = 'Saturday Morning Breakfast Cereal'
    url = 'http://www.smbc-comics.com'
    get_navi_link = get_a_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics (the anchor with rel='start')."""
        return get_soup_at_url(cls.url).find('a', rel='start')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The result holds the main image (id 'cc-comic') and, when present,
        the image found in the 'aftercomic' div.
        """
        image1 = soup.find('img', id='cc-comic')
        image_url1 = image1['src']
        aftercomic = soup.find('div', id='aftercomic')
        # The 'aftercomic' div may be missing or may hold no image: both
        # cases simply mean there is no second image.
        after_img = aftercomic.find('img') if aftercomic else None
        image_url2 = after_img.get('src') if after_img else None
        imgs = [image_url1] + ([image_url2] if image_url2 else [])
        date_str = soup.find('div', class_='cc-publishtime').contents[0]
        day = string_to_date(date_str, "%B %d, %Y")
        return {
            'title': image1['title'],
            'img': [urljoin_wrapper(cls.url, i) for i in imgs],
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
982
983
984
class PerryBibleFellowship(GenericListableComic):
    """Retriever for the Perry Bible Fellowship comics."""
    name = 'pbf'
    long_name = 'Perry Bible Fellowship'
    url = 'http://pbfcomics.com'
    get_url_from_archive_element = join_cls_url_to_href

    @classmethod
    def get_archive_elements(cls):
        """Return the numeric comic links of the home page, in reversed page order."""
        numeric_href = re.compile('^/[0-9]*/$')
        home = get_soup_at_url(cls.url)
        return reversed(home.find_all('a', href=numeric_href))

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return number, name and image URL of the comic behind an archive link."""
        comic_url = cls.get_url_from_archive_element(link)
        picture_src = re.compile('^/archive_b/PBF.*')
        name = link.string
        num = int(link['name'])
        # the link target is expected to be the comic number between slashes
        assert link['href'] == '/%d/' % num
        pictures = soup.find_all('img', src=picture_src)
        assert len(pictures) == 1
        assert pictures[0]['alt'] == name
        return {
            'num': num,
            'name': name,
            'img': [urljoin_wrapper(comic_url, picture['src']) for picture in pictures],
            'prefix': '%d-' % num,
        }
1014
1015
1016
class Mercworks(GenericNavigableComic):
    """Retriever for the Mercworks comics."""
    # Also on http://mercworks.tumblr.com
    name = 'mercworks'
    long_name = 'Mercworks'
    url = 'http://mercworks.net'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, author, description, date and images read from the meta tags."""
        title = soup.find('meta', property='og:title')['content']
        description_meta = soup.find('meta', property='og:description')
        if description_meta:
            description = description_meta['content']
        else:
            description = ""
        author_meta = soup.find('meta', attrs={'name': 'shareaholic:article_author_name'})
        author = author_meta['content']
        published_meta = soup.find('meta', attrs={'name': 'shareaholic:article_published_time'})
        # only the first 10 characters (the YYYY-MM-DD part) are parsed
        published = published_meta['content'][:10]
        day = string_to_date(published, "%Y-%m-%d")
        image_metas = soup.find_all('meta', property='og:image')
        return {
            'img': [meta['content'] for meta in image_metas],
            'title': title,
            'author': author,
            'desc': description,
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
1045
1046
1047
class BerkeleyMews(GenericListableComic):
    """Retriever for the Berkeley Mews comics."""
    # Also on http://mews.tumblr.com
    # Also on http://www.gocomics.com/berkeley-mews
    name = 'berkeley'
    long_name = 'Berkeley Mews'
    url = 'http://www.berkeleymews.com'
    get_url_from_archive_element = get_href
    comic_num_re = re.compile('%s/\\?p=([0-9]*)$' % url)

    @classmethod
    def get_archive_elements(cls):
        """Return the comic links of the archive page, in reversed page order."""
        archive = get_soup_at_url(urljoin_wrapper(cls.url, "?page_id=2"))
        return reversed(archive.find_all('a', href=cls.comic_num_re))

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return number, titles, image URL and date (parsed from the image URL)."""
        date_in_src = re.compile('.*/([0-9]*)-([0-9]*)-([0-9]*)-.*')
        comic_url = cls.get_url_from_archive_element(link)
        num = int(cls.comic_num_re.match(comic_url).group(1))
        picture = soup.find('div', id='comic').find('img')
        assert picture['alt'] == picture['title']
        hover_text = picture['title']
        picture_url = picture['src']
        year, month, day = (int(part) for part in date_in_src.match(picture_url).groups())
        return {
            'num': num,
            'title': link.string,
            'title2': hover_text,
            'img': [picture_url],
            'year': year,
            'month': month,
            'day': day,
        }
1082
1083
1084
class GenericBouletCorp(GenericNavigableComic):
    """Shared retriever logic for the BouletCorp comics (one subclass per language)."""
    # Also on http://bouletcorp.tumblr.com
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the first anchor of the 'centered_nav' div on the home page."""
        navigation = get_soup_at_url(cls.url).find('div', id='centered_nav')
        return navigation.find_all('a')[0]

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return images, title, hover texts and date (parsed from the comic URL)."""
        comic_url = cls.get_url_from_link(link)
        date_match = re.match('^%s/([0-9]*)/([0-9]*)/([0-9]*)/' % cls.url, comic_url)
        year, month, day = (int(part) for part in date_match.groups())
        story = soup.find('div', id='notes').find('div', class_='storycontent')
        pictures = story.find_all('img')
        hover_texts = [picture.get('title') for picture in pictures]
        texts = '  '.join(text for text in hover_texts if text)
        title = soup.find('title').string
        return {
            # images without a src attribute are ignored
            'img': [convert_iri_to_plain_ascii_uri(picture['src'])
                    for picture in pictures if picture.get('src') is not None],
            'title': title,
            'texts': texts,
            'year': year,
            'month': month,
            'day': day,
        }
1111
1112
1113
class BouletCorp(GenericBouletCorp):
    """Retriever for the BouletCorp comics served from www.bouletcorp.com."""
    name = 'boulet'
    long_name = 'Boulet Corp'
    url = 'http://www.bouletcorp.com'
1118
1119
1120
class BouletCorpEn(GenericBouletCorp):
    """Retriever for the English BouletCorp comics served from english.bouletcorp.com."""
    name = 'boulet_en'
    long_name = 'Boulet Corp English'
    url = 'http://english.bouletcorp.com'
1125
1126
1127
class AmazingSuperPowers(GenericNavigableComic):
    """Retriever for the Amazing Super Powers comics."""
    name = 'asp'
    long_name = 'Amazing Super Powers'
    url = 'http://www.amazingsuperpowers.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title (joined image titles), author, date and image URLs."""
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        posted = soup.find('span', class_='post-date').string
        day = string_to_date(posted, "%B %d, %Y")
        pictures = soup.find('div', id='comic').find_all('img')
        title = ' '.join(picture['title'] for picture in pictures)
        for picture in pictures:
            assert picture['alt'] == picture['title']
        return {
            'title': title,
            'author': author,
            'img': [picture['src'] for picture in pictures],
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
1152
1153
1154
class ToonHole(GenericListableComic):
    """Retriever for the Toon Hole comics."""
    # Also on http://tapastic.com/series/TOONHOLE
    name = 'toonhole'
    long_name = 'Toon Hole'
    url = 'http://www.toonhole.com'
    get_url_from_archive_element = get_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, publication date and image URLs of a comic page."""
        title = link.string
        raw_date = soup.find('div', class_='comicdate').string.strip()
        day = string_to_date(remove_st_nd_rd_th_from_date(raw_date), "%B %d, %Y")
        pictures = soup.find('div', id='comic').find_all('img')
        for picture in pictures:
            assert picture['alt'] == picture['title'] == title
        return {
            'title': title,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [convert_iri_to_plain_ascii_uri(picture['src']) for picture in pictures],
        }

    @classmethod
    def get_archive_elements(cls):
        """Return the rel='bookmark' links of the archive page, in reversed page order."""
        archive = get_soup_at_url(urljoin_wrapper(cls.url, 'archive/'))
        return reversed(archive.find_all('a', rel='bookmark'))
1182
1183
1184
class Channelate(GenericNavigableComic):
    """Retriever for the Channelate comics."""
    name = 'channelate'
    long_name = 'Channelate'
    url = 'http://www.channelate.com'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return author, date, title and images of a comic page.

        When the page has an 'extrapanelbutton' div, the linked page is
        fetched and its extra panel images are appended to the result.
        """
        author = soup.find("span", class_="post-author").find("a").string
        posted = soup.find('span', class_='post-date').string
        day = string_to_date(posted, '%Y/%m/%d')
        title = soup.find('meta', property='og:title')['content']
        comic_div = soup.find('div', id='comic')
        pictures = comic_div.find_all('img') if comic_div else []
        for picture in pictures:
            assert picture['alt'] == picture['title']
        extra_button = soup.find('div', id='extrapanelbutton')
        extra_url = None
        if extra_button:
            extra_url = extra_button.find('a')['href']
            extra_page = get_soup_at_url(extra_url)
            pictures.extend(extra_page.find_all('img', class_='extrapanelimage'))
        return {
            'url_extra': extra_url,
            'title': title,
            'author': author,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [picture['src'] for picture in pictures],
        }
1218
1219
1220
class CyanideAndHappiness(GenericNavigableComic):
    """Class to retrieve Cyanide And Happiness comics."""
    name = 'cyanide'
    long_name = 'Cyanide and Happiness'
    url = 'http://explosm.net'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics (the anchor titled 'Oldest comic')."""
        return get_soup_at_url(cls.url).find('a', title='Oldest comic')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns None when the navigation anchor is missing from the page or
        when it carries no href.
        """
        link = last_soup.find('a', class_='next-comic' if next_ else 'previous-comic ')
        if link is None:
            # find() yields None when nothing matches; treat it as "no link"
            # instead of failing on `.get`.
            return None
        return None if link.get('href') is None else link

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The comic number is the second-to-last path component of the og:url
        meta value; the author credit must start with 'by ', which is removed.
        """
        url2 = soup.find('meta', property='og:url')['content']
        num = int(url2.split('/')[-2])
        date_str = soup.find('h3').find('a').string
        day = string_to_date(date_str, '%Y.%m.%d')
        author = soup.find('small', class_="author-credit-name").string
        assert author.startswith('by ')
        author = author[3:]
        imgs = soup.find_all('img', id='main-comic')
        return {
            'num': num,
            'author': author,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'prefix': '%d-' % num,
            'img': [convert_iri_to_plain_ascii_uri(urljoin_wrapper(cls.url, i['src'])) for i in imgs]
        }
1258
1259
1260
class MrLovenstein(GenericComic):
    """Retriever for the Mr Lovenstein comics."""
    # Also on https://tapastic.com/series/MrLovenstein
    name = 'mrlovenstein'
    long_name = 'Mr. Lovenstein'
    url = 'http://www.mrlovenstein.com'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Yield comics by number. Implementation of GenericComic's abstract method.

        The number range comes from the '/comic/<num>' links of the home
        page; when a last comic is given, retrieval starts right after it.
        """
        # TODO: more info from http://www.mrlovenstein.com/archive
        comic_num_re = re.compile('^/comic/([0-9]*)$')
        home_links = get_soup_at_url(cls.url).find_all('a', href=comic_num_re)
        numbers = [int(comic_num_re.match(anchor['href']).group(1)) for anchor in home_links]
        oldest, newest = min(numbers), max(numbers)
        start = last_comic['num'] + 1 if last_comic else oldest
        for num in range(start, newest + 1):
            comic_url = urljoin_wrapper(cls.url, '/comic/%d' % num)
            page = get_soup_at_url(comic_url)
            pictures = list(page.find_all('img', src=re.compile('^/images/comics/')))
            pictures.reverse()
            description = page.find('meta', attrs={'name': 'description'})['content']
            hover_texts = [picture.get('title') for picture in pictures]
            yield {
                'url': comic_url,
                'num': num,
                'texts': '  '.join(text for text in hover_texts if text),
                'img': [urljoin_wrapper(comic_url, picture['src']) for picture in pictures],
                'description': description,
            }
1290
1291
1292
class DinosaurComics(GenericListableComic):
    """Retriever for the Dinosaur Comics comics."""
    name = 'dinosaur'
    long_name = 'Dinosaur Comics'
    url = 'http://www.qwantz.com'
    get_url_from_archive_element = get_href
    comic_link_re = re.compile('^%s/index.php\\?comic=([0-9]*)$' % url)

    @classmethod
    def get_archive_elements(cls):
        """Return the comic links of the archive page, in reversed page order."""
        archive = get_soup_at_url(urljoin_wrapper(cls.url, 'archive.php'))
        comic_links = archive.find_all('a', href=cls.comic_link_re)
        # first link is random -> skip it
        return reversed(comic_links[1:])

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return date (from the link text), image, title, text and number of a comic."""
        comic_url = cls.get_url_from_archive_element(link)
        num = int(cls.comic_link_re.match(comic_url).group(1))
        raw_date = link.string
        # the element following the archive link provides the text
        text = link.next_sibling.string
        day = string_to_date(remove_st_nd_rd_th_from_date(raw_date), "%B %d, %Y")
        picture = soup.find('img', src=re.compile('^%s/comics/' % cls.url))
        return {
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [picture.get('src')],
            'title': picture.get('title'),
            'text': text,
            'num': num,
        }
1325
1326
1327
class ButterSafe(GenericListableComic):
    """Retriever for the Butter Safe comics."""
    name = 'butter'
    long_name = 'ButterSafe'
    url = 'http://buttersafe.com'
    get_url_from_archive_element = get_href
    comic_link_re = re.compile('^%s/([0-9]*)/([0-9]*)/([0-9]*)/.*' % url)

    @classmethod
    def get_archive_elements(cls):
        """Return the dated comic links of the archive page, in reversed page order."""
        archive = get_soup_at_url(urljoin_wrapper(cls.url, 'archive/'))
        return reversed(archive.find_all('a', href=cls.comic_link_re))

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, date (parsed from the comic URL) and image URL of a comic."""
        comic_url = cls.get_url_from_archive_element(link)
        title = link.string
        date_parts = cls.comic_link_re.match(comic_url).groups()
        year, month, day = (int(part) for part in date_parts)
        picture = soup.find('div', id='comic').find('img')
        assert picture['alt'] == title
        return {
            'title': title,
            'day': day,
            'month': month,
            'year': year,
            'img': [picture['src']],
        }
1355
1356
1357
class CalvinAndHobbes(GenericComic):
    """Class to retrieve Calvin and Hobbes comics."""
    # Also on http://www.gocomics.com/calvinandhobbes/
    name = 'calvin'
    long_name = 'Calvin and Hobbes'
    # This is not through any official webpage but eh...
    url = 'http://marcel-oehler.marcellosendos.ch/comics/ch/'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generator to get the next comic. Implementation of GenericComic's abstract method.

        The index page links to one page per month ('<year>/<month>/'); each
        month page holds images whose source starts with year, month and day.
        Only comics strictly newer than the last retrieved one are yielded.
        """
        last_date = get_date_for_comic(
            last_comic) if last_comic else date(1985, 11, 1)
        link_re = re.compile('^([0-9]*)/([0-9]*)/')
        for link in get_soup_at_url(cls.url).find_all('a', href=link_re):
            url = link['href']
            # Keep the textual parts exactly as they appear in the link: they
            # are reused verbatim in the image pattern and the image URL.
            year_str, month_str = link_re.match(url).groups()
            year, month = int(year_str), int(month_str)
            # Skip months lying entirely before the last retrieved comic.
            if date(year, month, 1) + timedelta(days=31) < last_date:
                continue
            img_re = re.compile('^%s%s([0-9]*)' % (year_str, month_str))
            month_url = urljoin_wrapper(cls.url, url)
            for img in get_soup_at_url(month_url).find_all('img', src=img_re):
                img_src = img['src']
                day = int(img_re.match(img_src).groups()[0])
                comic_date = date(year, month, day)
                if comic_date > last_date:
                    yield {
                        'url': month_url,
                        'year': year,
                        'month': month,
                        'day': day,
                        'img': ['%s%s/%s/%s' % (cls.url, year_str, month_str, img_src)],
                    }
                    last_date = comic_date
1391
1392
1393
class AbstruseGoose(GenericListableComic):
    """Retriever for the Abstruse Goose comics."""
    name = 'abstruse'
    long_name = 'Abstruse Goose'
    url = 'http://abstrusegoose.com'
    get_url_from_archive_element = get_href
    comic_url_re = re.compile('^%s/([0-9]*)$' % url)
    comic_img_re = re.compile('^%s/strips/.*' % url)

    @classmethod
    def get_archive_elements(cls):
        """Return the numbered comic links of the archive page, in page order."""
        archive = get_soup_at_url(urljoin_wrapper(cls.url, 'archive'))
        return archive.find_all('a', href=cls.comic_url_re)

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Return number, title (the archive link text) and image URL of a comic."""
        comic_url = cls.get_url_from_archive_element(archive_elt)
        num = int(cls.comic_url_re.match(comic_url).group(1))
        return {
            'num': num,
            'title': archive_elt.string,
            'img': [soup.find('img', src=cls.comic_img_re)['src']]
        }
1416
1417
1418
class PhDComics(GenericNavigableComic):
    """Retriever for the PHD Comics."""
    name = 'phd'
    long_name = 'PhD Comics'
    url = 'http://phdcomics.com/comics/archive.php'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the element wrapping the 'first' button image of the archive page."""
        first_button = get_soup_at_url(cls.url).find('img', src='images/first_button.gif')
        return first_button.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the element wrapping the next/previous button image, or None if absent."""
        button_src = 'images/next_button.gif' if next_ else 'images/prev_button.gif'
        button = last_soup.find('img', src=button_src)
        if button is None:
            return None
        return button.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return date, image URL and title of a comic page.

        A date that cannot be parsed is reported on stdout and replaced by
        today's date.
        """
        date_font = soup.find('font', face='Arial,Helvetica,Geneva,Swiss,SunSans-Regular', color='white')
        date_str = date_font.string.strip()
        try:
            day = string_to_date(date_str, '%m/%d/%Y')
        except ValueError:
            print("Invalid date %s" % date_str)
            day = date.today()
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        return {
            'year': day.year,
            'month': day.month,
            'day': day.day,
            'img': [soup.find('img', id='comic')['src']],
            'title': title,
        }
1453
1454
1455
class Octopuns(GenericNavigableComic):
    """Class to retrieve Octopuns comics."""
    # Also on http://octopuns.tumblr.com
    name = 'octopuns'
    long_name = 'Octopuns'
    url = 'http://www.octopuns.net'

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics (the parent of the 'First.png' image)."""
        return get_soup_at_url(cls.url).find('img', src=re.compile('.*/First.png')).parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns None when the navigation image is missing from the page or
        when its parent element carries no href.
        """
        img = last_soup.find('img', src=re.compile('.*/Next.png' if next_ else '.*/Back.png'))
        if img is None:
            # find() yields None when nothing matches; treat it as "no link"
            # instead of failing on `.parent`.
            return None
        link = img.parent
        return None if link.get('href') is None else link

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics (title, date and images)."""
        title = soup.find('h3', class_='post-title entry-title').string
        date_str = soup.find('h2', class_='date-header').string
        day = string_to_date(date_str, "%A, %B %d, %Y")
        imgs = soup.find_all('link', rel='image_src')
        return {
            'img': [i['href'] for i in imgs],
            'title': title,
            'day': day.day,
            'month': day.month,
            'year': day.year,
        }
1487
1488
1489
class Quarktees(GenericNavigableComic):
    """Retriever for the Quarktees comics."""
    name = 'quarktees'
    long_name = 'Quarktees'
    url = 'http://www.quarktees.com/blogs/news'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.quarktees.com/blogs/news/12486621-coming-soon'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the 'article-next' (or 'article-prev') anchor of the page."""
        anchor_id = 'article-next' if next_ else 'article-prev'
        return last_soup.find('a', id=anchor_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the title and the images of the article."""
        title = soup.find('meta', property='og:title')['content']
        pictures = soup.find('div', class_='single-article').find_all('img')
        return {
            'title': title,
            'img': [urljoin_wrapper(cls.url, picture['src']) for picture in pictures],
        }
1513
1514
1515
class OverCompensating(GenericNavigableComic):
    """Scraper for the Over Compensating comics."""
    name = 'compensating'
    long_name = 'Over Compensating'
    url = 'http://www.overcompensating.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the anchor on the home page whose href ends with 'comic=1'."""
        first_href_re = re.compile('comic=1$')
        home_soup = get_soup_at_url(cls.url)
        return home_soup.find('a', href=first_href_re)

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the navigation anchor, identified by its title attribute."""
        if next_:
            wanted_title = 'next comic'
        else:
            wanted_title = 'go back already'
        return last_soup.find('a', title=wanted_title)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict ('num', 'img', 'title') for one comic.

        The comic number is read from the trailing 'comic=<digits>' part of
        the comic url; the image is the first <img> whose src starts with
        '/oc/comics/'.
        """
        src_pattern = re.compile('^/oc/comics/.*')
        num_pattern = re.compile('.*comic=([0-9]*)$')
        page_url = cls.get_url_from_link(link)
        number = int(num_pattern.match(page_url).group(1))
        picture = soup.find('img', src=src_pattern)
        return {
            'num': number,
            'img': [urljoin_wrapper(page_url, picture['src'])],
            'title': picture.get('title')
        }
1545
1546
1547
class Oglaf(GenericNavigableComic):
    """Scraper for the Oglaf comics (flagged NSFW in the long name)."""
    name = 'oglaf'
    long_name = 'Oglaf [NSFW]'
    url = 'http://oglaf.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the parent element of the <div id="st"> on the home page."""
        home_soup = get_soup_at_url(cls.url)
        start_div = home_soup.find("div", id="st")
        return start_div.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the parent of the next/previous <div>, or None if absent."""
        marker = last_soup.find("div", id="nx" if next_ else "pvs")
        if not marker:
            return None
        return marker.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict ('title', 'img', 'description') for one comic.

        Exactly one title image (inside <div id="tt">) and one strip image
        (id 'strip') are expected; both are returned, title image first.
        """
        page_title = soup.find('title').string
        header_imgs = soup.find('div', id='tt').find_all('img')
        assert len(header_imgs) == 1
        comic_imgs = soup.find_all('img', id='strip')
        assert len(comic_imgs) == 1
        pictures = header_imgs + comic_imgs
        description = ' '.join(pic['title'] for pic in pictures)
        return {
            'title': page_title,
            'img': [pic['src'] for pic in pictures],
            'description': description,
        }
1580
1581
1582
class ScandinaviaAndTheWorld(GenericNavigableComic):
    """Scraper for the Scandinavia And The World comics."""
    name = 'satw'
    long_name = 'Scandinavia And The World'
    url = 'http://satwcomic.com'
    get_first_comic_link = simulate_first_link
    # NOTE(review): presumably read by simulate_first_link - confirm.
    first_url = 'http://satwcomic.com/sweden-denmark-and-norway'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the navigation anchor, identified by its accesskey ('n'/'p')."""
        key = 'n' if next_ else 'p'
        return last_soup.find('a', accesskey=key)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict ('title', 'description', 'img') for one comic.

        Title and description come from <meta> tags; images are the <img>
        tags carrying itemprop="image".
        """
        title_meta = soup.find('meta', attrs={'name': 'twitter:label1'})
        comic_title = title_meta['content']
        desc_meta = soup.find('meta', property='og:description')
        description = desc_meta['content']
        pictures = soup.find_all('img', itemprop="image")
        return {
            'title': comic_title,
            'description': description,
            'img': [pic['src'] for pic in pictures],
        }
1606
1607
1608
class SomethingOfThatIlk(GenericEmptyComic):
    """Placeholder entry for the Something Of That Ilk comics.

    The site does not exist anymore; this class only carries the
    identification attributes and defines no retrieval logic of its own.
    """
    name = 'somethingofthatilk'
    long_name = 'Something Of That Ilk'
    url = 'http://www.somethingofthatilk.com'
1613
1614
1615
class InfiniteMonkeyBusiness(GenericNavigableComic):
    """Scraper for the Infinite Monkey Business comics."""
    name = 'monkey'
    long_name = 'Infinite Monkey Business'
    url = 'http://infinitemonkeybusiness.net'
    get_navi_link = get_a_navi_comicnavnext_navinext
    get_first_comic_link = simulate_first_link
    # NOTE(review): presumably read by simulate_first_link - confirm.
    first_url = 'http://infinitemonkeybusiness.net/comic/pillory/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict ('title', 'img') for the comic in `soup`.

        The title is the og:title meta content; images are every <img>
        inside <div id="comic">.
        """
        comic_title = soup.find('meta', property='og:title')['content']
        comic_div = soup.find('div', id='comic')
        pictures = comic_div.find_all('img')
        return {
            'title': comic_title,
            'img': [pic['src'] for pic in pictures],
        }
1633
1634
1635
class Wondermark(GenericListableComic):
    """Scraper for the Wondermark comics, driven by the archive page."""
    name = 'wondermark'
    long_name = 'Wondermark'
    url = 'http://wondermark.com'
    get_url_from_archive_element = get_href

    @classmethod
    def get_archive_elements(cls):
        """Return the rel="bookmark" anchors of the archive page, reversed."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive/'))
        bookmarks = archive_soup.find_all('a', rel='bookmark')
        return reversed(bookmarks)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for one archive entry.

        Keys: 'month', 'year', 'day', 'img', 'title', 'alt', 'tags'.
        When the page has no <div id="comic">, 'img' is empty and 'title'
        and 'alt' are empty strings.
        """
        raw_date = soup.find('div', class_='postdate').find('em').string
        published = string_to_date(remove_st_nd_rd_th_from_date(raw_date), "%B %d, %Y")
        comic_div = soup.find('div', id='comic')
        if not comic_div:
            sources = []
            alt_text = ''
            comic_title = ''
        else:
            picture = comic_div.find('img')
            sources = [picture['src']]
            alt_text = picture['alt']
            # The alt and title attributes are expected to carry the same text.
            assert alt_text == picture['title']
            comic_title = soup.find('meta', property='og:title')['content']
        tag_links = soup.find('div', class_='postmeta').find_all('a', rel='tag')
        tags = ' '.join(tag.string for tag in tag_links)
        return {
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': sources,
            'title': comic_title,
            'alt': alt_text,
            'tags': tags,
        }
1672
1673
1674
class WarehouseComic(GenericNavigableComic):
    """Scraper for the Warehouse Comic comics."""
    name = 'warehouse'
    long_name = 'Warehouse Comic'
    url = 'http://warehousecomic.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict ('img', 'title', 'day', 'month', 'year').

        Title and date come from the post header; images are every <img>
        inside <div id="comic">. `link` is unused.
        """
        comic_title = soup.find('h2', class_='post-title').string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find('div', id='comic').find_all('img')
        sources = [pic['src'] for pic in pictures]
        return {
            'img': sources,
            'title': comic_title,
            'day': published.day,
            'month': published.month,
            'year': published.year,
        }
1696
1697
1698
class JustSayEh(GenericNavigableComic):
    """Scraper for the Just Say Eh comics."""
    # Also on http://tapastic.com/series/Just-Say-Eh
    name = 'justsayeh'
    long_name = 'Just Say Eh'
    url = 'http://www.justsayeh.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict ('img', 'title', 'alt') for one comic.

        Every image must have identical alt and title attributes; the alt
        text of the first image is reported, so at least one image is
        required.
        """
        comic_title = soup.find('h2', class_='post-title').string
        pictures = soup.find("div", id="comic").find_all("img")
        for pic in pictures:
            assert pic['alt'] == pic['title']
        alt_text = pictures[0]['alt']
        return {
            'img': [pic['src'] for pic in pictures],
            'title': comic_title,
            'alt': alt_text,
        }
1719
1720
1721
class MouseBearComedy(GenericNavigableComic):
    """Scraper for the Mouse Bear Comedy comics."""
    # Also on http://mousebearcomedy.tumblr.com
    name = 'mousebear'
    long_name = 'Mouse Bear Comedy'
    url = 'http://www.mousebearcomedy.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for one comic.

        Keys: 'day', 'month', 'year', 'img', 'title', 'author'. Each image
        is checked to have alt and title attributes equal to the post title.
        """
        comic_title = soup.find('h2', class_='post-title').string
        author_name = soup.find("span", class_="post-author").find("a").string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, '%B %d, %Y')
        pictures = soup.find("div", id="comic").find_all("img")
        for pic in pictures:
            assert pic['alt'] == pic['title'] == comic_title
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [pic['src'] for pic in pictures],
            'title': comic_title,
            'author': author_name,
        }
1747
1748
1749
class BigFootJustice(GenericNavigableComic):
    """Scraper for the Big Foot Justice comics."""
    # Also on http://tapastic.com/series/bigfoot-justice
    name = 'bigfoot'
    long_name = 'Big Foot Justice'
    url = 'http://bigfootjustice.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict ('img', 'title') for one comic.

        The title is the space-joined title attributes of every image in
        <div id="comic">; each image must have matching title and alt.
        """
        pictures = soup.find('div', id='comic').find_all('img')
        for pic in pictures:
            assert pic['title'] == pic['alt']
        joined_title = ' '.join(pic['title'] for pic in pictures)
        return {
            'img': [pic['src'] for pic in pictures],
            'title': joined_title,
        }
1768
1769
1770
class RespawnComic(GenericNavigableComic):
    """Class to retrieve Respawn Comic."""
    # Also on http://respawncomic.tumblr.com
    name = 'respawn'
    long_name = 'Respawn Comic'
    # No trailing whitespace: the value is used as a URL.
    url = 'http://respawncomic.com'
    get_navi_link = get_a_navi_comicnavnext_navinext
    get_first_comic_link = simulate_first_link
    # NOTE(review): presumably read by simulate_first_link - confirm.
    first_url = 'http://respawncomic.com/comic/c0001/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with the keys 'title', 'author', 'day', 'month',
        'year' and 'img'. All values are read from <meta> tags of `soup`;
        `link` is unused.
        """
        title = soup.find('meta', property='og:title')['content']
        author = soup.find('meta', attrs={'name': 'shareaholic:article_author_name'})['content']
        date_str = soup.find('meta', attrs={'name': 'shareaholic:article_published_time'})['content']
        # Only the leading "YYYY-MM-DD" part of the timestamp is parsed.
        date_str = date_str[:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        imgs = soup.find_all('meta', property='og:image')
        # og:image entries with these exact URLs are left out of the result.
        skip_imgs = {
            'http://respawncomic.com/wp-content/uploads/2016/03/site/HAROLD2.png',
            'http://respawncomic.com/wp-content/uploads/2016/03/site/DEVA.png'
        }
        return {
            'title': title,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [i['content'] for i in imgs if i['content'] not in skip_imgs],
        }
1801
1802
1803
class SafelyEndangered(GenericNavigableComic):
    """Scraper for the Safely Endangered comics."""
    # Also on http://tumblr.safelyendangered.com
    name = 'endangered'
    long_name = 'Safely Endangered'
    url = 'http://www.safelyendangered.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    # NOTE(review): presumably read by simulate_first_link - confirm.
    first_url = 'http://www.safelyendangered.com/comic/ignored/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for one comic.

        Keys: 'day', 'month', 'year', 'img', 'title', 'alt'. The alt text
        is taken from the first image, so at least one image is required;
        every image must have identical alt and title attributes.
        """
        comic_title = soup.find('h2', class_='post-title').string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, '%B %d, %Y')
        pictures = soup.find('div', id='comic').find_all('img')
        alt_text = pictures[0]['alt']
        for pic in pictures:
            assert pic['alt'] == pic['title']
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [pic['src'] for pic in pictures],
            'title': comic_title,
            'alt': alt_text,
        }
1830
1831
1832
class PicturesInBoxes(GenericNavigableComic):
    """Scraper for the Pictures In Boxes comics."""
    # Also on http://picturesinboxescomic.tumblr.com
    name = 'picturesinboxes'
    long_name = 'Pictures in Boxes'
    url = 'http://www.picturesinboxes.com'
    get_navi_link = get_a_navi_navinext
    get_first_comic_link = simulate_first_link
    # NOTE(review): presumably read by simulate_first_link - confirm.
    first_url = 'http://www.picturesinboxes.com/2013/10/26/tetris/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for one comic.

        Keys: 'day', 'month', 'year', 'img', 'title', 'author'. At least
        one image is required in the 'comicpane' div, and each image must
        have title and alt attributes equal to the post title.
        """
        comic_title = soup.find('h2', class_='post-title').string
        author_name = soup.find("span", class_="post-author").find("a").string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, '%B %d, %Y')
        pictures = soup.find('div', class_='comicpane').find_all('img')
        assert pictures
        for pic in pictures:
            assert pic['title'] == pic['alt'] == comic_title
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [pic['src'] for pic in pictures],
            'title': comic_title,
            'author': author_name,
        }
1860
1861
1862
class Penmen(GenericEmptyComic):
    """Placeholder entry for the Penmen comics.

    Only the identification attributes are set; no retrieval logic is
    defined in this class.
    """
    name = 'penmen'
    long_name = 'Penmen'
    url = 'http://penmen.com'
1867
1868
1869
class TheDoghouseDiaries(GenericNavigableComic):
    """Scraper for The Dog House Diaries comics."""
    name = 'doghouse'
    long_name = 'The Dog House Diaries'
    url = 'http://thedoghousediaries.com'

    @classmethod
    def get_first_comic_link(cls):
        """Return the anchor with id 'firstlink' from the home page."""
        home_soup = get_soup_at_url(cls.url)
        return home_soup.find('a', id='firstlink')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor with id 'nextlink' or 'previouslink'."""
        if next_:
            anchor_id = 'nextlink'
        else:
            anchor_id = 'previouslink'
        return last_soup.find('a', id=anchor_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict ('title', 'title2', 'alt', 'img', 'num').

        The image is the first <img> whose src starts with 'dhdcomics/';
        the comic number is the last path segment of the comic url.
        """
        src_pattern = re.compile('^dhdcomics/.*')
        picture = soup.find('img', src=src_pattern)
        page_url = cls.get_url_from_link(link)
        main_title = soup.find('h2', id='titleheader').string
        sub_title = soup.find('div', id='subtext').string
        alt_text = picture.get('title')
        # The src attribute is stripped of surrounding whitespace before joining.
        sources = [urljoin_wrapper(page_url, picture['src'].strip())]
        number = int(page_url.split('/')[-1])
        return {
            'title': main_title,
            'title2': sub_title,
            'alt': alt_text,
            'img': sources,
            'num': number,
        }
1898
1899
1900
class InvisibleBread(GenericListableComic):
    """Class to retrieve Invisible Bread comics."""
    # Also on http://www.gocomics.com/invisible-bread
    name = 'invisiblebread'
    long_name = 'Invisible Bread'
    url = 'http://invisiblebread.com'

    @classmethod
    def get_archive_elements(cls):
        """Return the 'archive-title' cells of the archive page, reversed."""
        archive_url = urljoin_wrapper(cls.url, 'archives/')
        return reversed(get_soup_at_url(archive_url).find_all('td', class_='archive-title'))

    @classmethod
    def get_url_from_archive_element(cls, td):
        """Get url corresponding to an archive element."""
        return td.find('a')['href']

    @classmethod
    def get_comic_info(cls, soup, td):
        """Get information about a particular comics.

        `td` is the archive cell of the comic: its anchor gives the url and
        the title, and its previous sibling holds the month and day. The
        year is taken from the first numeric path segment of the url.
        Returns a dict with 'month', 'year', 'day', 'img' and 'title'.
        """
        url = cls.get_url_from_archive_element(td)
        title = td.find('a').string
        month_and_day = td.previous_sibling.string
        # Escape the site url so that its '.' only matches a literal dot.
        link_re = re.compile('^%s/([0-9]+)/' % re.escape(cls.url))
        year = link_re.match(url).groups()[0]
        date_str = month_and_day + ' ' + year
        day = string_to_date(date_str, '%b %d %Y')
        imgs = [soup.find('div', id='comic').find('img')]
        assert all(i['title'] == i['alt'] == title for i in imgs)
        return {
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
            'title': title,
        }
1936
1937
1938
class DiscoBleach(GenericEmptyComic):
    """Placeholder entry for the Disco Bleach comics.

    Retrieval does not work anymore; this class only carries the
    identification attributes and defines no retrieval logic of its own.
    """
    name = 'discobleach'
    long_name = 'Disco Bleach'
    url = 'http://discobleach.com'
1943
1944
1945
class TubeyToons(GenericEmptyComic):
    """Placeholder entry for the TubeyToons comics.

    Retrieval does not work anymore; this class only carries the
    identification attributes and defines no retrieval logic of its own.
    """
    # Also on http://tapastic.com/series/Tubey-Toons
    # Also on http://tubeytoons.tumblr.com
    name = 'tubeytoons'
    long_name = 'Tubey Toons'
    url = 'http://tubeytoons.com'
1952
1953
1954
class CompletelySeriousComics(GenericNavigableComic):
    """Scraper for the Completely Serious comics."""
    name = 'completelyserious'
    long_name = 'Completely Serious Comics'
    url = 'http://completelyseriouscomics.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for one comic.

        Keys: 'month', 'year', 'day', 'img', 'title', 'alt', 'author'.
        The author is the second child of the 'post-author' span. At least
        one image is required, and all images must share the same title
        and alt text (that of the first image).
        """
        comic_title = soup.find('h2', class_='post-title').string
        author_span = soup.find('span', class_='post-author')
        author_name = author_span.contents[1].string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, '%B %d, %Y')
        pictures = soup.find('div', class_='comicpane').find_all('img')
        assert pictures
        alt_text = pictures[0]['title']
        for pic in pictures:
            assert pic['title'] == pic['alt'] == alt_text
        return {
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [pic['src'] for pic in pictures],
            'title': comic_title,
            'alt': alt_text,
            'author': author_name,
        }
1982
1983
1984
class PoorlyDrawnLines(GenericListableComic):
    """Class to retrieve Poorly Drawn Lines comics."""
    # Also on http://pdlcomics.tumblr.com
    name = 'poorlydrawn'
    long_name = 'Poorly Drawn Lines'
    url = 'http://poorlydrawnlines.com'
    get_url_from_archive_element = get_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with 'img' and 'title'. At most one image is
        expected in the 'post' div; without an image, 'img' is empty and
        'title' is the empty string.
        """
        imgs = soup.find('div', class_='post').find_all('img')
        assert len(imgs) <= 1
        return {
            'img': [i['src'] for i in imgs],
            'title': imgs[0].get('title', "") if imgs else "",
        }

    @classmethod
    def get_archive_elements(cls):
        """Return the archive anchors that point at a comic page, reversed."""
        archive_url = urljoin_wrapper(cls.url, 'archive')
        # Escape the site url so that its '.' only matches a literal dot; the
        # trailing '.' of the pattern still requires a character after 'comic/'.
        url_re = re.compile('^%s/comic/.' % re.escape(cls.url))
        return reversed(get_soup_at_url(archive_url).find_all('a', href=url_re))
2007
2008
2009
class LoadingComics(GenericNavigableComic):
    """Scraper for the Loading Artist comics."""
    name = 'loadingartist'
    long_name = 'Loading Artist'
    url = 'http://www.loadingartist.com/latest'

    @classmethod
    def get_first_comic_link(cls):
        """Return the anchor titled "First" from the page at the class url."""
        latest_soup = get_soup_at_url(cls.url)
        return latest_soup.find('a', title="First")

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor titled 'Next' or 'Previous'."""
        wanted_title = 'Next' if next_ else 'Previous'
        return last_soup.find('a', title=wanted_title)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict ('title', 'img', 'month', 'year', 'day').

        Only the images of the 'comic' div whose alt and title attributes
        are both empty are kept.
        """
        comic_title = soup.find('h1').string
        raw_date = soup.find('span', class_='date').string.strip()
        published = string_to_date(raw_date, "%B %d, %Y")
        comic_div = soup.find('div', class_='comic')
        pictures = comic_div.find_all('img', alt='', title='')
        return {
            'title': comic_title,
            'img': [pic['src'] for pic in pictures],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2039
2040
2041
class ChuckleADuck(GenericNavigableComic):
    """Scraper for the Chuckle-A-Duck comics."""
    name = 'chuckleaduck'
    long_name = 'Chuckle-A-duck'
    url = 'http://chuckleaduck.com'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for one comic.

        Keys: 'month', 'year', 'day', 'img', 'title', 'author'. A page
        without <div id="comic"> yields no image and an empty title; when
        images exist they must all share the first image's title and alt.
        """
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(remove_st_nd_rd_th_from_date(raw_date), "%B %d, %Y")
        author_name = soup.find('span', class_='post-author').string
        comic_div = soup.find('div', id='comic')
        if comic_div:
            pictures = comic_div.find_all('img')
        else:
            pictures = []
        if pictures:
            comic_title = pictures[0]['title']
        else:
            comic_title = ""
        for pic in pictures:
            assert pic['title'] == pic['alt'] == comic_title
        return {
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [pic['src'] for pic in pictures],
            'title': comic_title,
            'author': author_name,
        }
2067
2068
2069
class DepressedAlien(GenericNavigableComic):
    """Scraper for the Depressed Alien comics."""
    name = 'depressedalien'
    long_name = 'Depressed Alien'
    url = 'http://depressedalien.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the parent of the <img name="beginArrow"> on the home page."""
        home_soup = get_soup_at_url(cls.url)
        arrow = home_soup.find('img', attrs={'name': 'beginArrow'})
        return arrow.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the parent of the right (next) or left (previous) arrow image."""
        arrow_name = 'rightArrow' if next_ else 'leftArrow'
        arrow = last_soup.find('img', attrs={'name': arrow_name})
        return arrow.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict ('title', 'img') from the page's <meta> tags."""
        title_meta = soup.find('meta', attrs={'name': 'twitter:title'})
        comic_title = title_meta['content']
        image_metas = soup.find_all('meta', property='og:image')
        return {
            'title': comic_title,
            'img': [meta['content'] for meta in image_metas],
        }
2095
2096
2097
class ThingsInSquares(GenericListableComic):
    """Scraper for the Things In Squares comics, driven by the archive table."""
    # This can be retrieved in other languages
    # Also on https://tapastic.com/series/Things-in-Squares
    name = 'squares'
    long_name = 'Things in squares'
    url = 'http://www.thingsinsquares.com'

    @classmethod
    def get_comic_info(cls, soup, tr):
        """Build the info dict for one archive row.

        `tr` must hold exactly three cells: the second contains the anchor
        (title) and the third the date. Keys: 'day', 'month', 'year',
        'title', 'title2', 'description', 'tags', 'img', 'alt'.
        """
        _, link_cell, date_cell = tr.find_all('td')
        anchor = link_cell.find('a')
        raw_date = date_cell.string
        published = string_to_date(raw_date, "%m.%d.%y")
        row_title = anchor.string
        og_title = soup.find('meta', property='og:title')['content']
        desc_meta = soup.find('meta', property='og:description')
        if desc_meta:
            description = desc_meta['content']
        else:
            description = ''
        tag_metas = soup.find_all('meta', property='article:tag')
        tags = ' '.join(meta['content'] for meta in tag_metas)
        pictures = soup.find('div', class_='entry-content').find_all('img')
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'title': row_title,
            'title2': og_title,
            'description': description,
            'tags': tags,
            'img': [pic['src'] for pic in pictures],
            'alt': ' '.join(pic['alt'] for pic in pictures),
        }

    @classmethod
    def get_url_from_archive_element(cls, tr):
        """Return the href of the anchor in the second cell of the row."""
        _, link_cell, _ = tr.find_all('td')
        anchor = link_cell.find('a')
        return anchor['href']

    @classmethod
    def get_archive_elements(cls):
        """Return the rows of the archive page's <tbody>, reversed."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive-2'))
        rows = archive_soup.find('tbody').find_all('tr')
        return reversed(rows)
2139
2140
2141
class HappleTea(GenericNavigableComic):
    """Scraper for the Happle Tea comics."""
    name = 'happletea'
    long_name = 'Happle Tea'
    url = 'http://www.happletea.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for one comic.

        Keys: 'title', 'img', 'alt', 'month', 'year', 'day', 'author'.
        The alt value is the alt texts of all images concatenated without
        separator; each image must have identical alt and title attributes.
        """
        pictures = soup.find('div', id='comic').find_all('img')
        post_div = soup.find('div', class_='post-content')
        comic_title = post_div.find('h2', class_='post-title').string
        author_name = post_div.find('a', rel='author').string
        raw_date = post_div.find('span', class_='post-date').string
        published = string_to_date(raw_date, "%B %d, %Y")
        for pic in pictures:
            assert pic['alt'] == pic['title']
        return {
            'title': comic_title,
            'img': [pic['src'] for pic in pictures],
            'alt': ''.join(pic['alt'] for pic in pictures),
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'author': author_name,
        }
2168
2169
2170
class FatAwesomeComics(GenericNavigableComic):
    """Scraper for the Fat Awesome comics."""
    # Also on http://fatawesomecomedy.tumblr.com
    name = 'fatawesome'
    long_name = 'Fat Awesome'
    url = 'http://fatawesome.com/comics'
    get_navi_link = get_a_rel_next
    get_first_comic_link = simulate_first_link
    # NOTE(review): presumably read by simulate_first_link - confirm.
    first_url = 'http://fatawesome.com/shortbus/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for one comic.

        Keys: 'title', 'description', 'tags', 'alt', 'img', 'month',
        'year', 'day'. Exactly one <img data-recalc-dims="1"> is expected;
        anything after the last '?' of its src is dropped.
        """
        comic_title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        description = soup.find('meta', attrs={'name': 'description'})['content']
        tag_meta = soup.find('meta', property='article:tag')
        if tag_meta:
            tags = tag_meta['content']
        else:
            tags = ""
        published_meta = soup.find('meta', property='article:published_time')
        # Only the leading "YYYY-MM-DD" part of the timestamp is parsed.
        raw_date = published_meta['content'][:10]
        published = string_to_date(raw_date, "%Y-%m-%d")
        pictures = soup.find_all('img', attrs={'data-recalc-dims': "1"})
        assert len(pictures) == 1
        return {
            'title': comic_title,
            'description': description,
            'tags': tags,
            'alt': "".join(pic['alt'] for pic in pictures),
            'img': [pic['src'].rsplit('?', 1)[0] for pic in pictures],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2201
2202
2203
class AnythingComic(GenericListableComic):
    """Scraper for the Anything Comic comics, driven by the archive table."""
    # Also on http://tapastic.com/series/anything
    name = 'anythingcomic'
    long_name = 'Anything Comic'
    url = 'http://www.anythingcomic.com'

    @classmethod
    def get_archive_elements(cls):
        """Return the comic rows of the archive page's chapter table."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive/'))
        rows = archive_soup.find('table', id='chapter_table').find_all('tr')
        # The first 2 <tr>'s do not correspond to comics
        return rows[2:]

    @classmethod
    def get_url_from_archive_element(cls, tr):
        """Return the comic url for an archive row (which must have 4 cells)."""
        _, comic_cell, _, _ = tr.find_all('td')
        anchor = comic_cell.find('a')
        return urljoin_wrapper(cls.url, anchor['href'])

    @classmethod
    def get_comic_info(cls, soup, tr):
        """Build the info dict for one archive row.

        The row must have 4 cells: number, comic anchor, date and one
        ignored cell. Keys: 'num', 'title', 'alt', 'img', 'month', 'year',
        'day'. Exactly one <img id="comic_image"> is expected on the page.
        """
        num_cell, comic_cell, date_cell, _ = tr.find_all('td')
        number = int(num_cell.string)
        anchor = comic_cell.find('a')
        comic_title = anchor.string
        pictures = soup.find_all('img', id='comic_image')
        published = string_to_date(date_cell.string, '%d %b %Y %I:%M %p')
        assert len(pictures) == 1
        for pic in pictures:
            assert pic.get('alt') == pic.get('title')
        return {
            'num': number,
            'title': comic_title,
            'alt': pictures[0].get('alt', ''),
            'img': [pic['src'] for pic in pictures],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2243
2244
2245
class LonnieMillsap(GenericNavigableComic):
    """Scraper for Lonnie Millsap's comics."""
    name = 'millsap'
    long_name = 'Lonnie Millsap'
    url = 'http://www.lonniemillsap.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    # NOTE(review): presumably read by simulate_first_link - confirm.
    first_url = 'http://www.lonniemillsap.com/?p=42'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for one comic.

        Keys: 'title', 'author', 'img', 'month', 'year', 'day'. Author,
        date and images are looked up inside the 'post-content' div.
        """
        comic_title = soup.find('h2', class_='post-title').string
        post_div = soup.find('div', class_='post-content')
        author_name = post_div.find("span", class_="post-author").find("a").string
        raw_date = post_div.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        entry_div = post_div.find("div", class_="entry")
        pictures = entry_div.find_all("img")
        return {
            'title': comic_title,
            'author': author_name,
            'img': [pic['src'] for pic in pictures],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2271
2272
2273
class LinsEditions(GenericNavigableComic):
    """Scraper for the L.I.N.S. Editions comics."""
    # Also on http://linscomics.tumblr.com
    name = 'lins'
    long_name = 'L.I.N.S. Editions'
    url = 'https://linsedition.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    # NOTE(review): presumably read by simulate_first_link - confirm.
    first_url = 'https://linsedition.com/2011/09/07/l-i-n-s/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict ('title', 'img', 'month', 'year', 'day').

        Every value is read from the page's <meta> tags; `link` is unused.
        """
        comic_title = soup.find('meta', property='og:title')['content']
        image_metas = soup.find_all('meta', property='og:image')
        published_meta = soup.find('meta', property='article:published_time')
        # Only the leading "YYYY-MM-DD" part of the timestamp is parsed.
        raw_date = published_meta['content'][:10]
        published = string_to_date(raw_date, "%Y-%m-%d")
        return {
            'title': comic_title,
            'img': [meta['content'] for meta in image_metas],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2297
2298
2299
class ThorsThundershack(GenericNavigableComic):
    """Retrieve the Thor's Thundershack webcomic."""
    # Also on http://tapastic.com/series/Thors-Thundershac
    name = 'thor'
    long_name = 'Thor\'s Thundershack'
    url = 'http://www.thorsthundershack.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'first' navigation anchor found on the home page."""
        home = get_soup_at_url(cls.url)
        return home.find('a', class_='first navlink')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the next (or previous) anchor, or None when there is none.

        Anchors whose href is exactly '/comic' are skipped.
        """
        rel = 'next' if next_ else 'prev'
        candidates = (
            anchor for anchor in last_soup.find_all('a', rel=rel)
            if anchor['href'] != '/comic'
        )
        return next(candidates, None)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect images, date, author, title, alt text and description.

        Image `src` values are joined to the class URL. `link` is not used.
        """
        comic_title = soup.find('meta', attrs={'name': 'description'})["content"]
        body_text = soup.find('div', itemprop='articleBody').text
        comic_author = soup.find('span', itemprop='author copyrightHolder').string
        pictures = soup.find_all('img', itemprop='image')
        assert all(pic['title'] == pic['alt'] for pic in pictures)
        # The alt text of the first image stands for the whole comic.
        alt_text = pictures[0]['alt'] if pictures else ""
        raw_date = soup.find('time', itemprop='datePublished')["datetime"]
        published = string_to_date(raw_date, "%Y-%m-%d %H:%M:%S")
        return dict(
            img=[urljoin_wrapper(cls.url, pic['src']) for pic in pictures],
            month=published.month,
            year=published.year,
            day=published.day,
            author=comic_author,
            title=comic_title,
            alt=alt_text,
            description=body_text,
        )
2341
2342
2343
class GerbilWithAJetpack(GenericNavigableComic):
    """Retrieve the Gerbil With A Jetpack webcomic."""
    name = 'gerbil'
    long_name = 'Gerbil With A Jetpack'
    url = 'http://gerbilwithajetpack.com'
    get_navi_link = get_a_rel_next
    get_first_comic_link = get_a_navi_navifirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect images, title, alt text, author and publication date.

        `link` is not used.
        """
        comic_title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        comic_author = author_span.find("a").string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find("div", id="comic").find_all("img")
        # The first image's alt text is the reference every image must match.
        alt_text = pictures[0]['alt']
        assert all(pic['alt'] == pic['title'] == alt_text for pic in pictures)
        return dict(
            img=[pic['src'] for pic in pictures],
            title=comic_title,
            alt=alt_text,
            author=comic_author,
            day=published.day,
            month=published.month,
            year=published.year,
        )
2370
2371
2372
class EveryDayBlues(GenericNavigableComic):
    """Retrieve the Every Day Blues webcomic."""
    name = "blues"
    long_name = "Every Day Blues"
    url = "http://everydayblues.net"
    get_navi_link = get_link_rel_next
    get_first_comic_link = get_a_navi_navifirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect images, title, author and publication date.

        The date string is parsed with the "de_DE.utf8" locale. `link` is
        not used.
        """
        comic_title = soup.find("h2", class_="post-title").string
        author_span = soup.find("span", class_="post-author")
        comic_author = author_span.find("a").string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%d. %B %Y", "de_DE.utf8")
        pictures = soup.find("div", id="comic").find_all("img")
        # Each image is expected to carry the post title as both alt and title.
        assert all(pic['alt'] == pic['title'] == comic_title for pic in pictures)
        assert len(pictures) <= 1
        return dict(
            img=[pic['src'] for pic in pictures],
            title=comic_title,
            author=comic_author,
            day=published.day,
            month=published.month,
            year=published.year,
        )
2398
2399
2400
class BiterComics(GenericNavigableComic):
    """Retrieve the Biter Comics webcomic."""
    name = "biter"
    long_name = "Biter Comics"
    url = "http://www.bitercomics.com"
    get_navi_link = get_link_rel_next
    get_first_comic_link = get_a_navi_navifirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect the image, title, alt text, author and publication date.

        Exactly one image is expected in the comic container. `link` is
        not used.
        """
        comic_title = soup.find("h1", class_="entry-title").string
        author_span = soup.find("span", class_="author vcard")
        comic_author = author_span.find("a").string
        raw_date = soup.find("span", class_="entry-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find("div", id="comic").find_all("img")
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        assert len(pictures) == 1
        alt_text = pictures[0]['alt']
        return dict(
            img=[pic['src'] for pic in pictures],
            title=comic_title,
            alt=alt_text,
            author=comic_author,
            day=published.day,
            month=published.month,
            year=published.year,
        )
2428
2429
2430
class TheAwkwardYeti(GenericNavigableComic):
    """Retrieve The Awkward Yeti webcomic."""
    # Also on http://www.gocomics.com/the-awkward-yeti
    # Also on http://larstheyeti.tumblr.com
    # Also on https://tapastic.com/series/TheAwkwardYeti
    name = 'yeti'
    long_name = 'The Awkward Yeti'
    url = 'http://theawkwardyeti.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = get_a_navi_navifirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect images, title and publication date. `link` is not used."""
        comic_title = soup.find('h2', class_='post-title').string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find("div", id="comic").find_all("img")
        # Only the first image is checked for matching alt/title attributes.
        assert all(pic['alt'] == pic['title'] for pic in pictures[:1])
        return dict(
            img=[pic['src'] for pic in pictures],
            title=comic_title,
            day=published.day,
            month=published.month,
            year=published.year,
        )
2456
2457
2458
class PleasantThoughts(GenericNavigableComic):
    """Retrieve the Pleasant Thoughts webcomic."""
    name = 'pleasant'
    long_name = 'Pleasant Thoughts'
    url = 'http://pleasant-thoughts.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = get_a_comicnavbase_comicnavfirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect the title and image URLs from the post content block.

        `link` is not used.
        """
        content = soup.find('div', class_='post-content')
        comic_title = content.find('h2', class_='post-title').string
        entry = content.find("div", class_="entry")
        pictures = entry.find_all("img")
        return dict(
            title=comic_title,
            img=[pic['src'] for pic in pictures],
        )
2476
2477
2478
class MisterAndMe(GenericNavigableComic):
    """Retrieve the Mister & Me webcomic."""
    # Also on http://www.gocomics.com/mister-and-me
    # Also on https://tapastic.com/series/Mister-and-Me
    name = 'mister'
    long_name = 'Mister & Me'
    url = 'http://www.mister-and-me.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = get_a_comicnavbase_comicnavfirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect images, title, alt text, author and publication date.

        At most one image is expected; the alt text is empty when there is
        none. `link` is not used.
        """
        comic_title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        comic_author = author_span.find("a").string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find("div", id="comic").find_all("img")
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        assert len(pictures) <= 1
        alt_text = pictures[0]['alt'] if pictures else ""
        return dict(
            img=[pic['src'] for pic in pictures],
            title=comic_title,
            alt=alt_text,
            author=comic_author,
            day=published.day,
            month=published.month,
            year=published.year,
        )
2508
2509
2510
class LastPlaceComics(GenericNavigableComic):
    """Retrieve the Last Place Comics webcomic."""
    name = 'lastplace'
    long_name = 'LastPlaceComics'
    url = "http://lastplacecomics.com"
    get_navi_link = get_link_rel_next
    get_first_comic_link = get_a_comicnavbase_comicnavfirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect images, title, alt text, author and publication date.

        At most one image is expected; the alt text is empty when there is
        none. `link` is not used.
        """
        comic_title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        comic_author = author_span.find("a").string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find("div", id="comic").find_all("img")
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        assert len(pictures) <= 1
        alt_text = pictures[0]['alt'] if pictures else ""
        return dict(
            img=[pic['src'] for pic in pictures],
            title=comic_title,
            alt=alt_text,
            author=comic_author,
            day=published.day,
            month=published.month,
            year=published.year,
        )
2538
2539
2540
class TalesOfAbsurdity(GenericNavigableComic):
    """Retrieve the Tales Of Absurdity webcomic."""
    # Also on http://tapastic.com/series/Tales-Of-Absurdity
    # Also on http://talesofabsurdity.tumblr.com
    name = 'absurdity'
    long_name = 'Tales of Absurdity'
    url = 'http://talesofabsurdity.com'
    get_navi_link = get_a_navi_comicnavnext_navinext
    get_first_comic_link = get_a_navi_navifirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect images, title, alt text, author and publication date.

        The alt text is taken from the first image, or is empty when the
        comic container holds no image. `link` is not used.
        """
        comic_title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        comic_author = author_span.find("a").string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find("div", id="comic").find_all("img")
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        alt_text = pictures[0]['alt'] if pictures else ""
        return dict(
            img=[pic['src'] for pic in pictures],
            title=comic_title,
            alt=alt_text,
            author=comic_author,
            day=published.day,
            month=published.month,
            year=published.year,
        )
2569
2570
2571
class EndlessOrigami(GenericNavigableComic):
    """Retrieve the Endless Origami webcomic."""
    name = "origami"
    long_name = "Endless Origami"
    url = "http://endlessorigami.com"
    get_navi_link = get_link_rel_next
    get_first_comic_link = get_a_navi_navifirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect images, title, alt text, author and publication date.

        The alt text is taken from the first image, or is empty when the
        comic container holds no image. `link` is not used.
        """
        comic_title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        comic_author = author_span.find("a").string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find("div", id="comic").find_all("img")
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        alt_text = pictures[0]['alt'] if pictures else ""
        return dict(
            img=[pic['src'] for pic in pictures],
            title=comic_title,
            alt=alt_text,
            author=comic_author,
            day=published.day,
            month=published.month,
            year=published.year,
        )
2598
2599
2600
class PlanC(GenericNavigableComic):
    """Retrieve the Plan C webcomic."""
    name = 'planc'
    long_name = 'Plan C'
    url = 'http://www.plancomic.com'
    get_navi_link = get_a_navi_comicnavnext_navinext
    get_first_comic_link = get_a_navi_navifirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect the title, image URLs and publication date.

        `link` is not used.
        """
        comic_title = soup.find('h2', class_='post-title').string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        comic_div = soup.find('div', id='comic')
        pictures = comic_div.find_all('img')
        return dict(
            title=comic_title,
            img=[pic['src'] for pic in pictures],
            month=published.month,
            year=published.year,
            day=published.day,
        )
2622
2623
2624
class BuniComic(GenericNavigableComic):
    """Retrieve the Buni webcomic."""
    name = 'buni'
    long_name = 'BuniComics'
    url = 'http://www.bunicomic.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = get_a_comicnavbase_comicnavfirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect the single comic image and use its title as the comic title.

        `link` is not used.
        """
        pictures = soup.find('div', id='comic').find_all('img')
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        assert len(pictures) == 1
        sources = [pic['src'] for pic in pictures]
        return dict(
            img=sources,
            title=pictures[0]['title'],
        )
2642
2643
2644
class GenericCommitStrip(GenericNavigableComic):
    """Shared base for the Commit Strip comics in each language.

    Subclasses are expected to provide `first_url` (left as NotImplemented
    here).
    """
    first_url = NotImplemented
    get_first_comic_link = simulate_first_link
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect title, image titles, description and image URLs.

        'title2' is the space-joined `title` attribute of every image (an
        empty string for images without one). Image sources are converted
        with `convert_iri_to_plain_ascii_uri` and joined to the class URL.
        `link` is not used.
        """
        description = soup.find('meta', property='og:description')['content']
        comic_title = soup.find('meta', property='og:title')['content']
        pictures = soup.find('div', class_='entry-content').find_all('img')
        image_titles = ' '.join(pic.get('title', '') for pic in pictures)
        image_urls = [
            urljoin_wrapper(cls.url, convert_iri_to_plain_ascii_uri(pic['src']))
            for pic in pictures
        ]
        return dict(
            title=comic_title,
            title2=image_titles,
            description=description,
            img=image_urls,
        )
2663
2664
2665
class CommitStripFr(GenericCommitStrip):
    """Retrieve the French edition of Commit Strip."""
    name = "commit_fr"
    long_name = "Commit Strip (Fr)"
    url = "http://www.commitstrip.com/fr"
    first_url = "http://www.commitstrip.com/fr/2012/02/22/interview/"
2671
2672
2673
class CommitStripEn(GenericCommitStrip):
    """Retrieve the English edition of Commit Strip."""
    name = "commit_en"
    long_name = "Commit Strip (En)"
    url = "http://www.commitstrip.com/en"
    first_url = "http://www.commitstrip.com/en/2012/02/22/interview/"
2679
2680
2681
class GenericBoumerie(GenericNavigableComic):
    """Shared base for the Boumeries comics in each language.

    Subclasses are expected to provide `date_format` and `lang` (both left
    as NotImplemented here); they are passed to `string_to_date`.
    """
    date_format = NotImplemented
    lang = NotImplemented
    get_navi_link = get_link_rel_next
    get_first_comic_link = get_a_navi_navifirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect the short URL, images, title, author and publication date.

        `link` is not used.
        """
        comic_title = soup.find('h2', class_='post-title').string
        shortlink = soup.find('link', rel='shortlink')['href']
        author_span = soup.find("span", class_="post-author")
        comic_author = author_span.find("a").string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, cls.date_format, cls.lang)
        pictures = soup.find('div', id='comic').find_all('img')
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        return dict(
            short_url=shortlink,
            img=[pic['src'] for pic in pictures],
            title=comic_title,
            author=comic_author,
            month=published.month,
            year=published.year,
            day=published.day,
        )
2707
2708
2709
class BoumerieEn(GenericBoumerie):
    """Retrieve the English edition of Boumeries."""
    name = "boumeries_en"
    long_name = "Boumeries (En)"
    url = "http://comics.boumerie.com"
    # Format and locale used to parse the post date.
    date_format = '%B %d, %Y'
    lang = "en_GB.UTF-8"
2716
2717
2718
class BoumerieFr(GenericBoumerie):
    """Retrieve the French edition of Boumeries."""
    name = "boumeries_fr"
    long_name = "Boumeries (Fr)"
    url = "http://bd.boumerie.com"
    # Format and locale used to parse the post date.
    date_format = '%A, %d %B %Y'
    lang = 'fr_FR.utf8'
2725
2726
2727
class UnearthedComics(GenericNavigableComic):
    """Class to retrieve Unearthed comics."""
    # Also on http://tapastic.com/series/UnearthedComics
    # Also on http://unearthedcomics.tumblr.com
    name = 'unearthed'
    long_name = 'Unearthed Comics'
    url = 'http://unearthedcomics.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://unearthedcomics.com/comics/world-with-turn-signals/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        Args:
            soup: parsed page of the comic.
            link: navigation link that led to the page (unused).

        Returns:
            A dict with the title, description, short URL ('url2'), list of
            image URLs and the publication day/month/year.
        """
        short_url = soup.find('link', rel='shortlink')['href']
        # The title is read from the first <h1>, falling back to the first <h2>.
        title_elt = soup.find('h1') or soup.find('h2')
        title = title_elt.string if title_elt else ""
        # Store the text of the description, not the <meta> element itself;
        # a missing tag yields an empty string, mirroring the title handling.
        desc_elt = soup.find('meta', property='og:description')
        desc = desc_elt['content'] if desc_elt else ""
        date_str = soup.find('time', class_='published updated hidden')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        post = soup.find('div', class_="entry content entry-content type-portfolio")
        imgs = post.find_all('img')
        return {
            'title': title,
            'description': desc,
            'url2': short_url,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
2758
2759
2760
class Optipess(GenericNavigableComic):
    """Retrieve the Optipess webcomic."""
    name = 'optipess'
    long_name = 'Optipess'
    url = 'http://www.optipess.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = get_a_navi_navifirst

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect title, alt text, author, images and publication date.

        A page without a comic container yields an empty image list and an
        empty alt text. `link` is not used.
        """
        comic_title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        comic_author = author_span.find("a").string
        comic_div = soup.find('div', id='comic')
        pictures = comic_div.find_all('img') if comic_div else []
        # The first image's title is the reference every image must match.
        alt_text = pictures[0]['title'] if pictures else ""
        assert all(pic['alt'] == pic['title'] == alt_text for pic in pictures)
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, "%B %d, %Y")
        return dict(
            title=comic_title,
            alt=alt_text,
            author=comic_author,
            img=[pic['src'] for pic in pictures],
            month=published.month,
            year=published.year,
            day=published.day,
        )
2788
2789
2790
class PainTrainComic(GenericNavigableComic):
    """Class to retrieve Pain Train Comics."""
    name = 'paintrain'
    long_name = 'Pain Train Comics'
    url = 'http://paintraincomic.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        Args:
            soup: parsed page of the comic.
            link: navigation link that led to the page (unused).

        Returns:
            A dict with the short URL, the comic number extracted from it,
            the list of image URLs, the publication day/month/year, the alt
            text and the title.
        """
        title = soup.find('h2', class_='post-title').string
        short_url = soup.find('link', rel='shortlink')['href']
        # The URL is escaped so that its dots only match literal dots.
        short_url_re = re.compile(r'^%s/\?p=([0-9]*)' % re.escape(cls.url))
        num = int(short_url_re.match(short_url).groups()[0])
        imgs = soup.find('div', id='comic').find_all('img')
        # The first image's title is the reference every image must match.
        alt = imgs[0]['title']
        assert all(i['alt'] == i['title'] == alt for i in imgs)
        date_str = soup.find('span', class_='post-date').string
        day = string_to_date(date_str, "%d/%m/%Y")
        return {
            'short_url': short_url,
            'num': num,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'alt': alt,
            'title': title,
        }
2820
2821
2822
class MoonBeard(GenericNavigableComic):
    """Class to retrieve MoonBeard comics."""
    # Also on http://blog.squiresjam.es/moonbeard
    # Also on http://www.webtoons.com/en/comedy/moon-beard/list?title_no=471
    name = 'moonbeard'
    long_name = 'Moon Beard'
    url = 'http://moonbeard.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        Args:
            soup: parsed page of the comic.
            link: navigation link that led to the page (unused).

        Returns:
            A dict with the short URL, the comic number extracted from it,
            the list of image URLs, the publication day/month/year, the
            title, the space-joined tags, the alt text and the author.
        """
        title = soup.find('h2', class_='post-title').string
        short_url = soup.find('link', rel='shortlink')['href']
        # The URL is escaped so that its dots only match literal dots.
        short_url_re = re.compile(r'^%s/\?p=([0-9]*)' % re.escape(cls.url))
        num = int(short_url_re.match(short_url).groups()[0])
        imgs = soup.find('div', id='comic').find_all('img')
        # The first image's title is the reference every image must match.
        alt = imgs[0]['title']
        assert all(i['alt'] == i['title'] == alt for i in imgs)
        date_str = soup.find('span', class_='post-date').string
        day = string_to_date(date_str, "%B %d, %Y")
        tags = ' '.join(t['content'] for t in soup.find_all('meta', property='article:tag'))
        author = soup.find('span', class_='post-author').string
        return {
            'short_url': short_url,
            'num': num,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'title': title,
            'tags': tags,
            'alt': alt,
            'author': author,
        }
2858
2859
2860
class AHamADay(GenericNavigableComic):
    """Class to retrieve A Ham A Day comics."""
    name = 'ham'
    long_name = 'A Ham A Day'
    url = 'http://www.ahammaday.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.ahammaday.com/today/3/6/french'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Args:
            last_soup: parsed page of the current comic.
            next_: True to look for the next comic, False for the previous.

        Returns:
            The anchor element found, or None when the navigation item (or
            its anchor) is absent.
        """
        # prev is next / next is prev
        li = last_soup.find('li', class_='previous' if next_ else 'next')
        # A missing <li> means there is nowhere to navigate to.
        return li.find('a') if li else None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        Args:
            soup: parsed page of the comic.
            link: navigation link that led to the page (unused).

        Returns:
            A dict with the list of image URLs, the title, the author and
            the publication day/month/year.
        """
        date_str = soup.find('time', class_='published')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        author = soup.find('span', class_='blog-author').find('a').string
        title = soup.find('meta', property='og:title')['content']
        imgs = soup.find_all('meta', itemprop='image')
        return {
            'img': [i['content'] for i in imgs],
            'title': title,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
        }
2891
2892
2893 View Code Duplication
class LittleLifeLines(GenericNavigableComic):
    """Retrieve the Little Life Lines webcomic."""
    name = 'life'
    long_name = 'Little Life Lines'
    url = 'http://www.littlelifelines.com'
    first_url = 'http://www.littlelifelines.com/comics/well-done'
    get_first_comic_link = simulate_first_link
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the next (or previous) anchor, or None when there is none."""
        # prev is next / next is prev
        wanted = 'prev' if next_ else 'next'
        item = last_soup.find('li', class_=wanted)
        if not item:
            return None
        return item.find('a')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect title, alt text, description, author, date and images.

        Images without a `src` attribute are ignored; the alt text comes
        from the first remaining image. `link` is not used.
        """
        comic_title = soup.find('meta', property='og:title')['content']
        description = soup.find('meta', property='og:description')['content']
        raw_date = soup.find('time', class_='published')['datetime']
        published = string_to_date(raw_date, "%Y-%m-%d")
        comic_author = soup.find('a', rel='author').string
        body = soup.find('div', class_="body entry-content")
        pictures = [
            pic for pic in body.find_all('img')
            if pic.get('src') is not None
        ]
        alt_text = pictures[0]['alt']
        return dict(
            title=comic_title,
            alt=alt_text,
            description=description,
            author=comic_author,
            day=published.day,
            month=published.month,
            year=published.year,
            img=[pic['src'] for pic in pictures],
        )
2931
2932
2933
class GenericWordPressInkblot(GenericNavigableComic):
    """Shared base for comics published with WordPress and Inkblot."""
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'first webcomic' anchor found on the home page."""
        first_link_class = 'webcomic-link webcomic1-link first-webcomic-link first-webcomic1-link'
        home = get_soup_at_url(cls.url)
        return home.find('a', class_=first_link_class)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect the title, publication date and image URLs.

        `link` is not used.
        """
        comic_title = soup.find('meta', property='og:title')['content']
        pictures = soup.find('div', class_='webcomic-image').find_all('img')
        published_meta = soup.find('meta', property='article:published_time')
        # Only the first 10 characters (the YYYY-MM-DD part) are parsed.
        raw_date = published_meta['content'][:10]
        published = string_to_date(raw_date, "%Y-%m-%d")
        return dict(
            title=comic_title,
            day=published.day,
            month=published.month,
            year=published.year,
            img=[pic['src'] for pic in pictures],
        )
2956
2957
2958
class EverythingsStupid(GenericWordPressInkblot):
    """Retrieve the Everything's Stupid webcomic."""
    # Also on http://tapastic.com/series/EverythingsStupid
    # Also on http://www.webtoons.com/en/challenge/everythings-stupid/list?title_no=14591
    # Also on http://everythingsstupidcomics.tumblr.com
    name = "stupid"
    long_name = 'Everything\'s Stupid'
    url = "http://everythingsstupid.net"
2966
2967
2968
class TheIsmComics(GenericWordPressInkblot):
    """Retrieve The Ism webcomic."""
    # Also on https://tapastic.com/series/TheIsm (?)
    name = "theism"
    long_name = 'The Ism'
    url = "http://www.theism-comics.com"
2974
2975
2976
class WoodenPlankStudios(GenericWordPressInkblot):
    """Retrieve the Wooden Plank Studios webcomic."""
    name = "woodenplank"
    long_name = "Wooden Plank Studios"
    url = "http://woodenplankstudios.com"
2981 View Code Duplication
0 ignored issues
show
This code seems to be duplicated in your project.
Loading history...
2982
2983
class ElectricBunnyComic(GenericNavigableComic):
    """Retrieve the Electric Bunny webcomic."""
    # Also on http://electricbunnycomics.tumblr.com
    name = 'bunny'
    long_name = 'Electric Bunny Comic'
    url = 'http://www.electricbunnycomics.com/View/Comic/153/Welcome+to+Hell'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the parent element of the image whose alt text is 'First'."""
        home = get_soup_at_url(cls.url)
        first_img = home.find('img', alt='First')
        return first_img.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the parent of the 'Next'/'Back' image, or None if absent."""
        wanted_alt = 'Next' if next_ else 'Back'
        nav_img = last_soup.find('img', alt=wanted_alt)
        if not nav_img:
            return None
        return nav_img.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect the title and image URLs from the `<meta>` tags.

        `link` is not used.
        """
        comic_title = soup.find('meta', property='og:title')['content']
        image_metas = soup.find_all('meta', property='og:image')
        return dict(
            title=comic_title,
            img=[meta['content'] for meta in image_metas],
        )
3011
3012
3013
class SheldonComics(GenericNavigableComic):
    """Retrieve the Sheldon webcomic."""
    # Also on http://www.gocomics.com/sheldon
    name = 'sheldon'
    long_name = 'Sheldon Comics'
    url = 'http://www.sheldoncomics.com'

    @classmethod
    def get_first_comic_link(cls):
        """Return the anchor with id 'nav-first' found on the home page."""
        home = get_soup_at_url(cls.url)
        return home.find("a", id="nav-first")

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the next (or previous) anchor, or None when there is none.

        Anchors pointing at 'http://www.sheldoncomics.com' are skipped.
        """
        wanted_id = "nav-next" if next_ else "nav-prev"
        candidates = (
            anchor for anchor in last_soup.find_all("a", id=wanted_id)
            if anchor['href'] != 'http://www.sheldoncomics.com'
        )
        return next(candidates, None)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect the single comic image and use its title as the comic title.

        `link` is not used.
        """
        pictures = soup.find("div", id="comic-foot").find_all("img")
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        assert len(pictures) == 1
        comic_title = pictures[0]['title']
        return dict(
            title=comic_title,
            img=[pic['src'] for pic in pictures],
        )
3044
3045
3046
class CubeDrone(GenericNavigableComic):
    """Class to retrieve Cube Drone comics."""
    name = 'cubedrone'
    long_name = 'Cube Drone'
    url = 'http://cube-drone.com/comics'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        The link is the parent of the 'backward' glyphicon of the main page.
        """
        return get_soup_at_url(cls.url).find('span', class_='glyphicon glyphicon-backward').parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        The link is the parent of the right (next) or left (previous)
        chevron glyphicon. Returns None when the page has no such icon
        instead of failing on the missing element.
        """
        class_ = 'glyphicon glyphicon-chevron-' + ('right' if next_ else 'left')
        span = last_soup.find('span', class_=class_)
        if span is None:
            return None
        return span.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Title and secondary url come from the twitter meta tags; the
        secondary title and the alt text come from the first comic image.
        """
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        url2 = soup.find('meta', attrs={'name': 'twitter:url'})['content']
        # TODO: a date string is available in the <small> element of
        # <h2 class="comic_title"> (format "%B %d, %Y, %I:%M %p") but it is
        # not parsed yet.
        imgs = soup.find_all('img', class_='comic img-responsive')
        title2 = imgs[0]['title']
        alt = imgs[0]['alt']
        return {
            'url2': url2,
            'title': title,
            'title2': title2,
            'alt': alt,
            'img': [i['src'] for i in imgs],
        }
3081
3082
3083
class MakeItStoopid(GenericNavigableComic):
    """Retrieve Make It Stoopid comics."""
    name = 'stoopid'
    long_name = 'Make it stoopid'
    url = 'http://makeitstoopid.com/comic.php'

    @classmethod
    def get_nav(cls, soup):
        """Return the navigation elements found in soup as a list.

        The 'cnav' elements are expected to come as two identical groups;
        the first five (begin, prev, archive, next, end) are returned, with
        every element lacking an href replaced by None.
        """
        all_nav = soup.find_all(class_='cnav')
        first_bar = all_nav[:5]
        assert first_bar == all_nav[5:]
        nav = []
        for elt in first_bar:
            nav.append(elt if elt.get('href') is not None else None)
        return nav

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'begin' navigation element of the main page."""
        main_page = get_soup_at_url(cls.url)
        return cls.get_nav(main_page)[0]

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the 'next' or 'prev' navigation element of last_soup."""
        position = 3 if next_ else 1
        return cls.get_nav(last_soup)[position]

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the title (taken from link) and the comic image urls."""
        info = {'title': link['title']}
        comic_imgs = soup.find_all('img', id='comicimg')
        info['img'] = [img['src'] for img in comic_imgs]
        return info
3117
3118
3119
class GeekAndPoke(GenericNavigableComic):
    """Class to retrieve Geek And Poke comics."""
    name = 'geek'
    long_name = 'Geek And Poke'
    url = 'http://geek-and-poke.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://geek-and-poke.com/geekandpoke/2006/8/27/a-new-place-for-a-not-so-old-blog.html'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic."""
        # The 'prev-item' anchor is used to move forward and 'next-item' to
        # move backward.
        # NOTE(review): presumably the site orders posts newest first, which
        # would explain the apparent inversion - confirm.
        return last_soup.find('a', class_='prev-item' if next_ else 'next-item')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Title and description come from the Open Graph meta tags, the date
        from the 'published' time element and the images from the content
        div. The 'alt' entry is the alt text of the first image, or an empty
        string when the post has no image (or the image has no alt).
        """
        title = soup.find('meta', property='og:title')['content']
        desc = soup.find('meta', property='og:description')['content']
        date_str = soup.find('time', class_='published')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        author = soup.find('a', rel='author').string
        div_content = (soup.find('div', class_="body entry-content") or
                       soup.find('div', class_="special-content"))
        imgs = div_content.find_all('img')
        # Images without a src cannot be downloaded: ignore them
        imgs = [i for i in imgs if i.get('src') is not None]
        # NOTE(review): on a BeautifulSoup Tag, "x in tag" looks at the tag's
        # children rather than its attributes, so this check may never
        # compare anything; has_attr('title') may have been intended -
        # confirm before tightening it.
        assert all('title' not in i or i['alt'] == i['title'] for i in imgs)
        # Always a string, even when there is no image
        alt = imgs[0].get('alt', "") if imgs else ""
        return {
            'title': title,
            'alt': alt,
            'description': desc,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
        }
3157
3158
3159
class GenericTumblrV1(GenericComic):
    """Generic class to retrieve comics from Tumblr using the V1 API."""

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generic implementation of get_next_comic for Tumblr comics.

        Yields the comic built from each post returned by get_posts,
        skipping the posts for which get_comic_info returns None.
        """
        for p in cls.get_posts(last_comic):
            comic = cls.get_comic_info(p)
            if comic is not None:
                yield comic

    @classmethod
    def get_url_from_post(cls, post):
        """Get the url of a post (its 'url' attribute)."""
        return post['url']

    @classmethod
    def get_api_url(cls):
        """Get the url of the API ('/api/read/' joined to the comic url)."""
        return urljoin_wrapper(cls.url, '/api/read/')

    @classmethod
    def get_comic_info(cls, post):
        """Get information about a particular comics.

        Returns None when the post is not of type 'photo', a dictionary
        describing the comic otherwise.
        """
        type_ = post['type']
        if type_ != 'photo':
            return None
        tumblr_id = int(post['id'])
        api_url = cls.get_api_url() + '?id=%d' % (tumblr_id)
        day = datetime.datetime.fromtimestamp(int(post['unix-timestamp'])).date()
        caption = post.find('photo-caption')
        title = caption.string if caption else ""
        tags = ' '.join(t.string for t in post.find_all('tag'))
        # Photos may appear in 'photo' tags and/or straight in the post
        photo_tags = post.find_all('photo')
        if not photo_tags:
            photo_tags = [post]
        # Images are in multiple resolutions - taking the first one
        imgs = [photo.find('photo-url') for photo in photo_tags]
        return {
            'url': cls.get_url_from_post(post),
            'url2': post['url-with-slug'],
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'title': title,
            'tags': tags,
            'img': [i.string for i in imgs],
            'tumblr-id': tumblr_id,
            'api_url': api_url,  # for debug purposes
        }

    @classmethod
    def get_posts(cls, last_comic, nb_post_per_call=10):
        """Get posts using API. nb_post_per_call is max 50.

        Posts are retrieved from newer to older as per the tumblr v1 api
        but are returned in chronological order.

        When last_comic is provided, only the posts retrieved before
        reaching its url (that is, the newer ones) are returned. An empty
        list is returned, after printing a message, when the previous post
        cannot be fetched or cannot be found among the posts.
        """
        # urllib.error is a submodule: a bare "import urllib" does not
        # guarantee that it is loaded, so import it explicitly here.
        import urllib.error

        waiting_for_url = last_comic['url'] if last_comic else None
        posts_acc = []
        if last_comic:
            # Sometimes, tumblr posts are deleted. If the previous post is
            # deleted, we might end up spending a lot of time looking for
            # something that doesn't exist. Failing early and clearly might
            # be a better option.
            last_api_url = last_comic['api_url']
            try:
                get_soup_at_url(last_api_url)
            except urllib.error.HTTPError:
                try:
                    get_soup_at_url(cls.url)
                except urllib.error.HTTPError:
                    print("Did not find previous post nor main url %s" % cls.url)
                else:
                    print("Did not find previous post %s : it might have been deleted" % last_api_url)
                return []
        api_url = cls.get_api_url()
        posts = get_soup_at_url(api_url).find('posts')
        start, total = int(posts['start']), int(posts['total'])
        assert start == 0
        for starting_num in range(0, total, nb_post_per_call):
            api_url2 = api_url + '?start=%d&num=%d' % (starting_num, nb_post_per_call)
            posts2 = get_soup_at_url(api_url2).find('posts')
            start2, total2 = int(posts2['start']), int(posts2['total'])
            assert starting_num == start2, "%d != %d" % (starting_num, start2)
            # The total may differ from one call to the next: this may
            # happen and should be handled in the future
            assert total == total2, "%d != %d" % (total, total2)
            for p in posts2.find_all('post'):
                if waiting_for_url and waiting_for_url == cls.get_url_from_post(p):
                    # Reached the last known post: everything accumulated
                    # so far is new
                    return reversed(posts_acc)
                posts_acc.append(p)
        if waiting_for_url is None:
            return reversed(posts_acc)
        print("Did not find %s : there might be a problem" % waiting_for_url)
        return []
3254
3255
3256
class IrwinCardozo(GenericTumblrV1):
    """Retrieve Irwin Cardozo comics through the Tumblr V1 API."""
    name = 'irwinc'
    long_name = 'Irwin Cardozo'
    url = 'http://irwincardozocomics.tumblr.com'
3261
3262
3263
class AccordingToDevin(GenericTumblrV1):
    """Retrieve According To Devin comics through the Tumblr V1 API."""
    name = 'devin'
    long_name = 'According To Devin'
    url = 'http://accordingtodevin.tumblr.com'
3268
3269
3270
class ItsTheTieTumblr(GenericTumblrV1):
    """Retrieve It's The Tie comics through the Tumblr V1 API."""
    # Also available at http://itsthetie.com
    # Also available at https://tapastic.com/series/itsthetie
    name = 'tie-tumblr'
    long_name = "It's the tie (from Tumblr)"
    url = "http://itsthetie.tumblr.com"
3277
3278
3279
class OctopunsTumblr(GenericTumblrV1):
    """Retrieve Octopuns comics through the Tumblr V1 API."""
    # Also available at http://www.octopuns.net
    name = 'octopuns-tumblr'
    long_name = 'Octopuns (from Tumblr)'
    url = 'http://octopuns.tumblr.com'
3285
3286
3287
class PicturesInBoxesTumblr(GenericTumblrV1):
    """Retrieve Pictures In Boxes comics through the Tumblr V1 API."""
    # Also available at http://www.picturesinboxes.com
    name = 'picturesinboxes-tumblr'
    long_name = 'Pictures in Boxes (from Tumblr)'
    url = 'http://picturesinboxescomic.tumblr.com'
3293
3294
3295
class TubeyToonsTumblr(GenericTumblrV1):
    """Retrieve Tubey Toons comics through the Tumblr V1 API."""
    # Also available at http://tapastic.com/series/Tubey-Toons
    # Also available at http://tubeytoons.com
    name = 'tubeytoons-tumblr'
    long_name = 'Tubey Toons (from Tumblr)'
    url = 'http://tubeytoons.tumblr.com'
3302
3303
3304
class UnearthedComicsTumblr(GenericTumblrV1):
    """Retrieve Unearthed Comics through the Tumblr V1 API."""
    # Also available at http://tapastic.com/series/UnearthedComics
    # Also available at http://unearthedcomics.com
    name = 'unearthed-tumblr'
    long_name = 'Unearthed Comics (from Tumblr)'
    url = 'http://unearthedcomics.tumblr.com'
3311
3312
3313
class PieComic(GenericTumblrV1):
    """Retrieve Pie Comic comics through the Tumblr V1 API."""
    name = 'pie'
    long_name = 'Pie Comic'
    url = "http://piecomic.tumblr.com"
3318
3319
3320
class MrEthanDiamond(GenericTumblrV1):
    """Retrieve Mr Ethan Diamond comics through the Tumblr V1 API."""
    name = 'diamond'
    long_name = 'Mr Ethan Diamond'
    url = 'http://mrethandiamond.tumblr.com'
3325
3326
3327
class Flocci(GenericTumblrV1):
    """Retrieve floccinaucinihilipilification comics through the Tumblr V1 API."""
    name = 'flocci'
    long_name = 'floccinaucinihilipilification'
    url = "http://floccinaucinihilipilificationa.tumblr.com"
3332
3333
3334
class UpAndOut(GenericTumblrV1):
    """Retrieve Up & Out comics through the Tumblr V1 API."""
    # Also available at http://tapastic.com/series/UP-and-OUT
    name = 'upandout'
    long_name = 'Up And Out (from Tumblr)'
    url = 'http://upandoutcomic.tumblr.com'
3340
3341
3342
class Pundemonium(GenericTumblrV1):
    """Retrieve Pundemonium comics through the Tumblr V1 API."""
    name = 'pundemonium'
    long_name = 'Pundemonium'
    url = 'http://monstika.tumblr.com'
3347
3348
3349
class PoorlyDrawnLinesTumblr(GenericTumblrV1):
    """Retrieve Poorly Drawn Lines comics through the Tumblr V1 API."""
    # Also available at http://poorlydrawnlines.com
    name = 'poorlydrawn-tumblr'
    long_name = 'Poorly Drawn Lines (from Tumblr)'
    url = 'http://pdlcomics.tumblr.com'
3355
3356
3357
class PearShapedComics(GenericTumblrV1):
    """Retrieve Pear-Shaped Comics through the Tumblr V1 API."""
    name = 'pearshaped'
    long_name = 'Pear-Shaped Comics'
    url = 'http://pearshapedcomics.com'
3362
3363
3364
class PondScumComics(GenericTumblrV1):
    """Retrieve Pond Scum comics through the Tumblr V1 API."""
    name = 'pond'
    long_name = 'Pond Scum'
    url = 'http://pondscumcomic.tumblr.com'
3369
3370
3371
class MercworksTumblr(GenericTumblrV1):
    """Retrieve Mercworks comics through the Tumblr V1 API."""
    # Also available at http://mercworks.net
    name = 'mercworks-tumblr'
    long_name = 'Mercworks (from Tumblr)'
    url = 'http://mercworks.tumblr.com'
3377
3378
3379
class OwlTurdTumblr(GenericTumblrV1):
    """Retrieve Owl Turd comics through the Tumblr V1 API."""
    # Also available at http://tapastic.com/series/Owl-Turd-Comix
    name = 'owlturd-tumblr'
    long_name = 'Owl Turd (from Tumblr)'
    url = 'http://owlturd.com'
3385
3386
3387
class VectorBelly(GenericTumblrV1):
    """Retrieve Vector Belly comics through the Tumblr V1 API."""
    # Also available at http://vectorbelly.com
    name = 'vector'
    long_name = 'Vector Belly'
    url = 'http://vectorbelly.tumblr.com'
3393
3394
3395
class GoneIntoRapture(GenericTumblrV1):
    """Retrieve Gone Into Rapture comics through the Tumblr V1 API."""
    # Also available at http://goneintorapture.tumblr.com
    # Also available at http://tapastic.com/series/Goneintorapture
    name = 'rapture'
    long_name = 'Gone Into Rapture'
    url = 'http://www.goneintorapture.com'
3402
3403
3404
class TheOatmealTumblr(GenericTumblrV1):
    """Retrieve The Oatmeal comics through the Tumblr V1 API."""
    # Also available at http://theoatmeal.com
    name = 'oatmeal-tumblr'
    long_name = 'The Oatmeal (from Tumblr)'
    url = 'http://oatmeal.tumblr.com'
3410
3411
3412
class HeckIfIKnowComicsTumblr(GenericTumblrV1):
    """Retrieve Heck If I Know comics through the Tumblr V1 API."""
    # Also available at http://tapastic.com/series/Regular
    name = 'heck-tumblr'
    long_name = 'Heck if I Know comics (from Tumblr)'
    url = 'http://heckifiknowcomics.com'
3418
3419
3420
class MyJetPack(GenericTumblrV1):
    """Retrieve My Jet Pack comics through the Tumblr V1 API."""
    name = 'jetpack'
    long_name = 'My Jet Pack'
    url = 'http://myjetpack.tumblr.com'
3425
3426
3427
class CheerUpEmoKidTumblr(GenericTumblrV1):
    """Retrieve Cheer Up Emo Kid comics through the Tumblr V1 API."""
    # Also available at http://www.cheerupemokid.com
    # Also available at http://tapastic.com/series/CUEK
    name = 'cuek-tumblr'
    long_name = 'Cheer Up Emo Kid (from Tumblr)'
    url = 'http://enzocomics.tumblr.com'
3434
3435
3436
class ForLackOfABetterComic(GenericTumblrV1):
    """Retrieve For Lack Of A Better Comic comics through the Tumblr V1 API."""
    # Also available at http://forlackofabettercomic.com
    name = 'lack'
    long_name = 'For Lack Of A Better Comic'
    url = 'http://forlackofabettercomic.tumblr.com'
3442
3443
3444
class ZenPencilsTumblr(GenericTumblrV1):
    """Retrieve Zen Pencils comics through the Tumblr V1 API."""
    # Also available at http://zenpencils.com
    # Also available at http://www.gocomics.com/zen-pencils
    name = 'zenpencils-tumblr'
    long_name = 'Zen Pencils (from Tumblr)'
    url = 'http://zenpencils.tumblr.com'
3451
3452
3453
class ThreeWordPhraseTumblr(GenericTumblrV1):
    """Retrieve Three Word Phrase comics through the Tumblr V1 API."""
    # Also available at http://threewordphrase.com
    name = 'threeword-tumblr'
    long_name = 'Three Word Phrase (from Tumblr)'
    url = 'http://www.threewordphrase.tumblr.com'
3459
3460
3461
class TimeTrabbleTumblr(GenericTumblrV1):
    """Retrieve Time Trabble comics through the Tumblr V1 API."""
    # Also available at http://timetrabble.com
    name = 'timetrabble-tumblr'
    long_name = 'Time Trabble (from Tumblr)'
    url = 'http://timetrabble.tumblr.com'
3467
3468
3469
class SafelyEndangeredTumblr(GenericTumblrV1):
    """Retrieve Safely Endangered comics through the Tumblr V1 API."""
    # Also available at http://www.safelyendangered.com
    name = 'endangered-tumblr'
    long_name = 'Safely Endangered (from Tumblr)'
    url = 'http://tumblr.safelyendangered.com'
3475
3476
3477
class MouseBearComedyTumblr(GenericTumblrV1):
    """Retrieve Mouse Bear Comedy comics through the Tumblr V1 API."""
    # Also available at http://www.mousebearcomedy.com
    name = 'mousebear-tumblr'
    long_name = 'Mouse Bear Comedy (from Tumblr)'
    url = 'http://mousebearcomedy.tumblr.com'
3483
3484
3485
class BouletCorpTumblr(GenericTumblrV1):
    """Retrieve Boulet Corp comics through the Tumblr V1 API."""
    # Also available at http://www.bouletcorp.com
    name = 'boulet-tumblr'
    long_name = 'Boulet Corp (from Tumblr)'
    url = 'http://bouletcorp.tumblr.com'
3491
3492
3493
class TheAwkwardYetiTumblr(GenericTumblrV1):
    """Retrieve The Awkward Yeti comics through the Tumblr V1 API."""
    # Also available at http://www.gocomics.com/the-awkward-yeti
    # Also available at http://theawkwardyeti.com
    # Also available at https://tapastic.com/series/TheAwkwardYeti
    name = 'yeti-tumblr'
    long_name = 'The Awkward Yeti (from Tumblr)'
    url = 'http://larstheyeti.tumblr.com'
3501
3502
3503
class NellucNhoj(GenericTumblrV1):
3504
    """Class to retrieve NellucNhoj comics."""
3505
    name = 'nhoj'
3506
    long_name = 'Nelluc Nhoj'
3507
    url = 'http://nellucnhoj.com'
3508
3509
3510
class DownTheUpwardSpiralTumblr(GenericTumblrV1):
    """Retrieve Down The Upward Spiral comics through the Tumblr V1 API."""
    # Also available at http://www.downtheupwardspiral.com
    name = 'spiral-tumblr'
    long_name = 'Down the Upward Spiral (from Tumblr)'
    url = 'http://downtheupwardspiral.tumblr.com'
3516
3517
3518
class AsPerUsualTumblr(GenericTumblrV1):
    """Retrieve As Per Usual comics through the Tumblr V1 API."""
    # Also available at https://tapastic.com/series/AsPerUsual
    name = 'usual-tumblr'
    long_name = 'As Per Usual (from Tumblr)'
    url = 'http://as-per-usual.tumblr.com'
3524
3525
3526
class OneOneOneOneComicTumblr(GenericTumblrV1):
    """Retrieve 1111 Comics through the Tumblr V1 API."""
    # Also available at http://www.1111comics.me
    # Also available at https://tapastic.com/series/1111-Comics
    name = '1111-tumblr'
    long_name = '1111 Comics (from Tumblr)'
    url = 'http://comics1111.tumblr.com'
3533
3534
3535
class JhallComicsTumblr(GenericTumblrV1):
    """Retrieve Jhall Comics through the Tumblr V1 API."""
    # Also available at http://jhallcomics.com
    name = 'jhall-tumblr'
    long_name = 'Jhall Comics (from Tumblr)'
    url = 'http://jhallcomics.tumblr.com'
3541
3542
3543
class BerkeleyMewsTumblr(GenericTumblrV1):
    """Retrieve Berkeley Mews comics through the Tumblr V1 API."""
    # Also available at http://www.gocomics.com/berkeley-mews
    # Also available at http://www.berkeleymews.com
    name = 'berkeley-tumblr'
    long_name = 'Berkeley Mews (from Tumblr)'
    url = 'http://mews.tumblr.com'
3550
3551
3552
class JoanCornellaTumblr(GenericTumblrV1):
    """Retrieve Joan Cornella comics through the Tumblr V1 API."""
    # Also available at http://joancornella.net
    name = 'cornella-tumblr'
    long_name = 'Joan Cornella (from Tumblr)'
    url = 'http://cornellajoan.tumblr.com'
3558
3559
3560
class RespawnComicTumblr(GenericTumblrV1):
    """Retrieve Respawn Comic comics through the Tumblr V1 API."""
    # Also available at http://respawncomic.com
    name = 'respawn-tumblr'
    long_name = 'Respawn Comic (from Tumblr)'
    url = 'http://respawncomic.tumblr.com'
3566
3567
3568
class ChrisHallbeckTumblr(GenericTumblrV1):
    """Class to retrieve Chris Hallbeck comics."""
    # Also on https://tapastic.com/ChrisHallbeck
    # Also on http://maximumble.com
    # Also on http://minimumble.com
    # Also on http://thebookofbiff.com
    name = 'hallbeck-tumblr'
    # Spelled "Hallbeck", as in the class name and in the urls
    long_name = 'Chris Hallbeck (from Tumblr)'
    url = 'http://chrishallbeck.tumblr.com'
3577
3578
3579
class ComicNuggets(GenericTumblrV1):
    """Retrieve Comic Nuggets comics through the Tumblr V1 API."""
    name = 'nuggets'
    long_name = 'Comic Nuggets'
    url = 'http://comicnuggets.com'
3584
3585
3586
class PigeonGazetteTumblr(GenericTumblrV1):
    """Retrieve The Pigeon Gazette comics through the Tumblr V1 API."""
    # Also available at https://tapastic.com/series/The-Pigeon-Gazette
    name = 'pigeon-tumblr'
    long_name = 'The Pigeon Gazette (from Tumblr)'
    url = 'http://thepigeongazette.tumblr.com'
3592
3593
3594
class CancerOwl(GenericTumblrV1):
    """Retrieve Cancer Owl comics through the Tumblr V1 API."""
    # Also available at http://cancerowl.com
    name = 'cancerowl-tumblr'
    long_name = 'Cancer Owl (from Tumblr)'
    url = 'http://cancerowl.tumblr.com'
3600
3601
3602
class FowlLanguageTumblr(GenericTumblrV1):
    """Retrieve Fowl Language comics through the Tumblr V1 API."""
    # Also available at http://www.fowllanguagecomics.com
    # Also available at http://tapastic.com/series/Fowl-Language-Comics
    # Also available at http://www.gocomics.com/fowl-language
    name = 'fowllanguage-tumblr'
    long_name = 'Fowl Language Comics (from Tumblr)'
    url = 'http://fowllanguagecomics.tumblr.com'
3610
3611
3612
class TheOdd1sOutTumblr(GenericTumblrV1):
    """Retrieve The Odd 1s Out comics through the Tumblr V1 API."""
    # Also available at http://theodd1sout.com
    # Also available at https://tapastic.com/series/Theodd1sout
    name = 'theodd-tumblr'
    long_name = 'The Odd 1s Out (from Tumblr)'
    url = 'http://theodd1sout.tumblr.com'
3619
3620
3621
class TheUnderfoldTumblr(GenericTumblrV1):
    """Retrieve The Underfold comics through the Tumblr V1 API."""
    # Also available at http://theunderfold.com
    name = 'underfold-tumblr'
    long_name = 'The Underfold (from Tumblr)'
    url = 'http://theunderfold.tumblr.com'
3627
3628
3629
class LolNeinTumblr(GenericTumblrV1):
    """Retrieve Lol Nein comics through the Tumblr V1 API."""
    # Also available at http://lolnein.com
    name = 'lolnein-tumblr'
    long_name = 'Lol Nein (from Tumblr)'
    url = 'http://lolneincom.tumblr.com'
3635
3636
3637
class FatAwesomeComicsTumblr(GenericTumblrV1):
    """Retrieve Fat Awesome comics through the Tumblr V1 API."""
    # Also available at http://fatawesome.com/comics
    name = 'fatawesome-tumblr'
    long_name = 'Fat Awesome (from Tumblr)'
    url = 'http://fatawesomecomedy.tumblr.com'
3643
3644
3645
class TheWorldIsFlatTumblr(GenericTumblrV1):
    """Retrieve The World Is Flat comics through the Tumblr V1 API."""
    # Also available at https://tapastic.com/series/The-World-is-Flat
    name = 'flatworld-tumblr'
    long_name = 'The World Is Flat (from Tumblr)'
    url = 'http://theworldisflatcomics.tumblr.com'
3651
3652
3653
class DorrisMc(GenericTumblrV1):
    """Retrieve Dorris Mc comics through the Tumblr V1 API."""
    # Also available at http://www.gocomics.com/dorris-mccomics
    name = 'dorrismc'
    long_name = 'Dorris Mc'
    url = 'http://dorrismccomics.com'
3659
3660
3661
class LeleozTumblr(GenericEmptyComic, GenericTumblrV1):
    """Definition of the Leleoz comics hosted on Tumblr."""
    # NOTE(review): GenericEmptyComic comes first in the bases, so it
    # presumably takes precedence over the Tumblr retrieval - confirm.
    # Also available at https://tapastic.com/series/Leleoz
    name = 'leleoz-tumblr'
    long_name = 'Leleoz (from Tumblr)'
    url = 'http://leleozcomics.tumblr.com'
3667
3668
3669
class MoonBeardTumblr(GenericTumblrV1):
    """Retrieve Moon Beard comics through the Tumblr V1 API."""
    # Also available at http://moonbeard.com
    # Also available at http://www.webtoons.com/en/comedy/moon-beard/list?title_no=471
    name = 'moonbeard-tumblr'
    long_name = 'Moon Beard (from Tumblr)'
    url = 'http://blog.squiresjam.es/moonbeard'
3676
3677
3678
class AComik(GenericTumblrV1):
    """Retrieve A Comik comics through the Tumblr V1 API."""
    name = 'comik'
    long_name = 'A Comik'
    url = 'http://acomik.com'
3683
3684
3685
class ClassicRandy(GenericTumblrV1):
    """Retrieve Classic Randy comics through the Tumblr V1 API."""
    name = 'randy'
    long_name = 'Classic Randy'
    url = 'http://classicrandy.tumblr.com'
3690
3691
3692
class DagssonTumblr(GenericTumblrV1):
    """Retrieve Dagsson comics through the Tumblr V1 API."""
    # Also available at http://www.dagsson.com
    name = 'dagsson-tumblr'
    long_name = 'Dagsson Hugleikur (from Tumblr)'
    url = 'http://hugleikurdagsson.tumblr.com'
3698
3699
3700
class LinsEditionsTumblr(GenericTumblrV1):
    """Retrieve L.I.N.S. Editions comics through the Tumblr V1 API."""
    # Also available at https://linsedition.com
    name = 'lins-tumblr'
    long_name = 'L.I.N.S. Editions (from Tumblr)'
    url = 'http://linscomics.tumblr.com'
3706
3707
3708
class OrigamiHotDish(GenericTumblrV1):
    """Retrieve Origami Hot Dish comics through the Tumblr V1 API."""
    name = 'origamihotdish'
    long_name = 'Origami Hot Dish'
    url = 'http://origamihotdish.com'
3713
3714
3715
class HitAndMissComicsTumblr(GenericTumblrV1):
    """Retrieve Hit and Miss Comics through the Tumblr V1 API."""
    name = 'hitandmiss'
    long_name = 'Hit and Miss Comics'
    url = 'http://hitandmisscomics.tumblr.com'
3720
3721
3722
class HMBlanc(GenericTumblrV1):
    """Retrieve HM Blanc comics through the Tumblr V1 API."""
    name = 'hmblanc'
    long_name = 'HM Blanc'
    url = 'http://hmblanc.tumblr.com'
3727
3728
3729
class TalesOfAbsurdityTumblr(GenericTumblrV1):
    """Retrieve Tales Of Absurdity comics through the Tumblr V1 API."""
    # Also available at http://talesofabsurdity.com
    # Also available at http://tapastic.com/series/Tales-Of-Absurdity
    name = 'absurdity-tumblr'
    long_name = 'Tales of Absurdity (from Tumblr)'
    url = 'http://talesofabsurdity.tumblr.com'
3736
3737
3738
class RobbieAndBobby(GenericTumblrV1):
    """Retrieve Robbie And Bobby comics through the Tumblr V1 API."""
    # Also available at http://robbieandbobby.com
    name = 'robbie-tumblr'
    long_name = 'Robbie And Bobby (from Tumblr)'
    url = 'http://robbieandbobby.tumblr.com'
3744
3745
3746
class ElectricBunnyComicTumblr(GenericTumblrV1):
    """Retrieve Electric Bunny comics through the Tumblr V1 API."""
    # Also available at http://www.electricbunnycomics.com/View/Comic/153/Welcome+to+Hell
    name = 'bunny-tumblr'
    long_name = 'Electric Bunny Comic (from Tumblr)'
    url = 'http://electricbunnycomics.tumblr.com'
3752
3753
3754
class Hoomph(GenericTumblrV1):
    """Retrieve Hoomph comics through the Tumblr V1 API."""
    name = 'hoomph'
    long_name = 'Hoomph'
    url = 'http://hoom.ph'
3759
3760
3761
class BFGFSTumblr(GenericTumblrV1):
    """Retrieve BFGFS comics through the Tumblr V1 API."""
    # Also available at https://tapastic.com/series/BFGFS
    # Also available at http://bfgfs.com
    name = 'bfgfs-tumblr'
    long_name = 'BFGFS (from Tumblr)'
    url = 'http://bfgfs.tumblr.com'
3768
3769
3770
class DoodleForFood(GenericTumblrV1):
    """Retrieve Doodle For Food comics through the Tumblr V1 API."""
    # Also available at http://doodleforfood.com
    name = 'doodle'
    long_name = 'Doodle For Food'
    url = 'http://doodleforfood.com'
3776
3777
3778
class CassandraCalinTumblr(GenericEmptyComic, GenericTumblrV1):
    """Definition of the C. Cassandra comics hosted on Tumblr."""
    # NOTE(review): GenericEmptyComic comes first in the bases, so it
    # presumably takes precedence over the Tumblr retrieval - confirm.
    # Also available at http://cassandracalin.com
    # Also available at https://tapastic.com/series/C-Cassandra-comics
    name = 'cassandra-tumblr'
    long_name = 'Cassandra Calin (from Tumblr)'
    url = 'http://c-cassandra.tumblr.com'
3785
3786
3787
class DougWasTaken(GenericTumblrV1):
    """Retrieve Doug Was Taken comics through the Tumblr V1 API."""
    name = 'doog'
    long_name = 'Doug Was Taken'
    url = 'http://dougwastaken.tumblr.com'
3792
3793
3794
class MandatoryRollerCoaster(GenericEmptyComic, GenericTumblrV1):
    """Definition of the Mandatory Roller Coaster comics."""
    # NOTE(review): GenericEmptyComic comes first in the bases, so it
    # presumably takes precedence over the Tumblr retrieval - confirm.
    name = 'rollercoaster'
    long_name = 'Mandatory Roller Coaster'
    url = 'http://mandatoryrollercoaster.com'
3799
3800
3801
class HorovitzComics(GenericListableComic):
    """Shared logic for the different comics hosted on horovitzcomics.com.

    Subclasses provide link_re, the pattern selecting their links in the
    archive page and capturing the comic number.
    """
    url = 'http://www.horovitzcomics.com'
    # Captures year, month and day from the image url
    img_re = re.compile('.*comics/([0-9]*)/([0-9]*)/([0-9]*)/.*$')
    link_re = NotImplemented
    get_url_from_archive_element = join_cls_url_to_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return number, title, date and image of the comic behind link.

        The number comes from the link href, the title from the link text
        and the date from the url of the (single) comic image.
        """
        link_match = cls.link_re.match(link['href'])
        num = int(link_match.group(1))
        title = link.string
        comic_imgs = soup.find_all('img', id='comic')
        assert len(comic_imgs) == 1
        date_parts = cls.img_re.match(comic_imgs[0]['src']).groups()
        year, month, day = map(int, date_parts)
        return {
            'title': title,
            'day': day,
            'month': month,
            'year': year,
            'img': [img['src'] for img in comic_imgs],
            'num': num,
        }

    @classmethod
    def get_archive_elements(cls):
        """Return the archive links matching link_re, in reverse page order."""
        archive_soup = get_soup_at_url('http://www.horovitzcomics.com/comics/archive/')
        links = archive_soup.find_all('a', href=cls.link_re)
        return reversed(links)
3831
3832
3833
class HorovitzNew(HorovitzComics):
    """Retrieve the 'new' comics from horovitzcomics.com."""
    name = 'horovitznew'
    long_name = 'Horovitz New'
    # Selects the '/comics/new/<number>' links of the archive
    link_re = re.compile('^/comics/new/([0-9]+)$')
3838
3839
3840
class HorovitzClassic(HorovitzComics):
    """Retrieve the 'classic' comics from horovitzcomics.com."""
    name = 'horovitzclassic'
    long_name = 'Horovitz Classic'
    # Selects the '/comics/classic/<number>' links of the archive
    link_re = re.compile('^/comics/classic/([0-9]+)$')
3845
3846
3847
class GenericGoComic(GenericNavigableComic):
    """Shared logic for the comics hosted on gocomics.com."""
    # Captures the three trailing numeric url components (year, month, day)
    url_date_re = re.compile('.*/([0-9]*)/([0-9]*)/([0-9]*)$')

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'beginning' anchor of the comic main page."""
        main_page = get_soup_at_url(cls.url)
        return main_page.find('a', class_='beginning')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the 'next' (or 'prev') anchor whose href ends with a date."""
        wanted_class = 'next' if next_ else 'prev'
        return last_soup.find('a', class_=wanted_class, href=cls.url_date_re)

    @classmethod
    def get_url_from_link(cls, link):
        """Return the link href joined to the gocomics.com root url."""
        return urljoin_wrapper('http://www.gocomics.com', link['href'])

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return date, image and author of the comic behind link.

        The date is read from the comic url; the image is the last 'strip'
        image of the page and the author comes from the 'author' meta tag.
        """
        comic_url = cls.get_url_from_link(link)
        year, month, day = map(int, cls.url_date_re.match(comic_url).groups())
        last_strip = soup.find_all('img', class_='strip')[-1]
        strip_src = last_strip['src']
        author_meta = soup.find('meta', attrs={'name': 'author'})
        return {
            'day': day,
            'month': month,
            'year': year,
            'img': [strip_src],
            'author': author_meta['content']
        }
3879
3880
3881
class PearlsBeforeSwine(GenericGoComic):
    """Pearls Before Swine, fetched from gocomics.com."""
    name = 'pearls'
    long_name = 'Pearls Before Swine'
    url = 'http://www.gocomics.com/pearlsbeforeswine'
3886
3887
3888
class Peanuts(GenericGoComic):
    """Peanuts, fetched from gocomics.com."""
    name = 'peanuts'
    long_name = 'Peanuts'
    url = 'http://www.gocomics.com/peanuts'
3893
3894
3895
class MattWuerker(GenericGoComic):
    """Matt Wuerker cartoons, fetched from gocomics.com."""
    name = 'wuerker'
    long_name = 'Matt Wuerker'
    url = 'http://www.gocomics.com/mattwuerker'
3900
3901
3902
class TomToles(GenericGoComic):
    """Tom Toles cartoons, fetched from gocomics.com."""
    name = 'toles'
    long_name = 'Tom Toles'
    url = 'http://www.gocomics.com/tomtoles'
3907
3908
3909
class BreakOfDay(GenericGoComic):
    """Break Of Day, fetched from gocomics.com."""
    name = 'breakofday'
    long_name = 'Break Of Day'
    url = 'http://www.gocomics.com/break-of-day'
3914
3915
3916
class Brevity(GenericGoComic):
    """Brevity, fetched from gocomics.com."""
    name = 'brevity'
    long_name = 'Brevity'
    url = 'http://www.gocomics.com/brevity'
3921
3922
3923
class MichaelRamirez(GenericGoComic):
    """Michael Ramirez cartoons, fetched from gocomics.com."""
    name = 'ramirez'
    long_name = 'Michael Ramirez'
    url = 'http://www.gocomics.com/michaelramirez'
3928
3929
3930
class MikeLuckovich(GenericGoComic):
    """Mike Luckovich cartoons, fetched from gocomics.com."""
    name = 'luckovich'
    long_name = 'Mike Luckovich'
    url = 'http://www.gocomics.com/mikeluckovich'
3935
3936
3937
class JimBenton(GenericGoComic):
    """Jim Benton cartoons, fetched from gocomics.com."""
    # Also available at http://jimbenton.tumblr.com
    name = 'benton'
    long_name = 'Jim Benton'
    url = 'http://www.gocomics.com/jim-benton-cartoons'
3943
3944
3945
class TheArgyleSweater(GenericGoComic):
    """The Argyle Sweater, fetched from gocomics.com."""
    name = 'argyle'
    long_name = 'Argyle Sweater'
    url = 'http://www.gocomics.com/theargylesweater'
3950
3951
3952
class SunnyStreet(GenericGoComic):
    """Sunny Street, fetched from gocomics.com."""
    # Also available at http://www.sunnystreetcomics.com
    name = 'sunny'
    long_name = 'Sunny Street'
    url = 'http://www.gocomics.com/sunny-street'
3958
3959
3960
class OffTheMark(GenericGoComic):
    """Off The Mark, fetched from gocomics.com."""
    # Also available at https://www.offthemark.com
    name = 'offthemark'
    long_name = 'Off The Mark'
    url = 'http://www.gocomics.com/offthemark'
3966
3967
3968
class WuMo(GenericGoComic):
    """WuMo, fetched from gocomics.com."""
    # Also available at http://wumo.com
    name = 'wumo'
    long_name = 'WuMo'
    url = 'http://www.gocomics.com/wumo'
3974
3975
3976
class LunarBaboon(GenericGoComic):
    """Lunar Baboon, fetched from gocomics.com."""
    # Also available at http://www.lunarbaboon.com
    # Also available at https://tapastic.com/series/Lunarbaboon
    name = 'lunarbaboon'
    long_name = 'Lunar Baboon'
    url = 'http://www.gocomics.com/lunarbaboon'
3983
3984
3985
class SandersenGocomic(GenericGoComic):
    """Sarah Andersen's comics, fetched from gocomics.com."""
    # Also available at http://sarahcandersen.com
    # Also available at http://tapastic.com/series/Doodle-Time
    name = 'sandersen-goc'
    long_name = 'Sarah Andersen (from GoComics)'
    url = 'http://www.gocomics.com/sarahs-scribbles'
3992
3993
3994
class CalvinAndHobbesGoComic(GenericGoComic):
    """Calvin and Hobbes, fetched from gocomics.com."""
    # This is the gocomics source, as opposed to
    # http://marcel-oehler.marcellosendos.ch/comics/ch/
    name = 'calvin-goc'
    long_name = 'Calvin and Hobbes (from GoComics)'
    url = 'http://www.gocomics.com/calvinandhobbes'
4000
4001
4002
class RallGoComic(GenericGoComic):
    """Ted Rall cartoons, fetched from gocomics.com."""
    # Also available at http://rall.com/comic
    name = 'rall-goc'
    long_name = 'Ted Rall (from GoComics)'
    url = 'http://www.gocomics.com/tedrall'
4008
4009
4010
class TheAwkwardYetiGoComic(GenericGoComic):
    """The Awkward Yeti, fetched from gocomics.com."""
    # Also available at http://larstheyeti.tumblr.com
    # Also available at http://theawkwardyeti.com
    # Also available at https://tapastic.com/series/TheAwkwardYeti
    name = 'yeti-goc'
    long_name = 'The Awkward Yeti (from GoComics)'
    url = 'http://www.gocomics.com/the-awkward-yeti'
4018
4019
4020
class BerkeleyMewsGoComics(GenericGoComic):
    """Berkeley Mews, fetched from gocomics.com."""
    # Also available at http://mews.tumblr.com
    # Also available at http://www.berkeleymews.com
    name = 'berkeley-goc'
    long_name = 'Berkeley Mews (from GoComics)'
    url = 'http://www.gocomics.com/berkeley-mews'
4027
4028
4029
class SheldonGoComics(GenericGoComic):
    """Sheldon, fetched from gocomics.com."""
    # Also available at http://www.sheldoncomics.com
    name = 'sheldon-goc'
    long_name = 'Sheldon Comics (from GoComics)'
    url = 'http://www.gocomics.com/sheldon'
4035
4036
4037
class FowlLanguageGoComics(GenericGoComic):
    """Fowl Language, fetched from gocomics.com."""
    # Also available at http://www.fowllanguagecomics.com
    # Also available at http://tapastic.com/series/Fowl-Language-Comics
    # Also available at http://fowllanguagecomics.tumblr.com
    name = 'fowllanguage-goc'
    long_name = 'Fowl Language Comics (from GoComics)'
    url = 'http://www.gocomics.com/fowl-language'
4045
4046
4047
class NickAnderson(GenericGoComic):
    """Nick Anderson cartoons, fetched from gocomics.com."""
    name = 'nickanderson'
    long_name = 'Nick Anderson'
    url = 'http://www.gocomics.com/nickanderson'
4052
4053
4054
class GarfieldGoComics(GenericGoComic):
    """Garfield, fetched from gocomics.com."""
    # Also available at http://garfield.com
    name = 'garfield-goc'
    long_name = 'Garfield (from GoComics)'
    url = 'http://www.gocomics.com/garfield'
4060
4061
4062
class DorrisMcGoComics(GenericGoComic):
    """Dorris McComics, fetched from gocomics.com."""
    # Also available at http://dorrismccomics.com
    name = 'dorrismc-goc'
    long_name = 'Dorris Mc (from GoComics)'
    url = 'http://www.gocomics.com/dorris-mccomics'
4068
4069
4070
class MisterAndMeGoComics(GenericGoComic):
    """Mister & Me, fetched from gocomics.com."""
    # Also available at http://www.mister-and-me.com
    # Also available at https://tapastic.com/series/Mister-and-Me
    name = 'mister-goc'
    long_name = 'Mister & Me (from GoComics)'
    url = 'http://www.gocomics.com/mister-and-me'
4077
4078
4079
class GenericTapasticComic(GenericListableComic):
    """Generic class to handle the logic common to comics from tapastic.com.

    The subclasses in this file only set `name`, `long_name` and `url`,
    the latter being the page downloaded by get_archive_elements.
    """

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Get information about a particular comics.

        `archive_elt` is one entry of the list returned by
        get_archive_elements; its 'publishDate' and 'title' keys are read.
        Returns a dict describing the comic, or None when the episode page
        does not contain any 'art-image' image yet.
        """
        # publishDate is divided by 1000 before being used as a POSIX
        # timestamp, i.e. it is treated as a number of milliseconds.
        timestamp = int(archive_elt['publishDate']) / 1000.0
        day = datetime.datetime.fromtimestamp(timestamp).date()
        imgs = soup.find_all('img', class_='art-image')
        if not imgs:
            print("Comic %s is being uploaded, retry later" % cls.get_url_from_archive_element(archive_elt))
            return None
        return {
            'day': day.day,
            'year': day.year,
            'month': day.month,
            'img': [i['src'] for i in imgs],
            'title': archive_elt['title'],
        }

    @classmethod
    def get_url_from_archive_element(cls, archive_elt):
        """Get the url of the episode described by an archive element."""
        return 'http://tapastic.com/episode/' + str(archive_elt['id'])

    @classmethod
    def get_archive_elements(cls):
        """Get the episode list embedded in the javascript of the series page.

        The first line of the page of the form 'episodeList : <json>,' is
        parsed as JSON and returned.
        Raises IndexError if the page contains no such line.
        """
        pref, suff = 'episodeList : ', ','
        # Information is stored in the javascript part
        # I don't know the clean way to get it so this is the ugly way.
        for raw_line in urlopen_wrapper(cls.url).readlines():
            line = raw_line.decode('utf-8').strip()
            if line.startswith(pref) and line.endswith(suff):
                return json.loads(line[len(pref):-len(suff)])
        # Same exception type as the former "[...][0]" on an empty list,
        # but with a message telling what is actually missing.
        raise IndexError("No line starting with %r found at %s" % (pref, cls.url))
4111
4112
4113
class VegetablesForDessert(GenericTapasticComic):
    """Vegetables For Dessert, fetched from tapastic.com."""
    # Also available at http://vegetablesfordessert.tumblr.com
    name = 'vegetables'
    long_name = 'Vegetables For Dessert'
    url = 'http://tapastic.com/series/vegetablesfordessert'
4119
4120
4121
class FowlLanguageTapa(GenericTapasticComic):
    """Fowl Language, fetched from tapastic.com."""
    # Also available at http://www.fowllanguagecomics.com
    # Also available at http://fowllanguagecomics.tumblr.com
    # Also available at http://www.gocomics.com/fowl-language
    name = 'fowllanguage-tapa'
    long_name = 'Fowl Language Comics (from Tapastic)'
    url = 'http://tapastic.com/series/Fowl-Language-Comics'
4129
4130
4131
class OscillatingProfundities(GenericTapasticComic):
    """Oscillating Profundities, fetched from tapastic.com."""
    name = 'oscillating'
    long_name = 'Oscillating Profundities'
    url = 'http://tapastic.com/series/oscillatingprofundities'
4136
4137
4138
class ZnoflatsComics(GenericTapasticComic):
    """Znoflats Comics, fetched from tapastic.com."""
    name = 'znoflats'
    long_name = 'Znoflats Comics'
    url = 'http://tapastic.com/series/Znoflats-Comics'
4143
4144
4145
class SandersenTapastic(GenericTapasticComic):
    """Sarah Andersen's comics, fetched from tapastic.com."""
    # Also available at http://sarahcandersen.com
    # Also available at http://www.gocomics.com/sarahs-scribbles
    name = 'sandersen-tapa'
    long_name = 'Sarah Andersen (from Tapastic)'
    url = 'http://tapastic.com/series/Doodle-Time'
4152
4153
4154
class TubeyToonsTapastic(GenericTapasticComic):
    """Tubey Toons, fetched from tapastic.com."""
    # Also available at http://tubeytoons.com
    # Also available at http://tubeytoons.tumblr.com
    name = 'tubeytoons-tapa'
    long_name = 'Tubey Toons (from Tapastic)'
    url = 'http://tapastic.com/series/Tubey-Toons'
4161
4162
4163
class AnythingComicTapastic(GenericTapasticComic):
    """Anything Comic, fetched from tapastic.com."""
    # Also available at http://www.anythingcomic.com
    name = 'anythingcomic-tapa'
    long_name = 'Anything Comic (from Tapastic)'
    url = 'http://tapastic.com/series/anything'
4169
4170
4171
class UnearthedComicsTapastic(GenericTapasticComic):
    """Unearthed Comics, fetched from tapastic.com."""
    # Also available at http://unearthedcomics.com
    # Also available at http://unearthedcomics.tumblr.com
    name = 'unearthed-tapa'
    long_name = 'Unearthed Comics (from Tapastic)'
    url = 'http://tapastic.com/series/UnearthedComics'
4178
4179
4180
class EverythingsStupidTapastic(GenericTapasticComic):
    """Everything's Stupid, fetched from tapastic.com."""
    # Also available at http://www.webtoons.com/en/challenge/everythings-stupid/list?title_no=14591
    # Also available at http://everythingsstupid.net
    name = 'stupid-tapa'
    long_name = "Everything's Stupid (from Tapastic)"
    url = 'http://tapastic.com/series/EverythingsStupid'
4187
4188
4189
class JustSayEhTapastic(GenericTapasticComic):
    """Just Say Eh, fetched from tapastic.com."""
    # Also available at http://www.justsayeh.com
    name = 'justsayeh-tapa'
    long_name = 'Just Say Eh (from Tapastic)'
    url = 'http://tapastic.com/series/Just-Say-Eh'
4195
4196
4197
class ThorsThundershackTapastic(GenericTapasticComic):
    """Thor's Thundershack, fetched from tapastic.com."""
    # Also available at http://www.thorsthundershack.com
    name = 'thor-tapa'
    long_name = "Thor's Thundershack (from Tapastic)"
    # NOTE(review): the slug ends in 'Thundershac' - confirm it is the
    # real series address and not a truncated one.
    url = 'http://tapastic.com/series/Thors-Thundershac'
4203
4204
4205
class OwlTurdTapastic(GenericTapasticComic):
    """Owl Turd, fetched from tapastic.com."""
    # Also available at http://owlturd.com
    name = 'owlturd-tapa'
    long_name = 'Owl Turd (from Tapastic)'
    url = 'http://tapastic.com/series/Owl-Turd-Comix'
4211
4212
4213
class GoneIntoRaptureTapastic(GenericTapasticComic):
    """Gone Into Rapture, fetched from tapastic.com."""
    # Also available at http://goneintorapture.tumblr.com
    # Also available at http://www.goneintorapture.com
    name = 'rapture-tapa'
    long_name = 'Gone Into Rapture (from Tapastic)'
    url = 'http://tapastic.com/series/Goneintorapture'
4220
4221
4222
class HeckIfIKnowComicsTapa(GenericTapasticComic):
    """Heck If I Know Comics, fetched from tapastic.com."""
    # Also available at http://heckifiknowcomics.com
    name = 'heck-tapa'
    long_name = 'Heck if I Know comics (from Tapastic)'
    url = 'http://tapastic.com/series/Regular'
4228
4229
4230
class CheerUpEmoKidTapa(GenericTapasticComic):
    """Cheer Up Emo Kid, fetched from tapastic.com."""
    # Also available at http://www.cheerupemokid.com
    # Also available at http://enzocomics.tumblr.com
    name = 'cuek-tapa'
    long_name = 'Cheer Up Emo Kid (from Tapastic)'
    url = 'http://tapastic.com/series/CUEK'
4237
4238
4239
class BigFootJusticeTapa(GenericTapasticComic):
    """Big Foot Justice, fetched from tapastic.com."""
    # Also available at http://bigfootjustice.com
    name = 'bigfoot-tapa'
    long_name = 'Big Foot Justice (from Tapastic)'
    url = 'http://tapastic.com/series/bigfoot-justice'
4245
4246
4247
class UpAndOutTapa(GenericTapasticComic):
    """Up & Out, fetched from tapastic.com."""
    # Also available at http://upandoutcomic.tumblr.com
    name = 'upandout-tapa'
    long_name = 'Up And Out (from Tapastic)'
    url = 'http://tapastic.com/series/UP-and-OUT'
4253
4254
4255
class ToonHoleTapa(GenericTapasticComic):
    """Toon Hole, fetched from tapastic.com."""
    # Also available at http://www.toonhole.com
    name = 'toonhole-tapa'
    long_name = 'Toon Hole (from Tapastic)'
    url = 'http://tapastic.com/series/TOONHOLE'
4261
4262
4263
class AngryAtNothingTapa(GenericTapasticComic):
    """Angry At Nothing, fetched from tapastic.com."""
    # Also available at http://www.angryatnothing.net
    name = 'angry-tapa'
    long_name = 'Angry At Nothing (from Tapastic)'
    url = 'http://tapastic.com/series/Comics-yeah-definitely-comics-'
4269
4270
4271
class LeleozTapa(GenericTapasticComic):
    """Leleoz, fetched from tapastic.com."""
    # Also available at http://leleozcomics.tumblr.com
    name = 'leleoz-tapa'
    long_name = 'Leleoz (from Tapastic)'
    url = 'https://tapastic.com/series/Leleoz'
4277
4278
4279
class TheAwkwardYetiTapa(GenericTapasticComic):
    """The Awkward Yeti, fetched from tapastic.com."""
    # Also available at http://www.gocomics.com/the-awkward-yeti
    # Also available at http://theawkwardyeti.com
    # Also available at http://larstheyeti.tumblr.com
    name = 'yeti-tapa'
    long_name = 'The Awkward Yeti (from Tapastic)'
    url = 'https://tapastic.com/series/TheAwkwardYeti'
4287
4288
4289
class AsPerUsualTapa(GenericTapasticComic):
    """As Per Usual, fetched from tapastic.com."""
    # Also available at http://as-per-usual.tumblr.com
    name = 'usual-tapa'
    long_name = 'As Per Usual (from Tapastic)'
    url = 'https://tapastic.com/series/AsPerUsual'
4295
4296
4297
class OneOneOneOneComicTapa(GenericTapasticComic):
    """1111 Comics, fetched from tapastic.com."""
    # Also available at http://www.1111comics.me
    # Also available at http://comics1111.tumblr.com
    name = '1111-tapa'
    long_name = '1111 Comics (from Tapastic)'
    url = 'https://tapastic.com/series/1111-Comics'
4304
4305
4306
class TumbleDryTapa(GenericTapasticComic):
    """Class to retrieve Tumble Dry comics."""
    # Also on http://tumbledrycomics.com
    name = 'tumbledry-tapa'
    # Display name; spelled 'Tumble Dry' like the class name and the urls.
    long_name = 'Tumble Dry (from Tapastic)'
    url = 'https://tapastic.com/series/TumbleDryComics'
4312
4313
4314
class DeadlyPanelTapa(GenericTapasticComic):
    """Deadly Panel, fetched from tapastic.com."""
    # Also available at http://www.deadlypanel.com
    name = 'deadly-tapa'
    long_name = 'Deadly Panel (from Tapastic)'
    url = 'https://tapastic.com/series/deadlypanel'
4320
4321
4322
class ChrisHallbeckMaxiTapa(GenericTapasticComic):
    """Class to retrieve Chris Hallbeck's Maximumble comics."""
    # Also on http://chrishallbeck.tumblr.com
    # Also on http://maximumble.com
    name = 'hallbeckmaxi-tapa'
    # Author name spelled 'Hallbeck', as in the class name and the urls.
    long_name = 'Chris Hallbeck - Maximumble (from Tapastic)'
    url = 'https://tapastic.com/series/Maximumble'
4329
4330
4331
class ChrisHallbeckMiniTapa(GenericTapasticComic):
    """Class to retrieve Chris Hallbeck's Minimumble comics."""
    # Also on http://chrishallbeck.tumblr.com
    # Also on http://minimumble.com
    name = 'hallbeckmini-tapa'
    # Author name spelled 'Hallbeck', as in the class name and the urls.
    long_name = 'Chris Hallbeck - Minimumble (from Tapastic)'
    url = 'https://tapastic.com/series/Minimumble'
4338
4339
4340
class ChrisHallbeckBiffTapa(GenericTapasticComic):
    """Class to retrieve Chris Hallbeck's The Book of Biff comics."""
    # Also on http://chrishallbeck.tumblr.com
    # Also on http://thebookofbiff.com
    name = 'hallbeckbiff-tapa'
    # Author name spelled 'Hallbeck', as in the class name and the urls.
    long_name = 'Chris Hallbeck - The Book of Biff (from Tapastic)'
    url = 'https://tapastic.com/series/Biff'
4347
4348
4349
class RandoWisTapa(GenericTapasticComic):
    """RandoWis, fetched from tapastic.com."""
    # Also available at https://randowis.com
    name = 'randowis-tapa'
    long_name = 'RandoWis (from Tapastic)'
    url = 'https://tapastic.com/series/RandoWis'
4355
4356
4357
class PigeonGazetteTapa(GenericTapasticComic):
    """The Pigeon Gazette, fetched from tapastic.com."""
    # Also available at http://thepigeongazette.tumblr.com
    name = 'pigeon-tapa'
    long_name = 'The Pigeon Gazette (from Tapastic)'
    url = 'https://tapastic.com/series/The-Pigeon-Gazette'
4363
4364
4365
class TheOdd1sOutTapa(GenericTapasticComic):
    """The Odd 1s Out, fetched from tapastic.com."""
    # Also available at http://theodd1sout.com
    # Also available at http://theodd1sout.tumblr.com
    name = 'theodd-tapa'
    long_name = 'The Odd 1s Out (from Tapastic)'
    url = 'https://tapastic.com/series/Theodd1sout'
4372
4373
4374
class TheWorldIsFlatTapa(GenericTapasticComic):
    """The World Is Flat, fetched from tapastic.com."""
    # Also available at http://theworldisflatcomics.tumblr.com
    name = 'flatworld-tapa'
    long_name = 'The World Is Flat (from Tapastic)'
    url = 'https://tapastic.com/series/The-World-is-Flat'
4380
4381
4382
class MisterAndMeTapa(GenericTapasticComic):
    """Mister & Me, fetched from tapastic.com."""
    # Also available at http://www.mister-and-me.com
    # Also available at http://www.gocomics.com/mister-and-me
    name = 'mister-tapa'
    long_name = 'Mister & Me (from Tapastic)'
    url = 'https://tapastic.com/series/Mister-and-Me'
4389
4390
4391
class TalesOfAbsurdityTapa(GenericTapasticComic):
    """Tales Of Absurdity, fetched from tapastic.com."""
    # Also available at http://talesofabsurdity.com
    # Also available at http://talesofabsurdity.tumblr.com
    name = 'absurdity-tapa'
    long_name = 'Tales of Absurdity (from Tapastic)'
    url = 'http://tapastic.com/series/Tales-Of-Absurdity'
4398
4399
4400
class BFGFSTapa(GenericTapasticComic):
    """BFGFS, fetched from tapastic.com."""
    # Also available at http://bfgfs.com
    # Also available at http://bfgfs.tumblr.com
    name = 'bfgfs-tapa'
    long_name = 'BFGFS (from Tapastic)'
    url = 'https://tapastic.com/series/BFGFS'
4407
4408
4409
class DoodleForFoodTapa(GenericTapasticComic):
    """Doodle For Food, fetched from tapastic.com."""
    # Also available at http://doodleforfood.com
    name = 'doodle-tapa'
    long_name = 'Doodle For Food (from Tapastic)'
    url = 'https://tapastic.com/series/Doodle-for-Food'
4415
4416
4417
class MrLovensteinTapa(GenericTapasticComic):
    """Mr. Lovenstein, fetched from tapastic.com."""
    # Also available at http://www.mrlovenstein.com
    name = 'mrlovenstein-tapa'
    long_name = 'Mr. Lovenstein (from Tapastic)'
    url = 'https://tapastic.com/series/MrLovenstein'
4423
4424
4425
class CassandraCalinTapa(GenericTapasticComic):
    """Cassandra Calin's C. Cassandra comics, fetched from tapastic.com."""
    # Also available at http://cassandracalin.com
    # Also available at http://c-cassandra.tumblr.com
    name = 'cassandra-tapa'
    long_name = 'Cassandra Calin (from Tapastic)'
    url = 'https://tapastic.com/series/C-Cassandra-comics'
4432
4433
4434
def get_subclasses(klass):
    """Return every class deriving from `klass`, directly or not.

    Direct subclasses come first, followed by the descendants of each of
    them in turn; `klass` itself is not part of the result.
    """
    children = klass.__subclasses__()
    descendants = list(children)
    for child in children:
        descendants += get_subclasses(child)
    return descendants
4440
4441
4442
def remove_st_nd_rd_th_from_date(string):
    """Function to transform 1st/2nd/3rd/4th in a parsable date format.

    Only an ordinal suffix directly following a digit is removed, so that
    words which merely contain these letters ('August', 'Monday',
    'Thursday', 'Saturday'...) are left untouched.
    Returns the cleaned string.
    """
    # The lookbehind requires a digit before the suffix and \b ensures the
    # suffix ends the token ("1st," matches, "1street" does not).
    return re.sub(r'(?<=[0-9])(?:st|nd|rd|th)\b', '', string)
4450
4451
4452
def string_to_date(string, date_format, local=DEFAULT_LOCAL):
    """Function to convert string to date object.

    Wrapper around datetime.datetime.strptime which switches the process
    locale to `local` for the duration of the parsing.

    Returns a datetime.date. The exception raised by strptime when
    `string` does not match `date_format` is propagated, after the
    previous locale has been restored.
    """
    # format described in https://docs.python.org/2/library/datetime.html#strftime-and-strptime-behavior
    prev_locale = locale.setlocale(locale.LC_ALL)
    must_switch = local != prev_locale
    if must_switch:
        locale.setlocale(locale.LC_ALL, local)
    try:
        return datetime.datetime.strptime(string, date_format).date()
    finally:
        # The locale is a process-wide setting: put it back even when the
        # parsing fails so that a bad date does not leak the change.
        if must_switch:
            locale.setlocale(locale.LC_ALL, prev_locale)
4463
4464
4465
# Every class deriving (directly or not) from GenericComic, without duplicates.
COMICS = set(get_subclasses(GenericComic))
# Classes whose `name` is None are left out (presumably the generic,
# non-instantiable helper classes - confirm against GenericComic).
VALID_COMICS = [comic for comic in COMICS if comic.name is not None]
# Lookup table from short name to comic class; names must be unique.
COMIC_NAMES = dict((comic.name, comic) for comic in VALID_COMICS)
assert len(VALID_COMICS) == len(COMIC_NAMES)
# Python class names must be unique as well.
CLASS_NAMES = set(comic.__name__ for comic in VALID_COMICS)
assert len(VALID_COMICS) == len(CLASS_NAMES)
4471