Completed
Push — master ( bfc1f3...e1e8dd )
by De
01:13
created

comics.py (22 issues)

Code
1
#! /usr/bin/python3
2
# vim: set expandtab tabstop=4 shiftwidth=4 :
3
"""Module to retrieve webcomics"""
4
5
from comic_abstract import GenericComic, get_date_for_comic
6
import re
7
from datetime import date, timedelta
8
import datetime
9
from urlfunctions import get_soup_at_url, urljoin_wrapper,\
10
    convert_iri_to_plain_ascii_uri, load_json_at_url, urlopen_wrapper
11
import json
12
import locale
13
import urllib
14
15
DEFAULT_LOCAL = 'en_GB.UTF-8'
16
17
18
class Xkcd(GenericComic):
    """Class to retrieve Xkcd comics."""
    name = 'xkcd'
    long_name = 'xkcd'
    url = 'http://xkcd.com'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generate the comics that come after last_comic.

        Each yielded comic is the JSON description loaded from the site,
        completed with the 'prefix', 'json_url' and 'url' keys, with its
        image wrapped in a list and its date fields converted to integers.
        When last_comic is falsy, iteration starts at comic number 1.
        """
        first_num = last_comic['num'] if last_comic else 0
        latest_info = load_json_at_url(urljoin_wrapper(cls.url, 'info.0.json'))
        last_num = latest_info['num']

        for num in range(first_num + 1, last_num + 1):
            if num == 404:
                # Number 404 is never requested (presumably there is no
                # such comic on the site - to be confirmed).
                continue
            json_url = urljoin_wrapper(cls.url, '%d/info.0.json' % num)
            comic = load_json_at_url(json_url)
            comic['img'] = [comic['img']]
            comic['prefix'] = '%d-' % num
            comic['json_url'] = json_url
            comic['url'] = urljoin_wrapper(cls.url, str(num))
            # Date fields are converted from their JSON form to integers.
            for field in ('day', 'month', 'year'):
                comic[field] = int(comic[field])
            assert comic['num'] == num
            yield comic
43
44
45
# Helper functions corresponding to get_url_from_link/get_url_from_archive_element
46
47
48
@classmethod
def get_href(cls, link):
    """Implementation of get_url_from_link/get_url_from_archive_element.

    The url is the value stored under the 'href' key of the link, as is.
    """
    href = link['href']
    return href
52
53
54
@classmethod
def join_cls_url_to_href(cls, link):
    """Implementation of get_url_from_link/get_url_from_archive_element.

    The 'href' of the link is joined to the url of the class with
    urljoin_wrapper.
    """
    href = link['href']
    return urljoin_wrapper(cls.url, href)
58
59
60
class GenericNavigableComic(GenericComic):
    """Generic class for "navigable" comics : with first/next arrows.

    The `get_next_comic` method is implemented in terms of new
    more specialized methods to be implemented/overridden:
        - get_first_comic_link
        - get_navi_link
        - get_comic_info
        - get_url_from_link
    """

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        Sometimes this can be retrieved from any comic page, sometimes on
        the archive page, sometimes it doesn't exist at all and one has
        to iterate backward to find it before hardcoding the result found.
        """
        raise NotImplementedError

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next (or previous - for dev purposes) comic.

        `next_` is True to get the link to the next comic and False to
        get the link to the previous one.
        """
        raise NotImplementedError

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        When the result is None, `get_next_comic` skips the page.
        Otherwise, the result must not contain a 'url' key because
        `get_next_comic` adds it.
        """
        raise NotImplementedError

    @classmethod
    def get_url_from_link(cls, link):
        """Get url corresponding to a link. Default implementation is similar to get_href."""
        return link['href']

    @classmethod
    def get_next_link(cls, last_soup):
        """Get link to next comic."""
        return cls.get_navi_link(last_soup, True)

    @classmethod
    def get_prev_link(cls, last_soup):
        """Get link to previous comic."""
        return cls.get_navi_link(last_soup, False)

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generic implementation of get_next_comic for navigable comics.

        Navigation starts from the page of `last_comic` when there is one
        and from the first comic link otherwise. It stops when there is no
        next link or when the next link leads to the url just visited.
        """
        url = last_comic['url'] if last_comic else None
        next_comic = \
            cls.get_next_link(get_soup_at_url(url)) \
            if url else \
            cls.get_first_comic_link()
        cls.log("next/first comic will be %s (url is %s)" % (str(next_comic), url))
        while next_comic:
            prev_url, url = url, cls.get_url_from_link(next_comic)
            if prev_url == url:
                # A "next" link pointing to the current page would make
                # this loop run forever.
                cls.log("got same url %s" % url)
                break
            cls.log("about to get %s (%s)" % (url, str(next_comic)))
            soup = get_soup_at_url(url)
            comic = cls.get_comic_info(soup, next_comic)
            if comic is not None:
                assert 'url' not in comic
                comic['url'] = url
                yield comic
            next_comic = cls.get_next_link(soup)
            cls.log("next comic will be %s" % str(next_comic))

    @classmethod
    def check_navigation(cls, url):
        """Check that navigation functions seem to be working - for dev purposes.

        Problems are printed and make the returned value False. A missing
        link on a neighbouring page is reported like any other problem
        (the url reached is then shown as None) instead of raising.
        """
        cls.log("about to check navigation from %s" % url)
        ok = True
        firstlink = cls.get_first_comic_link()
        if firstlink is None:
            print("From %s : no first link" % cls.url)
            ok = False
        if url is None:
            prevlink, nextlink = None, None
        else:
            soup = get_soup_at_url(url)
            prevlink, nextlink = cls.get_prev_link(soup), cls.get_next_link(soup)
        if prevlink is None and nextlink is None:
            print("From %s : no previous nor next" % url)
            ok = False
        else:
            if prevlink:
                prevurl = cls.get_url_from_link(prevlink)
                prevsoup = get_soup_at_url(prevurl)
                # The previous page may have no next link at all.
                prevnextlink = cls.get_next_link(prevsoup)
                prevnext = cls.get_url_from_link(prevnextlink) if prevnextlink else None
                if prevnext != url:
                    print("From %s, going backward then forward leads to %s" % (url, prevnext))
                    ok = False
            if nextlink:
                nexturl = cls.get_url_from_link(nextlink)
                if nexturl != url:
                    nextsoup = get_soup_at_url(nexturl)
                    # The next page may have no previous link at all.
                    nextprevlink = cls.get_prev_link(nextsoup)
                    nextprev = cls.get_url_from_link(nextprevlink) if nextprevlink else None
                    if nextprev != url:
                        print("From %s, going forward then backward leads to %s" % (url, nextprev))
                        ok = False
        cls.log("checked navigation from %s -> returned %d" % (url, ok))
        return ok

    # This method is not defined by default and is not part of this class'API.
    # It is only used:
    # - during development
    # - in subclasses implementing it correctly
    if False:
        @classmethod
        def get_first_comic_url(cls):
            """Get first comic url

            Sometimes, the first comic cannot be reached directly so to start
            from the first comic one has to go to the previous comic until
            there is no previous comics. Once this URL is reached, it
            is better to hardcode it but for development purposes, it
            is convenient to have an automatic way to find it.
            """
            url = input("Get starting URL: ")
            print(url)
            comic = cls.get_prev_link(get_soup_at_url(url))
            while comic:
                url = cls.get_url_from_link(comic)
                print(url)
                comic = cls.get_prev_link(get_soup_at_url(url))
            return url
189
190
191
class GenericListableComic(GenericComic):
    """Generic class for "listable" comics : with a list of comics (aka 'archive')

    The `get_next_comic` method is implemented in terms of new
    more specialized methods to be implemented/overridden:
        - get_archive_elements
        - get_url_from_archive_element
        - get_comic_info
    """

    @classmethod
    def get_archive_elements(cls):
        """Get the archive elements (iterable)."""
        raise NotImplementedError

    @classmethod
    def get_url_from_archive_element(cls, archive_elt):
        """Get url corresponding to an archive element."""
        raise NotImplementedError

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Get information about a particular comics."""
        raise NotImplementedError

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generic implementation of get_next_comic for listable comics.

        Archive elements are skipped until the url of `last_comic` has
        been seen; the comics for the following elements are yielded.
        Without `last_comic`, all the archive elements are considered.
        """
        pending_url = last_comic['url'] if last_comic else None
        for element in cls.get_archive_elements():
            url = cls.get_url_from_archive_element(element)
            cls.log("considering %s" % url)
            if pending_url is not None:
                # Still looking for the last comic retrieved: nothing is
                # fetched until its url has been met.
                if pending_url and pending_url == url:
                    pending_url = None
                continue
            cls.log("about to get %s (%s)" % (url, str(element)))
            info = cls.get_comic_info(get_soup_at_url(url), element)
            if info is None:
                continue
            assert 'url' not in info
            info['url'] = url
            yield info
        if pending_url is not None:
            print("Did not find %s : there might be a problem" % pending_url)
235
236
# Helper functions corresponding to get_first_comic_link/get_navi_link
237
238
239
@classmethod
def get_link_rel_next(cls, last_soup, next_):
    """Implementation of get_navi_link.

    Looks for a 'link' tag whose rel is 'next' (or 'prev' when next_ is falsy).
    """
    rel = 'next' if next_ else 'prev'
    return last_soup.find('link', rel=rel)
243
244
245
@classmethod
def get_a_rel_next(cls, last_soup, next_):
    """Implementation of get_navi_link.

    Looks for an 'a' tag whose rel is 'next' (or 'prev' when next_ is falsy).
    """
    rel = 'next' if next_ else 'prev'
    return last_soup.find('a', rel=rel)
249
250
251
@classmethod
def get_a_navi_navinext(cls, last_soup, next_):
    """Implementation of get_navi_link.

    Looks for an 'a' tag with class 'navi navi-next' (or 'navi navi-prev'
    when next_ is falsy).
    """
    if next_:
        css_class = 'navi navi-next'
    else:
        css_class = 'navi navi-prev'
    return last_soup.find('a', class_=css_class)
255
256
257
@classmethod
def get_a_navi_comicnavnext_navinext(cls, last_soup, next_):
    """Implementation of get_navi_link.

    Looks for an 'a' tag with class 'navi comic-nav-next navi-next' (or
    'navi comic-nav-previous navi-prev' when next_ is falsy).
    """
    if next_:
        css_class = 'navi comic-nav-next navi-next'
    else:
        css_class = 'navi comic-nav-previous navi-prev'
    return last_soup.find('a', class_=css_class)
261
262
263
@classmethod
def get_a_comicnavbase_comicnavnext(cls, last_soup, next_):
    """Implementation of get_navi_link.

    Looks for an 'a' tag with class 'comic-nav-base comic-nav-next' (or
    'comic-nav-base comic-nav-previous' when next_ is falsy).
    """
    if next_:
        css_class = 'comic-nav-base comic-nav-next'
    else:
        css_class = 'comic-nav-base comic-nav-previous'
    return last_soup.find('a', class_=css_class)
267
268
269
@classmethod
def get_a_navi_navifirst(cls):
    """Implementation of get_first_comic_link.

    Looks on the page at cls.url for an 'a' tag with class 'navi navi-first'.
    """
    home_soup = get_soup_at_url(cls.url)
    return home_soup.find('a', class_='navi navi-first')
273
274
275
@classmethod
def get_div_navfirst_a(cls):
    """Implementation of get_first_comic_link.

    Looks on the page at cls.url for the 'a' tag inside the 'div' with
    class 'nav-first'. Returns None when there is no such div (or no 'a'
    tag in it) so that callers can handle a missing first link.
    """
    div = get_soup_at_url(cls.url).find('div', class_="nav-first")
    if div is None:
        return None
    return div.find('a')
279
280
281
@classmethod
def get_a_comicnavbase_comicnavfirst(cls):
    """Implementation of get_first_comic_link.

    Looks on the page at cls.url for an 'a' tag with class
    'comic-nav-base comic-nav-first'.
    """
    home_soup = get_soup_at_url(cls.url)
    return home_soup.find('a', class_='comic-nav-base comic-nav-first')
285
286
287
class GenericEmptyComic(GenericComic):
    """Generic class for comics where nothing is to be done.

    It can be useful to deactivate temporarily comics that do not work
    properly by replacing `class MyComic(GenericWhateverComic)` with
    `class MyComic(GenericEmptyComic, GenericWhateverComic)`.
    """

    @classmethod
    def get_next_comic(cls, last_comic):
        """Return an empty list: there is never any comic to retrieve."""
        no_comics = []
        return no_comics
297
298
299
class ExtraFabulousComics(GenericNavigableComic):
    """Class to retrieve Extra Fabulous Comics."""
    name = 'efc'
    long_name = 'Extra Fabulous Comics'
    url = 'http://extrafabulouscomics.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics."""
        return get_soup_at_url(cls.url).find('a', title='FIRST')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The images are the 'img' tags whose source starts with the
        wp-content/uploads directory of the site.
        """
        # cls.url is escaped so that its dots only match literal dots.
        img_src_re = re.compile('^%s/wp-content/uploads/' % re.escape(cls.url))
        imgs = soup.find_all('img', src=img_src_re)
        title = soup.find('h2', class_='post-title').string
        return {
            'title': title,
            'img': [i['src'] for i in imgs],
            'prefix': title + '-'
        }
322
323
324 View Code Duplication
class GenericLeMondeBlog(GenericNavigableComic):
    """Generic class to retrieve comics from Le Monde blogs."""
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_url(cls):
        """Get first comic url (to be provided by each blog class)."""
        raise NotImplementedError

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics, built from get_first_comic_url."""
        first_url = cls.get_first_comic_url()
        return {'href': first_url}

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Everything is read from the page: short link, og:title, og:image
        and the entry date, which is parsed with the "fr_FR.utf8" locale.
        """
        short_link = soup.find('link', rel='shortlink')
        url2 = short_link['href']
        title_meta = soup.find('meta', property='og:title')
        title = title_meta['content']
        date_span = soup.find("span", class_="entry-date")
        day = string_to_date(date_span.string, "%d %B %Y", "fr_FR.utf8")
        img_urls = []
        for meta in soup.find_all('meta', property='og:image'):
            img_urls.append(convert_iri_to_plain_ascii_uri(meta['content']))
        return {
            'title': title,
            'url2': url2,
            'img': img_urls,
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
354
355
356
class ZepWorld(GenericLeMondeBlog):
    """Class to retrieve Zep World comics."""
    name = 'zep'
    long_name = 'Zep World'
    url = 'http://zepworld.blog.lemonde.fr'

    @classmethod
    def get_first_comic_url(cls):
        """Get first comic url (hardcoded)."""
        first_url = 'http://zepworld.blog.lemonde.fr/2014/10/31/bientot-le-blog-de-zep/'
        return first_url
365
366
367
class Vidberg(GenericLeMondeBlog):
    """Class to retrieve Vidberg comics."""
    name = "vidberg"
    long_name = "Vidberg - l'actu en patates"
    url = 'http://vidberg.blog.lemonde.fr'

    @classmethod
    def get_first_comic_url(cls):
        """Get the url used as starting point (hardcoded)."""
        # Not the first but I didn't find an efficient way to retrieve it
        start_url = 'http://vidberg.blog.lemonde.fr/2012/02/09/revue-de-campagne-la-campagne-du-modem-semballe/'
        return start_url
377
378
379
class Plantu(GenericLeMondeBlog):
    """Class to retrieve Plantu comics."""
    name = "plantu"
    long_name = 'Plantu'
    url = 'http://plantu.blog.lemonde.fr'

    @classmethod
    def get_first_comic_url(cls):
        """Get first comic url (hardcoded)."""
        first_url = 'http://plantu.blog.lemonde.fr/2014/10/28/stress-test-a-bruxelles/'
        return first_url
388
389
390
class XavierGorce(GenericLeMondeBlog):
    """Class to retrieve Xavier Gorce comics."""
    name = "gorce"
    long_name = 'Xavier Gorce'
    url = 'http://xaviergorce.blog.lemonde.fr'

    @classmethod
    def get_first_comic_url(cls):
        """Get first comic url (hardcoded)."""
        first_url = 'http://xaviergorce.blog.lemonde.fr/2015/01/09/distinction/'
        return first_url
399
400
401
class CartooningForPeace(GenericLeMondeBlog):
    """Class to retrieve Cartooning For Peace comics."""
    name = "forpeace"
    long_name = 'Cartooning For Peace'
    url = 'http://cartooningforpeace.blog.lemonde.fr'

    @classmethod
    def get_first_comic_url(cls):
        """Get first comic url (hardcoded)."""
        first_url = 'http://cartooningforpeace.blog.lemonde.fr/2014/12/15/bado/'
        return first_url
410
411
412
class Aurel(GenericLeMondeBlog):
    """Class to retrieve Aurel comics."""
    name = "aurel"
    long_name = 'Aurel'
    url = 'http://aurel.blog.lemonde.fr'

    @classmethod
    def get_first_comic_url(cls):
        """Get first comic url (hardcoded)."""
        first_url = 'http://aurel.blog.lemonde.fr/2014/09/29/le-senat-repasse-a-droite/'
        return first_url
421
422
423
class LesCulottees(GenericLeMondeBlog):
    """Class to retrieve Les Culottees comics."""
    name = "culottees"
    long_name = "Les Culottees"
    url = 'http://lesculottees.blog.lemonde.fr'

    @classmethod
    def get_first_comic_url(cls):
        """Get first comic url (hardcoded)."""
        first_url = 'http://lesculottees.blog.lemonde.fr/2016/01/11/clementine-delait-femme-a-barbe/'
        return first_url
432
433
434
class UneAnneeAuLycee(GenericLeMondeBlog):
    """Class to retrieve Une Annee Au Lycee comics."""
    name = "lycee"
    long_name = "Une Annee au Lycee"
    url = "http://uneanneeaulycee.blog.lemonde.fr"

    @classmethod
    def get_first_comic_url(cls):
        """Get first comic url (hardcoded)."""
        first_url = 'http://uneanneeaulycee.blog.lemonde.fr/2016/06/13/la-semaine-du-bac-est-arrivee/'
        return first_url
443
444
445 View Code Duplication
class Rall(GenericNavigableComic):
    """Class to retrieve Ted Rall comics."""
    # Also on http://www.gocomics.com/tedrall
    name = 'rall'
    long_name = 'Ted Rall'
    url = 'http://rall.com/comic'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics (hardcoded starting point)."""
        # Not the first but I didn't find an efficient way to retrieve it
        start_url = "http://rall.com/2014/01/30/los-angeles-times-cartoon-well-miss-those-california-flowers"
        return {'href': start_url}

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Title and description come from the og: meta tags, author and date
        from the corresponding spans and images from the entry content.
        """
        title = soup.find('meta', property='og:title')['content']
        author_span = soup.find("span", class_="author vcard")
        author = author_span.find("a").string
        date_span = soup.find("span", class_="entry-date")
        day = string_to_date(date_span.string, "%B %d, %Y")
        desc = soup.find('meta', property='og:description')['content']
        content = soup.find('div', class_='entry-content')
        # remove social media buttons (the last 7 images)
        comic_imgs = content.find_all('img')[:-7]
        return {
            'title': title,
            'author': author,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'description': desc,
            'img': [img['src'] for img in comic_imgs],
        }
478
479
480
class Dilem(GenericNavigableComic):
    """Class to retrieve Ali Dilem comics."""
    name = 'dilem'
    long_name = 'Ali Dilem'
    url = 'http://information.tv5monde.com/dilem'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics (hardcoded)."""
        first_url = "http://information.tv5monde.com/dilem/2004-06-26"
        return {'href': first_url}

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next (or previous) comic.

        The 'li' classes are used the other way round: the next comic is
        under the 'prev' item and the previous comic under the 'next' item.
        """
        # prev is next / next is prev
        li_class = 'prev' if next_ else 'next'
        li = last_soup.find('li', class_=li_class)
        if not li:
            return None
        return li.find('a')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics."""
        short_url = soup.find('link', rel='shortlink')['href']
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        imgs = soup.find_all('meta', property='og:image')
        full_date_str = soup.find('span', property='dc:date')['content']
        # Only the first 10 characters are parsed as "%Y-%m-%d".
        day = string_to_date(full_date_str[:10], "%Y-%m-%d")
        img_urls = [meta['content'] for meta in imgs]
        return {
            'short_url': short_url,
            'title': title,
            'img': img_urls,
            'day': day.day,
            'month': day.month,
            'year': day.year,
        }
515
516
517
class SpaceAvalanche(GenericNavigableComic):
    """Class to retrieve Space Avalanche comics."""
    name = 'avalanche'
    long_name = 'Space Avalanche'
    url = 'http://www.spaceavalanche.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics (hardcoded, with its title)."""
        return {
            'href': "http://www.spaceavalanche.com/2009/02/02/irish-sea/",
            'title': "Irish Sea",
        }

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The title is taken from the link and the date from the url; only
        the images are read from the page.
        """
        url_date_re = re.compile('.*/([0-9]*)/([0-9]*)/([0-9]*)/.*$')
        title = link['title']
        url = cls.get_url_from_link(link)
        date_parts = url_date_re.match(url).groups()
        year, month, day = [int(part) for part in date_parts]
        entry = soup.find("div", class_="entry")
        img_urls = [img['src'] for img in entry.find_all("img")]
        return {
            'title': title,
            'day': day,
            'month': month,
            'year': year,
            'img': img_urls,
        }
545
546
547
class ZenPencils(GenericNavigableComic):
    """Class to retrieve ZenPencils comics."""
    # Also on http://zenpencils.tumblr.com
    # Also on http://www.gocomics.com/zen-pencils
    name = 'zenpencils'
    long_name = 'Zen Pencils'
    url = 'http://zenpencils.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics (hardcoded)."""
        first_url = "http://zenpencils.com/comic/1-ralph-waldo-emerson-make-them-cry/"
        return {'href': first_url}

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Images come from the 'comic' div; author, title and date from the
        'post-content' div; the description from the og:description meta.
        """
        comic_div = soup.find('div', id='comic')
        imgs = comic_div.find_all('img')
        post = soup.find('div', class_='post-content')
        author_span = post.find("span", class_="post-author")
        author = author_span.find("a").string
        title = post.find('h2', class_='post-title').string
        date_span = post.find('span', class_='post-date')
        day = string_to_date(date_span.string, "%B %d, %Y")
        # Sanity checks on what the page is expected to look like.
        assert imgs
        assert all(i['alt'] == i['title'] for i in imgs)
        assert all(i['alt'] in (title, "") for i in imgs)
        desc = soup.find('meta', property='og:description')['content']
        return {
            'title': title,
            'description': desc,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [img['src'] for img in imgs],
        }
583
584
585
class ItsTheTie(GenericNavigableComic):
    """Class to retrieve It's the tie comics."""
    # Also on http://itsthetie.tumblr.com
    # Also on https://tapastic.com/series/itsthetie
    name = 'tie'
    long_name = "It's the tie"
    url = 'http://itsthetie.com'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Images are the og:image urls followed by the bonus images (from
        'data-oversrc' attributes) not already listed, without the generic
        bonus-panel picture.
        """
        title = soup.find('h1', class_='comic-title').find('a').string
        meta_header = soup.find('header', class_='comic-meta entry-meta')
        day = string_to_date(meta_header.find('a').string, "%B %d, %Y")
        # Bonus images may or may not be in meta og:image.
        imgs_src = [m['content'] for m in soup.find_all('meta', property='og:image')]
        bonus_src = [b['data-oversrc']
                     for b in soup.find_all('img', attrs={'data-oversrc': True})]
        ignored_suffix = "/2016/01/bonus-panel.png"
        all_imgs_src = []
        for src in imgs_src:
            if not src.endswith(ignored_suffix):
                all_imgs_src.append(src)
        for src in bonus_src:
            if src not in imgs_src and not src.endswith(ignored_suffix):
                all_imgs_src.append(src)
        tag_meta = soup.find('meta', property='article:tag')
        if tag_meta:
            tags = tag_meta['content']
        else:
            tags = ""
        return {
            'title': title,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': all_imgs_src,
            'tags': tags,
        }
618
619
620
class PenelopeBagieu(GenericNavigableComic):
    """Class to retrieve comics from Penelope Bagieu's blog."""
    name = 'bagieu'
    long_name = 'Ma vie est tout a fait fascinante (Bagieu)'
    url = 'http://www.penelope-jolicoeur.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics (hardcoded)."""
        first_url = 'http://www.penelope-jolicoeur.com/2007/02/ma-vie-mon-oeuv.html'
        return {'href': first_url}

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The date header is parsed with the "fr_FR.utf8" locale.
        """
        date_header = soup.find('h2', class_='date-header')
        day = string_to_date(date_header.string, "%A %d %B %Y", "fr_FR.utf8")
        body = soup.find('div', class_='entry-body')
        imgs = body.find_all('img')
        title = soup.find('h3', class_='entry-header').string
        return {
            'title': title,
            'img': [img['src'] for img in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
646
647
648 View Code Duplication
class OneOneOneOneComic(GenericNavigableComic):
    """Class to retrieve 1111 Comics."""
    # Also on http://comics1111.tumblr.com
    # Also on https://tapastic.com/series/1111-Comics
    name = '1111'
    long_name = '1111 Comics'
    url = 'http://www.1111comics.me'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Title and date are read from the comic header, images from the
        og:image meta tags.
        """
        title_tag = soup.find('h1', class_='comic-title')
        title = title_tag.find('a').string
        meta_header = soup.find('header', class_='comic-meta entry-meta')
        day = string_to_date(meta_header.find('a').string, "%B %d, %Y")
        img_urls = [m['content'] for m in soup.find_all('meta', property='og:image')]
        return {
            'title': title,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': img_urls,
        }
672
673
674 View Code Duplication
class AngryAtNothing(GenericNavigableComic):
    """Class to retrieve Angry at Nothing comics."""
    # Also on http://tapastic.com/series/Comics-yeah-definitely-comics-
    name = 'angry'
    long_name = 'Angry At Nothing'
    url = 'http://www.angryatnothing.net'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Title and date are read from the comic header, images from the
        og:image meta tags.
        """
        title_tag = soup.find('h1', class_='comic-title')
        title = title_tag.find('a').string
        meta_header = soup.find('header', class_='comic-meta entry-meta')
        day = string_to_date(meta_header.find('a').string, "%B %d, %Y")
        img_urls = [m['content'] for m in soup.find_all('meta', property='og:image')]
        return {
            'title': title,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': img_urls,
        }
697
698
699
class NeDroid(GenericNavigableComic):
    """Class to retrieve NeDroid comics."""
    name = 'nedroid'
    long_name = 'NeDroid'
    url = 'http://nedroid.com'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The comic number is extracted from the short link of the page and
        the date from the source of the (single) comic image.
        """
        # Literal parts of the patterns are escaped so that dots only
        # match dots.
        short_url_re = re.compile(r'^%s/\?p=([0-9]*)' % re.escape(cls.url))
        comic_url_re = re.compile(r'//nedroid\.com/comics/([0-9]*)-([0-9]*)-([0-9]*).*')
        short_url = cls.get_url_from_link(soup.find('link', rel='shortlink'))
        num = int(short_url_re.match(short_url).groups()[0])
        imgs = soup.find('div', id='comic').find_all('img')
        # Checked before the image is used so that an unexpected page is
        # reported by this check rather than by the indexing below.
        assert len(imgs) == 1
        img = imgs[0]
        year, month, day = [int(s) for s in comic_url_re.match(img['src']).groups()]
        title = img['alt']
        title2 = img['title']
        return {
            'short_url': short_url,
            'title': title,
            'title2': title2,
            'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
            'day': day,
            'month': month,
            'year': year,
            'num': num,
        }
730
731
732
class Garfield(GenericNavigableComic):
    """Class to retrieve Garfield comics."""
    # Also on http://www.gocomics.com/garfield
    name = 'garfield'
    long_name = 'Garfield'
    url = 'https://garfield.com'

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics (hardcoded)."""
        return {'href': 'https://garfield.com/comic/1978/06/19'}

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next (or previous) comic from the arrow links."""
        return last_soup.find('a', class_='comic-arrow-right' if next_ else 'comic-arrow-left')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The date is extracted from the url of the comic, the images are
        read from the 'comic-display' div.
        """
        url = cls.get_url_from_link(link)
        # cls.url is escaped so that its dots only match literal dots.
        date_re = re.compile('^%s/comic/([0-9]*)/([0-9]*)/([0-9]*)' % re.escape(cls.url))
        year, month, day = [int(s) for s in date_re.match(url).groups()]
        imgs = soup.find('div', class_='comic-display').find_all('img', class_='img-responsive')
        return {
            'month': month,
            'year': year,
            'day': day,
            'img': [i['src'] for i in imgs],
        }
761
762
763 View Code Duplication
class Dilbert(GenericNavigableComic):
    """Class to retrieve Dilbert comics."""
    # Also on http://www.gocomics.com/dilbert-classics
    name = 'dilbert'
    long_name = 'Dilbert'
    url = 'http://dilbert.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics (hardcoded)."""
        first_url = 'http://dilbert.com/strip/1989-04-16'
        return {'href': first_url}

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next (or previous) comic.

        The link is the 'a' tag inside the right (or left) navigation div;
        None is returned when that div is missing.
        """
        div_class = 'nav-comic nav-right' if next_ else 'nav-comic nav-left'
        nav_div = last_soup.find('div', class_=div_class)
        if not nav_div:
            return None
        return nav_div.find('a')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        All the values are read from meta tags of the page.
        """
        title = soup.find('meta', property='og:title')['content']
        imgs = soup.find_all('meta', property='og:image')
        desc = soup.find('meta', property='og:description')['content']
        date_meta = soup.find('meta', property='article:publish_date')
        day = string_to_date(date_meta['content'], "%B %d, %Y")
        author = soup.find('meta', property='article:author')['content']
        tags = soup.find('meta', property='article:tag')['content']
        img_urls = [meta['content'] for meta in imgs]
        return {
            'title': title,
            'description': desc,
            'img': img_urls,
            'author': author,
            'tags': tags,
            'day': day.day,
            'month': day.month,
            'year': day.year,
        }
801
802
803
class VictimsOfCircumsolar(GenericNavigableComic):
    """Retriever for the Victims Of Circumsolar comics."""
    name = 'circumsolar'
    long_name = 'Victims Of Circumsolar'
    url = 'http://www.victimsofcircumsolar.com'
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_first_comic_link(cls):
        """Return a link-like dict whose 'href' is the hard-coded first comic."""
        return dict(
            href='http://www.victimsofcircumsolar.com/comic/modern-addiction')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title, description, images) of a comic page.

        `link` is not used. No date is returned: it is on the archive page.
        """
        # When several og: tags are present, the last one is the one used.
        title_metas = soup.find_all('meta', property='og:title')
        title = title_metas[-1]['content']
        desc_metas = soup.find_all('meta', property='og:description')
        description = desc_metas[-1]['content']
        comic_div = soup.find('div', id='comic')
        pictures = comic_div.find_all('img')
        # Every image is expected to be labelled with the comic title.
        assert all(pic['title'] == pic['alt'] == title for pic in pictures)
        return {
            'title': title,
            'description': description,
            'img': [pic['src'] for pic in pictures],
        }
828
829
830
class ThreeWordPhrase(GenericNavigableComic):
    """Retriever for the Three Word Phrase comics."""
    # Also on http://www.threewordphrase.tumblr.com
    name = 'threeword'
    long_name = 'Three Word Phrase'
    url = 'http://threewordphrase.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the parent element of the 'first' button image."""
        first_button = get_soup_at_url(cls.url).find('img', src='/firstlink.gif')
        return first_button.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the parent of the next/prev button, None if it has no href."""
        button_src = '/nextlink.gif' if next_ else '/prevlink.gif'
        anchor = last_soup.find('img', src=button_src).parent
        if anchor.get('href') is None:
            return None
        return anchor

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict of a comic page; `link` is not used."""
        # Images whose source ends with one of these are not comic images.
        ignored_suffixes = (
            'link.gif', '32.png', 'twpbookad.jpg',
            'merchad.jpg', 'header.gif', 'tipjar.jpg')
        title_tag = soup.find('title')
        comic_imgs = []
        for img in soup.find_all('img'):
            if not img['src'].endswith(ignored_suffixes):
                comic_imgs.append(img)
        alt_texts = (img.get('alt') for img in comic_imgs)
        return {
            'title': title_tag.string if title_tag else None,
            'title2': '  '.join(alt for alt in alt_texts if alt),
            'img': [urljoin_wrapper(cls.url, img['src']) for img in comic_imgs],
        }
861
862
863
class DeadlyPanel(GenericEmptyComic, GenericNavigableComic):
    """Retriever for the Deadly Panel comics."""
    # Also on https://tapastic.com/series/deadlypanel
    name = 'deadly'
    long_name = 'Deadly Panel'
    url = 'http://www.deadlypanel.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the image sources found in the 'comic' div; `link` is unused."""
        comic_div = soup.find('div', id='comic')
        pictures = comic_div.find_all('img')
        # alt and title are expected to carry the same text on every image.
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        return {
            'img': [pic['src'] for pic in pictures],
        }
880
881
882
class TheGentlemanArmchair(GenericNavigableComic):
    """Retriever for The Gentleman Armchair comics."""
    name = 'gentlemanarmchair'
    long_name = 'The Gentleman Armchair'
    url = 'http://thegentlemansarmchair.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict of a comic from its post page; `link` is unused."""
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        posted_on = string_to_date(
            soup.find('span', class_='post-date').string, "%B %d, %Y")
        pictures = soup.find('div', id='comic').find_all('img')
        info = {
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'author': author,
            'month': posted_on.month,
            'year': posted_on.year,
            'day': posted_on.day,
        }
        return info
906
907
908 View Code Duplication
class MyExtraLife(GenericNavigableComic):
    """Retriever for the My Extra Life comics."""
    name = 'extralife'
    long_name = 'My Extra Life'
    url = 'http://www.myextralife.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'first comic' navigation link found on the home page."""
        home_page = get_soup_at_url(cls.url)
        return home_page.find('a', class_='comic_nav_link first_comic_link')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict of a comic page; `link` is not used."""
        title = soup.find("h1", class_="comic_title").string
        raw_date = soup.find("span", class_="comic_date").string
        posted_on = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find_all("img", class_="comic")
        # Every comic image is expected to be labelled with the comic title.
        assert all(pic['alt'] == pic['title'] == title for pic in pictures)
        return {
            'title': title,
            # Images with an empty source are left out.
            'img': [pic['src'] for pic in pictures if pic["src"]],
            'day': posted_on.day,
            'month': posted_on.month,
            'year': posted_on.year
        }
935
936
937
class SaturdayMorningBreakfastCereal(GenericNavigableComic):
    """Retriever for the Saturday Morning Breakfast Cereal comics."""
    # Also on http://www.gocomics.com/saturday-morning-breakfast-cereal
    # Also on http://smbc-comics.tumblr.com
    name = 'smbc'
    long_name = 'Saturday Morning Breakfast Cereal'
    url = 'http://www.smbc-comics.com'
    get_navi_link = get_a_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the rel='start' link found on the home page."""
        home_page = get_soup_at_url(cls.url)
        return home_page.find('a', rel='start')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict of a comic page; `link` is not used.

        The image of the 'aftercomic' div, when there is one with a
        non-empty source, is listed after the main comic image.
        """
        main_img = soup.find('img', id='cc-comic')
        img_urls = [main_img['src']]
        after_div = soup.find('div', id='aftercomic')
        if after_div:
            after_url = after_div.find('img')['src']
            if after_url:
                img_urls.append(after_url)
        raw_date = soup.find('div', class_='cc-publishtime').contents[0]
        posted_on = string_to_date(raw_date, "%B %d, %Y")
        return {
            'title': main_img['title'],
            'img': [urljoin_wrapper(cls.url, img_url) for img_url in img_urls],
            'day': posted_on.day,
            'month': posted_on.month,
            'year': posted_on.year
        }
968
969
970
class PerryBibleFellowship(GenericListableComic):
    """Retriever for the Perry Bible Fellowship comics."""
    name = 'pbf'
    long_name = 'Perry Bible Fellowship'
    url = 'http://pbfcomics.com'
    get_url_from_archive_element = join_cls_url_to_href

    @classmethod
    def get_archive_elements(cls):
        """Return the numbered comic links of the home page, in reverse order."""
        numbered_href = re.compile('^/[0-9]*/$')
        anchors = get_soup_at_url(cls.url).find_all('a', href=numbered_href)
        return reversed(anchors)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict of a comic from its page and its archive link."""
        page_url = cls.get_url_from_archive_element(link)
        img_src_re = re.compile('^/archive_b/PBF.*')
        name = link.string
        num = int(link['name'])
        # The link target is expected to be built from the comic number.
        assert link['href'] == '/%d/' % num
        pictures = soup.find_all('img', src=img_src_re)
        # Exactly one comic image, labelled with the comic name, is expected.
        assert len(pictures) == 1
        assert pictures[0]['alt'] == name
        return {
            'num': num,
            'name': name,
            'img': [urljoin_wrapper(page_url, pic['src']) for pic in pictures],
            'prefix': '%d-' % num,
        }
1000
1001
1002
class Mercworks(GenericNavigableComic):
    """Retriever for the Mercworks comics."""
    # Also on http://mercworks.tumblr.com
    name = 'mercworks'
    long_name = 'Mercworks'
    url = 'http://mercworks.net'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict of a comic from its <meta> tags; `link` is unused."""
        def named_meta(meta_name):
            """Return the content of the first <meta> with this name."""
            return soup.find('meta', attrs={'name': meta_name})['content']

        title = soup.find('meta', property='og:title')['content']
        desc_meta = soup.find('meta', property='og:description')
        # The description is optional: fall back on an empty string.
        if desc_meta:
            description = desc_meta['content']
        else:
            description = ""
        author = named_meta('shareaholic:article_author_name')
        published_time = named_meta('shareaholic:article_published_time')
        # Only the first 10 characters (the YYYY-MM-DD part) are parsed.
        posted_on = string_to_date(published_time[:10], "%Y-%m-%d")
        image_metas = soup.find_all('meta', property='og:image')
        return {
            'img': [meta['content'] for meta in image_metas],
            'title': title,
            'author': author,
            'desc': description,
            'day': posted_on.day,
            'month': posted_on.month,
            'year': posted_on.year
        }
1031
1032
1033
class BerkeleyMews(GenericListableComic):
    """Retriever for the Berkeley Mews comics."""
    # Also on http://mews.tumblr.com
    # Also on http://www.gocomics.com/berkeley-mews
    name = 'berkeley'
    long_name = 'Berkeley Mews'
    url = 'http://www.berkeleymews.com'
    get_url_from_archive_element = get_href
    # Matches the URL of a comic and captures its number.
    comic_num_re = re.compile('%s/\\?p=([0-9]*)$' % url)

    @classmethod
    def get_archive_elements(cls):
        """Return the comic links of the archive page, in reverse order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, "?page_id=2"))
        return reversed(archive_soup.find_all('a', href=cls.comic_num_re))

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict of a comic from its page and its archive link.

        The number comes from the comic URL and the date from the three
        dash-separated numbers found in the image URL.
        """
        date_in_url_re = re.compile('.*/([0-9]*)-([0-9]*)-([0-9]*)-.*')
        comic_url = cls.get_url_from_archive_element(link)
        num = int(cls.comic_num_re.match(comic_url).group(1))
        picture = soup.find('div', id='comic').find('img')
        # alt and title are expected to carry the same text.
        assert picture['alt'] == picture['title']
        title2 = picture['title']
        img_url = picture['src']
        year, month, day = map(int, date_in_url_re.match(img_url).groups())
        return {
            'num': num,
            'title': link.string,
            'title2': title2,
            'img': [img_url],
            'year': year,
            'month': month,
            'day': day,
        }
1068
1069
1070
class GenericBouletCorp(GenericNavigableComic):
    """Common retriever for the BouletCorp comics, whatever the language."""
    # Also on http://bouletcorp.tumblr.com
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the first link of the 'centered_nav' div of the home page."""
        nav_div = get_soup_at_url(cls.url).find('div', id='centered_nav')
        return nav_div.find_all('a')[0]

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict of a comic; its date is read from its URL."""
        comic_url = cls.get_url_from_link(link)
        date_in_url_re = re.compile('^%s/([0-9]*)/([0-9]*)/([0-9]*)/' % cls.url)
        year, month, day = map(int, date_in_url_re.match(comic_url).groups())
        story_div = soup.find('div', id='notes').find('div', class_='storycontent')
        pictures = story_div.find_all('img')
        # Join the non-empty image titles with a double space.
        img_titles = (pic.get('title') for pic in pictures)
        texts = '  '.join(text for text in img_titles if text)
        title = soup.find('title').string
        # Images without a src attribute are left out.
        img_urls = [
            convert_iri_to_plain_ascii_uri(pic['src'])
            for pic in pictures
            if pic.get('src') is not None]
        return {
            'img': img_urls,
            'title': title,
            'texts': texts,
            'year': year,
            'month': month,
            'day': day,
        }
1097
1098
1099
class BouletCorp(GenericBouletCorp):
    """Retriever for the BouletCorp comics served from www.bouletcorp.com."""
    # Only the identity of the comic is set here: the retrieval logic is
    # inherited from GenericBouletCorp.
    name = 'boulet'
    long_name = 'Boulet Corp'
    url = 'http://www.bouletcorp.com'
1104
1105
1106
class BouletCorpEn(GenericBouletCorp):
    """Retriever for the BouletCorp comics served from english.bouletcorp.com."""
    # Only the identity of the comic is set here: the retrieval logic is
    # inherited from GenericBouletCorp.
    name = 'boulet_en'
    long_name = 'Boulet Corp English'
    url = 'http://english.bouletcorp.com'
1111
1112
1113
class AmazingSuperPowers(GenericNavigableComic):
    """Retriever for the Amazing Super Powers comics."""
    name = 'asp'
    long_name = 'Amazing Super Powers'
    url = 'http://www.amazingsuperpowers.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict of a comic from its post page; `link` is unused."""
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        raw_date = soup.find('span', class_='post-date').string
        posted_on = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find('div', id='comic').find_all('img')
        # The title is made of the titles of all the comic images.
        title = ' '.join(pic['title'] for pic in pictures)
        # alt and title are expected to carry the same text on every image.
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        return {
            'title': title,
            'author': author,
            'img': [pic['src'] for pic in pictures],
            'day': posted_on.day,
            'month': posted_on.month,
            'year': posted_on.year
        }
1138
1139
1140
class ToonHole(GenericListableComic):
    """Retriever for the Toon Hole comics."""
    # Also on http://tapastic.com/series/TOONHOLE
    name = 'toonhole'
    long_name = 'Toon Hole'
    url = 'http://www.toonhole.com'
    get_url_from_archive_element = get_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict of a comic from its page and its archive link."""
        title = link.string
        raw_date = soup.find('div', class_='comicdate').string.strip()
        posted_on = string_to_date(
            remove_st_nd_rd_th_from_date(raw_date), "%B %d, %Y")
        pictures = soup.find('div', id='comic').find_all('img')
        # Every comic image is expected to be labelled with the link text.
        assert all(pic['alt'] == pic['title'] == title for pic in pictures)
        return {
            'title': title,
            'month': posted_on.month,
            'year': posted_on.year,
            'day': posted_on.day,
            'img': [convert_iri_to_plain_ascii_uri(pic['src']) for pic in pictures],
        }

    @classmethod
    def get_archive_elements(cls):
        """Return the rel='bookmark' links of the archive, in reverse order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive/'))
        return reversed(archive_soup.find_all('a', rel='bookmark'))
1168
1169
1170
class Channelate(GenericNavigableComic):
    """Retriever for the Channelate comics."""
    name = 'channelate'
    long_name = 'Channelate'
    url = 'http://www.channelate.com'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict of a comic page; `link` is not used.

        When the page has an 'extrapanelbutton' div, the page it links to
        is fetched too and its extra panel images are added after the
        images of the comic itself.
        """
        author = soup.find("span", class_="post-author").find("a").string
        raw_date = soup.find('span', class_='post-date').string
        posted_on = string_to_date(raw_date, '%Y/%m/%d')
        title = soup.find('meta', property='og:title')['content']
        comic_div = soup.find('div', id='comic')
        if comic_div:
            pictures = comic_div.find_all('img')
        else:
            pictures = []
        # alt and title are expected to carry the same text on every image.
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        extra_url = None
        extra_button = soup.find('div', id='extrapanelbutton')
        if extra_button:
            extra_url = extra_button.find('a')['href']
            pictures.extend(
                get_soup_at_url(extra_url).find_all('img', class_='extrapanelimage'))
        return {
            'url_extra': extra_url,
            'title': title,
            'author': author,
            'month': posted_on.month,
            'year': posted_on.year,
            'day': posted_on.day,
            'img': [pic['src'] for pic in pictures],
        }
1204
1205
1206
class CyanideAndHappiness(GenericNavigableComic):
    """Class to retrieve Cyanide And Happiness comics."""
    name = 'cyanide'
    long_name = 'Cyanide and Happiness'
    url = 'http://explosm.net'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics: the 'Oldest comic' link of the home page."""
        return get_soup_at_url(cls.url).find('a', title='Oldest comic')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to the next (or previous) comic.

        Returns None when the navigation anchor is missing or when it has
        no href attribute.
        """
        # No whitespace padding in the class names: BeautifulSoup matches
        # against the whitespace-split values of the class attribute, so a
        # padded name would never match anything.
        link = last_soup.find('a', class_='next-comic' if next_ else 'previous-comic')
        if link is None:
            return None
        return None if link.get('href') is None else link

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The comic number is the next-to-last '/'-separated part of the
        og:url of the page. `link` is not used.
        """
        url2 = soup.find('meta', property='og:url')['content']
        num = int(url2.split('/')[-2])
        date_str = soup.find('h3').find('a').string
        day = string_to_date(date_str, '%Y.%m.%d')
        author = soup.find('small', class_="author-credit-name").string
        # The credit is expected to read 'by <author>': drop the 'by ' part.
        assert author.startswith('by ')
        author = author[3:]
        imgs = soup.find_all('img', id='main-comic')
        return {
            'num': num,
            'author': author,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'prefix': '%d-' % num,
            'img': [convert_iri_to_plain_ascii_uri(urljoin_wrapper(cls.url, i['src'])) for i in imgs]
        }
1243
1244
1245
class MrLovenstein(GenericComic):
    """Retriever for the Mr Lovenstein comics."""
    # Also on https://tapastic.com/series/MrLovenstein
    name = 'mrlovenstein'
    long_name = 'Mr. Lovenstein'
    url = 'http://www.mrlovenstein.com'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Yield the info dict of each comic following `last_comic`.

        The range of comic numbers is taken from the links of the home
        page; it starts right after last_comic['num'] when `last_comic`
        is provided.
        """
        # TODO: more info from http://www.mrlovenstein.com/archive
        comic_num_re = re.compile('^/comic/([0-9]*)$')
        home_links = get_soup_at_url(cls.url).find_all('a', href=comic_num_re)
        nums = [int(comic_num_re.match(a['href']).group(1)) for a in home_links]
        first = min(nums)
        last = max(nums)
        if last_comic:
            first = last_comic['num'] + 1
        img_src_re = re.compile('^/images/comics/')
        for num in range(first, last + 1):
            comic_url = urljoin_wrapper(cls.url, '/comic/%d' % num)
            comic_soup = get_soup_at_url(comic_url)
            # Comic images are used in the reverse of their page order.
            pictures = comic_soup.find_all('img', src=img_src_re)[::-1]
            description = comic_soup.find(
                'meta', attrs={'name': 'description'})['content']
            img_titles = (pic.get('title') for pic in pictures)
            yield {
                'url': comic_url,
                'num': num,
                'texts': '  '.join(text for text in img_titles if text),
                'img': [urljoin_wrapper(comic_url, pic['src']) for pic in pictures],
                'description': description,
            }
1274
1275
1276
class DinosaurComics(GenericListableComic):
    """Retriever for the Dinosaur Comics."""
    name = 'dinosaur'
    long_name = 'Dinosaur Comics'
    url = 'http://www.qwantz.com'
    get_url_from_archive_element = get_href
    # Matches the URL of a comic and captures its number.
    comic_link_re = re.compile('^%s/index.php\\?comic=([0-9]*)$' % url)

    @classmethod
    def get_archive_elements(cls):
        """Return the comic links of the archive page, in reverse order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive.php'))
        comic_links = archive_soup.find_all('a', href=cls.comic_link_re)
        # first link is random -> skip it
        return reversed(comic_links[1:])

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict of a comic from its page and its archive link.

        The date is the text of the archive link and the text is the string
        of the node following that link.
        """
        comic_url = cls.get_url_from_archive_element(link)
        num = int(cls.comic_link_re.match(comic_url).group(1))
        raw_date = link.string
        text = link.next_sibling.string
        posted_on = string_to_date(
            remove_st_nd_rd_th_from_date(raw_date), "%B %d, %Y")
        img_src_re = re.compile('^%s/comics/' % cls.url)
        picture = soup.find('img', src=img_src_re)
        return {
            'month': posted_on.month,
            'year': posted_on.year,
            'day': posted_on.day,
            'img': [picture.get('src')],
            'title': picture.get('title'),
            'text': text,
            'num': num,
        }
1309 View Code Duplication
0 ignored issues
show
This code seems to be duplicated in your project.
Loading history...
1310
1311
class ButterSafe(GenericListableComic):
    """Retriever for the ButterSafe comics."""
    name = 'butter'
    long_name = 'ButterSafe'
    url = 'http://buttersafe.com'
    get_url_from_archive_element = get_href
    # Matches the URL of a comic and captures the three numbers of its date.
    comic_link_re = re.compile('^%s/([0-9]*)/([0-9]*)/([0-9]*)/.*' % url)

    @classmethod
    def get_archive_elements(cls):
        """Return the comic links of the archive page, in reverse order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive/'))
        return reversed(archive_soup.find_all('a', href=cls.comic_link_re))

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict of a comic; its date is read from its URL."""
        comic_url = cls.get_url_from_archive_element(link)
        title = link.string
        year, month, day = map(int, cls.comic_link_re.match(comic_url).groups())
        picture = soup.find('div', id='comic').find('img')
        # The image is expected to be labelled with the text of the link.
        assert picture['alt'] == title
        return {
            'title': title,
            'day': day,
            'month': month,
            'year': year,
            'img': [picture['src']],
        }
1339
1340
1341
class CalvinAndHobbes(GenericComic):
    """Class to retrieve Calvin and Hobbes comics."""
    # Also on http://www.gocomics.com/calvinandhobbes/
    name = 'calvin'
    long_name = 'Calvin and Hobbes'
    # This is not through any official webpage but eh...
    url = 'http://marcel-oehler.marcellosendos.ch/comics/ch/'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Yield the info dict of each comic dated after `last_comic`.

        Month pages are found through the 'year/month/' links of the main
        page. When `last_comic` is not provided, comics dated after
        1985-11-01 are yielded.
        """
        last_date = get_date_for_comic(
            last_comic) if last_comic else date(1985, 11, 1)
        link_re = re.compile('^([0-9]*)/([0-9]*)/')
        for link in get_soup_at_url(cls.url).find_all('a', href=link_re):
            url = link['href']
            # year and month are kept as found in the link: these exact
            # strings are needed to build the image pattern and image URL.
            year, month = link_re.match(url).groups()
            year_num, month_num = int(year), int(month)
            # First day of the month + 31 days is on or after the end of the
            # month: months entirely before last_date are not fetched.
            if date(year_num, month_num, 1) + timedelta(days=31) >= last_date:
                img_re = re.compile('^%s%s([0-9]*)' % (year, month))
                month_url = urljoin_wrapper(cls.url, url)
                for img in get_soup_at_url(month_url).find_all('img', src=img_re):
                    img_src = img['src']
                    day = int(img_re.match(img_src).groups()[0])
                    comic_date = date(year_num, month_num, day)
                    if comic_date > last_date:
                        yield {
                            'url': month_url,
                            'year': year_num,
                            'month': month_num,
                            'day': day,
                            'img': ['%s%s/%s/%s' % (cls.url, year, month, img_src)],
                        }
                        last_date = comic_date
1374 View Code Duplication
0 ignored issues
show
This code seems to be duplicated in your project.
Loading history...
1375
1376
class AbstruseGoose(GenericListableComic):
    """Retriever for the Abstruse Goose comics."""
    name = 'abstruse'
    long_name = 'Abstruse Goose'
    url = 'http://abstrusegoose.com'
    get_url_from_archive_element = get_href
    # Matches the URL of a comic and captures its number.
    comic_url_re = re.compile('^%s/([0-9]*)$' % url)
    # Matches the source of a comic image.
    comic_img_re = re.compile('^%s/strips/.*' % url)

    @classmethod
    def get_archive_elements(cls):
        """Return the comic links of the archive page, in page order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive'))
        return archive_soup.find_all('a', href=cls.comic_url_re)

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Build the info dict of a comic from its page and its archive link."""
        comic_url = cls.get_url_from_archive_element(archive_elt)
        num = int(cls.comic_url_re.match(comic_url).group(1))
        title = archive_elt.string
        picture = soup.find('img', src=cls.comic_img_re)
        return {
            'num': num,
            'title': title,
            'img': [picture['src']]
        }
1399
1400
1401
class PhDComics(GenericNavigableComic):
    """Retriever for the PhD Comics."""
    name = 'phd'
    long_name = 'PhD Comics'
    url = 'http://phdcomics.com/comics/archive.php'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the parent element of the 'first' button image."""
        first_button = get_soup_at_url(cls.url).find(
            'img', src='images/first_button.gif')
        return first_button.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the parent of the next/prev button, None without a button."""
        if next_:
            button_src = 'images/next_button.gif'
        else:
            button_src = 'images/prev_button.gif'
        button = last_soup.find('img', src=button_src)
        if button is None:
            return None
        return button.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict of a comic page; `link` is not used.

        When the date found on the page cannot be parsed, a message is
        printed and the date of today is used instead.
        """
        date_font = soup.find(
            'font', face='Arial,Helvetica,Geneva,Swiss,SunSans-Regular', color='white')
        raw_date = date_font.string.strip()
        try:
            posted_on = string_to_date(raw_date, '%m/%d/%Y')
        except ValueError:
            print("Invalid date %s" % raw_date)
            posted_on = date.today()
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        img_src = soup.find('img', id='comic')['src']
        return {
            'year': posted_on.year,
            'month': posted_on.month,
            'day': posted_on.day,
            'img': [img_src],
            'title': title,
        }
1435
1436
1437
class Octopuns(GenericNavigableComic):
    """Retriever for the Octopuns comics."""
    # Also on http://octopuns.tumblr.com
    name = 'octopuns'
    long_name = 'Octopuns'
    url = 'http://www.octopuns.net'

    @classmethod
    def get_first_comic_link(cls):
        """Return the parent element of the 'First' button image."""
        first_button = get_soup_at_url(cls.url).find(
            'img', src=re.compile('.*/First.png'))
        return first_button.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the parent of the Next/Back button, None if it has no href."""
        button_pattern = '.*/Next.png' if next_ else '.*/Back.png'
        anchor = last_soup.find('img', src=re.compile(button_pattern)).parent
        if anchor.get('href') is None:
            return None
        return anchor

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict of a comic page; `link` is not used."""
        title = soup.find('h3', class_='post-title entry-title').string
        raw_date = soup.find('h2', class_='date-header').string
        posted_on = string_to_date(raw_date, "%A, %B %d, %Y")
        image_links = soup.find_all('link', rel='image_src')
        return {
            'img': [img_link['href'] for img_link in image_links],
            'title': title,
            'day': posted_on.day,
            'month': posted_on.month,
            'year': posted_on.year,
        }
0 ignored issues
show
This code seems to be duplicated in your project.
Loading history...
1468
1469
1470
class Quarktees(GenericNavigableComic):
    """Retriever for the Quarktees comics."""
    name = 'quarktees'
    long_name = 'Quarktees'
    url = 'http://www.quarktees.com/blogs/news'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return a link-like dict whose 'href' is the hard-coded first article."""
        return dict(
            href='http://www.quarktees.com/blogs/news/12486621-coming-soon')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the 'article-next' (or 'article-prev') link of the page."""
        link_id = 'article-next' if next_ else 'article-prev'
        return last_soup.find('a', id=link_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title, images) of an article; `link` is unused."""
        title = soup.find('meta', property='og:title')['content']
        article_div = soup.find('div', class_='single-article')
        pictures = article_div.find_all('img')
        return {
            'title': title,
            'img': [urljoin_wrapper(cls.url, pic['src']) for pic in pictures],
        }
1496
1497
1498
class OverCompensating(GenericNavigableComic):
    """Retriever for the Over Compensating comics."""
    name = 'compensating'
    long_name = 'Over Compensating'
    url = 'http://www.overcompensating.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the home page link whose href ends with 'comic=1'."""
        home_page = get_soup_at_url(cls.url)
        return home_page.find('a', href=re.compile('comic=1$'))

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the next/previous link, identified by its title attribute."""
        link_title = 'next comic' if next_ else 'go back already'
        return last_soup.find('a', title=link_title)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict of a comic; its number is read from its URL."""
        img_src_re = re.compile('^/oc/comics/.*')
        num_in_url_re = re.compile('.*comic=([0-9]*)$')
        comic_url = cls.get_url_from_link(link)
        num = int(num_in_url_re.match(comic_url).group(1))
        picture = soup.find('img', src=img_src_re)
        return {
            'num': num,
            'img': [urljoin_wrapper(comic_url, picture['src'])],
            'title': picture.get('title')
        }
1527
1528
1529
class Oglaf(GenericNavigableComic):
    """Retriever for the Oglaf comics."""
    name = 'oglaf'
    long_name = 'Oglaf [NSFW]'
    url = 'http://oglaf.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the parent element of the 'st' div of the home page."""
        start_div = get_soup_at_url(cls.url).find("div", id="st")
        return start_div.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the parent of the 'nx' (or 'pvs') div, None without a div."""
        nav_div = last_soup.find("div", id="nx" if next_ else "pvs")
        if not nav_div:
            return None
        return nav_div.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict of a comic page; `link` is not used.

        The images are the title image followed by the strip image, and
        the description is made of their title attributes.
        """
        title = soup.find('title').string
        title_imgs = soup.find('div', id='tt').find_all('img')
        # Exactly one title image and one strip image are expected.
        assert len(title_imgs) == 1
        strip_imgs = soup.find_all('img', id='strip')
        assert len(strip_imgs) == 1
        pictures = title_imgs + strip_imgs
        description = ' '.join(pic['title'] for pic in pictures)
        return {
            'title': title,
            'img': [pic['src'] for pic in pictures],
            'description': description,
        }
0 ignored issues
show
This code seems to be duplicated in your project.
Loading history...
1561
1562
1563
class ScandinaviaAndTheWorld(GenericNavigableComic):
    """Class to retrieve Scandinavia And The World comics."""
    name = 'satw'
    long_name = 'Scandinavia And The World'
    url = 'http://satwcomic.com'

    @classmethod
    def get_first_comic_link(cls):
        """Return a link-like dict pointing at the hard-coded first comic."""
        first_url = 'http://satwcomic.com/sweden-denmark-and-norway'
        return {'href': first_url}

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the <a> whose accesskey is 'n' (next) or 'p' (previous)."""
        access_key = 'n' if next_ else 'p'
        return last_soup.find('a', accesskey=access_key)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Read title, description and images from the parsed comic page."""
        title_meta = soup.find('meta', attrs={'name': 'twitter:label1'})
        comic_title = title_meta['content']
        desc_meta = soup.find('meta', property='og:description')
        description = desc_meta['content']
        pictures = soup.find_all('img', itemprop="image")
        return {
            'title': comic_title,
            'description': description,
            'img': [picture['src'] for picture in pictures],
        }
1589
1590
1591
class SomethingOfThatIlk(GenericEmptyComic):  # Does not exist anymore
    """Entry for the Something Of That Ilk comics.

    Only the identification attributes are set here; all behaviour is
    inherited from GenericEmptyComic.
    """
    name = 'somethingofthatilk'
    long_name = 'Something Of That Ilk'
    url = 'http://www.somethingofthatilk.com'
1596
1597
1598
class InfiniteMonkeyBusiness(GenericNavigableComic):
    """Generic class to retrieve InfiniteMonkeyBusiness comics."""
    name = 'monkey'
    long_name = 'Infinite Monkey Business'
    url = 'http://infinitemonkeybusiness.net'
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_first_comic_link(cls):
        """Return a link-like dict pointing at the hard-coded first comic."""
        first_url = 'http://infinitemonkeybusiness.net/comic/pillory/'
        return {'href': first_url}

    @classmethod
    def get_comic_info(cls, soup, link):
        """Read the og:title and the images of the div with id "comic"."""
        comic_title = soup.find('meta', property='og:title')['content']
        comic_div = soup.find('div', id='comic')
        pictures = comic_div.find_all('img')
        return {
            'title': comic_title,
            'img': [picture['src'] for picture in pictures],
        }
1619
1620
1621
class Wondermark(GenericListableComic):
    """Class to retrieve the Wondermark comics."""
    name = 'wondermark'
    long_name = 'Wondermark'
    url = 'http://wondermark.com'
    get_url_from_archive_element = get_href

    @classmethod
    def get_archive_elements(cls):
        """Return the rel="bookmark" links of the archive page, reversed."""
        archive_page = get_soup_at_url(urljoin_wrapper(cls.url, 'archive/'))
        bookmarks = archive_page.find_all('a', rel='bookmark')
        return reversed(bookmarks)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect date, image, title, alt text and tags of a comic page.

        When the page has no div with id "comic", the image list is empty
        and both title and alt are empty strings.
        """
        raw_date = soup.find('div', class_='postdate').find('em').string
        # Ordinal suffixes are stripped before parsing "%B %d, %Y".
        published = string_to_date(remove_st_nd_rd_th_from_date(raw_date), "%B %d, %Y")
        img_src, alt, title = [], '', ''
        comic_div = soup.find('div', id='comic')
        if comic_div:
            picture = comic_div.find('img')
            img_src = [picture['src']]
            alt = picture['alt']
            assert alt == picture['title']
            title = soup.find('meta', property='og:title')['content']
        tag_links = soup.find('div', class_='postmeta').find_all('a', rel='tag')
        return {
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': img_src,
            'title': title,
            'alt': alt,
            'tags': ' '.join([tag.string for tag in tag_links]),
        }
1658
1659
1660 View Code Duplication
class WarehouseComic(GenericNavigableComic):
    """Class to retrieve Warehouse Comic comics."""
    name = 'warehouse'
    long_name = 'Warehouse Comic'
    url = 'http://warehousecomic.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Read title, publication date and images of a comic page."""
        post_title = soup.find('h2', class_='post-title').string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find('div', id='comic').find_all('img')
        return {
            'img': [picture['src'] for picture in pictures],
            'title': post_title,
            'day': published.day,
            'month': published.month,
            'year': published.year,
        }
1682
1683
1684
class JustSayEh(GenericNavigableComic):
    """Class to retrieve Just Say Eh comics."""
    # Also on http://tapastic.com/series/Just-Say-Eh
    name = 'justsayeh'
    long_name = 'Just Say Eh'
    url = 'http://www.justsayeh.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Read title, images and alt text of a comic page.

        Every image is expected to carry identical alt and title
        attributes; the alt text of the first image is the one returned.
        """
        post_title = soup.find('h2', class_='post-title').string
        comic_div = soup.find("div", id="comic")
        pictures = comic_div.find_all("img")
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        first_alt = pictures[0]['alt']
        return {
            'img': [pic['src'] for pic in pictures],
            'title': post_title,
            'alt': first_alt,
        }
1705 View Code Duplication
0 ignored issues
show
This code seems to be duplicated in your project.
Loading history...
1706
1707
class MouseBearComedy(GenericNavigableComic):
    """Class to retrieve Mouse Bear Comedy comics."""
    # Also on http://mousebearcomedy.tumblr.com
    name = 'mousebear'
    long_name = 'Mouse Bear Comedy'
    url = 'http://www.mousebearcomedy.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Read title, author, publication date and images of a comic page.

        Each image must have alt and title attributes equal to the post
        title.
        """
        post_title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author_name = author_span.find("a").string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, '%B %d, %Y')
        pictures = soup.find("div", id="comic").find_all("img")
        assert all(pic['alt'] == pic['title'] == post_title for pic in pictures)
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [pic['src'] for pic in pictures],
            'title': post_title,
            'author': author_name,
        }
1733
1734
1735
class BigFootJustice(GenericNavigableComic):
    """Class to retrieve Big Foot Justice comics."""
    # Also on http://tapastic.com/series/bigfoot-justice
    name = 'bigfoot'
    long_name = 'Big Foot Justice'
    url = 'http://bigfootjustice.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Read the images of a comic page and derive the title from them.

        The title is the space-joined title attribute of every image, and
        each image must have matching title and alt attributes.
        """
        comic_div = soup.find('div', id='comic')
        pictures = comic_div.find_all('img')
        assert all(pic['title'] == pic['alt'] for pic in pictures)
        joined_title = ' '.join([pic['title'] for pic in pictures])
        return {
            'img': [pic['src'] for pic in pictures],
            'title': joined_title,
        }
1754
1755
1756
class RespawnComic(GenericNavigableComic):
    """Class to retrieve Respawn Comic."""
    # Also on http://respawncomic.tumblr.com
    name = 'respawn'
    long_name = 'Respawn Comic'
    # The address must not carry trailing whitespace.
    url = 'http://respawncomic.com'
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        Returns a link-like dict whose 'href' is the hard-coded address of
        the first comic.
        """
        return {'href': 'http://respawncomic.com/comic/c0001/'}

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Title, author and publication date are read from <meta> tags of the
        parsed page (soup); link is not used. Returns a dict with the keys
        'title', 'author', 'day', 'month', 'year' and 'img'.
        """
        title = soup.find('meta', property='og:title')['content']
        author = soup.find('meta', attrs={'name': 'shareaholic:article_author_name'})['content']
        date_str = soup.find('meta', attrs={'name': 'shareaholic:article_published_time'})['content']
        # Only the leading YYYY-MM-DD part of the timestamp is parsed.
        date_str = date_str[:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        imgs = soup.find_all('meta', property='og:image')
        # og:image entries excluded from the result (presumably site artwork
        # rather than comic panels, judging by the /site/ path - to confirm).
        skip_imgs = {
            'http://respawncomic.com/wp-content/uploads/2016/03/site/HAROLD2.png',
            'http://respawncomic.com/wp-content/uploads/2016/03/site/DEVA.png'
        }
        return {
            'title': title,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [i['content'] for i in imgs if i['content'] not in skip_imgs],
        }
1790 View Code Duplication
0 ignored issues
show
This code seems to be duplicated in your project.
Loading history...
1791
1792
class SafelyEndangered(GenericNavigableComic):
    """Class to retrieve Safely Endangered comics."""
    # Also on http://tumblr.safelyendangered.com
    name = 'endangered'
    long_name = 'Safely Endangered'
    url = 'http://www.safelyendangered.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return a link-like dict pointing at the hard-coded first comic."""
        first_url = 'http://www.safelyendangered.com/comic/ignored/'
        return {'href': first_url}

    @classmethod
    def get_comic_info(cls, soup, link):
        """Read title, date, images and alt text of a comic page.

        The alt text comes from the first image; every image must have
        matching alt and title attributes.
        """
        post_title = soup.find('h2', class_='post-title').string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, '%B %d, %Y')
        pictures = soup.find('div', id='comic').find_all('img')
        first_alt = pictures[0]['alt']
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [pic['src'] for pic in pictures],
            'title': post_title,
            'alt': first_alt,
        }
1822 View Code Duplication
0 ignored issues
show
This code seems to be duplicated in your project.
Loading history...
1823
1824
class PicturesInBoxes(GenericNavigableComic):
    """Class to retrieve Pictures In Boxes comics."""
    # Also on http://picturesinboxescomic.tumblr.com
    name = 'picturesinboxes'
    long_name = 'Pictures in Boxes'
    url = 'http://www.picturesinboxes.com'
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_first_comic_link(cls):
        """Return a link-like dict pointing at the hard-coded first comic."""
        first_url = 'http://www.picturesinboxes.com/2013/10/26/tetris/'
        return {'href': first_url}

    @classmethod
    def get_comic_info(cls, soup, link):
        """Read title, author, date and images of a comic page.

        At least one image is required, and each image must have title and
        alt attributes equal to the post title.
        """
        post_title = soup.find('h2', class_='post-title').string
        author_name = soup.find("span", class_="post-author").find("a").string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, '%B %d, %Y')
        pane = soup.find('div', class_='comicpane')
        pictures = pane.find_all('img')
        assert pictures
        assert all(pic['title'] == pic['alt'] == post_title for pic in pictures)
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [pic['src'] for pic in pictures],
            'title': post_title,
            'author': author_name,
        }
1855
1856
1857
class Penmen(GenericEmptyComic):
    """Entry for the Penmen comics.

    Only the identification attributes are set here; all behaviour is
    inherited from GenericEmptyComic.
    """
    name = 'penmen'
    long_name = 'Penmen'
    url = 'http://penmen.com'
1862
1863
1864
class TheDoghouseDiaries(GenericNavigableComic):
    """Class to retrieve The Dog House Diaries comics."""
    name = 'doghouse'
    long_name = 'The Dog House Diaries'
    url = 'http://thedoghousediaries.com'

    @classmethod
    def get_first_comic_link(cls):
        """Return the <a> with id "firstlink" found on the home page."""
        home_page = get_soup_at_url(cls.url)
        return home_page.find('a', id='firstlink')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the <a> with id "nextlink" or "previouslink"."""
        link_id = 'nextlink' if next_ else 'previouslink'
        return last_soup.find('a', id=link_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Read titles, alt text, image and number of a comic page.

        The number is the last '/'-separated component of the comic URL.
        """
        picture = soup.find('img', src=re.compile('^dhdcomics/.*'))
        page_url = cls.get_url_from_link(link)
        main_title = soup.find('h2', id='titleheader').string
        sub_title = soup.find('div', id='subtext').string
        hover_text = picture.get('title')
        # The src is stripped of surrounding whitespace before being resolved.
        picture_url = urljoin_wrapper(page_url, picture['src'].strip())
        last_component = page_url.split('/')[-1]
        return {
            'title': main_title,
            'title2': sub_title,
            'alt': hover_text,
            'img': [picture_url],
            'num': int(last_component),
        }
1892
1893
1894
class InvisibleBread(GenericListableComic):
    """Class to retrieve Invisible Bread comics."""
    # Also on http://www.gocomics.com/invisible-bread
    name = 'invisiblebread'
    long_name = 'Invisible Bread'
    url = 'http://invisiblebread.com'

    @classmethod
    def get_archive_elements(cls):
        """Return the archive-title cells of the archive page, reversed."""
        archive_url = urljoin_wrapper(cls.url, 'archives/')
        return reversed(get_soup_at_url(archive_url).find_all('td', class_='archive-title'))

    @classmethod
    def get_url_from_archive_element(cls, td):
        """Return the href of the first link found in the archive cell."""
        return td.find('a')['href']

    @classmethod
    def get_comic_info(cls, soup, td):
        """Get information about a particular comics.

        soup is the parsed comic page and td its archive cell. The title
        comes from the link in td, the month and day from the sibling
        preceding td, and the year from the first numeric path component of
        the comic URL. Returns a dict with the keys 'month', 'year', 'day',
        'img' and 'title'.
        """
        url = cls.get_url_from_archive_element(td)
        title = td.find('a').string
        month_and_day = td.previous_sibling.string
        # Escape the site URL so that its dots only match literal dots.
        link_re = re.compile('^%s/([0-9]+)/' % re.escape(cls.url))
        year = link_re.match(url).group(1)
        date_str = month_and_day + ' ' + year
        day = string_to_date(date_str, '%b %d %Y')
        # Only the first image of the comic div is used.
        imgs = [soup.find('div', id='comic').find('img')]
        assert all(i['title'] == i['alt'] == title for i in imgs)
        return {
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
            'title': title,
        }
1930
1931
1932
class DiscoBleach(GenericEmptyComic):  # Does not work anymore
    """Entry for the Disco Bleach Comics.

    Only the identification attributes are set here; all behaviour is
    inherited from GenericEmptyComic.
    """
    name = 'discobleach'
    long_name = 'Disco Bleach'
    url = 'http://discobleach.com'
1937
1938
1939
class TubeyToons(GenericEmptyComic):  # Does not work anymore
    """Entry for the TubeyToons comics.

    Only the identification attributes are set here; all behaviour is
    inherited from GenericEmptyComic.
    """
    # Also on http://tapastic.com/series/Tubey-Toons
    # Also on http://tubeytoons.tumblr.com
    name = 'tubeytoons'
    long_name = 'Tubey Toons'
    url = 'http://tubeytoons.com'
1946
1947
1948
class CompletelySeriousComics(GenericNavigableComic):
    """Class to retrieve Completely Serious comics."""
    name = 'completelyserious'
    long_name = 'Completely Serious Comics'
    url = 'http://completelyseriouscomics.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Read title, author, date, images and alt text of a comic page.

        At least one image is required; every image must have title and
        alt attributes equal to the title attribute of the first image.
        """
        post_title = soup.find('h2', class_='post-title').string
        author_span = soup.find('span', class_='post-author')
        # The author is the second child node of the post-author span.
        author_name = author_span.contents[1].string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, '%B %d, %Y')
        pictures = soup.find('div', class_='comicpane').find_all('img')
        assert pictures
        hover_text = pictures[0]['title']
        assert all(pic['title'] == pic['alt'] == hover_text for pic in pictures)
        return {
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [pic['src'] for pic in pictures],
            'title': post_title,
            'alt': hover_text,
            'author': author_name,
        }
1976
1977
1978
class PoorlyDrawnLines(GenericListableComic):
    """Class to retrieve Poorly Drawn Lines comics."""
    # Also on http://pdlcomics.tumblr.com
    name = 'poorlydrawn'
    long_name = 'Poorly Drawn Lines'
    url = 'http://poorlydrawnlines.com'
    get_url_from_archive_element = get_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        At most one image is expected in the div of class "post". Returns a
        dict with 'img' (list of image sources, possibly empty) and 'title'
        (title attribute of the image, or "" when there is no image or no
        title attribute).
        """
        imgs = soup.find('div', class_='post').find_all('img')
        assert len(imgs) <= 1
        return {
            'img': [i['src'] for i in imgs],
            'title': imgs[0].get('title', "") if imgs else "",
        }

    @classmethod
    def get_archive_elements(cls):
        """Return the comic links of the archive page, reversed.

        A link is kept when its href starts with "<url>/comic/" followed by
        at least one more character.
        """
        archive_url = urljoin_wrapper(cls.url, 'archive')
        # Escape the site URL so that its dots only match literal dots.
        url_re = re.compile('^%s/comic/.' % re.escape(cls.url))
        return reversed(get_soup_at_url(archive_url).find_all('a', href=url_re))
2001
2002
2003
class LoadingComics(GenericNavigableComic):
    """Class to retrieve Loading Artist comics."""
    name = 'loadingartist'
    long_name = 'Loading Artist'
    url = 'http://www.loadingartist.com/latest'

    @classmethod
    def get_first_comic_link(cls):
        """Return the <a> titled "First" found on the page at cls.url."""
        latest_page = get_soup_at_url(cls.url)
        return latest_page.find('a', title="First")

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the <a> titled 'Next' or 'Previous'."""
        wanted_title = 'Next' if next_ else 'Previous'
        return last_soup.find('a', title=wanted_title)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Read title, date and images of a comic page.

        Only images with empty alt and title attributes inside the div of
        class "comic" are kept.
        """
        heading = soup.find('h1').string
        raw_date = soup.find('span', class_='date').string.strip()
        published = string_to_date(raw_date, "%B %d, %Y")
        comic_div = soup.find('div', class_='comic')
        pictures = comic_div.find_all('img', alt='', title='')
        return {
            'title': heading,
            'img': [pic['src'] for pic in pictures],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2032
2033
2034
class ChuckleADuck(GenericNavigableComic):
    """Class to retrieve Chuckle-A-Duck comics."""
    name = 'chuckleaduck'
    long_name = 'Chuckle-A-duck'
    url = 'http://chuckleaduck.com'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Read date, author, images and title of a comic page.

        A page without a div of id "comic" yields an empty image list and
        an empty title. Otherwise the title is the title attribute of the
        first image, which every image must share in title and alt.
        """
        raw_date = soup.find('span', class_='post-date').string
        # Ordinal suffixes are stripped before parsing "%B %d, %Y".
        published = string_to_date(remove_st_nd_rd_th_from_date(raw_date), "%B %d, %Y")
        author_name = soup.find('span', class_='post-author').string
        comic_div = soup.find('div', id='comic')
        if comic_div:
            pictures = comic_div.find_all('img')
        else:
            pictures = []
        if pictures:
            comic_title = pictures[0]['title']
        else:
            comic_title = ""
        assert all(pic['title'] == pic['alt'] == comic_title for pic in pictures)
        return {
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [pic['src'] for pic in pictures],
            'title': comic_title,
            'author': author_name,
        }
0 ignored issues
show
This code seems to be duplicated in your project.
Loading history...
2060
2061
2062
class DepressedAlien(GenericNavigableComic):
    """Class to retrieve Depressed Alien Comics."""
    name = 'depressedalien'
    long_name = 'Depressed Alien'
    url = 'http://depressedalien.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        Returns the parent of the <img> named "beginArrow" on the home page.
        """
        return get_soup_at_url(cls.url).find('img', attrs={'name': 'beginArrow'}).parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to the next (or previous) comic.

        Returns the parent of the <img> named "rightArrow" (next_ true) or
        "leftArrow" (next_ false), or None when the page has no such image.
        """
        arrow = last_soup.find('img', attrs={'name': 'rightArrow' if next_ else 'leftArrow'})
        # find() yields None when nothing matches; report "no link" instead
        # of failing on the attribute access.
        return arrow.parent if arrow is not None else None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with 'title' (content of the twitter:title meta tag)
        and 'img' (content of every og:image meta tag).
        """
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        imgs = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'img': [i['content'] for i in imgs],
        }
2087
2088
2089
class ThingsInSquares(GenericListableComic):
    """Class to retrieve Things In Squares comics."""
    # This can be retrieved in other languages
    # Also on https://tapastic.com/series/Things-in-Squares
    name = 'squares'
    long_name = 'Things in squares'
    url = 'http://www.thingsinsquares.com'

    @classmethod
    def get_comic_info(cls, soup, tr):
        """Collect information from a comic page and its archive row.

        The archive row (tr) must hold exactly three cells: the second one
        provides the title link and the third one the date ("%m.%d.%y").
        The remaining fields come from the parsed comic page (soup).
        """
        _, title_cell, date_cell = tr.find_all('td')
        title_link = title_cell.find('a')
        raw_date = date_cell.string
        published = string_to_date(raw_date, "%m.%d.%y")
        archive_title = title_link.string
        page_title = soup.find('meta', property='og:title')['content']
        desc_meta = soup.find('meta', property='og:description')
        # The description meta tag is optional.
        if desc_meta:
            description = desc_meta['content']
        else:
            description = ''
        tag_metas = soup.find_all('meta', property='article:tag')
        tags = ' '.join([meta['content'] for meta in tag_metas])
        pictures = soup.find('div', class_='entry-content').find_all('img')
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'title': archive_title,
            'title2': page_title,
            'description': description,
            'tags': tags,
            'img': [pic['src'] for pic in pictures],
            'alt': ' '.join([pic['alt'] for pic in pictures]),
        }

    @classmethod
    def get_url_from_archive_element(cls, tr):
        """Return the href of the link in the second cell of the row."""
        # The row must unpack into exactly three cells.
        _, title_cell, _ = tr.find_all('td')
        return title_cell.find('a')['href']

    @classmethod
    def get_archive_elements(cls):
        """Return the rows of the archive page's <tbody>, reversed."""
        archive_page = get_soup_at_url(urljoin_wrapper(cls.url, 'archive-2'))
        rows = archive_page.find('tbody').find_all('tr')
        return reversed(rows)
2131
2132
2133
class HappleTea(GenericNavigableComic):
    """Class to retrieve Happle Tea Comics."""
    name = 'happletea'
    long_name = 'Happle Tea'
    url = 'http://www.happletea.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Read images, title, author and date of a comic page.

        Title, author and date are looked up inside the div of class
        "post-content". Every image must have matching alt and title
        attributes; the returned alt is their concatenation.
        """
        pictures = soup.find('div', id='comic').find_all('img')
        post_div = soup.find('div', class_='post-content')
        post_title = post_div.find('h2', class_='post-title').string
        author_name = post_div.find('a', rel='author').string
        raw_date = post_div.find('span', class_='post-date').string
        published = string_to_date(raw_date, "%B %d, %Y")
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        return {
            'title': post_title,
            'img': [pic['src'] for pic in pictures],
            'alt': ''.join([pic['alt'] for pic in pictures]),
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'author': author_name,
        }
2160
2161
2162
class FatAwesomeComics(GenericNavigableComic):
    """Class to retrieve Fat Awesome Comics."""
    # Also on http://fatawesomecomedy.tumblr.com
    name = 'fatawesome'
    long_name = 'Fat Awesome'
    url = 'http://fatawesome.com/comics'
    get_navi_link = get_a_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return a link-like dict pointing at the hard-coded first comic."""
        first_url = 'http://fatawesome.com/shortbus/'
        return {'href': first_url}

    @classmethod
    def get_comic_info(cls, soup, link):
        """Read title, description, tags, date and image of a comic page.

        Exactly one image with attribute data-recalc-dims="1" is expected;
        whatever follows the last '?' of its src is dropped.
        """
        comic_title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        description = soup.find('meta', attrs={'name': 'description'})['content']
        tag_meta = soup.find('meta', property='article:tag')
        # The tag meta is optional.
        if tag_meta:
            tags = tag_meta['content']
        else:
            tags = ""
        published_meta = soup.find('meta', property='article:published_time')
        # Only the leading YYYY-MM-DD part of the timestamp is parsed.
        raw_date = published_meta['content'][:10]
        published = string_to_date(raw_date, "%Y-%m-%d")
        pictures = soup.find_all('img', attrs={'data-recalc-dims': "1"})
        assert len(pictures) == 1
        return {
            'title': comic_title,
            'description': description,
            'tags': tags,
            'alt': "".join([pic['alt'] for pic in pictures]),
            'img': [pic['src'].rsplit('?', 1)[0] for pic in pictures],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2196
2197
2198
class AnythingComic(GenericListableComic):
    """Class to retrieve Anything Comics."""
    # Also on http://tapastic.com/series/anything
    name = 'anythingcomic'
    long_name = 'Anything Comic'
    url = 'http://www.anythingcomic.com'

    @classmethod
    def get_archive_elements(cls):
        """Return the comic rows of the archive table."""
        archive_page = get_soup_at_url(urljoin_wrapper(cls.url, 'archive/'))
        rows = archive_page.find('table', id='chapter_table').find_all('tr')
        # The first 2 <tr>'s do not correspond to comics
        return rows[2:]

    @classmethod
    def get_url_from_archive_element(cls, tr):
        """Get url corresponding to an archive element."""
        # The row must unpack into exactly four cells; only the second one
        # (holding the comic link) is needed here.
        _, comic_cell, _, _ = tr.find_all('td')
        comic_link = comic_cell.find('a')
        return urljoin_wrapper(cls.url, comic_link['href'])

    @classmethod
    def get_comic_info(cls, soup, tr):
        """Collect information from a comic page and its archive row.

        The archive row (tr) must hold exactly four cells: number, comic
        link and date, plus a last one that is ignored. Exactly one image
        with id "comic_image" is expected on the comic page (soup).
        """
        num_cell, comic_cell, date_cell, _ = tr.find_all('td')
        comic_number = int(num_cell.string)
        comic_link = comic_cell.find('a')
        comic_title = comic_link.string
        pictures = soup.find_all('img', id='comic_image')
        published = string_to_date(date_cell.string, '%d %b %Y %I:%M %p')
        assert len(pictures) == 1
        assert all(pic.get('alt') == pic.get('title') for pic in pictures)
        return {
            'num': comic_number,
            'title': comic_title,
            'alt': pictures[0].get('alt', ''),
            'img': [pic['src'] for pic in pictures],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2238
2239
2240
class LonnieMillsap(GenericNavigableComic):
    """Class to retrieve Lonnie Millsap's comics."""
    name = 'millsap'
    long_name = 'Lonnie Millsap'
    url = 'http://www.lonniemillsap.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return a link-like dict pointing at the hard-coded first comic."""
        first_url = 'http://www.lonniemillsap.com/?p=42'
        return {'href': first_url}

    @classmethod
    def get_comic_info(cls, soup, link):
        """Read title, author, date and images of a comic page.

        Author, date and images are looked up inside the div of class
        "post-content".
        """
        post_title = soup.find('h2', class_='post-title').string
        post_div = soup.find('div', class_='post-content')
        author_name = post_div.find("span", class_="post-author").find("a").string
        raw_date = post_div.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        entry_div = post_div.find("div", class_="entry")
        pictures = entry_div.find_all("img")
        return {
            'title': post_title,
            'author': author_name,
            'img': [pic['src'] for pic in pictures],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2269 View Code Duplication
0 ignored issues
show
This code seems to be duplicated in your project.
Loading history...
2270
2271
class LinsEditions(GenericNavigableComic):
    """Class to retrieve L.I.N.S. Editions comics."""
    # Also on http://linscomics.tumblr.com
    name = 'lins'
    long_name = 'L.I.N.S. Editions'
    url = 'https://linsedition.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return a link-like dict pointing at the hard-coded first comic."""
        first_url = 'https://linsedition.com/2011/09/07/l-i-n-s/'
        return {'href': first_url}

    @classmethod
    def get_comic_info(cls, soup, link):
        """Read title, images and date from the meta tags of a comic page."""
        comic_title = soup.find('meta', property='og:title')['content']
        image_metas = soup.find_all('meta', property='og:image')
        published_meta = soup.find('meta', property='article:published_time')
        # Only the leading YYYY-MM-DD part of the timestamp is parsed.
        raw_date = published_meta['content'][:10]
        published = string_to_date(raw_date, "%Y-%m-%d")
        return {
            'title': comic_title,
            'img': [meta['content'] for meta in image_metas],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2298
2299
2300
class ThorsThundershack(GenericNavigableComic):
    """Class to retrieve Thor's Thundershack comics."""
    # Also on http://tapastic.com/series/Thors-Thundershac
    name = 'thor'
    long_name = 'Thor\'s Thundershack'
    url = 'http://www.thorsthundershack.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the <a> of class "first navlink" found on the home page."""
        home_page = get_soup_at_url(cls.url)
        return home_page.find('a', class_='first navlink')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the first rel next/prev link not pointing at '/comic'.

        None is returned when no such link exists.
        """
        relation = 'next' if next_ else 'prev'
        for candidate in last_soup.find_all('a', rel=relation):
            if candidate['href'] == '/comic':
                continue
            return candidate
        return None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Read title, description, author, images and date of a comic page.

        Every image must have matching title and alt attributes; the alt
        text returned is the one of the first image, or "" without images.
        """
        comic_title = soup.find('meta', attrs={'name': 'description'})["content"]
        body_text = soup.find('div', itemprop='articleBody').text
        author_name = soup.find('span', itemprop='author copyrightHolder').string
        pictures = soup.find_all('img', itemprop='image')
        assert all(pic['title'] == pic['alt'] for pic in pictures)
        if pictures:
            first_alt = pictures[0]['alt']
        else:
            first_alt = ""
        raw_date = soup.find('time', itemprop='datePublished')["datetime"]
        published = string_to_date(raw_date, "%Y-%m-%d %H:%M:%S")
        return {
            # Image sources are resolved against the site URL.
            'img': [urljoin_wrapper(cls.url, pic['src']) for pic in pictures],
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'author': author_name,
            'title': comic_title,
            'alt': first_alt,
            'description': body_text,
        }
2341
2342
2343
class GerbilWithAJetpack(GenericNavigableComic):
    """Retriever for the Gerbil With A Jetpack webcomic."""
    name = 'gerbil'
    long_name = 'Gerbil With A Jetpack'
    url = 'http://gerbilwithajetpack.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary for the comic page held in soup.

        The link argument is not used.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        comic_div = soup.find("div", id="comic")
        pictures = comic_div.find_all("img")
        # The first image provides the alt text; all images must agree on it.
        alt = pictures[0]['alt']
        assert all(pic['alt'] == pic['title'] == alt for pic in pictures)
        img_urls = [pic['src'] for pic in pictures]
        return {
            'img': img_urls,
            'title': title,
            'alt': alt,
            'author': author,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2370
2371
2372
class EveryDayBlues(GenericNavigableComic):
    """Retriever for the Every Day Blues webcomic."""
    name = "blues"
    long_name = "Every Day Blues"
    url = "http://everydayblues.net"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary for the comic page held in soup.

        The link argument is not used.
        """
        title = soup.find("h2", class_="post-title").string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        raw_date = soup.find("span", class_="post-date").string
        # NOTE(review): the third argument is presumably the locale used by
        # string_to_date to parse the month name - confirm against the helper.
        published = string_to_date(raw_date, "%d. %B %Y", "de_DE.utf8")
        pictures = soup.find("div", id="comic").find_all("img")
        # Alt and title of each image must both equal the post title.
        assert all(pic['alt'] == pic['title'] == title for pic in pictures)
        assert len(pictures) <= 1
        img_urls = [pic['src'] for pic in pictures]
        return {
            'img': img_urls,
            'title': title,
            'author': author,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2398
2399
2400
class BiterComics(GenericNavigableComic):
    """Retriever for the Biter Comics webcomic."""
    name = "biter"
    long_name = "Biter Comics"
    url = "http://www.bitercomics.com"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary for the comic page held in soup.

        The link argument is not used.
        """
        title = soup.find("h1", class_="entry-title").string
        author_span = soup.find("span", class_="author vcard")
        author = author_span.find("a").string
        raw_date = soup.find("span", class_="entry-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find("div", id="comic").find_all("img")
        assert not any(pic['alt'] != pic['title'] for pic in pictures)
        # Exactly one image is expected, it provides the alt text.
        assert len(pictures) == 1
        alt = pictures[0]['alt']
        img_urls = [pic['src'] for pic in pictures]
        return {
            'img': img_urls,
            'title': title,
            'alt': alt,
            'author': author,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2428 View Code Duplication
0 ignored issues
show
This code seems to be duplicated in your project.
Loading history...
2429
2430
class TheAwkwardYeti(GenericNavigableComic):
    """Retriever for The Awkward Yeti webcomic."""
    # Also on http://www.gocomics.com/the-awkward-yeti
    # Also on http://larstheyeti.tumblr.com
    # Also on https://tapastic.com/series/TheAwkwardYeti
    name = 'yeti'
    long_name = 'The Awkward Yeti'
    url = 'http://theawkwardyeti.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary for the comic page held in soup.

        The link argument is not used.
        """
        title = soup.find('h2', class_='post-title').string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find("div", id="comic").find_all("img")
        # Only the first image is required to have matching alt and title.
        assert all(pic['alt'] == pic['title'] for pic in pictures[:1])
        img_urls = [pic['src'] for pic in pictures]
        return {
            'img': img_urls,
            'title': title,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2456
2457
2458
class PleasantThoughts(GenericNavigableComic):
    """Retriever for the Pleasant Thoughts webcomic."""
    name = 'pleasant'
    long_name = 'Pleasant Thoughts'
    url = 'http://pleasant-thoughts.com'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary for the comic page held in soup.

        Title and images are both looked up inside the 'post-content' div.
        The link argument is not used.
        """
        post_content = soup.find('div', class_='post-content')
        title = post_content.find('h2', class_='post-title').string
        entry = post_content.find("div", class_="entry")
        pictures = entry.find_all("img")
        info = {
            'title': title,
            'img': [pic['src'] for pic in pictures],
        }
        return info
2476
2477
2478
class MisterAndMe(GenericNavigableComic):
    """Retriever for the Mister & Me webcomic."""
    # Also on http://www.gocomics.com/mister-and-me
    # Also on https://tapastic.com/series/Mister-and-Me
    name = 'mister'
    long_name = 'Mister & Me'
    url = 'http://www.mister-and-me.com'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary for the comic page held in soup.

        The link argument is not used.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find("div", id="comic").find_all("img")
        assert not any(pic['alt'] != pic['title'] for pic in pictures)
        # At most one image is expected; without any, alt is empty.
        assert len(pictures) <= 1
        if pictures:
            alt = pictures[0]['alt']
        else:
            alt = ""
        img_urls = [pic['src'] for pic in pictures]
        return {
            'img': img_urls,
            'title': title,
            'alt': alt,
            'author': author,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2508
2509
2510
class LastPlaceComics(GenericNavigableComic):
    """Retriever for the Last Place Comics webcomic."""
    name = 'lastplace'
    long_name = 'LastPlaceComics'
    url = "http://lastplacecomics.com"
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary for the comic page held in soup.

        The link argument is not used.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find("div", id="comic").find_all("img")
        assert not any(pic['alt'] != pic['title'] for pic in pictures)
        # At most one image is expected; without any, alt is empty.
        assert len(pictures) <= 1
        if pictures:
            alt = pictures[0]['alt']
        else:
            alt = ""
        img_urls = [pic['src'] for pic in pictures]
        return {
            'img': img_urls,
            'title': title,
            'alt': alt,
            'author': author,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2538
2539
2540
class TalesOfAbsurdity(GenericNavigableComic):
    """Retriever for the Tales Of Absurdity webcomic."""
    # Also on http://tapastic.com/series/Tales-Of-Absurdity
    # Also on http://talesofabsurdity.tumblr.com
    name = 'absurdity'
    long_name = 'Tales of Absurdity'
    url = 'http://talesofabsurdity.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary for the comic page held in soup.

        The link argument is not used.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find("div", id="comic").find_all("img")
        assert not any(pic['alt'] != pic['title'] for pic in pictures)
        # Alt text comes from the first image, or is empty without images.
        if pictures:
            alt = pictures[0]['alt']
        else:
            alt = ""
        img_urls = [pic['src'] for pic in pictures]
        return {
            'img': img_urls,
            'title': title,
            'alt': alt,
            'author': author,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2569
2570
2571
class EndlessOrigami(GenericNavigableComic):
    """Retriever for the Endless Origami webcomic."""
    name = "origami"
    long_name = "Endless Origami"
    url = "http://endlessorigami.com"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary for the comic page held in soup.

        The link argument is not used.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find("div", id="comic").find_all("img")
        assert not any(pic['alt'] != pic['title'] for pic in pictures)
        # Alt text comes from the first image, or is empty without images.
        if pictures:
            alt = pictures[0]['alt']
        else:
            alt = ""
        img_urls = [pic['src'] for pic in pictures]
        return {
            'img': img_urls,
            'title': title,
            'alt': alt,
            'author': author,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2598
2599
2600 View Code Duplication
class PlanC(GenericNavigableComic):
    """Retriever for the Plan C webcomic."""
    name = 'planc'
    long_name = 'Plan C'
    url = 'http://www.plancomic.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary for the comic page held in soup.

        The link argument is not used.
        """
        title = soup.find('h2', class_='post-title').string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        comic_div = soup.find('div', id='comic')
        pictures = comic_div.find_all('img')
        info = {
            'title': title,
            'img': [pic['src'] for pic in pictures],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
        return info
2622
2623
2624
class BuniComic(GenericNavigableComic):
    """Retriever for the Buni webcomic."""
    name = 'buni'
    long_name = 'BuniComics'
    url = 'http://www.bunicomic.com'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary for the comic page held in soup.

        The title is taken from the single comic image.
        The link argument is not used.
        """
        comic_div = soup.find('div', id='comic')
        pictures = comic_div.find_all('img')
        assert not any(pic['alt'] != pic['title'] for pic in pictures)
        # Exactly one image is expected.
        assert len(pictures) == 1
        img_urls = [pic['src'] for pic in pictures]
        return {
            'img': img_urls,
            'title': pictures[0]['title'],
        }
2642
2643
2644
class GenericCommitStrip(GenericNavigableComic):
    """Common base for the Commit Strip retrievers (one per language)."""
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary for the comic page held in soup.

        The link argument is not used.
        """
        description = soup.find('meta', property='og:description')['content']
        title = soup.find('meta', property='og:title')['content']
        entry = soup.find('div', class_='entry-content')
        pictures = entry.find_all('img')
        # Image titles (empty when missing) are joined with single spaces.
        picture_titles = [pic.get('title', '') for pic in pictures]
        title2 = ' '.join(picture_titles)
        # Image sources are converted to plain ASCII URIs then joined to
        # the site URL.
        img_urls = []
        for pic in pictures:
            ascii_src = convert_iri_to_plain_ascii_uri(pic['src'])
            img_urls.append(urljoin_wrapper(cls.url, ascii_src))
        return {
            'title': title,
            'title2': title2,
            'description': description,
            'img': img_urls,
        }
2661
2662
2663
class CommitStripFr(GenericCommitStrip):
    """Retriever for the French edition of Commit Strip."""
    name = 'commit_fr'
    long_name = 'Commit Strip (Fr)'
    url = 'http://www.commitstrip.com/fr'

    @classmethod
    def get_first_comic_link(cls):
        """Return a link-like dict whose href is the hard-coded first comic."""
        first_link = {
            'href': 'http://www.commitstrip.com/fr/2012/02/22/interview/',
        }
        return first_link
2673
2674
2675
class CommitStripEn(GenericCommitStrip):
    """Retriever for the English edition of Commit Strip."""
    name = 'commit_en'
    long_name = 'Commit Strip (En)'
    url = 'http://www.commitstrip.com/en'

    @classmethod
    def get_first_comic_link(cls):
        """Return a link-like dict whose href is the hard-coded first comic."""
        first_link = {
            'href': 'http://www.commitstrip.com/en/2012/02/22/interview/',
        }
        return first_link
2685
2686
2687
class GenericBoumerie(GenericNavigableComic):
    """Common base for the Boumeries retrievers (one per language)."""
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next
    # Placeholders: both are passed to string_to_date in get_comic_info.
    date_format = NotImplemented
    lang = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary for the comic page held in soup.

        The date is parsed with the class attributes date_format and lang.
        The link argument is not used.
        """
        title = soup.find('h2', class_='post-title').string
        short_url = soup.find('link', rel='shortlink')['href']
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, cls.date_format, cls.lang)
        pictures = soup.find('div', id='comic').find_all('img')
        assert not any(pic['alt'] != pic['title'] for pic in pictures)
        img_urls = [pic['src'] for pic in pictures]
        return {
            'short_url': short_url,
            'img': img_urls,
            'title': title,
            'author': author,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2713
2714
2715
class BoumerieEn(GenericBoumerie):
    """Retriever for the English edition of Boumeries."""
    name = 'boumeries_en'
    long_name = 'Boumeries (En)'
    url = 'http://comics.boumerie.com'
    # Values handed to string_to_date by GenericBoumerie.get_comic_info.
    date_format = "%B %d, %Y"
    lang = 'en_GB.UTF-8'
2722
2723
2724
class BoumerieFr(GenericBoumerie):
    """Retriever for the French edition of Boumeries."""
    name = 'boumeries_fr'
    long_name = 'Boumeries (Fr)'
    url = 'http://bd.boumerie.com'
    # Values handed to string_to_date by GenericBoumerie.get_comic_info.
    date_format = "%A, %d %B %Y"
    lang = "fr_FR.utf8"
2731
2732
2733
class UnearthedComics(GenericNavigableComic):
    """Class to retrieve Unearthed comics."""
    # Also on http://tapastic.com/series/UnearthedComics
    # Also on http://unearthedcomics.tumblr.com
    name = 'unearthed'
    long_name = 'Unearthed Comics'
    url = 'http://unearthedcomics.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return a link-like dict whose href is the hard-coded first comic."""
        return {'href': 'http://unearthedcomics.com/comics/world-with-turn-signals/'}

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary for the comic page held in soup.

        The title comes from the first h1 (or else h2) element and is empty
        when neither exists. The description is the content of the
        og:description meta tag, or None when the tag (or its content
        attribute) is missing. The link argument is not used.
        """
        short_url = soup.find('link', rel='shortlink')['href']
        title_elt = soup.find('h1') or soup.find('h2')
        title = title_elt.string if title_elt else ""
        # Keep the text of the meta tag, not the tag object itself: the
        # other fields of the dictionary are plain values.
        desc_meta = soup.find('meta', property='og:description')
        desc = desc_meta.get('content') if desc_meta is not None else None
        date_str = soup.find('time', class_='published updated hidden')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        post = soup.find('div', class_="entry content entry-content type-portfolio")
        imgs = post.find_all('img')
        return {
            'title': title,
            'description': desc,
            'url2': short_url,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
2767
2768
2769
class Optipess(GenericNavigableComic):
    """Retriever for the Optipess webcomic."""
    name = 'optipess'
    long_name = 'Optipess'
    url = 'http://www.optipess.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary for the comic page held in soup.

        Pages without a 'comic' div yield no image and an empty alt text.
        The link argument is not used.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        comic_div = soup.find('div', id='comic')
        if comic_div:
            pictures = comic_div.find_all('img')
        else:
            pictures = []
        # The alt text is the title of the first image, if any.
        if pictures:
            alt = pictures[0]['title']
        else:
            alt = ""
        assert all(pic['alt'] == pic['title'] == alt for pic in pictures)
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, "%B %d, %Y")
        img_urls = [pic['src'] for pic in pictures]
        return {
            'title': title,
            'alt': alt,
            'author': author,
            'img': img_urls,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2797
2798
2799
class PainTrainComic(GenericNavigableComic):
    """Class to retrieve Pain Train Comics."""
    name = 'paintrain'
    long_name = 'Pain Train Comics'
    url = 'http://paintraincomic.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary for the comic page held in soup.

        The comic number is read from the shortlink, expected to look like
        '<cls.url>/?p=<number>'. The link argument is not used.
        """
        title = soup.find('h2', class_='post-title').string
        short_url = soup.find('link', rel='shortlink')['href']
        # The URL is escaped so that its dots only match literal dots
        # instead of acting as regex wildcards.
        short_url_re = re.compile(r'^%s/\?p=([0-9]*)' % re.escape(cls.url))
        num = int(short_url_re.match(short_url).group(1))
        imgs = soup.find('div', id='comic').find_all('img')
        # The first image provides the alt text; all images must agree on it.
        alt = imgs[0]['title']
        assert all(i['alt'] == i['title'] == alt for i in imgs)
        date_str = soup.find('span', class_='post-date').string
        day = string_to_date(date_str, "%d/%m/%Y")
        return {
            'short_url': short_url,
            'num': num,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'alt': alt,
            'title': title,
        }
2829
2830
2831
class MoonBeard(GenericNavigableComic):
    """Class to retrieve MoonBeard comics."""
    # Also on http://blog.squiresjam.es/moonbeard
    # Also on http://www.webtoons.com/en/comedy/moon-beard/list?title_no=471
    name = 'moonbeard'
    long_name = 'Moon Beard'
    url = 'http://moonbeard.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary for the comic page held in soup.

        The comic number is read from the shortlink, expected to look like
        '<cls.url>/?p=<number>'. The link argument is not used.
        """
        title = soup.find('h2', class_='post-title').string
        short_url = soup.find('link', rel='shortlink')['href']
        # The URL is escaped so that its dots only match literal dots
        # instead of acting as regex wildcards.
        short_url_re = re.compile(r'^%s/\?p=([0-9]*)' % re.escape(cls.url))
        num = int(short_url_re.match(short_url).group(1))
        imgs = soup.find('div', id='comic').find_all('img')
        # The first image provides the alt text; all images must agree on it.
        alt = imgs[0]['title']
        assert all(i['alt'] == i['title'] == alt for i in imgs)
        date_str = soup.find('span', class_='post-date').string
        day = string_to_date(date_str, "%B %d, %Y")
        # Contents of all article:tag meta elements, separated by spaces.
        tags = ' '.join(t['content'] for t in soup.find_all('meta', property='article:tag'))
        author = soup.find('span', class_='post-author').string
        return {
            'short_url': short_url,
            'num': num,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'title': title,
            'tags': tags,
            'alt': alt,
            'author': author,
        }
2867
2868
2869
class AHamADay(GenericNavigableComic):
    """Class to retrieve class A Ham A Day comics."""
    name = 'ham'
    long_name = 'A Ham A Day'
    url = 'http://www.ahammaday.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return a link-like dict whose href is the hard-coded first comic."""
        return {'href': 'http://www.ahammaday.com/today/3/6/french'}

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor to the next (or previous) comic, if any.

        Returns None when the expected list item is not on the page,
        instead of failing on the missing element.
        """
        # prev is next / next is prev
        li = last_soup.find('li', class_='previous' if next_ else 'next')
        return li.find('a') if li is not None else None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary for the comic page held in soup.

        Image URLs are read from the meta elements with itemprop 'image'.
        The link argument is not used.
        """
        date_str = soup.find('time', class_='published')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        author = soup.find('span', class_='blog-author').find('a').string
        title = soup.find('meta', property='og:title')['content']
        imgs = soup.find_all('meta', itemprop='image')
        return {
            'img': [i['content'] for i in imgs],
            'title': title,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
        }
2902 View Code Duplication
0 ignored issues
show
This code seems to be duplicated in your project.
Loading history...
2903
2904
class LittleLifeLines(GenericNavigableComic):
    """Retriever for the Little Life Lines webcomic."""
    name = 'life'
    long_name = 'Little Life Lines'
    url = 'http://www.littlelifelines.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return a link-like dict whose href is the hard-coded first comic."""
        first_link = {
            'href': 'http://www.littlelifelines.com/comics/well-done',
        }
        return first_link

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor to the next (or previous) comic, or None."""
        # prev is next / next is prev
        wanted_class = 'prev' if next_ else 'next'
        item = last_soup.find('li', class_=wanted_class)
        if not item:
            return None
        return item.find('a')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary for the comic page held in soup.

        Only images having a src attribute are kept.
        The link argument is not used.
        """
        title = soup.find('meta', property='og:title')['content']
        description = soup.find('meta', property='og:description')['content']
        raw_date = soup.find('time', class_='published')['datetime']
        published = string_to_date(raw_date, "%Y-%m-%d")
        author = soup.find('a', rel='author').string
        body = soup.find('div', class_="body entry-content")
        pictures = [
            pic for pic in body.find_all('img')
            if pic.get('src') is not None
        ]
        # The alt text is the one of the first kept image.
        alt = pictures[0]['alt']
        info = {
            'title': title,
            'alt': alt,
            'description': description,
            'author': author,
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [pic['src'] for pic in pictures],
        }
        return info
2944
2945
2946 View Code Duplication
class GenericWordPressInkblot(GenericNavigableComic):
    """Common base for comics published with WordPress and Inkblot."""
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'first webcomic' anchor found on the home page."""
        first_link_class = 'webcomic-link webcomic1-link first-webcomic-link first-webcomic1-link'
        home_page = get_soup_at_url(cls.url)
        return home_page.find('a', class_=first_link_class)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary for the comic page held in soup.

        The link argument is not used.
        """
        title = soup.find('meta', property='og:title')['content']
        image_div = soup.find('div', class_='webcomic-image')
        pictures = image_div.find_all('img')
        published_time = soup.find('meta', property='article:published_time')['content']
        # Only the first 10 characters are parsed, as a "%Y-%m-%d" date.
        raw_date = published_time[:10]
        published = string_to_date(raw_date, "%Y-%m-%d")
        info = {
            'title': title,
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [pic['src'] for pic in pictures],
        }
        return info
2969
2970
2971
class EverythingsStupid(GenericWordPressInkblot):
    """Retriever for the Everything's Stupid webcomic."""
    # Also on http://tapastic.com/series/EverythingsStupid
    # Also on http://www.webtoons.com/en/challenge/everythings-stupid/list?title_no=14591
    # Also on http://everythingsstupidcomics.tumblr.com
    name = 'stupid'
    long_name = "Everything's Stupid"
    url = 'http://everythingsstupid.net'
2979
2980
2981
class TheIsmComics(GenericWordPressInkblot):
    """Retriever for The Ism webcomic."""
    # Also on https://tapastic.com/series/TheIsm (?)
    name = 'theism'
    long_name = "The Ism"
    url = 'http://www.theism-comics.com'
2987
2988 View Code Duplication
0 ignored issues
show
This code seems to be duplicated in your project.
Loading history...
2989
class WoodenPlankStudios(GenericWordPressInkblot):
    """Retriever for the Wooden Plank Studios webcomic."""
    name = 'woodenplank'
    long_name = 'Wooden Plank Studios'
    url = 'http://woodenplankstudios.com'
2994
2995
2996
class ElectricBunnyComic(GenericNavigableComic):
    """Retriever for the Electric Bunny webcomic."""
    # Also on http://electricbunnycomics.tumblr.com
    name = 'bunny'
    long_name = 'Electric Bunny Comic'
    url = 'http://www.electricbunnycomics.com/View/Comic/153/Welcome+to+Hell'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the parent element of the 'First' image of the page at url."""
        page = get_soup_at_url(cls.url)
        first_img = page.find('img', alt='First')
        return first_img.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the parent of the 'Next' (or 'Back') image, or None."""
        wanted_alt = 'Next' if next_ else 'Back'
        nav_img = last_soup.find('img', alt=wanted_alt)
        if not nav_img:
            return None
        return nav_img.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary for the comic page held in soup.

        Title and image URLs are read from the og: meta elements.
        The link argument is not used.
        """
        title = soup.find('meta', property='og:title')['content']
        image_metas = soup.find_all('meta', property='og:image')
        info = {
            'title': title,
            'img': [meta['content'] for meta in image_metas],
        }
        return info
3023
3024
3025
class SheldonComics(GenericNavigableComic):
    """Retriever for the Sheldon webcomic."""
    # Also on http://www.gocomics.com/sheldon
    name = 'sheldon'
    long_name = 'Sheldon Comics'
    url = 'http://www.sheldoncomics.com'

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'nav-first' anchor found on the home page."""
        home_page = get_soup_at_url(cls.url)
        return home_page.find("a", id="nav-first")

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the next (or previous) anchor, or None if there is none.

        Anchors pointing back to the site root are ignored.
        """
        nav_id = "nav-next" if next_ else "nav-prev"
        anchors = last_soup.find_all("a", id=nav_id)
        return next(
            (a for a in anchors if a['href'] != 'http://www.sheldoncomics.com'),
            None)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary for the comic page held in soup.

        The title is taken from the single image of the 'comic-foot' div.
        The link argument is not used.
        """
        comic_foot = soup.find("div", id="comic-foot")
        pictures = comic_foot.find_all("img")
        assert not any(pic['alt'] != pic['title'] for pic in pictures)
        # Exactly one image is expected.
        assert len(pictures) == 1
        title = pictures[0]['title']
        info = {
            'title': title,
            'img': [pic['src'] for pic in pictures],
        }
        return info
3055
3056
3057
class CubeDrone(GenericNavigableComic):
    """Class to retrieve Cube Drone comics."""
    name = 'cubedrone'
    long_name = 'Cube Drone'
    url = 'http://cube-drone.com/comics'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the parent element of the 'backward' glyph icon."""
        return get_soup_at_url(cls.url).find('span', class_='glyphicon glyphicon-backward').parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the parent of the right (or left) chevron icon, if any.

        Returns None when the chevron is not on the page, instead of
        failing on the missing element.
        """
        class_ = 'glyphicon glyphicon-chevron-' + ('right' if next_ else 'left')
        chevron = last_soup.find('span', class_=class_)
        return chevron.parent if chevron is not None else None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary for the comic page held in soup.

        title2 and alt both come from the first comic image.
        The link argument is not used.
        """
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        url2 = soup.find('meta', attrs={'name': 'twitter:url'})['content']
        # The publication date is currently not extracted:
        # date_str = soup.find('h2', class_='comic_title').find('small').string
        # day = string_to_date(date_str, "%B %d, %Y, %I:%M %p")
        imgs = soup.find_all('img', class_='comic img-responsive')
        title2 = imgs[0]['title']
        alt = imgs[0]['alt']
        return {
            'url2': url2,
            'title': title,
            'title2': title2,
            'alt': alt,
            'img': [i['src'] for i in imgs],
        }
3091
3092
3093
class MakeItStoopid(GenericNavigableComic):
    """Make It Stoopid comics, browsed through the 'cnav' navigation links."""

    name = "stoopid"
    long_name = "Make it stoopid"
    url = "http://makeitstoopid.com/comic.php"

    @classmethod
    def get_nav(cls, soup):
        """Return the first five 'cnav' elements of the page.

        The page is expected to contain the same navigation elements twice
        (the first five must equal the remaining ones). Elements without a
        'href' attribute are replaced by None. According to the original
        note, the order is: begin, prev, archive, next, end.
        """
        elements = soup.find_all(class_='cnav')
        first_bar = elements[:5]
        second_bar = elements[5:]
        assert first_bar == second_bar
        return [elt if elt.get('href') is not None else None for elt in first_bar]

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'begin' navigation link of the main page."""
        navigation = cls.get_nav(get_soup_at_url(cls.url))
        return navigation[0]

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the 'next' (index 3) or 'prev' (index 1) navigation link."""
        navigation = cls.get_nav(last_soup)
        if next_:
            return navigation[3]
        return navigation[1]

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the title (taken from the link) and images of a comic."""
        comic_title = link['title']
        pictures = soup.find_all('img', id='comicimg')
        sources = [picture['src'] for picture in pictures]
        return {'title': comic_title, 'img': sources}
3125
3126
3127
class GeekAndPoke(GenericNavigableComic):
    """Class to retrieve Geek And Poke comics."""
    name = 'geek'
    long_name = 'Geek And Poke'
    url = 'http://geek-and-poke.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        The address is hard-coded: a plain dict with an 'href' key is
        returned in place of a link element.
        """
        return {'href': 'http://geek-and-poke.com/geekandpoke/2006/8/27/a-new-place-for-a-not-so-old-blog.html'}

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to the next (if next_ is true) or previous comic."""
        # NOTE(review): the 'prev-item' link is used to go forward and the
        # 'next-item' link to go backward - presumably the site names its
        # links after a newest-first ordering; confirm against a live page.
        return last_soup.find('a', class_='prev-item' if next_ else 'next-item')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with the title and description (from the OpenGraph
        meta tags), the author, the publication date split into
        day/month/year, the alternative text of the first image and the
        absolute urls of the images. The 'link' argument is not used.
        """
        title = soup.find('meta', property='og:title')['content']
        desc = soup.find('meta', property='og:description')['content']
        date_str = soup.find('time', class_='published')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        author = soup.find('a', rel='author').string
        # Content is in one of two possible containers.
        div_content = (soup.find('div', class_="body entry-content") or
                       soup.find('div', class_="special-content"))
        imgs = div_content.find_all('img')
        imgs = [i for i in imgs if i.get('src') is not None]
        # NOTE(review): with BeautifulSoup, `'title' not in i` looks like it
        # tests the children of the tag rather than its attributes, which
        # would make this check always pass - confirm (i.has_attr('title')
        # may be what was meant).
        assert all('title' not in i or i['alt'] == i['title'] for i in imgs)
        # Always a string: an empty one when there is no image or no 'alt'
        # (it used to be an empty list when there was no image).
        alt = imgs[0].get('alt', "") if imgs else ""
        return {
            'title': title,
            'alt': alt,
            'description': desc,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
        }
3167
3168
3169
class GenericTumblrV1(GenericComic):
    """Generic class to retrieve comics from Tumblr using the V1 API.

    The subclasses in this file only define 'name', 'long_name' and 'url'
    (the address of the blog).
    """

    @classmethod
    def get_next_comic(cls, last_comic):
        """Yield the comics found in the posts returned by get_posts.

        Posts for which get_comic_info returns None (posts whose type is
        not 'photo') are skipped.
        """
        for p in cls.get_posts(last_comic):
            comic = cls.get_comic_info(p)
            if comic is not None:
                yield comic

    @classmethod
    def get_url_from_post(cls, post):
        """Get the url identifying a post (its 'url' attribute)."""
        return post['url']

    @classmethod
    def get_api_url(cls):
        """Get the url of the V1 API for the blog: '/api/read/' on cls.url."""
        return urljoin_wrapper(cls.url, '/api/read/')

    @classmethod
    def get_comic_info(cls, post):
        """Get information about a particular comics.

        Returns None when the post is not a 'photo' post. Otherwise, returns
        a dict with the urls, the date (day/month/year), the caption used as
        a title, the space-separated tags, the image urls, the tumblr id and
        the API url for that single post.
        """
        # print(post)
        type_ = post['type']
        if type_ != 'photo':
            # print("Type is %s" % type_)
            return None
        tumblr_id = int(post['id'])
        api_url = cls.get_api_url() + '?id=%d' % (tumblr_id)
        # Converted using the local timezone of the machine.
        day = datetime.datetime.fromtimestamp(int(post['unix-timestamp'])).date()
        caption = post.find('photo-caption')
        title = caption.string if caption else ""
        tags = ' '.join(t.string for t in post.find_all('tag'))
        # Photos may appear in 'photo' tags and/or straight in the post
        photo_tags = post.find_all('photo')
        if not photo_tags:
            photo_tags = [post]
        # Images are in multiple resolutions - taking the first one
        imgs = [photo.find('photo-url') for photo in photo_tags]
        return {
            'url': cls.get_url_from_post(post),
            'url2': post['url-with-slug'],
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'title': title,
            'tags': tags,
            'img': [i.string for i in imgs],
            'tumblr-id': tumblr_id,
            'api_url': api_url,  # for debug purposes
        }

    @classmethod
    def get_posts(cls, last_comic, nb_post_per_call=10):
        """Get posts using API. nb_post_per_call is max 50.

        Posts are retrieved from newer to older as per the tumblr v1 api
        but are returned in chronological order.

        last_comic is the last comic already retrieved (None to get all the
        posts): only the posts found before reaching its url are returned.
        An empty iterable is returned when the previous post cannot be
        retrieved or is not found among the posts.
        """
        # 'import urllib' alone does not load the 'urllib.error' submodule:
        # import it explicitly instead of relying on another module having
        # done so.
        import urllib.error

        waiting_for_url = last_comic['url'] if last_comic else None
        posts_acc = []
        if last_comic is not None:
            # Sometimes, tumblr posts are deleted. When the previous post is
            # deleted, we might end up spending a lot of time looking for
            # something that doesn't exist. Failing early and clearly might be
            # a better option.
            last_api_url = last_comic['api_url']
            try:
                get_soup_at_url(last_api_url)
            except urllib.error.HTTPError:
                # Check the main url only to print a more precise message.
                try:
                    get_soup_at_url(cls.url)
                except urllib.error.HTTPError:
                    print("Did not find previous post nor main url %s" % cls.url)
                else:
                    print("Did not find previous post %s : it might have been deleted" % last_api_url)
                # posts_acc is still empty at this point: nothing is returned.
                return reversed(posts_acc)
        api_url = cls.get_api_url()
        posts = get_soup_at_url(api_url).find('posts')
        start, total = int(posts['start']), int(posts['total'])
        assert start == 0
        for starting_num in range(0, total, nb_post_per_call):
            api_url2 = api_url + '?start=%d&num=%d' % (starting_num, nb_post_per_call)
            # print(api_url2)
            posts2 = get_soup_at_url(api_url2).find('posts')
            start2, total2 = int(posts2['start']), int(posts2['total'])
            assert starting_num == start2, "%d != %d" % (starting_num, start2)
            # This may happen and should be handled in the future
            assert total == total2, "%d != %d" % (total, total2)
            for p in posts2.find_all('post'):
                # Stop as soon as the last known post is reached.
                if waiting_for_url and waiting_for_url == cls.get_url_from_post(p):
                    return reversed(posts_acc)
                posts_acc.append(p)
        if waiting_for_url is None:
            return reversed(posts_acc)
        print("Did not find %s : there might be a problem" % waiting_for_url)
        return []
3263
3264
3265
class IrwinCardozo(GenericTumblrV1):
    """Irwin Cardozo comics, retrieved through the Tumblr V1 API."""

    name = "irwinc"
    long_name = "Irwin Cardozo"
    url = "http://irwincardozocomics.tumblr.com"
3270
3271
3272
class AccordingToDevin(GenericTumblrV1):
    """According To Devin comics, retrieved through the Tumblr V1 API."""

    name = "devin"
    long_name = "According To Devin"
    url = "http://accordingtodevin.tumblr.com"
3277
3278
3279
class ItsTheTieTumblr(GenericTumblrV1):
    """It's the tie comics, retrieved through the Tumblr V1 API.

    Also published on:
    - http://itsthetie.com
    - https://tapastic.com/series/itsthetie
    """

    name = "tie-tumblr"
    long_name = "It's the tie (from Tumblr)"
    url = "http://itsthetie.tumblr.com"
3286
3287
3288
class OctopunsTumblr(GenericTumblrV1):
    """Octopuns comics, retrieved through the Tumblr V1 API.

    Also published on http://www.octopuns.net
    """

    name = "octopuns-tumblr"
    long_name = "Octopuns (from Tumblr)"
    url = "http://octopuns.tumblr.com"
3294
3295
3296
class PicturesInBoxesTumblr(GenericTumblrV1):
    """Pictures In Boxes comics, retrieved through the Tumblr V1 API.

    Also published on http://www.picturesinboxes.com
    """

    name = "picturesinboxes-tumblr"
    long_name = "Pictures in Boxes (from Tumblr)"
    url = "http://picturesinboxescomic.tumblr.com"
3302
3303
3304
class TubeyToonsTumblr(GenericTumblrV1):
    """TubeyToons comics, retrieved through the Tumblr V1 API.

    Also published on:
    - http://tapastic.com/series/Tubey-Toons
    - http://tubeytoons.com
    """

    name = "tubeytoons-tumblr"
    long_name = "Tubey Toons (from Tumblr)"
    url = "http://tubeytoons.tumblr.com"
3311
3312
3313
class UnearthedComicsTumblr(GenericTumblrV1):
    """Unearthed comics, retrieved through the Tumblr V1 API.

    Also published on:
    - http://tapastic.com/series/UnearthedComics
    - http://unearthedcomics.com
    """

    name = "unearthed-tumblr"
    long_name = "Unearthed Comics (from Tumblr)"
    url = "http://unearthedcomics.tumblr.com"
3320
3321
3322
class PieComic(GenericTumblrV1):
    """Pie Comic comics, retrieved through the Tumblr V1 API."""

    name = "pie"
    long_name = "Pie Comic"
    url = 'http://piecomic.tumblr.com'
3327
3328
3329
class MrEthanDiamond(GenericTumblrV1):
    """Mr Ethan Diamond comics, retrieved through the Tumblr V1 API."""

    name = "diamond"
    long_name = "Mr Ethan Diamond"
    url = "http://mrethandiamond.tumblr.com"
3334
3335
3336
class Flocci(GenericTumblrV1):
    """floccinaucinihilipilification comics, retrieved through the Tumblr V1 API."""

    name = "flocci"
    long_name = "floccinaucinihilipilification"
    url = 'http://floccinaucinihilipilificationa.tumblr.com'
3341
3342
3343
class UpAndOut(GenericTumblrV1):
    """Up & Out comics, retrieved through the Tumblr V1 API.

    Also published on http://tapastic.com/series/UP-and-OUT
    """

    name = "upandout"
    long_name = "Up And Out (from Tumblr)"
    url = "http://upandoutcomic.tumblr.com"
3349
3350
3351
class Pundemonium(GenericTumblrV1):
    """Pundemonium comics, retrieved through the Tumblr V1 API."""

    name = "pundemonium"
    long_name = "Pundemonium"
    url = "http://monstika.tumblr.com"
3356
3357
3358
class PoorlyDrawnLinesTumblr(GenericTumblrV1):
    """Poorly Drawn Lines comics, retrieved through the Tumblr V1 API.

    Also published on http://poorlydrawnlines.com
    """

    name = "poorlydrawn-tumblr"
    long_name = "Poorly Drawn Lines (from Tumblr)"
    url = "http://pdlcomics.tumblr.com"
3364
3365
3366
class PearShapedComics(GenericTumblrV1):
    """Pear-Shaped Comics, retrieved through the Tumblr V1 API."""

    name = "pearshaped"
    long_name = "Pear-Shaped Comics"
    url = "http://pearshapedcomics.com"
3371
3372
3373
class PondScumComics(GenericTumblrV1):
    """Pond Scum comics, retrieved through the Tumblr V1 API."""

    name = "pond"
    long_name = "Pond Scum"
    url = "http://pondscumcomic.tumblr.com"
3378
3379
3380
class MercworksTumblr(GenericTumblrV1):
    """Mercworks comics, retrieved through the Tumblr V1 API.

    Also published on http://mercworks.net
    """

    name = "mercworks-tumblr"
    long_name = "Mercworks (from Tumblr)"
    url = "http://mercworks.tumblr.com"
3386
3387
3388
class OwlTurdTumblr(GenericTumblrV1):
    """Owl Turd comics, retrieved through the Tumblr V1 API.

    Also published on http://tapastic.com/series/Owl-Turd-Comix
    """

    name = "owlturd-tumblr"
    long_name = "Owl Turd (from Tumblr)"
    url = "http://owlturd.com"
3394
3395
3396
class VectorBelly(GenericTumblrV1):
    """Vector Belly comics, retrieved through the Tumblr V1 API.

    Also published on http://vectorbelly.com
    """

    name = "vector"
    long_name = "Vector Belly"
    url = "http://vectorbelly.tumblr.com"
3402
3403
3404
class GoneIntoRapture(GenericTumblrV1):
    """Gone Into Rapture comics, retrieved through the Tumblr V1 API.

    Also published on:
    - http://goneintorapture.tumblr.com
    - http://tapastic.com/series/Goneintorapture
    """

    name = "rapture"
    long_name = "Gone Into Rapture"
    url = "http://www.goneintorapture.com"
3411
3412
3413
class TheOatmealTumblr(GenericTumblrV1):
    """The Oatmeal comics, retrieved through the Tumblr V1 API.

    Also published on http://theoatmeal.com
    """

    name = "oatmeal-tumblr"
    long_name = "The Oatmeal (from Tumblr)"
    url = "http://oatmeal.tumblr.com"
3419
3420
3421
class HeckIfIKnowComicsTumblr(GenericTumblrV1):
    """Heck If I Know comics, retrieved through the Tumblr V1 API.

    Also published on http://tapastic.com/series/Regular
    """

    name = "heck-tumblr"
    long_name = "Heck if I Know comics (from Tumblr)"
    url = "http://heckifiknowcomics.com"
3427
3428
3429
class MyJetPack(GenericTumblrV1):
    """My Jet Pack comics, retrieved through the Tumblr V1 API."""

    name = "jetpack"
    long_name = "My Jet Pack"
    url = "http://myjetpack.tumblr.com"
3434
3435
3436
class CheerUpEmoKidTumblr(GenericTumblrV1):
    """CheerUpEmoKid comics, retrieved through the Tumblr V1 API.

    Also published on:
    - http://www.cheerupemokid.com
    - http://tapastic.com/series/CUEK
    """

    name = "cuek-tumblr"
    long_name = "Cheer Up Emo Kid (from Tumblr)"
    url = "http://enzocomics.tumblr.com"
3443
3444
3445
class ForLackOfABetterComic(GenericTumblrV1):
    """For Lack Of A Better Comic, retrieved through the Tumblr V1 API.

    Also published on http://forlackofabettercomic.com
    """

    name = "lack"
    long_name = "For Lack Of A Better Comic"
    url = "http://forlackofabettercomic.tumblr.com"
3451
3452
3453
class ZenPencilsTumblr(GenericTumblrV1):
    """ZenPencils comics, retrieved through the Tumblr V1 API.

    Also published on:
    - http://zenpencils.com
    - http://www.gocomics.com/zen-pencils
    """

    name = "zenpencils-tumblr"
    long_name = "Zen Pencils (from Tumblr)"
    url = "http://zenpencils.tumblr.com"
3460
3461
3462
class ThreeWordPhraseTumblr(GenericTumblrV1):
    """Three Word Phrase comics, retrieved through the Tumblr V1 API.

    Also published on http://threewordphrase.com
    """

    name = "threeword-tumblr"
    long_name = "Three Word Phrase (from Tumblr)"
    url = "http://www.threewordphrase.tumblr.com"
3468
3469
3470
class TimeTrabbleTumblr(GenericTumblrV1):
    """Time Trabble comics, retrieved through the Tumblr V1 API.

    Also published on http://timetrabble.com
    """

    name = "timetrabble-tumblr"
    long_name = "Time Trabble (from Tumblr)"
    url = "http://timetrabble.tumblr.com"
3476
3477
3478
class SafelyEndangeredTumblr(GenericTumblrV1):
    """Safely Endangered comics, retrieved through the Tumblr V1 API.

    Also published on http://www.safelyendangered.com
    """

    name = "endangered-tumblr"
    long_name = "Safely Endangered (from Tumblr)"
    url = "http://tumblr.safelyendangered.com"
3484
3485
3486
class MouseBearComedyTumblr(GenericTumblrV1):
    """Mouse Bear Comedy comics, retrieved through the Tumblr V1 API.

    Also published on http://www.mousebearcomedy.com
    """

    name = "mousebear-tumblr"
    long_name = "Mouse Bear Comedy (from Tumblr)"
    url = "http://mousebearcomedy.tumblr.com"
3492
3493
3494
class BouletCorpTumblr(GenericTumblrV1):
    """BouletCorp comics, retrieved through the Tumblr V1 API.

    Also published on http://www.bouletcorp.com
    """

    name = "boulet-tumblr"
    long_name = "Boulet Corp (from Tumblr)"
    url = "http://bouletcorp.tumblr.com"
3500
3501
3502
class TheAwkwardYetiTumblr(GenericTumblrV1):
    """The Awkward Yeti comics, retrieved through the Tumblr V1 API.

    Also published on:
    - http://www.gocomics.com/the-awkward-yeti
    - http://theawkwardyeti.com
    - https://tapastic.com/series/TheAwkwardYeti
    """

    name = "yeti-tumblr"
    long_name = "The Awkward Yeti (from Tumblr)"
    url = "http://larstheyeti.tumblr.com"
3510
3511
3512
class NellucNhoj(GenericTumblrV1):
    """Nelluc Nhoj comics, retrieved through the Tumblr V1 API."""

    name = "nhoj"
    long_name = "Nelluc Nhoj"
    url = "http://nellucnhoj.com"
3517
3518
3519
class DownTheUpwardSpiralTumblr(GenericTumblrV1):
    """Down The Upward Spiral comics, retrieved through the Tumblr V1 API.

    Also published on http://www.downtheupwardspiral.com
    """

    name = "spiral-tumblr"
    long_name = "Down the Upward Spiral (from Tumblr)"
    url = "http://downtheupwardspiral.tumblr.com"
3525
3526
3527
class AsPerUsualTumblr(GenericTumblrV1):
    """As Per Usual comics, retrieved through the Tumblr V1 API.

    Also published on https://tapastic.com/series/AsPerUsual
    """

    name = "usual-tumblr"
    long_name = "As Per Usual (from Tumblr)"
    url = "http://as-per-usual.tumblr.com"
3533
3534
3535
class OneOneOneOneComicTumblr(GenericTumblrV1):
    """1111 Comics, retrieved through the Tumblr V1 API.

    Also published on:
    - http://www.1111comics.me
    - https://tapastic.com/series/1111-Comics
    """

    name = "1111-tumblr"
    long_name = "1111 Comics (from Tumblr)"
    url = "http://comics1111.tumblr.com"
3542
3543
3544
class JhallComicsTumblr(GenericTumblrV1):
    """Jhall Comics, retrieved through the Tumblr V1 API.

    Also published on http://jhallcomics.com
    """

    name = "jhall-tumblr"
    long_name = "Jhall Comics (from Tumblr)"
    url = "http://jhallcomics.tumblr.com"
3550
3551
3552
class BerkeleyMewsTumblr(GenericTumblrV1):
    """Berkeley Mews comics, retrieved through the Tumblr V1 API.

    Also published on:
    - http://www.gocomics.com/berkeley-mews
    - http://www.berkeleymews.com
    """

    name = "berkeley-tumblr"
    long_name = "Berkeley Mews (from Tumblr)"
    url = "http://mews.tumblr.com"
3559
3560
3561
class JoanCornellaTumblr(GenericTumblrV1):
    """Joan Cornella comics, retrieved through the Tumblr V1 API.

    Also published on http://joancornella.net
    """

    name = "cornella-tumblr"
    long_name = "Joan Cornella (from Tumblr)"
    url = "http://cornellajoan.tumblr.com"
3567
3568
3569
class RespawnComicTumblr(GenericTumblrV1):
    """Respawn Comic, retrieved through the Tumblr V1 API.

    Also published on http://respawncomic.com
    """

    name = "respawn-tumblr"
    long_name = "Respawn Comic (from Tumblr)"
    url = "http://respawncomic.tumblr.com"
3575
3576
3577
class ChrisHallbeckTumblr(GenericTumblrV1):
    """Class to retrieve Chris Hallbeck comics."""
    # Also on https://tapastic.com/ChrisHallbeck
    # Also on http://maximumble.com
    # Also on http://minimumble.com
    # Also on http://thebookofbiff.com
    name = 'hallbeck-tumblr'
    # The author's name is spelled "Hallbeck", as in the url and the class name
    # (the long name used to read "Hallback").
    long_name = 'Chris Hallbeck (from Tumblr)'
    url = 'http://chrishallbeck.tumblr.com'
3586
3587
3588
class ComicNuggets(GenericTumblrV1):
    """Comic Nuggets, retrieved through the Tumblr V1 API."""

    name = "nuggets"
    long_name = "Comic Nuggets"
    url = "http://comicnuggets.com"
3593
3594
3595
class PigeonGazetteTumblr(GenericTumblrV1):
    """The Pigeon Gazette comics, retrieved through the Tumblr V1 API.

    Also published on https://tapastic.com/series/The-Pigeon-Gazette
    """

    name = "pigeon-tumblr"
    long_name = "The Pigeon Gazette (from Tumblr)"
    url = "http://thepigeongazette.tumblr.com"
3601
3602
3603
class CancerOwl(GenericTumblrV1):
    """Cancer Owl comics, retrieved through the Tumblr V1 API.

    Also published on http://cancerowl.com
    """

    name = "cancerowl-tumblr"
    long_name = "Cancer Owl (from Tumblr)"
    url = "http://cancerowl.tumblr.com"
3609
3610
3611
class FowlLanguageTumblr(GenericTumblrV1):
    """Fowl Language comics, retrieved through the Tumblr V1 API.

    Also published on:
    - http://www.fowllanguagecomics.com
    - http://tapastic.com/series/Fowl-Language-Comics
    - http://www.gocomics.com/fowl-language
    """

    name = "fowllanguage-tumblr"
    long_name = "Fowl Language Comics (from Tumblr)"
    url = "http://fowllanguagecomics.tumblr.com"
3619
3620
3621
class TheOdd1sOutTumblr(GenericTumblrV1):
    """The Odd 1s Out comics, retrieved through the Tumblr V1 API.

    Also published on:
    - http://theodd1sout.com
    - https://tapastic.com/series/Theodd1sout
    """

    name = "theodd-tumblr"
    long_name = "The Odd 1s Out (from Tumblr)"
    url = "http://theodd1sout.tumblr.com"
3628
3629
3630
class TheUnderfoldTumblr(GenericTumblrV1):
    """The Underfold comics, retrieved through the Tumblr V1 API.

    Also published on http://theunderfold.com
    """

    name = "underfold-tumblr"
    long_name = "The Underfold (from Tumblr)"
    url = "http://theunderfold.tumblr.com"
3636
3637
3638
class LolNeinTumblr(GenericTumblrV1):
    """Lol Nein comics, retrieved through the Tumblr V1 API.

    Also published on http://lolnein.com
    """

    name = "lolnein-tumblr"
    long_name = "Lol Nein (from Tumblr)"
    url = "http://lolneincom.tumblr.com"
3644
3645
3646
class FatAwesomeComicsTumblr(GenericTumblrV1):
    """Fat Awesome comics, retrieved through the Tumblr V1 API.

    Also published on http://fatawesome.com/comics
    """

    name = "fatawesome-tumblr"
    long_name = "Fat Awesome (from Tumblr)"
    url = "http://fatawesomecomedy.tumblr.com"
3652
3653
3654
class TheWorldIsFlatTumblr(GenericTumblrV1):
    """The World Is Flat comics, retrieved through the Tumblr V1 API.

    Also published on https://tapastic.com/series/The-World-is-Flat
    """

    name = "flatworld-tumblr"
    long_name = "The World Is Flat (from Tumblr)"
    url = "http://theworldisflatcomics.tumblr.com"
3660
3661
3662
class DorrisMc(GenericTumblrV1):
    """Dorris Mc comics, retrieved through the Tumblr V1 API.

    Also published on http://www.gocomics.com/dorris-mccomics
    """

    name = "dorrismc"
    long_name = "Dorris Mc"
    url = "http://dorrismccomics.com"
3668
3669
3670
class LeleozTumblr(GenericEmptyComic, GenericTumblrV1):
    """Leleoz comics, declared as a Tumblr V1 comic.

    Also published on https://tapastic.com/series/Leleoz
    """

    # NOTE(review): GenericEmptyComic comes first in the bases - presumably it
    # overrides the retrieval so that nothing is fetched; verify its definition.
    name = "leleoz-tumblr"
    long_name = "Leleoz (from Tumblr)"
    url = "http://leleozcomics.tumblr.com"
3676
3677
3678
class MoonBeardTumblr(GenericTumblrV1):
    """MoonBeard comics, retrieved through the Tumblr V1 API.

    Also published on:
    - http://moonbeard.com
    - http://www.webtoons.com/en/comedy/moon-beard/list?title_no=471
    """

    name = "moonbeard-tumblr"
    long_name = "Moon Beard (from Tumblr)"
    url = "http://blog.squiresjam.es/moonbeard"
3685
3686
3687
class AComik(GenericTumblrV1):
    """A Comik comics, retrieved through the Tumblr V1 API."""

    name = "comik"
    long_name = "A Comik"
    url = "http://acomik.com"
3692
3693
3694
class ClassicRandy(GenericTumblrV1):
    """Classic Randy comics, retrieved through the Tumblr V1 API."""

    name = "randy"
    long_name = "Classic Randy"
    url = "http://classicrandy.tumblr.com"
3699
3700
3701
class DagssonTumblr(GenericTumblrV1):
    """Dagsson comics, retrieved through the Tumblr V1 API.

    Also published on http://www.dagsson.com
    """

    name = "dagsson-tumblr"
    long_name = "Dagsson Hugleikur (from Tumblr)"
    url = "http://hugleikurdagsson.tumblr.com"
3707
3708
3709
class LinsEditionsTumblr(GenericTumblrV1):
    """L.I.N.S. Editions comics, retrieved through the Tumblr V1 API.

    Also published on https://linsedition.com
    """

    name = "lins-tumblr"
    long_name = "L.I.N.S. Editions (from Tumblr)"
    url = "http://linscomics.tumblr.com"
3715
3716
3717
class OrigamiHotDish(GenericTumblrV1):
    """Origami Hot Dish comics, retrieved through the Tumblr V1 API."""

    name = "origamihotdish"
    long_name = "Origami Hot Dish"
    url = "http://origamihotdish.com"
3722
3723
3724
class HitAndMissComicsTumblr(GenericTumblrV1):
    """Hit and Miss Comics, retrieved through the Tumblr V1 API."""

    name = "hitandmiss"
    long_name = "Hit and Miss Comics"
    url = "http://hitandmisscomics.tumblr.com"
3729
3730
3731
class HMBlanc(GenericTumblrV1):
    """HM Blanc comics, retrieved through the Tumblr V1 API."""

    name = "hmblanc"
    long_name = "HM Blanc"
    url = "http://hmblanc.tumblr.com"
3736
3737
3738
class TalesOfAbsurdityTumblr(GenericTumblrV1):
    """Tales Of Absurdity comics, retrieved through the Tumblr V1 API.

    Also published on:
    - http://talesofabsurdity.com
    - http://tapastic.com/series/Tales-Of-Absurdity
    """

    name = "absurdity-tumblr"
    long_name = "Tales of Absurdity (from Tumblr)"
    url = "http://talesofabsurdity.tumblr.com"
3745
3746
3747
class RobbieAndBobby(GenericTumblrV1):
    """Robbie And Bobby comics, retrieved through the Tumblr V1 API.

    Also published on http://robbieandbobby.com
    """

    name = "robbie-tumblr"
    long_name = "Robbie And Bobby (from Tumblr)"
    url = "http://robbieandbobby.tumblr.com"
3753
3754
3755
class ElectricBunnyComicTumblr(GenericTumblrV1):
    """Electric Bunny Comics, retrieved through the Tumblr V1 API.

    Also published on
    http://www.electricbunnycomics.com/View/Comic/153/Welcome+to+Hell
    """

    name = "bunny-tumblr"
    long_name = "Electric Bunny Comic (from Tumblr)"
    url = "http://electricbunnycomics.tumblr.com"
3761
3762
3763
class Hoomph(GenericTumblrV1):
    """Hoomph comics, retrieved through the Tumblr V1 API."""

    name = "hoomph"
    long_name = "Hoomph"
    url = "http://hoom.ph"
3768
3769
3770
class BFGFSTumblr(GenericTumblrV1):
    """BFGFS comics, retrieved through the Tumblr V1 API.

    Also published on:
    - https://tapastic.com/series/BFGFS
    - http://bfgfs.com
    """

    name = "bfgfs-tumblr"
    long_name = "BFGFS (from Tumblr)"
    url = "http://bfgfs.tumblr.com"
3777
3778
3779
class DoodleForFood(GenericTumblrV1):
    """Doodle For Food comics, retrieved through the Tumblr V1 API.

    Also published on http://doodleforfood.com
    """

    name = "doodle"
    long_name = "Doodle For Food"
    url = "http://doodleforfood.com"
3785
3786
3787
class HorovitzComics(GenericListableComic):
    """Shared logic for the comics hosted on horovitzcomics.com.

    Subclasses provide 'link_re', a compiled regexp matching the archive
    links of their series and capturing the comic number.
    """

    url = "http://www.horovitzcomics.com"
    img_re = re.compile(".*comics/([0-9]*)/([0-9]*)/([0-9]*)/.*$")
    link_re = NotImplemented
    get_url_from_archive_element = join_cls_url_to_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the description of a comic from its page and archive link.

        The number comes from the link address, the title from the link
        text and the date from the three numeric parts (year, month, day)
        of the address of the single image with id 'comic'.
        """
        address = link['href']
        number = int(cls.link_re.match(address).group(1))
        link_text = link.string
        pictures = soup.find_all('img', id='comic')
        assert len(pictures) == 1
        date_parts = cls.img_re.match(pictures[0]['src']).groups()
        year, month, day = map(int, date_parts)
        return {
            'title': link_text,
            'day': day,
            'month': month,
            'year': year,
            'img': [picture['src'] for picture in pictures],
            'num': number,
        }

    @classmethod
    def get_archive_elements(cls):
        """Return the archive links matching link_re, in reversed page order."""
        archive_page = get_soup_at_url('http://www.horovitzcomics.com/comics/archive/')
        matching_links = archive_page.find_all('a', href=cls.link_re)
        return reversed(matching_links)
3817
3818
3819
class HorovitzNew(HorovitzComics):
    """Horovitz comics listed under '/comics/new/' in the archive."""

    name = "horovitznew"
    long_name = "Horovitz New"
    link_re = re.compile("^/comics/new/([0-9]+)$")
3824
3825
3826
class HorovitzClassic(HorovitzComics):
    """Horovitz comics listed under '/comics/classic/' in the archive."""

    name = "horovitzclassic"
    long_name = "Horovitz Classic"
    link_re = re.compile("^/comics/classic/([0-9]+)$")
3831
3832
3833
class GenericGoComic(GenericNavigableComic):
    """Shared logic for the comics hosted on gocomics.com.

    The date of a comic is read from the last three numeric parts
    (year, month, day) of its url.
    """

    url_date_re = re.compile(".*/([0-9]*)/([0-9]*)/([0-9]*)$")

    @classmethod
    def get_first_comic_link(cls):
        """Return the link of class 'beginning' found on the main page."""
        main_page = get_soup_at_url(cls.url)
        return main_page.find('a', class_='beginning')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the 'next' or 'prev' link whose address ends with a date."""
        if next_:
            wanted_class = 'next'
        else:
            wanted_class = 'prev'
        return last_soup.find('a', class_=wanted_class, href=cls.url_date_re)

    @classmethod
    def get_url_from_link(cls, link):
        """Return the address of a link, resolved against the gocomics site."""
        return urljoin_wrapper('http://www.gocomics.com', link['href'])

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the date, image and author of the comic on a page.

        The image is the last one of class 'strip'; the author comes from
        the 'author' meta tag.
        """
        comic_url = cls.get_url_from_link(link)
        year, month, day = map(int, cls.url_date_re.match(comic_url).groups())
        strips = soup.find_all('img', class_='strip')
        last_strip_src = strips[-1]['src']
        author_tag = soup.find('meta', attrs={'name': 'author'})
        return {
            'day': day,
            'month': month,
            'year': year,
            'img': [last_strip_src],
            'author': author_tag['content'],
        }
3864
3865
3866
class PearlsBeforeSwine(GenericGoComic):
    """Pearls Before Swine comics, retrieved from gocomics.com."""

    name = "pearls"
    long_name = "Pearls Before Swine"
    url = "http://www.gocomics.com/pearlsbeforeswine"
3871
3872
3873
class Peanuts(GenericGoComic):
    """Peanuts comics, retrieved from gocomics.com."""

    name = "peanuts"
    long_name = "Peanuts"
    url = "http://www.gocomics.com/peanuts"
3878
3879
3880
class MattWuerker(GenericGoComic):
    """Matt Wuerker comics, retrieved from gocomics.com."""

    name = "wuerker"
    long_name = "Matt Wuerker"
    url = "http://www.gocomics.com/mattwuerker"
3885
3886
3887
class TomToles(GenericGoComic):
    """Tom Toles comics, retrieved from gocomics.com."""

    name = "toles"
    long_name = "Tom Toles"
    url = "http://www.gocomics.com/tomtoles"
3892
3893
3894
class BreakOfDay(GenericGoComic):
    """Break Of Day comics, retrieved from gocomics.com."""

    name = "breakofday"
    long_name = "Break Of Day"
    url = "http://www.gocomics.com/break-of-day"
3899
3900
3901
class Brevity(GenericGoComic):
    """Brevity comics, retrieved from gocomics.com."""

    name = "brevity"
    long_name = "Brevity"
    url = "http://www.gocomics.com/brevity"
3906
3907
3908
class MichaelRamirez(GenericGoComic):
    """Michael Ramirez comics, retrieved from gocomics.com."""

    name = "ramirez"
    long_name = "Michael Ramirez"
    url = "http://www.gocomics.com/michaelramirez"
3913
3914
3915
class MikeLuckovich(GenericGoComic):
    """Mike Luckovich comics, retrieved from gocomics.com."""

    name = "luckovich"
    long_name = "Mike Luckovich"
    url = "http://www.gocomics.com/mikeluckovich"
3920
3921
3922
class JimBenton(GenericGoComic):
    """Jim Benton comics, retrieved from gocomics.com.

    Also published on http://jimbenton.tumblr.com
    """

    name = "benton"
    long_name = "Jim Benton"
    url = "http://www.gocomics.com/jim-benton-cartoons"
3928
3929
3930
class TheArgyleSweater(GenericGoComic):
    """The Argyle Sweater comics, retrieved from gocomics.com."""

    name = "argyle"
    long_name = "Argyle Sweater"
    url = "http://www.gocomics.com/theargylesweater"
3935
3936
3937
class SunnyStreet(GenericGoComic):
    """Sunny Street comics, retrieved from gocomics.com.

    Also published on http://www.sunnystreetcomics.com
    """

    name = "sunny"
    long_name = "Sunny Street"
    url = "http://www.gocomics.com/sunny-street"
3943
3944
3945
class OffTheMark(GenericGoComic):
    """Off The Mark comics, retrieved from GoComics."""
    # Also published at https://www.offthemark.com
    name = 'offthemark'
    long_name = 'Off The Mark'
    url = 'http://www.gocomics.com/offthemark'
3951
3952
3953
class WuMo(GenericGoComic):
    """WuMo comics, retrieved from GoComics."""
    # Also published at http://wumo.com
    name = 'wumo'
    long_name = 'WuMo'
    url = 'http://www.gocomics.com/wumo'
3959
3960
3961
class LunarBaboon(GenericGoComic):
    """Lunar Baboon comics, retrieved from GoComics."""
    # Also published at:
    #  - http://www.lunarbaboon.com
    #  - https://tapastic.com/series/Lunarbaboon
    name = 'lunarbaboon'
    long_name = 'Lunar Baboon'
    url = 'http://www.gocomics.com/lunarbaboon'
3968
3969
3970
class SandersenGocomic(GenericGoComic):
    """Sarah Andersen comics, retrieved from GoComics."""
    # Also published at:
    #  - http://sarahcandersen.com
    #  - http://tapastic.com/series/Doodle-Time
    name = 'sandersen-goc'
    long_name = 'Sarah Andersen (from GoComics)'
    url = 'http://www.gocomics.com/sarahs-scribbles'
3977
3978
3979
class CalvinAndHobbesGoComic(GenericGoComic):
    """Calvin and Hobbes comics, retrieved from GoComics."""
    # Source is GoComics, not http://marcel-oehler.marcellosendos.ch/comics/ch/
    name = 'calvin-goc'
    long_name = 'Calvin and Hobbes (from GoComics)'
    url = 'http://www.gocomics.com/calvinandhobbes'
3985
3986
3987
class RallGoComic(GenericGoComic):
    """Ted Rall comics, retrieved from GoComics."""
    # Also published at http://rall.com/comic
    name = 'rall-goc'
    long_name = 'Ted Rall (from GoComics)'
    url = 'http://www.gocomics.com/tedrall'
3993
3994
3995
class TheAwkwardYetiGoComic(GenericGoComic):
    """The Awkward Yeti comics, retrieved from GoComics."""
    # Also published at:
    #  - http://larstheyeti.tumblr.com
    #  - http://theawkwardyeti.com
    #  - https://tapastic.com/series/TheAwkwardYeti
    name = 'yeti-goc'
    long_name = 'The Awkward Yeti (from GoComics)'
    url = 'http://www.gocomics.com/the-awkward-yeti'
4003
4004
4005
class BerkeleyMewsGoComics(GenericGoComic):
    """Berkeley Mews comics, retrieved from GoComics."""
    # Also published at:
    #  - http://mews.tumblr.com
    #  - http://www.berkeleymews.com
    name = 'berkeley-goc'
    long_name = 'Berkeley Mews (from GoComics)'
    url = 'http://www.gocomics.com/berkeley-mews'
4012
4013
4014
class SheldonGoComics(GenericGoComic):
    """Sheldon comics, retrieved from GoComics."""
    # Also published at http://www.sheldoncomics.com
    name = 'sheldon-goc'
    long_name = 'Sheldon Comics (from GoComics)'
    url = 'http://www.gocomics.com/sheldon'
4020
4021
4022
class FowlLanguageGoComics(GenericGoComic):
    """Fowl Language comics, retrieved from GoComics."""
    # Also published at:
    #  - http://www.fowllanguagecomics.com
    #  - http://tapastic.com/series/Fowl-Language-Comics
    #  - http://fowllanguagecomics.tumblr.com
    name = 'fowllanguage-goc'
    long_name = 'Fowl Language Comics (from GoComics)'
    url = 'http://www.gocomics.com/fowl-language'
4030
4031
4032
class NickAnderson(GenericGoComic):
    """Nick Anderson comics, retrieved from GoComics."""
    name = 'nickanderson'
    long_name = 'Nick Anderson'
    url = 'http://www.gocomics.com/nickanderson'
4037
4038
4039
class GarfieldGoComics(GenericGoComic):
    """Garfield comics, retrieved from GoComics."""
    # Also published at http://garfield.com
    name = 'garfield-goc'
    long_name = 'Garfield (from GoComics)'
    url = 'http://www.gocomics.com/garfield'
4045
4046
4047
class DorrisMcGoComics(GenericGoComic):
    """Dorris Mc comics, retrieved from GoComics."""
    # Also published at http://dorrismccomics.com
    name = 'dorrismc-goc'
    long_name = 'Dorris Mc (from GoComics)'
    url = 'http://www.gocomics.com/dorris-mccomics'
4053
4054
4055
class MisterAndMeGoComics(GenericGoComic):
    """Mister & Me comics, retrieved from GoComics."""
    # Also published at:
    #  - http://www.mister-and-me.com
    #  - https://tapastic.com/series/Mister-and-Me
    name = 'mister-goc'
    long_name = 'Mister & Me (from GoComics)'
    url = 'http://www.gocomics.com/mister-and-me'
4062
4063
4064
class GenericTapasticComic(GenericListableComic):
    """Base class gathering the retrieval logic shared by tapastic.com comics."""

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Build the description of a single episode.

        `archive_elt` is one entry of get_archive_elements(); `soup` is
        presumably the parsed page of that episode (verify against caller).
        Returns a dict with 'day', 'year', 'month', 'img' and 'title', or
        None (after printing a notice) when the page has no 'art-image'
        picture.
        """
        # 'publishDate' is divided by 1000 before being used as a timestamp:
        # presumably epoch milliseconds - TODO confirm.
        epoch_seconds = int(archive_elt['publishDate']) / 1000.0
        published = datetime.datetime.fromtimestamp(epoch_seconds).date()
        pictures = soup.find_all('img', class_='art-image')
        if not pictures:
            print("Comic %s is being uploaded, retry later" % cls.get_url_from_archive_element(archive_elt))
            return None
        assert len(pictures) > 0
        info = {
            'day': published.day,
            'year': published.year,
            'month': published.month,
            'img': [picture['src'] for picture in pictures],
            'title': archive_elt['title'],
        }
        return info

    @classmethod
    def get_url_from_archive_element(cls, archive_elt):
        """Return the episode URL built from the 'id' of an archive entry."""
        episode_id = str(archive_elt['id'])
        return 'http://tapastic.com/episode/' + episode_id

    @classmethod
    def get_archive_elements(cls):
        """Return the episode list decoded from the series page at `cls.url`.

        The list is embedded in the page's javascript, on a line shaped like
        "episodeList : [...],": the JSON between that prefix and the trailing
        comma is extracted and decoded. This is the ugly way of getting it; no
        cleaner way is known. Raises IndexError when no such line is found.
        """
        prefix, suffix = 'episodeList : ', ','
        candidates = []
        for raw_line in urlopen_wrapper(cls.url).readlines():
            line = raw_line.decode('utf-8').strip()
            if line.startswith(prefix) and line.endswith(suffix):
                candidates.append(line[len(prefix):-len(suffix)])
        return json.loads(candidates[0])
4096
4097
4098
class VegetablesForDessert(GenericTapasticComic):
    """Vegetables For Dessert comics, retrieved from Tapastic."""
    # Also published at http://vegetablesfordessert.tumblr.com
    name = 'vegetables'
    long_name = 'Vegetables For Dessert'
    url = 'http://tapastic.com/series/vegetablesfordessert'
4104
4105
4106
class FowlLanguageTapa(GenericTapasticComic):
    """Fowl Language comics, retrieved from Tapastic."""
    # Also published at:
    #  - http://www.fowllanguagecomics.com
    #  - http://fowllanguagecomics.tumblr.com
    #  - http://www.gocomics.com/fowl-language
    name = 'fowllanguage-tapa'
    long_name = 'Fowl Language Comics (from Tapastic)'
    url = 'http://tapastic.com/series/Fowl-Language-Comics'
4114
4115
4116
class OscillatingProfundities(GenericTapasticComic):
    """Oscillating Profundities comics, retrieved from Tapastic."""
    name = 'oscillating'
    long_name = 'Oscillating Profundities'
    url = 'http://tapastic.com/series/oscillatingprofundities'
4121
4122
4123
class ZnoflatsComics(GenericTapasticComic):
    """Znoflats comics, retrieved from Tapastic."""
    name = 'znoflats'
    long_name = 'Znoflats Comics'
    url = 'http://tapastic.com/series/Znoflats-Comics'
4128
4129
4130
class SandersenTapastic(GenericTapasticComic):
    """Sarah Andersen comics, retrieved from Tapastic."""
    # Also published at:
    #  - http://sarahcandersen.com
    #  - http://www.gocomics.com/sarahs-scribbles
    name = 'sandersen-tapa'
    long_name = 'Sarah Andersen (from Tapastic)'
    url = 'http://tapastic.com/series/Doodle-Time'
4137
4138
4139
class TubeyToonsTapastic(GenericTapasticComic):
    """TubeyToons comics, retrieved from Tapastic."""
    # Also published at:
    #  - http://tubeytoons.com
    #  - http://tubeytoons.tumblr.com
    name = 'tubeytoons-tapa'
    long_name = 'Tubey Toons (from Tapastic)'
    url = 'http://tapastic.com/series/Tubey-Toons'
4146
4147
4148
class AnythingComicTapastic(GenericTapasticComic):
    """Anything Comic comics, retrieved from Tapastic."""
    # Also published at http://www.anythingcomic.com
    name = 'anythingcomic-tapa'
    long_name = 'Anything Comic (from Tapastic)'
    url = 'http://tapastic.com/series/anything'
4154
4155
4156
class UnearthedComicsTapastic(GenericTapasticComic):
    """Unearthed Comics, retrieved from Tapastic."""
    # Also published at:
    #  - http://unearthedcomics.com
    #  - http://unearthedcomics.tumblr.com
    name = 'unearthed-tapa'
    long_name = 'Unearthed Comics (from Tapastic)'
    url = 'http://tapastic.com/series/UnearthedComics'
4163
4164
4165
class EverythingsStupidTapastic(GenericTapasticComic):
    """Everything's Stupid comics, retrieved from Tapastic."""
    # Also published at:
    #  - http://www.webtoons.com/en/challenge/everythings-stupid/list?title_no=14591
    #  - http://everythingsstupid.net
    name = 'stupid-tapa'
    long_name = "Everything's Stupid (from Tapastic)"
    url = 'http://tapastic.com/series/EverythingsStupid'
4172
4173
4174
class JustSayEhTapastic(GenericTapasticComic):
    """Just Say Eh comics, retrieved from Tapastic."""
    # Also published at http://www.justsayeh.com
    name = 'justsayeh-tapa'
    long_name = 'Just Say Eh (from Tapastic)'
    url = 'http://tapastic.com/series/Just-Say-Eh'
4180
4181
4182
class ThorsThundershackTapastic(GenericTapasticComic):
    """Thor's Thundershack comics, retrieved from Tapastic."""
    # Also published at http://www.thorsthundershack.com
    name = 'thor-tapa'
    long_name = "Thor's Thundershack (from Tapastic)"
    url = 'http://tapastic.com/series/Thors-Thundershac'
4188
4189
4190
class OwlTurdTapastic(GenericTapasticComic):
    """Owl Turd comics, retrieved from Tapastic."""
    # Also published at http://owlturd.com
    name = 'owlturd-tapa'
    long_name = 'Owl Turd (from Tapastic)'
    url = 'http://tapastic.com/series/Owl-Turd-Comix'
4196
4197
4198
class GoneIntoRaptureTapastic(GenericTapasticComic):
    """Gone Into Rapture comics, retrieved from Tapastic."""
    # Also published at:
    #  - http://goneintorapture.tumblr.com
    #  - http://www.goneintorapture.com
    name = 'rapture-tapa'
    long_name = 'Gone Into Rapture (from Tapastic)'
    url = 'http://tapastic.com/series/Goneintorapture'
4205
4206
4207
class HeckIfIKnowComicsTapa(GenericTapasticComic):
    """Heck If I Know comics, retrieved from Tapastic."""
    # Also published at http://heckifiknowcomics.com
    name = 'heck-tapa'
    long_name = 'Heck if I Know comics (from Tapastic)'
    url = 'http://tapastic.com/series/Regular'
4213
4214
4215
class CheerUpEmoKidTapa(GenericTapasticComic):
    """Cheer Up Emo Kid comics, retrieved from Tapastic."""
    # Also published at:
    #  - http://www.cheerupemokid.com
    #  - http://enzocomics.tumblr.com
    name = 'cuek-tapa'
    long_name = 'Cheer Up Emo Kid (from Tapastic)'
    url = 'http://tapastic.com/series/CUEK'
4222
4223
4224
class BigFootJusticeTapa(GenericTapasticComic):
    """Big Foot Justice comics, retrieved from Tapastic."""
    # Also published at http://bigfootjustice.com
    name = 'bigfoot-tapa'
    long_name = 'Big Foot Justice (from Tapastic)'
    url = 'http://tapastic.com/series/bigfoot-justice'
4230
4231
4232
class UpAndOutTapa(GenericTapasticComic):
    """Up & Out comics, retrieved from Tapastic."""
    # Also published at http://upandoutcomic.tumblr.com
    name = 'upandout-tapa'
    long_name = 'Up And Out (from Tapastic)'
    url = 'http://tapastic.com/series/UP-and-OUT'
4238
4239
4240
class ToonHoleTapa(GenericTapasticComic):
    """Toon Hole comics, retrieved from Tapastic."""
    # Also published at http://www.toonhole.com
    name = 'toonhole-tapa'
    long_name = 'Toon Hole (from Tapastic)'
    url = 'http://tapastic.com/series/TOONHOLE'
4246
4247
4248
class AngryAtNothingTapa(GenericTapasticComic):
    """Angry At Nothing comics, retrieved from Tapastic."""
    # Also published at http://www.angryatnothing.net
    name = 'angry-tapa'
    long_name = 'Angry At Nothing (from Tapastic)'
    url = 'http://tapastic.com/series/Comics-yeah-definitely-comics-'
4254
4255
4256
class LeleozTapa(GenericTapasticComic):
    """Leleoz comics, retrieved from Tapastic."""
    # Also published at http://leleozcomics.tumblr.com
    name = 'leleoz-tapa'
    long_name = 'Leleoz (from Tapastic)'
    url = 'https://tapastic.com/series/Leleoz'
4262
4263
4264
class TheAwkwardYetiTapa(GenericTapasticComic):
    """The Awkward Yeti comics, retrieved from Tapastic."""
    # Also published at:
    #  - http://www.gocomics.com/the-awkward-yeti
    #  - http://theawkwardyeti.com
    #  - http://larstheyeti.tumblr.com
    name = 'yeti-tapa'
    long_name = 'The Awkward Yeti (from Tapastic)'
    url = 'https://tapastic.com/series/TheAwkwardYeti'
4272
4273
4274
class AsPerUsualTapa(GenericTapasticComic):
    """As Per Usual comics, retrieved from Tapastic."""
    # Also published at http://as-per-usual.tumblr.com
    name = 'usual-tapa'
    long_name = 'As Per Usual (from Tapastic)'
    url = 'https://tapastic.com/series/AsPerUsual'
4280
4281
4282
class OneOneOneOneComicTapa(GenericTapasticComic):
    """1111 Comics, retrieved from Tapastic."""
    # Also published at:
    #  - http://www.1111comics.me
    #  - http://comics1111.tumblr.com
    name = '1111-tapa'
    long_name = '1111 Comics (from Tapastic)'
    url = 'https://tapastic.com/series/1111-Comics'
4289
4290
4291
class TumbleDryTapa(GenericTapasticComic):
    """Tumble Dry comics, retrieved from Tapastic."""
    # Also published at http://tumbledrycomics.com
    name = 'tumbledry-tapa'
    # Display name: the comic is "Tumble Dry" (it used to read "Tumblr Dry").
    long_name = 'Tumble Dry (from Tapastic)'
    url = 'https://tapastic.com/series/TumbleDryComics'
4297
4298
4299
class DeadlyPanelTapa(GenericTapasticComic):
    """Deadly Panel comics, retrieved from Tapastic."""
    # Also published at http://www.deadlypanel.com
    name = 'deadly-tapa'
    long_name = 'Deadly Panel (from Tapastic)'
    url = 'https://tapastic.com/series/deadlypanel'
4305
4306
4307
class ChrisHallbeckMaxiTapa(GenericTapasticComic):
    """Chris Hallbeck's Maximumble comics, retrieved from Tapastic."""
    # Also published at:
    #  - http://chrishallbeck.tumblr.com
    #  - http://maximumble.com
    name = 'hallbeckmaxi-tapa'
    # Author name spelled "Hallbeck" (it used to read "Hallback").
    long_name = 'Chris Hallbeck - Maximumble (from Tapastic)'
    url = 'https://tapastic.com/series/Maximumble'
4314
4315
4316
class ChrisHallbeckMiniTapa(GenericTapasticComic):
    """Chris Hallbeck's Minimumble comics, retrieved from Tapastic."""
    # Also published at:
    #  - http://chrishallbeck.tumblr.com
    #  - http://minimumble.com
    name = 'hallbeckmini-tapa'
    # Author name spelled "Hallbeck" (it used to read "Hallback").
    long_name = 'Chris Hallbeck - Minimumble (from Tapastic)'
    url = 'https://tapastic.com/series/Minimumble'
4323
4324
4325
class ChrisHallbeckBiffTapa(GenericTapasticComic):
    """Chris Hallbeck's The Book of Biff comics, retrieved from Tapastic."""
    # Also published at:
    #  - http://chrishallbeck.tumblr.com
    #  - http://thebookofbiff.com
    name = 'hallbeckbiff-tapa'
    # Author name spelled "Hallbeck" (it used to read "Hallback").
    long_name = 'Chris Hallbeck - The Book of Biff (from Tapastic)'
    url = 'https://tapastic.com/series/Biff'
4332
4333
4334
class RandoWisTapa(GenericTapasticComic):
    """RandoWis comics, retrieved from Tapastic."""
    # Also published at https://randowis.com
    name = 'randowis-tapa'
    long_name = 'RandoWis (from Tapastic)'
    url = 'https://tapastic.com/series/RandoWis'
4340
4341
4342
class PigeonGazetteTapa(GenericTapasticComic):
    """The Pigeon Gazette comics, retrieved from Tapastic."""
    # Also published at http://thepigeongazette.tumblr.com
    name = 'pigeon-tapa'
    long_name = 'The Pigeon Gazette (from Tapastic)'
    url = 'https://tapastic.com/series/The-Pigeon-Gazette'
4348
4349
4350
class TheOdd1sOutTapa(GenericTapasticComic):
    """The Odd 1s Out comics, retrieved from Tapastic."""
    # Also published at:
    #  - http://theodd1sout.com
    #  - http://theodd1sout.tumblr.com
    name = 'theodd-tapa'
    long_name = 'The Odd 1s Out (from Tapastic)'
    url = 'https://tapastic.com/series/Theodd1sout'
4357
4358
4359
class TheWorldIsFlatTapa(GenericTapasticComic):
    """The World Is Flat comics, retrieved from Tapastic."""
    # Also published at http://theworldisflatcomics.tumblr.com
    name = 'flatworld-tapa'
    long_name = 'The World Is Flat (from Tapastic)'
    url = 'https://tapastic.com/series/The-World-is-Flat'
4365
4366
4367
class MisterAndMeTapa(GenericTapasticComic):
    """Mister & Me comics, retrieved from Tapastic."""
    # Also published at:
    #  - http://www.mister-and-me.com
    #  - http://www.gocomics.com/mister-and-me
    name = 'mister-tapa'
    long_name = 'Mister & Me (from Tapastic)'
    url = 'https://tapastic.com/series/Mister-and-Me'
4374
4375
4376
class TalesOfAbsurdityTapa(GenericTapasticComic):
    """Tales Of Absurdity comics, retrieved from Tapastic."""
    # Also published at:
    #  - http://talesofabsurdity.com
    #  - http://talesofabsurdity.tumblr.com
    name = 'absurdity-tapa'
    long_name = 'Tales of Absurdity (from Tapastic)'
    url = 'http://tapastic.com/series/Tales-Of-Absurdity'
4383
4384
4385
class BFGFSTapa(GenericTapasticComic):
    """BFGFS comics, retrieved from Tapastic."""
    # Also published at:
    #  - http://bfgfs.com
    #  - http://bfgfs.tumblr.com
    name = 'bfgfs-tapa'
    long_name = 'BFGFS (from Tapastic)'
    url = 'https://tapastic.com/series/BFGFS'
4392
4393
4394
class DoodleForFoodTapa(GenericTapasticComic):
    """Doodle For Food comics, retrieved from Tapastic."""
    # Also published at http://doodleforfood.com
    name = 'doodle-tapa'
    long_name = 'Doodle For Food (from Tapastic)'
    url = 'https://tapastic.com/series/Doodle-for-Food'
4400
4401
4402
class MrLovensteinTapa(GenericTapasticComic):
    """Mr Lovenstein comics, retrieved from Tapastic."""
    # NOTE(review): the previous "Also on" note pointed at
    # https://tapastic.com/series/MrLovenstein, i.e. this class's own `url`.
    # The intended mirror is presumably the comic's own website - confirm.
    name = 'mrlovenstein-tapa'
    long_name = 'Mr. Lovenstein (from Tapastic)'
    url = 'https://tapastic.com/series/MrLovenstein'
4408
4409
4410
def get_subclasses(klass):
    """Return the list of all classes deriving, directly or not, from `klass`.

    Direct subclasses come first; the descendants of each of them follow, in
    the same order. A class reachable through several paths appears once per
    path.
    """
    children = klass.__subclasses__()
    found = list(children)
    for child in children:
        found += get_subclasses(child)
    return found
4416
4417
4418
def remove_st_nd_rd_th_from_date(string):
    """Strip the ordinal suffix from day numbers (1st/2nd/3rd/4th -> 1/2/3/4).

    Only a 'st', 'nd', 'rd' or 'th' that directly follows a digit and ends a
    word is removed, so the result can be parsed with a plain '%d' directive.
    Day and month names containing those letters ('Monday', 'Saturday',
    'August', ...) are left untouched.
    """
    # The lookbehind keeps the digit; \b avoids eating the start of a longer
    # word glued to the number.
    return re.sub(r'(?<=\d)(?:st|nd|rd|th)\b', '', string)
4426
4427
4428
def string_to_date(string, date_format, local=DEFAULT_LOCAL):
    """Convert a string to a date object.

    Wrapper around datetime.datetime.strptime which temporarily switches the
    process locale to `local` so that locale-dependent directives (month and
    day names) are parsed as expected.

    `date_format` uses the directives described in
    https://docs.python.org/3/library/datetime.html#strftime-and-strptime-behavior

    Raises ValueError when `string` does not match `date_format` and
    locale.Error when `local` is not available; the previous locale is
    restored in every case.
    """
    prev_locale = locale.setlocale(locale.LC_ALL)
    must_switch = local != prev_locale
    if must_switch:
        locale.setlocale(locale.LC_ALL, local)
    try:
        return datetime.datetime.strptime(string, date_format).date()
    finally:
        # The locale is process-wide: put it back even if parsing failed.
        if must_switch:
            locale.setlocale(locale.LC_ALL, prev_locale)
4439
4440
4441
# Registry of every comic class defined in this module.
COMICS = set(get_subclasses(GenericComic))
# Classes whose `name` is None are skipped (presumably the generic base
# classes, which are not meant to be used directly).
VALID_COMICS = [c for c in COMICS if c.name is not None]
COMIC_NAMES = {c.name: c for c in VALID_COMICS}
# Each `name` must identify exactly one class.
assert len(VALID_COMICS) == len(COMIC_NAMES), (
    "Comic classes reusing a `name` already taken: %s"
    % sorted(c.__name__ for c in VALID_COMICS if COMIC_NAMES[c.name] is not c))
CLASS_NAMES = {c.__name__ for c in VALID_COMICS}
# Class names must be unique as well.
assert len(VALID_COMICS) == len(CLASS_NAMES), (
    "Comic class names defined more than once: %s"
    % sorted(n for n in CLASS_NAMES
             if sum(c.__name__ == n for c in VALID_COMICS) > 1))
4447