Completed
Push — master ( fd33e2...b1708a )
by De
01:27
created

comics.py (30 issues)

1
#! /usr/bin/python3
2
# vim: set expandtab tabstop=4 shiftwidth=4 :
3
"""Module to retrieve webcomics"""
4
5
from comic_abstract import GenericComic, get_date_for_comic
6
import re
7
from datetime import date, timedelta
8
import datetime
9
from urlfunctions import get_soup_at_url, urljoin_wrapper,\
10
    convert_iri_to_plain_ascii_uri, load_json_at_url, urlopen_wrapper
11
import json
12
import locale
13
import urllib
14
15
# Default locale name. It is not referenced in the visible part of this file.
# NOTE(review): presumably the fallback locale used when parsing dates - confirm.
DEFAULT_LOCAL = 'en_GB.UTF-8'
16
17
18
class Xkcd(GenericComic):
    """Class to retrieve Xkcd comics."""
    name = 'xkcd'
    long_name = 'xkcd'
    url = 'http://xkcd.com'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Yield the comics following `last_comic`, in increasing number order.

        `last_comic` is the previously retrieved comic (its 'num' entry is
        read) or a false value to start from number 1. Each yielded comic is
        the JSON description loaded for that number, completed with the
        'prefix', 'json_url' and 'url' entries, with 'img' wrapped in a list
        and 'day'/'month'/'year' converted to int.
        """
        start = (last_comic['num'] if last_comic else 0) + 1
        latest_info_url = urljoin_wrapper(cls.url, 'info.0.json')
        stop = load_json_at_url(latest_info_url)['num'] + 1

        for number in range(start, stop):
            # Number 404 is deliberately never requested.
            if number == 404:
                continue
            json_url = urljoin_wrapper(cls.url, '%d/info.0.json' % number)
            comic = load_json_at_url(json_url)
            comic.update({
                'img': [comic['img']],
                'prefix': '%d-' % number,
                'json_url': json_url,
                'url': urljoin_wrapper(cls.url, str(number)),
                'day': int(comic['day']),
                'month': int(comic['month']),
                'year': int(comic['year']),
            })
            assert comic['num'] == number
            yield comic
43
44
45
# Helper functions corresponding to get_url_from_link/get_url_from_archive_element
46
47
48
@classmethod
def get_href(cls, link):
    """Return the 'href' entry of `link`, unchanged.

    Meant to be assigned in a class body as its implementation of
    get_url_from_link/get_url_from_archive_element.
    """
    href = link['href']
    return href
52
53
54
@classmethod
def join_cls_url_to_href(cls, link):
    """Return the 'href' entry of `link` joined to the class `url`.

    Meant to be assigned in a class body as its implementation of
    get_url_from_link/get_url_from_archive_element.
    """
    href = link['href']
    return urljoin_wrapper(cls.url, href)
58
59
60
class GenericNavigableComic(GenericComic):
    """Generic class for "navigable" comics : with first/next arrows.

    The `get_next_comic` method is implemented in terms of more
    specialized methods to be implemented/overridden:
        - get_first_comic_link
        - get_navi_link
        - get_comic_info
        - get_url_from_link
    """

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        Sometimes this can be retrieved from any comic page, sometimes on
        the archive page, sometimes it doesn't exist at all and one has
        to iterate backward to find it before hardcoding the result found.
        """
        raise NotImplementedError

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next (or previous - for dev purposes) comic.

        `last_soup` is the page to navigate from; a true `next_` asks for
        the next comic, a false one for the previous comic.
        """
        raise NotImplementedError

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        `get_next_comic` calls this with the page of the comic and the link
        that led to it. The result is either None (nothing is yielded for
        that page) or a dict without a 'url' entry, which `get_next_comic`
        adds itself.
        """
        raise NotImplementedError

    @classmethod
    def get_url_from_link(cls, link):
        """Get url corresponding to a link.

        The default implementation returns the 'href' entry of the link
        as is; subclasses may replace it with another implementation.
        """
        # Read the entry directly rather than through `cls.get_href`: this
        # class does not define such an attribute.
        return link['href']

    @classmethod
    def get_next_link(cls, last_soup):
        """Get link to next comic."""
        return cls.get_navi_link(last_soup, True)

    @classmethod
    def get_prev_link(cls, last_soup):
        """Get link to previous comic."""
        return cls.get_navi_link(last_soup, False)

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generic implementation of get_next_comic for navigable comics.

        Starts from the page after `last_comic` (its 'url' entry is read)
        or from the first comic link when there is no usable url, then
        follows the "next" links until there is none or until a link
        leads back to the url just visited.
        """
        url = last_comic['url'] if last_comic else None
        next_comic = \
            cls.get_next_link(get_soup_at_url(url)) \
            if url else \
            cls.get_first_comic_link()
        cls.log("next/first comic will be %s (url is %s)" % (str(next_comic), url))
        while next_comic:
            prev_url, url = url, cls.get_url_from_link(next_comic)
            # A "next" link pointing to the current page would loop forever.
            if prev_url == url:
                cls.log("got same url %s" % url)
                break
            cls.log("about to get %s (%s)" % (url, str(next_comic)))
            soup = get_soup_at_url(url)
            comic = cls.get_comic_info(soup, next_comic)
            if comic is not None:
                assert 'url' not in comic
                comic['url'] = url
                yield comic
            next_comic = cls.get_next_link(soup)
            cls.log("next comic will be %s" % str(next_comic))

    @classmethod
    def check_navigation(cls, url):
        """Check that navigation functions seem to be working - for dev purposes.

        Reports (with print) a missing first link, a page without previous
        nor next link, and round trips (backward then forward, forward then
        backward) that do not come back to `url`. Returns True when nothing
        was reported, False otherwise.
        """
        cls.log("about to check navigation from %s" % url)
        ok = True
        firstlink = cls.get_first_comic_link()
        if firstlink is None:
            print("From %s : no first link" % cls.url)
            ok = False
        if url is None:
            prevlink, nextlink = None, None
        else:
            soup = get_soup_at_url(url)
            prevlink, nextlink = cls.get_prev_link(soup), cls.get_next_link(soup)
        if prevlink is None and nextlink is None:
            print("From %s : no previous nor next" % url)
            ok = False
        else:
            if prevlink:
                prevurl = cls.get_url_from_link(prevlink)
                prevsoup = get_soup_at_url(prevurl)
                prevnext = cls.get_url_from_link(cls.get_next_link(prevsoup))
                if prevnext != url:
                    print("From %s, going backward then forward leads to %s" % (url, prevnext))
                    ok = False
            if nextlink:
                nexturl = cls.get_url_from_link(nextlink)
                # The round trip is only checked when "next" leads elsewhere.
                if nexturl != url:
                    nextsoup = get_soup_at_url(nexturl)
                    nextprev = cls.get_url_from_link(cls.get_prev_link(nextsoup))
                    if nextprev != url:
                        print("From %s, going forward then backward leads to %s" % (url, nextprev))
                        ok = False
        cls.log("checked navigation from %s -> returned %d" % (url, ok))
        return ok

    # This method is not defined by default and is not part of this class'API.
    # It is only used:
    # - during development
    # - in subclasses implementing it correctly
    if False:
        @classmethod
        def get_first_comic_url(cls):
            """Get first comic url

            Sometimes, the first comic cannot be reached directly so to start
            from the first comic one has to go to the previous comic until
            there is no previous comics. Once this URL is reached, it
            is better to hardcode it but for development purposes, it
            is convenient to have an automatic way to find it.
            """
            url = input("Get starting URL: ")
            print(url)
            comic = cls.get_prev_link(get_soup_at_url(url))
            while comic:
                url = cls.get_url_from_link(comic)
                print(url)
                comic = cls.get_prev_link(get_soup_at_url(url))
            return url
189
190
191
class GenericListableComic(GenericComic):
    """Generic class for "listable" comics : with a list of comics (aka 'archive')

    The `get_next_comic` method is implemented in terms of more
    specialized methods to be implemented/overridden:
        - get_archive_elements
        - get_url_from_archive_element
        - get_comic_info
    """

    @classmethod
    def get_archive_elements(cls):
        """Get the archive elements (iterable)."""
        raise NotImplementedError

    @classmethod
    def get_url_from_archive_element(cls, archive_elt):
        """Get url corresponding to an archive element."""
        raise NotImplementedError

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Get information about a particular comics."""
        raise NotImplementedError

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generic implementation of get_next_comic for listable comics.

        Archive elements are walked in order. When `last_comic` is given,
        elements are ignored up to and including the one whose url is the
        'url' entry of `last_comic`; every later element is fetched and
        yielded. A message is printed if that url is never met.
        """
        pending_url = last_comic['url'] if last_comic else None
        for element in cls.get_archive_elements():
            url = cls.get_url_from_archive_element(element)
            cls.log("considering %s" % url)
            if pending_url is not None:
                # Still looking for the last retrieved comic.
                if pending_url and pending_url == url:
                    pending_url = None
                continue
            cls.log("about to get %s (%s)" % (url, str(element)))
            page = get_soup_at_url(url)
            info = cls.get_comic_info(page, element)
            if info is None:
                continue
            assert 'url' not in info
            info['url'] = url
            yield info
        if pending_url is not None:
            print("Did not find %s : there might be a problem" % pending_url)
235
236
# Helper functions corresponding to get_first_comic_link/get_navi_link
237
238
239
@classmethod
def get_link_rel_next(cls, last_soup, next_):
    """Implementation of get_navi_link based on `link` tags.

    Looks up the first 'link' element whose rel is 'next' (true `next_`)
    or 'prev' (false `next_`) in `last_soup`.
    """
    if next_:
        rel_value = 'next'
    else:
        rel_value = 'prev'
    return last_soup.find('link', rel=rel_value)
243
244
245
@classmethod
def get_a_rel_next(cls, last_soup, next_):
    """Implementation of get_navi_link based on `a` tags.

    Looks up the first 'a' element whose rel is 'next' (true `next_`)
    or 'prev' (false `next_`) in `last_soup`.
    """
    if next_:
        rel_value = 'next'
    else:
        rel_value = 'prev'
    return last_soup.find('a', rel=rel_value)
249
250
251
@classmethod
def get_a_navi_navinext(cls, last_soup, next_):
    """Implementation of get_navi_link based on 'navi navi-next' anchors.

    Looks up the first 'a' element with class 'navi navi-next' (true
    `next_`) or 'navi navi-prev' (false `next_`) in `last_soup`.
    """
    if next_:
        wanted_class = 'navi navi-next'
    else:
        wanted_class = 'navi navi-prev'
    return last_soup.find('a', class_=wanted_class)
255
256
257
@classmethod
def get_a_navi_comicnavnext_navinext(cls, last_soup, next_):
    """Implementation of get_navi_link based on 'navi comic-nav-*' anchors.

    Looks up the first 'a' element with class
    'navi comic-nav-next navi-next' (true `next_`) or
    'navi comic-nav-previous navi-prev' (false `next_`) in `last_soup`.
    """
    if next_:
        wanted_class = 'navi comic-nav-next navi-next'
    else:
        wanted_class = 'navi comic-nav-previous navi-prev'
    return last_soup.find('a', class_=wanted_class)
261
262
263
@classmethod
def get_a_comicnavbase_comicnavnext(cls, last_soup, next_):
    """Implementation of get_navi_link based on 'comic-nav-base' anchors.

    Looks up the first 'a' element with class
    'comic-nav-base comic-nav-next' (true `next_`) or
    'comic-nav-base comic-nav-previous' (false `next_`) in `last_soup`.
    """
    if next_:
        wanted_class = 'comic-nav-base comic-nav-next'
    else:
        wanted_class = 'comic-nav-base comic-nav-previous'
    return last_soup.find('a', class_=wanted_class)
267
268
269
@classmethod
def get_a_navi_navifirst(cls):
    """Implementation of get_first_comic_link based on a 'navi navi-first' anchor.

    Loads the page at the class `url` and returns its first 'a' element
    with class 'navi navi-first'.
    """
    home_page = get_soup_at_url(cls.url)
    return home_page.find('a', class_='navi navi-first')
273
274
275
@classmethod
def get_div_navfirst_a(cls):
    """Implementation of get_first_comic_link based on a 'nav-first' div.

    Loads the page at the class `url`, finds its first 'div' element with
    class "nav-first" and returns the first 'a' element inside it.
    """
    home_page = get_soup_at_url(cls.url)
    first_div = home_page.find('div', class_="nav-first")
    return first_div.find('a')
279
280
281
@classmethod
def get_a_comicnavbase_comicnavfirst(cls):
    """Implementation of get_first_comic_link based on a 'comic-nav-first' anchor.

    Loads the page at the class `url` and returns its first 'a' element
    with class 'comic-nav-base comic-nav-first'.
    """
    home_page = get_soup_at_url(cls.url)
    return home_page.find('a', class_='comic-nav-base comic-nav-first')
285
286
287
class GenericEmptyComic(GenericComic):
    """Generic class for comics where nothing is to be done.

    It can be useful to temporarily deactivate comics that do not work
    properly by replacing `class MyComic(GenericWhateverComic)` with
    `class MyComic(GenericEmptyComic, GenericWhateverComic)`."""

    @classmethod
    def get_next_comic(cls, last_comic):
        """Return an empty list whatever `last_comic` is."""
        return list()
297
298
299
class ExtraFabulousComics(GenericNavigableComic):
    """Class to retrieve Extra Fabulous Comics."""
    name = 'efc'
    long_name = 'Extra Fabulous Comics'
    url = 'http://extrafabulouscomics.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics: the 'a' element titled 'FIRST' on the home page."""
        home_page = get_soup_at_url(cls.url)
        return home_page.find('a', title='FIRST')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Images are the 'img' elements whose source starts with the
        'wp-content/uploads/' folder of the site; the title also serves
        as file prefix.
        """
        uploads_re = re.compile('^%s/wp-content/uploads/' % cls.url)
        uploaded = soup.find_all('img', src=uploads_re)
        title = soup.find('h2', class_='post-title').string
        return dict(
            title=title,
            img=[tag['src'] for tag in uploaded],
            prefix=title + '-',
        )
322
323
324
class GenericLeMondeBlog(GenericNavigableComic):
    """Generic class to retrieve comics from Le Monde blogs."""
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_url(cls):
        """Get first comic url (to be provided by each blog class)."""
        raise NotImplementedError

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics, built from `get_first_comic_url`."""
        first_url = cls.get_first_comic_url()
        return {'href': first_url}

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The short link, title and images come from 'link'/'meta' elements;
        the date comes from the "entry-date" span, parsed as a French
        "day month year" string.
        """
        short_link = soup.find('link', rel='shortlink')['href']
        og_title = soup.find('meta', property='og:title')['content']
        entry_date = soup.find("span", class_="entry-date").string
        published = string_to_date(entry_date, "%d %B %Y", "fr_FR.utf8")
        og_images = soup.find_all('meta', property='og:image')
        image_urls = [
            convert_iri_to_plain_ascii_uri(meta['content'])
            for meta in og_images
        ]
        return {
            'title': og_title,
            'url2': short_link,
            'img': image_urls,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
354
355
356
class ZepWorld(GenericLeMondeBlog):
    """Class to retrieve Zep World comics."""
    name = "zep"
    long_name = "Zep World"
    url = "http://zepworld.blog.lemonde.fr"

    @classmethod
    def get_first_comic_url(cls):
        """Return the hardcoded url of the comic to start from."""
        start_url = "http://zepworld.blog.lemonde.fr/2014/10/31/bientot-le-blog-de-zep/"
        return start_url
365
366
367
class Vidberg(GenericLeMondeBlog):
    """Class to retrieve Vidberg comics."""
    name = 'vidberg'
    long_name = "Vidberg - l'actu en patates"
    url = "http://vidberg.blog.lemonde.fr"

    @classmethod
    def get_first_comic_url(cls):
        """Return the hardcoded url of the comic to start from."""
        # Not the first but I didn't find an efficient way to retrieve it
        start_url = "http://vidberg.blog.lemonde.fr/2012/02/09/revue-de-campagne-la-campagne-du-modem-semballe/"
        return start_url
377
378
379
class Plantu(GenericLeMondeBlog):
    """Class to retrieve Plantu comics."""
    name = 'plantu'
    long_name = "Plantu"
    url = "http://plantu.blog.lemonde.fr"

    @classmethod
    def get_first_comic_url(cls):
        """Return the hardcoded url of the comic to start from."""
        start_url = "http://plantu.blog.lemonde.fr/2014/10/28/stress-test-a-bruxelles/"
        return start_url
388
389
390
class XavierGorce(GenericLeMondeBlog):
    """Class to retrieve Xavier Gorce comics."""
    name = 'gorce'
    long_name = "Xavier Gorce"
    url = "http://xaviergorce.blog.lemonde.fr"

    @classmethod
    def get_first_comic_url(cls):
        """Return the hardcoded url of the comic to start from."""
        start_url = "http://xaviergorce.blog.lemonde.fr/2015/01/09/distinction/"
        return start_url
399
400
401
class CartooningForPeace(GenericLeMondeBlog):
    """Class to retrieve Cartooning For Peace comics."""
    name = 'forpeace'
    long_name = "Cartooning For Peace"
    url = "http://cartooningforpeace.blog.lemonde.fr"

    @classmethod
    def get_first_comic_url(cls):
        """Return the hardcoded url of the comic to start from."""
        start_url = "http://cartooningforpeace.blog.lemonde.fr/2014/12/15/bado/"
        return start_url
410
411
412
class Aurel(GenericLeMondeBlog):
    """Class to retrieve Aurel comics."""
    name = 'aurel'
    long_name = "Aurel"
    url = "http://aurel.blog.lemonde.fr"

    @classmethod
    def get_first_comic_url(cls):
        """Return the hardcoded url of the comic to start from."""
        start_url = "http://aurel.blog.lemonde.fr/2014/09/29/le-senat-repasse-a-droite/"
        return start_url
421
422
423
class LesCulottees(GenericLeMondeBlog):
    """Class to retrieve Les Culottees comics."""
    name = 'culottees'
    long_name = 'Les Culottees'
    url = "http://lesculottees.blog.lemonde.fr"

    @classmethod
    def get_first_comic_url(cls):
        """Return the hardcoded url of the comic to start from."""
        start_url = "http://lesculottees.blog.lemonde.fr/2016/01/11/clementine-delait-femme-a-barbe/"
        return start_url
432
433
434
class UneAnneeAuLycee(GenericLeMondeBlog):
    """Class to retrieve Une Annee Au Lycee comics."""
    name = 'lycee'
    long_name = 'Une Annee au Lycee'
    url = 'http://uneanneeaulycee.blog.lemonde.fr'

    @classmethod
    def get_first_comic_url(cls):
        """Return the hardcoded url of the comic to start from."""
        start_url = "http://uneanneeaulycee.blog.lemonde.fr/2016/06/13/la-semaine-du-bac-est-arrivee/"
        return start_url
443
444
445
class Rall(GenericNavigableComic):
    """Class to retrieve Ted Rall comics."""
    # Also on http://www.gocomics.com/tedrall
    name = 'rall'
    long_name = "Ted Rall"
    url = "http://rall.com/comic"
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics (a hardcoded starting point)."""
        # Not the first but I didn't find an efficient way to retrieve it
        start_url = "http://rall.com/2014/01/30/los-angeles-times-cartoon-well-miss-those-california-flowers"
        return {'href': start_url}

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Title and description come from 'og:' meta elements, author and
        date from the page spans, images from the "entry-content" div.
        """
        def meta_content(prop):
            """Return the content of the first meta element with that property."""
            return soup.find('meta', property=prop)['content']

        title = meta_content('og:title')
        author = soup.find("span", class_="author vcard").find("a").string
        entry_date = soup.find("span", class_="entry-date").string
        published = string_to_date(entry_date, "%B %d, %Y")
        description = meta_content('og:description')
        content_imgs = soup.find('div', class_='entry-content').find_all('img')
        # remove social media buttons
        comic_imgs = content_imgs[:-7]
        return {
            'title': title,
            'author': author,
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'description': description,
            'img': [tag['src'] for tag in comic_imgs],
        }
478
479
480
class Dilem(GenericNavigableComic):
    """Class to retrieve Ali Dilem comics."""
    name = 'dilem'
    long_name = 'Ali Dilem'
    url = 'http://information.tv5monde.com/dilem'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics (hardcoded)."""
        first_url = "http://information.tv5monde.com/dilem/2004-06-26"
        return {'href': first_url}

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic, or None when there is none."""
        # prev is next / next is prev
        item_class = 'prev' if next_ else 'next'
        item = last_soup.find('li', class_=item_class)
        if not item:
            return None
        return item.find('a')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The date is read from the first 10 characters of the 'dc:date'
        span content, parsed as year-month-day.
        """
        short_url = soup.find('link', rel='shortlink')['href']
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        og_images = soup.find_all('meta', property='og:image')
        raw_date = soup.find('span', property='dc:date')['content']
        published = string_to_date(raw_date[:10], "%Y-%m-%d")
        return {
            'short_url': short_url,
            'title': title,
            'img': [meta['content'] for meta in og_images],
            'day': published.day,
            'month': published.month,
            'year': published.year,
        }
515
516
517
class SpaceAvalanche(GenericNavigableComic):
    """Class to retrieve Space Avalanche comics."""
    name = 'avalanche'
    long_name = 'Space Avalanche'
    url = 'http://www.spaceavalanche.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics (hardcoded, with its title)."""
        first_url = "http://www.spaceavalanche.com/2009/02/02/irish-sea/"
        return {'href': first_url, 'title': "Irish Sea"}

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The title is the 'title' entry of the link and the date is taken
        from the three numeric path components of the comic url.
        """
        date_in_url = re.compile('.*/([0-9]*)/([0-9]*)/([0-9]*)/.*$')
        title = link['title']
        comic_url = cls.get_url_from_link(link)
        year, month, day = map(int, date_in_url.match(comic_url).groups())
        entry_imgs = soup.find("div", class_="entry").find_all("img")
        return {
            'title': title,
            'day': day,
            'month': month,
            'year': year,
            'img': [tag['src'] for tag in entry_imgs],
        }
545
546
547
class ZenPencils(GenericNavigableComic):
    """Class to retrieve ZenPencils comics."""
    # Also on http://zenpencils.tumblr.com
    # Also on http://www.gocomics.com/zen-pencils
    name = 'zenpencils'
    long_name = 'Zen Pencils'
    url = 'http://zenpencils.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics (hardcoded)."""
        first_url = "http://zenpencils.com/comic/1-ralph-waldo-emerson-make-them-cry/"
        return {'href': first_url}

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Images come from the 'comic' div; author, title and date from the
        'post-content' div. The assertions check that there is at least one
        image and that image alt/title texts are consistent with the title.
        """
        comic_imgs = soup.find('div', id='comic').find_all('img')
        post = soup.find('div', class_='post-content')
        author = post.find("span", class_="post-author").find("a").string
        title = post.find('h2', class_='post-title').string
        post_date = post.find('span', class_='post-date').string
        published = string_to_date(post_date, "%B %d, %Y")
        assert comic_imgs
        assert all(tag['alt'] == tag['title'] for tag in comic_imgs)
        assert all(tag['alt'] in (title, "") for tag in comic_imgs)
        description = soup.find('meta', property='og:description')['content']
        return {
            'title': title,
            'description': description,
            'author': author,
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [tag['src'] for tag in comic_imgs],
        }
583
584
585
class ItsTheTie(GenericNavigableComic):
    """Class to retrieve It's the tie comics."""
    # Also on http://itsthetie.tumblr.com
    # Also on https://tapastic.com/series/itsthetie
    name = 'tie'
    long_name = "It's the tie"
    url = "http://itsthetie.com"
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Images are the 'og:image' ones followed by the bonus images
        ('data-oversrc' attributes) not already listed, minus any source
        ending with "/2016/01/bonus-panel.png".
        """
        title = soup.find('h1', class_='comic-title').find('a').string
        meta_date = soup.find('header', class_='comic-meta entry-meta').find('a').string
        published = string_to_date(meta_date, "%B %d, %Y")
        # Bonus images may or may not be in meta og:image.
        main_srcs = [
            meta['content']
            for meta in soup.find_all('meta', property='og:image')
        ]
        bonus_srcs = [
            tag['data-oversrc']
            for tag in soup.find_all('img', attrs={'data-oversrc': True})
        ]
        candidates = list(main_srcs)
        candidates.extend(src for src in bonus_srcs if src not in main_srcs)
        kept_srcs = [
            src for src in candidates
            if not src.endswith("/2016/01/bonus-panel.png")
        ]
        tag_meta = soup.find('meta', property='article:tag')
        if tag_meta:
            tags = tag_meta['content']
        else:
            tags = ""
        return {
            'title': title,
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': kept_srcs,
            'tags': tags,
        }
618
619
620
class PenelopeBagieu(GenericNavigableComic):
    """Class to retrieve comics from Penelope Bagieu's blog."""
    name = 'bagieu'
    long_name = 'Ma vie est tout a fait fascinante (Bagieu)'
    url = 'http://www.penelope-jolicoeur.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics (hardcoded)."""
        first_url = 'http://www.penelope-jolicoeur.com/2007/02/ma-vie-mon-oeuv.html'
        return {'href': first_url}

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The date comes from the 'date-header' heading, parsed as a French
        "weekday day month year" string; images from the 'entry-body' div.
        """
        header_date = soup.find('h2', class_='date-header').string
        published = string_to_date(header_date, "%A %d %B %Y", "fr_FR.utf8")
        body_imgs = soup.find('div', class_='entry-body').find_all('img')
        title = soup.find('h3', class_='entry-header').string
        return {
            'title': title,
            'img': [tag['src'] for tag in body_imgs],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
646
647
648
class OneOneOneOneComic(GenericNavigableComic):
    """Class to retrieve 1111 Comics."""
    # Also on http://comics1111.tumblr.com
    # Also on https://tapastic.com/series/1111-Comics
    name = '1111'
    long_name = '1111 Comics'
    url = 'http://www.1111comics.me'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Title and date are read from the comic heading and header anchors,
        images from the 'og:image' meta elements.
        """
        title = soup.find('h1', class_='comic-title').find('a').string
        meta_date = soup.find('header', class_='comic-meta entry-meta').find('a').string
        published = string_to_date(meta_date, "%B %d, %Y")
        og_images = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [meta['content'] for meta in og_images],
        }
672
673
674
class AngryAtNothing(GenericNavigableComic):
    """Class to retrieve Angry at Nothing comics."""
    # Also on http://tapastic.com/series/Comics-yeah-definitely-comics-
    name = 'angry'
    long_name = 'Angry At Nothing'
    url = 'http://www.angryatnothing.net'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Title and date are read from the comic heading and header anchors,
        images from the 'og:image' meta elements.
        """
        title = soup.find('h1', class_='comic-title').find('a').string
        meta_date = soup.find('header', class_='comic-meta entry-meta').find('a').string
        published = string_to_date(meta_date, "%B %d, %Y")
        og_images = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [meta['content'] for meta in og_images],
        }
697
698
699
class NeDroid(GenericNavigableComic):
    """Class to retrieve NeDroid comics."""
    name = 'nedroid'
    long_name = 'NeDroid'
    url = 'http://nedroid.com'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The comic number is extracted from the short link ('?p=' value)
        and the date from the file name of the first comic image. Exactly
        one image is expected in the 'comic' div.
        """
        short_url_re = re.compile('^%s/\\?p=([0-9]*)' % cls.url)
        img_date_re = re.compile('//nedroid.com/comics/([0-9]*)-([0-9]*)-([0-9]*).*')
        short_url = cls.get_url_from_link(soup.find('link', rel='shortlink'))
        number = int(short_url_re.match(short_url).groups()[0])
        comic_imgs = soup.find('div', id='comic').find_all('img')
        first_img = comic_imgs[0]
        year, month, day = map(int, img_date_re.match(first_img['src']).groups())
        assert len(comic_imgs) == 1
        alt_text = first_img['alt']
        title_text = first_img['title']
        return {
            'short_url': short_url,
            'title': alt_text,
            'title2': title_text,
            'img': [urljoin_wrapper(cls.url, tag['src']) for tag in comic_imgs],
            'day': day,
            'month': month,
            'year': year,
            'num': number,
        }
730
731
732
class Garfield(GenericNavigableComic):
    """Class to retrieve Garfield comics."""
    # Also on http://www.gocomics.com/garfield
    name = 'garfield'
    long_name = 'Garfield'
    url = 'https://garfield.com'

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics (hardcoded)."""
        first_url = 'https://garfield.com/comic/1978/06/19'
        return {'href': first_url}

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic from the arrow anchors."""
        arrow_class = 'comic-arrow-right' if next_ else 'comic-arrow-left'
        return last_soup.find('a', class_=arrow_class)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The date is taken from the year/month/day components of the comic
        url; images are the 'img-responsive' ones of the 'comic-display' div.
        """
        comic_url = cls.get_url_from_link(link)
        date_in_url = re.compile('^%s/comic/([0-9]*)/([0-9]*)/([0-9]*)' % cls.url)
        year, month, day = map(int, date_in_url.match(comic_url).groups())
        display = soup.find('div', class_='comic-display')
        comic_imgs = display.find_all('img', class_='img-responsive')
        return {
            'month': month,
            'year': year,
            'day': day,
            'img': [tag['src'] for tag in comic_imgs],
        }
761
762
763
class Dilbert(GenericNavigableComic):
    """Class to retrieve Dilbert comics."""
    # Also on http://www.gocomics.com/dilbert-classics
    name = 'dilbert'
    long_name = 'Dilbert'
    url = 'http://dilbert.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics (hardcoded)."""
        first_url = 'http://dilbert.com/strip/1989-04-16'
        return {'href': first_url}

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic, or None when there is none."""
        nav_class = 'nav-comic nav-right' if next_ else 'nav-comic nav-left'
        nav_div = last_soup.find('div', class_=nav_class)
        if not nav_div:
            return None
        return nav_div.find('a')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Everything is read from 'og:' and 'article:' meta elements.
        """
        def meta_content(prop):
            """Return the content of the first meta element with that property."""
            return soup.find('meta', property=prop)['content']

        title = meta_content('og:title')
        og_images = soup.find_all('meta', property='og:image')
        description = meta_content('og:description')
        published = string_to_date(meta_content('article:publish_date'), "%B %d, %Y")
        author = meta_content('article:author')
        tags = meta_content('article:tag')
        return {
            'title': title,
            'description': description,
            'img': [meta['content'] for meta in og_images],
            'author': author,
            'tags': tags,
            'day': published.day,
            'month': published.month,
            'year': published.year,
        }
801
802
803
class VictimsOfCircumsolar(GenericNavigableComic):
    """Retrieve Victims Of Circumsolar comics."""
    name = 'circumsolar'
    long_name = 'Victims Of Circumsolar'
    url = 'http://www.victimsofcircumsolar.com'
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_first_comic_link(cls):
        """Return a link-like dict whose href is the first comic page."""
        first_page = 'http://www.victimsofcircumsolar.com/comic/modern-addiction'
        return {'href': first_page}

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, description and image urls from a comic page."""
        # No date is collected here: it is on the archive page.
        def last_meta_content(prop):
            """Return the content of the last <meta> with that property."""
            return soup.find_all('meta', property=prop)[-1]['content']

        title = last_meta_content('og:title')
        description = last_meta_content('og:description')
        comic_imgs = soup.find('div', id='comic').find_all('img')
        # Sanity check: each image repeats the title in 'title' and 'alt'.
        assert all(img['title'] == img['alt'] == title for img in comic_imgs)
        return {
            'title': title,
            'description': description,
            'img': [img['src'] for img in comic_imgs],
        }
828
829
830
class ThreeWordPhrase(GenericNavigableComic):
    """Retrieve Three Word Phrase comics."""
    # Also on http://www.threewordphrase.tumblr.com
    name = 'threeword'
    long_name = 'Three Word Phrase'
    url = 'http://threewordphrase.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the parent element of the 'first' button of the home page."""
        home = get_soup_at_url(cls.url)
        first_button = home.find('img', src='/firstlink.gif')
        return first_button.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the parent of the next/prev button, or None without href."""
        button_src = '/nextlink.gif' if next_ else '/prevlink.gif'
        wrapper = last_soup.find('img', src=button_src).parent
        if wrapper.get('href') is None:
            return None
        return wrapper

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract the page title, the image alt texts and the image urls."""
        # Images whose src ends with one of these suffixes are left out.
        skipped_suffixes = ('link.gif', '32.png', 'twpbookad.jpg',
                            'merchad.jpg', 'header.gif', 'tipjar.jpg')
        title_tag = soup.find('title')
        comic_imgs = []
        for img in soup.find_all('img'):
            if img['src'].endswith(skipped_suffixes):
                continue
            comic_imgs.append(img)
        alt_texts = [img.get('alt') for img in comic_imgs if img.get('alt')]
        return {
            'title': title_tag.string if title_tag else None,
            'title2': '  '.join(alt_texts),
            'img': [urljoin_wrapper(cls.url, img['src']) for img in comic_imgs],
        }
861
862
863
class DeadlyPanel(GenericEmptyComic, GenericNavigableComic):
    """Retrieve Deadly Panel comics."""
    # Also on https://tapastic.com/series/deadlypanel
    name = 'deadly'
    long_name = 'Deadly Panel'
    url = 'http://www.deadlypanel.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the urls of the images found in the 'comic' div."""
        comic_div = soup.find('div', id='comic')
        panels = comic_div.find_all('img')
        # Sanity check: 'alt' and 'title' carry the same text on each image.
        assert all(panel['alt'] == panel['title'] for panel in panels)
        return {
            'img': [panel['src'] for panel in panels],
        }
880
881
882
class TheGentlemanArmchair(GenericNavigableComic):
    """Retrieve The Gentleman Armchair comics."""
    name = 'gentlemanarmchair'
    long_name = 'The Gentleman Armchair'
    url = 'http://thegentlemansarmchair.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, author, date and image urls from a comic page."""
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        posted = string_to_date(
            soup.find('span', class_='post-date').string, "%B %d, %Y")
        panels = soup.find('div', id='comic').find_all('img')
        return {
            'img': [panel['src'] for panel in panels],
            'title': title,
            'author': author,
            'month': posted.month,
            'year': posted.year,
            'day': posted.day,
        }
906
907
908
class MyExtraLife(GenericNavigableComic):
    """Retrieve My Extra Life comics."""
    name = 'extralife'
    long_name = 'My Extra Life'
    url = 'http://www.myextralife.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'first comic' anchor found on the home page."""
        home = get_soup_at_url(cls.url)
        return home.find('a', class_='comic_nav_link first_comic_link')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, date and image urls from a comic page."""
        title = soup.find("h1", class_="comic_title").string
        posted = string_to_date(
            soup.find("span", class_="comic_date").string, "%B %d, %Y")
        comic_imgs = soup.find_all("img", class_="comic")
        # Sanity check: each image repeats the title in 'alt' and 'title'.
        assert all(img['alt'] == img['title'] == title for img in comic_imgs)
        return {
            'title': title,
            # Images with an empty src are dropped.
            'img': [img['src'] for img in comic_imgs if img["src"]],
            'day': posted.day,
            'month': posted.month,
            'year': posted.year
        }
935
936
937
class SaturdayMorningBreakfastCereal(GenericNavigableComic):
    """Retrieve Saturday Morning Breakfast Cereal comics."""
    # Also on http://www.gocomics.com/saturday-morning-breakfast-cereal
    # Also on http://smbc-comics.tumblr.com
    name = 'smbc'
    long_name = 'Saturday Morning Breakfast Cereal'
    url = 'http://www.smbc-comics.com'
    get_navi_link = get_a_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the anchor with rel='start' found on the home page."""
        home = get_soup_at_url(cls.url)
        return home.find('a', rel='start')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, date and image urls (comic + after-comic image)."""
        main_img = soup.find('img', id='cc-comic')
        img_srcs = [main_img['src']]
        after_div = soup.find('div', id='aftercomic')
        if after_div:
            # Extra image shown after the comic; kept only if its src is set.
            after_src = after_div.find('img')['src']
            if after_src:
                img_srcs.append(after_src)
        publish_text = soup.find('div', class_='cc-publishtime').contents[0]
        posted = string_to_date(publish_text, "%B %d, %Y")
        return {
            'title': main_img['title'],
            'img': [urljoin_wrapper(cls.url, src) for src in img_srcs],
            'day': posted.day,
            'month': posted.month,
            'year': posted.year
        }
968
969
970
class PerryBibleFellowship(GenericListableComic):
    """Retrieve Perry Bible Fellowship comics."""
    name = 'pbf'
    long_name = 'Perry Bible Fellowship'
    url = 'http://pbfcomics.com'
    get_url_from_archive_element = join_cls_url_to_href

    @classmethod
    def get_archive_elements(cls):
        """Return the comic anchors of the home page, last one first."""
        home = get_soup_at_url(cls.url)
        anchors = home.find_all('a', href=re.compile('^/[0-9]*/$'))
        return reversed(anchors)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract number, name and image url for an archive anchor."""
        page_url = cls.get_url_from_archive_element(link)
        name = link.string
        num = int(link['name'])
        href = link['href']
        # Sanity check: the anchor href is built from its 'name' number.
        assert href == '/%d/' % num
        found = soup.find_all('img', src=re.compile('^/archive_b/PBF.*'))
        # Sanity checks: exactly one comic image, named like the anchor.
        assert len(found) == 1
        assert found[0]['alt'] == name
        return {
            'num': num,
            'name': name,
            'img': [urljoin_wrapper(page_url, img['src']) for img in found],
            'prefix': '%d-' % num,
        }
1000
1001
1002
class Mercworks(GenericNavigableComic):
    """Retrieve Mercworks comics."""
    # Also on http://mercworks.tumblr.com
    name = 'mercworks'
    long_name = 'Mercworks'
    url = 'http://mercworks.net'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic description from the <meta> tags of the page."""
        def named_meta_content(meta_name):
            """Return the content of the first <meta> with that name."""
            return soup.find('meta', attrs={'name': meta_name})['content']

        title = soup.find('meta', property='og:title')['content']
        desc_meta = soup.find('meta', property='og:description')
        if desc_meta:
            desc = desc_meta['content']
        else:
            # The description is optional.
            desc = ""
        author = named_meta_content('shareaholic:article_author_name')
        published_time = named_meta_content('shareaholic:article_published_time')
        # Only the first 10 characters are parsed as the date.
        posted = string_to_date(published_time[:10], "%Y-%m-%d")
        image_metas = soup.find_all('meta', property='og:image')
        return {
            'img': [meta['content'] for meta in image_metas],
            'title': title,
            'author': author,
            'desc': desc,
            'day': posted.day,
            'month': posted.month,
            'year': posted.year
        }
1031
1032
1033
class BerkeleyMews(GenericListableComic):
    """Retrieve Berkeley Mews comics."""
    # Also on http://mews.tumblr.com
    # Also on http://www.gocomics.com/berkeley-mews
    name = 'berkeley'
    long_name = 'Berkeley Mews'
    url = 'http://www.berkeleymews.com'
    get_url_from_archive_element = get_href
    # Captures the number at the end of a '<url>/?p=<num>' comic url.
    comic_num_re = re.compile('%s/\\?p=([0-9]*)$' % url)

    @classmethod
    def get_archive_elements(cls):
        """Return the comic anchors of the archive page, last one first."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, "?page_id=2"))
        anchors = archive_soup.find_all('a', href=cls.comic_num_re)
        return reversed(anchors)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract number, titles, image url and date for an archive anchor."""
        # The date is read from the three dash-separated numbers of the
        # last path component of the image url.
        date_in_img_re = re.compile('.*/([0-9]*)-([0-9]*)-([0-9]*)-.*')
        page_url = cls.get_url_from_archive_element(link)
        num = int(cls.comic_num_re.match(page_url).group(1))
        comic_img = soup.find('div', id='comic').find('img')
        # Sanity check: 'alt' and 'title' carry the same text.
        assert comic_img['alt'] == comic_img['title']
        img_title = comic_img['title']
        img_src = comic_img['src']
        year, month, day = map(int, date_in_img_re.match(img_src).groups())
        return {
            'num': num,
            'title': link.string,
            'title2': img_title,
            'img': [img_src],
            'year': year,
            'month': month,
            'day': day,
        }
1068
1069
1070
class GenericBouletCorp(GenericNavigableComic):
    """Shared retrieval logic for the BouletCorp comics in each language."""
    # Also on http://bouletcorp.tumblr.com
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the first anchor of the 'centered_nav' div of the home page."""
        nav_div = get_soup_at_url(cls.url).find('div', id='centered_nav')
        return nav_div.find_all('a')[0]

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, image urls, image titles and date for a comic."""
        page_url = cls.get_url_from_link(link)
        # The date is read from the '<url>/<year>/<month>/<day>/' prefix.
        date_in_url_re = re.compile('^%s/([0-9]*)/([0-9]*)/([0-9]*)/' % cls.url)
        year, month, day = map(int, date_in_url_re.match(page_url).groups())
        story = soup.find('div', id='notes').find('div', class_='storycontent')
        story_imgs = story.find_all('img')
        img_titles = [img.get('title') for img in story_imgs]
        texts = '  '.join(text for text in img_titles if text)
        title = soup.find('title').string
        return {
            # Images without a src attribute are ignored.
            'img': [convert_iri_to_plain_ascii_uri(img['src'])
                    for img in story_imgs if img.get('src') is not None],
            'title': title,
            'texts': texts,
            'year': year,
            'month': month,
            'day': day,
        }
1097
1098
1099
class BouletCorp(GenericBouletCorp):
    """Retrieve BouletCorp comics from www.bouletcorp.com."""
    url = 'http://www.bouletcorp.com'
    long_name = 'Boulet Corp'
    name = 'boulet'
1104
1105
1106
class BouletCorpEn(GenericBouletCorp):
    """Retrieve BouletCorp comics from english.bouletcorp.com."""
    url = 'http://english.bouletcorp.com'
    long_name = 'Boulet Corp English'
    name = 'boulet_en'
1111
1112
1113
class AmazingSuperPowers(GenericNavigableComic):
    """Retrieve Amazing Super Powers comics."""
    name = 'asp'
    long_name = 'Amazing Super Powers'
    url = 'http://www.amazingsuperpowers.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, author, date and image urls from a comic page."""
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        posted = string_to_date(
            soup.find('span', class_='post-date').string, "%B %d, %Y")
        panels = soup.find('div', id='comic').find_all('img')
        # The title is the space-joined 'title' attributes of the images.
        title = ' '.join(panel['title'] for panel in panels)
        # Sanity check: 'alt' and 'title' carry the same text on each image.
        assert all(panel['alt'] == panel['title'] for panel in panels)
        return {
            'title': title,
            'author': author,
            'img': [panel['src'] for panel in panels],
            'day': posted.day,
            'month': posted.month,
            'year': posted.year
        }
1138
1139
1140
class ToonHole(GenericListableComic):
    """Retrieve Toon Hole comics."""
    # Also on http://tapastic.com/series/TOONHOLE
    name = 'toonhole'
    long_name = 'Toon Hole'
    url = 'http://www.toonhole.com'
    get_url_from_archive_element = get_href

    @classmethod
    def get_archive_elements(cls):
        """Return the rel='bookmark' anchors of the archive page, last first."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive/'))
        bookmarks = archive_soup.find_all('a', rel='bookmark')
        return reversed(bookmarks)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, date and image urls for an archive anchor."""
        title = link.string
        raw_date = soup.find('div', class_='comicdate').string.strip()
        posted = string_to_date(
            remove_st_nd_rd_th_from_date(raw_date), "%B %d, %Y")
        panels = soup.find('div', id='comic').find_all('img')
        # Sanity check: each image repeats the title in 'alt' and 'title'.
        assert all(panel['alt'] == panel['title'] == title for panel in panels)
        return {
            'title': title,
            'month': posted.month,
            'year': posted.year,
            'day': posted.day,
            'img': [convert_iri_to_plain_ascii_uri(panel['src']) for panel in panels],
        }
1168
1169
1170
class Channelate(GenericNavigableComic):
    """Retrieve Channelate comics."""
    name = 'channelate'
    long_name = 'Channelate'
    url = 'http://www.channelate.com'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract author, date, title and images, extra panel included."""
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        posted = string_to_date(
            soup.find('span', class_='post-date').string, '%Y/%m/%d')
        title = soup.find('meta', property='og:title')['content']
        comic_div = soup.find('div', id='comic')
        panels = []
        if comic_div:
            panels = comic_div.find_all('img')
        # Sanity check: 'alt' and 'title' carry the same text on each image.
        assert all(panel['alt'] == panel['title'] for panel in panels)
        extra_url = None
        extra_button = soup.find('div', id='extrapanelbutton')
        if extra_button:
            # The extra panel lives on another page which is fetched as well.
            extra_url = extra_button.find('a')['href']
            extra_page = get_soup_at_url(extra_url)
            panels.extend(extra_page.find_all('img', class_='extrapanelimage'))
        return {
            'url_extra': extra_url,
            'title': title,
            'author': author,
            'month': posted.month,
            'year': posted.year,
            'day': posted.day,
            'img': [panel['src'] for panel in panels],
        }
1204
1205
1206
class CyanideAndHappiness(GenericNavigableComic):
    """Retrieve Cyanide and Happiness comics."""
    name = 'cyanide'
    long_name = 'Cyanide and Happiness'
    url = 'http://explosm.net'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the anchor titled 'Oldest comic' found on the home page."""
        home = get_soup_at_url(cls.url)
        return home.find('a', title='Oldest comic')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the next/previous anchor, or None when it has no href."""
        # NOTE(review): the trailing space in 'previous-comic ' is kept as
        # is - confirm against the site's markup whether it is intended.
        anchor_class = 'next-comic' if next_ else 'previous-comic '
        anchor = last_soup.find('a', class_=anchor_class)
        if anchor.get('href') is None:
            return None
        return anchor

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract number, author, date and image urls from a comic page."""
        canonical_url = soup.find('meta', property='og:url')['content']
        # The number is the second to last '/'-separated part of the url.
        num = int(canonical_url.split('/')[-2])
        posted = string_to_date(soup.find('h3').find('a').string, '%Y.%m.%d')
        credit = soup.find('small', class_="author-credit-name").string
        assert credit.startswith('by ')
        author = credit[len('by '):]
        comic_imgs = soup.find_all('img', id='main-comic')
        return {
            'num': num,
            'author': author,
            'month': posted.month,
            'year': posted.year,
            'day': posted.day,
            'prefix': '%d-' % num,
            'img': [convert_iri_to_plain_ascii_uri(urljoin_wrapper(cls.url, img['src']))
                    for img in comic_imgs]
        }
1243
1244
1245
class MrLovenstein(GenericComic):
    """Retrieve Mr. Lovenstein comics."""
    # Also on https://tapastic.com/series/MrLovenstein
    name = 'mrlovenstein'
    long_name = 'Mr. Lovenstein'
    url = 'http://www.mrlovenstein.com'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Yield the comics numbered after last_comic (all if it is falsy).

        The range of comic numbers is taken from the '/comic/<num>' links
        found on the home page.
        """
        # TODO: more info from http://www.mrlovenstein.com/archive
        comic_num_re = re.compile('^/comic/([0-9]*)$')
        img_src_re = re.compile('^/images/comics/')
        home_links = get_soup_at_url(cls.url).find_all('a', href=comic_num_re)
        nums = [int(comic_num_re.match(anchor['href']).group(1))
                for anchor in home_links]
        lowest, highest = min(nums), max(nums)
        start = last_comic['num'] + 1 if last_comic else lowest
        for num in range(start, highest + 1):
            comic_url = urljoin_wrapper(cls.url, '/comic/%d' % num)
            page = get_soup_at_url(comic_url)
            # Images are kept in the reverse of their order on the page.
            comic_imgs = list(page.find_all('img', src=img_src_re))
            comic_imgs.reverse()
            img_titles = [img.get('title') for img in comic_imgs]
            yield {
                'url': comic_url,
                'num': num,
                'texts': '  '.join(text for text in img_titles if text),
                'img': [urljoin_wrapper(comic_url, img['src']) for img in comic_imgs],
            }
1272
1273
1274
class DinosaurComics(GenericListableComic):
    """Retrieve Dinosaur Comics comics."""
    name = 'dinosaur'
    long_name = 'Dinosaur Comics'
    url = 'http://www.qwantz.com'
    get_url_from_archive_element = get_href
    # Captures the number of a '<url>/index.php?comic=<num>' comic url.
    comic_link_re = re.compile('^%s/index.php\\?comic=([0-9]*)$' % url)

    @classmethod
    def get_archive_elements(cls):
        """Return the comic anchors of the archive page, last one first."""
        archive_soup = get_soup_at_url('%s/archive.php' % cls.url)
        anchors = archive_soup.find_all('a', href=cls.comic_link_re)
        # first link is random -> skip it
        return reversed(anchors[1:])

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract number, date, text, title and image for an archive anchor."""
        page_url = cls.get_url_from_archive_element(link)
        num = int(cls.comic_link_re.match(page_url).group(1))
        # The anchor text is the date; the text is read from its next sibling.
        raw_date = link.string
        text = link.next_sibling.string
        posted = string_to_date(
            remove_st_nd_rd_th_from_date(raw_date), "%B %d, %Y")
        comic_img = soup.find('img', src=re.compile('^%s/comics/' % cls.url))
        return {
            'month': posted.month,
            'year': posted.year,
            'day': posted.day,
            'img': [comic_img.get('src')],
            'title': comic_img.get('title'),
            'text': text,
            'num': num,
        }
1307
1308
1309
class ButterSafe(GenericListableComic):
    """Retrieve ButterSafe comics."""
    name = 'butter'
    long_name = 'ButterSafe'
    url = 'http://buttersafe.com'
    get_url_from_archive_element = get_href
    # Captures year, month and day of a '<url>/<y>/<m>/<d>/...' comic url.
    comic_link_re = re.compile('^%s/([0-9]*)/([0-9]*)/([0-9]*)/.*' % url)

    @classmethod
    def get_archive_elements(cls):
        """Return the comic anchors of the archive page, last one first."""
        archive_soup = get_soup_at_url('%s/archive/' % cls.url)
        anchors = archive_soup.find_all('a', href=cls.comic_link_re)
        return reversed(anchors)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, date and image url for an archive anchor."""
        page_url = cls.get_url_from_archive_element(link)
        title = link.string
        year, month, day = map(int, cls.comic_link_re.match(page_url).groups())
        comic_img = soup.find('div', id='comic').find('img')
        # Sanity check: the image 'alt' is the anchor text.
        assert comic_img['alt'] == title
        return {
            'title': title,
            'day': day,
            'month': month,
            'year': year,
            'img': [comic_img['src']],
        }
1337
1338
1339
class CalvinAndHobbes(GenericComic):
    """Retrieve Calvin and Hobbes comics."""
    # Also on http://www.gocomics.com/calvinandhobbes/
    name = 'calvin'
    long_name = 'Calvin and Hobbes'
    # This is not through any official webpage but eh...
    url = 'http://marcel-oehler.marcellosendos.ch/comics/ch/'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Yield the comics dated after last_comic.

        The page at cls.url is scanned for hrefs of the form
        '<year>/<month>/'; each such month page is then scanned for images
        whose src starts with the year and month followed by the day number.

        last_comic is the last comic already retrieved; when it is falsy,
        retrieval starts after 1985-11-01.
        """
        if last_comic:
            last_date = get_date_for_comic(last_comic)
        else:
            last_date = date(1985, 11, 1)
        # '+' and not '*': an href with empty digit groups (such as '//x/')
        # must not match, otherwise int('') below would raise ValueError.
        link_re = re.compile('^([0-9]+)/([0-9]+)/')
        for link in get_soup_at_url(cls.url).find_all('a', href=link_re):
            href = link['href']
            # The string forms are kept: they are reused verbatim to build
            # the image pattern and the image url.
            year_str, month_str = link_re.match(href).groups()
            year, month = int(year_str), int(month_str)
            # Skip a month when its first day plus 31 days is still before
            # the last retrieved date: it cannot hold any new comic.
            if date(year, month, 1) + timedelta(days=31) < last_date:
                continue
            # '+' for the same reason as above: a day number is required.
            img_re = re.compile('^%s%s([0-9]+)' % (year_str, month_str))
            month_url = urljoin_wrapper(cls.url, href)
            for img in get_soup_at_url(month_url).find_all('img', src=img_re):
                img_src = img['src']
                day = int(img_re.match(img_src).group(1))
                comic_date = date(year, month, day)
                if comic_date > last_date:
                    yield {
                        'url': month_url,
                        'year': year,
                        'month': month,
                        'day': day,
                        'img': ['%s%s/%s/%s' % (cls.url, year_str, month_str, img_src)],
                    }
                    last_date = comic_date
1372
1373
1374
class AbstruseGoose(GenericComic):
    """Retrieve Abstruse Goose comics."""
    name = 'abstruse'
    long_name = 'Abstruse Goose'
    url = 'http://abstrusegoose.com'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Yield the archive comics numbered above last_comic's number.

        When last_comic is falsy, every comic with a number above 0 is
        yielded. The image url is read from each comic's own page.
        """
        previous_num = 0
        if last_comic:
            previous_num = last_comic['num']
        comic_url_re = re.compile('^%s/([0-9]*)$' % cls.url)
        comic_img_re = re.compile('^%s/strips/.*' % cls.url)
        archive_soup = get_soup_at_url('%s/archive' % cls.url)
        for anchor in archive_soup.find_all('a', href=comic_url_re):
            comic_url = anchor['href']
            num = int(comic_url_re.match(comic_url).group(1))
            if num <= previous_num:
                continue
            yield {
                'url': comic_url,
                'num': num,
                'title': anchor.string,
                'img': [get_soup_at_url(comic_url).find('img', src=comic_img_re)['src']]
            }
1396
1397
1398
class PhDComics(GenericNavigableComic):
    """Retrieve PhD Comics."""
    name = 'phd'
    long_name = 'PhD Comics'
    url = 'http://phdcomics.com/comics/archive.php'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the parent element of the 'first' button of the archive page."""
        archive_soup = get_soup_at_url(cls.url)
        first_button = archive_soup.find('img', src='images/first_button.gif')
        return first_button.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the parent of the next/prev button, or None if absent."""
        if next_:
            button_src = 'images/next_button.gif'
        else:
            button_src = 'images/prev_button.gif'
        button = last_soup.find('img', src=button_src)
        if button is None:
            return None
        return button.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract date, image url and title from a comic page."""
        date_font = soup.find(
            'font',
            face='Arial,Helvetica,Geneva,Swiss,SunSans-Regular',
            color='white')
        date_str = date_font.string.strip()
        try:
            posted = string_to_date(date_str, '%m/%d/%Y')
        except ValueError:
            # Unparsable date: report it and use today's date instead.
            print("Invalid date %s" % date_str)
            posted = date.today()
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        return {
            'year': posted.year,
            'month': posted.month,
            'day': posted.day,
            'img': [soup.find('img', id='comic')['src']],
            'title': title,
        }
1432
1433
1434
class Octopuns(GenericNavigableComic):
    """Retrieve Octopuns comics."""
    # Also on http://octopuns.tumblr.com
    name = 'octopuns'
    long_name = 'Octopuns'
    url = 'http://www.octopuns.net'

    @classmethod
    def get_first_comic_link(cls):
        """Return the parent element of the 'First.png' button of the home page."""
        home = get_soup_at_url(cls.url)
        first_button = home.find('img', src=re.compile('.*/First.png'))
        return first_button.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the parent of the Next/Back button, or None without href."""
        button_pattern = '.*/Next.png' if next_ else '.*/Back.png'
        wrapper = last_soup.find('img', src=re.compile(button_pattern)).parent
        if wrapper.get('href') is None:
            return None
        return wrapper

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, date and image urls from a comic page."""
        title = soup.find('h3', class_='post-title entry-title').string
        posted = string_to_date(
            soup.find('h2', class_='date-header').string, "%A, %B %d, %Y")
        image_links = soup.find_all('link', rel='image_src')
        return {
            'img': [image_link['href'] for image_link in image_links],
            'title': title,
            'day': posted.day,
            'month': posted.month,
            'year': posted.year,
        }
1465
1466
1467
class Quarktees(GenericNavigableComic):
    """Retrieve Quarktees comics."""
    name = 'quarktees'
    long_name = 'Quarktees'
    url = 'http://www.quarktees.com/blogs/news'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return a link-like dict whose href is the first article."""
        first_page = 'http://www.quarktees.com/blogs/news/12486621-coming-soon'
        return {'href': first_page}

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the 'article-next' (or 'article-prev') anchor of the page."""
        if next_:
            anchor_id = 'article-next'
        else:
            anchor_id = 'article-prev'
        return last_soup.find('a', id=anchor_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract the title and the image urls of the article."""
        title = soup.find('meta', property='og:title')['content']
        article_div = soup.find('div', class_='single-article')
        article_imgs = article_div.find_all('img')
        return {
            'title': title,
            'img': [urljoin_wrapper(cls.url, img['src']) for img in article_imgs],
        }
1493
1494
1495
class OverCompensating(GenericNavigableComic):
    """Retrieve Over Compensating comics."""
    name = 'compensating'
    long_name = 'Over Compensating'
    url = 'http://www.overcompensating.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the home page anchor whose href ends with 'comic=1'."""
        home = get_soup_at_url(cls.url)
        return home.find('a', href=re.compile('comic=1$'))

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor titled 'next comic' (or 'go back already')."""
        if next_:
            anchor_title = 'next comic'
        else:
            anchor_title = 'go back already'
        return last_soup.find('a', title=anchor_title)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract number, image url and image title for a comic link."""
        page_url = cls.get_url_from_link(link)
        # The number is read from the 'comic=<num>' end of the url.
        num_match = re.compile('.*comic=([0-9]*)$').match(page_url)
        num = int(num_match.group(1))
        comic_img = soup.find('img', src=re.compile('^/oc/comics/.*'))
        return {
            'num': num,
            'img': [urljoin_wrapper(page_url, comic_img['src'])],
            'title': comic_img.get('title')
        }
1524
1525
1526
class Oglaf(GenericNavigableComic):
    """Retrieve Oglaf comics."""
    name = 'oglaf'
    long_name = 'Oglaf [NSFW]'
    url = 'http://oglaf.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the parent element of the 'st' div of the home page."""
        home = get_soup_at_url(cls.url)
        return home.find("div", id="st").parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the parent of the 'nx' (or 'pvs') div, or None if absent."""
        nav_div = last_soup.find("div", id="nx" if next_ else "pvs")
        if not nav_div:
            return None
        return nav_div.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, image urls and description from a comic page."""
        title = soup.find('title').string
        title_imgs = soup.find('div', id='tt').find_all('img')
        assert len(title_imgs) == 1
        strip_imgs = soup.find_all('img', id='strip')
        assert len(strip_imgs) == 1
        # Title image first, then the strip itself.
        all_imgs = title_imgs + strip_imgs
        # The description is the space-joined 'title' attributes.
        description = ' '.join(img['title'] for img in all_imgs)
        return {
            'title': title,
            'img': [img['src'] for img in all_imgs],
            'description': description,
        }
1558
1559
1560
class ScandinaviaAndTheWorld(GenericNavigableComic):
    """Retriever for the Scandinavia And The World webcomic."""
    name = 'satw'
    long_name = 'Scandinavia And The World'
    url = 'http://satwcomic.com'

    @classmethod
    def get_first_comic_link(cls):
        """Return a link-like dict pointing at the first comic."""
        first_url = 'http://satwcomic.com/sweden-denmark-and-norway'
        return {'href': first_url}

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the next (or previous) anchor, located by its access key."""
        access_key = 'n' if next_ else 'p'
        return last_soup.find('a', accesskey=access_key)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect title, description and image URLs from the page."""
        title_meta = soup.find('meta', attrs={'name': 'twitter:label1'})
        comic_title = title_meta['content']
        desc_meta = soup.find('meta', property='og:description')
        comic_desc = desc_meta['content']
        pictures = soup.find_all('img', itemprop="image")
        return {
            'title': comic_title,
            'description': comic_desc,
            'img': [pic['src'] for pic in pictures],
        }
1586
1587
1588
class SomethingOfThatIlk(GenericEmptyComic):
    """Placeholder for the Something Of That Ilk comics.

    The comic does not exist anymore, hence the empty-comic base class.
    """
    name = 'somethingofthatilk'
    long_name = 'Something Of That Ilk'
    url = 'http://www.somethingofthatilk.com'
1593
1594 View Code Duplication
0 ignored issues
show
This code seems to be duplicated in your project.
Loading history...
1595
class InfiniteMonkeyBusiness(GenericNavigableComic):
    """Retriever for the Infinite Monkey Business webcomic."""
    name = 'monkey'
    long_name = 'Infinite Monkey Business'
    url = 'http://infinitemonkeybusiness.net'
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_first_comic_link(cls):
        """Return a link-like dict pointing at the first comic."""
        first_url = 'http://infinitemonkeybusiness.net/comic/pillory/'
        return {'href': first_url}

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect the title and image URLs of one comic page."""
        comic_title = soup.find('meta', property='og:title')['content']
        comic_div = soup.find('div', id='comic')
        sources = [pic['src'] for pic in comic_div.find_all('img')]
        return {
            'title': comic_title,
            'img': sources,
        }
1616
1617
1618
class Wondermark(GenericListableComic):
    """Retriever for the Wondermark webcomic."""
    name = 'wondermark'
    long_name = 'Wondermark'
    url = 'http://wondermark.com'
    get_url_from_archive_element = get_href

    @classmethod
    def get_archive_elements(cls):
        """Return the archive's bookmark anchors in reversed page order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive/'))
        bookmarks = archive_soup.find_all('a', rel='bookmark')
        return reversed(bookmarks)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect date, image, title, alt text and tags for one post."""
        raw_date = soup.find('div', class_='postdate').find('em').string
        cleaned_date = remove_st_nd_rd_th_from_date(raw_date)
        day = string_to_date(cleaned_date, "%B %d, %Y")
        # Defaults used when the page has no comic container.
        img_src, alt, title = [], '', ''
        comic_div = soup.find('div', id='comic')
        if comic_div:
            picture = comic_div.find('img')
            img_src = [picture['src']]
            alt = picture['alt']
            assert alt == picture['title']
            title = soup.find('meta', property='og:title')['content']
        tag_links = soup.find('div', class_='postmeta').find_all('a', rel='tag')
        return {
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': img_src,
            'title': title,
            'alt': alt,
            'tags': ' '.join(tag.string for tag in tag_links),
        }
1655
1656
1657
class WarehouseComic(GenericNavigableComic):
    """Retriever for the Warehouse Comic webcomic."""
    name = 'warehouse'
    long_name = 'Warehouse Comic'
    url = 'http://warehousecomic.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect images, title and publication date of one post."""
        post_title = soup.find('h2', class_='post-title').string
        posted_on = soup.find('span', class_='post-date').string
        day = string_to_date(posted_on, "%B %d, %Y")
        comic_div = soup.find('div', id='comic')
        sources = [pic['src'] for pic in comic_div.find_all('img')]
        return {
            'img': sources,
            'title': post_title,
            'day': day.day,
            'month': day.month,
            'year': day.year,
        }
1679
1680
1681
class JustSayEh(GenericNavigableComic):
    """Retriever for the Just Say Eh webcomic."""
    # Also on http://tapastic.com/series/Just-Say-Eh
    name = 'justsayeh'
    long_name = 'Just Say Eh'
    url = 'http://www.justsayeh.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect images, title and alt text of one post."""
        post_title = soup.find('h2', class_='post-title').string
        pictures = soup.find("div", id="comic").find_all("img")
        # Every image is expected to carry the same text as alt and title.
        for pic in pictures:
            assert pic['alt'] == pic['title']
        first_alt = pictures[0]['alt']
        sources = [pic['src'] for pic in pictures]
        return {
            'img': sources,
            'title': post_title,
            'alt': first_alt,
        }
1702
1703
1704
class MouseBearComedy(GenericNavigableComic):
    """Retriever for the Mouse Bear Comedy webcomic."""
    # Also on http://mousebearcomedy.tumblr.com
    name = 'mousebear'
    long_name = 'Mouse Bear Comedy'
    url = 'http://www.mousebearcomedy.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect date, images, title and author of one post."""
        post_title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        post_author = author_span.find("a").string
        posted_on = soup.find("span", class_="post-date").string
        day = string_to_date(posted_on, '%B %d, %Y')
        pictures = soup.find("div", id="comic").find_all("img")
        # Alt text and hover text of each image must both equal the post title.
        assert all(pic['alt'] == pic['title'] == post_title for pic in pictures)
        return {
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [pic['src'] for pic in pictures],
            'title': post_title,
            'author': post_author,
        }
1730
1731
1732
class BigFootJustice(GenericNavigableComic):
    """Retriever for the Big Foot Justice webcomic."""
    # Also on http://tapastic.com/series/bigfoot-justice
    name = 'bigfoot'
    long_name = 'Big Foot Justice'
    url = 'http://bigfootjustice.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect the images of one page; the title joins their hover texts."""
        pictures = soup.find('div', id='comic').find_all('img')
        assert all(pic['title'] == pic['alt'] for pic in pictures)
        hover_texts = [pic['title'] for pic in pictures]
        sources = [pic['src'] for pic in pictures]
        return {
            'img': sources,
            'title': ' '.join(hover_texts),
        }
1751
1752
1753
class RespawnComic(GenericNavigableComic):
    """Class to retrieve Respawn Comic."""
    # Also on http://respawncomic.tumblr.com
    name = 'respawn'
    long_name = 'Respawn Comic'
    # No trailing whitespace: a stray space would end up in any URL built
    # from this value.
    url = 'http://respawncomic.com'
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics."""
        return {'href': 'http://respawncomic.com/comic/c0001/'}

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Title, author, publication date and images are all read from the
        page's <meta> tags.
        """
        title = soup.find('meta', property='og:title')['content']
        author = soup.find('meta', attrs={'name': 'shareaholic:article_author_name'})['content']
        date_str = soup.find('meta', attrs={'name': 'shareaholic:article_published_time'})['content']
        # Only the leading "YYYY-MM-DD" part of the timestamp is parsed.
        day = string_to_date(date_str[:10], "%Y-%m-%d")
        imgs = soup.find_all('meta', property='og:image')
        # og:image entries that are excluded from the comic's images.
        skip_imgs = {
            'http://respawncomic.com/wp-content/uploads/2016/03/site/HAROLD2.png',
            'http://respawncomic.com/wp-content/uploads/2016/03/site/DEVA.png'
        }
        return {
            'title': title,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [i['content'] for i in imgs if i['content'] not in skip_imgs],
        }
1787
1788
1789
class SafelyEndangered(GenericNavigableComic):
    """Retriever for the Safely Endangered webcomic."""
    # Also on http://tumblr.safelyendangered.com
    name = 'endangered'
    long_name = 'Safely Endangered'
    url = 'http://www.safelyendangered.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return a link-like dict pointing at the first comic."""
        first_url = 'http://www.safelyendangered.com/comic/ignored/'
        return {'href': first_url}

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect date, images, title and alt text of one post."""
        post_title = soup.find('h2', class_='post-title').string
        posted_on = soup.find('span', class_='post-date').string
        day = string_to_date(posted_on, '%B %d, %Y')
        pictures = soup.find('div', id='comic').find_all('img')
        first_alt = pictures[0]['alt']
        # Each image is expected to use the same text for alt and title.
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        sources = [pic['src'] for pic in pictures]
        return {
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': sources,
            'title': post_title,
            'alt': first_alt,
        }
1819
1820
1821
class PicturesInBoxes(GenericNavigableComic):
    """Retriever for the Pictures In Boxes webcomic."""
    # Also on http://picturesinboxescomic.tumblr.com
    name = 'picturesinboxes'
    long_name = 'Pictures in Boxes'
    url = 'http://www.picturesinboxes.com'
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_first_comic_link(cls):
        """Return a link-like dict pointing at the first comic."""
        first_url = 'http://www.picturesinboxes.com/2013/10/26/tetris/'
        return {'href': first_url}

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect date, images, title and author of one post."""
        post_title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        post_author = author_span.find("a").string
        posted_on = soup.find('span', class_='post-date').string
        day = string_to_date(posted_on, '%B %d, %Y')
        pictures = soup.find('div', class_='comicpane').find_all('img')
        # At least one image is required, each labelled with the post title.
        assert pictures
        assert all(pic['title'] == pic['alt'] == post_title for pic in pictures)
        return {
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [pic['src'] for pic in pictures],
            'title': post_title,
            'author': post_author,
        }
1852
1853
1854
class Penmen(GenericEmptyComic):
    """Placeholder for the Penmen comics (built on the empty-comic base)."""
    name = 'penmen'
    long_name = 'Penmen'
    url = 'http://penmen.com'
1859
1860
1861
class TheDoghouseDiaries(GenericNavigableComic):
    """Retriever for The Dog House Diaries webcomic."""
    name = 'doghouse'
    long_name = 'The Dog House Diaries'
    url = 'http://thedoghousediaries.com'

    @classmethod
    def get_first_comic_link(cls):
        """Return the home page anchor whose id is "firstlink"."""
        home_soup = get_soup_at_url(cls.url)
        return home_soup.find('a', id='firstlink')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the next (or previous) anchor, located by its id."""
        anchor_id = 'nextlink' if next_ else 'previouslink'
        return last_soup.find('a', id=anchor_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect titles, alt text, image URL and number of one comic."""
        strip = soup.find('img', src=re.compile('^dhdcomics/.*'))
        page_url = cls.get_url_from_link(link)
        heading = soup.find('h2', id='titleheader').string
        subheading = soup.find('div', id='subtext').string
        hover_text = strip.get('title')
        # The src value is stripped of surrounding whitespace before joining.
        image_url = urljoin_wrapper(page_url, strip['src'].strip())
        # The comic number is the last path segment of the page URL.
        number = int(page_url.split('/')[-1])
        return {
            'title': heading,
            'title2': subheading,
            'alt': hover_text,
            'img': [image_url],
            'num': number,
        }
1889
1890
1891
class InvisibleBread(GenericListableComic):
    """Class to retrieve Invisible Bread comics."""
    # Also on http://www.gocomics.com/invisible-bread
    name = 'invisiblebread'
    long_name = 'Invisible Bread'
    url = 'http://invisiblebread.com'

    @classmethod
    def get_archive_elements(cls):
        """Return the archive's title cells in reversed page order."""
        archive_url = urljoin_wrapper(cls.url, '/archives/')
        return reversed(get_soup_at_url(archive_url).find_all('td', class_='archive-title'))

    @classmethod
    def get_url_from_archive_element(cls, td):
        """Return the href of the anchor inside an archive title cell."""
        return td.find('a')['href']

    @classmethod
    def get_comic_info(cls, soup, td):
        """Get information about a particular comics.

        The title and the month/day come from the archive row (`td` and its
        previous sibling); the year is taken from the comic URL.
        """
        url = cls.get_url_from_archive_element(td)
        title = td.find('a').string
        month_and_day = td.previous_sibling.string
        # Escape the base URL so that its dots only match literal dots.
        link_re = re.compile('^%s/([0-9]+)/' % re.escape(cls.url))
        year = link_re.match(url).groups()[0]
        date_str = month_and_day + ' ' + year
        day = string_to_date(date_str, '%b %d %Y')
        # Only the first image of the comic div is used.
        imgs = [soup.find('div', id='comic').find('img')]
        assert all(i['title'] == i['alt'] == title for i in imgs)
        return {
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
            'title': title,
        }
1927
1928
1929
class DiscoBleach(GenericEmptyComic):
    """Placeholder for the Disco Bleach comics.

    Retrieval does not work anymore, hence the empty-comic base class.
    """
    name = 'discobleach'
    long_name = 'Disco Bleach'
    url = 'http://discobleach.com'
1934
1935
1936
class TubeyToons(GenericEmptyComic):
    """Placeholder for the TubeyToons comics.

    Retrieval does not work anymore, hence the empty-comic base class.
    """
    # Also on http://tapastic.com/series/Tubey-Toons
    # Also on http://tubeytoons.tumblr.com
    name = 'tubeytoons'
    long_name = 'Tubey Toons'
    url = 'http://tubeytoons.com'
1943
1944
1945
class CompletelySeriousComics(GenericNavigableComic):
    """Retriever for the Completely Serious Comics webcomic."""
    name = 'completelyserious'
    long_name = 'Completely Serious Comics'
    url = 'http://completelyseriouscomics.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect date, images, title, alt text and author of one post."""
        post_title = soup.find('h2', class_='post-title').string
        # The author name is the second child node of the author span.
        author_span = soup.find('span', class_='post-author')
        post_author = author_span.contents[1].string
        posted_on = soup.find('span', class_='post-date').string
        day = string_to_date(posted_on, '%B %d, %Y')
        pictures = soup.find('div', class_='comicpane').find_all('img')
        assert pictures
        hover_text = pictures[0]['title']
        # All images must share one text for both their title and alt.
        assert all(pic['title'] == pic['alt'] == hover_text for pic in pictures)
        return {
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [pic['src'] for pic in pictures],
            'title': post_title,
            'alt': hover_text,
            'author': post_author,
        }
1973
1974
1975
class PoorlyDrawnLines(GenericListableComic):
    """Class to retrieve Poorly Drawn Lines comics."""
    # Also on http://pdlcomics.tumblr.com
    name = 'poorlydrawn'
    long_name = 'Poorly Drawn Lines'
    url = 'http://poorlydrawnlines.com'
    get_url_from_archive_element = get_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        A post holds at most one image; when there is none, the image list
        is empty and the title is the empty string.
        """
        imgs = soup.find('div', class_='post').find_all('img')
        assert len(imgs) <= 1
        return {
            'img': [i['src'] for i in imgs],
            'title': imgs[0].get('title', "") if imgs else "",
        }

    @classmethod
    def get_archive_elements(cls):
        """Return the archive's comic anchors in reversed page order."""
        archive_url = urljoin_wrapper(cls.url, 'archive')
        # Escape the base URL so that its dots only match literal dots.
        url_re = re.compile('^%s/comic/.' % re.escape(cls.url))
        return reversed(get_soup_at_url(archive_url).find_all('a', href=url_re))
1998
1999
2000
class LoadingComics(GenericNavigableComic):
    """Retriever for the Loading Artist webcomic."""
    name = 'loadingartist'
    long_name = 'Loading Artist'
    url = 'http://www.loadingartist.com/latest'

    @classmethod
    def get_first_comic_link(cls):
        """Return the anchor titled "First" on the latest-comic page."""
        latest_soup = get_soup_at_url(cls.url)
        return latest_soup.find('a', title="First")

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the next (or previous) anchor, located by its title."""
        anchor_title = 'Next' if next_ else 'Previous'
        return last_soup.find('a', title=anchor_title)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect title, images and publication date of one comic."""
        comic_title = soup.find('h1').string
        posted_on = soup.find('span', class_='date').string.strip()
        day = string_to_date(posted_on, "%B %d, %Y")
        # Only images whose alt and title are both empty are kept.
        comic_div = soup.find('div', class_='comic')
        pictures = comic_div.find_all('img', alt='', title='')
        return {
            'title': comic_title,
            'img': [pic['src'] for pic in pictures],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
2029
2030
2031
class ChuckleADuck(GenericNavigableComic):
    """Retriever for the Chuckle-A-Duck webcomic."""
    name = 'chuckleaduck'
    long_name = 'Chuckle-A-duck'
    url = 'http://chuckleaduck.com'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect date, images, title and author of one post."""
        raw_date = soup.find('span', class_='post-date').string
        cleaned_date = remove_st_nd_rd_th_from_date(raw_date)
        day = string_to_date(cleaned_date, "%B %d, %Y")
        post_author = soup.find('span', class_='post-author').string
        # A page without a comic div yields no images and an empty title.
        comic_div = soup.find('div', id='comic')
        if comic_div:
            pictures = comic_div.find_all('img')
        else:
            pictures = []
        if pictures:
            comic_title = pictures[0]['title']
        else:
            comic_title = ""
        assert all(pic['title'] == pic['alt'] == comic_title for pic in pictures)
        return {
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [pic['src'] for pic in pictures],
            'title': comic_title,
            'author': post_author,
        }
2057
2058
2059
class DepressedAlien(GenericNavigableComic):
    """Class to retrieve Depressed Alien Comics."""
    name = 'depressedalien'
    long_name = 'Depressed Alien'
    url = 'http://depressedalien.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics."""
        return get_soup_at_url(cls.url).find('img', attrs={'name': 'beginArrow'}).parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns the parent element of the arrow image, or None when the page
        has no such arrow (instead of failing on a missing element).
        """
        arrow_name = 'rightArrow' if next_ else 'leftArrow'
        arrow = last_soup.find('img', attrs={'name': arrow_name})
        return arrow.parent if arrow is not None else None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics."""
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        imgs = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'img': [i['content'] for i in imgs],
        }
2084
2085
2086
class ThingsInSquares(GenericListableComic):
    """Retriever for the Things In Squares webcomic."""
    # This can be retrieved in other languages
    # Also on https://tapastic.com/series/Things-in-Squares
    name = 'squares'
    long_name = 'Things in squares'
    url = 'http://www.thingsinsquares.com'

    @classmethod
    def get_comic_info(cls, soup, tr):
        """Collect the info of one comic from its archive row and its page."""
        # An archive row must hold exactly three cells; the first is unused.
        _, link_cell, date_cell = tr.find_all('td')
        anchor = link_cell.find('a')
        day = string_to_date(date_cell.string, "%m.%d.%y")
        row_title = anchor.string
        page_title = soup.find('meta', property='og:title')['content']
        # The description meta tag is optional.
        desc_meta = soup.find('meta', property='og:description')
        if desc_meta:
            description = desc_meta['content']
        else:
            description = ''
        tag_metas = soup.find_all('meta', property='article:tag')
        tags = ' '.join(meta['content'] for meta in tag_metas)
        pictures = soup.find('div', class_='entry-content').find_all('img')
        return {
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'title': row_title,
            'title2': page_title,
            'description': description,
            'tags': tags,
            'img': [pic['src'] for pic in pictures],
            'alt': ' '.join(pic['alt'] for pic in pictures),
        }

    @classmethod
    def get_url_from_archive_element(cls, tr):
        """Return the href of the anchor in the row's second cell."""
        # Unpacking enforces that the row holds exactly three cells.
        _, link_cell, _ = tr.find_all('td')
        anchor = link_cell.find('a')
        return anchor['href']

    @classmethod
    def get_archive_elements(cls):
        """Return the archive table's rows in reversed page order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive-2'))
        rows = archive_soup.find('tbody').find_all('tr')
        return reversed(rows)
2128
2129
2130
class HappleTea(GenericNavigableComic):
    """Retriever for the Happle Tea webcomic."""
    name = 'happletea'
    long_name = 'Happle Tea'
    url = 'http://www.happletea.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect title, images, alt text, date and author of one post."""
        pictures = soup.find('div', id='comic').find_all('img')
        # Title, author and date are all looked up inside the post content.
        post = soup.find('div', class_='post-content')
        post_title = post.find('h2', class_='post-title').string
        post_author = post.find('a', rel='author').string
        posted_on = post.find('span', class_='post-date').string
        day = string_to_date(posted_on, "%B %d, %Y")
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        return {
            'title': post_title,
            'img': [pic['src'] for pic in pictures],
            'alt': ''.join(pic['alt'] for pic in pictures),
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'author': post_author,
        }
2157
2158
2159
class FatAwesomeComics(GenericNavigableComic):
    """Retriever for the Fat Awesome webcomic."""
    # Also on http://fatawesomecomedy.tumblr.com
    name = 'fatawesome'
    long_name = 'Fat Awesome'
    url = 'http://fatawesome.com/comics'
    get_navi_link = get_a_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return a link-like dict pointing at the first comic."""
        first_url = 'http://fatawesome.com/shortbus/'
        return {'href': first_url}

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect title, description, tags, image and date of one post."""
        comic_title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        comic_desc = soup.find('meta', attrs={'name': 'description'})['content']
        # The tag meta element is optional.
        tag_meta = soup.find('meta', property='article:tag')
        if tag_meta:
            tags = tag_meta['content']
        else:
            tags = ""
        published = soup.find('meta', property='article:published_time')['content']
        # Only the leading "YYYY-MM-DD" part of the timestamp is parsed.
        day = string_to_date(published[:10], "%Y-%m-%d")
        pictures = soup.find_all('img', attrs={'data-recalc-dims': "1"})
        assert len(pictures) == 1
        alt_text = "".join(pic['alt'] for pic in pictures)
        # Everything from the last '?' of the image URL onwards is dropped.
        sources = [pic['src'].rsplit('?', 1)[0] for pic in pictures]
        return {
            'title': comic_title,
            'description': comic_desc,
            'tags': tags,
            'alt': alt_text,
            'img': sources,
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
2193
2194
2195
class AnythingComic(GenericComic):
    """Retriever for the Anything Comic webcomic."""
    # Also on http://tapastic.com/series/anything
    name = 'anythingcomic'
    long_name = 'Anything Comic'
    url = 'http://www.anythingcomic.com'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Yield an info dict for each archived comic newer than `last_comic`."""
        previous_num = last_comic['num'] if last_comic else 0
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive'))
        rows = archive_soup.find('table', id='chapter_table').find_all('tr')
        for row_index, row in enumerate(rows):
            # The first two rows of the table are not comic entries.
            if row_index <= 1:
                continue
            # A comic row must hold exactly four cells; the last is unused.
            num_cell, comic_cell, date_cell, _ = row.find_all('td')
            num = int(num_cell.string)
            assert num + 1 == row_index
            if num <= previous_num:
                continue
            anchor = comic_cell.find('a')
            comic_url = urljoin_wrapper(cls.url, anchor['href'])
            comic_title = anchor.string
            comic_soup = get_soup_at_url(comic_url)
            pictures = comic_soup.find_all('img', id='comic_image')
            day = string_to_date(date_cell.string, '%d %b %Y %I:%M %p')
            assert len(pictures) == 1
            assert all(pic.get('alt') == pic.get('title') for pic in pictures)
            yield {
                'url': comic_url,
                'num': num,
                'title': comic_title,
                'alt': pictures[0].get('alt', ''),
                'img': [pic['src'] for pic in pictures],
                'month': day.month,
                'year': day.year,
                'day': day.day,
            }
2230
2231
2232
class LonnieMillsap(GenericNavigableComic):
    """Retriever for Lonnie Millsap's comics."""
    name = 'millsap'
    long_name = 'Lonnie Millsap'
    url = 'http://www.lonniemillsap.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return a link-like dict pointing at the first comic."""
        first_url = 'http://www.lonniemillsap.com/?p=42'
        return {'href': first_url}

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect title, author, images and date of one post."""
        post_title = soup.find('h2', class_='post-title').string
        # Author, date and images are all looked up inside the post content.
        post = soup.find('div', class_='post-content')
        author_span = post.find("span", class_="post-author")
        post_author = author_span.find("a").string
        posted_on = post.find("span", class_="post-date").string
        day = string_to_date(posted_on, "%B %d, %Y")
        pictures = post.find("div", class_="entry").find_all("img")
        return {
            'title': post_title,
            'author': post_author,
            'img': [pic['src'] for pic in pictures],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
2261
2262
2263
class LinsEditions(GenericNavigableComic):
    """Retriever for the L.I.N.S. Editions webcomic."""
    # Also on http://linscomics.tumblr.com
    name = 'lins'
    long_name = 'L.I.N.S. Editions'
    url = 'https://linsedition.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return a link-like dict pointing at the first comic."""
        first_url = 'https://linsedition.com/2011/09/07/l-i-n-s/'
        return {'href': first_url}

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect title, images and date from the page's meta tags."""
        comic_title = soup.find('meta', property='og:title')['content']
        image_metas = soup.find_all('meta', property='og:image')
        published = soup.find('meta', property='article:published_time')['content']
        # Only the leading "YYYY-MM-DD" part of the timestamp is parsed.
        day = string_to_date(published[:10], "%Y-%m-%d")
        return {
            'title': comic_title,
            'img': [meta['content'] for meta in image_metas],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
2290
2291
2292
class ThorsThundershack(GenericNavigableComic):
    """Retriever for the Thor's Thundershack comics."""
    # Also on http://tapastic.com/series/Thors-Thundershac
    name = "thor"
    long_name = "Thor's Thundershack"
    url = "http://www.thorsthundershack.com"
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Fetch the home page and return its 'first navlink' anchor."""
        home = get_soup_at_url(cls.url)
        return home.find("a", class_="first navlink")

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the next (or previous) navigation anchor, or None.

        Anchors whose href is exactly '/comic' are skipped.
        """
        wanted_rel = "next" if next_ else "prev"
        anchors = last_soup.find_all("a", rel=wanted_rel)
        return next((a for a in anchors if a["href"] != "/comic"), None)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect the metadata of one comic page into a dict.

        `link` is not used. Image URLs are joined with the class URL.
        """
        headline = soup.find("meta", attrs={"name": "description"})["content"]
        body_text = soup.find("div", itemprop="articleBody").text
        writer = soup.find("span", itemprop="author copyrightHolder").string
        pictures = soup.find_all("img", itemprop="image")
        assert all(pic["title"] == pic["alt"] for pic in pictures)
        # The alt text of the first image is used; empty when there is none.
        alt_text = pictures[0]["alt"] if pictures else ""
        raw_date = soup.find("time", itemprop="datePublished")["datetime"]
        published = string_to_date(raw_date, "%Y-%m-%d %H:%M:%S")
        return {
            "img": [urljoin_wrapper(cls.url, pic["src"]) for pic in pictures],
            "month": published.month,
            "year": published.year,
            "day": published.day,
            "author": writer,
            "title": headline,
            "alt": alt_text,
            "description": body_text,
        }
2333
2334
2335
class GerbilWithAJetpack(GenericNavigableComic):
    """Retriever for the Gerbil With A Jetpack comics."""
    name = "gerbil"
    long_name = "Gerbil With A Jetpack"
    url = "http://gerbilwithajetpack.com"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect title, author, date, images and alt text of a comic page.

        `link` is not used. The first image must exist: its alt text is
        read unconditionally.
        """
        headline = soup.find("h2", class_="post-title").string
        author_span = soup.find("span", class_="post-author")
        writer = author_span.find("a").string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find("div", id="comic").find_all("img")
        alt_text = pictures[0]["alt"]
        # Every image is expected to carry the same alt and title text.
        assert all(pic["alt"] == pic["title"] == alt_text for pic in pictures)
        return {
            "img": [pic["src"] for pic in pictures],
            "title": headline,
            "alt": alt_text,
            "author": writer,
            "day": published.day,
            "month": published.month,
            "year": published.year,
        }
2362
2363
2364
class EveryDayBlues(GenericNavigableComic):
    """Retriever for the Every Day Blues comics."""
    name = 'blues'
    long_name = 'Every Day Blues'
    url = 'http://everydayblues.net'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect title, author, date and image of a comic page.

        `link` is not used. The date is parsed with the 'de_DE.utf8' locale.
        """
        headline = soup.find('h2', class_='post-title').string
        author_span = soup.find('span', class_='post-author')
        writer = author_span.find('a').string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, '%d. %B %Y', 'de_DE.utf8')
        pictures = soup.find('div', id='comic').find_all('img')
        # Alt and title of each image are expected to equal the post title.
        assert all(pic['alt'] == pic['title'] == headline for pic in pictures)
        assert len(pictures) <= 1
        return {
            'img': [pic['src'] for pic in pictures],
            'title': headline,
            'author': writer,
            'day': published.day,
            'month': published.month,
            'year': published.year,
        }
2390
2391
2392
class BiterComics(GenericNavigableComic):
    """Retriever for the Biter Comics."""
    name = 'biter'
    long_name = 'Biter Comics'
    url = 'http://www.bitercomics.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect title, author, date, image and alt text of a comic page.

        `link` is not used. Exactly one image is expected (asserted).
        """
        headline = soup.find('h1', class_='entry-title').string
        author_span = soup.find('span', class_='author vcard')
        writer = author_span.find('a').string
        raw_date = soup.find('span', class_='entry-date').string
        published = string_to_date(raw_date, '%B %d, %Y')
        pictures = soup.find('div', id='comic').find_all('img')
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        assert len(pictures) == 1
        alt_text = pictures[0]['alt']
        return {
            'img': [pic['src'] for pic in pictures],
            'title': headline,
            'alt': alt_text,
            'author': writer,
            'day': published.day,
            'month': published.month,
            'year': published.year,
        }
2420
2421
2422
class TheAwkwardYeti(GenericNavigableComic):
    """Retriever for The Awkward Yeti comics."""
    # Also on http://www.gocomics.com/the-awkward-yeti
    # Also on http://larstheyeti.tumblr.com
    # Also on https://tapastic.com/series/TheAwkwardYeti
    name = "yeti"
    long_name = "The Awkward Yeti"
    url = "http://theawkwardyeti.com"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect title, date and images of a comic page.

        `link` is not used.
        """
        headline = soup.find("h2", class_="post-title").string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find("div", id="comic").find_all("img")
        # Only the first image is required to have matching alt/title texts.
        assert all(pic["alt"] == pic["title"] for pic in pictures[:1])
        return {
            "img": [pic["src"] for pic in pictures],
            "title": headline,
            "day": published.day,
            "month": published.month,
            "year": published.year,
        }
2448
2449
2450
class PleasantThoughts(GenericNavigableComic):
    """Retriever for the Pleasant Thoughts comics."""
    name = "pleasant"
    long_name = "Pleasant Thoughts"
    url = "http://pleasant-thoughts.com"
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect the title and images found in the 'post-content' div.

        `link` is not used.
        """
        content = soup.find("div", class_="post-content")
        headline = content.find("h2", class_="post-title").string
        entry = content.find("div", class_="entry")
        pictures = entry.find_all("img")
        return {
            "title": headline,
            "img": [pic["src"] for pic in pictures],
        }
2468
2469
2470
class MisterAndMe(GenericNavigableComic):
    """Retriever for the Mister & Me comics."""
    # Also on http://www.gocomics.com/mister-and-me
    # Also on https://tapastic.com/series/Mister-and-Me
    name = "mister"
    long_name = "Mister & Me"
    url = "http://www.mister-and-me.com"
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect title, author, date, image and alt text of a comic page.

        `link` is not used. At most one image is expected (asserted).
        """
        headline = soup.find("h2", class_="post-title").string
        author_span = soup.find("span", class_="post-author")
        writer = author_span.find("a").string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find("div", id="comic").find_all("img")
        assert all(pic["alt"] == pic["title"] for pic in pictures)
        assert len(pictures) <= 1
        # Empty alt text when the page has no image.
        alt_text = pictures[0]["alt"] if pictures else ""
        return {
            "img": [pic["src"] for pic in pictures],
            "title": headline,
            "alt": alt_text,
            "author": writer,
            "day": published.day,
            "month": published.month,
            "year": published.year,
        }
2500
2501
2502
class LastPlaceComics(GenericNavigableComic):
    """Retriever for the Last Place Comics."""
    name = "lastplace"
    long_name = "LastPlaceComics"
    url = 'http://lastplacecomics.com'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect title, author, date, image and alt text of a comic page.

        `link` is not used. At most one image is expected (asserted).
        """
        headline = soup.find("h2", class_="post-title").string
        author_span = soup.find("span", class_="post-author")
        writer = author_span.find("a").string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find("div", id="comic").find_all("img")
        assert all(pic["alt"] == pic["title"] for pic in pictures)
        assert len(pictures) <= 1
        # Empty alt text when the page has no image.
        alt_text = pictures[0]["alt"] if pictures else ""
        return {
            "img": [pic["src"] for pic in pictures],
            "title": headline,
            "alt": alt_text,
            "author": writer,
            "day": published.day,
            "month": published.month,
            "year": published.year,
        }
2530
2531
2532
class TalesOfAbsurdity(GenericNavigableComic):
    """Retriever for the Tales Of Absurdity comics."""
    # Also on http://tapastic.com/series/Tales-Of-Absurdity
    # Also on http://talesofabsurdity.tumblr.com
    name = "absurdity"
    long_name = "Tales of Absurdity"
    url = "http://talesofabsurdity.com"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect title, author, date, images and alt text of a comic page.

        `link` is not used.
        """
        headline = soup.find("h2", class_="post-title").string
        author_span = soup.find("span", class_="post-author")
        writer = author_span.find("a").string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find("div", id="comic").find_all("img")
        assert all(pic["alt"] == pic["title"] for pic in pictures)
        # Empty alt text when the page has no image.
        alt_text = pictures[0]["alt"] if pictures else ""
        return {
            "img": [pic["src"] for pic in pictures],
            "title": headline,
            "alt": alt_text,
            "author": writer,
            "day": published.day,
            "month": published.month,
            "year": published.year,
        }
2561
2562
2563
class EndlessOrigami(GenericNavigableComic):
    """Retriever for the Endless Origami comics."""
    name = 'origami'
    long_name = 'Endless Origami'
    url = 'http://endlessorigami.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect title, author, date, images and alt text of a comic page.

        `link` is not used.
        """
        headline = soup.find("h2", class_="post-title").string
        author_span = soup.find("span", class_="post-author")
        writer = author_span.find("a").string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find("div", id="comic").find_all("img")
        assert all(pic["alt"] == pic["title"] for pic in pictures)
        # Empty alt text when the page has no image.
        alt_text = pictures[0]["alt"] if pictures else ""
        return {
            "img": [pic["src"] for pic in pictures],
            "title": headline,
            "alt": alt_text,
            "author": writer,
            "day": published.day,
            "month": published.month,
            "year": published.year,
        }
2590
2591
2592
class PlanC(GenericNavigableComic):
    """Retriever for the Plan C comics."""
    name = "planc"
    long_name = "Plan C"
    url = "http://www.plancomic.com"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect title, date and images of a comic page.

        `link` is not used.
        """
        headline = soup.find("h2", class_="post-title").string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        comic_div = soup.find("div", id="comic")
        pictures = comic_div.find_all("img")
        return {
            "title": headline,
            "img": [pic["src"] for pic in pictures],
            "month": published.month,
            "year": published.year,
            "day": published.day,
        }
2614
2615
2616
class BuniComic(GenericNavigableComic):
    """Retriever for the Buni comics."""
    name = "buni"
    long_name = "BuniComics"
    url = "http://www.bunicomic.com"
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the image and its title text for a comic page.

        `link` is not used. Exactly one image is expected (asserted); the
        comic title is taken from that image's 'title' attribute.
        """
        comic_div = soup.find("div", id="comic")
        pictures = comic_div.find_all("img")
        assert all(pic["alt"] == pic["title"] for pic in pictures)
        assert len(pictures) == 1
        return {
            "img": [pic["src"] for pic in pictures],
            "title": pictures[0]["title"],
        }
2634
2635
2636
class GenericCommitStrip(GenericNavigableComic):
    """Base retriever shared by the Commit Strip classes of each language."""
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect titles, description and image URLs of a strip page.

        `link` is not used. 'title2' is the space-joined 'title' attributes
        of the images (empty string for images without one). Each image
        source is converted to plain ASCII and joined with the class URL.
        """
        description = soup.find("meta", property="og:description")["content"]
        headline = soup.find("meta", property="og:title")["content"]
        entry = soup.find("div", class_="entry-content")
        pictures = entry.find_all("img")
        secondary_title = " ".join(pic.get("title", "") for pic in pictures)
        return {
            "title": headline,
            "title2": secondary_title,
            "description": description,
            "img": [
                urljoin_wrapper(cls.url, convert_iri_to_plain_ascii_uri(pic["src"]))
                for pic in pictures
            ],
        }
2653
2654
2655
class CommitStripFr(GenericCommitStrip):
    """Retriever for the French edition of Commit Strip."""
    name = "commit_fr"
    long_name = "Commit Strip (Fr)"
    url = "http://www.commitstrip.com/fr"

    @classmethod
    def get_first_comic_link(cls):
        """Return a link-like dict whose 'href' is the first French strip."""
        first_href = "http://www.commitstrip.com/fr/2012/02/22/interview/"
        return {"href": first_href}
2665
2666
2667
class CommitStripEn(GenericCommitStrip):
    """Retriever for the English edition of Commit Strip."""
    name = "commit_en"
    long_name = "Commit Strip (En)"
    url = "http://www.commitstrip.com/en"

    @classmethod
    def get_first_comic_link(cls):
        """Return a link-like dict whose 'href' is the first English strip."""
        first_href = "http://www.commitstrip.com/en/2012/02/22/interview/"
        return {"href": first_href}
2677
2678
2679
class GenericBoumerie(GenericNavigableComic):
    """Base retriever shared by the Boumeries classes of each language.

    Subclasses set `date_format` and `lang`, which are passed to
    string_to_date when parsing the post date.
    """
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next
    # Placeholders to be overridden by the language-specific subclasses.
    date_format = NotImplemented
    lang = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect short URL, images, title, author and date of a comic page.

        `link` is not used.
        """
        headline = soup.find("h2", class_="post-title").string
        shortlink = soup.find("link", rel="shortlink")["href"]
        author_span = soup.find("span", class_="post-author")
        writer = author_span.find("a").string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, cls.date_format, cls.lang)
        pictures = soup.find("div", id="comic").find_all("img")
        assert all(pic["alt"] == pic["title"] for pic in pictures)
        return {
            "short_url": shortlink,
            "img": [pic["src"] for pic in pictures],
            "title": headline,
            "author": writer,
            "month": published.month,
            "year": published.year,
            "day": published.day,
        }
2705
2706
2707
class BoumerieEn(GenericBoumerie):
    """Retriever for the English edition of Boumeries."""
    name = "boumeries_en"
    long_name = "Boumeries (En)"
    url = "http://comics.boumerie.com"
    # Date parsing settings used by the inherited get_comic_info.
    date_format = '%B %d, %Y'
    lang = "en_GB.UTF-8"
2714
2715
2716
class BoumerieFr(GenericBoumerie):
    """Retriever for the French edition of Boumeries."""
    name = "boumeries_fr"
    long_name = "Boumeries (Fr)"
    url = "http://bd.boumerie.com"
    # Date parsing settings used by the inherited get_comic_info.
    date_format = '%A, %d %B %Y'
    lang = 'fr_FR.utf8'
2723
2724
2725
class UnearthedComics(GenericNavigableComic):
    """Class to retrieve Unearthed comics."""
    # Also on http://tapastic.com/series/UnearthedComics
    # Also on http://unearthedcomics.tumblr.com
    name = 'unearthed'
    long_name = 'Unearthed Comics'
    url = 'http://unearthedcomics.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comic (a dict with the hard-coded 'href')."""
        return {'href': 'http://unearthedcomics.com/comics/world-with-turn-signals/'}

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        `soup` is the parsed comic page; `link` is not used.
        Returns a dict with 'title', 'description', 'url2', 'img',
        'month', 'year' and 'day'. 'description' is the text of the
        og:description meta tag, or None when the page has no such tag.
        """
        short_url = soup.find('link', rel='shortlink')['href']
        # The title may be in an <h1> or an <h2>; fall back to "" if neither exists.
        title_elt = soup.find('h1') or soup.find('h2')
        title = title_elt.string if title_elt else ""
        # Read the 'content' attribute so that the description is plain text
        # rather than the <meta> element itself.
        desc_elt = soup.find('meta', property='og:description')
        desc = desc_elt['content'] if desc_elt is not None else None
        date_str = soup.find('time', class_='published updated hidden')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        post = soup.find('div', class_="entry content entry-content type-portfolio")
        imgs = post.find_all('img')
        return {
            'title': title,
            'description': desc,
            'url2': short_url,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
2759
2760
2761
class Optipess(GenericNavigableComic):
    """Retriever for the Optipess comics."""
    name = "optipess"
    long_name = "Optipess"
    url = "http://www.optipess.com"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect title, alt text, author, images and date of a comic page.

        `link` is not used. A page without a 'comic' div yields no images
        and an empty alt text.
        """
        headline = soup.find("h2", class_="post-title").string
        author_span = soup.find("span", class_="post-author")
        writer = author_span.find("a").string
        comic_div = soup.find("div", id="comic")
        if comic_div:
            pictures = comic_div.find_all("img")
        else:
            pictures = []
        # The alt text is taken from the first image's 'title' attribute.
        alt_text = pictures[0]["title"] if pictures else ""
        assert all(pic["alt"] == pic["title"] == alt_text for pic in pictures)
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        return {
            "title": headline,
            "alt": alt_text,
            "author": writer,
            "img": [pic["src"] for pic in pictures],
            "month": published.month,
            "year": published.year,
            "day": published.day,
        }
2789
2790
2791
class PainTrainComic(GenericNavigableComic):
    """Class to retrieve Pain Train Comics."""
    name = 'paintrain'
    long_name = 'Pain Train Comics'
    url = 'http://paintraincomic.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        `soup` is the parsed comic page; `link` is not used.
        Returns a dict with 'short_url', 'num', 'img', 'month', 'year',
        'day', 'alt' and 'title'. 'num' is the integer found after '?p='
        in the page's shortlink.
        """
        title = soup.find('h2', class_='post-title').string
        short_url = soup.find('link', rel='shortlink')['href']
        # Escape the site URL so its dots are matched literally instead of
        # acting as regex wildcards.
        short_url_re = re.compile(r'^%s/\?p=([0-9]*)' % re.escape(cls.url))
        num = int(short_url_re.match(short_url).group(1))
        imgs = soup.find('div', id='comic').find_all('img')
        # The first image must exist: its title is used as the alt text.
        alt = imgs[0]['title']
        assert all(i['alt'] == i['title'] == alt for i in imgs)
        date_str = soup.find('span', class_='post-date').string
        day = string_to_date(date_str, "%d/%m/%Y")
        return {
            'short_url': short_url,
            'num': num,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'alt': alt,
            'title': title,
        }
2821 View Code Duplication
0 ignored issues
show
This code seems to be duplicated in your project.
Loading history...
2822
2823
class MoonBeard(GenericNavigableComic):
    """Class to retrieve MoonBeard comics."""
    # Also on http://blog.squiresjam.es/moonbeard
    # Also on http://www.webtoons.com/en/comedy/moon-beard/list?title_no=471
    name = 'moonbeard'
    long_name = 'Moon Beard'
    url = 'http://moonbeard.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        `soup` is the parsed comic page; `link` is not used.
        Returns a dict with 'short_url', 'num', 'img', 'month', 'year',
        'day', 'title', 'tags', 'alt' and 'author'. 'num' is the integer
        found after '?p=' in the page's shortlink; 'tags' is the
        space-joined content of the article:tag meta elements.
        """
        title = soup.find('h2', class_='post-title').string
        short_url = soup.find('link', rel='shortlink')['href']
        # Escape the site URL so its dots are matched literally instead of
        # acting as regex wildcards.
        short_url_re = re.compile(r'^%s/\?p=([0-9]*)' % re.escape(cls.url))
        num = int(short_url_re.match(short_url).group(1))
        imgs = soup.find('div', id='comic').find_all('img')
        # The first image must exist: its title is used as the alt text.
        alt = imgs[0]['title']
        assert all(i['alt'] == i['title'] == alt for i in imgs)
        date_str = soup.find('span', class_='post-date').string
        day = string_to_date(date_str, "%B %d, %Y")
        tags = ' '.join(t['content'] for t in soup.find_all('meta', property='article:tag'))
        author = soup.find('span', class_='post-author').string
        return {
            'short_url': short_url,
            'num': num,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'title': title,
            'tags': tags,
            'alt': alt,
            'author': author,
        }
2859
2860
2861
class AHamADay(GenericNavigableComic):
    """Class to retrieve A Ham A Day comics."""
    name = 'ham'
    long_name = 'A Ham A Day'
    url = 'http://www.ahammaday.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comic (a dict with the hard-coded 'href')."""
        return {'href': 'http://www.ahammaday.com/today/3/6/french'}

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get the navigation link to the next (or previous) comic.

        Returns the <a> element found in the relevant <li>, or None when
        the page has no such <li>.
        """
        # prev is next / next is prev
        li = last_soup.find('li', class_='previous' if next_ else 'next')
        # Guard against a missing <li> instead of failing on None.find().
        return li.find('a') if li else None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        `soup` is the parsed comic page; `link` is not used.
        Returns a dict with 'img', 'title', 'author', 'day', 'month'
        and 'year'.
        """
        date_str = soup.find('time', class_='published')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        author = soup.find('span', class_='blog-author').find('a').string
        title = soup.find('meta', property='og:title')['content']
        imgs = soup.find_all('meta', itemprop='image')
        return {
            'img': [i['content'] for i in imgs],
            'title': title,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
        }
2894
2895
2896
class LittleLifeLines(GenericNavigableComic):
    """Retriever for the Little Life Lines comics."""
    name = "life"
    long_name = "Little Life Lines"
    url = "http://www.littlelifelines.com"
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return a link-like dict whose 'href' is the first comic's URL."""
        first_href = "http://www.littlelifelines.com/comics/well-done"
        return {"href": first_href}

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor leading to the next (or previous) comic, or None."""
        # The site's labels are inverted: its 'prev' item leads to the next comic.
        wanted_class = "prev" if next_ else "next"
        item = last_soup.find("li", class_=wanted_class)
        if not item:
            return None
        return item.find("a")

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect title, alt text, description, author, date and images.

        `link` is not used. Images without a 'src' attribute are ignored;
        at least one image with a 'src' must remain, since the alt text
        is read from the first one.
        """
        headline = soup.find("meta", property="og:title")["content"]
        description = soup.find("meta", property="og:description")["content"]
        raw_date = soup.find("time", class_="published")["datetime"]
        published = string_to_date(raw_date, "%Y-%m-%d")
        writer = soup.find("a", rel="author").string
        body = soup.find("div", class_="body entry-content")
        pictures = [pic for pic in body.find_all("img") if pic.get("src") is not None]
        alt_text = pictures[0]["alt"]
        return {
            "title": headline,
            "alt": alt_text,
            "description": description,
            "author": writer,
            "day": published.day,
            "month": published.month,
            "year": published.year,
            "img": [pic["src"] for pic in pictures],
        }
2936
2937
2938
class GenericWordPressInkblot(GenericNavigableComic):
    """Base retriever for comics published with WordPress and Inkblot."""
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Fetch the home page and return its 'first webcomic' anchor."""
        home = get_soup_at_url(cls.url)
        first_link_class = "webcomic-link webcomic1-link first-webcomic-link first-webcomic1-link"
        return home.find("a", class_=first_link_class)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect title, date and images of a comic page.

        `link` is not used.
        """
        headline = soup.find("meta", property="og:title")["content"]
        image_div = soup.find("div", class_="webcomic-image")
        pictures = image_div.find_all("img")
        timestamp = soup.find("meta", property="article:published_time")["content"]
        # Only the first 10 characters (the YYYY-MM-DD part) are parsed.
        published = string_to_date(timestamp[:10], "%Y-%m-%d")
        return {
            "title": headline,
            "day": published.day,
            "month": published.month,
            "year": published.year,
            "img": [pic["src"] for pic in pictures],
        }
2961
2962
2963
class EverythingsStupid(GenericWordPressInkblot):
    """Retriever for the Everything's Stupid comics."""
    # Also on http://tapastic.com/series/EverythingsStupid
    # Also on http://www.webtoons.com/en/challenge/everythings-stupid/list?title_no=14591
    # Also on http://everythingsstupidcomics.tumblr.com
    name = "stupid"
    long_name = 'Everything\'s Stupid'
    url = "http://everythingsstupid.net"
2971
2972
2973
class TheIsmComics(GenericWordPressInkblot):
    """Retriever for The Ism comics."""
    # Also on https://tapastic.com/series/TheIsm (?)
    name = "theism"
    long_name = 'The Ism'
    url = "http://www.theism-comics.com"
2979
2980
2981
class WoodenPlankStudios(GenericWordPressInkblot):
    """Retriever for the Wooden Plank Studios comics."""
    name = "woodenplank"
    long_name = "Wooden Plank Studios"
    url = "http://woodenplankstudios.com"
2986
2987
2988
class ElectricBunnyComic(GenericNavigableComic):
    """Retriever for the Electric Bunny comics."""
    # Also on http://electricbunnycomics.tumblr.com
    name = "bunny"
    long_name = "Electric Bunny Comic"
    url = "http://www.electricbunnycomics.com/View/Comic/153/Welcome+to+Hell"
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Fetch the class URL and return the parent of the 'First' image."""
        page = get_soup_at_url(cls.url)
        first_img = page.find("img", alt="First")
        return first_img.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the parent of the 'Next' (or 'Back') image, or None."""
        wanted_alt = "Next" if next_ else "Back"
        arrow = last_soup.find("img", alt=wanted_alt)
        if not arrow:
            return None
        return arrow.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect the title and images from the page's og: meta tags.

        `link` is not used.
        """
        headline = soup.find("meta", property="og:title")["content"]
        og_images = soup.find_all("meta", property="og:image")
        return {
            "title": headline,
            "img": [meta["content"] for meta in og_images],
        }
3015
3016
3017
class SheldonComics(GenericNavigableComic):
    """Retriever for the Sheldon comics."""
    # Also on http://www.gocomics.com/sheldon
    name = "sheldon"
    long_name = "Sheldon Comics"
    url = "http://www.sheldoncomics.com"

    @classmethod
    def get_first_comic_link(cls):
        """Fetch the home page and return its 'nav-first' anchor."""
        home = get_soup_at_url(cls.url)
        return home.find('a', id='nav-first')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the next (or previous) navigation anchor, or None.

        Anchors pointing at the bare site URL are skipped.
        """
        wanted_id = 'nav-next' if next_ else 'nav-prev'
        anchors = last_soup.find_all('a', id=wanted_id)
        return next(
            (a for a in anchors if a["href"] != "http://www.sheldoncomics.com"),
            None,
        )

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the title and image of a comic page.

        `link` is not used. Exactly one image is expected (asserted); the
        comic title is taken from that image's 'title' attribute.
        """
        foot = soup.find('div', id='comic-foot')
        pictures = foot.find_all('img')
        assert all(pic["alt"] == pic["title"] for pic in pictures)
        assert len(pictures) == 1
        headline = pictures[0]["title"]
        return {
            "title": headline,
            "img": [pic["src"] for pic in pictures],
        }
3047
3048
3049
class CubeDrone(GenericNavigableComic):
    """Class to retrieve Cube Drone comics."""
    name = 'cubedrone'
    long_name = 'Cube Drone'
    url = 'http://cube-drone.com/comics'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        The link is the parent of the 'glyphicon-backward' span found on the
        page at cls.url.
        """
        return get_soup_at_url(cls.url).find('span', class_='glyphicon glyphicon-backward').parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to the next (next_ true) or previous comic.

        Returns the parent of the chevron span, or None when the page has no
        such span.
        """
        class_ = 'glyphicon glyphicon-chevron-' + ('right' if next_ else 'left')
        span = last_soup.find('span', class_=class_)
        # find() yields None when nothing matches: report "no link" instead of
        # failing with AttributeError on `.parent`.
        return span.parent if span is not None else None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Title and secondary url come from the twitter meta tags; 'title2' and
        'alt' come from the first comic image.
        """
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        url2 = soup.find('meta', attrs={'name': 'twitter:url'})['content']
        # date_str = soup.find('h2', class_='comic_title').find('small').string
        # day = string_to_date(date_str, "%B %d, %Y, %I:%M %p")
        imgs = soup.find_all('img', class_='comic img-responsive')
        title2 = imgs[0]['title']
        alt = imgs[0]['alt']
        return {
            'url2': url2,
            'title': title,
            'title2': title2,
            'alt': alt,
            'img': [i['src'] for i in imgs],
        }
3083
3084
3085
class MakeItStoopid(GenericNavigableComic):
    """Make It Stoopid comics, retrieved from makeitstoopid.com."""
    name = 'stoopid'
    long_name = 'Make it stoopid'
    url = 'http://makeitstoopid.com/comic.php'

    @classmethod
    def get_nav(cls, soup):
        """Return the five navigation elements of a page.

        The page is expected to hold the same navigation elements twice.
        Elements without an 'href' are replaced by None.
        """
        nav_elements = soup.find_all(class_='cnav')
        first_bar = nav_elements[:5]
        second_bar = nav_elements[5:]
        assert first_bar == second_bar
        # Order is: begin, prev, archive, next_, end
        return [elt if elt.get('href') is not None else None
                for elt in first_bar]

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'begin' navigation element of the page at cls.url."""
        nav = cls.get_nav(get_soup_at_url(cls.url))
        return nav[0]

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the 'next' (index 3) or 'prev' (index 1) navigation element."""
        index = 3 if next_ else 1
        return cls.get_nav(last_soup)[index]

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the title (taken from the link) and the 'comicimg' images."""
        comic_title = link['title']
        comic_imgs = soup.find_all('img', id='comicimg')
        return {
            'title': comic_title,
            'img': [img['src'] for img in comic_imgs],
        }
3117
3118
3119
class GeekAndPoke(GenericNavigableComic):
    """Class to retrieve Geek And Poke comics."""
    name = 'geek'
    long_name = 'Geek And Poke'
    url = 'http://geek-and-poke.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        The first post is hard-coded rather than looked up on the site.
        """
        return {'href': 'http://geek-and-poke.com/geekandpoke/2006/8/27/a-new-place-for-a-not-so-old-blog.html'}

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to the next (next_ true) or previous comic."""
        # NOTE(review): 'prev-item' is used to move forward and 'next-item' to
        # move back; presumably the site lists newest posts first - confirm.
        return last_soup.find('a', class_='prev-item' if next_ else 'next-item')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with title, alt text, description, author, publication
        date and absolute image urls. 'alt' is always a string: the 'alt' of
        the first image, or "" when there is no image or no 'alt'.
        """
        title = soup.find('meta', property='og:title')['content']
        desc = soup.find('meta', property='og:description')['content']
        date_str = soup.find('time', class_='published')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        author = soup.find('a', rel='author').string
        div_content = (soup.find('div', class_="body entry-content") or
                       soup.find('div', class_="special-content"))
        imgs = div_content.find_all('img')
        imgs = [i for i in imgs if i.get('src') is not None]
        # NOTE(review): on a BeautifulSoup tag, `in` seems to test the tag's
        # children rather than its attributes, so this check may never fire -
        # confirm before relying on it.
        assert all('title' not in i or i['alt'] == i['title'] for i in imgs)
        # Keep 'alt' a string even when the post has no image (was a list).
        alt = imgs[0].get('alt', "") if imgs else ""
        return {
            'title': title,
            'alt': alt,
            'description': desc,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
        }
3159
3160
3161
class GenericTumblrV1(GenericComic):
    """Generic class to retrieve comics from Tumblr using the V1 API.

    Subclasses are expected to define `url` (used to build the API url)
    along with `name` and `long_name`.
    """

    @classmethod
    def get_next_comic(cls, last_comic):
        """Yield the comics following last_comic, in chronological order.

        Posts for which get_comic_info returns None are skipped.
        """
        for p in cls.get_posts(last_comic):
            comic = cls.get_comic_info(p)
            if comic is not None:
                yield comic

    @classmethod
    def get_url_from_post(cls, post):
        """Return the 'url' attribute of a post."""
        return post['url']

    @classmethod
    def get_api_url(cls):
        """Return the url of the V1 API entry point ('/api/read/') for cls.url."""
        return urljoin_wrapper(cls.url, '/api/read/')

    @classmethod
    def get_comic_info(cls, post):
        """Get information about a particular comics.

        Returns None for posts whose type is not 'photo', otherwise a dict
        describing the comic (urls, date, title, tags, images, tumblr id).
        """
        type_ = post['type']
        if type_ != 'photo':
            return None
        tumblr_id = int(post['id'])
        api_url = cls.get_api_url() + '?id=%d' % (tumblr_id)
        # fromtimestamp() converts to local time: the date depends on the
        # timezone of the machine running the code.
        day = datetime.datetime.fromtimestamp(int(post['unix-timestamp'])).date()
        caption = post.find('photo-caption')
        title = caption.string if caption else ""
        tags = ' '.join(t.string for t in post.find_all('tag'))
        # Photos may appear in 'photo' tags and/or straight in the post
        photo_tags = post.find_all('photo')
        if not photo_tags:
            photo_tags = [post]
        # Images are in multiple resolutions - taking the first one
        imgs = [photo.find('photo-url') for photo in photo_tags]
        return {
            'url': cls.get_url_from_post(post),
            'url2': post['url-with-slug'],
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'title': title,
            'tags': tags,
            'img': [i.string for i in imgs],
            'tumblr-id': tumblr_id,
            'api_url': api_url,  # for debug purposes
        }

    @classmethod
    def get_posts(cls, last_comic, nb_post_per_call=10):
        """Get posts using API. nb_post_per_call is max 50.

        Posts are retrieved from newer to older as per the tumblr v1 api
        but are returned in chronological order.

        Returns an iterable over the posts newer than last_comic (over all
        posts when last_comic is None). The iterable is empty when the post
        corresponding to last_comic cannot be found.
        """
        # `import urllib` alone does not guarantee that the `urllib.error`
        # submodule is loaded: import it explicitly before using it below.
        import urllib.error
        waiting_for_url = last_comic['url'] if last_comic else None
        posts_acc = []
        if last_comic is not None:
            # Sometimes, tumblr posts are deleted. If the previous post has
            # been deleted, we might end up spending a lot of time looking
            # for something that doesn't exist. Failing early and clearly
            # might be a better option.
            last_api_url = last_comic['api_url']
            try:
                get_soup_at_url(last_api_url)
            except urllib.error.HTTPError:
                try:
                    get_soup_at_url(cls.url)
                except urllib.error.HTTPError:
                    print("Did not find previous post nor main url %s" % cls.url)
                else:
                    print("Did not find previous post %s : it might have been deleted" % last_api_url)
                # Nothing has been collected yet: same empty result as the
                # failure path at the end of the method.
                return []
        api_url = cls.get_api_url()
        posts = get_soup_at_url(api_url).find('posts')
        start, total = int(posts['start']), int(posts['total'])
        assert start == 0
        for starting_num in range(0, total, nb_post_per_call):
            api_url2 = api_url + '?start=%d&num=%d' % (starting_num, nb_post_per_call)
            posts2 = get_soup_at_url(api_url2).find('posts')
            start2, total2 = int(posts2['start']), int(posts2['total'])
            assert starting_num == start2, "%d != %d" % (starting_num, start2)
            # This may happen and should be handled in the future
            assert total == total2, "%d != %d" % (total, total2)
            for p in posts2.find_all('post'):
                # Stop as soon as the last known post is reached: everything
                # accumulated so far is newer than it.
                if waiting_for_url and waiting_for_url == cls.get_url_from_post(p):
                    return reversed(posts_acc)
                posts_acc.append(p)
        if waiting_for_url is None:
            return reversed(posts_acc)
        print("Did not find %s : there might be a problem" % waiting_for_url)
        return []
3255
3256
3257
class IrwinCardozo(GenericTumblrV1):
    """Irwin Cardozo comics, retrieved through the Tumblr V1 API."""
    name = 'irwinc'
    long_name = 'Irwin Cardozo'
    url = 'http://irwincardozocomics.tumblr.com'
3262
3263
3264
class AccordingToDevin(GenericTumblrV1):
    """According To Devin comics, retrieved through the Tumblr V1 API."""
    name = 'devin'
    long_name = 'According To Devin'
    url = 'http://accordingtodevin.tumblr.com'
3269
3270
3271
class ItsTheTieTumblr(GenericTumblrV1):
    """It's the tie comics, retrieved through the Tumblr V1 API."""
    # Also on http://itsthetie.com
    # Also on https://tapastic.com/series/itsthetie
    name = 'tie-tumblr'
    long_name = "It's the tie (from Tumblr)"
    url = "http://itsthetie.tumblr.com"
3278
3279
3280
class OctopunsTumblr(GenericTumblrV1):
    """Octopuns comics, retrieved through the Tumblr V1 API."""
    # Also on http://www.octopuns.net
    name = 'octopuns-tumblr'
    long_name = 'Octopuns (from Tumblr)'
    url = 'http://octopuns.tumblr.com'
3286
3287
3288
class PicturesInBoxesTumblr(GenericTumblrV1):
    """Pictures In Boxes comics, retrieved through the Tumblr V1 API."""
    # Also on http://www.picturesinboxes.com
    name = 'picturesinboxes-tumblr'
    long_name = 'Pictures in Boxes (from Tumblr)'
    url = 'http://picturesinboxescomic.tumblr.com'
3294
3295
3296
class TubeyToonsTumblr(GenericTumblrV1):
    """TubeyToons comics, retrieved through the Tumblr V1 API."""
    # Also on http://tapastic.com/series/Tubey-Toons
    # Also on http://tubeytoons.com
    name = 'tubeytoons-tumblr'
    long_name = 'Tubey Toons (from Tumblr)'
    url = 'http://tubeytoons.tumblr.com'
3303
3304
3305
class UnearthedComicsTumblr(GenericTumblrV1):
    """Unearthed comics, retrieved through the Tumblr V1 API."""
    # Also on http://tapastic.com/series/UnearthedComics
    # Also on http://unearthedcomics.com
    name = 'unearthed-tumblr'
    long_name = 'Unearthed Comics (from Tumblr)'
    url = 'http://unearthedcomics.tumblr.com'
3312
3313
3314
class PieComic(GenericTumblrV1):
    """Pie Comic comics, retrieved through the Tumblr V1 API."""
    name = 'pie'
    long_name = 'Pie Comic'
    url = "http://piecomic.tumblr.com"
3319
3320
3321
class MrEthanDiamond(GenericTumblrV1):
    """Mr Ethan Diamond comics, retrieved through the Tumblr V1 API."""
    name = 'diamond'
    long_name = 'Mr Ethan Diamond'
    url = 'http://mrethandiamond.tumblr.com'
3326
3327
3328
class Flocci(GenericTumblrV1):
    """floccinaucinihilipilification comics, retrieved through the Tumblr V1 API."""
    name = 'flocci'
    long_name = 'floccinaucinihilipilification'
    url = "http://floccinaucinihilipilificationa.tumblr.com"
3333
3334
3335
class UpAndOut(GenericTumblrV1):
    """Up & Out comics, retrieved through the Tumblr V1 API."""
    # Also on http://tapastic.com/series/UP-and-OUT
    name = 'upandout'
    long_name = 'Up And Out (from Tumblr)'
    url = 'http://upandoutcomic.tumblr.com'
3341
3342
3343
class Pundemonium(GenericTumblrV1):
    """Pundemonium comics, retrieved through the Tumblr V1 API."""
    name = 'pundemonium'
    long_name = 'Pundemonium'
    url = 'http://monstika.tumblr.com'
3348
3349
3350
class PoorlyDrawnLinesTumblr(GenericTumblrV1):
    """Poorly Drawn Lines comics, retrieved through the Tumblr V1 API."""
    # Also on http://poorlydrawnlines.com
    name = 'poorlydrawn-tumblr'
    long_name = 'Poorly Drawn Lines (from Tumblr)'
    url = 'http://pdlcomics.tumblr.com'
3356
3357
3358
class PearShapedComics(GenericTumblrV1):
    """Pear Shaped Comics, retrieved through the Tumblr V1 API."""
    name = 'pearshaped'
    long_name = 'Pear-Shaped Comics'
    url = 'http://pearshapedcomics.com'
3363
3364
3365
class PondScumComics(GenericTumblrV1):
    """Pond Scum Comics, retrieved through the Tumblr V1 API."""
    name = 'pond'
    long_name = 'Pond Scum'
    url = 'http://pondscumcomic.tumblr.com'
3370
3371
3372
class MercworksTumblr(GenericTumblrV1):
    """Mercworks comics, retrieved through the Tumblr V1 API."""
    # Also on http://mercworks.net
    name = 'mercworks-tumblr'
    long_name = 'Mercworks (from Tumblr)'
    url = 'http://mercworks.tumblr.com'
3378
3379
3380
class OwlTurdTumblr(GenericTumblrV1):
    """Owl Turd comics, retrieved through the Tumblr V1 API."""
    # Also on http://tapastic.com/series/Owl-Turd-Comix
    name = 'owlturd-tumblr'
    long_name = 'Owl Turd (from Tumblr)'
    url = 'http://owlturd.com'
3386
3387
3388
class VectorBelly(GenericTumblrV1):
    """Vector Belly comics, retrieved through the Tumblr V1 API."""
    # Also on http://vectorbelly.com
    name = 'vector'
    long_name = 'Vector Belly'
    url = 'http://vectorbelly.tumblr.com'
3394
3395
3396
class GoneIntoRapture(GenericTumblrV1):
    """Gone Into Rapture comics, retrieved through the Tumblr V1 API."""
    # Also on http://goneintorapture.tumblr.com
    # Also on http://tapastic.com/series/Goneintorapture
    name = 'rapture'
    long_name = 'Gone Into Rapture'
    url = 'http://www.goneintorapture.com'
3403
3404
3405
class TheOatmealTumblr(GenericTumblrV1):
    """The Oatmeal comics, retrieved through the Tumblr V1 API."""
    # Also on http://theoatmeal.com
    name = 'oatmeal-tumblr'
    long_name = 'The Oatmeal (from Tumblr)'
    url = 'http://oatmeal.tumblr.com'
3411
3412
3413
class HeckIfIKnowComicsTumblr(GenericTumblrV1):
    """Heck If I Know Comics, retrieved through the Tumblr V1 API."""
    # Also on http://tapastic.com/series/Regular
    name = 'heck-tumblr'
    long_name = 'Heck if I Know comics (from Tumblr)'
    url = 'http://heckifiknowcomics.com'
3419
3420
3421
class MyJetPack(GenericTumblrV1):
    """My Jet Pack comics, retrieved through the Tumblr V1 API."""
    name = 'jetpack'
    long_name = 'My Jet Pack'
    url = 'http://myjetpack.tumblr.com'
3426
3427
3428
class CheerUpEmoKidTumblr(GenericTumblrV1):
    """CheerUpEmoKid comics, retrieved through the Tumblr V1 API."""
    # Also on http://www.cheerupemokid.com
    # Also on http://tapastic.com/series/CUEK
    name = 'cuek-tumblr'
    long_name = 'Cheer Up Emo Kid (from Tumblr)'
    url = 'http://enzocomics.tumblr.com'
3435
3436
3437
class ForLackOfABetterComic(GenericTumblrV1):
    """For Lack Of A Better Comic, retrieved through the Tumblr V1 API."""
    # Also on http://forlackofabettercomic.com
    name = 'lack'
    long_name = 'For Lack Of A Better Comic'
    url = 'http://forlackofabettercomic.tumblr.com'
3443
3444
3445
class ZenPencilsTumblr(GenericTumblrV1):
    """ZenPencils comics, retrieved through the Tumblr V1 API."""
    # Also on http://zenpencils.com
    # Also on http://www.gocomics.com/zen-pencils
    name = 'zenpencils-tumblr'
    long_name = 'Zen Pencils (from Tumblr)'
    url = 'http://zenpencils.tumblr.com'
3452
3453
3454
class ThreeWordPhraseTumblr(GenericTumblrV1):
    """Three Word Phrase comics, retrieved through the Tumblr V1 API."""
    # Also on http://threewordphrase.com
    name = 'threeword-tumblr'
    long_name = 'Three Word Phrase (from Tumblr)'
    url = 'http://www.threewordphrase.tumblr.com'
3460
3461
3462
class TimeTrabbleTumblr(GenericTumblrV1):
    """Time Trabble comics, retrieved through the Tumblr V1 API."""
    # Also on http://timetrabble.com
    name = 'timetrabble-tumblr'
    long_name = 'Time Trabble (from Tumblr)'
    url = 'http://timetrabble.tumblr.com'
3468
3469
3470
class SafelyEndangeredTumblr(GenericTumblrV1):
    """Safely Endangered comics, retrieved through the Tumblr V1 API."""
    # Also on http://www.safelyendangered.com
    name = 'endangered-tumblr'
    long_name = 'Safely Endangered (from Tumblr)'
    url = 'http://tumblr.safelyendangered.com'
3476
3477
3478
class MouseBearComedyTumblr(GenericTumblrV1):
    """Mouse Bear Comedy comics, retrieved through the Tumblr V1 API."""
    # Also on http://www.mousebearcomedy.com
    name = 'mousebear-tumblr'
    long_name = 'Mouse Bear Comedy (from Tumblr)'
    url = 'http://mousebearcomedy.tumblr.com'
3484
3485
3486
class BouletCorpTumblr(GenericTumblrV1):
    """BouletCorp comics, retrieved through the Tumblr V1 API."""
    # Also on http://www.bouletcorp.com
    name = 'boulet-tumblr'
    long_name = 'Boulet Corp (from Tumblr)'
    url = 'http://bouletcorp.tumblr.com'
3492
3493
3494
class TheAwkwardYetiTumblr(GenericTumblrV1):
    """The Awkward Yeti comics, retrieved through the Tumblr V1 API."""
    # Also on http://www.gocomics.com/the-awkward-yeti
    # Also on http://theawkwardyeti.com
    # Also on https://tapastic.com/series/TheAwkwardYeti
    name = 'yeti-tumblr'
    long_name = 'The Awkward Yeti (from Tumblr)'
    url = 'http://larstheyeti.tumblr.com'
3502
3503
3504
class NellucNhoj(GenericTumblrV1):
    """NellucNhoj comics, retrieved through the Tumblr V1 API."""
    name = 'nhoj'
    long_name = 'Nelluc Nhoj'
    url = 'http://nellucnhoj.com'
3509
3510
3511
class DownTheUpwardSpiralTumblr(GenericTumblrV1):
    """Down The Upward Spiral comics, retrieved through the Tumblr V1 API."""
    # Also on http://www.downtheupwardspiral.com
    name = 'spiral-tumblr'
    long_name = 'Down the Upward Spiral (from Tumblr)'
    url = 'http://downtheupwardspiral.tumblr.com'
3517
3518
3519
class AsPerUsualTumblr(GenericTumblrV1):
    """As Per Usual comics, retrieved through the Tumblr V1 API."""
    # Also on https://tapastic.com/series/AsPerUsual
    name = 'usual-tumblr'
    long_name = 'As Per Usual (from Tumblr)'
    url = 'http://as-per-usual.tumblr.com'
3525
3526
3527
class OneOneOneOneComicTumblr(GenericTumblrV1):
    """1111 Comics, retrieved through the Tumblr V1 API."""
    # Also on http://www.1111comics.me
    # Also on https://tapastic.com/series/1111-Comics
    name = '1111-tumblr'
    long_name = '1111 Comics (from Tumblr)'
    url = 'http://comics1111.tumblr.com'
3534
3535
3536
class JhallComicsTumblr(GenericTumblrV1):
    """Jhall Comics, retrieved through the Tumblr V1 API."""
    # Also on http://jhallcomics.com
    name = 'jhall-tumblr'
    long_name = 'Jhall Comics (from Tumblr)'
    url = 'http://jhallcomics.tumblr.com'
3542
3543
3544
class BerkeleyMewsTumblr(GenericTumblrV1):
    """Berkeley Mews comics, retrieved through the Tumblr V1 API."""
    # Also on http://www.gocomics.com/berkeley-mews
    # Also on http://www.berkeleymews.com
    name = 'berkeley-tumblr'
    long_name = 'Berkeley Mews (from Tumblr)'
    url = 'http://mews.tumblr.com'
3551
3552
3553
class JoanCornellaTumblr(GenericTumblrV1):
    """Joan Cornella comics, retrieved through the Tumblr V1 API."""
    # Also on http://joancornella.net
    name = 'cornella-tumblr'
    long_name = 'Joan Cornella (from Tumblr)'
    url = 'http://cornellajoan.tumblr.com'
3559
3560
3561
class RespawnComicTumblr(GenericTumblrV1):
    """Respawn Comic, retrieved through the Tumblr V1 API."""
    # Also on http://respawncomic.com
    name = 'respawn-tumblr'
    long_name = 'Respawn Comic (from Tumblr)'
    url = 'http://respawncomic.tumblr.com'
3567
3568
3569
class ChrisHallbeckTumblr(GenericTumblrV1):
    """Class to retrieve Chris Hallbeck comics."""
    # Also on https://tapastic.com/ChrisHallbeck
    # Also on http://maximumble.com
    # Also on http://minimumble.com
    # Also on http://thebookofbiff.com
    name = 'hallbeck-tumblr'
    # Spelling fixed ("Hallback" -> "Hallbeck") to match the url and class name.
    long_name = 'Chris Hallbeck (from Tumblr)'
    url = 'http://chrishallbeck.tumblr.com'
3578
3579
3580
class ComicNuggets(GenericTumblrV1):
    """Comic Nuggets, retrieved through the Tumblr V1 API."""
    name = 'nuggets'
    long_name = 'Comic Nuggets'
    url = 'http://comicnuggets.com'
3585
3586
3587
class PigeonGazetteTumblr(GenericTumblrV1):
    """The Pigeon Gazette comics, retrieved through the Tumblr V1 API."""
    # Also on https://tapastic.com/series/The-Pigeon-Gazette
    name = 'pigeon-tumblr'
    long_name = 'The Pigeon Gazette (from Tumblr)'
    url = 'http://thepigeongazette.tumblr.com'
3593
3594
3595
class CancerOwl(GenericTumblrV1):
    """Cancer Owl comics, retrieved through the Tumblr V1 API."""
    # Also on http://cancerowl.com
    name = 'cancerowl-tumblr'
    long_name = 'Cancer Owl (from Tumblr)'
    url = 'http://cancerowl.tumblr.com'
3601
3602
3603
class FowlLanguageTumblr(GenericTumblrV1):
    """Fowl Language comics, retrieved through the Tumblr V1 API."""
    # Also on http://www.fowllanguagecomics.com
    # Also on http://tapastic.com/series/Fowl-Language-Comics
    # Also on http://www.gocomics.com/fowl-language
    name = 'fowllanguage-tumblr'
    long_name = 'Fowl Language Comics (from Tumblr)'
    url = 'http://fowllanguagecomics.tumblr.com'
3611
3612
3613
class TheOdd1sOutTumblr(GenericTumblrV1):
    """The Odd 1s Out comics, retrieved through the Tumblr V1 API."""
    # Also on http://theodd1sout.com
    # Also on https://tapastic.com/series/Theodd1sout
    name = 'theodd-tumblr'
    long_name = 'The Odd 1s Out (from Tumblr)'
    url = 'http://theodd1sout.tumblr.com'
3620
3621
3622
class TheUnderfoldTumblr(GenericTumblrV1):
    """The Underfold comics, retrieved through the Tumblr V1 API."""
    # Also on http://theunderfold.com
    name = 'underfold-tumblr'
    long_name = 'The Underfold (from Tumblr)'
    url = 'http://theunderfold.tumblr.com'
3628
3629
3630
class LolNeinTumblr(GenericTumblrV1):
    """Lol Nein comics, retrieved through the Tumblr V1 API."""
    # Also on http://lolnein.com
    name = 'lolnein-tumblr'
    long_name = 'Lol Nein (from Tumblr)'
    url = 'http://lolneincom.tumblr.com'
3636
3637
3638
class FatAwesomeComicsTumblr(GenericTumblrV1):
    """Fat Awesome Comics, retrieved through the Tumblr V1 API."""
    # Also on http://fatawesome.com/comics
    name = 'fatawesome-tumblr'
    long_name = 'Fat Awesome (from Tumblr)'
    url = 'http://fatawesomecomedy.tumblr.com'
3644
3645
3646
class TheWorldIsFlatTumblr(GenericTumblrV1):
    """The World Is Flat comics, retrieved through the Tumblr V1 API."""
    # Also on https://tapastic.com/series/The-World-is-Flat
    name = 'flatworld-tumblr'
    long_name = 'The World Is Flat (from Tumblr)'
    url = 'http://theworldisflatcomics.tumblr.com'
3652
3653
3654
class DorrisMc(GenericTumblrV1):
    """Dorris Mc comics, retrieved through the Tumblr V1 API."""
    # Also on http://www.gocomics.com/dorris-mccomics
    name = 'dorrismc'
    long_name = 'Dorris Mc'
    url = 'http://dorrismccomics.com'
3660
3661
3662
class LeleozTumblr(GenericEmptyComic, GenericTumblrV1):
    """Leleoz comics, declared on top of the Tumblr V1 API retriever."""
    # NOTE(review): GenericEmptyComic comes first in the bases; presumably it
    # disables retrieval for this comic - confirm against that class.
    # Also on https://tapastic.com/series/Leleoz
    name = 'leleoz-tumblr'
    long_name = 'Leleoz (from Tumblr)'
    url = 'http://leleozcomics.tumblr.com'
3668
3669
3670
class MoonBeardTumblr(GenericTumblrV1):
    """MoonBeard comics, retrieved through the Tumblr V1 API."""
    # Also on http://moonbeard.com
    # Also on http://www.webtoons.com/en/comedy/moon-beard/list?title_no=471
    name = 'moonbeard-tumblr'
    long_name = 'Moon Beard (from Tumblr)'
    url = 'http://blog.squiresjam.es/moonbeard'
3677
3678
3679
class AComik(GenericTumblrV1):
    """A Comik comics, retrieved through the Tumblr V1 API."""
    name = 'comik'
    long_name = 'A Comik'
    url = 'http://acomik.com'
3684
3685
3686
class ClassicRandy(GenericTumblrV1):
    """Classic Randy comics, retrieved through the Tumblr V1 API."""
    name = 'randy'
    long_name = 'Classic Randy'
    url = 'http://classicrandy.tumblr.com'
3691
3692
3693
class DagssonTumblr(GenericTumblrV1):
    """Dagsson comics, retrieved through the Tumblr V1 API."""
    # Also on http://www.dagsson.com
    name = 'dagsson-tumblr'
    long_name = 'Dagsson Hugleikur (from Tumblr)'
    url = 'http://hugleikurdagsson.tumblr.com'
3699
3700
3701
class LinsEditionsTumblr(GenericTumblrV1):
    """L.I.N.S. Editions comics, retrieved through the Tumblr V1 API."""
    # Also on https://linsedition.com
    name = 'lins-tumblr'
    long_name = 'L.I.N.S. Editions (from Tumblr)'
    url = 'http://linscomics.tumblr.com'
3707
3708
3709
class OrigamiHotDish(GenericTumblrV1):
    """Origami Hot Dish comics, retrieved through the Tumblr V1 API."""
    name = 'origamihotdish'
    long_name = 'Origami Hot Dish'
    url = 'http://origamihotdish.com'
3714
3715
3716
class HitAndMissComicsTumblr(GenericTumblrV1):
    """Hit and Miss Comics, retrieved through the Tumblr V1 API."""
    name = 'hitandmiss'
    long_name = 'Hit and Miss Comics'
    url = 'http://hitandmisscomics.tumblr.com'
3721
3722
3723
class HMBlanc(GenericTumblrV1):
    """HM Blanc comics, retrieved through the Tumblr V1 API."""
    name = 'hmblanc'
    long_name = 'HM Blanc'
    url = 'http://hmblanc.tumblr.com'
3728
3729
3730
class TalesOfAbsurdityTumblr(GenericTumblrV1):
    """Tales Of Absurdity comics, retrieved through the Tumblr V1 API."""
    # Also on http://talesofabsurdity.com
    # Also on http://tapastic.com/series/Tales-Of-Absurdity
    name = 'absurdity-tumblr'
    long_name = 'Tales of Absurdity (from Tumblr)'
    url = 'http://talesofabsurdity.tumblr.com'
3737
3738
3739
class RobbieAndBobby(GenericTumblrV1):
    """Robbie And Bobby comics, retrieved through the Tumblr V1 API."""
    # Also on http://robbieandbobby.com
    name = 'robbie-tumblr'
    long_name = 'Robbie And Bobby (from Tumblr)'
    url = 'http://robbieandbobby.tumblr.com'
3745
3746
3747
class ElectricBunnyComicTumblr(GenericTumblrV1):
    """Electric Bunny Comics, retrieved through the Tumblr V1 API."""
    # Also on http://www.electricbunnycomics.com/View/Comic/153/Welcome+to+Hell
    name = 'bunny-tumblr'
    long_name = 'Electric Bunny Comic (from Tumblr)'
    url = 'http://electricbunnycomics.tumblr.com'
3753
3754
3755
class Hoomph(GenericTumblrV1):
    """Hoomph comics, retrieved through the Tumblr V1 API."""
    name = 'hoomph'
    long_name = 'Hoomph'
    url = 'http://hoom.ph'
3760
3761
3762
class BFGFSTumblr(GenericTumblrV1):
    """BFGFS comics, retrieved through the Tumblr V1 API."""
    # Also on https://tapastic.com/series/BFGFS
    # Also on http://bfgfs.com
    name = 'bfgfs-tumblr'
    long_name = 'BFGFS (from Tumblr)'
    url = 'http://bfgfs.tumblr.com'
3769
3770
3771
class DoodleForFood(GenericTumblrV1):
    """Doodle For Food comics, retrieved through the Tumblr V1 API."""
    # Also on http://doodleforfood.com
    name = 'doodle'
    long_name = 'Doodle For Food'
    url = 'http://doodleforfood.com'
3777
3778
3779
class HorovitzComics(GenericListableComic):
    """Shared logic for the comics hosted on horovitzcomics.com.

    Subclasses provide `link_re`, a regexp matching the archive links of
    their series and capturing the comic number in its first group.
    """
    url = 'http://www.horovitzcomics.com'
    img_re = re.compile('.*comics/([0-9]*)/([0-9]*)/([0-9]*)/.*$')
    link_re = NotImplemented
    get_url_from_archive_element = join_cls_url_to_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Describe one comic from its page and its archive link.

        The number comes from the link href, the title from the link text
        and the date from the path of the single image with id 'comic'.
        """
        num = int(cls.link_re.match(link['href']).group(1))
        title = link.string
        comic_imgs = soup.find_all('img', id='comic')
        assert len(comic_imgs) == 1
        date_parts = cls.img_re.match(comic_imgs[0]['src']).groups()
        year, month, day = map(int, date_parts)
        return {
            'title': title,
            'day': day,
            'month': month,
            'year': year,
            'img': [img['src'] for img in comic_imgs],
            'num': num,
        }

    @classmethod
    def get_archive_elements(cls):
        """Return the archive links matching cls.link_re, in reversed page order."""
        archive_page = get_soup_at_url('http://www.horovitzcomics.com/comics/archive/')
        matching_links = archive_page.find_all('a', href=cls.link_re)
        return reversed(matching_links)
3809
3810
3811
class HorovitzNew(HorovitzComics):
    """Horovitz comics listed under '/comics/new/'."""
    name = 'horovitznew'
    long_name = 'Horovitz New'
    link_re = re.compile('^/comics/new/([0-9]+)$')
3816
3817
3818
class HorovitzClassic(HorovitzComics):
    """Horovitz comics listed under '/comics/classic/'."""
    name = 'horovitzclassic'
    long_name = 'Horovitz Classic'
    link_re = re.compile('^/comics/classic/([0-9]+)$')
3823
3824
3825
class GenericGoComic(GenericNavigableComic):
    """Shared logic for the comics hosted on gocomics.com.

    Comic urls are expected to end with '/<year>/<month>/<day>'.
    """
    url_date_re = re.compile('.*/([0-9]*)/([0-9]*)/([0-9]*)$')

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'beginning' anchor found on the page at cls.url."""
        main_page = get_soup_at_url(cls.url)
        return main_page.find('a', class_='beginning')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the 'next' or 'prev' anchor whose href ends with a date."""
        wanted_class = 'next' if next_ else 'prev'
        return last_soup.find('a', class_=wanted_class, href=cls.url_date_re)

    @classmethod
    def get_url_from_link(cls, link):
        """Return the link href joined to the gocomics.com root url."""
        return urljoin_wrapper('http://www.gocomics.com', link['href'])

    @classmethod
    def get_comic_info(cls, soup, link):
        """Describe one comic from its page and its link.

        The date is parsed from the comic url; the image is the last 'strip'
        image of the page and the author comes from the 'author' meta tag.
        """
        comic_url = cls.get_url_from_link(link)
        date_parts = cls.url_date_re.match(comic_url).groups()
        year, month, day = (int(part) for part in date_parts)
        last_strip = soup.find_all('img', class_='strip')[-1]
        strip_src = last_strip['src']
        author = soup.find('meta', attrs={'name': 'author'})['content']
        return {
            'day': day,
            'month': month,
            'year': year,
            'img': [strip_src],
            'author': author
        }
3856
3857
3858
class PearlsBeforeSwine(GenericGoComic):
    """Pearls Before Swine comics, retrieved from gocomics.com."""
    name = 'pearls'
    long_name = 'Pearls Before Swine'
    url = 'http://www.gocomics.com/pearlsbeforeswine'
3863
3864
3865
class Peanuts(GenericGoComic):
    """Peanuts comics, retrieved from gocomics.com."""
    name = 'peanuts'
    long_name = 'Peanuts'
    url = 'http://www.gocomics.com/peanuts'
3870
3871
3872
class MattWuerker(GenericGoComic):
    """Matt Wuerker comics, retrieved from gocomics.com."""
    name = 'wuerker'
    long_name = 'Matt Wuerker'
    url = 'http://www.gocomics.com/mattwuerker'
3877
3878
3879
class TomToles(GenericGoComic):
    """Tom Toles comics, retrieved from gocomics.com."""
    name = 'toles'
    long_name = 'Tom Toles'
    url = 'http://www.gocomics.com/tomtoles'
3884
3885
3886
class BreakOfDay(GenericGoComic):
    """Break Of Day comics, retrieved from gocomics.com."""
    name = 'breakofday'
    long_name = 'Break Of Day'
    url = 'http://www.gocomics.com/break-of-day'
3891
3892
3893
class Brevity(GenericGoComic):
    """Brevity comics, fetched from GoComics."""
    long_name = 'Brevity'
    name = 'brevity'
    url = 'http://www.gocomics.com/brevity'
3898
3899
3900
class MichaelRamirez(GenericGoComic):
    """Michael Ramirez comics, fetched from GoComics."""
    long_name = 'Michael Ramirez'
    name = 'ramirez'
    url = 'http://www.gocomics.com/michaelramirez'
3905
3906
3907
class MikeLuckovich(GenericGoComic):
    """Mike Luckovich comics, fetched from GoComics."""
    long_name = 'Mike Luckovich'
    name = 'luckovich'
    url = 'http://www.gocomics.com/mikeluckovich'
3912
3913
3914
class JimBenton(GenericGoComic):
    """Jim Benton comics, fetched from GoComics."""
    # Also available at http://jimbenton.tumblr.com
    long_name = 'Jim Benton'
    name = 'benton'
    url = 'http://www.gocomics.com/jim-benton-cartoons'
3920
3921
3922
class TheArgyleSweater(GenericGoComic):
    """The Argyle Sweater comics, fetched from GoComics."""
    long_name = 'Argyle Sweater'
    name = 'argyle'
    url = 'http://www.gocomics.com/theargylesweater'
3927
3928
3929
class SunnyStreet(GenericGoComic):
    """Sunny Street comics, fetched from GoComics."""
    # Also available at http://www.sunnystreetcomics.com
    long_name = 'Sunny Street'
    name = 'sunny'
    url = 'http://www.gocomics.com/sunny-street'
3935
3936
3937
class OffTheMark(GenericGoComic):
    """Off The Mark comics, fetched from GoComics."""
    # Also available at https://www.offthemark.com
    long_name = 'Off The Mark'
    name = 'offthemark'
    url = 'http://www.gocomics.com/offthemark'
3943
3944
3945
class WuMo(GenericGoComic):
    """WuMo comics, fetched from GoComics."""
    # Also available at http://wumo.com
    long_name = 'WuMo'
    name = 'wumo'
    url = 'http://www.gocomics.com/wumo'
3951
3952
3953
class LunarBaboon(GenericGoComic):
    """Lunar Baboon comics, fetched from GoComics."""
    # Also available at http://www.lunarbaboon.com
    # Also available at https://tapastic.com/series/Lunarbaboon
    long_name = 'Lunar Baboon'
    name = 'lunarbaboon'
    url = 'http://www.gocomics.com/lunarbaboon'
3960
3961
3962
class SandersenGocomic(GenericGoComic):
    """Sarah Andersen comics, fetched from GoComics."""
    # Also available at http://sarahcandersen.com
    # Also available at http://tapastic.com/series/Doodle-Time
    long_name = 'Sarah Andersen (from GoComics)'
    name = 'sandersen-goc'
    url = 'http://www.gocomics.com/sarahs-scribbles'
3969
3970
3971
class CalvinAndHobbesGoComic(GenericGoComic):
    """Calvin and Hobbes comics, fetched from GoComics."""
    # Source is GoComics rather than
    # http://marcel-oehler.marcellosendos.ch/comics/ch/
    long_name = 'Calvin and Hobbes (from GoComics)'
    name = 'calvin-goc'
    url = 'http://www.gocomics.com/calvinandhobbes'
3977
3978
3979
class RallGoComic(GenericGoComic):
    """Ted Rall comics, fetched from GoComics."""
    # Also available at http://rall.com/comic
    long_name = "Ted Rall (from GoComics)"
    name = 'rall-goc'
    url = "http://www.gocomics.com/tedrall"
3985
3986
3987
class TheAwkwardYetiGoComic(GenericGoComic):
    """The Awkward Yeti comics, fetched from GoComics."""
    # Also available at http://larstheyeti.tumblr.com
    # Also available at http://theawkwardyeti.com
    # Also available at https://tapastic.com/series/TheAwkwardYeti
    long_name = 'The Awkward Yeti (from GoComics)'
    name = 'yeti-goc'
    url = 'http://www.gocomics.com/the-awkward-yeti'
3995
3996
3997
class BerkeleyMewsGoComics(GenericGoComic):
    """Berkeley Mews comics, fetched from GoComics."""
    # Also available at http://mews.tumblr.com
    # Also available at http://www.berkeleymews.com
    long_name = 'Berkeley Mews (from GoComics)'
    name = 'berkeley-goc'
    url = 'http://www.gocomics.com/berkeley-mews'
4004
4005
4006
class SheldonGoComics(GenericGoComic):
    """Sheldon comics, fetched from GoComics."""
    # Also available at http://www.sheldoncomics.com
    long_name = 'Sheldon Comics (from GoComics)'
    name = 'sheldon-goc'
    url = 'http://www.gocomics.com/sheldon'
4012
4013
4014
class FowlLanguageGoComics(GenericGoComic):
    """Fowl Language comics, fetched from GoComics."""
    # Also available at http://www.fowllanguagecomics.com
    # Also available at http://tapastic.com/series/Fowl-Language-Comics
    # Also available at http://fowllanguagecomics.tumblr.com
    long_name = 'Fowl Language Comics (from GoComics)'
    name = 'fowllanguage-goc'
    url = 'http://www.gocomics.com/fowl-language'
4022
4023
4024
class NickAnderson(GenericGoComic):
    """Nick Anderson comics, fetched from GoComics."""
    long_name = 'Nick Anderson'
    name = 'nickanderson'
    url = 'http://www.gocomics.com/nickanderson'
4029
4030
4031
class GarfieldGoComics(GenericGoComic):
    """Garfield comics, fetched from GoComics."""
    # Also available at http://garfield.com
    long_name = 'Garfield (from GoComics)'
    name = 'garfield-goc'
    url = 'http://www.gocomics.com/garfield'
4037
4038
4039
class DorrisMcGoComics(GenericGoComic):
    """Dorris Mc comics, fetched from GoComics."""
    # Also available at http://dorrismccomics.com
    long_name = 'Dorris Mc (from GoComics)'
    name = 'dorrismc-goc'
    url = 'http://www.gocomics.com/dorris-mccomics'
4045
4046
4047
class MisterAndMeGoComics(GenericGoComic):
    """Mister & Me comics, fetched from GoComics."""
    # Also available at http://www.mister-and-me.com
    # Also available at https://tapastic.com/series/Mister-and-Me
    long_name = 'Mister & Me (from GoComics)'
    name = 'mister-goc'
    url = 'http://www.gocomics.com/mister-and-me'
4054
4055
4056
class GenericTapasticComic(GenericListableComic):
    """Shared retrieval logic for comics hosted on tapastic.com.

    The episode list is scraped from the series page at ``cls.url`` and each
    episode is then described from its own page.
    """

    @classmethod
    def get_url_from_archive_element(cls, archive_elt):
        """Return the episode page URL built from the element's ``id``."""
        episode_id = str(archive_elt['id'])
        return 'http://tapastic.com/episode/' + episode_id

    @classmethod
    def get_archive_elements(cls):
        """Return the episode list embedded in the series page.

        The page is read line by line; the first stripped line of the form
        ``episodeList : <json>,`` provides the JSON value that is decoded and
        returned. An IndexError is raised when no such line exists.
        """
        prefix, suffix = 'episodeList : ', ','
        # The data lives in the javascript part of the page; no cleaner way
        # to reach it is known, hence this line-based extraction.
        candidates = []
        for raw_line in urlopen_wrapper(cls.url).readlines():
            line = raw_line.decode('utf-8').strip()
            if line.startswith(prefix) and line.endswith(suffix):
                candidates.append(line[len(prefix):-len(suffix)])
        return json.loads(candidates[0])

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Build the information dictionary for one episode.

        ``archive_elt['publishDate']`` is handled as a timestamp in
        milliseconds and converted to a local date. Returns None (after
        printing a notice) when the page has no ``art-image`` image yet.
        """
        millis = int(archive_elt['publishDate'])
        published = datetime.datetime.fromtimestamp(millis / 1000.0).date()
        images = soup.find_all('img', class_='art-image')
        if not images:
            episode_url = cls.get_url_from_archive_element(archive_elt)
            print("Comic %s is being uploaded, retry later" % episode_url)
            return None
        return {
            'day': published.day,
            'year': published.year,
            'month': published.month,
            'img': [image['src'] for image in images],
            'title': archive_elt['title'],
        }
4088
4089
4090
class VegetablesForDessert(GenericTapasticComic):
    """Vegetables For Dessert comics, fetched from Tapastic."""
    # Also available at http://vegetablesfordessert.tumblr.com
    long_name = 'Vegetables For Dessert'
    name = 'vegetables'
    url = 'http://tapastic.com/series/vegetablesfordessert'
4096
4097
4098
class FowlLanguageTapa(GenericTapasticComic):
    """Fowl Language comics, fetched from Tapastic."""
    # Also available at http://www.fowllanguagecomics.com
    # Also available at http://fowllanguagecomics.tumblr.com
    # Also available at http://www.gocomics.com/fowl-language
    long_name = 'Fowl Language Comics (from Tapastic)'
    name = 'fowllanguage-tapa'
    url = 'http://tapastic.com/series/Fowl-Language-Comics'
4106
4107
4108
class OscillatingProfundities(GenericTapasticComic):
    """Oscillating Profundities comics, fetched from Tapastic."""
    long_name = 'Oscillating Profundities'
    name = 'oscillating'
    url = 'http://tapastic.com/series/oscillatingprofundities'
4113
4114
4115
class ZnoflatsComics(GenericTapasticComic):
    """Znoflats comics, fetched from Tapastic."""
    long_name = 'Znoflats Comics'
    name = 'znoflats'
    url = 'http://tapastic.com/series/Znoflats-Comics'
4120
4121
4122
class SandersenTapastic(GenericTapasticComic):
    """Sarah Andersen comics, fetched from Tapastic."""
    # Also available at http://sarahcandersen.com
    # Also available at http://www.gocomics.com/sarahs-scribbles
    long_name = 'Sarah Andersen (from Tapastic)'
    name = 'sandersen-tapa'
    url = 'http://tapastic.com/series/Doodle-Time'
4129
4130
4131
class TubeyToonsTapastic(GenericTapasticComic):
    """TubeyToons comics, fetched from Tapastic."""
    # Also available at http://tubeytoons.com
    # Also available at http://tubeytoons.tumblr.com
    long_name = 'Tubey Toons (from Tapastic)'
    name = 'tubeytoons-tapa'
    url = 'http://tapastic.com/series/Tubey-Toons'
4138
4139
4140
class AnythingComicTapastic(GenericTapasticComic):
    """Anything Comic comics, fetched from Tapastic."""
    # Also available at http://www.anythingcomic.com
    long_name = 'Anything Comic (from Tapastic)'
    name = 'anythingcomic-tapa'
    url = 'http://tapastic.com/series/anything'
4146
4147
4148
class UnearthedComicsTapastic(GenericTapasticComic):
    """Unearthed Comics, fetched from Tapastic."""
    # Also available at http://unearthedcomics.com
    # Also available at http://unearthedcomics.tumblr.com
    long_name = 'Unearthed Comics (from Tapastic)'
    name = 'unearthed-tapa'
    url = 'http://tapastic.com/series/UnearthedComics'
4155
4156
4157
class EverythingsStupidTapastic(GenericTapasticComic):
    """Everything's Stupid comics, fetched from Tapastic."""
    # Also available at
    # http://www.webtoons.com/en/challenge/everythings-stupid/list?title_no=14591
    # Also available at http://everythingsstupid.net
    long_name = "Everything's Stupid (from Tapastic)"
    name = 'stupid-tapa'
    url = 'http://tapastic.com/series/EverythingsStupid'
4164
4165
4166
class JustSayEhTapastic(GenericTapasticComic):
    """Just Say Eh comics, fetched from Tapastic."""
    # Also available at http://www.justsayeh.com
    long_name = 'Just Say Eh (from Tapastic)'
    name = 'justsayeh-tapa'
    url = 'http://tapastic.com/series/Just-Say-Eh'
4172
4173
4174
class ThorsThundershackTapastic(GenericTapasticComic):
    """Thor's Thundershack comics, fetched from Tapastic."""
    # Also available at http://www.thorsthundershack.com
    long_name = 'Thor\'s Thundershack (from Tapastic)'
    name = 'thor-tapa'
    url = 'http://tapastic.com/series/Thors-Thundershac'
4180
4181
4182
class OwlTurdTapastic(GenericTapasticComic):
    """Owl Turd comics, fetched from Tapastic."""
    # Also available at http://owlturd.com
    long_name = 'Owl Turd (from Tapastic)'
    name = 'owlturd-tapa'
    url = 'http://tapastic.com/series/Owl-Turd-Comix'
4188
4189
4190
class GoneIntoRaptureTapastic(GenericTapasticComic):
    """Gone Into Rapture comics, fetched from Tapastic."""
    # Also available at http://goneintorapture.tumblr.com
    # Also available at http://www.goneintorapture.com
    long_name = 'Gone Into Rapture (from Tapastic)'
    name = 'rapture-tapa'
    url = 'http://tapastic.com/series/Goneintorapture'
4197
4198
4199
class HeckIfIKnowComicsTapa(GenericTapasticComic):
    """Heck If I Know comics, fetched from Tapastic."""
    # Also available at http://heckifiknowcomics.com
    long_name = 'Heck if I Know comics (from Tapastic)'
    name = 'heck-tapa'
    url = 'http://tapastic.com/series/Regular'
4205
4206
4207
class CheerUpEmoKidTapa(GenericTapasticComic):
    """Cheer Up Emo Kid comics, fetched from Tapastic."""
    # Also available at http://www.cheerupemokid.com
    # Also available at http://enzocomics.tumblr.com
    long_name = 'Cheer Up Emo Kid (from Tapastic)'
    name = 'cuek-tapa'
    url = 'http://tapastic.com/series/CUEK'
4214
4215
4216
class BigFootJusticeTapa(GenericTapasticComic):
    """Big Foot Justice comics, fetched from Tapastic."""
    # Also available at http://bigfootjustice.com
    long_name = 'Big Foot Justice (from Tapastic)'
    name = 'bigfoot-tapa'
    url = 'http://tapastic.com/series/bigfoot-justice'
4222
4223
4224
class UpAndOutTapa(GenericTapasticComic):
    """Up & Out comics, fetched from Tapastic."""
    # Also available at http://upandoutcomic.tumblr.com
    long_name = 'Up And Out (from Tapastic)'
    name = 'upandout-tapa'
    url = 'http://tapastic.com/series/UP-and-OUT'
4230
4231
4232
class ToonHoleTapa(GenericTapasticComic):
    """Toon Hole comics, fetched from Tapastic."""
    # Also available at http://www.toonhole.com
    long_name = 'Toon Hole (from Tapastic)'
    name = 'toonhole-tapa'
    url = 'http://tapastic.com/series/TOONHOLE'
4238
4239
4240
class AngryAtNothingTapa(GenericTapasticComic):
    """Angry At Nothing comics, fetched from Tapastic."""
    # Also available at http://www.angryatnothing.net
    long_name = 'Angry At Nothing (from Tapastic)'
    name = 'angry-tapa'
    url = 'http://tapastic.com/series/Comics-yeah-definitely-comics-'
4246
4247
4248
class LeleozTapa(GenericTapasticComic):
    """Leleoz comics, fetched from Tapastic."""
    # Also available at http://leleozcomics.tumblr.com
    long_name = 'Leleoz (from Tapastic)'
    name = 'leleoz-tapa'
    url = 'https://tapastic.com/series/Leleoz'
4254
4255
4256
class TheAwkwardYetiTapa(GenericTapasticComic):
    """The Awkward Yeti comics, fetched from Tapastic."""
    # Also available at http://www.gocomics.com/the-awkward-yeti
    # Also available at http://theawkwardyeti.com
    # Also available at http://larstheyeti.tumblr.com
    long_name = 'The Awkward Yeti (from Tapastic)'
    name = 'yeti-tapa'
    url = 'https://tapastic.com/series/TheAwkwardYeti'
4264
4265
4266
class AsPerUsualTapa(GenericTapasticComic):
    """As Per Usual comics, fetched from Tapastic."""
    # Also available at http://as-per-usual.tumblr.com
    long_name = 'As Per Usual (from Tapastic)'
    name = 'usual-tapa'
    url = 'https://tapastic.com/series/AsPerUsual'
4272
4273
4274
class OneOneOneOneComicTapa(GenericTapasticComic):
    """1111 Comics, fetched from Tapastic."""
    # Also available at http://www.1111comics.me
    # Also available at http://comics1111.tumblr.com
    long_name = '1111 Comics (from Tapastic)'
    name = '1111-tapa'
    url = 'https://tapastic.com/series/1111-Comics'
4281
4282
4283
class TumbleDryTapa(GenericTapasticComic):
    """Class to retrieve Tumble Dry comics."""
    # Also on http://tumbledrycomics.com
    name = 'tumbledry-tapa'
    # Display name: the comic is "Tumble Dry" (was misspelled "Tumblr Dry").
    long_name = 'Tumble Dry (from Tapastic)'
    url = 'https://tapastic.com/series/TumbleDryComics'
4289
4290
4291
class DeadlyPanelTapa(GenericTapasticComic):
    """Deadly Panel comics, fetched from Tapastic."""
    # Also available at http://www.deadlypanel.com
    long_name = 'Deadly Panel (from Tapastic)'
    name = 'deadly-tapa'
    url = 'https://tapastic.com/series/deadlypanel'
4297
4298
4299
class ChrisHallbeckMaxiTapa(GenericTapasticComic):
    """Class to retrieve Chris Hallbeck's Maximumble comics."""
    # Also on http://chrishallbeck.tumblr.com
    # Also on http://maximumble.com
    name = 'hallbeckmaxi-tapa'
    # Display name: the author is "Hallbeck" (was misspelled "Hallback").
    long_name = 'Chris Hallbeck - Maximumble (from Tapastic)'
    url = 'https://tapastic.com/series/Maximumble'
4306
4307
4308
class ChrisHallbeckMiniTapa(GenericTapasticComic):
    """Class to retrieve Chris Hallbeck's Minimumble comics."""
    # Also on http://chrishallbeck.tumblr.com
    # Also on http://minimumble.com
    name = 'hallbeckmini-tapa'
    # Display name: the author is "Hallbeck" (was misspelled "Hallback").
    long_name = 'Chris Hallbeck - Minimumble (from Tapastic)'
    url = 'https://tapastic.com/series/Minimumble'
4315
4316
4317
class ChrisHallbeckBiffTapa(GenericTapasticComic):
    """Class to retrieve Chris Hallbeck's The Book of Biff comics."""
    # Also on http://chrishallbeck.tumblr.com
    # Also on http://thebookofbiff.com
    name = 'hallbeckbiff-tapa'
    # Display name: the author is "Hallbeck" (was misspelled "Hallback").
    long_name = 'Chris Hallbeck - The Book of Biff (from Tapastic)'
    url = 'https://tapastic.com/series/Biff'
4324
4325
4326
class RandoWisTapa(GenericTapasticComic):
    """RandoWis comics, fetched from Tapastic."""
    # Also available at https://randowis.com
    long_name = 'RandoWis (from Tapastic)'
    name = 'randowis-tapa'
    url = 'https://tapastic.com/series/RandoWis'
4332
4333
4334
class PigeonGazetteTapa(GenericTapasticComic):
    """The Pigeon Gazette comics, fetched from Tapastic."""
    # Also available at http://thepigeongazette.tumblr.com
    long_name = 'The Pigeon Gazette (from Tapastic)'
    name = 'pigeon-tapa'
    url = 'https://tapastic.com/series/The-Pigeon-Gazette'
4340
4341
4342
class TheOdd1sOutTapa(GenericTapasticComic):
    """The Odd 1s Out comics, fetched from Tapastic."""
    # Also available at http://theodd1sout.com
    # Also available at http://theodd1sout.tumblr.com
    long_name = 'The Odd 1s Out (from Tapastic)'
    name = 'theodd-tapa'
    url = 'https://tapastic.com/series/Theodd1sout'
4349
4350
4351
class TheWorldIsFlatTapa(GenericTapasticComic):
    """The World Is Flat comics, fetched from Tapastic."""
    # Also available at http://theworldisflatcomics.tumblr.com
    long_name = 'The World Is Flat (from Tapastic)'
    name = 'flatworld-tapa'
    url = 'https://tapastic.com/series/The-World-is-Flat'
4357
4358
4359
class MisterAndMeTapa(GenericTapasticComic):
    """Mister & Me comics, fetched from Tapastic."""
    # Also available at http://www.mister-and-me.com
    # Also available at http://www.gocomics.com/mister-and-me
    long_name = 'Mister & Me (from Tapastic)'
    name = 'mister-tapa'
    url = 'https://tapastic.com/series/Mister-and-Me'
4366
4367
4368
class TalesOfAbsurdityTapa(GenericTapasticComic):
    """Tales Of Absurdity comics, fetched from Tapastic."""
    # Also available at http://talesofabsurdity.com
    # Also available at http://talesofabsurdity.tumblr.com
    long_name = 'Tales of Absurdity (from Tapastic)'
    name = 'absurdity-tapa'
    url = 'http://tapastic.com/series/Tales-Of-Absurdity'
4375
4376
4377
class BFGFSTapa(GenericTapasticComic):
    """BFGFS comics, fetched from Tapastic."""
    # Also available at http://bfgfs.com
    # Also available at http://bfgfs.tumblr.com
    long_name = 'BFGFS (from Tapastic)'
    name = 'bfgfs-tapa'
    url = 'https://tapastic.com/series/BFGFS'
4384
4385
4386
class DoodleForFoodTapa(GenericTapasticComic):
    """Doodle For Food comics, fetched from Tapastic."""
    # Also available at http://doodleforfood.com
    long_name = 'Doodle For Food (from Tapastic)'
    name = 'doodle-tapa'
    url = 'https://tapastic.com/series/Doodle-for-Food'
4392
4393
4394
def get_subclasses(klass):
    """Return every class deriving from ``klass``, directly or transitively.

    Direct subclasses come first, followed by the descendants of each of them
    in turn. A class reachable through several parents appears once per path.
    """
    children = klass.__subclasses__()
    descendants = [cls for child in children for cls in get_subclasses(child)]
    return children + descendants
4400
4401
4402
def remove_st_nd_rd_th_from_date(string):
    """Strip ordinal suffixes so that 1st/2nd/3rd/4th become parsable numbers.

    Only a lowercase "st", "nd", "rd" or "th" that directly follows a digit is
    removed. Day and month names containing the same letters ("Monday",
    "Saturday", "Thursday", "August", ...) are left untouched, which the
    previous blanket replacement did not guarantee.

    Returns the cleaned string.
    """
    return re.sub(r'(?<=\d)(?:st|nd|rd|th)', '', string)
4410
4411
4412
def string_to_date(string, date_format, local=DEFAULT_LOCAL):
    """Convert a string to a date object.

    Wrapper around datetime.datetime.strptime which parses under the locale
    ``local`` and then puts the previous locale back.

    The format syntax is described in
    https://docs.python.org/3/library/datetime.html#strftime-and-strptime-behavior

    Raises ValueError (from strptime) when ``string`` does not match
    ``date_format``; the previous locale is restored in that case too.
    """
    prev_locale = locale.setlocale(locale.LC_ALL)
    if local == prev_locale:
        return datetime.datetime.strptime(string, date_format).date()
    locale.setlocale(locale.LC_ALL, local)
    try:
        return datetime.datetime.strptime(string, date_format).date()
    finally:
        # The locale is process-wide: restore it even when parsing fails so a
        # bad date string cannot leave the program in the wrong locale.
        locale.setlocale(locale.LC_ALL, prev_locale)
4423
4424
4425
# Every class deriving (directly or not) from GenericComic.
COMICS = set(get_subclasses(GenericComic))
# Classes whose ``name`` is None are left out (presumably the generic
# intermediate classes - confirm against GenericComic).
VALID_COMICS = [comic for comic in COMICS if comic.name is not None]
# Lookup by short name; the assertion catches two comics sharing a name,
# which would otherwise silently overwrite each other in the mapping.
COMIC_NAMES = {comic.name: comic for comic in VALID_COMICS}
assert len(COMIC_NAMES) == len(VALID_COMICS)
# Same uniqueness check for the Python class names.
CLASS_NAMES = {comic.__name__ for comic in VALID_COMICS}
assert len(CLASS_NAMES) == len(VALID_COMICS)
4431