Completed
Push — master ( bfc1f3...e1e8dd )
by De
01:13
created

comics.py (10 issues)

1
#! /usr/bin/python3
2
# vim: set expandtab tabstop=4 shiftwidth=4 :
3
"""Module to retrieve webcomics"""
4
5
from comic_abstract import GenericComic, get_date_for_comic
6
import re
7
from datetime import date, timedelta
8
import datetime
9
from urlfunctions import get_soup_at_url, urljoin_wrapper,\
10
    convert_iri_to_plain_ascii_uri, load_json_at_url, urlopen_wrapper
11
import json
12
import locale
13
import urllib
14
15
DEFAULT_LOCAL = 'en_GB.UTF-8'
16
17
18
class Xkcd(GenericComic):
    """Retriever for xkcd comics, driven by the site's JSON descriptions."""
    name = 'xkcd'
    long_name = 'xkcd'
    url = 'http://xkcd.com'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Yield the comics following `last_comic`, in increasing number.

        `last_comic` is the description of the last retrieved comic (its
        'num' entry tells where to resume) or a falsy value to start from
        the very first comic.
        """
        start = last_comic['num'] + 1 if last_comic else 1
        latest = load_json_at_url(urljoin_wrapper(cls.url, 'info.0.json'))
        for num in range(start, latest['num'] + 1):
            if num == 404:
                # Number 404 is never requested.
                continue
            json_url = urljoin_wrapper(cls.url, '%d/info.0.json' % num)
            comic = load_json_at_url(json_url)
            comic.update({
                'img': [comic['img']],
                'prefix': '%d-' % num,
                'json_url': json_url,
                'url': urljoin_wrapper(cls.url, str(num)),
                'day': int(comic['day']),
                'month': int(comic['month']),
                'year': int(comic['year']),
            })
            assert comic['num'] == num
            yield comic
43
44
45
# Helper functions corresponding to get_url_from_link/get_url_from_archive_element
46
47
48
@classmethod
def get_href(cls, link):
    """Return the 'href' entry of `link`.

    Usable as an implementation of get_url_from_link or
    get_url_from_archive_element.
    """
    href = link['href']
    return href
52
53
54
@classmethod
def join_cls_url_to_href(cls, link):
    """Return the 'href' entry of `link` joined to the class url.

    Usable as an implementation of get_url_from_link or
    get_url_from_archive_element.
    """
    href = link['href']
    return urljoin_wrapper(cls.url, href)
58
59
60
class GenericNavigableComic(GenericComic):
    """Generic class for "navigable" comics: comics with first/next arrows.

    The `get_next_comic` method is implemented in terms of more
    specialized methods to be implemented/overridden:
        - get_first_comic_link
        - get_navi_link
        - get_comic_info
        - get_url_from_link
    """

    @classmethod
    def get_first_comic_link(cls):
        """Get link to the first comic.

        Sometimes this can be retrieved from any comic page, sometimes from
        the archive page, sometimes it doesn't exist at all and one has
        to iterate backward to find it before hardcoding the result found.
        """
        raise NotImplementedError

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next (or previous - for dev purposes) comic.

        `next_` is truthy for the next comic, falsy for the previous one.
        """
        raise NotImplementedError

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        The result is either None (the page is skipped by `get_next_comic`)
        or a dict which must not contain a 'url' entry.
        """
        raise NotImplementedError

    @classmethod
    def get_url_from_link(cls, link):
        """Get url corresponding to a link. Default implementation is similar to get_href."""
        return link['href']

    @classmethod
    def get_next_link(cls, last_soup):
        """Get link to next comic."""
        return cls.get_navi_link(last_soup, True)

    @classmethod
    def get_prev_link(cls, last_soup):
        """Get link to previous comic."""
        return cls.get_navi_link(last_soup, False)

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generic implementation of get_next_comic for navigable comics.

        Start from the page following `last_comic` (or from the first comic
        when `last_comic` is falsy) and follow the "next" links, yielding the
        description of each comic with its 'url' entry filled in.
        """
        url = last_comic['url'] if last_comic else None
        next_comic = \
            cls.get_next_link(get_soup_at_url(url)) \
            if url else \
            cls.get_first_comic_link()
        cls.log("next/first comic will be %s (url is %s)" % (str(next_comic), url))
        while next_comic:
            prev_url, url = url, cls.get_url_from_link(next_comic)
            if prev_url == url:
                # A "next" link pointing to the current page would loop forever.
                cls.log("got same url %s" % url)
                break
            cls.log("about to get %s (%s)" % (url, str(next_comic)))
            soup = get_soup_at_url(url)
            comic = cls.get_comic_info(soup, next_comic)
            if comic is not None:
                assert 'url' not in comic
                comic['url'] = url
                yield comic
            next_comic = cls.get_next_link(soup)
            cls.log("next comic will be %s" % str(next_comic))

    @classmethod
    def check_navigation(cls, url):
        """Check that navigation functions seem to be working - for dev purposes.

        Problems are printed; the return value is True when none was found.
        A missing link met while going back and forth is reported as a
        problem instead of being passed to `get_url_from_link`.
        """
        cls.log("about to check navigation from %s" % url)
        ok = True
        firstlink = cls.get_first_comic_link()
        if firstlink is None:
            print("From %s : no first link" % cls.url)
            ok = False
        if url is None:
            prevlink, nextlink = None, None
        else:
            soup = get_soup_at_url(url)
            prevlink, nextlink = cls.get_prev_link(soup), cls.get_next_link(soup)
        if prevlink is None and nextlink is None:
            print("From %s : no previous nor next" % url)
            ok = False
        else:
            if prevlink:
                prevurl = cls.get_url_from_link(prevlink)
                prevsoup = get_soup_at_url(prevurl)
                # The previous page may have no "next" link at all.
                prevnextlink = cls.get_next_link(prevsoup)
                prevnext = cls.get_url_from_link(prevnextlink) if prevnextlink else None
                if prevnext != url:
                    print("From %s, going backward then forward leads to %s" % (url, prevnext))
                    ok = False
            if nextlink:
                nexturl = cls.get_url_from_link(nextlink)
                if nexturl != url:
                    nextsoup = get_soup_at_url(nexturl)
                    # The next page may have no "previous" link at all.
                    nextprevlink = cls.get_prev_link(nextsoup)
                    nextprev = cls.get_url_from_link(nextprevlink) if nextprevlink else None
                    if nextprev != url:
                        print("From %s, going forward then backward leads to %s" % (url, nextprev))
                        ok = False
        cls.log("checked navigation from %s -> returned %d" % (url, ok))
        return ok

    # This method is not defined by default and is not part of this class' API.
    # It is only used:
    # - during development
    # - in subclasses implementing it correctly
    if False:
        @classmethod
        def get_first_comic_url(cls):
            """Get first comic url.

            Sometimes, the first comic cannot be reached directly so to start
            from the first comic one has to go to the previous comic until
            there is no previous comic. Once this URL is reached, it
            is better to hardcode it but for development purposes, it
            is convenient to have an automatic way to find it.
            """
            url = input("Get starting URL: ")
            print(url)
            comic = cls.get_prev_link(get_soup_at_url(url))
            while comic:
                url = cls.get_url_from_link(comic)
                print(url)
                comic = cls.get_prev_link(get_soup_at_url(url))
            return url
189
190
191
class GenericListableComic(GenericComic):
    """Generic class for "listable" comics: comics with a list of comics (aka 'archive').

    The `get_next_comic` method is implemented in terms of more
    specialized methods to be implemented/overridden:
        - get_archive_elements
        - get_url_from_archive_element
        - get_comic_info
    """

    @classmethod
    def get_archive_elements(cls):
        """Return an iterable over the elements of the archive."""
        raise NotImplementedError

    @classmethod
    def get_url_from_archive_element(cls, archive_elt):
        """Return the url an archive element corresponds to."""
        raise NotImplementedError

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Return the description of a comic (or None to skip it)."""
        raise NotImplementedError

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generic implementation of get_next_comic for listable comics.

        Archive elements are skipped up to and including the one whose url is
        the url of `last_comic`; the following ones are fetched and yielded.
        """
        waiting_for_url = last_comic['url'] if last_comic else None
        for archive_elt in cls.get_archive_elements():
            url = cls.get_url_from_archive_element(archive_elt)
            cls.log("considering %s" % url)
            if waiting_for_url and waiting_for_url == url:
                # Reached the last known comic: everything after it is new.
                waiting_for_url = None
                continue
            if waiting_for_url is not None:
                continue
            cls.log("about to get %s (%s)" % (url, str(archive_elt)))
            info = cls.get_comic_info(get_soup_at_url(url), archive_elt)
            if info is None:
                continue
            assert 'url' not in info
            info['url'] = url
            yield info
        if waiting_for_url is not None:
            print("Did not find %s : there might be a problem" % waiting_for_url)
235
236
# Helper functions corresponding to get_first_comic_link/get_navi_link
237
238
239
@classmethod
def get_link_rel_next(cls, last_soup, next_):
    """Implementation of get_navi_link.

    Look in `last_soup` for a 'link' element whose rel is 'next' when
    `next_` is truthy, 'prev' otherwise.
    """
    if next_:
        wanted_rel = 'next'
    else:
        wanted_rel = 'prev'
    return last_soup.find('link', rel=wanted_rel)
243
244
245
@classmethod
def get_a_rel_next(cls, last_soup, next_):
    """Implementation of get_navi_link.

    Look in `last_soup` for an 'a' element whose rel is 'next' when
    `next_` is truthy, 'prev' otherwise.
    """
    if next_:
        wanted_rel = 'next'
    else:
        wanted_rel = 'prev'
    return last_soup.find('a', rel=wanted_rel)
249
250
251
@classmethod
def get_a_navi_navinext(cls, last_soup, next_):
    """Implementation of get_navi_link.

    Look in `last_soup` for an 'a' element with class 'navi navi-next'
    when `next_` is truthy, 'navi navi-prev' otherwise.
    """
    if next_:
        wanted_class = 'navi navi-next'
    else:
        wanted_class = 'navi navi-prev'
    return last_soup.find('a', class_=wanted_class)
255
256
257
@classmethod
def get_a_navi_comicnavnext_navinext(cls, last_soup, next_):
    """Implementation of get_navi_link.

    Look in `last_soup` for an 'a' element with class
    'navi comic-nav-next navi-next' when `next_` is truthy,
    'navi comic-nav-previous navi-prev' otherwise.
    """
    if next_:
        wanted_class = 'navi comic-nav-next navi-next'
    else:
        wanted_class = 'navi comic-nav-previous navi-prev'
    return last_soup.find('a', class_=wanted_class)
261
262
263
@classmethod
def get_a_comicnavbase_comicnavnext(cls, last_soup, next_):
    """Implementation of get_navi_link.

    Look in `last_soup` for an 'a' element with class
    'comic-nav-base comic-nav-next' when `next_` is truthy,
    'comic-nav-base comic-nav-previous' otherwise.
    """
    if next_:
        wanted_class = 'comic-nav-base comic-nav-next'
    else:
        wanted_class = 'comic-nav-base comic-nav-previous'
    return last_soup.find('a', class_=wanted_class)
267
268
269
@classmethod
def get_a_navi_navifirst(cls):
    """Implementation of get_first_comic_link.

    Fetch the page at `cls.url` and look for an 'a' element with class
    'navi navi-first'.
    """
    home = get_soup_at_url(cls.url)
    return home.find('a', class_='navi navi-first')
273
274
275
@classmethod
def get_div_navfirst_a(cls):
    """Implementation of get_first_comic_link.

    Fetch the page at `cls.url`, look for a 'div' element with class
    "nav-first" and return the 'a' element found inside it.

    Returns None when the div is missing, rather than raising
    AttributeError from calling `.find` on None.
    """
    div = get_soup_at_url(cls.url).find('div', class_="nav-first")
    return div.find('a') if div is not None else None
279
280
281
@classmethod
def get_a_comicnavbase_comicnavfirst(cls):
    """Implementation of get_first_comic_link.

    Fetch the page at `cls.url` and look for an 'a' element with class
    'comic-nav-base comic-nav-first'.
    """
    home = get_soup_at_url(cls.url)
    return home.find('a', class_='comic-nav-base comic-nav-first')
285
286
287
class GenericEmptyComic(GenericComic):
    """Generic class for comics where nothing is to be done.

    It can be useful to temporarily deactivate comics that do not work
    properly by replacing `class MyComic(GenericWhateverComic)` with
    `class MyComic(GenericEmptyComic, GenericWhateverComic)`.
    """

    @classmethod
    def get_next_comic(cls, last_comic):
        """Return an empty list: there is never any comic to retrieve."""
        no_comics = []
        return no_comics
297
298
299
class ExtraFabulousComics(GenericNavigableComic):
    """Retriever for Extra Fabulous Comics."""
    name = 'efc'
    long_name = 'Extra Fabulous Comics'
    url = 'http://extrafabulouscomics.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'a' element titled 'FIRST' found on the page at `cls.url`."""
        home = get_soup_at_url(cls.url)
        return home.find('a', title='FIRST')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the title, images and file prefix of the comic in `soup`."""
        # Only images whose src starts with the site's upload directory are kept.
        upload_src_re = re.compile('^%s/wp-content/uploads/' % cls.url)
        pictures = soup.find_all('img', src=upload_src_re)
        title = soup.find('h2', class_='post-title').string
        info = {}
        info['title'] = title
        info['img'] = [pic['src'] for pic in pictures]
        info['prefix'] = title + '-'
        return info
322
323
324
class GenericLeMondeBlog(GenericNavigableComic):
    """Generic retriever for comics hosted on Le Monde blogs.

    Subclasses provide `get_first_comic_url`.
    """
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_url(cls):
        """Return the url of the comic to start from (to be overridden)."""
        raise NotImplementedError

    @classmethod
    def get_first_comic_link(cls):
        """Return a link-like dict pointing to `get_first_comic_url()`."""
        first_url = cls.get_first_comic_url()
        return {'href': first_url}

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, short link, images and date of the comic in `soup`."""
        shortlink = soup.find('link', rel='shortlink')
        url2 = shortlink['href']
        og_title = soup.find('meta', property='og:title')
        title = og_title['content']
        date_str = soup.find("span", class_="entry-date").string
        # The date is parsed with a French locale.
        published = string_to_date(date_str, "%d %B %Y", "fr_FR.utf8")
        og_images = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'url2': url2,
            'img': [convert_iri_to_plain_ascii_uri(meta['content']) for meta in og_images],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
354
355
356
class ZepWorld(GenericLeMondeBlog):
    """Retriever for Zep World comics."""
    name = 'zep'
    long_name = 'Zep World'
    url = 'http://zepworld.blog.lemonde.fr'

    @classmethod
    def get_first_comic_url(cls):
        """Return the hardcoded url of the comic to start from."""
        first_url = 'http://zepworld.blog.lemonde.fr/2014/10/31/bientot-le-blog-de-zep/'
        return first_url
365
366
367
class Vidberg(GenericLeMondeBlog):
    """Retriever for Vidberg comics."""
    name = "vidberg"
    long_name = "Vidberg - l'actu en patates"
    url = 'http://vidberg.blog.lemonde.fr'

    @classmethod
    def get_first_comic_url(cls):
        """Return the hardcoded url of the comic to start from."""
        # This is not the very first comic: no efficient way to retrieve it was found.
        first_url = 'http://vidberg.blog.lemonde.fr/2012/02/09/revue-de-campagne-la-campagne-du-modem-semballe/'
        return first_url
377
378
379
class Plantu(GenericLeMondeBlog):
    """Retriever for Plantu comics."""
    name = "plantu"
    long_name = 'Plantu'
    url = 'http://plantu.blog.lemonde.fr'

    @classmethod
    def get_first_comic_url(cls):
        """Return the hardcoded url of the comic to start from."""
        first_url = 'http://plantu.blog.lemonde.fr/2014/10/28/stress-test-a-bruxelles/'
        return first_url
388
389
390
class XavierGorce(GenericLeMondeBlog):
    """Retriever for Xavier Gorce comics."""
    name = "gorce"
    long_name = 'Xavier Gorce'
    url = 'http://xaviergorce.blog.lemonde.fr'

    @classmethod
    def get_first_comic_url(cls):
        """Return the hardcoded url of the comic to start from."""
        first_url = 'http://xaviergorce.blog.lemonde.fr/2015/01/09/distinction/'
        return first_url
399
400
401
class CartooningForPeace(GenericLeMondeBlog):
    """Retriever for Cartooning For Peace comics."""
    name = "forpeace"
    long_name = 'Cartooning For Peace'
    url = 'http://cartooningforpeace.blog.lemonde.fr'

    @classmethod
    def get_first_comic_url(cls):
        """Return the hardcoded url of the comic to start from."""
        first_url = 'http://cartooningforpeace.blog.lemonde.fr/2014/12/15/bado/'
        return first_url
410
411
412
class Aurel(GenericLeMondeBlog):
    """Retriever for Aurel comics."""
    name = "aurel"
    long_name = 'Aurel'
    url = 'http://aurel.blog.lemonde.fr'

    @classmethod
    def get_first_comic_url(cls):
        """Return the hardcoded url of the comic to start from."""
        first_url = 'http://aurel.blog.lemonde.fr/2014/09/29/le-senat-repasse-a-droite/'
        return first_url
421
422
423
class LesCulottees(GenericLeMondeBlog):
    """Retriever for Les Culottees comics."""
    name = "culottees"
    long_name = "Les Culottees"
    url = 'http://lesculottees.blog.lemonde.fr'

    @classmethod
    def get_first_comic_url(cls):
        """Return the hardcoded url of the comic to start from."""
        first_url = 'http://lesculottees.blog.lemonde.fr/2016/01/11/clementine-delait-femme-a-barbe/'
        return first_url
432
433
434
class UneAnneeAuLycee(GenericLeMondeBlog):
    """Retriever for Une Annee Au Lycee comics."""
    name = "lycee"
    long_name = "Une Annee au Lycee"
    url = "http://uneanneeaulycee.blog.lemonde.fr"

    @classmethod
    def get_first_comic_url(cls):
        """Return the hardcoded url of the comic to start from."""
        first_url = 'http://uneanneeaulycee.blog.lemonde.fr/2016/06/13/la-semaine-du-bac-est-arrivee/'
        return first_url
443
444
445 View Code Duplication
class Rall(GenericNavigableComic):
    """Retriever for Ted Rall comics."""
    # Also on http://www.gocomics.com/tedrall
    name = "rall"
    long_name = 'Ted Rall'
    url = 'http://rall.com/comic'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return a link-like dict pointing to the hardcoded starting comic."""
        # This is not the very first comic: no efficient way to retrieve it was found.
        start_url = 'http://rall.com/2014/01/30/los-angeles-times-cartoon-well-miss-those-california-flowers'
        return {'href': start_url}

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, author, date, description and images of the comic in `soup`."""
        title = soup.find('meta', property='og:title')['content']
        author_span = soup.find("span", class_="author vcard")
        author = author_span.find("a").string
        date_str = soup.find("span", class_="entry-date").string
        published = string_to_date(date_str, "%B %d, %Y")
        desc = soup.find('meta', property='og:description')['content']
        content_imgs = soup.find('div', class_='entry-content').find_all('img')
        # The last 7 images are dropped: they are social media buttons.
        comic_imgs = content_imgs[:-7]
        return {
            'title': title,
            'author': author,
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'description': desc,
            'img': [img['src'] for img in comic_imgs],
        }
478
479
480
class Dilem(GenericNavigableComic):
    """Retriever for Ali Dilem comics."""
    name = 'dilem'
    long_name = 'Ali Dilem'
    url = 'http://information.tv5monde.com/dilem'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return a link-like dict pointing to the hardcoded first comic."""
        first_url = 'http://information.tv5monde.com/dilem/2004-06-26'
        return {'href': first_url}

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the 'a' element leading to the next/previous comic, or None."""
        # On this site the classes are swapped: 'prev' leads forward, 'next' backward.
        wanted_class = 'prev' if next_ else 'next'
        item = last_soup.find('li', class_=wanted_class)
        if not item:
            return None
        return item.find('a')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return short url, title, images and date of the comic in `soup`."""
        short_url = soup.find('link', rel='shortlink')['href']
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        og_images = soup.find_all('meta', property='og:image')
        # Only the first 10 characters of the date (the "%Y-%m-%d" part) are parsed.
        raw_date = soup.find('span', property='dc:date')['content']
        published = string_to_date(raw_date[:10], "%Y-%m-%d")
        return {
            'short_url': short_url,
            'title': title,
            'img': [meta['content'] for meta in og_images],
            'day': published.day,
            'month': published.month,
            'year': published.year,
        }
515
516
517
class SpaceAvalanche(GenericNavigableComic):
    """Retriever for Space Avalanche comics."""
    name = "avalanche"
    long_name = "Space Avalanche"
    url = "http://www.spaceavalanche.com"
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return a link-like dict (href and title) for the hardcoded first comic."""
        first_link = {}
        first_link['href'] = "http://www.spaceavalanche.com/2009/02/02/irish-sea/"
        first_link['title'] = "Irish Sea"
        return first_link

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, date and images of the comic in `soup`.

        The title comes from `link`, the date from the three numeric path
        components of the link's url.
        """
        url_date_re = re.compile('.*/([0-9]*)/([0-9]*)/([0-9]*)/.*$')
        title = link['title']
        url = cls.get_url_from_link(link)
        date_parts = url_date_re.match(url).groups()
        year, month, day = map(int, date_parts)
        entry = soup.find("div", class_="entry")
        pictures = entry.find_all("img")
        return {
            'title': title,
            'day': day,
            'month': month,
            'year': year,
            'img': [pic['src'] for pic in pictures],
        }
545
546
547
class ZenPencils(GenericNavigableComic):
    """Retriever for ZenPencils comics."""
    # Also on http://zenpencils.tumblr.com
    # Also on http://www.gocomics.com/zen-pencils
    name = "zenpencils"
    long_name = "Zen Pencils"
    url = "http://zenpencils.com"
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return a link-like dict pointing to the hardcoded first comic."""
        first_url = 'http://zenpencils.com/comic/1-ralph-waldo-emerson-make-them-cry/'
        return {'href': first_url}

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, description, author, date and images of the comic in `soup`."""
        pictures = soup.find('div', id='comic').find_all('img')
        post = soup.find('div', class_='post-content')
        author_span = post.find("span", class_="post-author")
        author = author_span.find("a").string
        title = post.find('h2', class_='post-title').string
        date_str = post.find('span', class_='post-date').string
        published = string_to_date(date_str, "%B %d, %Y")
        # Sanity checks on the page structure.
        assert pictures
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        assert all(pic['alt'] in (title, "") for pic in pictures)
        desc = soup.find('meta', property='og:description')['content']
        return {
            'title': title,
            'description': desc,
            'author': author,
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [pic['src'] for pic in pictures],
        }
583
584
585
class ItsTheTie(GenericNavigableComic):
    """Retriever for It's the tie comics."""
    # Also on http://itsthetie.tumblr.com
    # Also on https://tapastic.com/series/itsthetie
    name = "tie"
    long_name = "It's the tie"
    url = 'http://itsthetie.com'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, date, images and tags of the comic in `soup`."""
        title = soup.find('h1', class_='comic-title').find('a').string
        meta_header = soup.find('header', class_='comic-meta entry-meta')
        date_str = meta_header.find('a').string
        published = string_to_date(date_str, "%B %d, %Y")
        # Bonus images may or may not be in meta og:image.
        og_images = soup.find_all('meta', property='og:image')
        main_srcs = [meta['content'] for meta in og_images]
        bonus_imgs = soup.find_all('img', attrs={'data-oversrc': True})
        bonus_srcs = [img['data-oversrc'] for img in bonus_imgs]
        # Append the bonus sources that are not already among the main ones.
        merged_srcs = list(main_srcs)
        for src in bonus_srcs:
            if src not in main_srcs:
                merged_srcs.append(src)
        kept_srcs = [src for src in merged_srcs
                     if not src.endswith("/2016/01/bonus-panel.png")]
        tag_meta = soup.find('meta', property='article:tag')
        if tag_meta:
            tags = tag_meta['content']
        else:
            tags = ""
        return {
            'title': title,
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': kept_srcs,
            'tags': tags,
        }
618
619
620
class PenelopeBagieu(GenericNavigableComic):
    """Retriever for the comics of Penelope Bagieu's blog."""
    name = "bagieu"
    long_name = "Ma vie est tout a fait fascinante (Bagieu)"
    url = "http://www.penelope-jolicoeur.com"
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return a link-like dict pointing to the hardcoded first comic."""
        first_url = 'http://www.penelope-jolicoeur.com/2007/02/ma-vie-mon-oeuv.html'
        return {'href': first_url}

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, images and date of the comic in `soup`."""
        date_str = soup.find('h2', class_='date-header').string
        # The date is parsed with a French locale.
        published = string_to_date(date_str, "%A %d %B %Y", "fr_FR.utf8")
        body = soup.find('div', class_='entry-body')
        pictures = body.find_all('img')
        title = soup.find('h3', class_='entry-header').string
        return {
            'title': title,
            'img': [pic['src'] for pic in pictures],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
646
647
648
class OneOneOneOneComic(GenericNavigableComic):
    """Retriever for 1111 Comics."""
    # Also on http://comics1111.tumblr.com
    # Also on https://tapastic.com/series/1111-Comics
    name = "1111"
    long_name = "1111 Comics"
    url = "http://www.1111comics.me"
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, date and images of the comic in `soup`."""
        title_heading = soup.find('h1', class_='comic-title')
        title = title_heading.find('a').string
        meta_header = soup.find('header', class_='comic-meta entry-meta')
        date_str = meta_header.find('a').string
        published = string_to_date(date_str, "%B %d, %Y")
        og_images = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [meta['content'] for meta in og_images],
        }
672
673
674
class AngryAtNothing(GenericNavigableComic):
    """Retriever for Angry at Nothing comics."""
    # Also on http://tapastic.com/series/Comics-yeah-definitely-comics-
    name = "angry"
    long_name = "Angry At Nothing"
    url = "http://www.angryatnothing.net"
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, date and images of the comic in `soup`."""
        title_heading = soup.find('h1', class_='comic-title')
        title = title_heading.find('a').string
        meta_header = soup.find('header', class_='comic-meta entry-meta')
        date_str = meta_header.find('a').string
        published = string_to_date(date_str, "%B %d, %Y")
        og_images = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [meta['content'] for meta in og_images],
        }
697
698
699
class NeDroid(GenericNavigableComic):
    """Retriever for NeDroid comics."""
    name = "nedroid"
    long_name = "NeDroid"
    url = "http://nedroid.com"
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return short url, titles, image, date and number of the comic in `soup`.

        The number comes from the short link, the date from the src of the
        first image found in the 'comic' div.
        """
        short_url_re = re.compile('^%s/\\?p=([0-9]*)' % cls.url)
        comic_url_re = re.compile('//nedroid.com/comics/([0-9]*)-([0-9]*)-([0-9]*).*')
        shortlink = soup.find('link', rel='shortlink')
        short_url = cls.get_url_from_link(shortlink)
        num = int(short_url_re.match(short_url).group(1))
        pictures = soup.find('div', id='comic').find_all('img')
        first_pic = pictures[0]
        year, month, day = map(int, comic_url_re.match(first_pic['src']).groups())
        assert len(pictures) == 1
        title = first_pic['alt']
        title2 = first_pic['title']
        return {
            'short_url': short_url,
            'title': title,
            'title2': title2,
            'img': [urljoin_wrapper(cls.url, pic['src']) for pic in pictures],
            'day': day,
            'month': month,
            'year': year,
            'num': num,
        }
730
731
732
class Garfield(GenericNavigableComic):
    """Retriever for Garfield comics."""
    # Also on http://www.gocomics.com/garfield
    name = "garfield"
    long_name = "Garfield"
    url = "https://garfield.com"

    @classmethod
    def get_first_comic_link(cls):
        """Return a link-like dict pointing to the hardcoded first comic."""
        first_url = 'https://garfield.com/comic/1978/06/19'
        return {'href': first_url}

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the right (next) or left (previous) arrow 'a' element."""
        if next_:
            arrow_class = 'comic-arrow-right'
        else:
            arrow_class = 'comic-arrow-left'
        return last_soup.find('a', class_=arrow_class)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return date and images of the comic in `soup`.

        The date is read from the url of `link`.
        """
        url = cls.get_url_from_link(link)
        date_re = re.compile('^%s/comic/([0-9]*)/([0-9]*)/([0-9]*)' % cls.url)
        year, month, day = map(int, date_re.match(url).groups())
        display = soup.find('div', class_='comic-display')
        pictures = display.find_all('img', class_='img-responsive')
        return {
            'month': month,
            'year': year,
            'day': day,
            'img': [pic['src'] for pic in pictures],
        }
761
762
763 View Code Duplication
class Dilbert(GenericNavigableComic):
    """Class to retrieve Dilbert comics."""
    # Also on http://www.gocomics.com/dilbert-classics
    name = 'dilbert'
    long_name = 'Dilbert'
    url = 'http://dilbert.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics."""
        return {'href': 'http://dilbert.com/strip/1989-04-16'}

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next (or previous) comic, or None when there is none."""
        link = last_soup.find('div', class_='nav-comic nav-right' if next_ else 'nav-comic nav-left')
        return link.find('a') if link else None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with title, description, images, author, tags and
        publication date read from the page's meta elements. 'tags' is an
        empty string when the page has no 'article:tag' meta element.
        """
        title = soup.find('meta', property='og:title')['content']
        imgs = soup.find_all('meta', property='og:image')
        desc = soup.find('meta', property='og:description')['content']
        date_str = soup.find('meta', property='article:publish_date')['content']
        day = string_to_date(date_str, "%B %d, %Y")
        author = soup.find('meta', property='article:author')['content']
        # Some pages may lack a tag meta: do not fail on subscripting None.
        tag_meta = soup.find('meta', property='article:tag')
        tags = tag_meta['content'] if tag_meta else ""
        return {
            'title': title,
            'description': desc,
            'img': [i['content'] for i in imgs],
            'author': author,
            'tags': tags,
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
801
802
803
class VictimsOfCircumsolar(GenericNavigableComic):
    """Retriever for the Victims Of Circumsolar comics."""
    name = 'circumsolar'
    long_name = 'Victims Of Circumsolar'
    url = 'http://www.victimsofcircumsolar.com'
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_first_comic_link(cls):
        """Return a link-like dict holding the hard-coded URL of the first comic."""
        return {'href': 'http://www.victimsofcircumsolar.com/comic/modern-addiction'}

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title, description, images) of a comic page.

        No date is returned: it is on the archive page only.
        """
        # When several og: tags are present, the last one is used.
        title_metas = soup.find_all('meta', property='og:title')
        title = title_metas[-1]['content']
        desc_metas = soup.find_all('meta', property='og:description')
        description = desc_metas[-1]['content']
        comic_div = soup.find('div', id='comic')
        pictures = comic_div.find_all('img')
        # Sanity check: every image repeats the title in both attributes.
        assert all(pic['title'] == pic['alt'] == title for pic in pictures)
        return {
            'title': title,
            'description': description,
            'img': [pic['src'] for pic in pictures],
        }
828
829
830
class ThreeWordPhrase(GenericNavigableComic):
    """Retriever for the Three Word Phrase comics."""
    # Also on http://www.threewordphrase.tumblr.com
    name = 'threeword'
    long_name = 'Three Word Phrase'
    url = 'http://threewordphrase.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the parent element of the 'first' button image on the home page."""
        home = get_soup_at_url(cls.url)
        first_button = home.find('img', src='/firstlink.gif')
        return first_button.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the parent of the next/previous button image, or None if it has no href."""
        button_src = '/nextlink.gif' if next_ else '/prevlink.gif'
        anchor = last_soup.find('img', src=button_src).parent
        if anchor.get('href') is None:
            return None
        return anchor

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (titles and image URLs) of a comic page."""
        # Images whose source ends with one of these suffixes are left out.
        skipped_suffixes = (
            'link.gif', '32.png', 'twpbookad.jpg',
            'merchad.jpg', 'header.gif', 'tipjar.jpg')
        title_tag = soup.find('title')
        pictures = []
        for picture in soup.find_all('img'):
            if picture['src'].endswith(skipped_suffixes):
                continue
            pictures.append(picture)
        return {
            'title': title_tag.string if title_tag else None,
            'title2': '  '.join(pic.get('alt') for pic in pictures if pic.get('alt')),
            'img': [urljoin_wrapper(cls.url, pic['src']) for pic in pictures],
        }
861
862
863
class DeadlyPanel(GenericEmptyComic, GenericNavigableComic):
    """Retriever for the Deadly Panel comics."""
    # Also on https://tapastic.com/series/deadlypanel
    name = 'deadly'
    long_name = 'Deadly Panel'
    url = 'http://www.deadlypanel.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (image URLs only) of a comic page."""
        comic_div = soup.find('div', id='comic')
        pictures = comic_div.find_all('img')
        # Sanity check: alt text and title are expected to be the same.
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        return {
            'img': [pic['src'] for pic in pictures],
        }
880
881
882 View Code Duplication
class TheGentlemanArmchair(GenericNavigableComic):
    """Retriever for The Gentleman Armchair comics."""
    name = 'gentlemanarmchair'
    long_name = 'The Gentleman Armchair'
    url = 'http://thegentlemansarmchair.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (images, title, author, date) of a comic page."""
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        published = string_to_date(
            soup.find('span', class_='post-date').string, "%B %d, %Y")
        pictures = soup.find('div', id='comic').find_all('img')
        return {
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'author': author,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
906
907
908 View Code Duplication
class MyExtraLife(GenericNavigableComic):
    """Retriever for the My Extra Life comics."""
    name = 'extralife'
    long_name = 'My Extra Life'
    url = 'http://www.myextralife.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'first comic' navigation link found on the home page."""
        home = get_soup_at_url(cls.url)
        return home.find('a', class_='comic_nav_link first_comic_link')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title, images, date) of a comic page."""
        title = soup.find("h1", class_="comic_title").string
        raw_date = soup.find("span", class_="comic_date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find_all("img", class_="comic")
        # Sanity check: each image carries the title as both alt and title.
        assert all(pic['alt'] == pic['title'] == title for pic in pictures)
        return {
            'title': title,
            # Images with an empty source are dropped.
            'img': [pic['src'] for pic in pictures if pic["src"]],
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
935
936
937
class SaturdayMorningBreakfastCereal(GenericNavigableComic):
    """Retriever for the Saturday Morning Breakfast Cereal comics."""
    # Also on http://www.gocomics.com/saturday-morning-breakfast-cereal
    # Also on http://smbc-comics.tumblr.com
    name = 'smbc'
    long_name = 'Saturday Morning Breakfast Cereal'
    url = 'http://www.smbc-comics.com'
    get_navi_link = get_a_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the <a rel="start"> link found on the home page."""
        home = get_soup_at_url(cls.url)
        return home.find('a', rel='start')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title, images, date) of a comic page.

        The main image comes first, followed by the image of the
        'aftercomic' div when that div exists and its image has a source.
        """
        main_img = soup.find('img', id='cc-comic')
        sources = [main_img['src']]
        after_div = soup.find('div', id='aftercomic')
        if after_div:
            after_src = after_div.find('img')['src']
            if after_src:
                sources.append(after_src)
        raw_date = soup.find('div', class_='cc-publishtime').contents[0]
        published = string_to_date(raw_date, "%B %d, %Y")
        return {
            'title': main_img['title'],
            'img': [urljoin_wrapper(cls.url, src) for src in sources],
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
968
969
970
class PerryBibleFellowship(GenericListableComic):
    """Retriever for the Perry Bible Fellowship comics."""
    name = 'pbf'
    long_name = 'Perry Bible Fellowship'
    url = 'http://pbfcomics.com'
    get_url_from_archive_element = join_cls_url_to_href

    @classmethod
    def get_archive_elements(cls):
        """Return the numeric comic links of the home page, in reversed page order."""
        numeric_href_re = re.compile('^/[0-9]*/$')
        home = get_soup_at_url(cls.url)
        anchors = home.find_all('a', href=numeric_href_re)
        return reversed(anchors)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (number, name, image) of a comic page.

        Asserts that the link's href matches its number and that the page
        holds exactly one comic image whose alt text is the link text.
        """
        page_url = cls.get_url_from_archive_element(link)
        picture_src_re = re.compile('^/archive_b/PBF.*')
        name = link.string
        num = int(link['name'])
        assert link['href'] == '/%d/' % num
        pictures = soup.find_all('img', src=picture_src_re)
        assert len(pictures) == 1
        assert pictures[0]['alt'] == name
        return {
            'num': num,
            'name': name,
            'img': [urljoin_wrapper(page_url, pic['src']) for pic in pictures],
            'prefix': '%d-' % num,
        }
1000
1001
1002
class Mercworks(GenericNavigableComic):
    """Retriever for the Mercworks comics."""
    # Also on http://mercworks.tumblr.com
    name = 'mercworks'
    long_name = 'Mercworks'
    url = 'http://mercworks.net'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict of a comic from the <meta> tags of its page."""
        title = soup.find('meta', property='og:title')['content']
        desc_meta = soup.find('meta', property='og:description')
        if desc_meta:
            description = desc_meta['content']
        else:
            # The description tag is optional.
            description = ""
        author_meta = soup.find(
            'meta', attrs={'name': 'shareaholic:article_author_name'})
        author = author_meta['content']
        time_meta = soup.find(
            'meta', attrs={'name': 'shareaholic:article_published_time'})
        # Only the first 10 characters are parsed as a YYYY-MM-DD date.
        published = string_to_date(time_meta['content'][:10], "%Y-%m-%d")
        image_metas = soup.find_all('meta', property='og:image')
        return {
            'img': [meta['content'] for meta in image_metas],
            'title': title,
            'author': author,
            'desc': description,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
1031
1032
1033
class BerkeleyMews(GenericListableComic):
    """Retriever for the Berkeley Mews comics."""
    # Also on http://mews.tumblr.com
    # Also on http://www.gocomics.com/berkeley-mews
    name = 'berkeley'
    long_name = 'Berkeley Mews'
    url = 'http://www.berkeleymews.com'
    get_url_from_archive_element = get_href
    # Matches comic URLs and captures the post number.
    comic_num_re = re.compile('%s/\\?p=([0-9]*)$' % url)

    @classmethod
    def get_archive_elements(cls):
        """Return the comic links of the archive page, in reversed page order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, "?page_id=2"))
        anchors = archive_soup.find_all('a', href=cls.comic_num_re)
        return reversed(anchors)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict of a comic page.

        The number is read from the comic URL and the date from the
        file name of the image.
        """
        date_in_src_re = re.compile('.*/([0-9]*)-([0-9]*)-([0-9]*)-.*')
        comic_url = cls.get_url_from_archive_element(link)
        num = int(cls.comic_num_re.match(comic_url).group(1))
        picture = soup.find('div', id='comic').find('img')
        # Sanity check: alt text and title are expected to be the same.
        assert picture['alt'] == picture['title']
        hover_title = picture['title']
        src = picture['src']
        year, month, day = map(int, date_in_src_re.match(src).groups())
        return {
            'num': num,
            'title': link.string,
            'title2': hover_title,
            'img': [src],
            'year': year,
            'month': month,
            'day': day,
        }
1068
1069
1070
class GenericBouletCorp(GenericNavigableComic):
    """Shared retriever for the BouletCorp comics; subclasses supply the URL."""
    # Also on http://bouletcorp.tumblr.com
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the first link of the 'centered_nav' div of the home page."""
        home = get_soup_at_url(cls.url)
        nav_div = home.find('div', id='centered_nav')
        return nav_div.find_all('a')[0]

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict of a comic page; the date is read from the URL."""
        comic_url = cls.get_url_from_link(link)
        date_in_url_re = re.compile('^%s/([0-9]*)/([0-9]*)/([0-9]*)/' % cls.url)
        year, month, day = map(int, date_in_url_re.match(comic_url).groups())
        story = soup.find('div', id='notes').find('div', class_='storycontent')
        pictures = story.find_all('img')
        # Non-empty image titles, joined with a double space.
        hover_texts = [pic.get('title') for pic in pictures]
        texts = '  '.join(text for text in hover_texts if text)
        title = soup.find('title').string
        return {
            'img': [convert_iri_to_plain_ascii_uri(pic['src'])
                    for pic in pictures if pic.get('src') is not None],
            'title': title,
            'texts': texts,
            'year': year,
            'month': month,
            'day': day,
        }
1097
1098
1099
class BouletCorp(GenericBouletCorp):
    """BouletCorp comics served from www.bouletcorp.com."""
    # All the retrieval logic is inherited from GenericBouletCorp.
    name = 'boulet'
    long_name = 'Boulet Corp'
    url = 'http://www.bouletcorp.com'
1104
1105
1106
class BouletCorpEn(GenericBouletCorp):
    """BouletCorp comics served from english.bouletcorp.com."""
    # All the retrieval logic is inherited from GenericBouletCorp.
    name = 'boulet_en'
    long_name = 'Boulet Corp English'
    url = 'http://english.bouletcorp.com'
1111
1112
1113
class AmazingSuperPowers(GenericNavigableComic):
    """Retriever for the Amazing Super Powers comics."""
    name = 'asp'
    long_name = 'Amazing Super Powers'
    url = 'http://www.amazingsuperpowers.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title, author, images, date) of a comic page.

        The title is made of the image titles joined with a space.
        """
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find('div', id='comic').find_all('img')
        title = ' '.join(pic['title'] for pic in pictures)
        # Sanity check: alt text and title are expected to be the same.
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        return {
            'title': title,
            'author': author,
            'img': [pic['src'] for pic in pictures],
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
1138
1139
1140
class ToonHole(GenericListableComic):
    """Retriever for the Toon Hole comics."""
    # Also on http://tapastic.com/series/TOONHOLE
    name = 'toonhole'
    long_name = 'Toon Hole'
    url = 'http://www.toonhole.com'
    get_url_from_archive_element = get_href

    @classmethod
    def get_archive_elements(cls):
        """Return the rel="bookmark" links of the archive page, in reversed page order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive/'))
        bookmarks = archive_soup.find_all('a', rel='bookmark')
        return reversed(bookmarks)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title, date, images) of a comic page."""
        title = link.string
        raw_date = soup.find('div', class_='comicdate').string.strip()
        # The helper is applied before parsing with the "%B %d, %Y" format.
        published = string_to_date(
            remove_st_nd_rd_th_from_date(raw_date), "%B %d, %Y")
        pictures = soup.find('div', id='comic').find_all('img')
        # Sanity check: each image carries the link text as alt and title.
        assert all(pic['alt'] == pic['title'] == title for pic in pictures)
        return {
            'title': title,
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [convert_iri_to_plain_ascii_uri(pic['src']) for pic in pictures],
        }
1168
1169
1170
class Channelate(GenericNavigableComic):
    """Retriever for the Channelate comics."""
    name = 'channelate'
    long_name = 'Channelate'
    url = 'http://www.channelate.com'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict of a comic page.

        When the page has an 'extrapanelbutton' div, the page it links to
        is fetched as well and its extra panel images are appended.
        """
        author = soup.find("span", class_="post-author").find("a").string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, '%Y/%m/%d')
        title = soup.find('meta', property='og:title')['content']
        comic_div = soup.find('div', id='comic')
        if comic_div:
            pictures = comic_div.find_all('img')
        else:
            pictures = []
        # Sanity check (main images only): alt text and title are the same.
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        extra_url = None
        extra_button = soup.find('div', id='extrapanelbutton')
        if extra_button:
            extra_url = extra_button.find('a')['href']
            extra_page = get_soup_at_url(extra_url)
            pictures.extend(
                extra_page.find_all('img', class_='extrapanelimage'))
        return {
            'url_extra': extra_url,
            'title': title,
            'author': author,
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [pic['src'] for pic in pictures],
        }
1204
1205
1206
class CyanideAndHappiness(GenericNavigableComic):
    """Retriever for the Cyanide And Happiness comics."""
    name = 'cyanide'
    long_name = 'Cyanide and Happiness'
    url = 'http://explosm.net'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the link titled 'Oldest comic' found on the home page."""
        home = get_soup_at_url(cls.url)
        return home.find('a', title='Oldest comic')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the next/previous link, or None when it has no href."""
        # NOTE(review): the trailing space in 'previous-comic ' is kept as is;
        # confirm against the site markup before changing it.
        if next_:
            css_class = 'next-comic'
        else:
            css_class = 'previous-comic '
        anchor = last_soup.find('a', class_=css_class)
        if anchor.get('href') is None:
            return None
        return anchor

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (number, author, date, images) of a comic page."""
        canonical_url = soup.find('meta', property='og:url')['content']
        # The number is the second to last '/'-separated part of the URL.
        num = int(canonical_url.split('/')[-2])
        raw_date = soup.find('h3').find('a').string
        published = string_to_date(raw_date, '%Y.%m.%d')
        credit = soup.find('small', class_="author-credit-name").string
        assert credit.startswith('by ')
        author = credit[3:]
        pictures = soup.find_all('img', id='main-comic')
        return {
            'num': num,
            'author': author,
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'prefix': '%d-' % num,
            'img': [convert_iri_to_plain_ascii_uri(urljoin_wrapper(cls.url, pic['src']))
                    for pic in pictures]
        }
1243
1244
1245
class MrLovenstein(GenericComic):
    """Retriever for the Mr Lovenstein comics."""
    # Also on https://tapastic.com/series/MrLovenstein
    name = 'mrlovenstein'
    long_name = 'Mr. Lovenstein'
    url = 'http://www.mrlovenstein.com'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Yield the info dict of each comic following `last_comic`.

        The range of comic numbers is taken from the links of the home
        page; when `last_comic` is falsy, it starts at the smallest one.
        """
        # TODO: more info from http://www.mrlovenstein.com/archive
        comic_href_re = re.compile('^/comic/([0-9]*)$')
        home = get_soup_at_url(cls.url)
        numbers = []
        for anchor in home.find_all('a', href=comic_href_re):
            numbers.append(int(comic_href_re.match(anchor['href']).groups()[0]))
        first, last = min(numbers), max(numbers)
        if last_comic:
            first = last_comic['num'] + 1
        picture_src_re = re.compile('^/images/comics/')
        for num in range(first, last + 1):
            comic_url = urljoin_wrapper(cls.url, '/comic/%d' % num)
            page = get_soup_at_url(comic_url)
            # Images are kept in the reverse of their order in the page.
            pictures = list(reversed(page.find_all('img', src=picture_src_re)))
            hover_texts = [pic.get('title') for pic in pictures]
            yield {
                'url': comic_url,
                'num': num,
                'texts': '  '.join(text for text in hover_texts if text),
                'img': [urljoin_wrapper(comic_url, pic['src']) for pic in pictures],
            }
1272
1273
1274
class DinosaurComics(GenericListableComic):
    """Retriever for the Dinosaur Comics comics."""
    name = 'dinosaur'
    long_name = 'Dinosaur Comics'
    url = 'http://www.qwantz.com'
    get_url_from_archive_element = get_href
    # Matches comic URLs and captures the comic number.
    comic_link_re = re.compile('^%s/index.php\\?comic=([0-9]*)$' % url)

    @classmethod
    def get_archive_elements(cls):
        """Return the comic links of the archive page, in reversed page order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive.php'))
        anchors = archive_soup.find_all('a', href=cls.comic_link_re)
        # first link is random -> skip it
        return reversed(anchors[1:])

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict of a comic page.

        The date is the text of the archive link and the 'text' entry is
        the string of the element following that link.
        """
        comic_url = cls.get_url_from_archive_element(link)
        num = int(cls.comic_link_re.match(comic_url).groups()[0])
        raw_date = link.string
        text = link.next_sibling.string
        published = string_to_date(
            remove_st_nd_rd_th_from_date(raw_date), "%B %d, %Y")
        picture_src_re = re.compile('^%s/comics/' % cls.url)
        picture = soup.find('img', src=picture_src_re)
        return {
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [picture.get('src')],
            'title': picture.get('title'),
            'text': text,
            'num': num,
        }
1307
1308
1309 View Code Duplication
class ButterSafe(GenericListableComic):
    """Retriever for the Butter Safe comics."""
    name = 'butter'
    long_name = 'ButterSafe'
    url = 'http://buttersafe.com'
    get_url_from_archive_element = get_href
    # Matches comic URLs and captures the three numeric parts of the path.
    comic_link_re = re.compile('^%s/([0-9]*)/([0-9]*)/([0-9]*)/.*' % url)

    @classmethod
    def get_archive_elements(cls):
        """Return the comic links of the archive page, in reversed page order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive/'))
        anchors = archive_soup.find_all('a', href=cls.comic_link_re)
        return reversed(anchors)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict of a comic page; the date is read from the URL."""
        comic_url = cls.get_url_from_archive_element(link)
        title = link.string
        year, month, day = map(int, cls.comic_link_re.match(comic_url).groups())
        picture = soup.find('div', id='comic').find('img')
        # Sanity check: the alt text of the image is the link text.
        assert picture['alt'] == title
        return {
            'title': title,
            'day': day,
            'month': month,
            'year': year,
            'img': [picture['src']],
        }
1337
1338
1339
class CalvinAndHobbes(GenericComic):
    """Class to retrieve Calvin and Hobbes comics."""
    # Also on http://www.gocomics.com/calvinandhobbes/
    name = 'calvin'
    long_name = 'Calvin and Hobbes'
    # This is not through any official webpage but eh...
    url = 'http://marcel-oehler.marcellosendos.ch/comics/ch/'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Yield the info dict of each comic dated after the last one.

        The index page links to one page per month ('<year>/<month>/').
        Month pages that may hold comics not older than the date of
        `last_comic` (1985-11-01 when `last_comic` is falsy) are fetched,
        and a comic is yielded for each image whose date is strictly
        greater than the date of the comic handled before it.
        """
        if last_comic:
            last_date = get_date_for_comic(last_comic)
        else:
            last_date = date(1985, 11, 1)
        link_re = re.compile('^([0-9]*)/([0-9]*)/')
        for link in get_soup_at_url(cls.url).find_all('a', href=link_re):
            url = link['href']
            # The strings are kept for building URLs and patterns, the
            # integers are computed once for the date arithmetic.
            year_str, month_str = link_re.match(url).groups()
            year, month = int(year_str), int(month_str)
            # Adding 31 days to the first of the month always goes past
            # the end of that month: a month is skipped only when it is
            # entirely before last_date.
            if date(year, month, 1) + timedelta(days=31) < last_date:
                continue
            # Image file names start with year and month, then the day.
            img_re = re.compile('^%s%s([0-9]*)' % (year_str, month_str))
            month_url = urljoin_wrapper(cls.url, url)
            for img in get_soup_at_url(month_url).find_all('img', src=img_re):
                img_src = img['src']
                day = int(img_re.match(img_src).groups()[0])
                comic_date = date(year, month, day)
                if comic_date > last_date:
                    yield {
                        'url': month_url,
                        'year': year,
                        'month': month,
                        'day': day,
                        'img': ['%s%s/%s/%s' % (cls.url, year_str, month_str, img_src)],
                    }
                    last_date = comic_date
1372
1373
1374 View Code Duplication
class AbstruseGoose(GenericListableComic):
    """Retriever for the Abstruse Goose comics."""
    name = 'abstruse'
    long_name = 'Abstruse Goose'
    url = 'http://abstrusegoose.com'
    get_url_from_archive_element = get_href
    # Matches comic URLs and captures the comic number.
    comic_url_re = re.compile('^%s/([0-9]*)$' % url)
    # Matches the source of strip images.
    comic_img_re = re.compile('^%s/strips/.*' % url)

    @classmethod
    def get_archive_elements(cls):
        """Return the comic links of the archive page, in page order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive'))
        return archive_soup.find_all('a', href=cls.comic_url_re)

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Build the info dict (number, title, image) of a comic page."""
        page_url = cls.get_url_from_archive_element(archive_elt)
        number = int(cls.comic_url_re.match(page_url).groups()[0])
        return {
            'num': number,
            'title': archive_elt.string,
            'img': [soup.find('img', src=cls.comic_img_re)['src']]
        }
1397
1398
1399
class PhDComics(GenericNavigableComic):
    """Retriever for the PhD Comics comics."""
    name = 'phd'
    long_name = 'PhD Comics'
    url = 'http://phdcomics.com/comics/archive.php'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the parent element of the 'first' button image."""
        page = get_soup_at_url(cls.url)
        first_button = page.find('img', src='images/first_button.gif')
        return first_button.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the parent of the next/previous button image, or None if absent."""
        if next_:
            button_src = 'images/next_button.gif'
        else:
            button_src = 'images/prev_button.gif'
        button = last_soup.find('img', src=button_src)
        if button is None:
            return None
        return button.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (date, image, title) of a comic page.

        When the date cannot be parsed, a message is printed and the
        current date is used instead.
        """
        date_font = soup.find(
            'font', face='Arial,Helvetica,Geneva,Swiss,SunSans-Regular', color='white')
        raw_date = date_font.string.strip()
        try:
            published = string_to_date(raw_date, '%m/%d/%Y')
        except ValueError:
            print("Invalid date %s" % raw_date)
            published = date.today()
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        return {
            'year': published.year,
            'month': published.month,
            'day': published.day,
            'img': [soup.find('img', id='comic')['src']],
            'title': title,
        }
1433
1434 View Code Duplication
0 ignored issues
show
This code seems to be duplicated in your project.
Loading history...
1435
class Octopuns(GenericNavigableComic):
    """Retriever for the Octopuns comics."""
    # Also on http://octopuns.tumblr.com
    name = 'octopuns'
    long_name = 'Octopuns'
    url = 'http://www.octopuns.net'

    @classmethod
    def get_first_comic_link(cls):
        """Return the parent element of the 'First' button image on the home page."""
        home = get_soup_at_url(cls.url)
        first_button = home.find('img', src=re.compile('.*/First.png'))
        return first_button.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the parent of the Next/Back button image, or None if it has no href."""
        button_pattern = '.*/Next.png' if next_ else '.*/Back.png'
        anchor = last_soup.find('img', src=re.compile(button_pattern)).parent
        if anchor.get('href') is None:
            return None
        return anchor

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (images, title, date) of a comic page."""
        title = soup.find('h3', class_='post-title entry-title').string
        raw_date = soup.find('h2', class_='date-header').string
        published = string_to_date(raw_date, "%A, %B %d, %Y")
        # Image URLs come from the <link rel="image_src"> elements.
        image_links = soup.find_all('link', rel='image_src')
        return {
            'img': [image['href'] for image in image_links],
            'title': title,
            'day': published.day,
            'month': published.month,
            'year': published.year,
        }
1466
1467 View Code Duplication
1468
class Quarktees(GenericNavigableComic):
    """Retriever for the Quarktees comics."""
    name = 'quarktees'
    long_name = 'Quarktees'
    url = 'http://www.quarktees.com/blogs/news'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return a link-like dict holding the hard-coded URL of the first article."""
        return {'href': 'http://www.quarktees.com/blogs/news/12486621-coming-soon'}

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the link whose id is 'article-next' (or 'article-prev')."""
        anchor_id = 'article-next' if next_ else 'article-prev'
        return last_soup.find('a', id=anchor_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title and image URLs) of an article page."""
        title = soup.find('meta', property='og:title')['content']
        article_div = soup.find('div', class_='single-article')
        pictures = article_div.find_all('img')
        return {
            'title': title,
            'img': [urljoin_wrapper(cls.url, pic['src']) for pic in pictures],
        }
1494
1495
1496
class OverCompensating(GenericNavigableComic):
    """Retriever for the Over Compensating comics."""
    name = 'compensating'
    long_name = 'Over Compensating'
    url = 'http://www.overcompensating.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the home page link whose href ends with 'comic=1'."""
        home = get_soup_at_url(cls.url)
        return home.find('a', href=re.compile('comic=1$'))

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the next/previous link, looked up by its title attribute."""
        anchor_title = 'next comic' if next_ else 'go back already'
        return last_soup.find('a', title=anchor_title)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (number, image, title) of a comic page."""
        picture_src_re = re.compile('^/oc/comics/.*')
        number_in_url_re = re.compile('.*comic=([0-9]*)$')
        page_url = cls.get_url_from_link(link)
        number = int(number_in_url_re.match(page_url).groups()[0])
        picture = soup.find('img', src=picture_src_re)
        return {
            'num': number,
            'img': [urljoin_wrapper(page_url, picture['src'])],
            'title': picture.get('title')
        }
1525
1526
1527
class Oglaf(GenericNavigableComic):
    """Retriever for the Oglaf comics."""
    name = 'oglaf'
    long_name = 'Oglaf [NSFW]'
    url = 'http://oglaf.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the parent element of the div with id 'st' on the home page."""
        home = get_soup_at_url(cls.url)
        return home.find("div", id="st").parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the parent of the 'nx' (or 'pvs') div, or None if absent."""
        div_id = "nx" if next_ else "pvs"
        nav_div = last_soup.find("div", id=div_id)
        if not nav_div:
            return None
        return nav_div.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title, images, description) of a comic page.

        Exactly one title image and one strip image are expected; the
        description is made of their titles joined with a space.
        """
        title = soup.find('title').string
        title_pictures = soup.find('div', id='tt').find_all('img')
        assert len(title_pictures) == 1
        strip_pictures = soup.find_all('img', id='strip')
        assert len(strip_pictures) == 1
        pictures = title_pictures + strip_pictures
        description = ' '.join(pic['title'] for pic in pictures)
        return {
            'title': title,
            'img': [pic['src'] for pic in pictures],
            'description': description,
        }
1559
1560 View Code Duplication
1561
class ScandinaviaAndTheWorld(GenericNavigableComic):
    """Class to retrieve Scandinavia And The World comics."""
    name = 'satw'
    long_name = 'Scandinavia And The World'
    url = 'http://satwcomic.com'

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics."""
        # Hard-coded url, wrapped in a dict so it can be read like a link tag.
        first_url = 'http://satwcomic.com/sweden-denmark-and-norway'
        return {'href': first_url}

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic."""
        # Navigation anchors are identified by their access key: n(ext)/p(rev).
        key = 'n' if next_ else 'p'
        return last_soup.find('a', accesskey=key)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics."""
        title_meta = soup.find('meta', attrs={'name': 'twitter:label1'})
        title = title_meta['content']
        desc_meta = soup.find('meta', property='og:description')
        description = desc_meta['content']
        comic_imgs = soup.find_all('img', itemprop="image")
        return {
            'title': title,
            'description': description,
            'img': [i['src'] for i in comic_imgs],
        }
1587
1588
1589
class SomethingOfThatIlk(GenericEmptyComic):
    """Class to retrieve the Something Of That Ilk comics."""
    # The comic does not exist anymore, hence the GenericEmptyComic base.
    name = 'somethingofthatilk'
    long_name = 'Something Of That Ilk'
    url = 'http://www.somethingofthatilk.com'
1594
1595
1596
class InfiniteMonkeyBusiness(GenericNavigableComic):
    """Generic class to retrieve InfiniteMonkeyBusiness comics."""
    name = 'monkey'
    long_name = 'Infinite Monkey Business'
    url = 'http://infinitemonkeybusiness.net'
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics."""
        # Hard-coded url, wrapped in a dict so it can be read like a link tag.
        first_url = 'http://infinitemonkeybusiness.net/comic/pillory/'
        return {'href': first_url}

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics."""
        title_meta = soup.find('meta', property='og:title')
        title = title_meta['content']
        comic_div = soup.find('div', id='comic')
        sources = [i['src'] for i in comic_div.find_all('img')]
        return {
            'title': title,
            'img': sources,
        }
1617
1618
1619
class Wondermark(GenericListableComic):
    """Class to retrieve the Wondermark comics."""
    name = 'wondermark'
    long_name = 'Wondermark'
    url = 'http://wondermark.com'
    get_url_from_archive_element = get_href

    @classmethod
    def get_archive_elements(cls):
        """Get the archive's bookmark links, in reverse page order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive/'))
        bookmarks = archive_soup.find_all('a', rel='bookmark')
        return reversed(bookmarks)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics."""
        raw_date = soup.find('div', class_='postdate').find('em').string
        day = string_to_date(remove_st_nd_rd_th_from_date(raw_date), "%B %d, %Y")
        # Pages without a <div id="comic"> yield empty image/alt/title values.
        img_src, alt, title = [], '', ''
        comic_div = soup.find('div', id='comic')
        if comic_div:
            img = comic_div.find('img')
            img_src = [img['src']]
            alt = img['alt']
            assert alt == img['title']
            title = soup.find('meta', property='og:title')['content']
        tag_links = soup.find('div', class_='postmeta').find_all('a', rel='tag')
        tags = ' '.join(t.string for t in tag_links)
        return {
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': img_src,
            'title': title,
            'alt': alt,
            'tags': tags,
        }
1656
1657
1658
class WarehouseComic(GenericNavigableComic):
    """Class to retrieve Warehouse Comic comics."""
    name = 'warehouse'
    long_name = 'Warehouse Comic'
    url = 'http://warehousecomic.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics."""
        post_title = soup.find('h2', class_='post-title').string
        raw_date = soup.find('span', class_='post-date').string
        posted_on = string_to_date(raw_date, "%B %d, %Y")
        comic_imgs = soup.find('div', id='comic').find_all('img')
        sources = [i['src'] for i in comic_imgs]
        return {
            'img': sources,
            'title': post_title,
            'day': posted_on.day,
            'month': posted_on.month,
            'year': posted_on.year,
        }
1680
1681
1682
class JustSayEh(GenericNavigableComic):
    """Class to retrieve Just Say Eh comics."""
    # Also on http://tapastic.com/series/Just-Say-Eh
    name = 'justsayeh'
    long_name = 'Just Say Eh'
    url = 'http://www.justsayeh.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics."""
        post_title = soup.find('h2', class_='post-title').string
        comic_imgs = soup.find("div", id="comic").find_all("img")
        # Each image is expected to carry the same text as 'alt' and 'title'.
        for img in comic_imgs:
            assert img['alt'] == img['title']
        first_alt = comic_imgs[0]['alt']
        return {
            'img': [i['src'] for i in comic_imgs],
            'title': post_title,
            'alt': first_alt,
        }
1703
1704
1705 View Code Duplication
class MouseBearComedy(GenericNavigableComic):
    """Class to retrieve Mouse Bear Comedy comics."""
    # Also on http://mousebearcomedy.tumblr.com
    name = 'mousebear'
    long_name = 'Mouse Bear Comedy'
    url = 'http://www.mousebearcomedy.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics."""
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        raw_date = soup.find("span", class_="post-date").string
        posted_on = string_to_date(raw_date, '%B %d, %Y')
        comic_imgs = soup.find("div", id="comic").find_all("img")
        # Every image is expected to repeat the post title as alt and title.
        for img in comic_imgs:
            assert img['alt'] == img['title'] == title
        return {
            'day': posted_on.day,
            'month': posted_on.month,
            'year': posted_on.year,
            'img': [i['src'] for i in comic_imgs],
            'title': title,
            'author': author,
        }
1731
1732 View Code Duplication
0 ignored issues
show
This code seems to be duplicated in your project.
Loading history...
1733
class BigFootJustice(GenericNavigableComic):
    """Class to retrieve Big Foot Justice comics."""
    # Also on http://tapastic.com/series/bigfoot-justice
    name = 'bigfoot'
    long_name = 'Big Foot Justice'
    url = 'http://bigfootjustice.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics."""
        comic_imgs = soup.find('div', id='comic').find_all('img')
        # Each image is expected to carry the same text as 'title' and 'alt'.
        for img in comic_imgs:
            assert img['title'] == img['alt']
        # The comic title is the space-joined titles of all its images.
        joined_title = ' '.join(i['title'] for i in comic_imgs)
        return {
            'img': [i['src'] for i in comic_imgs],
            'title': joined_title,
        }
1752
1753
1754
class RespawnComic(GenericNavigableComic):
    """Class to retrieve Respawn Comic."""
    # Also on http://respawncomic.tumblr.com
    name = 'respawn'
    long_name = 'Respawn Comic'
    # No trailing whitespace: a url ending with a space is not a valid url.
    url = 'http://respawncomic.com'
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics."""
        return {'href': 'http://respawncomic.com/comic/c0001/'}

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Title, author, publication date and image urls are all read from
        <meta> tags of the page.
        """
        title = soup.find('meta', property='og:title')['content']
        author = soup.find('meta', attrs={'name': 'shareaholic:article_author_name'})['content']
        date_str = soup.find('meta', attrs={'name': 'shareaholic:article_published_time'})['content']
        # Only the leading 'YYYY-MM-DD' part of the timestamp is parsed.
        date_str = date_str[:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        imgs = soup.find_all('meta', property='og:image')
        # og:image urls that are filtered out of the result.
        skip_imgs = {
            'http://respawncomic.com/wp-content/uploads/2016/03/site/HAROLD2.png',
            'http://respawncomic.com/wp-content/uploads/2016/03/site/DEVA.png'
        }
        return {
            'title': title,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [i['content'] for i in imgs if i['content'] not in skip_imgs],
        }
1788
1789
1790 View Code Duplication
class SafelyEndangered(GenericNavigableComic):
    """Class to retrieve Safely Endangered comics."""
    # Also on http://tumblr.safelyendangered.com
    name = 'endangered'
    long_name = 'Safely Endangered'
    url = 'http://www.safelyendangered.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics."""
        # Hard-coded url, wrapped in a dict so it can be read like a link tag.
        first_url = 'http://www.safelyendangered.com/comic/ignored/'
        return {'href': first_url}

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics."""
        post_title = soup.find('h2', class_='post-title').string
        raw_date = soup.find('span', class_='post-date').string
        posted_on = string_to_date(raw_date, '%B %d, %Y')
        comic_imgs = soup.find('div', id='comic').find_all('img')
        first_alt = comic_imgs[0]['alt']
        # Each image is expected to carry the same text as 'alt' and 'title'.
        for img in comic_imgs:
            assert img['alt'] == img['title']
        return {
            'day': posted_on.day,
            'month': posted_on.month,
            'year': posted_on.year,
            'img': [i['src'] for i in comic_imgs],
            'title': post_title,
            'alt': first_alt,
        }
1820
1821
1822 View Code Duplication
class PicturesInBoxes(GenericNavigableComic):
    """Class to retrieve Pictures In Boxes comics."""
    # Also on http://picturesinboxescomic.tumblr.com
    name = 'picturesinboxes'
    long_name = 'Pictures in Boxes'
    url = 'http://www.picturesinboxes.com'
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics."""
        # Hard-coded url, wrapped in a dict so it can be read like a link tag.
        first_url = 'http://www.picturesinboxes.com/2013/10/26/tetris/'
        return {'href': first_url}

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics."""
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        raw_date = soup.find('span', class_='post-date').string
        posted_on = string_to_date(raw_date, '%B %d, %Y')
        comic_imgs = soup.find('div', class_='comicpane').find_all('img')
        # At least one image, each repeating the post title as title and alt.
        assert comic_imgs
        for img in comic_imgs:
            assert img['title'] == img['alt'] == title
        return {
            'day': posted_on.day,
            'month': posted_on.month,
            'year': posted_on.year,
            'img': [i['src'] for i in comic_imgs],
            'title': title,
            'author': author,
        }
1853
1854
1855
class Penmen(GenericEmptyComic):
    """Class to retrieve Penmen comics."""
    # Only identification data is declared; the base class is the "empty" one.
    name = 'penmen'
    long_name = 'Penmen'
    url = 'http://penmen.com'
1860
1861
1862
class TheDoghouseDiaries(GenericNavigableComic):
    """Class to retrieve The Dog House Diaries comics."""
    name = 'doghouse'
    long_name = 'The Dog House Diaries'
    url = 'http://thedoghousediaries.com'

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics."""
        home_soup = get_soup_at_url(cls.url)
        return home_soup.find('a', id='firstlink')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic."""
        link_id = 'nextlink' if next_ else 'previouslink'
        return last_soup.find('a', id=link_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics."""
        # The strip is the image whose src starts with 'dhdcomics/'.
        strip = soup.find('img', src=re.compile('^dhdcomics/.*'))
        page_url = cls.get_url_from_link(link)
        info = {}
        info['title'] = soup.find('h2', id='titleheader').string
        info['title2'] = soup.find('div', id='subtext').string
        info['alt'] = strip.get('title')
        info['img'] = [urljoin_wrapper(page_url, strip['src'].strip())]
        # The comic number is the last path component of the page url.
        info['num'] = int(page_url.split('/')[-1])
        return info
1890
1891
1892
class InvisibleBread(GenericListableComic):
    """Class to retrieve Invisible Bread comics."""
    # Also on http://www.gocomics.com/invisible-bread
    name = 'invisiblebread'
    long_name = 'Invisible Bread'
    url = 'http://invisiblebread.com'

    @classmethod
    def get_archive_elements(cls):
        """Get the archive's title cells, in reverse page order."""
        archive_url = urljoin_wrapper(cls.url, 'archives/')
        return reversed(get_soup_at_url(archive_url).find_all('td', class_='archive-title'))

    @classmethod
    def get_url_from_archive_element(cls, td):
        """Get url corresponding to an archive element."""
        return td.find('a')['href']

    @classmethod
    def get_comic_info(cls, soup, td):
        """Get information about a particular comics.

        The title and the month/day come from the archive row `td`, the year
        from the comic url and the image from the comic page `soup`.
        """
        url = cls.get_url_from_archive_element(td)
        title = td.find('a').string
        month_and_day = td.previous_sibling.string
        # cls.url is escaped so that its dots only match literal dots.
        link_re = re.compile('^%s/([0-9]+)/' % re.escape(cls.url))
        year = link_re.match(url).groups()[0]
        date_str = month_and_day + ' ' + year
        day = string_to_date(date_str, '%b %d %Y')
        # Only the first image of the comic div is used; the former
        # 'assert len(imgs) == 1' on this one-element list was always true.
        imgs = [soup.find('div', id='comic').find('img')]
        assert all(i['title'] == i['alt'] == title for i in imgs)
        return {
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
            'title': title,
        }
1928
1929
1930
class DiscoBleach(GenericEmptyComic):
    """Class to retrieve Disco Bleach Comics."""
    # Retrieval does not work anymore, hence the GenericEmptyComic base.
    name = 'discobleach'
    long_name = 'Disco Bleach'
    url = 'http://discobleach.com'
1935
1936
1937
class TubeyToons(GenericEmptyComic):
    """Class to retrieve TubeyToons comics."""
    # Retrieval does not work anymore, hence the GenericEmptyComic base.
    # Also on http://tapastic.com/series/Tubey-Toons
    # Also on http://tubeytoons.tumblr.com
    name = 'tubeytoons'
    long_name = 'Tubey Toons'
    url = 'http://tubeytoons.com'
1944
1945
1946
class CompletelySeriousComics(GenericNavigableComic):
    """Class to retrieve Completely Serious comics."""
    name = 'completelyserious'
    long_name = 'Completely Serious Comics'
    url = 'http://completelyseriouscomics.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics."""
        post_title = soup.find('h2', class_='post-title').string
        # The author is read from the second child of the post-author span.
        author_span = soup.find('span', class_='post-author')
        author = author_span.contents[1].string
        raw_date = soup.find('span', class_='post-date').string
        posted_on = string_to_date(raw_date, '%B %d, %Y')
        comic_imgs = soup.find('div', class_='comicpane').find_all('img')
        assert comic_imgs
        alt = comic_imgs[0]['title']
        # All images are expected to share the same title/alt text.
        for img in comic_imgs:
            assert img['title'] == img['alt'] == alt
        return {
            'month': posted_on.month,
            'year': posted_on.year,
            'day': posted_on.day,
            'img': [i['src'] for i in comic_imgs],
            'title': post_title,
            'alt': alt,
            'author': author,
        }
1974
1975
1976
class PoorlyDrawnLines(GenericListableComic):
    """Class to retrieve Poorly Drawn Lines comics."""
    # Also on http://pdlcomics.tumblr.com
    name = 'poorlydrawn'
    long_name = 'Poorly Drawn Lines'
    url = 'http://poorlydrawnlines.com'
    get_url_from_archive_element = get_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        At most one image is expected in the post; with no image, the image
        list and the title are empty.
        """
        imgs = soup.find('div', class_='post').find_all('img')
        assert len(imgs) <= 1
        return {
            'img': [i['src'] for i in imgs],
            'title': imgs[0].get('title', "") if imgs else "",
        }

    @classmethod
    def get_archive_elements(cls):
        """Get the archive links pointing to comic pages, in reverse page order."""
        archive_url = urljoin_wrapper(cls.url, 'archive')
        # cls.url is escaped so that its dots only match literal dots.
        url_re = re.compile('^%s/comic/.' % re.escape(cls.url))
        return reversed(get_soup_at_url(archive_url).find_all('a', href=url_re))
1999
2000 View Code Duplication
0 ignored issues
show
This code seems to be duplicated in your project.
Loading history...
2001
class LoadingComics(GenericNavigableComic):
    """Class to retrieve Loading Artist comics."""
    name = 'loadingartist'
    long_name = 'Loading Artist'
    url = 'http://www.loadingartist.com/latest'

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics."""
        latest_soup = get_soup_at_url(cls.url)
        return latest_soup.find('a', title="First")

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic."""
        wanted_title = 'Next' if next_ else 'Previous'
        return last_soup.find('a', title=wanted_title)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics."""
        heading = soup.find('h1').string
        raw_date = soup.find('span', class_='date').string.strip()
        posted_on = string_to_date(raw_date, "%B %d, %Y")
        # Only images with empty 'alt' and 'title' attributes are kept.
        comic_div = soup.find('div', class_='comic')
        comic_imgs = comic_div.find_all('img', alt='', title='')
        return {
            'title': heading,
            'img': [i['src'] for i in comic_imgs],
            'month': posted_on.month,
            'year': posted_on.year,
            'day': posted_on.day,
        }
2030
2031
2032
class ChuckleADuck(GenericNavigableComic):
    """Class to retrieve Chuckle-A-Duck comics."""
    name = 'chuckleaduck'
    long_name = 'Chuckle-A-duck'
    url = 'http://chuckleaduck.com'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics."""
        raw_date = soup.find('span', class_='post-date').string
        posted_on = string_to_date(remove_st_nd_rd_th_from_date(raw_date), "%B %d, %Y")
        author = soup.find('span', class_='post-author').string
        # Pages without a comic div yield no image and an empty title.
        comic_div = soup.find('div', id='comic')
        if comic_div:
            comic_imgs = comic_div.find_all('img')
        else:
            comic_imgs = []
        if comic_imgs:
            title = comic_imgs[0]['title']
        else:
            title = ""
        # All images are expected to share the same title/alt text.
        for img in comic_imgs:
            assert img['title'] == img['alt'] == title
        return {
            'month': posted_on.month,
            'year': posted_on.year,
            'day': posted_on.day,
            'img': [i['src'] for i in comic_imgs],
            'title': title,
            'author': author,
        }
2058
2059 View Code Duplication
2060
class DepressedAlien(GenericNavigableComic):
    """Class to retrieve Depressed Alien Comics."""
    name = 'depressedalien'
    long_name = 'Depressed Alien'
    url = 'http://depressedalien.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics."""
        # The link is the parent element of the 'beginArrow' image.
        return get_soup_at_url(cls.url).find('img', attrs={'name': 'beginArrow'}).parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        The link is the parent element of the 'rightArrow' (next) or
        'leftArrow' (previous) image. Returns None when the page has no such
        image instead of failing with an AttributeError on None.
        """
        arrow = last_soup.find('img', attrs={'name': 'rightArrow' if next_ else 'leftArrow'})
        return arrow.parent if arrow is not None else None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Title and image urls are read from <meta> tags of the page.
        """
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        imgs = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'img': [i['content'] for i in imgs],
        }
2085
2086
2087
class ThingsInSquares(GenericListableComic):
    """Class to retrieve Things In Squares comics."""
    # This can be retrieved in other languages
    # Also on https://tapastic.com/series/Things-in-Squares
    name = 'squares'
    long_name = 'Things in squares'
    url = 'http://www.thingsinsquares.com'

    @classmethod
    def get_comic_info(cls, soup, tr):
        """Get information about a particular comics."""
        # An archive row has exactly three cells; the second holds the link
        # and the third the date.
        _, link_cell, date_cell = tr.find_all('td')
        anchor = link_cell.find('a')
        raw_date = date_cell.string
        posted_on = string_to_date(raw_date, "%m.%d.%y")
        title = anchor.string
        title2 = soup.find('meta', property='og:title')['content']
        # The description meta tag is optional.
        desc_meta = soup.find('meta', property='og:description')
        if desc_meta:
            description = desc_meta['content']
        else:
            description = ''
        tag_metas = soup.find_all('meta', property='article:tag')
        tags = ' '.join(t['content'] for t in tag_metas)
        imgs = soup.find('div', class_='entry-content').find_all('img')
        return {
            'day': posted_on.day,
            'month': posted_on.month,
            'year': posted_on.year,
            'title': title,
            'title2': title2,
            'description': description,
            'tags': tags,
            'img': [i['src'] for i in imgs],
            'alt': ' '.join(i['alt'] for i in imgs),
        }

    @classmethod
    def get_url_from_archive_element(cls, tr):
        """Get url corresponding to an archive element."""
        # Unpacking enforces that the row has exactly three cells.
        _, link_cell, _ = tr.find_all('td')
        return link_cell.find('a')['href']

    @classmethod
    def get_archive_elements(cls):
        """Get the rows of the archive table body, in reverse page order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive-2'))
        rows = archive_soup.find('tbody').find_all('tr')
        return reversed(rows)
2129
2130
2131
class HappleTea(GenericNavigableComic):
    """Class to retrieve Happle Tea Comics."""
    name = 'happletea'
    long_name = 'Happle Tea'
    url = 'http://www.happletea.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics."""
        comic_imgs = soup.find('div', id='comic').find_all('img')
        # Title, author and date are all looked up inside the post content.
        post = soup.find('div', class_='post-content')
        title = post.find('h2', class_='post-title').string
        author = post.find('a', rel='author').string
        raw_date = post.find('span', class_='post-date').string
        posted_on = string_to_date(raw_date, "%B %d, %Y")
        # Each image is expected to carry the same text as 'alt' and 'title'.
        for img in comic_imgs:
            assert img['alt'] == img['title']
        return {
            'title': title,
            'img': [i['src'] for i in comic_imgs],
            'alt': ''.join(i['alt'] for i in comic_imgs),
            'month': posted_on.month,
            'year': posted_on.year,
            'day': posted_on.day,
            'author': author,
        }
2158
2159
2160
class FatAwesomeComics(GenericNavigableComic):
    """Class to retrieve Fat Awesome Comics."""
    # Also on http://fatawesomecomedy.tumblr.com
    name = 'fatawesome'
    long_name = 'Fat Awesome'
    url = 'http://fatawesome.com/comics'
    get_navi_link = get_a_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics."""
        # Hard-coded url, wrapped in a dict so it can be read like a link tag.
        first_url = 'http://fatawesome.com/shortbus/'
        return {'href': first_url}

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics."""
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        description = soup.find('meta', attrs={'name': 'description'})['content']
        # The tag meta is optional.
        tag_meta = soup.find('meta', property='article:tag')
        if tag_meta:
            tags = tag_meta['content']
        else:
            tags = ""
        # Only the leading 'YYYY-MM-DD' part of the timestamp is parsed.
        published = soup.find('meta', property='article:published_time')['content']
        posted_on = string_to_date(published[:10], "%Y-%m-%d")
        comic_imgs = soup.find_all('img', attrs={'data-recalc-dims': "1"})
        assert len(comic_imgs) == 1
        return {
            'title': title,
            'description': description,
            'tags': tags,
            'alt': "".join(i['alt'] for i in comic_imgs),
            # Everything after the last '?' of the src is dropped.
            'img': [i['src'].rsplit('?', 1)[0] for i in comic_imgs],
            'month': posted_on.month,
            'year': posted_on.year,
            'day': posted_on.day,
        }
2194
2195
2196
class AnythingComic(GenericListableComic):
    """Class to retrieve Anything Comics."""
    # Also on http://tapastic.com/series/anything
    name = 'anythingcomic'
    long_name = 'Anything Comic'
    url = 'http://www.anythingcomic.com'

    @classmethod
    def get_archive_elements(cls):
        """Get the rows of the archive table corresponding to comics."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive/'))
        rows = archive_soup.find('table', id='chapter_table').find_all('tr')
        # The first 2 <tr>'s do not correspond to comics
        return rows[2:]

    @classmethod
    def get_url_from_archive_element(cls, tr):
        """Get url corresponding to an archive element."""
        # Unpacking enforces that the row has exactly four cells.
        _, comic_cell, _, _ = tr.find_all('td')
        anchor = comic_cell.find('a')
        return urljoin_wrapper(cls.url, anchor['href'])

    @classmethod
    def get_comic_info(cls, soup, tr):
        """Get information about a particular comics."""
        # Row layout: number, comic link, date, and a fourth unused cell.
        num_cell, comic_cell, date_cell, _ = tr.find_all('td')
        num = int(num_cell.string)
        title = comic_cell.find('a').string
        comic_imgs = soup.find_all('img', id='comic_image')
        posted_on = string_to_date(date_cell.string, '%d %b %Y %I:%M %p')
        assert len(comic_imgs) == 1
        for img in comic_imgs:
            assert img.get('alt') == img.get('title')
        return {
            'num': num,
            'title': title,
            'alt': comic_imgs[0].get('alt', ''),
            'img': [i['src'] for i in comic_imgs],
            'month': posted_on.month,
            'year': posted_on.year,
            'day': posted_on.day,
        }
2236
2237
2238
class LonnieMillsap(GenericNavigableComic):
    """Class to retrieve Lonnie Millsap's comics."""
    name = 'millsap'
    long_name = 'Lonnie Millsap'
    url = 'http://www.lonniemillsap.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics."""
        # Hard-coded url, wrapped in a dict so it can be read like a link tag.
        first_url = 'http://www.lonniemillsap.com/?p=42'
        return {'href': first_url}

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics."""
        title = soup.find('h2', class_='post-title').string
        # Author, date and images are all looked up inside the post content.
        post = soup.find('div', class_='post-content')
        author_span = post.find("span", class_="post-author")
        author = author_span.find("a").string
        raw_date = post.find("span", class_="post-date").string
        posted_on = string_to_date(raw_date, "%B %d, %Y")
        entry_imgs = post.find("div", class_="entry").find_all("img")
        return {
            'title': title,
            'author': author,
            'img': [i['src'] for i in entry_imgs],
            'month': posted_on.month,
            'year': posted_on.year,
            'day': posted_on.day,
        }
2267
2268
2269 View Code Duplication
class LinsEditions(GenericNavigableComic):
    """Class to retrieve L.I.N.S. Editions comics."""
    # Also on http://linscomics.tumblr.com
    name = 'lins'
    long_name = 'L.I.N.S. Editions'
    url = 'https://linsedition.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics."""
        # Hard-coded url, wrapped in a dict so it can be read like a link tag.
        first_url = 'https://linsedition.com/2011/09/07/l-i-n-s/'
        return {'href': first_url}

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics."""
        title = soup.find('meta', property='og:title')['content']
        image_metas = soup.find_all('meta', property='og:image')
        # Only the leading 'YYYY-MM-DD' part of the timestamp is parsed.
        published = soup.find('meta', property='article:published_time')['content']
        posted_on = string_to_date(published[:10], "%Y-%m-%d")
        return {
            'title': title,
            'img': [m['content'] for m in image_metas],
            'month': posted_on.month,
            'year': posted_on.year,
            'day': posted_on.day,
        }
2296
2297
2298
class ThorsThundershack(GenericNavigableComic):
    """Class to retrieve Thor's Thundershack comics."""
    # Also on http://tapastic.com/series/Thors-Thundershac
    name = 'thor'
    long_name = 'Thor\'s Thundershack'
    url = 'http://www.thorsthundershack.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics."""
        home_soup = get_soup_at_url(cls.url)
        return home_soup.find('a', class_='first navlink')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic, None when there is none."""
        rel = 'next' if next_ else 'prev'
        candidates = last_soup.find_all('a', rel=rel)
        # Links whose href is exactly '/comic' are skipped.
        return next((a for a in candidates if a['href'] != '/comic'), None)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics."""
        title = soup.find('meta', attrs={'name': 'description'})["content"]
        description = soup.find('div', itemprop='articleBody').text
        author = soup.find('span', itemprop='author copyrightHolder').string
        comic_imgs = soup.find_all('img', itemprop='image')
        # Each image is expected to carry the same text as 'title' and 'alt'.
        for img in comic_imgs:
            assert img['title'] == img['alt']
        if comic_imgs:
            alt = comic_imgs[0]['alt']
        else:
            alt = ""
        raw_date = soup.find('time', itemprop='datePublished')["datetime"]
        posted_on = string_to_date(raw_date, "%Y-%m-%d %H:%M:%S")
        return {
            'img': [urljoin_wrapper(cls.url, i['src']) for i in comic_imgs],
            'month': posted_on.month,
            'year': posted_on.year,
            'day': posted_on.day,
            'author': author,
            'title': title,
            'alt': alt,
            'description': description,
        }
2339
2340
2341
class GerbilWithAJetpack(GenericNavigableComic):
    """Class to retrieve GerbilWithAJetpack comics."""
    name = 'gerbil'
    long_name = 'Gerbil With A Jetpack'
    url = 'http://gerbilwithajetpack.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with the image URLs, title, alt text, author and the
        publication date (parsed with the "%B %d, %Y" format).
        """
        title = soup.find('h2', class_='post-title').string
        author = soup.find("span", class_="post-author").find("a").string
        date_str = soup.find("span", class_="post-date").string
        day = string_to_date(date_str, "%B %d, %Y")
        imgs = soup.find("div", id="comic").find_all("img")
        # Guard against pages without any image instead of raising IndexError.
        alt = imgs[0]['alt'] if imgs else ""
        assert all(i['alt'] == i['title'] == alt for i in imgs)
        return {
            'img': [i['src'] for i in imgs],
            'title': title,
            'alt': alt,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
2368
2369
2370
class EveryDayBlues(GenericNavigableComic):
    """Retrieve Every Day Blues comics."""
    name = "blues"
    long_name = "Every Day Blues"
    url = "http://everydayblues.net"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict; the date is parsed with the de_DE.utf8 locale."""
        post_title = soup.find("h2", class_="post-title").string
        author_span = soup.find("span", class_="post-author")
        post_author = author_span.find("a").string
        raw_date = soup.find("span", class_="post-date").string
        day = string_to_date(raw_date, "%d. %B %Y", "de_DE.utf8")
        comic_imgs = soup.find("div", id="comic").find_all("img")
        # Each image must repeat the post title in both 'alt' and 'title'.
        for img in comic_imgs:
            assert img['alt'] == img['title'] == post_title
        assert len(comic_imgs) <= 1
        return {
            'img': [img['src'] for img in comic_imgs],
            'title': post_title,
            'author': post_author,
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
2396
2397
2398
class BiterComics(GenericNavigableComic):
    """Retrieve Biter Comics."""
    name = "biter"
    long_name = "Biter Comics"
    url = "http://www.bitercomics.com"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict; exactly one comic image is expected."""
        entry_title = soup.find("h1", class_="entry-title").string
        vcard = soup.find("span", class_="author vcard")
        entry_author = vcard.find("a").string
        raw_date = soup.find("span", class_="entry-date").string
        day = string_to_date(raw_date, "%B %d, %Y")
        comic_imgs = soup.find("div", id="comic").find_all("img")
        for img in comic_imgs:
            assert img['alt'] == img['title']
        assert len(comic_imgs) == 1
        only_img = comic_imgs[0]
        return {
            'img': [img['src'] for img in comic_imgs],
            'title': entry_title,
            'alt': only_img['alt'],
            'author': entry_author,
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
2426
2427
2428 View Code Duplication
class TheAwkwardYeti(GenericNavigableComic):
    """Retrieve The Awkward Yeti comics."""
    # Also on http://www.gocomics.com/the-awkward-yeti
    # Also on http://larstheyeti.tumblr.com
    # Also on https://tapastic.com/series/TheAwkwardYeti
    name = 'yeti'
    long_name = 'The Awkward Yeti'
    url = 'http://theawkwardyeti.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (images, title and publication date)."""
        post_title = soup.find('h2', class_='post-title').string
        raw_date = soup.find("span", class_="post-date").string
        day = string_to_date(raw_date, "%B %d, %Y")
        comic_imgs = soup.find("div", id="comic").find_all("img")
        # Only the first image is required to have matching 'alt' and 'title'.
        assert not comic_imgs or comic_imgs[0]['alt'] == comic_imgs[0]['title']
        return {
            'img': [img['src'] for img in comic_imgs],
            'title': post_title,
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
2454
2455
2456
class PleasantThoughts(GenericNavigableComic):
    """Retrieve Pleasant Thoughts comics."""
    name = 'pleasant'
    long_name = 'Pleasant Thoughts'
    url = 'http://pleasant-thoughts.com'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict from the 'post-content' div (title and images)."""
        content = soup.find('div', class_='post-content')
        post_title = content.find('h2', class_='post-title').string
        entry = content.find("div", class_="entry")
        return {
            'title': post_title,
            'img': [img['src'] for img in entry.find_all("img")],
        }
2474
2475
2476
class MisterAndMe(GenericNavigableComic):
    """Retrieve Mister & Me comics."""
    # Also on http://www.gocomics.com/mister-and-me
    # Also on https://tapastic.com/series/Mister-and-Me
    name = 'mister'
    long_name = 'Mister & Me'
    url = 'http://www.mister-and-me.com'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict; at most one comic image is expected."""
        post_title = soup.find('h2', class_='post-title').string
        author_link = soup.find("span", class_="post-author").find("a")
        post_author = author_link.string
        raw_date = soup.find("span", class_="post-date").string
        day = string_to_date(raw_date, "%B %d, %Y")
        comic_imgs = soup.find("div", id="comic").find_all("img")
        for img in comic_imgs:
            assert img['alt'] == img['title']
        assert len(comic_imgs) <= 1
        # Alt text falls back to an empty string when no image is present.
        alt_text = comic_imgs[0]['alt'] if comic_imgs else ""
        return {
            'img': [img['src'] for img in comic_imgs],
            'title': post_title,
            'alt': alt_text,
            'author': post_author,
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
2506
2507
2508
class LastPlaceComics(GenericNavigableComic):
    """Retrieve Last Place Comics."""
    name = 'lastplace'
    long_name = 'LastPlaceComics'
    url = "http://lastplacecomics.com"
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict; at most one comic image is expected."""
        heading = soup.find('h2', class_='post-title')
        author_span = soup.find("span", class_="post-author")
        author_name = author_span.find("a").string
        date_span = soup.find("span", class_="post-date")
        day = string_to_date(date_span.string, "%B %d, %Y")
        pictures = soup.find("div", id="comic").find_all("img")
        for pic in pictures:
            assert pic['alt'] == pic['title']
        assert len(pictures) <= 1
        if pictures:
            alt_text = pictures[0]['alt']
        else:
            alt_text = ""
        return {
            'img': [pic['src'] for pic in pictures],
            'title': heading.string,
            'alt': alt_text,
            'author': author_name,
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
2536
2537
2538
class TalesOfAbsurdity(GenericNavigableComic):
    """Retrieve Tales Of Absurdity comics."""
    # Also on http://tapastic.com/series/Tales-Of-Absurdity
    # Also on http://talesofabsurdity.tumblr.com
    name = 'absurdity'
    long_name = 'Tales of Absurdity'
    url = 'http://talesofabsurdity.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (images, title, alt, author and date)."""
        post_title = soup.find('h2', class_='post-title').string
        post_author = soup.find("span", class_="post-author").find("a").string
        day = string_to_date(
            soup.find("span", class_="post-date").string, "%B %d, %Y")
        comic_div = soup.find("div", id="comic")
        comic_imgs = comic_div.find_all("img")
        for img in comic_imgs:
            assert img['alt'] == img['title']
        # Alt text comes from the first image, or is empty without images.
        alt_text = comic_imgs[0]['alt'] if comic_imgs else ""
        return {
            'img': [img['src'] for img in comic_imgs],
            'title': post_title,
            'alt': alt_text,
            'author': post_author,
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
2567
2568
2569
class EndlessOrigami(GenericNavigableComic):
    """Retrieve Endless Origami comics."""
    name = "origami"
    long_name = "Endless Origami"
    url = "http://endlessorigami.com"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (images, title, alt, author and date)."""
        title_elt = soup.find('h2', class_='post-title')
        author_elt = soup.find("span", class_="post-author").find("a")
        date_elt = soup.find("span", class_="post-date")
        day = string_to_date(date_elt.string, "%B %d, %Y")
        strips = soup.find("div", id="comic").find_all("img")
        for strip in strips:
            assert strip['alt'] == strip['title']
        first_alt = strips[0]['alt'] if strips else ""
        return {
            'img': [strip['src'] for strip in strips],
            'title': title_elt.string,
            'alt': first_alt,
            'author': author_elt.string,
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
2596
2597
2598
class PlanC(GenericNavigableComic):
    """Retrieve Plan C comics."""
    name = 'planc'
    long_name = 'Plan C'
    url = 'http://www.plancomic.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title, images and publication date)."""
        post_title = soup.find('h2', class_='post-title').string
        raw_date = soup.find("span", class_="post-date").string
        day = string_to_date(raw_date, "%B %d, %Y")
        comic_div = soup.find('div', id='comic')
        return {
            'title': post_title,
            'img': [img['src'] for img in comic_div.find_all('img')],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
2620
2621
2622
class BuniComic(GenericNavigableComic):
    """Retrieve Buni comics."""
    name = 'buni'
    long_name = 'BuniComics'
    url = 'http://www.bunicomic.com'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict; the title is taken from the single comic image."""
        comic_imgs = soup.find('div', id='comic').find_all('img')
        for img in comic_imgs:
            assert img['alt'] == img['title']
        assert len(comic_imgs) == 1
        only_img = comic_imgs[0]
        return {
            'img': [img['src'] for img in comic_imgs],
            'title': only_img['title'],
        }
2640
2641
2642
class GenericCommitStrip(GenericNavigableComic):
    """Base class shared by the Commit Strip comics of each language."""
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict from Open Graph metadata and the entry images."""
        og_desc = soup.find('meta', property='og:description')['content']
        og_title = soup.find('meta', property='og:title')['content']
        entry = soup.find('div', class_='entry-content')
        pictures = entry.find_all('img')
        # Image titles (missing ones count as '') are joined with spaces.
        joined_titles = ' '.join(pic.get('title', '') for pic in pictures)
        # Image sources may be IRIs: convert to plain ASCII URIs before joining.
        ascii_srcs = (convert_iri_to_plain_ascii_uri(pic['src']) for pic in pictures)
        return {
            'title': og_title,
            'title2': joined_titles,
            'description': og_desc,
            'img': [urljoin_wrapper(cls.url, src) for src in ascii_srcs],
        }
2659
2660
2661
class CommitStripFr(GenericCommitStrip):
    """Retrieve Commit Strip comics in French."""
    name = 'commit_fr'
    long_name = 'Commit Strip (Fr)'
    url = 'http://www.commitstrip.com/fr'

    @classmethod
    def get_first_comic_link(cls):
        """Return a link-like dict pointing at the hard-coded first comic."""
        first_url = 'http://www.commitstrip.com/fr/2012/02/22/interview/'
        return {'href': first_url}
2671
2672
2673
class CommitStripEn(GenericCommitStrip):
    """Retrieve Commit Strip comics in English."""
    name = 'commit_en'
    long_name = 'Commit Strip (En)'
    url = 'http://www.commitstrip.com/en'

    @classmethod
    def get_first_comic_link(cls):
        """Return a link-like dict pointing at the hard-coded first comic."""
        first_url = 'http://www.commitstrip.com/en/2012/02/22/interview/'
        return {'href': first_url}
2683
2684
2685
class GenericBoumerie(GenericNavigableComic):
    """Base class shared by the Boumeries comics of each language."""
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next
    # Placeholders: subclasses provide the date format and locale used below.
    date_format = NotImplemented
    lang = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict; the date is parsed with cls.date_format / cls.lang."""
        post_title = soup.find('h2', class_='post-title').string
        shortlink = soup.find('link', rel='shortlink')['href']
        author_span = soup.find("span", class_="post-author")
        post_author = author_span.find("a").string
        raw_date = soup.find('span', class_='post-date').string
        day = string_to_date(raw_date, cls.date_format, cls.lang)
        comic_imgs = soup.find('div', id='comic').find_all('img')
        for img in comic_imgs:
            assert img['alt'] == img['title']
        return {
            'short_url': shortlink,
            'img': [img['src'] for img in comic_imgs],
            'title': post_title,
            'author': post_author,
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
2711
2712
2713
class BoumerieEn(GenericBoumerie):
    """Retrieve Boumeries comics in English."""
    name = 'boumeries_en'
    long_name = 'Boumeries (En)'
    url = 'http://comics.boumerie.com'
    # Date format and locale handed to string_to_date by the parent class.
    date_format = "%B %d, %Y"
    lang = 'en_GB.UTF-8'
2720
2721
2722
class BoumerieFr(GenericBoumerie):
    """Retrieve Boumeries comics in French."""
    name = 'boumeries_fr'
    long_name = 'Boumeries (Fr)'
    url = 'http://bd.boumerie.com'
    # Date format and locale handed to string_to_date by the parent class.
    date_format = "%A, %d %B %Y"
    lang = "fr_FR.utf8"
2729
2730
2731
class UnearthedComics(GenericNavigableComic):
    """Class to retrieve Unearthed comics."""
    # Also on http://tapastic.com/series/UnearthedComics
    # Also on http://unearthedcomics.tumblr.com
    name = 'unearthed'
    long_name = 'Unearthed Comics'
    url = 'http://unearthedcomics.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics."""
        return {'href': 'http://unearthedcomics.com/comics/world-with-turn-signals/'}

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with the title (from the first h1, else h2, else ""),
        the og:description text (None when the meta tag is absent), the
        shortlink, the image URLs and the publication date.
        """
        short_url = soup.find('link', rel='shortlink')['href']
        title_elt = soup.find('h1') or soup.find('h2')
        title = title_elt.string if title_elt else ""
        # Store the text of the meta tag, not the tag object itself; keep None
        # when the tag is missing so that case yields the same value as before.
        desc_elt = soup.find('meta', property='og:description')
        desc = desc_elt['content'] if desc_elt is not None else None
        date_str = soup.find('time', class_='published updated hidden')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        post = soup.find('div', class_="entry content entry-content type-portfolio")
        imgs = post.find_all('img')
        return {
            'title': title,
            'description': desc,
            'url2': short_url,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
2765
2766
2767
class Optipess(GenericNavigableComic):
    """Retrieve Optipess comics."""
    name = 'optipess'
    long_name = 'Optipess'
    url = 'http://www.optipess.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict; pages without a 'comic' div yield no image."""
        post_title = soup.find('h2', class_='post-title').string
        post_author = soup.find("span", class_="post-author").find("a").string
        comic_div = soup.find('div', id='comic')
        if comic_div:
            comic_imgs = comic_div.find_all('img')
        else:
            comic_imgs = []
        # Alt text is the first image's title, shared by every image.
        alt_text = comic_imgs[0]['title'] if comic_imgs else ""
        for img in comic_imgs:
            assert img['alt'] == img['title'] == alt_text
        raw_date = soup.find('span', class_='post-date').string
        day = string_to_date(raw_date, "%B %d, %Y")
        return {
            'title': post_title,
            'alt': alt_text,
            'author': post_author,
            'img': [img['src'] for img in comic_imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
2795
2796
2797
class PainTrainComic(GenericNavigableComic):
    """Class to retrieve Pain Train Comics."""
    name = 'paintrain'
    long_name = 'Pain Train Comics'
    url = 'http://paintraincomic.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with the shortlink, the comic number extracted from
        the shortlink's ``?p=`` parameter, the image URLs, the publication
        date (parsed as "%d/%m/%Y"), the alt text and the title.
        """
        title = soup.find('h2', class_='post-title').string
        short_url = soup.find('link', rel='shortlink')['href']
        # Escape the url so that its dots are matched literally in the regex.
        short_url_re = re.compile(r'^%s/\?p=([0-9]*)' % re.escape(cls.url))
        num = int(short_url_re.match(short_url).groups()[0])
        imgs = soup.find('div', id='comic').find_all('img')
        alt = imgs[0]['title']
        assert all(i['alt'] == i['title'] == alt for i in imgs)
        date_str = soup.find('span', class_='post-date').string
        day = string_to_date(date_str, "%d/%m/%Y")
        return {
            'short_url': short_url,
            'num': num,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'alt': alt,
            'title': title,
        }
2827
2828
2829
class MoonBeard(GenericNavigableComic):
    """Class to retrieve MoonBeard comics."""
    # Also on http://blog.squiresjam.es/moonbeard
    # Also on http://www.webtoons.com/en/comedy/moon-beard/list?title_no=471
    name = 'moonbeard'
    long_name = 'Moon Beard'
    url = 'http://moonbeard.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with the shortlink, the comic number extracted from
        the shortlink's ``?p=`` parameter, the image URLs, the publication
        date (parsed as "%B %d, %Y"), the title, the space-joined article
        tags, the alt text and the author.
        """
        title = soup.find('h2', class_='post-title').string
        short_url = soup.find('link', rel='shortlink')['href']
        # Escape the url so that its dots are matched literally in the regex.
        short_url_re = re.compile(r'^%s/\?p=([0-9]*)' % re.escape(cls.url))
        num = int(short_url_re.match(short_url).groups()[0])
        imgs = soup.find('div', id='comic').find_all('img')
        alt = imgs[0]['title']
        assert all(i['alt'] == i['title'] == alt for i in imgs)
        date_str = soup.find('span', class_='post-date').string
        day = string_to_date(date_str, "%B %d, %Y")
        tags = ' '.join(t['content'] for t in soup.find_all('meta', property='article:tag'))
        author = soup.find('span', class_='post-author').string
        return {
            'short_url': short_url,
            'num': num,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'title': title,
            'tags': tags,
            'alt': alt,
            'author': author,
        }
2865
2866
2867
class AHamADay(GenericNavigableComic):
    """Class to retrieve class A Ham A Day comics."""
    name = 'ham'
    long_name = 'A Ham A Day'
    url = 'http://www.ahammaday.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics."""
        return {'href': 'http://www.ahammaday.com/today/3/6/french'}

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get the next/previous link, or None when the page has none."""
        # prev is next / next is prev
        li = last_soup.find('li', class_='previous' if next_ else 'next')
        # A missing <li> yields None instead of raising AttributeError.
        return li.find('a') if li is not None else None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with the image URLs (from the itemprop="image" meta
        tags), the og:title, the author and the publication date.
        """
        date_str = soup.find('time', class_='published')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        author = soup.find('span', class_='blog-author').find('a').string
        title = soup.find('meta', property='og:title')['content']
        imgs = soup.find_all('meta', itemprop='image')
        return {
            'img': [i['content'] for i in imgs],
            'title': title,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
        }
2900
2901
2902 View Code Duplication
class LittleLifeLines(GenericNavigableComic):
    """Class to retrieve Little Life Lines comics."""
    name = 'life'
    long_name = 'Little Life Lines'
    url = 'http://www.littlelifelines.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics."""
        return {'href': 'http://www.littlelifelines.com/comics/well-done'}

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get the next/previous link, or None when the page has none."""
        # prev is next / next is prev
        li = last_soup.find('li', class_='prev' if next_ else 'next')
        return li.find('a') if li else None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with the og:title, the alt text of the first image
        ("" when there is none), the og:description, the author, the
        publication date and the image URLs.
        """
        title = soup.find('meta', property='og:title')['content']
        desc = soup.find('meta', property='og:description')['content']
        date_str = soup.find('time', class_='published')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        author = soup.find('a', rel='author').string
        div_content = soup.find('div', class_="body entry-content")
        imgs = div_content.find_all('img')
        # Images without a 'src' attribute cannot be downloaded: drop them.
        imgs = [i for i in imgs if i.get('src') is not None]
        # Guard against posts without any usable image.
        alt = imgs[0]['alt'] if imgs else ""
        return {
            'title': title,
            'alt': alt,
            'description': desc,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [i['src'] for i in imgs],
        }
2942
2943
2944
class GenericWordPressInkblot(GenericNavigableComic):
    """Base class for comics hosted on WordPress with Inkblot."""
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'first webcomic' anchor found on the home page."""
        first_link_class = 'webcomic-link webcomic1-link first-webcomic-link first-webcomic1-link'
        home_soup = get_soup_at_url(cls.url)
        return home_soup.find('a', class_=first_link_class)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict (title, publication date and images)."""
        og_title = soup.find('meta', property='og:title')['content']
        pictures = soup.find('div', class_='webcomic-image').find_all('img')
        published = soup.find('meta', property='article:published_time')['content']
        # Only the first 10 characters (the YYYY-MM-DD part) are parsed.
        day = string_to_date(published[:10], "%Y-%m-%d")
        return {
            'title': og_title,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [pic['src'] for pic in pictures],
        }
2967
2968
2969
class EverythingsStupid(GenericWordPressInkblot):
    """Retrieve Everything's Stupid comics."""
    # Also on http://tapastic.com/series/EverythingsStupid
    # Also on http://www.webtoons.com/en/challenge/everythings-stupid/list?title_no=14591
    # Also on http://everythingsstupidcomics.tumblr.com
    name = "stupid"
    long_name = "Everything's Stupid"
    url = "http://everythingsstupid.net"
2977
2978
2979
class TheIsmComics(GenericWordPressInkblot):
    """Retrieve The Ism comics."""
    # Also on https://tapastic.com/series/TheIsm (?)
    name = "theism"
    long_name = "The Ism"
    url = "http://www.theism-comics.com"
2985
2986
2987
class WoodenPlankStudios(GenericWordPressInkblot):
    """Retrieve Wooden Plank Studios comics."""
    name = "woodenplank"
    long_name = "Wooden Plank Studios"
    url = "http://woodenplankstudios.com"
2992
2993
2994
class ElectricBunnyComic(GenericNavigableComic):
    """Retrieve Electric Bunny comics."""
    # Also on http://electricbunnycomics.tumblr.com
    name = 'bunny'
    long_name = 'Electric Bunny Comic'
    url = 'http://www.electricbunnycomics.com/View/Comic/153/Welcome+to+Hell'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the parent element of the image whose alt text is 'First'."""
        first_img = get_soup_at_url(cls.url).find('img', alt='First')
        return first_img.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the parent of the 'Next'/'Back' image, or None if absent."""
        wanted_alt = 'Next' if next_ else 'Back'
        nav_img = last_soup.find('img', alt=wanted_alt)
        if not nav_img:
            return None
        return nav_img.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict from the og:title and og:image meta tags."""
        og_title = soup.find('meta', property='og:title')['content']
        og_images = soup.find_all('meta', property='og:image')
        return {
            'title': og_title,
            'img': [meta['content'] for meta in og_images],
        }
3021
3022
3023
class SheldonComics(GenericNavigableComic):
    """Retrieve Sheldon comics."""
    # Also on http://www.gocomics.com/sheldon
    name = 'sheldon'
    long_name = 'Sheldon Comics'
    url = 'http://www.sheldoncomics.com'

    @classmethod
    def get_first_comic_link(cls):
        """Return the anchor with id 'nav-first' found on the home page."""
        home_soup = get_soup_at_url(cls.url)
        return home_soup.find("a", id="nav-first")

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the first nav-next/nav-prev anchor not pointing to the home page."""
        wanted_id = "nav-next" if next_ else "nav-prev"
        for candidate in last_soup.find_all("a", id=wanted_id):
            if candidate['href'] == 'http://www.sheldoncomics.com':
                continue
            return candidate
        return None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict; the title comes from the single comic image."""
        foot_imgs = soup.find("div", id="comic-foot").find_all("img")
        for img in foot_imgs:
            assert img['alt'] == img['title']
        assert len(foot_imgs) == 1
        only_img = foot_imgs[0]
        return {
            'title': only_img['title'],
            'img': [img['src'] for img in foot_imgs],
        }
3053
3054
3055
class CubeDrone(GenericNavigableComic):
    """Class to retrieve Cube Drone comics."""
    name = 'cubedrone'
    long_name = 'Cube Drone'
    url = 'http://cube-drone.com/comics'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics."""
        return get_soup_at_url(cls.url).find('span', class_='glyphicon glyphicon-backward').parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get the parent of the right/left chevron icon, or None if absent."""
        class_ = 'glyphicon glyphicon-chevron-' + ('right' if next_ else 'left')
        span = last_soup.find('span', class_=class_)
        # A missing icon yields None instead of raising AttributeError.
        return span.parent if span is not None else None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with the twitter:url and twitter:title meta contents,
        the title and alt text of the first image, and the image URLs.
        """
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        url2 = soup.find('meta', attrs={'name': 'twitter:url'})['content']
        # date_str = soup.find('h2', class_='comic_title').find('small').string
        # day = string_to_date(date_str, "%B %d, %Y, %I:%M %p")
        imgs = soup.find_all('img', class_='comic img-responsive')
        title2 = imgs[0]['title']
        alt = imgs[0]['alt']
        return {
            'url2': url2,
            'title': title,
            'title2': title2,
            'alt': alt,
            'img': [i['src'] for i in imgs],
        }
3089
3090
3091
class MakeItStoopid(GenericNavigableComic):
    """Retrieve Make It Stoopid comics."""
    name = 'stoopid'
    long_name = 'Make it stoopid'
    url = 'http://makeitstoopid.com/comic.php'

    @classmethod
    def get_nav(cls, soup):
        """Return the first five 'cnav' elements, with None for those lacking an href."""
        nav_elts = soup.find_all(class_='cnav')
        first_bar = nav_elts[:5]
        # Everything after the first five entries must repeat them exactly.
        assert first_bar == nav_elts[5:]
        # begin, prev, archive, next_, end = first_bar
        return [elt if elt.get('href') is not None else None for elt in first_bar]

    @classmethod
    def get_first_comic_link(cls):
        """Return the first navigation entry of the home page."""
        home_nav = cls.get_nav(get_soup_at_url(cls.url))
        return home_nav[0]

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the navigation entry at index 3 (next) or 1 (previous)."""
        position = 3 if next_ else 1
        return cls.get_nav(last_soup)[position]

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict; the title is read from the link that led here."""
        link_title = link['title']
        comic_imgs = soup.find_all('img', id='comicimg')
        return {
            'title': link_title,
            'img': [img['src'] for img in comic_imgs],
        }
3123
3124
3125
class GeekAndPoke(GenericNavigableComic):
    """Class to retrieve Geek And Poke comics."""
    name = 'geek'
    long_name = 'Geek And Poke'
    url = 'http://geek-and-poke.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics (hard-coded address of the oldest post)."""
        return {'href': 'http://geek-and-poke.com/geekandpoke/2006/8/27/a-new-place-for-a-not-so-old-blog.html'}

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get the link to the next comic if next_ is true, else to the previous one."""
        # NOTE(review): 'prev-item' is used for the *next* comic; presumably
        # the site lists posts newest-first - confirm against the live page.
        return last_soup.find('a', class_='prev-item' if next_ else 'next-item')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with the title, description, author, publication date
        (day/month/year), the absolute image urls and the alt text of the
        first image. 'alt' is an empty string when the post has no image or
        when the first image has no alt attribute.
        """
        title = soup.find('meta', property='og:title')['content']
        desc = soup.find('meta', property='og:description')['content']
        date_str = soup.find('time', class_='published')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        author = soup.find('a', rel='author').string
        # Regular posts use "body entry-content"; fall back to "special-content"
        div_content = (soup.find('div', class_="body entry-content") or
                       soup.find('div', class_="special-content"))
        imgs = div_content.find_all('img')
        # Ignore <img> tags without a source
        imgs = [i for i in imgs if i.get('src') is not None]
        # NOTE(review): `in` on a bs4 Tag tests its children, not its
        # attributes, so this check is probably vacuous; `has_attr('title')`
        # may have been intended. Left unchanged to avoid new failures.
        assert all('title' not in i or i['alt'] == i['title'] for i in imgs)
        # Always a string: the previous fallback was an empty list
        alt = imgs[0].get('alt', "") if imgs else ""
        return {
            'title': title,
            'alt': alt,
            'description': desc,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
        }
3165
3166
3167
class GenericTumblrV1(GenericComic):
    """Base class for comics retrieved from Tumblr through the V1 API."""

    @classmethod
    def get_next_comic(cls, last_comic):
        """Yield the description of each photo post found after last_comic."""
        for post in cls.get_posts(last_comic):
            info = cls.get_comic_info(post)
            if info is None:
                # Not a photo post
                continue
            yield info

    @classmethod
    def get_url_from_post(cls, post):
        """Return the url of a post as given by its 'url' attribute."""
        return post['url']

    @classmethod
    def get_api_url(cls):
        """Return the address of the API read endpoint for this blog."""
        return urljoin_wrapper(cls.url, '/api/read/')

    @classmethod
    def get_comic_info(cls, post):
        """Get information about a particular comics.

        Returns None for any post whose type is not 'photo'.
        """
        if post['type'] != 'photo':
            return None
        tumblr_id = int(post['id'])
        api_url = cls.get_api_url() + '?id=%d' % (tumblr_id)
        timestamp = int(post['unix-timestamp'])
        day = datetime.datetime.fromtimestamp(timestamp).date()
        caption = post.find('photo-caption')
        if caption:
            title = caption.string
        else:
            title = ""
        tags = ' '.join(tag.string for tag in post.find_all('tag'))
        # Photos may appear in 'photo' tags and/or straight in the post
        photo_tags = post.find_all('photo') or [post]
        # Images are in multiple resolutions - taking the first one
        photo_urls = [photo.find('photo-url') for photo in photo_tags]
        return {
            'url': cls.get_url_from_post(post),
            'url2': post['url-with-slug'],
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'title': title,
            'tags': tags,
            'img': [photo_url.string for photo_url in photo_urls],
            'tumblr-id': tumblr_id,
            'api_url': api_url,  # for debug purposes
        }

    @classmethod
    def get_posts(cls, last_comic, nb_post_per_call=10):
        """Get posts using API. nb_post_per_call is max 50.

        Posts are retrieved from newer to older as per the tumblr v1 api
        but are returned in chronological order.

        When last_comic is given, only the posts newer than it are returned;
        nothing is returned if it cannot be found anymore."""
        waiting_for_url = last_comic['url'] if last_comic else None
        collected = []
        if last_comic is not None:
            # Sometimes, tumblr posts are deleted. When the previous post is
            # deleted, we might end up spending a lot of time looking for
            # something that doesn't exist. Failing early and clearly might
            # be a better option.
            last_api_url = last_comic['api_url']
            try:
                get_soup_at_url(last_api_url)
            except urllib.error.HTTPError:
                try:
                    get_soup_at_url(cls.url)
                    message = "Did not find previous post %s : it might have been deleted" % last_api_url
                except urllib.error.HTTPError:
                    message = "Did not find previous post nor main url %s" % cls.url
                print(message)
                return reversed(collected)
        api_url = cls.get_api_url()
        summary = get_soup_at_url(api_url).find('posts')
        start, total = int(summary['start']), int(summary['total'])
        assert start == 0
        for offset in range(0, total, nb_post_per_call):
            page_url = api_url + '?start=%d&num=%d' % (offset, nb_post_per_call)
            page = get_soup_at_url(page_url).find('posts')
            page_start, page_total = int(page['start']), int(page['total'])
            assert offset == page_start, "%d != %d" % (offset, page_start)
            # This may happen and should be handled in the future
            assert total == page_total, "%d != %d" % (total, page_total)
            for post in page.find_all('post'):
                if waiting_for_url and waiting_for_url == cls.get_url_from_post(post):
                    # Reached the last known comic: everything older is known
                    return reversed(collected)
                collected.append(post)
        if waiting_for_url is not None:
            print("Did not find %s : there might be a problem" % waiting_for_url)
            return []
        return reversed(collected)
3261
3262
3263
class IrwinCardozo(GenericTumblrV1):
    """Irwin Cardozo comics, retrieved through the Tumblr V1 API."""
    name = 'irwinc'
    long_name = 'Irwin Cardozo'
    url = 'http://irwincardozocomics.tumblr.com'
3268
3269
3270
class AccordingToDevin(GenericTumblrV1):
    """According To Devin comics, retrieved through the Tumblr V1 API."""
    name = 'devin'
    long_name = 'According To Devin'
    url = 'http://accordingtodevin.tumblr.com'
3275
3276
3277
class ItsTheTieTumblr(GenericTumblrV1):
    """It's the tie comics, retrieved through the Tumblr V1 API."""
    # Also on http://itsthetie.com
    # Also on https://tapastic.com/series/itsthetie
    name = 'tie-tumblr'
    long_name = "It's the tie (from Tumblr)"
    url = "http://itsthetie.tumblr.com"
3284
3285
3286
class OctopunsTumblr(GenericTumblrV1):
    """Octopuns comics, retrieved through the Tumblr V1 API."""
    # Also on http://www.octopuns.net
    name = 'octopuns-tumblr'
    long_name = 'Octopuns (from Tumblr)'
    url = 'http://octopuns.tumblr.com'
3292
3293
3294
class PicturesInBoxesTumblr(GenericTumblrV1):
    """Pictures In Boxes comics, retrieved through the Tumblr V1 API."""
    # Also on http://www.picturesinboxes.com
    name = 'picturesinboxes-tumblr'
    long_name = 'Pictures in Boxes (from Tumblr)'
    url = 'http://picturesinboxescomic.tumblr.com'
3300
3301
3302
class TubeyToonsTumblr(GenericTumblrV1):
    """TubeyToons comics, retrieved through the Tumblr V1 API."""
    # Also on http://tapastic.com/series/Tubey-Toons
    # Also on http://tubeytoons.com
    name = 'tubeytoons-tumblr'
    long_name = 'Tubey Toons (from Tumblr)'
    url = 'http://tubeytoons.tumblr.com'
3309
3310
3311
class UnearthedComicsTumblr(GenericTumblrV1):
    """Unearthed comics, retrieved through the Tumblr V1 API."""
    # Also on http://tapastic.com/series/UnearthedComics
    # Also on http://unearthedcomics.com
    name = 'unearthed-tumblr'
    long_name = 'Unearthed Comics (from Tumblr)'
    url = 'http://unearthedcomics.tumblr.com'
3318
3319
3320
class PieComic(GenericTumblrV1):
    """Pie Comic comics, retrieved through the Tumblr V1 API."""
    name = 'pie'
    long_name = 'Pie Comic'
    url = "http://piecomic.tumblr.com"
3325
3326
3327
class MrEthanDiamond(GenericTumblrV1):
    """Mr Ethan Diamond comics, retrieved through the Tumblr V1 API."""
    name = 'diamond'
    long_name = 'Mr Ethan Diamond'
    url = 'http://mrethandiamond.tumblr.com'
3332
3333
3334
class Flocci(GenericTumblrV1):
    """floccinaucinihilipilification comics, retrieved through the Tumblr V1 API."""
    name = 'flocci'
    long_name = 'floccinaucinihilipilification'
    url = "http://floccinaucinihilipilificationa.tumblr.com"
3339
3340
3341
class UpAndOut(GenericTumblrV1):
    """Up & Out comics, retrieved through the Tumblr V1 API."""
    # Also on http://tapastic.com/series/UP-and-OUT
    name = 'upandout'
    long_name = 'Up And Out (from Tumblr)'
    url = 'http://upandoutcomic.tumblr.com'
3347
3348
3349
class Pundemonium(GenericTumblrV1):
    """Pundemonium comics, retrieved through the Tumblr V1 API."""
    name = 'pundemonium'
    long_name = 'Pundemonium'
    url = 'http://monstika.tumblr.com'
3354
3355
3356
class PoorlyDrawnLinesTumblr(GenericTumblrV1):
    """Poorly Drawn Lines comics, retrieved through the Tumblr V1 API."""
    # Also on http://poorlydrawnlines.com
    name = 'poorlydrawn-tumblr'
    long_name = 'Poorly Drawn Lines (from Tumblr)'
    url = 'http://pdlcomics.tumblr.com'
3362
3363
3364
class PearShapedComics(GenericTumblrV1):
    """Pear Shaped Comics, retrieved through the Tumblr V1 API."""
    name = 'pearshaped'
    long_name = 'Pear-Shaped Comics'
    url = 'http://pearshapedcomics.com'
3369
3370
3371
class PondScumComics(GenericTumblrV1):
    """Pond Scum Comics, retrieved through the Tumblr V1 API."""
    name = 'pond'
    long_name = 'Pond Scum'
    url = 'http://pondscumcomic.tumblr.com'
3376
3377
3378
class MercworksTumblr(GenericTumblrV1):
    """Mercworks comics, retrieved through the Tumblr V1 API."""
    # Also on http://mercworks.net
    name = 'mercworks-tumblr'
    long_name = 'Mercworks (from Tumblr)'
    url = 'http://mercworks.tumblr.com'
3384
3385
3386
class OwlTurdTumblr(GenericTumblrV1):
    """Owl Turd comics, retrieved through the Tumblr V1 API."""
    # Also on http://tapastic.com/series/Owl-Turd-Comix
    name = 'owlturd-tumblr'
    long_name = 'Owl Turd (from Tumblr)'
    url = 'http://owlturd.com'
3392
3393
3394
class VectorBelly(GenericTumblrV1):
    """Vector Belly comics, retrieved through the Tumblr V1 API."""
    # Also on http://vectorbelly.com
    name = 'vector'
    long_name = 'Vector Belly'
    url = 'http://vectorbelly.tumblr.com'
3400
3401
3402
class GoneIntoRapture(GenericTumblrV1):
    """Gone Into Rapture comics, retrieved through the Tumblr V1 API."""
    # Also on http://goneintorapture.tumblr.com
    # Also on http://tapastic.com/series/Goneintorapture
    name = 'rapture'
    long_name = 'Gone Into Rapture'
    url = 'http://www.goneintorapture.com'
3409
3410
3411
class TheOatmealTumblr(GenericTumblrV1):
    """The Oatmeal comics, retrieved through the Tumblr V1 API."""
    # Also on http://theoatmeal.com
    name = 'oatmeal-tumblr'
    long_name = 'The Oatmeal (from Tumblr)'
    url = 'http://oatmeal.tumblr.com'
3417
3418
3419
class HeckIfIKnowComicsTumblr(GenericTumblrV1):
    """Heck If I Know Comics, retrieved through the Tumblr V1 API."""
    # Also on http://tapastic.com/series/Regular
    name = 'heck-tumblr'
    long_name = 'Heck if I Know comics (from Tumblr)'
    url = 'http://heckifiknowcomics.com'
3425
3426
3427
class MyJetPack(GenericTumblrV1):
    """My Jet Pack comics, retrieved through the Tumblr V1 API."""
    name = 'jetpack'
    long_name = 'My Jet Pack'
    url = 'http://myjetpack.tumblr.com'
3432
3433
3434
class CheerUpEmoKidTumblr(GenericTumblrV1):
    """CheerUpEmoKid comics, retrieved through the Tumblr V1 API."""
    # Also on http://www.cheerupemokid.com
    # Also on http://tapastic.com/series/CUEK
    name = 'cuek-tumblr'
    long_name = 'Cheer Up Emo Kid (from Tumblr)'
    url = 'http://enzocomics.tumblr.com'
3441
3442
3443
class ForLackOfABetterComic(GenericTumblrV1):
    """For Lack Of A Better Comic, retrieved through the Tumblr V1 API."""
    # Also on http://forlackofabettercomic.com
    name = 'lack'
    long_name = 'For Lack Of A Better Comic'
    url = 'http://forlackofabettercomic.tumblr.com'
3449
3450
3451
class ZenPencilsTumblr(GenericTumblrV1):
    """ZenPencils comics, retrieved through the Tumblr V1 API."""
    # Also on http://zenpencils.com
    # Also on http://www.gocomics.com/zen-pencils
    name = 'zenpencils-tumblr'
    long_name = 'Zen Pencils (from Tumblr)'
    url = 'http://zenpencils.tumblr.com'
3458
3459
3460
class ThreeWordPhraseTumblr(GenericTumblrV1):
    """Three Word Phrase comics, retrieved through the Tumblr V1 API."""
    # Also on http://threewordphrase.com
    name = 'threeword-tumblr'
    long_name = 'Three Word Phrase (from Tumblr)'
    url = 'http://www.threewordphrase.tumblr.com'
3466
3467
3468
class TimeTrabbleTumblr(GenericTumblrV1):
    """Time Trabble comics, retrieved through the Tumblr V1 API."""
    # Also on http://timetrabble.com
    name = 'timetrabble-tumblr'
    long_name = 'Time Trabble (from Tumblr)'
    url = 'http://timetrabble.tumblr.com'
3474
3475
3476
class SafelyEndangeredTumblr(GenericTumblrV1):
    """Safely Endangered comics, retrieved through the Tumblr V1 API."""
    # Also on http://www.safelyendangered.com
    name = 'endangered-tumblr'
    long_name = 'Safely Endangered (from Tumblr)'
    url = 'http://tumblr.safelyendangered.com'
3482
3483
3484
class MouseBearComedyTumblr(GenericTumblrV1):
    """Mouse Bear Comedy comics, retrieved through the Tumblr V1 API."""
    # Also on http://www.mousebearcomedy.com
    name = 'mousebear-tumblr'
    long_name = 'Mouse Bear Comedy (from Tumblr)'
    url = 'http://mousebearcomedy.tumblr.com'
3490
3491
3492
class BouletCorpTumblr(GenericTumblrV1):
    """BouletCorp comics, retrieved through the Tumblr V1 API."""
    # Also on http://www.bouletcorp.com
    name = 'boulet-tumblr'
    long_name = 'Boulet Corp (from Tumblr)'
    url = 'http://bouletcorp.tumblr.com'
3498
3499
3500
class TheAwkwardYetiTumblr(GenericTumblrV1):
    """The Awkward Yeti comics, retrieved through the Tumblr V1 API."""
    # Also on http://www.gocomics.com/the-awkward-yeti
    # Also on http://theawkwardyeti.com
    # Also on https://tapastic.com/series/TheAwkwardYeti
    name = 'yeti-tumblr'
    long_name = 'The Awkward Yeti (from Tumblr)'
    url = 'http://larstheyeti.tumblr.com'
3508
3509
3510
class NellucNhoj(GenericTumblrV1):
    """NellucNhoj comics, retrieved through the Tumblr V1 API."""
    name = 'nhoj'
    long_name = 'Nelluc Nhoj'
    url = 'http://nellucnhoj.com'
3515
3516
3517
class DownTheUpwardSpiralTumblr(GenericTumblrV1):
    """Down The Upward Spiral comics, retrieved through the Tumblr V1 API."""
    # Also on http://www.downtheupwardspiral.com
    name = 'spiral-tumblr'
    long_name = 'Down the Upward Spiral (from Tumblr)'
    url = 'http://downtheupwardspiral.tumblr.com'
3523
3524
3525
class AsPerUsualTumblr(GenericTumblrV1):
    """As Per Usual comics, retrieved through the Tumblr V1 API."""
    # Also on https://tapastic.com/series/AsPerUsual
    name = 'usual-tumblr'
    long_name = 'As Per Usual (from Tumblr)'
    url = 'http://as-per-usual.tumblr.com'
3531
3532
3533
class OneOneOneOneComicTumblr(GenericTumblrV1):
    """1111 Comics, retrieved through the Tumblr V1 API."""
    # Also on http://www.1111comics.me
    # Also on https://tapastic.com/series/1111-Comics
    name = '1111-tumblr'
    long_name = '1111 Comics (from Tumblr)'
    url = 'http://comics1111.tumblr.com'
3540
3541
3542
class JhallComicsTumblr(GenericTumblrV1):
    """Jhall Comics, retrieved through the Tumblr V1 API."""
    # Also on http://jhallcomics.com
    name = 'jhall-tumblr'
    long_name = 'Jhall Comics (from Tumblr)'
    url = 'http://jhallcomics.tumblr.com'
3548
3549
3550
class BerkeleyMewsTumblr(GenericTumblrV1):
    """Berkeley Mews comics, retrieved through the Tumblr V1 API."""
    # Also on http://www.gocomics.com/berkeley-mews
    # Also on http://www.berkeleymews.com
    name = 'berkeley-tumblr'
    long_name = 'Berkeley Mews (from Tumblr)'
    url = 'http://mews.tumblr.com'
3557
3558
3559
class JoanCornellaTumblr(GenericTumblrV1):
    """Joan Cornella comics, retrieved through the Tumblr V1 API."""
    # Also on http://joancornella.net
    name = 'cornella-tumblr'
    long_name = 'Joan Cornella (from Tumblr)'
    url = 'http://cornellajoan.tumblr.com'
3565
3566
3567
class RespawnComicTumblr(GenericTumblrV1):
    """Respawn Comic, retrieved through the Tumblr V1 API."""
    # Also on http://respawncomic.com
    name = 'respawn-tumblr'
    long_name = 'Respawn Comic (from Tumblr)'
    url = 'http://respawncomic.tumblr.com'
3573
3574
3575
class ChrisHallbeckTumblr(GenericTumblrV1):
    """Class to retrieve Chris Hallbeck comics."""
    # Also on https://tapastic.com/ChrisHallbeck
    # Also on http://maximumble.com
    # Also on http://minimumble.com
    # Also on http://thebookofbiff.com
    name = 'hallbeck-tumblr'
    # Spelling fixed: was 'Hallback', unlike the class name, url and `name`
    long_name = 'Chris Hallbeck (from Tumblr)'
    url = 'http://chrishallbeck.tumblr.com'
3584
3585
3586
class ComicNuggets(GenericTumblrV1):
    """Comic Nuggets, retrieved through the Tumblr V1 API."""
    name = 'nuggets'
    long_name = 'Comic Nuggets'
    url = 'http://comicnuggets.com'
3591
3592
3593
class PigeonGazetteTumblr(GenericTumblrV1):
    """The Pigeon Gazette comics, retrieved through the Tumblr V1 API."""
    # Also on https://tapastic.com/series/The-Pigeon-Gazette
    name = 'pigeon-tumblr'
    long_name = 'The Pigeon Gazette (from Tumblr)'
    url = 'http://thepigeongazette.tumblr.com'
3599
3600
3601
class CancerOwl(GenericTumblrV1):
    """Cancer Owl comics, retrieved through the Tumblr V1 API."""
    # Also on http://cancerowl.com
    name = 'cancerowl-tumblr'
    long_name = 'Cancer Owl (from Tumblr)'
    url = 'http://cancerowl.tumblr.com'
3607
3608
3609
class FowlLanguageTumblr(GenericTumblrV1):
    """Fowl Language comics, retrieved through the Tumblr V1 API."""
    # Also on http://www.fowllanguagecomics.com
    # Also on http://tapastic.com/series/Fowl-Language-Comics
    # Also on http://www.gocomics.com/fowl-language
    name = 'fowllanguage-tumblr'
    long_name = 'Fowl Language Comics (from Tumblr)'
    url = 'http://fowllanguagecomics.tumblr.com'
3617
3618
3619
class TheOdd1sOutTumblr(GenericTumblrV1):
    """The Odd 1s Out comics, retrieved through the Tumblr V1 API."""
    # Also on http://theodd1sout.com
    # Also on https://tapastic.com/series/Theodd1sout
    name = 'theodd-tumblr'
    long_name = 'The Odd 1s Out (from Tumblr)'
    url = 'http://theodd1sout.tumblr.com'
3626
3627
3628
class TheUnderfoldTumblr(GenericTumblrV1):
    """The Underfold comics, retrieved through the Tumblr V1 API."""
    # Also on http://theunderfold.com
    name = 'underfold-tumblr'
    long_name = 'The Underfold (from Tumblr)'
    url = 'http://theunderfold.tumblr.com'
3634
3635
3636
class LolNeinTumblr(GenericTumblrV1):
    """Lol Nein comics, retrieved through the Tumblr V1 API."""
    # Also on http://lolnein.com
    name = 'lolnein-tumblr'
    long_name = 'Lol Nein (from Tumblr)'
    url = 'http://lolneincom.tumblr.com'
3642
3643
3644
class FatAwesomeComicsTumblr(GenericTumblrV1):
    """Fat Awesome Comics, retrieved through the Tumblr V1 API."""
    # Also on http://fatawesome.com/comics
    name = 'fatawesome-tumblr'
    long_name = 'Fat Awesome (from Tumblr)'
    url = 'http://fatawesomecomedy.tumblr.com'
3650
3651
3652
class TheWorldIsFlatTumblr(GenericTumblrV1):
    """The World Is Flat Comics, retrieved through the Tumblr V1 API."""
    # Also on https://tapastic.com/series/The-World-is-Flat
    name = 'flatworld-tumblr'
    long_name = 'The World Is Flat (from Tumblr)'
    url = 'http://theworldisflatcomics.tumblr.com'
3658
3659
3660
class DorrisMc(GenericTumblrV1):
    """Dorris Mc Comics, retrieved through the Tumblr V1 API."""
    # Also on http://www.gocomics.com/dorris-mccomics
    name = 'dorrismc'
    long_name = 'Dorris Mc'
    url = 'http://dorrismccomics.com'
3666
3667
3668
class LeleozTumblr(GenericEmptyComic, GenericTumblrV1):
    """Leleoz comics (Tumblr source)."""
    # NOTE(review): GenericEmptyComic comes first in the bases, so its
    # methods take precedence over GenericTumblrV1 - presumably to disable
    # retrieval; confirm.
    # Also on https://tapastic.com/series/Leleoz
    name = 'leleoz-tumblr'
    long_name = 'Leleoz (from Tumblr)'
    url = 'http://leleozcomics.tumblr.com'
3674
3675
3676
class MoonBeardTumblr(GenericTumblrV1):
    """MoonBeard comics, retrieved through the Tumblr V1 API."""
    # Also on http://moonbeard.com
    # Also on http://www.webtoons.com/en/comedy/moon-beard/list?title_no=471
    name = 'moonbeard-tumblr'
    long_name = 'Moon Beard (from Tumblr)'
    url = 'http://blog.squiresjam.es/moonbeard'
3683
3684
3685
class AComik(GenericTumblrV1):
    """A Comik, retrieved through the Tumblr V1 API."""
    name = 'comik'
    long_name = 'A Comik'
    url = 'http://acomik.com'
3690
3691
3692
class ClassicRandy(GenericTumblrV1):
    """Classic Randy comics, retrieved through the Tumblr V1 API."""
    name = 'randy'
    long_name = 'Classic Randy'
    url = 'http://classicrandy.tumblr.com'
3697
3698
3699
class DagssonTumblr(GenericTumblrV1):
    """Dagsson comics, retrieved through the Tumblr V1 API."""
    # Also on http://www.dagsson.com
    name = 'dagsson-tumblr'
    long_name = 'Dagsson Hugleikur (from Tumblr)'
    url = 'http://hugleikurdagsson.tumblr.com'
3705
3706
3707
class LinsEditionsTumblr(GenericTumblrV1):
    """L.I.N.S. Editions comics, retrieved through the Tumblr V1 API."""
    # Also on https://linsedition.com
    name = 'lins-tumblr'
    long_name = 'L.I.N.S. Editions (from Tumblr)'
    url = 'http://linscomics.tumblr.com'
3713
3714
3715
class OrigamiHotDish(GenericTumblrV1):
    """Origami Hot Dish comics, retrieved through the Tumblr V1 API."""
    name = 'origamihotdish'
    long_name = 'Origami Hot Dish'
    url = 'http://origamihotdish.com'
3720
3721
3722
class HitAndMissComicsTumblr(GenericTumblrV1):
    """Hit and Miss Comics, retrieved through the Tumblr V1 API."""
    name = 'hitandmiss'
    long_name = 'Hit and Miss Comics'
    url = 'http://hitandmisscomics.tumblr.com'
3727
3728
3729
class HMBlanc(GenericTumblrV1):
    """HM Blanc comics, retrieved through the Tumblr V1 API."""
    name = 'hmblanc'
    long_name = 'HM Blanc'
    url = 'http://hmblanc.tumblr.com'
3734
3735
3736
class TalesOfAbsurdityTumblr(GenericTumblrV1):
    """Tales Of Absurdity comics, retrieved through the Tumblr V1 API."""
    # Also on http://talesofabsurdity.com
    # Also on http://tapastic.com/series/Tales-Of-Absurdity
    name = 'absurdity-tumblr'
    long_name = 'Tales of Absurdity (from Tumblr)'
    url = 'http://talesofabsurdity.tumblr.com'
3743
3744
3745
class RobbieAndBobby(GenericTumblrV1):
    """Robbie And Bobby comics, retrieved through the Tumblr V1 API."""
    # Also on http://robbieandbobby.com
    name = 'robbie-tumblr'
    long_name = 'Robbie And Bobby (from Tumblr)'
    url = 'http://robbieandbobby.tumblr.com'
3751
3752
3753
class ElectricBunnyComicTumblr(GenericTumblrV1):
    """Electric Bunny Comics, retrieved through the Tumblr V1 API."""
    # Also on http://www.electricbunnycomics.com/View/Comic/153/Welcome+to+Hell
    name = 'bunny-tumblr'
    long_name = 'Electric Bunny Comic (from Tumblr)'
    url = 'http://electricbunnycomics.tumblr.com'
3759
3760
3761
class Hoomph(GenericTumblrV1):
    """Hoomph comics, retrieved through the Tumblr V1 API."""
    name = 'hoomph'
    long_name = 'Hoomph'
    url = 'http://hoom.ph'
3766
3767
3768
class BFGFSTumblr(GenericTumblrV1):
    """BFGFS comics, retrieved through the Tumblr V1 API."""
    # Also on https://tapastic.com/series/BFGFS
    # Also on http://bfgfs.com
    name = 'bfgfs-tumblr'
    long_name = 'BFGFS (from Tumblr)'
    url = 'http://bfgfs.tumblr.com'
3775
3776
3777
class DoodleForFood(GenericTumblrV1):
    """Doodle For Food comics, retrieved through the Tumblr V1 API."""
    # Also on http://doodleforfood.com
    name = 'doodle'
    long_name = 'Doodle For Food'
    url = 'http://doodleforfood.com'
3783
3784
3785
class HorovitzComics(GenericListableComic):
    """Base class sharing the retrieval logic of the comics from Horovitz."""
    url = 'http://www.horovitzcomics.com'
    # Image paths embed the publication date as .../comics/<year>/<month>/<day>/...
    img_re = re.compile('.*comics/([0-9]*)/([0-9]*)/([0-9]*)/.*$')
    # To be provided by subclasses: pattern of the archive links, capturing the comic number
    link_re = NotImplemented
    get_url_from_archive_element = join_cls_url_to_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The number is taken from the archive link and the date from the
        path of the single comic image.
        """
        link_match = cls.link_re.match(link['href'])
        num = int(link_match.group(1))
        title = link.string
        imgs = soup.find_all('img', id='comic')
        assert len(imgs) == 1
        date_match = cls.img_re.match(imgs[0]['src'])
        year, month, day = map(int, date_match.groups())
        sources = [img['src'] for img in imgs]
        return {
            'title': title,
            'day': day,
            'month': month,
            'year': year,
            'img': sources,
            'num': num,
        }

    @classmethod
    def get_archive_elements(cls):
        """Return the archive links matching link_re, in reverse page order."""
        archive_url = 'http://www.horovitzcomics.com/comics/archive/'
        archive = get_soup_at_url(archive_url)
        links = archive.find_all('a', href=cls.link_re)
        return reversed(links)
3815
3816
3817
class HorovitzNew(HorovitzComics):
    """Horovitz comics from the 'new' section of the archive."""
    name = 'horovitznew'
    long_name = 'Horovitz New'
    # Archive links look like /comics/new/<number>
    link_re = re.compile('^/comics/new/([0-9]+)$')
3822
3823
3824
class HorovitzClassic(HorovitzComics):
    """Horovitz comics from the 'classic' section of the archive."""
    name = 'horovitzclassic'
    long_name = 'Horovitz Classic'
    # Archive links look like /comics/classic/<number>
    link_re = re.compile('^/comics/classic/([0-9]+)$')
3829
3830
3831
class GenericGoComic(GenericNavigableComic):
    """Base class sharing the retrieval logic of the comics from gocomics.com."""
    # Comic urls end with /<year>/<month>/<day>
    url_date_re = re.compile('.*/([0-9]*)/([0-9]*)/([0-9]*)$')

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics."""
        front_page = get_soup_at_url(cls.url)
        return front_page.find('a', class_='beginning')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get the dated link to the next comic if next_ is true, else to the previous one."""
        direction = 'next' if next_ else 'prev'
        return last_soup.find('a', class_=direction, href=cls.url_date_re)

    @classmethod
    def get_url_from_link(cls, link):
        """Resolve the href of a link against the gocomics.com root."""
        href = link['href']
        return urljoin_wrapper('http://www.gocomics.com', href)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The date is parsed from the comic url; the image is the last
        'strip' image of the page.
        """
        comic_url = cls.get_url_from_link(link)
        date_match = cls.url_date_re.match(comic_url)
        year, month, day = map(int, date_match.groups())
        strips = soup.find_all('img', class_='strip')
        author_tag = soup.find('meta', attrs={'name': 'author'})
        return {
            'day': day,
            'month': month,
            'year': year,
            'img': [strips[-1]['src']],
            'author': author_tag['content']
        }
3862
3863
3864
class PearlsBeforeSwine(GenericGoComic):
    """Pearls Before Swine comics, retrieved from gocomics.com."""
    name = 'pearls'
    long_name = 'Pearls Before Swine'
    url = 'http://www.gocomics.com/pearlsbeforeswine'
3869
3870
3871
class Peanuts(GenericGoComic):
    """Peanuts comics, retrieved from gocomics.com."""
    name = 'peanuts'
    long_name = 'Peanuts'
    url = 'http://www.gocomics.com/peanuts'
3876
3877
3878
class MattWuerker(GenericGoComic):
    """Matt Wuerker comics, retrieved from gocomics.com."""
    name = 'wuerker'
    long_name = 'Matt Wuerker'
    url = 'http://www.gocomics.com/mattwuerker'
3883
3884
3885
class TomToles(GenericGoComic):
    """Tom Toles comics, retrieved from gocomics.com."""
    name = 'toles'
    long_name = 'Tom Toles'
    url = 'http://www.gocomics.com/tomtoles'
3890
3891
3892
class BreakOfDay(GenericGoComic):
    """Break Of Day comics, retrieved from gocomics.com."""
    name = 'breakofday'
    long_name = 'Break Of Day'
    url = 'http://www.gocomics.com/break-of-day'
3897
3898
3899
class Brevity(GenericGoComic):
    """Brevity comics, retrieved from gocomics.com."""
    name = 'brevity'
    long_name = 'Brevity'
    url = 'http://www.gocomics.com/brevity'
3904
3905
3906
class MichaelRamirez(GenericGoComic):
    """Michael Ramirez comics, retrieved from gocomics.com."""
    name = 'ramirez'
    long_name = 'Michael Ramirez'
    url = 'http://www.gocomics.com/michaelramirez'
3911
3912
3913
class MikeLuckovich(GenericGoComic):
    """Mike Luckovich comics, retrieved from gocomics.com."""
    name = 'luckovich'
    long_name = 'Mike Luckovich'
    url = 'http://www.gocomics.com/mikeluckovich'
3918
3919
3920
class JimBenton(GenericGoComic):
    """Jim Benton comics, retrieved from gocomics.com."""
    # Also on http://jimbenton.tumblr.com
    name = 'benton'
    long_name = 'Jim Benton'
    url = 'http://www.gocomics.com/jim-benton-cartoons'
3926
3927
3928
class TheArgyleSweater(GenericGoComic):
    """The Argyle Sweater comics, retrieved from gocomics.com."""
    name = 'argyle'
    long_name = 'Argyle Sweater'
    url = 'http://www.gocomics.com/theargylesweater'
3933
3934
3935
class SunnyStreet(GenericGoComic):
    """Sunny Street comics, retrieved from gocomics.com."""
    # Also on http://www.sunnystreetcomics.com
    name = 'sunny'
    long_name = 'Sunny Street'
    url = 'http://www.gocomics.com/sunny-street'
3941
3942
3943
class OffTheMark(GenericGoComic):
    """Off The Mark comics, retrieved from gocomics.com."""
    # Also on https://www.offthemark.com
    name = 'offthemark'
    long_name = 'Off The Mark'
    url = 'http://www.gocomics.com/offthemark'
3949
3950
3951
class WuMo(GenericGoComic):
    """WuMo comics, retrieved from gocomics.com."""
    # Also on http://wumo.com
    name = 'wumo'
    long_name = 'WuMo'
    url = 'http://www.gocomics.com/wumo'
3957
3958
3959
class LunarBaboon(GenericGoComic):
    """Lunar Baboon comics, retrieved from gocomics.com."""
    # Also on http://www.lunarbaboon.com
    # Also on https://tapastic.com/series/Lunarbaboon
    name = 'lunarbaboon'
    long_name = 'Lunar Baboon'
    url = 'http://www.gocomics.com/lunarbaboon'
3966
3967
3968
class SandersenGocomic(GenericGoComic):
    """Sarah Andersen comics, retrieved from gocomics.com."""
    # Also on http://sarahcandersen.com
    # Also on http://tapastic.com/series/Doodle-Time
    name = 'sandersen-goc'
    long_name = 'Sarah Andersen (from GoComics)'
    url = 'http://www.gocomics.com/sarahs-scribbles'
3975
3976
3977
class CalvinAndHobbesGoComic(GenericGoComic):
    """Class to retrieve Calvin and Hobbes comics from GoComics."""
    # From gocomics, not http://marcel-oehler.marcellosendos.ch/comics/ch/
    name = 'calvin-goc'
    long_name = 'Calvin and Hobbes (from GoComics)'
    url = 'http://www.gocomics.com/calvinandhobbes'
3983
3984
3985
class RallGoComic(GenericGoComic):
    """Class to retrieve Ted Rall comics from GoComics."""
    # Also on http://rall.com/comic
    name = 'rall-goc'
    long_name = 'Ted Rall (from GoComics)'
    url = 'http://www.gocomics.com/tedrall'
3991
3992
3993
class TheAwkwardYetiGoComic(GenericGoComic):
    """Class to retrieve The Awkward Yeti comics from GoComics."""
    # Also on http://larstheyeti.tumblr.com
    # Also on http://theawkwardyeti.com
    # Also on https://tapastic.com/series/TheAwkwardYeti
    name = 'yeti-goc'
    long_name = 'The Awkward Yeti (from GoComics)'
    url = 'http://www.gocomics.com/the-awkward-yeti'
4001
4002
4003
class BerkeleyMewsGoComics(GenericGoComic):
    """Class to retrieve Berkeley Mews comics from GoComics."""
    # Also on http://mews.tumblr.com
    # Also on http://www.berkeleymews.com
    name = 'berkeley-goc'
    long_name = 'Berkeley Mews (from GoComics)'
    url = 'http://www.gocomics.com/berkeley-mews'
4010
4011
4012
class SheldonGoComics(GenericGoComic):
    """Class to retrieve Sheldon comics from GoComics."""
    # Also on http://www.sheldoncomics.com
    name = 'sheldon-goc'
    long_name = 'Sheldon Comics (from GoComics)'
    url = 'http://www.gocomics.com/sheldon'
4018
4019
4020
class FowlLanguageGoComics(GenericGoComic):
    """Class to retrieve Fowl Language comics from GoComics."""
    # Also on http://www.fowllanguagecomics.com
    # Also on http://tapastic.com/series/Fowl-Language-Comics
    # Also on http://fowllanguagecomics.tumblr.com
    name = 'fowllanguage-goc'
    long_name = 'Fowl Language Comics (from GoComics)'
    url = 'http://www.gocomics.com/fowl-language'
4028
4029
4030
class NickAnderson(GenericGoComic):
    """Class to retrieve Nick Anderson comics from GoComics."""
    name = 'nickanderson'
    long_name = 'Nick Anderson'
    url = 'http://www.gocomics.com/nickanderson'
4035
4036
4037
class GarfieldGoComics(GenericGoComic):
    """Class to retrieve Garfield comics from GoComics."""
    # Also on http://garfield.com
    name = 'garfield-goc'
    long_name = 'Garfield (from GoComics)'
    url = 'http://www.gocomics.com/garfield'
4043
4044
4045
class DorrisMcGoComics(GenericGoComic):
    """Class to retrieve Dorris Mc Comics from GoComics."""
    # Also on http://dorrismccomics.com
    name = 'dorrismc-goc'
    long_name = 'Dorris Mc (from GoComics)'
    url = 'http://www.gocomics.com/dorris-mccomics'
4051
4052
4053
class MisterAndMeGoComics(GenericGoComic):
    """Class to retrieve Mister & Me comics from GoComics."""
    # Also on http://www.mister-and-me.com
    # Also on https://tapastic.com/series/Mister-and-Me
    name = 'mister-goc'
    long_name = 'Mister & Me (from GoComics)'
    url = 'http://www.gocomics.com/mister-and-me'
4060
4061
4062
class GenericTapasticComic(GenericListableComic):
    """Generic class to handle the logic common to comics from tapastic.com.

    The subclasses in this file only define `name`, `long_name` and `url`
    (the address of the series page on Tapastic).
    """

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Get information about a particular comics.

        `archive_elt` is one entry of the list returned by
        get_archive_elements; its 'publishDate', 'title' and 'id' keys are
        used. `soup` is searched for <img class="art-image"> tags
        (presumably it is the parsed episode page - confirm against the
        base class).

        Returns a dict with keys 'day', 'month', 'year', 'img' and 'title',
        or None (after printing a notice) when no image is found.
        """
        # The raw value is divided by 1000 before being used as a POSIX
        # timestamp (presumably it is expressed in milliseconds).
        timestamp = int(archive_elt['publishDate']) / 1000.0
        day = datetime.datetime.fromtimestamp(timestamp).date()
        imgs = soup.find_all('img', class_='art-image')
        if not imgs:
            print("Comic %s is being uploaded, retry later" %
                  cls.get_url_from_archive_element(archive_elt))
            return None
        return {
            'day': day.day,
            'year': day.year,
            'month': day.month,
            'img': [i['src'] for i in imgs],
            'title': archive_elt['title'],
        }

    @classmethod
    def get_url_from_archive_element(cls, archive_elt):
        """Get the url of an episode from its entry in the episode list."""
        return 'http://tapastic.com/episode/' + str(archive_elt['id'])

    @classmethod
    def get_archive_elements(cls):
        """Get the episode list embedded in the javascript of the series page.

        The page at `cls.url` is scanned line by line for the first line of
        the form ``episodeList : <json>,`` and the decoded JSON is returned.

        Raises IndexError if no such line is found (same exception type as
        before, now with an explicit message).
        """
        pref, suff = 'episodeList : ', ','
        # Information is stored in the javascript part
        # I don't know the clean way to get it so this is the ugly way.
        for raw_line in urlopen_wrapper(cls.url).readlines():
            line = raw_line.decode('utf-8').strip()
            if line.startswith(pref) and line.endswith(suff):
                # Stop at the first match: later lines are irrelevant.
                return json.loads(line[len(pref):-len(suff)])
        raise IndexError(
            "No line starting with %r found at %s" % (pref, cls.url))
4094
4095
4096
class VegetablesForDessert(GenericTapasticComic):
    """Class to retrieve Vegetables For Dessert comics from Tapastic."""
    # Also on http://vegetablesfordessert.tumblr.com
    name = 'vegetables'
    long_name = 'Vegetables For Dessert'
    url = 'http://tapastic.com/series/vegetablesfordessert'
4102
4103
4104
class FowlLanguageTapa(GenericTapasticComic):
    """Class to retrieve Fowl Language comics from Tapastic."""
    # Also on http://www.fowllanguagecomics.com
    # Also on http://fowllanguagecomics.tumblr.com
    # Also on http://www.gocomics.com/fowl-language
    name = 'fowllanguage-tapa'
    long_name = 'Fowl Language Comics (from Tapastic)'
    url = 'http://tapastic.com/series/Fowl-Language-Comics'
4112
4113
4114
class OscillatingProfundities(GenericTapasticComic):
    """Class to retrieve Oscillating Profundities comics from Tapastic."""
    name = 'oscillating'
    long_name = 'Oscillating Profundities'
    url = 'http://tapastic.com/series/oscillatingprofundities'
4119
4120
4121
class ZnoflatsComics(GenericTapasticComic):
    """Class to retrieve Znoflats comics from Tapastic."""
    name = 'znoflats'
    long_name = 'Znoflats Comics'
    url = 'http://tapastic.com/series/Znoflats-Comics'
4126
4127
4128
class SandersenTapastic(GenericTapasticComic):
    """Class to retrieve Sarah Andersen comics from Tapastic."""
    # Also on http://sarahcandersen.com
    # Also on http://www.gocomics.com/sarahs-scribbles
    name = 'sandersen-tapa'
    long_name = 'Sarah Andersen (from Tapastic)'
    url = 'http://tapastic.com/series/Doodle-Time'
4135
4136
4137
class TubeyToonsTapastic(GenericTapasticComic):
    """Class to retrieve TubeyToons comics from Tapastic."""
    # Also on http://tubeytoons.com
    # Also on http://tubeytoons.tumblr.com
    name = 'tubeytoons-tapa'
    long_name = 'Tubey Toons (from Tapastic)'
    url = 'http://tapastic.com/series/Tubey-Toons'
4144
4145
4146
class AnythingComicTapastic(GenericTapasticComic):
    """Class to retrieve Anything Comic comics from Tapastic."""
    # Also on http://www.anythingcomic.com
    name = 'anythingcomic-tapa'
    long_name = 'Anything Comic (from Tapastic)'
    url = 'http://tapastic.com/series/anything'
4152
4153
4154
class UnearthedComicsTapastic(GenericTapasticComic):
    """Class to retrieve Unearthed comics from Tapastic."""
    # Also on http://unearthedcomics.com
    # Also on http://unearthedcomics.tumblr.com
    name = 'unearthed-tapa'
    long_name = 'Unearthed Comics (from Tapastic)'
    url = 'http://tapastic.com/series/UnearthedComics'
4161
4162
4163
class EverythingsStupidTapastic(GenericTapasticComic):
    """Class to retrieve Everything's Stupid comics from Tapastic."""
    # Also on http://www.webtoons.com/en/challenge/everythings-stupid/list?title_no=14591
    # Also on http://everythingsstupid.net
    name = 'stupid-tapa'
    long_name = "Everything's Stupid (from Tapastic)"
    url = 'http://tapastic.com/series/EverythingsStupid'
4170
4171
4172
class JustSayEhTapastic(GenericTapasticComic):
    """Class to retrieve Just Say Eh comics from Tapastic."""
    # Also on http://www.justsayeh.com
    name = 'justsayeh-tapa'
    long_name = 'Just Say Eh (from Tapastic)'
    url = 'http://tapastic.com/series/Just-Say-Eh'
4178
4179
4180
class ThorsThundershackTapastic(GenericTapasticComic):
    """Class to retrieve Thor's Thundershack comics from Tapastic."""
    # Also on http://www.thorsthundershack.com
    name = 'thor-tapa'
    long_name = "Thor's Thundershack (from Tapastic)"
    url = 'http://tapastic.com/series/Thors-Thundershac'
4186
4187
4188
class OwlTurdTapastic(GenericTapasticComic):
    """Class to retrieve Owl Turd comics from Tapastic."""
    # Also on http://owlturd.com
    name = 'owlturd-tapa'
    long_name = 'Owl Turd (from Tapastic)'
    url = 'http://tapastic.com/series/Owl-Turd-Comix'
4194
4195
4196
class GoneIntoRaptureTapastic(GenericTapasticComic):
    """Class to retrieve Gone Into Rapture comics from Tapastic."""
    # Also on http://goneintorapture.tumblr.com
    # Also on http://www.goneintorapture.com
    name = 'rapture-tapa'
    long_name = 'Gone Into Rapture (from Tapastic)'
    url = 'http://tapastic.com/series/Goneintorapture'
4203
4204
4205
class HeckIfIKnowComicsTapa(GenericTapasticComic):
    """Class to retrieve Heck If I Know comics from Tapastic."""
    # Also on http://heckifiknowcomics.com
    name = 'heck-tapa'
    long_name = 'Heck if I Know comics (from Tapastic)'
    url = 'http://tapastic.com/series/Regular'
4211
4212
4213
class CheerUpEmoKidTapa(GenericTapasticComic):
    """Class to retrieve Cheer Up Emo Kid comics from Tapastic."""
    # Also on http://www.cheerupemokid.com
    # Also on http://enzocomics.tumblr.com
    name = 'cuek-tapa'
    long_name = 'Cheer Up Emo Kid (from Tapastic)'
    url = 'http://tapastic.com/series/CUEK'
4220
4221
4222
class BigFootJusticeTapa(GenericTapasticComic):
    """Class to retrieve Big Foot Justice comics from Tapastic."""
    # Also on http://bigfootjustice.com
    name = 'bigfoot-tapa'
    long_name = 'Big Foot Justice (from Tapastic)'
    url = 'http://tapastic.com/series/bigfoot-justice'
4228
4229
4230
class UpAndOutTapa(GenericTapasticComic):
    """Class to retrieve Up & Out comics from Tapastic."""
    # Also on http://upandoutcomic.tumblr.com
    name = 'upandout-tapa'
    long_name = 'Up And Out (from Tapastic)'
    url = 'http://tapastic.com/series/UP-and-OUT'
4236
4237
4238
class ToonHoleTapa(GenericTapasticComic):
    """Class to retrieve Toon Hole comics from Tapastic."""
    # Also on http://www.toonhole.com
    name = 'toonhole-tapa'
    long_name = 'Toon Hole (from Tapastic)'
    url = 'http://tapastic.com/series/TOONHOLE'
4244
4245
4246
class AngryAtNothingTapa(GenericTapasticComic):
    """Class to retrieve Angry At Nothing comics from Tapastic."""
    # Also on http://www.angryatnothing.net
    name = 'angry-tapa'
    long_name = 'Angry At Nothing (from Tapastic)'
    url = 'http://tapastic.com/series/Comics-yeah-definitely-comics-'
4252
4253
4254
class LeleozTapa(GenericTapasticComic):
    """Class to retrieve Leleoz comics from Tapastic."""
    # Also on http://leleozcomics.tumblr.com
    name = 'leleoz-tapa'
    long_name = 'Leleoz (from Tapastic)'
    url = 'https://tapastic.com/series/Leleoz'
4260
4261
4262
class TheAwkwardYetiTapa(GenericTapasticComic):
    """Class to retrieve The Awkward Yeti comics from Tapastic."""
    # Also on http://www.gocomics.com/the-awkward-yeti
    # Also on http://theawkwardyeti.com
    # Also on http://larstheyeti.tumblr.com
    name = 'yeti-tapa'
    long_name = 'The Awkward Yeti (from Tapastic)'
    url = 'https://tapastic.com/series/TheAwkwardYeti'
4270
4271
4272
class AsPerUsualTapa(GenericTapasticComic):
    """Class to retrieve As Per Usual comics from Tapastic."""
    # Also on http://as-per-usual.tumblr.com
    name = 'usual-tapa'
    long_name = 'As Per Usual (from Tapastic)'
    url = 'https://tapastic.com/series/AsPerUsual'
4278
4279
4280
class OneOneOneOneComicTapa(GenericTapasticComic):
    """Class to retrieve 1111 Comics from Tapastic."""
    # Also on http://www.1111comics.me
    # Also on http://comics1111.tumblr.com
    name = '1111-tapa'
    long_name = '1111 Comics (from Tapastic)'
    url = 'https://tapastic.com/series/1111-Comics'
4287
4288
4289
class TumbleDryTapa(GenericTapasticComic):
    """Class to retrieve Tumble Dry comics from Tapastic."""
    # Also on http://tumbledrycomics.com
    name = 'tumbledry-tapa'
    # Display name fixed: it used to read 'Tumblr Dry', which matches neither
    # the docstring nor the series URL.
    long_name = 'Tumble Dry (from Tapastic)'
    url = 'https://tapastic.com/series/TumbleDryComics'
4295
4296
4297
class DeadlyPanelTapa(GenericTapasticComic):
    """Class to retrieve Deadly Panel comics from Tapastic."""
    # Also on http://www.deadlypanel.com
    name = 'deadly-tapa'
    long_name = 'Deadly Panel (from Tapastic)'
    url = 'https://tapastic.com/series/deadlypanel'
4303
4304
4305
class ChrisHallbeckMaxiTapa(GenericTapasticComic):
    """Class to retrieve Chris Hallbeck's Maximumble comics from Tapastic."""
    # Also on http://chrishallbeck.tumblr.com
    # Also on http://maximumble.com
    name = 'hallbeckmaxi-tapa'
    # Author name fixed: it used to be misspelled 'Hallback'.
    long_name = 'Chris Hallbeck - Maximumble (from Tapastic)'
    url = 'https://tapastic.com/series/Maximumble'
4312
4313
4314
class ChrisHallbeckMiniTapa(GenericTapasticComic):
    """Class to retrieve Chris Hallbeck's Minimumble comics from Tapastic."""
    # Also on http://chrishallbeck.tumblr.com
    # Also on http://minimumble.com
    name = 'hallbeckmini-tapa'
    # Author name fixed: it used to be misspelled 'Hallback'.
    long_name = 'Chris Hallbeck - Minimumble (from Tapastic)'
    url = 'https://tapastic.com/series/Minimumble'
4321
4322
4323
class ChrisHallbeckBiffTapa(GenericTapasticComic):
    """Class to retrieve Chris Hallbeck's Book of Biff comics from Tapastic."""
    # Also on http://chrishallbeck.tumblr.com
    # Also on http://thebookofbiff.com
    name = 'hallbeckbiff-tapa'
    # Author name fixed: it used to be misspelled 'Hallback'.
    long_name = 'Chris Hallbeck - The Book of Biff (from Tapastic)'
    url = 'https://tapastic.com/series/Biff'
4330
4331
4332
class RandoWisTapa(GenericTapasticComic):
    """Class to retrieve RandoWis comics from Tapastic."""
    # Also on https://randowis.com
    name = 'randowis-tapa'
    long_name = 'RandoWis (from Tapastic)'
    url = 'https://tapastic.com/series/RandoWis'
4338
4339
4340
class PigeonGazetteTapa(GenericTapasticComic):
    """Class to retrieve The Pigeon Gazette comics from Tapastic."""
    # Also on http://thepigeongazette.tumblr.com
    name = 'pigeon-tapa'
    long_name = 'The Pigeon Gazette (from Tapastic)'
    url = 'https://tapastic.com/series/The-Pigeon-Gazette'
4346
4347
4348
class TheOdd1sOutTapa(GenericTapasticComic):
    """Class to retrieve The Odd 1s Out comics from Tapastic."""
    # Also on http://theodd1sout.com
    # Also on http://theodd1sout.tumblr.com
    name = 'theodd-tapa'
    long_name = 'The Odd 1s Out (from Tapastic)'
    url = 'https://tapastic.com/series/Theodd1sout'
4355
4356
4357
class TheWorldIsFlatTapa(GenericTapasticComic):
    """Class to retrieve The World Is Flat comics from Tapastic."""
    # Also on http://theworldisflatcomics.tumblr.com
    name = 'flatworld-tapa'
    long_name = 'The World Is Flat (from Tapastic)'
    url = 'https://tapastic.com/series/The-World-is-Flat'
4363
4364
4365
class MisterAndMeTapa(GenericTapasticComic):
    """Class to retrieve Mister & Me comics from Tapastic."""
    # Also on http://www.mister-and-me.com
    # Also on http://www.gocomics.com/mister-and-me
    name = 'mister-tapa'
    long_name = 'Mister & Me (from Tapastic)'
    url = 'https://tapastic.com/series/Mister-and-Me'
4372
4373
4374
class TalesOfAbsurdityTapa(GenericTapasticComic):
    """Class to retrieve Tales Of Absurdity comics from Tapastic."""
    # Also on http://talesofabsurdity.com
    # Also on http://talesofabsurdity.tumblr.com
    name = 'absurdity-tapa'
    long_name = 'Tales of Absurdity (from Tapastic)'
    url = 'http://tapastic.com/series/Tales-Of-Absurdity'
4381
4382
4383
class BFGFSTapa(GenericTapasticComic):
    """Class to retrieve BFGFS comics from Tapastic."""
    # Also on http://bfgfs.com
    # Also on http://bfgfs.tumblr.com
    name = 'bfgfs-tapa'
    long_name = 'BFGFS (from Tapastic)'
    url = 'https://tapastic.com/series/BFGFS'
4390
4391
4392
class DoodleForFoodTapa(GenericTapasticComic):
    """Class to retrieve Doodle For Food comics from Tapastic."""
    # Also on http://doodleforfood.com
    name = 'doodle-tapa'
    long_name = 'Doodle For Food (from Tapastic)'
    url = 'https://tapastic.com/series/Doodle-for-Food'
4398
4399
4400
class MrLovensteinTapa(GenericTapasticComic):
    """Class to retrieve Mr Lovenstein comics from Tapastic."""
    # Also on  https://tapastic.com/series/MrLovenstein
    # NOTE(review): the line above points back at this class's own URL;
    # presumably it was meant to name the comic's main site - confirm.
    name = 'mrlovenstein-tapa'
    long_name = 'Mr. Lovenstein (from Tapastic)'
    url = 'https://tapastic.com/series/MrLovenstein'
4406
4407
4408
def get_subclasses(klass):
    """Gets the list of direct/indirect subclasses of a class.

    Direct subclasses come first, followed by the descendants of each of
    them. A class reachable through several inheritance paths (multiple
    inheritance) is listed only once, at its first position.
    """
    direct = klass.__subclasses__()
    collected = list(direct)
    for derived in direct:
        collected.extend(get_subclasses(derived))
    # Drop repeated entries while keeping first-occurrence order.
    seen = set()
    unique = []
    for sub in collected:
        if sub not in seen:
            seen.add(sub)
            unique.append(sub)
    return unique
4414
4415
4416
def remove_st_nd_rd_th_from_date(string):
    """Function to transform 1st/2nd/3rd/4th in a parsable date format.

    Only an ordinal suffix directly following a digit is removed, so that
    words such as 'August', 'Monday' or 'Saturday' are left untouched.
    """
    # The lookbehind requires a digit just before the suffix; the word
    # boundary prevents cutting into a longer token.
    return re.sub(r'(?<=\d)(?:st|nd|rd|th)\b', '', string, flags=re.IGNORECASE)
4424
4425
4426
def string_to_date(string, date_format, local=DEFAULT_LOCAL):
    """Function to convert string to date object.

    Wrapper around datetime.datetime.strptime: the LC_ALL locale is set to
    `local` for the duration of the parsing and restored afterwards, even
    when the parsing fails.

    Returns a datetime.date. Exceptions raised by strptime or by
    locale.setlocale are propagated.
    """
    # format described in https://docs.python.org/2/library/datetime.html#strftime-and-strptime-behavior
    # Called without a second argument, setlocale only returns the current setting.
    prev_locale = locale.setlocale(locale.LC_ALL)
    must_switch = local != prev_locale
    if must_switch:
        locale.setlocale(locale.LC_ALL, local)
    try:
        return datetime.datetime.strptime(string, date_format).date()
    finally:
        # Restore the caller's locale whether or not strptime succeeded.
        if must_switch:
            locale.setlocale(locale.LC_ALL, prev_locale)
4437
4438
4439
# All direct and indirect subclasses of GenericComic.
COMICS = set(get_subclasses(GenericComic))
# Classes whose `name` is None are left out.
VALID_COMICS = [c for c in COMICS if c.name is not None]
# Lookup table from comic name to class; names must be unique.
COMIC_NAMES = {c.name: c for c in VALID_COMICS}
assert len(VALID_COMICS) == len(COMIC_NAMES), \
    "Several comic classes share the same `name`"
# Class names must be unique as well.
CLASS_NAMES = {c.__name__ for c in VALID_COMICS}
assert len(VALID_COMICS) == len(CLASS_NAMES), \
    "Several comic classes share the same class name"
4445