Completed
Push — master ( cd1478...27ec9a )
by De
01:16 queued 10s
created

comics.py (2 issues)

1
#! /usr/bin/python3
2
# vim: set expandtab tabstop=4 shiftwidth=4 :
3
"""Module to retrieve webcomics"""
4
5
import datetime
import json
import locale
import re
import urllib
import urllib.error
from datetime import date, timedelta

from comic_abstract import GenericComic, get_date_for_comic
from urlfunctions import get_soup_at_url, urljoin_wrapper,\
    convert_iri_to_plain_ascii_uri, load_json_at_url, urlopen_wrapper
14
15
# Locale identifier (British English, UTF-8).
# NOTE(review): not referenced in the visible part of this file; presumably the
# default locale for locale-dependent date parsing done elsewhere - confirm.
DEFAULT_LOCAL = 'en_GB.UTF-8'
16
17
18
class Xkcd(GenericComic):
    """Class to retrieve Xkcd comics."""
    name = 'xkcd'
    long_name = 'xkcd'
    url = 'http://xkcd.com'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Yield the comics following `last_comic`, in increasing number order.

        `last_comic` is the last comic already retrieved (a dict with a
        'num' key) or a falsy value, in which case iteration starts at 1.
        The number of the latest comic is read from the site's
        'info.0.json'; each yielded dict is the per-comic JSON completed
        with 'prefix', 'json_url' and 'url', with 'img' wrapped in a list
        and 'day'/'month'/'year' converted to int.
        """
        first_num = last_comic['num'] if last_comic else 0
        last_num = load_json_at_url(
            urljoin_wrapper(cls.url, 'info.0.json'))['num']

        for num in range(first_num + 1, last_num + 1):
            if num == 404:
                # Number 404 is skipped: no JSON is requested for it.
                continue
            json_url = urljoin_wrapper(cls.url, '%d/info.0.json' % num)
            comic = load_json_at_url(json_url)
            comic['img'] = [comic['img']]
            comic['prefix'] = '%d-' % num
            comic['json_url'] = json_url
            comic['url'] = urljoin_wrapper(cls.url, str(num))
            for key in ('day', 'month', 'year'):
                comic[key] = int(comic[key])
            assert comic['num'] == num
            yield comic
43
44
45
# Helper functions corresponding to get_url_from_link/get_url_from_archive_element
46
47
48
@classmethod
def get_href(cls, link):
    """Implementation of get_url_from_link/get_url_from_archive_element.

    Return the value stored under 'href' in `link`, unchanged.
    """
    return link['href']
52
53
54
@classmethod
def join_cls_url_to_href(cls, link):
    """Implementation of get_url_from_link/get_url_from_archive_element.

    Return the 'href' of `link` joined to `cls.url` with urljoin_wrapper.
    """
    href = link['href']
    return urljoin_wrapper(cls.url, href)
58
59
60
class GenericNavigableComic(GenericComic):
    """Generic class for "navigable" comics : with first/next arrows.

    The `get_next_comic` method is implemented in terms of new
    more specialized methods to be implemented/overridden:
        - get_first_comic_link
        - get_navi_link
        - get_comic_info
        - get_url_from_link
    """

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        Sometimes this can be retrieved of any comic page, sometimes on
        the archive page, sometimes it doesn't exist at all and one has
        to iterate backward to find it before hardcoding the result found.
        """
        raise NotImplementedError

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next (or previous - for dev purposes) comic."""
        raise NotImplementedError

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics."""
        raise NotImplementedError

    @classmethod
    def get_url_from_link(cls, link):
        """Get url corresponding to a link. Default implementation is similar to get_href."""
        return link['href']

    @classmethod
    def get_next_link(cls, last_soup):
        """Get link to next comic."""
        return cls.get_navi_link(last_soup, True)

    @classmethod
    def get_prev_link(cls, last_soup):
        """Get link to previous comic."""
        return cls.get_navi_link(last_soup, False)

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generic implementation of get_next_comic for navigable comics.

        Start after `last_comic['url']` (or from the first comic link when
        `last_comic` is falsy) and follow "next" links. A dict is yielded
        for each visited page for which `get_comic_info` does not return
        None, with its 'url' key set to the page URL. Iteration stops when
        there is no next link or when the next link leads to the URL that
        was just visited.
        """
        url = last_comic['url'] if last_comic else None
        next_comic = \
            cls.get_next_link(get_soup_at_url(url)) \
            if url else \
            cls.get_first_comic_link()
        cls.log("next/first comic will be %s (url is %s)" % (str(next_comic), url))
        while next_comic:
            prev_url, url = url, cls.get_url_from_link(next_comic)
            if prev_url == url:
                # A page pointing to itself would loop forever.
                cls.log("got same url %s" % url)
                break
            cls.log("about to get %s (%s)" % (url, str(next_comic)))
            soup = get_soup_at_url(url)
            comic = cls.get_comic_info(soup, next_comic)
            if comic is not None:
                assert 'url' not in comic
                comic['url'] = url
                yield comic
            next_comic = cls.get_next_link(soup)
            cls.log("next comic will be %s" % str(next_comic))

    @classmethod
    def check_first_link(cls):
        """Check that navigation to first comic seems to be working - for dev purposes.

        Return True when a first link exists and its URL can be fetched
        without an HTTP error, False otherwise (a message is printed).
        """
        cls.log("about to check first link")
        ok = True
        firstlink = cls.get_first_comic_link()
        if firstlink is None:
            print("From %s : no first link" % cls.url)
            ok = False
        else:
            firsturl = cls.get_url_from_link(firstlink)
            try:
                get_soup_at_url(firsturl)
            except urllib.error.HTTPError:
                print("From %s : invalid first url" % cls.url)
                ok = False
        cls.log("checked first link -> returned %d" % ok)
        return ok

    @classmethod
    def check_prev_next_links(cls, url):
        """Check that navigation to prev/next from a given URL seems to be working - for dev purposes.

        Going to the previous page then forward (and to the next page then
        backward) is expected to lead back to `url`. Return True when the
        checks that could be performed succeed, False otherwise (a message
        is printed for each problem found).
        """
        cls.log("about to check prev/next from %s" % url)
        ok = True
        if url is None:
            prevlink, nextlink = None, None
        else:
            soup = get_soup_at_url(url)
            prevlink, nextlink = cls.get_prev_link(soup), cls.get_next_link(soup)
        if prevlink is None and nextlink is None:
            print("From %s : no previous nor next" % url)
            ok = False
        else:
            if prevlink:
                prevurl = cls.get_url_from_link(prevlink)
                prevsoup = get_soup_at_url(prevurl)
                # The previous page may have no "next" link at all: report
                # it as a mismatch instead of failing on a None link.
                forward = cls.get_next_link(prevsoup)
                prevnext = cls.get_url_from_link(forward) if forward else None
                if prevnext != url:
                    print("From %s, going backward then forward leads to %s" % (url, prevnext))
                    ok = False
            if nextlink:
                nexturl = cls.get_url_from_link(nextlink)
                if nexturl != url:
                    nextsoup = get_soup_at_url(nexturl)
                    # Same guard for a next page without a "previous" link.
                    backward = cls.get_prev_link(nextsoup)
                    nextprev = cls.get_url_from_link(backward) if backward else None
                    if nextprev != url:
                        print("From %s, going forward then backward leads to %s" % (url, nextprev))
                        ok = False
        cls.log("checked prev/next from %s -> returned %d" % (url, ok))
        return ok

    @classmethod
    def check_navigation(cls, url):
        """Check that navigation functions seem to be working - for dev purposes."""
        cls.log("about to check navigation from %s" % url)
        # Both checks are always run so that every problem gets reported.
        first = cls.check_first_link()
        prevnext = cls.check_prev_next_links(url)
        ok = first and prevnext
        cls.log("checked navigation from %s -> returned %d" % (url, ok))
        return ok

    # This method is not defined by default and is not part of this class'API.
    # It is only used:
    # - during development
    # - in subclasses implementing it correctly
    if False:
        @classmethod
        def get_first_comic_url(cls):
            """Get first comic url

            Sometimes, the first comic cannot be reached directly so to start
            from the first comic one has to go to the previous comic until
            there is no previous comics. Once this URL is reached, it
            is better to hardcode it but for development purposes, it
            is convenient to have an automatic way to find it.
            """
            url = input("Get starting URL: ")
            print(url)
            comic = cls.get_prev_link(get_soup_at_url(url))
            while comic:
                url = cls.get_url_from_link(comic)
                print(url)
                comic = cls.get_prev_link(get_soup_at_url(url))
            return url
214
215
216
class GenericListableComic(GenericComic):
    """Generic class for "listable" comics : with a list of comics (aka 'archive')

    The `get_next_comic` method is implemented in terms of new
    more specialized methods to be implemented/overridden:
        - get_archive_elements
        - get_url_from_archive_element
        - get_comic_info
    """

    @classmethod
    def get_archive_elements(cls):
        """Get the archive elements (iterable)."""
        raise NotImplementedError

    @classmethod
    def get_url_from_archive_element(cls, archive_elt):
        """Get url corresponding to an archive element."""
        raise NotImplementedError

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Get information about a particular comics."""
        raise NotImplementedError

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generic implementation of get_next_comic for listable comics.

        Archive elements are skipped until the one whose URL equals
        `last_comic['url']` has been seen; every later element (or every
        element when `last_comic` is falsy) is fetched and yielded as a
        dict with its 'url' key set, unless `get_comic_info` returns None.
        A message is printed if the URL of `last_comic` was never found.
        """
        waiting_for_url = last_comic['url'] if last_comic else None
        for archive_elt in cls.get_archive_elements():
            url = cls.get_url_from_archive_element(archive_elt)
            cls.log("considering %s" % url)
            if waiting_for_url and waiting_for_url == url:
                # Last known comic reached: start yielding from the next one.
                waiting_for_url = None
            elif waiting_for_url is None:
                cls.log("about to get %s (%s)" % (url, str(archive_elt)))
                soup = get_soup_at_url(url)
                comic = cls.get_comic_info(soup, archive_elt)
                if comic is not None:
                    assert 'url' not in comic
                    comic['url'] = url
                    yield comic
        if waiting_for_url is not None:
            print("Did not find %s : there might be a problem" % waiting_for_url)
260
261
# Helper functions corresponding to get_first_comic_link/get_navi_link
262
263
264
@classmethod
def get_link_rel_next(cls, last_soup, next_):
    """Implementation of get_navi_link.

    Return what `last_soup.find` gives for a 'link' tag whose rel is
    'next' when `next_` is true, 'prev' otherwise.
    """
    rel = 'next' if next_ else 'prev'
    return last_soup.find('link', rel=rel)
268
269
270
@classmethod
def get_a_rel_next(cls, last_soup, next_):
    """Implementation of get_navi_link.

    Return what `last_soup.find` gives for an 'a' tag whose rel is
    'next' when `next_` is true, 'prev' otherwise.
    """
    rel = 'next' if next_ else 'prev'
    return last_soup.find('a', rel=rel)
274
275
276
@classmethod
def get_a_navi_navinext(cls, last_soup, next_):
    """Implementation of get_navi_link.

    Return what `last_soup.find` gives for an 'a' tag with class
    'navi navi-next' when `next_` is true, 'navi navi-prev' otherwise.
    """
    css_class = 'navi navi-next' if next_ else 'navi navi-prev'
    return last_soup.find('a', class_=css_class)
280
281
282
@classmethod
def get_a_navi_comicnavnext_navinext(cls, last_soup, next_):
    """Implementation of get_navi_link.

    Return what `last_soup.find` gives for an 'a' tag with class
    'navi comic-nav-next navi-next' when `next_` is true,
    'navi comic-nav-previous navi-prev' otherwise.
    """
    if next_:
        css_class = 'navi comic-nav-next navi-next'
    else:
        css_class = 'navi comic-nav-previous navi-prev'
    return last_soup.find('a', class_=css_class)
286
287
288
@classmethod
def get_a_comicnavbase_comicnavnext(cls, last_soup, next_):
    """Implementation of get_navi_link.

    Return what `last_soup.find` gives for an 'a' tag with class
    'comic-nav-base comic-nav-next' when `next_` is true,
    'comic-nav-base comic-nav-previous' otherwise.
    """
    if next_:
        css_class = 'comic-nav-base comic-nav-next'
    else:
        css_class = 'comic-nav-base comic-nav-previous'
    return last_soup.find('a', class_=css_class)
292
293
294
@classmethod
def get_a_navi_navifirst(cls):
    """Implementation of get_first_comic_link.

    Fetch the page at `cls.url` and return the result of looking for an
    'a' tag with class 'navi navi-first' in it.
    """
    home_soup = get_soup_at_url(cls.url)
    return home_soup.find('a', class_='navi navi-first')
298
299
300
@classmethod
def get_div_navfirst_a(cls):
    """Implementation of get_first_comic_link.

    Fetch the page at `cls.url` and return the result of looking for an
    'a' tag inside the 'div' with class "nav-first". Return None when
    that div is not found, so that callers get "no link" rather than an
    AttributeError.
    """
    div = get_soup_at_url(cls.url).find('div', class_="nav-first")
    if div is None:
        return None
    return div.find('a')
304
305
306
@classmethod
def get_a_comicnavbase_comicnavfirst(cls):
    """Implementation of get_first_comic_link.

    Fetch the page at `cls.url` and return the result of looking for an
    'a' tag with class 'comic-nav-base comic-nav-first' in it.
    """
    home_soup = get_soup_at_url(cls.url)
    return home_soup.find('a', class_='comic-nav-base comic-nav-first')
310
311
312
@classmethod
def simulate_first_link(cls):
    """Implementation of get_first_comic_link creating a link-like object from
    an URL provided by the class.

    The returned dict has a single 'href' key holding `cls.first_url`.
    """
    return {'href': cls.first_url}
317
318
319
@classmethod
def navigate_to_first_comic(cls):
    """Implementation of get_first_comic_link navigating from a user provided
    URL to the first comic.

    Sometimes, the first comic cannot be reached directly so to start
    from the first comic one has to go to the previous comic until
    there is no previous comics. Once this URL is reached, it
    is better to hardcode it but for development purposes, it
    is convenient to have an automatic way to find it.

    The starting URL is read from standard input; every visited URL is
    printed. The result is a link-like dict with a single 'href' key.
    """
    url = input("Get starting URL: ")
    while True:
        print(url)
        prev_link = cls.get_prev_link(get_soup_at_url(url))
        if not prev_link:
            # No previous link: `url` is the first comic.
            return {'href': url}
        url = cls.get_url_from_link(prev_link)
338
339
340
class GenericEmptyComic(GenericComic):
    """Generic class for comics where nothing is to be done.

    It can be useful to deactivate temporarily comics that do not work
    properly by replacing `class MyComic(GenericWhateverComic)` with
    `class MyComic(GenericEmptyComic, GenericWhateverComic)`."""

    @classmethod
    def get_next_comic(cls, last_comic):
        """Log that the comic is empty and return an empty list."""
        cls.log("comic is considered as empty - returning no comic")
        return []
351
352
353
class ExtraFabulousComics(GenericNavigableComic):
    """Class to retrieve Extra Fabulous Comics."""
    name = 'efc'
    long_name = 'Extra Fabulous Comics'
    url = 'http://extrafabulouscomics.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics."""
        return get_soup_at_url(cls.url).find('a', title='FIRST')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Images are the 'img' tags whose src starts with the site's
        '/wp-content/uploads/' location; the title is the string of the
        'h2' tag with class 'post-title'.
        """
        # The URL is escaped so that its dots only match literal dots.
        img_src_re = re.compile('^%s/wp-content/uploads/' % re.escape(cls.url))
        imgs = soup.find_all('img', src=img_src_re)
        title = soup.find('h2', class_='post-title').string
        return {
            'title': title,
            'img': [i['src'] for i in imgs],
            'prefix': title + '-'
        }
376
377
378 View Code Duplication
class GenericLeMondeBlog(GenericNavigableComic):
    """Generic class to retrieve comics from Le Monde blogs."""
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    # Placeholder: read by simulate_first_link, so concrete subclasses
    # have to provide the URL to start from.
    first_url = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Title, short link and images come from the page's 'og:title',
        'shortlink' and 'og:image' tags; the date is parsed from the
        'entry-date' span with the "%d %B %Y" format and the
        "fr_FR.utf8" locale.
        """
        url2 = soup.find('link', rel='shortlink')['href']
        title = soup.find('meta', property='og:title')['content']
        date_str = soup.find("span", class_="entry-date").string
        day = string_to_date(date_str, "%d %B %Y", "fr_FR.utf8")
        imgs = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'url2': url2,
            'img': [convert_iri_to_plain_ascii_uri(i['content']) for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
400
401
402
class ZepWorld(GenericLeMondeBlog):
    """Class to retrieve Zep World comics."""
    name = "zep"
    long_name = "Zep World"
    url = "http://zepworld.blog.lemonde.fr"
    first_url = "http://zepworld.blog.lemonde.fr/2014/10/31/bientot-le-blog-de-zep/"
408
409
410
class Vidberg(GenericLeMondeBlog):
    """Class to retrieve Vidberg comics."""
    name = 'vidberg'
    long_name = "Vidberg - l'actu en patates"
    url = "http://vidberg.blog.lemonde.fr"
    # Not the first but I didn't find an efficient way to retrieve it
    first_url = "http://vidberg.blog.lemonde.fr/2012/02/09/revue-de-campagne-la-campagne-du-modem-semballe/"
417
418
419
class Plantu(GenericLeMondeBlog):
    """Class to retrieve Plantu comics."""
    name = 'plantu'
    long_name = "Plantu"
    url = "http://plantu.blog.lemonde.fr"
    first_url = "http://plantu.blog.lemonde.fr/2014/10/28/stress-test-a-bruxelles/"
425
426
427
class XavierGorce(GenericLeMondeBlog):
    """Class to retrieve Xavier Gorce comics."""
    name = 'gorce'
    long_name = "Xavier Gorce"
    url = "http://xaviergorce.blog.lemonde.fr"
    first_url = "http://xaviergorce.blog.lemonde.fr/2015/01/09/distinction/"
433
434
435
class CartooningForPeace(GenericLeMondeBlog):
    """Class to retrieve Cartooning For Peace comics."""
    name = 'forpeace'
    long_name = "Cartooning For Peace"
    url = "http://cartooningforpeace.blog.lemonde.fr"
    first_url = "http://cartooningforpeace.blog.lemonde.fr/2014/12/15/bado/"
441
442
443
class Aurel(GenericLeMondeBlog):
    """Class to retrieve Aurel comics."""
    name = 'aurel'
    long_name = "Aurel"
    url = "http://aurel.blog.lemonde.fr"
    first_url = "http://aurel.blog.lemonde.fr/2014/09/29/le-senat-repasse-a-droite/"
449
450
451
class LesCulottees(GenericLeMondeBlog):
    """Class to retrieve Les Culottees comics."""
    name = 'culottees'
    long_name = 'Les Culottees'
    url = "http://lesculottees.blog.lemonde.fr"
    first_url = "http://lesculottees.blog.lemonde.fr/2016/01/11/clementine-delait-femme-a-barbe/"
457
458
459
class UneAnneeAuLycee(GenericLeMondeBlog):
    """Class to retrieve Une Annee Au Lycee comics."""
    name = 'lycee'
    long_name = 'Une Annee au Lycee'
    url = 'http://uneanneeaulycee.blog.lemonde.fr'
    first_url = "http://uneanneeaulycee.blog.lemonde.fr/2016/06/13/la-semaine-du-bac-est-arrivee/"
465
466
467 View Code Duplication
class Rall(GenericNavigableComic):
    """Class to retrieve Ted Rall comics."""
    # Also on http://www.gocomics.com/tedrall
    name = 'rall'
    long_name = "Ted Rall"
    url = "http://rall.com/comic"
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    # Not the first but I didn't find an efficient way to retrieve it
    first_url = "http://rall.com/2014/01/30/los-angeles-times-cartoon-well-miss-those-california-flowers"

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Title and description come from the 'og:' meta tags, the author
        from the 'author vcard' span and the date from the 'entry-date'
        span (format "%B %d, %Y"). Images are those of the
        'entry-content' div minus its last seven ones.
        """
        title = soup.find('meta', property='og:title')['content']
        author = soup.find("span", class_="author vcard").find("a").string
        date_str = soup.find("span", class_="entry-date").string
        day = string_to_date(date_str, "%B %d, %Y")
        desc = soup.find('meta', property='og:description')['content']
        imgs = soup.find('div', class_='entry-content').find_all('img')
        imgs = imgs[:-7]  # remove social media buttons
        return {
            'title': title,
            'author': author,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'description': desc,
            'img': [i['src'] for i in imgs],
        }
497
498
499
class Dilem(GenericNavigableComic):
    """Class to retrieve Ali Dilem comics."""
    name = 'dilem'
    long_name = 'Ali Dilem'
    url = 'http://information.tv5monde.com/dilem'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = "http://information.tv5monde.com/dilem/2004-06-26"

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next (or previous - for dev purposes) comic.

        Return the 'a' tag found in the relevant 'li', or None when that
        'li' is absent.
        """
        # prev is next / next is prev
        li = last_soup.find('li', class_='prev' if next_ else 'next')
        return li.find('a') if li else None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The date is taken from the first 10 characters of the 'dc:date'
        span content, parsed with the "%Y-%m-%d" format.
        """
        short_url = soup.find('link', rel='shortlink')['href']
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        imgs = soup.find_all('meta', property='og:image')
        date_str = soup.find('span', property='dc:date')['content']
        date_str = date_str[:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        return {
            'short_url': short_url,
            'title': title,
            'img': [i['content'] for i in imgs],
            'day': day.day,
            'month': day.month,
            'year': day.year,
        }
531
532
533
class SpaceAvalanche(GenericNavigableComic):
    """Class to retrieve Space Avalanche comics."""
    name = 'avalanche'
    long_name = 'Space Avalanche'
    url = 'http://www.spaceavalanche.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics."""
        return {'href': "http://www.spaceavalanche.com/2009/02/02/irish-sea/", 'title': "Irish Sea"}

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The title is read from the link itself and the date from the
        three numeric path components of the link's URL (year, month,
        day). Images are those of the 'entry' div.
        """
        url_date_re = re.compile('.*/([0-9]*)/([0-9]*)/([0-9]*)/.*$')
        title = link['title']
        url = cls.get_url_from_link(link)
        year, month, day = [int(s)
                            for s in url_date_re.match(url).groups()]
        imgs = soup.find("div", class_="entry").find_all("img")
        return {
            'title': title,
            'day': day,
            'month': month,
            'year': year,
            'img': [i['src'] for i in imgs],
        }
561
562
563
class ZenPencils(GenericNavigableComic):
    """Class to retrieve ZenPencils comics."""
    # Also on http://zenpencils.tumblr.com
    # Also on http://www.gocomics.com/zen-pencils
    name = 'zenpencils'
    long_name = 'Zen Pencils'
    url = 'http://zenpencils.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = "http://zenpencils.com/comic/1-ralph-waldo-emerson-make-them-cry/"

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Images are those of the div with id 'comic'; author, title and
        date are read from the 'post-content' div (date format
        "%B %d, %Y"). Assertions check that there is at least one image
        and that each image's alt equals its title and is either the
        post title or empty.
        """
        imgs = soup.find('div', id='comic').find_all('img')
        post = soup.find('div', class_='post-content')
        author = post.find("span", class_="post-author").find("a").string
        title = post.find('h2', class_='post-title').string
        date_str = post.find('span', class_='post-date').string
        day = string_to_date(date_str, "%B %d, %Y")
        assert imgs
        assert all(i['alt'] == i['title'] for i in imgs)
        assert all(i['alt'] in (title, "") for i in imgs)
        desc = soup.find('meta', property='og:description')['content']
        return {
            'title': title,
            'description': desc,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [i['src'] for i in imgs],
        }
596
597
598
class ItsTheTie(GenericNavigableComic):
    """Class to retrieve It's the tie comics."""
    # Also on http://itsthetie.tumblr.com
    # Also on https://tapastic.com/series/itsthetie
    name = 'tie'
    long_name = "It's the tie"
    url = "http://itsthetie.com"
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Images are the 'og:image' contents followed by the
        'data-oversrc' values not already listed, minus any URL ending
        with "/2016/01/bonus-panel.png". Tags default to "" when the
        page has no 'article:tag' meta.
        """
        title = soup.find('h1', class_='comic-title').find('a').string
        date_str = soup.find('header', class_='comic-meta entry-meta').find('a').string
        day = string_to_date(date_str, "%B %d, %Y")
        # Bonus images may or may not be in meta og:image.
        imgs = soup.find_all('meta', property='og:image')
        imgs_src = [i['content'] for i in imgs]
        bonus = soup.find_all('img', attrs={'data-oversrc': True})
        bonus_src = [b['data-oversrc'] for b in bonus]
        all_imgs_src = imgs_src + [s for s in bonus_src if s not in imgs_src]
        all_imgs_src = [s for s in all_imgs_src if not s.endswith("/2016/01/bonus-panel.png")]
        tag_meta = soup.find('meta', property='article:tag')
        tags = tag_meta['content'] if tag_meta else ""
        return {
            'title': title,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': all_imgs_src,
            'tags': tags,
        }
631
632
633 View Code Duplication
class PenelopeBagieu(GenericNavigableComic):
    """Class to retrieve comics from Penelope Bagieu's blog."""
    name = 'bagieu'
    long_name = 'Ma vie est tout a fait fascinante (Bagieu)'
    url = 'http://www.penelope-jolicoeur.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.penelope-jolicoeur.com/2007/02/ma-vie-mon-oeuv.html'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The date is parsed from the 'date-header' h2 with the
        "%A %d %B %Y" format and the "fr_FR.utf8" locale; images are
        those of the 'entry-body' div.
        """
        date_str = soup.find('h2', class_='date-header').string
        day = string_to_date(date_str, "%A %d %B %Y", "fr_FR.utf8")
        imgs = soup.find('div', class_='entry-body').find_all('img')
        title = soup.find('h3', class_='entry-header').string
        return {
            'title': title,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
656
657
658 View Code Duplication
class OneOneOneOneComic(GenericNavigableComic):
    """Class to retrieve 1111 Comics."""
    # Also on http://comics1111.tumblr.com
    # Also on https://tapastic.com/series/1111-Comics
    name = '1111'
    long_name = '1111 Comics'
    url = 'http://www.1111comics.me'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The title comes from the 'comic-title' h1, the date from the
        'comic-meta entry-meta' header (format "%B %d, %Y") and the
        images from the 'og:image' meta tags.
        """
        title = soup.find('h1', class_='comic-title').find('a').string
        date_str = soup.find('header', class_='comic-meta entry-meta').find('a').string
        day = string_to_date(date_str, "%B %d, %Y")
        imgs = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [i['content'] for i in imgs],
        }
682
683
684 View Code Duplication
class AngryAtNothing(GenericNavigableComic):
    """Class to retrieve Angry at Nothing comics."""
    # Also on http://tapastic.com/series/Comics-yeah-definitely-comics-
    name = 'angry'
    long_name = 'Angry At Nothing'
    url = 'http://www.angryatnothing.net'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The title comes from the 'comic-title' h1, the date from the
        'comic-meta entry-meta' header (format "%B %d, %Y") and the
        images from the 'og:image' meta tags.
        """
        title = soup.find('h1', class_='comic-title').find('a').string
        date_str = soup.find('header', class_='comic-meta entry-meta').find('a').string
        day = string_to_date(date_str, "%B %d, %Y")
        imgs = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [i['content'] for i in imgs],
        }
707
708
709
class NeDroid(GenericNavigableComic):
    """Class to retrieve NeDroid comics."""
    name = 'nedroid'
    long_name = 'NeDroid'
    url = 'http://nedroid.com'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The comic number is read from the '?p=' short link and the date
        from the three dash-separated numbers at the start of the image
        file name. Exactly one image is expected in the div with id
        'comic'; its alt and title give 'title' and 'title2'.
        """
        # Dots are escaped so that they only match literal dots.
        short_url_re = re.compile(r'^%s/\?p=([0-9]*)' % re.escape(cls.url))
        comic_url_re = re.compile(r'//nedroid\.com/comics/([0-9]*)-([0-9]*)-([0-9]*).*')
        short_url = cls.get_url_from_link(soup.find('link', rel='shortlink'))
        num = int(short_url_re.match(short_url).groups()[0])
        imgs = soup.find('div', id='comic').find_all('img')
        # Checked before the single image is used below.
        assert len(imgs) == 1
        img = imgs[0]
        year, month, day = [int(s) for s in comic_url_re.match(img['src']).groups()]
        title = img['alt']
        title2 = img['title']
        return {
            'short_url': short_url,
            'title': title,
            'title2': title2,
            'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
            'day': day,
            'month': month,
            'year': year,
            'num': num,
        }
740
741
742
class Garfield(GenericNavigableComic):
    """Class to retrieve Garfield comics."""
    # Also on http://www.gocomics.com/garfield
    name = 'garfield'
    long_name = 'Garfield'
    url = 'https://garfield.com'
    get_first_comic_link = simulate_first_link
    first_url = 'https://garfield.com/comic/1978/06/19'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next (or previous - for dev purposes) comic."""
        arrow = 'comic-arrow-right' if next_ else 'comic-arrow-left'
        return last_soup.find('a', class_=arrow)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The date is read from the '/comic/<year>/<month>/<day>' part of
        the link's URL; images are the 'img-responsive' images of the
        'comic-display' div.
        """
        url = cls.get_url_from_link(link)
        # The URL is escaped so that its dots only match literal dots.
        date_re = re.compile('^%s/comic/([0-9]*)/([0-9]*)/([0-9]*)' % re.escape(cls.url))
        year, month, day = [int(s) for s in date_re.match(url).groups()]
        imgs = soup.find('div', class_='comic-display').find_all('img', class_='img-responsive')
        return {
            'month': month,
            'year': year,
            'day': day,
            'img': [i['src'] for i in imgs],
        }
768
769
770 View Code Duplication
class Dilbert(GenericNavigableComic):
    """Class to retrieve Dilbert comics."""
    # Also on http://www.gocomics.com/dilbert-classics
    name = 'dilbert'
    long_name = 'Dilbert'
    url = 'http://dilbert.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://dilbert.com/strip/1989-04-16'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the <a> of the right (next_) or left navigation div, None without div."""
        nav_class = 'nav-comic nav-right' if next_ else 'nav-comic nav-left'
        nav_div = last_soup.find('div', class_=nav_class)
        if not nav_div:
            return None
        return nav_div.find('a')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic description from the page's <meta property=...> tags."""
        def meta_content(prop):
            """Content of the first <meta> tag having the given property."""
            return soup.find('meta', property=prop)['content']

        title = meta_content('og:title')
        img_metas = soup.find_all('meta', property='og:image')
        desc = meta_content('og:description')
        publish_date = string_to_date(meta_content('article:publish_date'), "%B %d, %Y")
        author = meta_content('article:author')
        tags = meta_content('article:tag')
        return {
            'title': title,
            'description': desc,
            'img': [meta['content'] for meta in img_metas],
            'author': author,
            'tags': tags,
            'day': publish_date.day,
            'month': publish_date.month,
            'year': publish_date.year
        }
805
806
807
class VictimsOfCircumsolar(GenericNavigableComic):
    """Class to retrieve VictimsOfCircumsolar comics."""
    name = 'circumsolar'
    long_name = 'Victims Of Circumsolar'
    url = 'http://www.victimsofcircumsolar.com'
    get_navi_link = get_a_navi_comicnavnext_navinext
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.victimsofcircumsolar.com/comic/modern-addiction'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, description and image URLs of the comic page."""
        # Date is on the archive page
        def last_meta_content(prop):
            """Content of the last <meta> tag having the given property."""
            return soup.find_all('meta', property=prop)[-1]['content']

        title = last_meta_content('og:title')
        desc = last_meta_content('og:description')
        comic_imgs = soup.find('div', id='comic').find_all('img')
        # every image is expected to carry the comic title as both title and alt
        for img in comic_imgs:
            assert img['title'] == img['alt'] == title
        return {
            'title': title,
            'description': desc,
            'img': [img['src'] for img in comic_imgs],
        }
829
830
831
class ThreeWordPhrase(GenericNavigableComic):
    """Class to retrieve Three Word Phrase comics."""
    # Also on http://www.threewordphrase.tumblr.com
    name = 'threeword'
    long_name = 'Three Word Phrase'
    url = 'http://threewordphrase.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the parent element of the '/firstlink.gif' image of the home page."""
        first_img = get_soup_at_url(cls.url).find('img', src='/firstlink.gif')
        return first_img.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the parent of the next/prev image, or None when it has no href."""
        gif = '/nextlink.gif' if next_ else '/prevlink.gif'
        anchor = last_soup.find('img', src=gif).parent
        if anchor.get('href') is None:
            return None
        return anchor

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return page title, concatenated alt texts and URLs of the comic images."""
        # images whose src ends with one of these are not part of the comic
        ignored_suffixes = ('link.gif', '32.png', 'twpbookad.jpg',
                            'merchad.jpg', 'header.gif', 'tipjar.jpg')
        title_tag = soup.find('title')
        comic_imgs = [i for i in soup.find_all('img')
                      if not i['src'].endswith(ignored_suffixes)]
        alts = [i.get('alt') for i in comic_imgs]
        return {
            'title': title_tag.string if title_tag else None,
            'title2': '  '.join(alt for alt in alts if alt),
            'img': [urljoin_wrapper(cls.url, i['src']) for i in comic_imgs],
        }
862
863
864
class DeadlyPanel(GenericEmptyComic, GenericNavigableComic):
    """Class to retrieve Deadly Panel comics."""
    # Also on https://tapastic.com/series/deadlypanel
    name = 'deadly'
    long_name = 'Deadly Panel'
    url = 'http://www.deadlypanel.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the URLs of the images found in the 'comic' div."""
        comic_div = soup.find('div', id='comic')
        comic_imgs = comic_div.find_all('img')
        # alt and title are expected to be the same text
        for img in comic_imgs:
            assert img['alt'] == img['title']
        return {
            'img': [img['src'] for img in comic_imgs],
        }
881
882
883
class TheGentlemanArmchair(GenericNavigableComic):
    """Class to retrieve The Gentleman Armchair comics."""
    name = 'gentlemanarmchair'
    long_name = 'The Gentleman Armchair'
    url = 'http://thegentlemansarmchair.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return images, title, author and post date of the comic page."""
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        post_date = string_to_date(
            soup.find('span', class_='post-date').string, "%B %d, %Y")
        comic_imgs = soup.find('div', id='comic').find_all('img')
        return {
            'img': [img['src'] for img in comic_imgs],
            'title': title,
            'author': author,
            'month': post_date.month,
            'year': post_date.year,
            'day': post_date.day,
        }
907
908
909 View Code Duplication
class MyExtraLife(GenericNavigableComic):
    """Class to retrieve My Extra Life comics."""
    name = 'extralife'
    long_name = 'My Extra Life'
    url = 'http://www.myextralife.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'first_comic_link' anchor found on the home page."""
        home = get_soup_at_url(cls.url)
        return home.find('a', class_='comic_nav_link first_comic_link')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, image URLs and date of the comic page."""
        title = soup.find("h1", class_="comic_title").string
        comic_date = string_to_date(
            soup.find("span", class_="comic_date").string, "%B %d, %Y")
        comic_imgs = soup.find_all("img", class_="comic")
        # alt and title of every image are expected to equal the comic title
        for img in comic_imgs:
            assert img['alt'] == img['title'] == title
        return {
            'title': title,
            # images with an empty src are left out
            'img': [img['src'] for img in comic_imgs if img["src"]],
            'day': comic_date.day,
            'month': comic_date.month,
            'year': comic_date.year
        }
936
937
938
class SaturdayMorningBreakfastCereal(GenericNavigableComic):
    """Class to retrieve Saturday Morning Breakfast Cereal comics."""
    # Also on http://www.gocomics.com/saturday-morning-breakfast-cereal
    # Also on http://smbc-comics.tumblr.com
    name = 'smbc'
    long_name = 'Saturday Morning Breakfast Cereal'
    url = 'http://www.smbc-comics.com'
    get_navi_link = get_a_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the rel='start' anchor found on the home page."""
        home = get_soup_at_url(cls.url)
        return home.find('a', rel='start')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, date and images (main one plus optional 'aftercomic' one)."""
        main_img = soup.find('img', id='cc-comic')
        img_urls = [main_img['src']]
        after_div = soup.find('div', id='aftercomic')
        if after_div:
            bonus_url = after_div.find('img')['src']
            # an empty src is not kept
            if bonus_url:
                img_urls.append(bonus_url)
        publish_str = soup.find('div', class_='cc-publishtime').contents[0]
        publish_date = string_to_date(publish_str, "%B %d, %Y")
        return {
            'title': main_img['title'],
            'img': [urljoin_wrapper(cls.url, u) for u in img_urls],
            'day': publish_date.day,
            'month': publish_date.month,
            'year': publish_date.year
        }
969
970
971
class PerryBibleFellowship(GenericListableComic):
    """Class to retrieve Perry Bible Fellowship comics."""
    name = 'pbf'
    long_name = 'Perry Bible Fellowship'
    url = 'http://pbfcomics.com'
    get_url_from_archive_element = join_cls_url_to_href

    @classmethod
    def get_archive_elements(cls):
        """Return the home page links of the form /<digits>/, in reversed order."""
        numbered_href = re.compile('^/[0-9]*/$')
        links = get_soup_at_url(cls.url).find_all('a', href=numbered_href)
        return reversed(links)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return number, name and image of the comic described by an archive link."""
        page_url = cls.get_url_from_archive_element(link)
        img_src_re = re.compile('^/archive_b/PBF.*')
        comic_name = link.string
        comic_num = int(link['name'])
        # the link's name attribute and its href must hold the same number
        assert link['href'] == '/%d/' % comic_num
        comic_imgs = soup.find_all('img', src=img_src_re)
        assert len(comic_imgs) == 1
        assert comic_imgs[0]['alt'] == comic_name
        return {
            'num': comic_num,
            'name': comic_name,
            'img': [urljoin_wrapper(page_url, img['src']) for img in comic_imgs],
            'prefix': '%d-' % comic_num,
        }
1001
1002
1003 View Code Duplication
class Mercworks(GenericNavigableComic):
    """Class to retrieve Mercworks comics."""
    # Also on http://mercworks.tumblr.com
    name = 'mercworks'
    long_name = 'Mercworks'
    url = 'http://mercworks.net'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic description from the page's <meta> tags."""
        def named_meta_content(meta_name):
            """Content of the first <meta> tag having the given name attribute."""
            return soup.find('meta', attrs={'name': meta_name})['content']

        title = soup.find('meta', property='og:title')['content']
        desc_meta = soup.find('meta', property='og:description')
        if desc_meta:
            desc = desc_meta['content']
        else:
            desc = ""
        author = named_meta_content('shareaholic:article_author_name')
        published = named_meta_content('shareaholic:article_published_time')
        # only the first 10 characters are parsed, as a YYYY-MM-DD date
        day = string_to_date(published[:10], "%Y-%m-%d")
        img_metas = soup.find_all('meta', property='og:image')
        return {
            'img': [meta['content'] for meta in img_metas],
            'title': title,
            'author': author,
            'desc': desc,
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
1032
1033
1034
class BerkeleyMews(GenericListableComic):
    """Class to retrieve Berkeley Mews comics."""
    # Also on http://mews.tumblr.com
    # Also on http://www.gocomics.com/berkeley-mews
    name = 'berkeley'
    long_name = 'Berkeley Mews'
    url = 'http://www.berkeleymews.com'
    get_url_from_archive_element = get_href
    # re.escape: the dots of the site URL must match literally, not any char
    comic_num_re = re.compile(r'%s/\?p=([0-9]*)$' % re.escape(url))

    @classmethod
    def get_archive_elements(cls):
        """Return the comic links of the archive page (?page_id=2), in reversed order."""
        archive_url = urljoin_wrapper(cls.url, "?page_id=2")
        return reversed(get_soup_at_url(archive_url).find_all('a', href=cls.comic_num_re))

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The number comes from the comic URL (?p=<num>), the date from the
        <year>-<month>-<day>- part of the image URL.
        """
        comic_date_re = re.compile('.*/([0-9]*)-([0-9]*)-([0-9]*)-.*')
        url = cls.get_url_from_archive_element(link)
        num = int(cls.comic_num_re.match(url).groups()[0])
        img = soup.find('div', id='comic').find('img')
        # alt and title are expected to be the same text
        assert img['alt'] == img['title']
        title2 = img['title']
        img_url = img['src']
        year, month, day = [int(s) for s in comic_date_re.match(img_url).groups()]
        return {
            'num': num,
            'title': link.string,
            'title2': title2,
            'img': [img_url],
            'year': year,
            'month': month,
            'day': day,
        }
1069
1070
1071
class GenericBouletCorp(GenericNavigableComic):
    """Generic class to retrieve BouletCorp comics in different languages."""
    # Also on http://bouletcorp.tumblr.com
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the first anchor of the 'centered_nav' div of the home page."""
        return get_soup_at_url(cls.url).find('div', id='centered_nav').find_all('a')[0]

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The date is parsed from the comic URL (<url>/<year>/<month>/<day>/);
        images are those of the 'storycontent' div inside the 'notes' div.
        """
        url = cls.get_url_from_link(link)
        # re.escape: the dots of the site URL must match literally, not any char
        date_re = re.compile('^%s/([0-9]*)/([0-9]*)/([0-9]*)/' % re.escape(cls.url))
        year, month, day = [int(s) for s in date_re.match(url).groups()]
        imgs = soup.find('div', id='notes').find('div', class_='storycontent').find_all('img')
        # non-empty image titles, joined with two spaces
        texts = '  '.join(t for t in (i.get('title') for i in imgs) if t)
        title = soup.find('title').string
        return {
            # images without a src attribute are left out
            'img': [convert_iri_to_plain_ascii_uri(i['src']) for i in imgs if i.get('src') is not None],
            'title': title,
            'texts': texts,
            'year': year,
            'month': month,
            'day': day,
        }
1098
1099
1100
class BouletCorp(GenericBouletCorp):
    """Class to retrieve BouletCorp comics.

    Only sets the identifiers and the site URL; the retrieval logic is
    inherited from GenericBouletCorp.
    """
    name = 'boulet'
    long_name = 'Boulet Corp'
    url = 'http://www.bouletcorp.com'
1105
1106
1107
class BouletCorpEn(GenericBouletCorp):
    """Class to retrieve EnglishBouletCorp comics.

    Only sets the identifiers and the site URL; the retrieval logic is
    inherited from GenericBouletCorp.
    """
    name = 'boulet_en'
    long_name = 'Boulet Corp English'
    url = 'http://english.bouletcorp.com'
1112
1113
1114 View Code Duplication
class AmazingSuperPowers(GenericNavigableComic):
    """Class to retrieve Amazing Super Powers comics."""
    name = 'asp'
    long_name = 'Amazing Super Powers'
    url = 'http://www.amazingsuperpowers.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, author, image URLs and post date of the comic page."""
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        post_date = string_to_date(
            soup.find('span', class_='post-date').string, "%B %d, %Y")
        comic_imgs = soup.find('div', id='comic').find_all('img')
        # the title is made of the image titles, separated by a space
        title = ' '.join(img['title'] for img in comic_imgs)
        for img in comic_imgs:
            assert img['alt'] == img['title']
        return {
            'title': title,
            'author': author,
            'img': [img['src'] for img in comic_imgs],
            'day': post_date.day,
            'month': post_date.month,
            'year': post_date.year
        }
1139
1140
1141
class ToonHole(GenericListableComic):
    """Class to retrieve Toon Holes comics."""
    # Also on http://tapastic.com/series/TOONHOLE
    name = 'toonhole'
    long_name = 'Toon Hole'
    url = 'http://www.toonhole.com'
    get_url_from_archive_element = get_href

    @classmethod
    def get_archive_elements(cls):
        """Return the rel='bookmark' links of the archive page, in reversed order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive/'))
        bookmarks = archive_soup.find_all('a', rel='bookmark')
        return reversed(bookmarks)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, date and image URLs of the comic described by an archive link."""
        title = link.string
        raw_date = soup.find('div', class_='comicdate').string.strip()
        comic_date = string_to_date(remove_st_nd_rd_th_from_date(raw_date), "%B %d, %Y")
        comic_imgs = soup.find('div', id='comic').find_all('img')
        # alt and title of every image are expected to equal the link text
        for img in comic_imgs:
            assert img['alt'] == img['title'] == title
        return {
            'title': title,
            'month': comic_date.month,
            'year': comic_date.year,
            'day': comic_date.day,
            'img': [convert_iri_to_plain_ascii_uri(img['src']) for img in comic_imgs],
        }
1169
1170
1171
class Channelate(GenericNavigableComic):
    """Class to retrieve Channelate comics."""
    name = 'channelate'
    long_name = 'Channelate'
    url = 'http://www.channelate.com'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return author, date, title and images, including those of the extra panel page."""
        author = soup.find("span", class_="post-author").find("a").string
        post_date = string_to_date(
            soup.find('span', class_='post-date').string, '%Y/%m/%d')
        title = soup.find('meta', property='og:title')['content']
        comic_div = soup.find('div', id='comic')
        imgs = []
        if comic_div:
            imgs = comic_div.find_all('img')
        for img in imgs:
            assert img['alt'] == img['title']
        extra_url = None
        extra_button = soup.find('div', id='extrapanelbutton')
        if extra_button:
            # the button links to another page, fetched for its 'extrapanelimage' images
            extra_url = extra_button.find('a')['href']
            imgs += get_soup_at_url(extra_url).find_all('img', class_='extrapanelimage')
        return {
            'url_extra': extra_url,
            'title': title,
            'author': author,
            'month': post_date.month,
            'year': post_date.year,
            'day': post_date.day,
            'img': [img['src'] for img in imgs],
        }
1205
1206
1207
class CyanideAndHappiness(GenericNavigableComic):
    """Class to retrieve Cyanide And Happiness comics."""
    name = 'cyanide'
    long_name = 'Cyanide and Happiness'
    url = 'http://explosm.net'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the anchor titled 'Oldest comic' found on the home page."""
        home = get_soup_at_url(cls.url)
        return home.find('a', title='Oldest comic')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the next/previous comic anchor, or None when it has no href."""
        # NOTE(review): 'previous-comic ' ends with a space, unlike 'next-comic';
        # presumably mirrors the site's markup - confirm before changing.
        css_class = 'next-comic' if next_ else 'previous-comic '
        anchor = last_soup.find('a', class_=css_class)
        if anchor.get('href') is None:
            return None
        return anchor

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return number, author, date and image URLs of the comic page."""
        og_url = soup.find('meta', property='og:url')['content']
        # the number is the second-to-last '/'-separated part of the og:url
        num = int(og_url.split('/')[-2])
        comic_date = string_to_date(soup.find('h3').find('a').string, '%Y.%m.%d')
        credit = soup.find('small', class_="author-credit-name").string
        assert credit.startswith('by ')
        author = credit[3:]
        comic_imgs = soup.find_all('img', id='main-comic')
        return {
            'num': num,
            'author': author,
            'month': comic_date.month,
            'year': comic_date.year,
            'day': comic_date.day,
            'prefix': '%d-' % num,
            'img': [convert_iri_to_plain_ascii_uri(urljoin_wrapper(cls.url, img['src']))
                    for img in comic_imgs]
        }
1244
1245
1246
class MrLovenstein(GenericComic):
    """Class to retrieve Mr Lovenstein comics."""
    # Also on https://tapastic.com/series/MrLovenstein
    name = 'mrlovenstein'
    long_name = 'Mr. Lovenstein'
    url = 'http://www.mrlovenstein.com'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generate the comics numbered after last_comic, up to the highest number
        linked from the home page (starting at the lowest one without last_comic)."""
        # TODO: more info from http://www.mrlovenstein.com/archive
        comic_num_re = re.compile('^/comic/([0-9]*)$')
        img_src_re = re.compile('^/images/comics/')
        home_links = get_soup_at_url(cls.url).find_all('a', href=comic_num_re)
        nums = [int(comic_num_re.match(a['href']).groups()[0]) for a in home_links]
        first, last = min(nums), max(nums)
        if last_comic:
            first = last_comic['num'] + 1
        for num in range(first, last + 1):
            url = urljoin_wrapper(cls.url, '/comic/%d' % num)
            soup = get_soup_at_url(url)
            # images are kept in reverse page order
            imgs = soup.find_all('img', src=img_src_re)[::-1]
            description = soup.find('meta', attrs={'name': 'description'})['content']
            titles = [img.get('title') for img in imgs]
            yield {
                'url': url,
                'num': num,
                'texts': '  '.join(t for t in titles if t),
                'img': [urljoin_wrapper(url, img['src']) for img in imgs],
                'description': description,
            }
1275
1276
1277
class DinosaurComics(GenericListableComic):
    """Class to retrieve Dinosaur Comics comics."""
    name = 'dinosaur'
    long_name = 'Dinosaur Comics'
    url = 'http://www.qwantz.com'
    get_url_from_archive_element = get_href
    # dots (site URL and 'index.php') are escaped so that they match literally
    comic_link_re = re.compile(r'^%s/index\.php\?comic=([0-9]*)$' % re.escape(url))

    @classmethod
    def get_archive_elements(cls):
        """Return the comic links of archive.php, first one dropped, in reversed order."""
        archive_url = urljoin_wrapper(cls.url, 'archive.php')
        # first link is random -> skip it
        return reversed(get_soup_at_url(archive_url).find_all('a', href=cls.comic_link_re)[1:])

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Number comes from the link URL, date from the link text, 'text' from
        the element following the link; image and title from the comic page.
        """
        url = cls.get_url_from_archive_element(link)
        num = int(cls.comic_link_re.match(url).groups()[0])
        date_str = link.string
        text = link.next_sibling.string
        day = string_to_date(remove_st_nd_rd_th_from_date(date_str), "%B %d, %Y")
        comic_img_re = re.compile('^%s/comics/' % re.escape(cls.url))
        img = soup.find('img', src=comic_img_re)
        return {
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [img.get('src')],
            'title': img.get('title'),
            'text': text,
            'num': num,
        }
1310
1311
1312
class ButterSafe(GenericListableComic):
    """Class to retrieve Butter Safe comics."""
    name = 'butter'
    long_name = 'ButterSafe'
    url = 'http://buttersafe.com'
    get_url_from_archive_element = get_href
    # re.escape: the dot of the site URL must match literally, not any char
    comic_link_re = re.compile('^%s/([0-9]*)/([0-9]*)/([0-9]*)/.*' % re.escape(url))

    @classmethod
    def get_archive_elements(cls):
        """Return the comic links of the archive page, in reversed order."""
        archive_url = urljoin_wrapper(cls.url, 'archive/')
        return reversed(get_soup_at_url(archive_url).find_all('a', href=cls.comic_link_re))

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Title is the link text, date is parsed from the comic URL
        (<url>/<year>/<month>/<day>/...), image is the first one of the 'comic' div.
        """
        url = cls.get_url_from_archive_element(link)
        title = link.string
        year, month, day = [int(s) for s in cls.comic_link_re.match(url).groups()]
        img = soup.find('div', id='comic').find('img')
        # the image alt is expected to equal the link text
        assert img['alt'] == title
        return {
            'title': title,
            'day': day,
            'month': month,
            'year': year,
            'img': [img['src']],
        }
1340
1341
1342
class CalvinAndHobbes(GenericComic):
    """Class to retrieve Calvin and Hobbes comics."""
    # Also on http://www.gocomics.com/calvinandhobbes/
    name = 'calvin'
    long_name = 'Calvin and Hobbes'
    # This is not through any official webpage but eh...
    url = 'http://marcel-oehler.marcellosendos.ch/comics/ch/'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generate the comics dated after last_comic.

        Without last_comic, the reference date is 1985-11-01. Month pages are
        the links of the form <year>/<month>/ found at cls.url; on each month
        page, images whose src starts with <year><month><day> are the comics.
        """
        last_date = get_date_for_comic(
            last_comic) if last_comic else date(1985, 11, 1)
        link_re = re.compile('^([0-9]*)/([0-9]*)/')
        for link in get_soup_at_url(cls.url).find_all('a', href=link_re):
            url = link['href']
            # year and month are kept as strings too: they are reused verbatim
            # in the image regex and in the image URL
            year, month = link_re.match(url).groups()
            year_num, month_num = int(year), int(month)
            # skip months that cannot hold a comic at or after last_date
            if date(year_num, month_num, 1) + timedelta(days=31) >= last_date:
                img_re = re.compile('^%s%s([0-9]*)' % (year, month))
                month_url = urljoin_wrapper(cls.url, url)
                for img in get_soup_at_url(month_url).find_all('img', src=img_re):
                    img_src = img['src']
                    day = int(img_re.match(img_src).groups()[0])
                    comic_date = date(year_num, month_num, day)
                    if comic_date > last_date:
                        yield {
                            'url': month_url,
                            'year': year_num,
                            'month': month_num,
                            'day': day,
                            'img': ['%s%s/%s/%s' % (cls.url, year, month, img_src)],
                        }
                        last_date = comic_date
1375
1376
1377
class AbstruseGoose(GenericListableComic):
    """Class to retrieve AbstruseGoose Comics."""
    name = 'abstruse'
    long_name = 'Abstruse Goose'
    url = 'http://abstrusegoose.com'
    get_url_from_archive_element = get_href
    # re.escape: the dot of the site URL must match literally, not any char
    comic_url_re = re.compile('^%s/([0-9]*)$' % re.escape(url))
    comic_img_re = re.compile('^%s/strips/.*' % re.escape(url))

    @classmethod
    def get_archive_elements(cls):
        """Return the comic links of the archive page, in page order."""
        archive_url = urljoin_wrapper(cls.url, 'archive')
        return get_soup_at_url(archive_url).find_all('a', href=cls.comic_url_re)

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Get information about a particular comics.

        Number comes from the comic URL, title is the archive link text and
        the image is the first one whose src matches comic_img_re.
        """
        comic_url = cls.get_url_from_archive_element(archive_elt)
        num = int(cls.comic_url_re.match(comic_url).groups()[0])
        return {
            'num': num,
            'title': archive_elt.string,
            'img': [soup.find('img', src=cls.comic_img_re)['src']]
        }
1400
1401
1402
class PhDComics(GenericNavigableComic):
    """Class to retrieve PHD Comics."""
    name = 'phd'
    long_name = 'PhD Comics'
    url = 'http://phdcomics.com/comics/archive.php'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the parent element of the 'first' button image of the page at cls.url."""
        first_button = get_soup_at_url(cls.url).find('img', src='images/first_button.gif')
        return first_button.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the parent of the next/prev button image, or None without image."""
        button_src = 'images/next_button.gif' if next_ else 'images/prev_button.gif'
        button = last_soup.find('img', src=button_src)
        if button is None:
            return None
        return button.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return date, image URL and title of the comic page.

        A date that cannot be parsed is reported on stdout and replaced by
        today's date.
        """
        date_font = soup.find(
            'font', face='Arial,Helvetica,Geneva,Swiss,SunSans-Regular', color='white')
        date_str = date_font.string.strip()
        try:
            comic_date = string_to_date(date_str, '%m/%d/%Y')
        except ValueError:
            print("Invalid date %s" % date_str)
            comic_date = date.today()
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        comic_src = soup.find('img', id='comic')['src']
        return {
            'year': comic_date.year,
            'month': comic_date.month,
            'day': comic_date.day,
            'img': [comic_src],
            'title': title,
        }
1436
1437
1438 View Code Duplication
class Octopuns(GenericNavigableComic):
    """Class to retrieve Octopuns comics."""
    # Also on http://octopuns.tumblr.com
    name = 'octopuns'
    long_name = 'Octopuns'
    url = 'http://www.octopuns.net'

    @classmethod
    def get_first_comic_link(cls):
        """Return the parent element of the 'First.png' image of the home page."""
        first_img = get_soup_at_url(cls.url).find('img', src=re.compile('.*/First.png'))
        return first_img.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the parent of the Next/Back image, or None when it has no href."""
        pattern = '.*/Next.png' if next_ else '.*/Back.png'
        anchor = last_soup.find('img', src=re.compile(pattern)).parent
        if anchor.get('href') is None:
            return None
        return anchor

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return image URLs, title and date of the comic page."""
        title = soup.find('h3', class_='post-title entry-title').string
        header_date = soup.find('h2', class_='date-header').string
        comic_date = string_to_date(header_date, "%A, %B %d, %Y")
        img_links = soup.find_all('link', rel='image_src')
        return {
            'img': [img_link['href'] for img_link in img_links],
            'title': title,
            'day': comic_date.day,
            'month': comic_date.month,
            'year': comic_date.year,
        }
1469
1470
1471
class Quarktees(GenericNavigableComic):
    """Class to retrieve the Quarktees comics."""
    name = 'quarktees'
    long_name = 'Quarktees'
    url = 'http://www.quarktees.com/blogs/news'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.quarktees.com/blogs/news/12486621-coming-soon'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the 'article-next' anchor if next_ else the 'article-prev' one."""
        anchor_id = 'article-next' if next_ else 'article-prev'
        return last_soup.find('a', id=anchor_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the og:title and the images of the 'single-article' div."""
        title = soup.find('meta', property='og:title')['content']
        article_imgs = soup.find('div', class_='single-article').find_all('img')
        return {
            'title': title,
            'img': [urljoin_wrapper(cls.url, img['src']) for img in article_imgs],
        }
1494
1495
1496
class OverCompensating(GenericNavigableComic):
    """Class to retrieve the Over Compensating comics."""
    name = 'compensating'
    long_name = 'Over Compensating'
    url = 'http://www.overcompensating.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the home page anchor whose href ends with 'comic=1'."""
        home = get_soup_at_url(cls.url)
        return home.find('a', href=re.compile('comic=1$'))

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor titled 'next comic' if next_ else 'go back already'."""
        anchor_title = 'next comic' if next_ else 'go back already'
        return last_soup.find('a', title=anchor_title)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return number (from the comic URL), image URL and image title."""
        img_src_re = re.compile('^/oc/comics/.*')
        comic_num_re = re.compile('.*comic=([0-9]*)$')
        page_url = cls.get_url_from_link(link)
        num = int(comic_num_re.match(page_url).groups()[0])
        comic_img = soup.find('img', src=img_src_re)
        return {
            'num': num,
            'img': [urljoin_wrapper(page_url, comic_img['src'])],
            'title': comic_img.get('title')
        }
1525
1526
1527
class Oglaf(GenericNavigableComic):
    """Retriever for the Oglaf webcomic."""
    name = 'oglaf'
    long_name = 'Oglaf [NSFW]'
    url = 'http://oglaf.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the parent element of the ``div#st`` found on the home page."""
        start_div = get_soup_at_url(cls.url).find("div", id="st")
        return start_div.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the parent of ``div#nx`` / ``div#pvs``, or None if absent."""
        marker = last_soup.find("div", id="nx" if next_ else "pvs")
        if marker:
            return marker.parent
        return None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, image sources and joined image titles for a page."""
        title = soup.find('title').string
        heading_imgs = soup.find('div', id='tt').find_all('img')
        assert len(heading_imgs) == 1
        body_imgs = soup.find_all('img', id='strip')
        assert len(body_imgs) == 1
        # Title image first, then the strip itself.
        pictures = heading_imgs + body_imgs
        desc = ' '.join(pic['title'] for pic in pictures)
        return {
            'title': title,
            'img': [pic['src'] for pic in pictures],
            'description': desc,
        }
1559
1560
1561
class ScandinaviaAndTheWorld(GenericNavigableComic):
    """Retriever for the Scandinavia And The World webcomic."""
    name = 'satw'
    long_name = 'Scandinavia And The World'
    url = 'http://satwcomic.com'
    get_first_comic_link = simulate_first_link
    first_url = 'http://satwcomic.com/sweden-denmark-and-norway'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor carrying accesskey 'n' (next) or 'p' (previous)."""
        key = 'n' if next_ else 'p'
        return last_soup.find('a', accesskey=key)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Read title, description and image sources from the page metadata."""
        title_meta = soup.find('meta', attrs={'name': 'twitter:label1'})
        title = title_meta['content']
        desc_meta = soup.find('meta', property='og:description')
        desc = desc_meta['content']
        pictures = soup.find_all('img', itemprop="image")
        return {
            'title': title,
            'description': desc,
            'img': [pic['src'] for pic in pictures],
        }
1584
1585
1586
# The site does not exist anymore, hence the empty-comic base class.
class SomethingOfThatIlk(GenericEmptyComic):
    """Placeholder for the (discontinued) Something Of That Ilk comics."""
    name = 'somethingofthatilk'
    long_name = 'Something Of That Ilk'
    url = 'http://www.somethingofthatilk.com'
1591
1592
1593
class InfiniteMonkeyBusiness(GenericNavigableComic):
    """Retriever for the Infinite Monkey Business webcomic."""
    name = 'monkey'
    long_name = 'Infinite Monkey Business'
    url = 'http://infinitemonkeybusiness.net'
    get_navi_link = get_a_navi_comicnavnext_navinext
    get_first_comic_link = simulate_first_link
    first_url = 'http://infinitemonkeybusiness.net/comic/pillory/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the og:title and the image sources inside ``div#comic``."""
        title = soup.find('meta', property='og:title')['content']
        comic_div = soup.find('div', id='comic')
        sources = [pic['src'] for pic in comic_div.find_all('img')]
        return {
            'title': title,
            'img': sources,
        }
1611
1612
1613
class Wondermark(GenericListableComic):
    """Retriever for the Wondermark webcomic."""
    name = 'wondermark'
    long_name = 'Wondermark'
    url = 'http://wondermark.com'
    get_url_from_archive_element = get_href

    @classmethod
    def get_archive_elements(cls):
        """Return the archive's bookmark links, reversed from page order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive/'))
        bookmarks = archive_soup.find_all('a', rel='bookmark')
        return reversed(bookmarks)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return date, image, title, alt text and tags for one archive entry.

        Pages without a ``div#comic`` yield an empty image list and empty
        title/alt strings.
        """
        raw_date = soup.find('div', class_='postdate').find('em').string
        day = string_to_date(remove_st_nd_rd_th_from_date(raw_date), "%B %d, %Y")
        comic_div = soup.find('div', id='comic')
        if not comic_div:
            img_src, alt, title = [], '', ''
        else:
            picture = comic_div.find('img')
            img_src = [picture['src']]
            alt = picture['alt']
            assert alt == picture['title']
            title = soup.find('meta', property='og:title')['content']
        tag_links = soup.find('div', class_='postmeta').find_all('a', rel='tag')
        tags = ' '.join(tag.string for tag in tag_links)
        return {
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': img_src,
            'title': title,
            'alt': alt,
            'tags': tags,
        }
1650
1651
1652 View Code Duplication
class WarehouseComic(GenericNavigableComic):
    """Retriever for the Warehouse Comic webcomic."""
    name = 'warehouse'
    long_name = 'Warehouse Comic'
    url = 'http://warehousecomic.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, publication date and image sources from a page."""
        title = soup.find('h2', class_='post-title').string
        raw_date = soup.find('span', class_='post-date').string
        posted = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find('div', id='comic').find_all('img')
        sources = [pic['src'] for pic in pictures]
        return {
            'img': sources,
            'title': title,
            'day': posted.day,
            'month': posted.month,
            'year': posted.year,
        }
1674
1675
1676
class JustSayEh(GenericNavigableComic):
    """Retriever for the Just Say Eh webcomic."""
    # Also on http://tapastic.com/series/Just-Say-Eh
    name = 'justsayeh'
    long_name = 'Just Say Eh'
    url = 'http://www.justsayeh.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return image sources, post title and the first image's alt text."""
        title = soup.find('h2', class_='post-title').string
        pictures = soup.find("div", id="comic").find_all("img")
        # Every image is expected to repeat its alt text in its title.
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        first_alt = pictures[0]['alt']
        return {
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'alt': first_alt,
        }
1697
1698
1699 View Code Duplication
class MouseBearComedy(GenericNavigableComic):
    """Retriever for the Mouse Bear Comedy webcomic."""
    # Also on http://mousebearcomedy.tumblr.com
    name = 'mousebear'
    long_name = 'Mouse Bear Comedy'
    url = 'http://www.mousebearcomedy.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return date, image sources, title and author of a comic page."""
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        raw_date = soup.find("span", class_="post-date").string
        posted = string_to_date(raw_date, '%B %d, %Y')
        pictures = soup.find("div", id="comic").find_all("img")
        # Alt text, image title and post title are all expected to agree.
        assert all(pic['alt'] == pic['title'] == title for pic in pictures)
        return {
            'day': posted.day,
            'month': posted.month,
            'year': posted.year,
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'author': author,
        }
1725
1726
1727 View Code Duplication
class BigFootJustice(GenericNavigableComic):
    """Retriever for the Big Foot Justice webcomic."""
    # Also on http://tapastic.com/series/bigfoot-justice
    name = 'bigfoot'
    long_name = 'Big Foot Justice'
    url = 'http://bigfootjustice.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return image sources and a title made of the joined image titles."""
        pictures = soup.find('div', id='comic').find_all('img')
        assert all(pic['title'] == pic['alt'] for pic in pictures)
        sources = [pic['src'] for pic in pictures]
        joined_titles = ' '.join(pic['title'] for pic in pictures)
        return {
            'img': sources,
            'title': joined_titles,
        }
1746
1747
1748 View Code Duplication
class RespawnComic(GenericNavigableComic):
    """Class to retrieve Respawn Comic."""
    # Also on http://respawncomic.tumblr.com
    name = 'respawn'
    long_name = 'Respawn Comic'
    # NOTE: the literal previously ended with a stray trailing space.
    url = 'http://respawncomic.com'
    get_navi_link = get_a_navi_comicnavnext_navinext
    get_first_comic_link = simulate_first_link
    first_url = 'http://respawncomic.com/comic/c0001/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Reads title, author and publication date from the page's <meta>
        tags and collects the og:image URLs, leaving out the two URLs
        listed in ``skip_imgs``.
        """
        title = soup.find('meta', property='og:title')['content']
        author = soup.find('meta', attrs={'name': 'shareaholic:article_author_name'})['content']
        date_str = soup.find('meta', attrs={'name': 'shareaholic:article_published_time'})['content']
        # Only the leading 10 characters are parsed, as "%Y-%m-%d".
        date_str = date_str[:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        imgs = soup.find_all('meta', property='og:image')
        # og:image URLs that are excluded from the result.
        skip_imgs = {
            'http://respawncomic.com/wp-content/uploads/2016/03/site/HAROLD2.png',
            'http://respawncomic.com/wp-content/uploads/2016/03/site/DEVA.png'
        }
        return {
            'title': title,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [i['content'] for i in imgs if i['content'] not in skip_imgs],
        }
1779
1780
1781 View Code Duplication
class SafelyEndangered(GenericNavigableComic):
    """Retriever for the Safely Endangered webcomic."""
    # Also on http://tumblr.safelyendangered.com
    name = 'endangered'
    long_name = 'Safely Endangered'
    url = 'http://www.safelyendangered.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.safelyendangered.com/comic/ignored/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return date, image sources, title and alt text of a comic page."""
        title = soup.find('h2', class_='post-title').string
        raw_date = soup.find('span', class_='post-date').string
        posted = string_to_date(raw_date, '%B %d, %Y')
        pictures = soup.find('div', id='comic').find_all('img')
        first_alt = pictures[0]['alt']
        # Every image is expected to repeat its alt text in its title.
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        return {
            'day': posted.day,
            'month': posted.month,
            'year': posted.year,
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'alt': first_alt,
        }
1808
1809
1810 View Code Duplication
class PicturesInBoxes(GenericNavigableComic):
    """Retriever for the Pictures In Boxes webcomic."""
    # Also on http://picturesinboxescomic.tumblr.com
    name = 'picturesinboxes'
    long_name = 'Pictures in Boxes'
    url = 'http://www.picturesinboxes.com'
    get_navi_link = get_a_navi_navinext
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.picturesinboxes.com/2013/10/26/tetris/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return date, image sources, title and author of a comic page."""
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        raw_date = soup.find('span', class_='post-date').string
        posted = string_to_date(raw_date, '%B %d, %Y')
        pictures = soup.find('div', class_='comicpane').find_all('img')
        # At least one image is required, and each must echo the post title.
        assert pictures
        assert all(pic['title'] == pic['alt'] == title for pic in pictures)
        return {
            'day': posted.day,
            'month': posted.month,
            'year': posted.year,
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'author': author,
        }
1838
1839
1840
class Penmen(GenericEmptyComic):
    """Entry for the Penmen comics, built on the empty-comic base class."""
    name = 'penmen'
    long_name = 'Penmen'
    url = 'http://penmen.com'
1845
1846
1847
class TheDoghouseDiaries(GenericNavigableComic):
    """Retriever for The Dog House Diaries webcomic."""
    name = 'doghouse'
    long_name = 'The Dog House Diaries'
    url = 'http://thedoghousediaries.com'

    @classmethod
    def get_first_comic_link(cls):
        """Return the ``a#firstlink`` anchor found on the home page."""
        home = get_soup_at_url(cls.url)
        return home.find('a', id='firstlink')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the ``a#nextlink`` or ``a#previouslink`` anchor."""
        anchor_id = 'nextlink' if next_ else 'previouslink'
        return last_soup.find('a', id=anchor_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return titles, alt text, image URL and number of a comic page."""
        strip = soup.find('img', src=re.compile('^dhdcomics/.*'))
        page_url = cls.get_url_from_link(link)
        title = soup.find('h2', id='titleheader').string
        subtitle = soup.find('div', id='subtext').string
        alt = strip.get('title')
        # The src attribute is stripped of surrounding whitespace before joining.
        img_url = urljoin_wrapper(page_url, strip['src'].strip())
        # The comic number is the last path component of the page URL.
        num = int(page_url.split('/')[-1])
        return {
            'title': title,
            'title2': subtitle,
            'alt': alt,
            'img': [img_url],
            'num': num,
        }
1875
1876
1877
class InvisibleBread(GenericListableComic):
    """Class to retrieve Invisible Bread comics."""
    # Also on http://www.gocomics.com/invisible-bread
    name = 'invisiblebread'
    long_name = 'Invisible Bread'
    url = 'http://invisiblebread.com'

    @classmethod
    def get_archive_elements(cls):
        """Return the archive's title cells, reversed from page order."""
        archive_url = urljoin_wrapper(cls.url, 'archives/')
        return reversed(get_soup_at_url(archive_url).find_all('td', class_='archive-title'))

    @classmethod
    def get_url_from_archive_element(cls, td):
        """Return the href of the link contained in an archive title cell."""
        return td.find('a')['href']

    @classmethod
    def get_comic_info(cls, soup, td):
        """Get information about a particular comics.

        The title comes from the archive cell's link. The date combines
        the text of the cell's previous sibling (parsed as "%b %d") with
        the year captured from the comic URL.
        """
        url = cls.get_url_from_archive_element(td)
        title = td.find('a').string
        month_and_day = td.previous_sibling.string
        # Escape the base URL so that its dots only match literal dots.
        link_re = re.compile('^%s/([0-9]+)/' % re.escape(cls.url))
        year = link_re.match(url).groups()[0]
        date_str = month_and_day + ' ' + year
        day = string_to_date(date_str, '%b %d %Y')
        # Only the first image of the comic div is used.
        imgs = [soup.find('div', id='comic').find('img')]
        assert all(i['title'] == i['alt'] == title for i in imgs)
        return {
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
            'title': title,
        }
1913
1914
1915
# Retrieval does not work anymore, hence the empty-comic base class.
class DiscoBleach(GenericEmptyComic):
    """Placeholder for the Disco Bleach comics."""
    name = 'discobleach'
    long_name = 'Disco Bleach'
    url = 'http://discobleach.com'
1920
1921
1922
# Retrieval does not work anymore, hence the empty-comic base class.
class TubeyToons(GenericEmptyComic):
    """Placeholder for the TubeyToons comics."""
    # Mirrors: http://tapastic.com/series/Tubey-Toons
    #          http://tubeytoons.tumblr.com
    name = 'tubeytoons'
    long_name = 'Tubey Toons'
    url = 'http://tubeytoons.com'
1929
1930
1931 View Code Duplication
class CompletelySeriousComics(GenericNavigableComic):
    """Retriever for the Completely Serious Comics webcomic."""
    name = 'completelyserious'
    long_name = 'Completely Serious Comics'
    url = 'http://completelyseriouscomics.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return date, image sources, title, alt text and author of a page."""
        title = soup.find('h2', class_='post-title').string
        # The author is read from the second child of the post-author span.
        author_span = soup.find('span', class_='post-author')
        author = author_span.contents[1].string
        raw_date = soup.find('span', class_='post-date').string
        posted = string_to_date(raw_date, '%B %d, %Y')
        pictures = soup.find('div', class_='comicpane').find_all('img')
        assert pictures
        alt = pictures[0]['title']
        # All images are expected to share the same title/alt text.
        assert all(pic['title'] == pic['alt'] == alt for pic in pictures)
        return {
            'month': posted.month,
            'year': posted.year,
            'day': posted.day,
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'alt': alt,
            'author': author,
        }
1959
1960
1961
class PoorlyDrawnLines(GenericListableComic):
    """Class to retrieve Poorly Drawn Lines comics."""
    # Also on http://pdlcomics.tumblr.com
    name = 'poorlydrawn'
    long_name = 'Poorly Drawn Lines'
    url = 'http://poorlydrawnlines.com'
    get_url_from_archive_element = get_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        At most one image is expected inside the post div. With no image,
        the image list is empty and the title is the empty string.
        """
        imgs = soup.find('div', class_='post').find_all('img')
        assert len(imgs) <= 1
        return {
            'img': [i['src'] for i in imgs],
            'title': imgs[0].get('title', "") if imgs else "",
        }

    @classmethod
    def get_archive_elements(cls):
        """Return the archive links to comic pages, reversed from page order."""
        archive_url = urljoin_wrapper(cls.url, 'archive')
        # Escape the base URL so that its dots only match literal dots.
        url_re = re.compile('^%s/comic/.' % re.escape(cls.url))
        return reversed(get_soup_at_url(archive_url).find_all('a', href=url_re))
1984
1985
1986 View Code Duplication
class LoadingComics(GenericNavigableComic):
    """Retriever for the Loading Artist webcomic."""
    name = 'loadingartist'
    long_name = 'Loading Artist'
    url = 'http://www.loadingartist.com/latest'

    @classmethod
    def get_first_comic_link(cls):
        """Return the anchor titled "First" on the page at ``cls.url``."""
        latest = get_soup_at_url(cls.url)
        return latest.find('a', title="First")

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor titled 'Next' or 'Previous'."""
        wanted_title = 'Next' if next_ else 'Previous'
        return last_soup.find('a', title=wanted_title)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, image sources and publication date of a comic page."""
        title = soup.find('h1').string
        raw_date = soup.find('span', class_='date').string.strip()
        posted = string_to_date(raw_date, "%B %d, %Y")
        # Only images with empty alt and title attributes are selected.
        comic_div = soup.find('div', class_='comic')
        pictures = comic_div.find_all('img', alt='', title='')
        return {
            'title': title,
            'img': [pic['src'] for pic in pictures],
            'month': posted.month,
            'year': posted.year,
            'day': posted.day,
        }
2015
2016
2017 View Code Duplication
class ChuckleADuck(GenericNavigableComic):
    """Retriever for the Chuckle-A-Duck webcomic."""
    name = 'chuckleaduck'
    long_name = 'Chuckle-A-duck'
    url = 'http://chuckleaduck.com'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return date, image sources, title and author of a comic page.

        A page without ``div#comic`` (or without images in it) yields an
        empty image list and an empty title.
        """
        raw_date = soup.find('span', class_='post-date').string
        posted = string_to_date(remove_st_nd_rd_th_from_date(raw_date), "%B %d, %Y")
        author = soup.find('span', class_='post-author').string
        comic_div = soup.find('div', id='comic')
        if comic_div:
            pictures = comic_div.find_all('img')
        else:
            pictures = []
        if pictures:
            title = pictures[0]['title']
        else:
            title = ""
        assert all(pic['title'] == pic['alt'] == title for pic in pictures)
        return {
            'month': posted.month,
            'year': posted.year,
            'day': posted.day,
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'author': author,
        }
2043
2044
2045
class DepressedAlien(GenericNavigableComic):
    """Class to retrieve Depressed Alien Comics."""
    name = 'depressedalien'
    long_name = 'Depressed Alien'
    url = 'http://depressedalien.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        The link is the parent element of the image named 'beginArrow'.
        """
        return get_soup_at_url(cls.url).find('img', attrs={'name': 'beginArrow'}).parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the parent of the right/left arrow image, or None.

        ``find`` returns None when the page has no such arrow; in that
        case None is returned instead of failing on ``.parent``.
        """
        arrow = last_soup.find('img', attrs={'name': 'rightArrow' if next_ else 'leftArrow'})
        return arrow.parent if arrow is not None else None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The title is read from the twitter:title meta tag and the images
        from every og:image meta tag.
        """
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        imgs = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'img': [i['content'] for i in imgs],
        }
2070
2071
2072
class ThingsInSquares(GenericListableComic):
    """Retriever for the Things In Squares webcomic."""
    # This can be retrieved in other languages
    # Also on https://tapastic.com/series/Things-in-Squares
    name = 'squares'
    long_name = 'Things in squares'
    url = 'http://www.thingsinsquares.com'

    @classmethod
    def get_comic_info(cls, soup, tr):
        """Return date, titles, description, tags, images and alt text.

        The date and first title come from the archive row ``tr``; the
        rest is read from the comic page ``soup``.
        """
        _, link_cell, date_cell = tr.find_all('td')
        anchor = link_cell.find('a')
        posted = string_to_date(date_cell.string, "%m.%d.%y")
        title = anchor.string
        title2 = soup.find('meta', property='og:title')['content']
        desc_meta = soup.find('meta', property='og:description')
        if desc_meta:
            description = desc_meta['content']
        else:
            description = ''
        tag_metas = soup.find_all('meta', property='article:tag')
        tags = ' '.join(meta['content'] for meta in tag_metas)
        pictures = soup.find('div', class_='entry-content').find_all('img')
        return {
            'day': posted.day,
            'month': posted.month,
            'year': posted.year,
            'title': title,
            'title2': title2,
            'description': description,
            'tags': tags,
            'img': [pic['src'] for pic in pictures],
            'alt': ' '.join(pic['alt'] for pic in pictures),
        }

    @classmethod
    def get_url_from_archive_element(cls, tr):
        """Return the href of the link in the row's second cell."""
        # The row must hold exactly three cells for the unpacking to work.
        _, link_cell, _ = tr.find_all('td')
        return link_cell.find('a')['href']

    @classmethod
    def get_archive_elements(cls):
        """Return the archive table's body rows, reversed from page order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive-2'))
        rows = archive_soup.find('tbody').find_all('tr')
        return reversed(rows)
2114
2115
2116
class HappleTea(GenericNavigableComic):
    """Retriever for the Happle Tea webcomic."""
    name = 'happletea'
    long_name = 'Happle Tea'
    url = 'http://www.happletea.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, images, alt text, date and author of a comic page."""
        pictures = soup.find('div', id='comic').find_all('img')
        post = soup.find('div', class_='post-content')
        title = post.find('h2', class_='post-title').string
        author = post.find('a', rel='author').string
        raw_date = post.find('span', class_='post-date').string
        posted = string_to_date(raw_date, "%B %d, %Y")
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        # Alt texts are concatenated without any separator.
        joined_alt = ''.join(pic['alt'] for pic in pictures)
        return {
            'title': title,
            'img': [pic['src'] for pic in pictures],
            'alt': joined_alt,
            'month': posted.month,
            'year': posted.year,
            'day': posted.day,
            'author': author,
        }
2143
2144
2145
class FatAwesomeComics(GenericNavigableComic):
    """Retriever for the Fat Awesome webcomic."""
    # Also on http://fatawesomecomedy.tumblr.com
    name = 'fatawesome'
    long_name = 'Fat Awesome'
    url = 'http://fatawesome.com/comics'
    get_navi_link = get_a_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://fatawesome.com/shortbus/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, description, tags, alt text, image and date."""
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        description = soup.find('meta', attrs={'name': 'description'})['content']
        tag_meta = soup.find('meta', property='article:tag')
        if tag_meta:
            tags = tag_meta['content']
        else:
            tags = ""
        published = soup.find('meta', property='article:published_time')['content']
        # Only the leading 10 characters are parsed, as "%Y-%m-%d".
        posted = string_to_date(published[:10], "%Y-%m-%d")
        pictures = soup.find_all('img', attrs={'data-recalc-dims': "1"})
        assert len(pictures) == 1
        # Everything after the last '?' in the image source is dropped.
        sources = [pic['src'].rsplit('?', 1)[0] for pic in pictures]
        return {
            'title': title,
            'description': description,
            'tags': tags,
            'alt': "".join(pic['alt'] for pic in pictures),
            'img': sources,
            'month': posted.month,
            'year': posted.year,
            'day': posted.day,
        }
2176
2177
2178
class AnythingComic(GenericListableComic):
    """Retriever for the Anything Comic webcomic."""
    # Also on http://tapastic.com/series/anything
    name = 'anythingcomic'
    long_name = 'Anything Comic'
    url = 'http://www.anythingcomic.com'

    @classmethod
    def get_archive_elements(cls):
        """Return the rows of the chapter table, minus the first two."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive/'))
        rows = archive_soup.find('table', id='chapter_table').find_all('tr')
        # The first 2 <tr>'s do not correspond to comics
        return rows[2:]

    @classmethod
    def get_url_from_archive_element(cls, tr):
        """Return the absolute URL of the comic linked from an archive row."""
        # The row must hold exactly four cells for the unpacking to work.
        _, comic_cell, _, _ = tr.find_all('td')
        anchor = comic_cell.find('a')
        return urljoin_wrapper(cls.url, anchor['href'])

    @classmethod
    def get_comic_info(cls, soup, tr):
        """Return number, title, alt text, image and date of a comic.

        Number, title and date come from the archive row ``tr``; the
        image is read from the comic page ``soup``.
        """
        num_cell, comic_cell, date_cell, _ = tr.find_all('td')
        num = int(num_cell.string)
        title = comic_cell.find('a').string
        pictures = soup.find_all('img', id='comic_image')
        posted = string_to_date(date_cell.string, '%d %b %Y %I:%M %p')
        assert len(pictures) == 1
        assert all(pic.get('alt') == pic.get('title') for pic in pictures)
        return {
            'num': num,
            'title': title,
            'alt': pictures[0].get('alt', ''),
            'img': [pic['src'] for pic in pictures],
            'month': posted.month,
            'year': posted.year,
            'day': posted.day,
        }
2218
2219
2220
class LonnieMillsap(GenericNavigableComic):
    """Retriever for Lonnie Millsap's comics."""
    name = 'millsap'
    long_name = 'Lonnie Millsap'
    url = 'http://www.lonniemillsap.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.lonniemillsap.com/?p=42'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, author, image sources and date of a comic page."""
        title = soup.find('h2', class_='post-title').string
        # Author, date and images are all looked up inside the post content.
        post = soup.find('div', class_='post-content')
        author_span = post.find("span", class_="post-author")
        author = author_span.find("a").string
        raw_date = post.find("span", class_="post-date").string
        posted = string_to_date(raw_date, "%B %d, %Y")
        pictures = post.find("div", class_="entry").find_all("img")
        return {
            'title': title,
            'author': author,
            'img': [pic['src'] for pic in pictures],
            'month': posted.month,
            'year': posted.year,
            'day': posted.day,
        }
2246
2247
2248 View Code Duplication
class LinsEditions(GenericNavigableComic):
    """Retriever for the L.I.N.S. Editions webcomic."""
    # Also on http://linscomics.tumblr.com
    name = 'lins'
    long_name = 'L.I.N.S. Editions'
    url = 'https://linsedition.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'https://linsedition.com/2011/09/07/l-i-n-s/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, og:image URLs and publication date of a page."""
        title = soup.find('meta', property='og:title')['content']
        image_metas = soup.find_all('meta', property='og:image')
        published = soup.find('meta', property='article:published_time')['content']
        # Only the leading 10 characters are parsed, as "%Y-%m-%d".
        posted = string_to_date(published[:10], "%Y-%m-%d")
        return {
            'title': title,
            'img': [meta['content'] for meta in image_metas],
            'month': posted.month,
            'year': posted.year,
            'day': posted.day,
        }
2272
2273
2274
class ThorsThundershack(GenericNavigableComic):
    """Retriever for the Thor's Thundershack webcomic."""
    # Also on http://tapastic.com/series/Thors-Thundershac
    name = 'thor'
    long_name = 'Thor\'s Thundershack'
    url = 'http://www.thorsthundershack.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the home-page anchor with class 'first navlink'."""
        home = get_soup_at_url(cls.url)
        return home.find('a', class_='first navlink')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the first rel=next/prev anchor not pointing at '/comic'.

        Returns None when no such anchor exists.
        """
        candidates = last_soup.find_all('a', rel='next' if next_ else 'prev')
        return next(
            (anchor for anchor in candidates if anchor['href'] != '/comic'),
            None)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return images, date, author, title, alt text and description."""
        title = soup.find('meta', attrs={'name': 'description'})["content"]
        description = soup.find('div', itemprop='articleBody').text
        author = soup.find('span', itemprop='author copyrightHolder').string
        pictures = soup.find_all('img', itemprop='image')
        assert all(pic['title'] == pic['alt'] for pic in pictures)
        if pictures:
            alt = pictures[0]['alt']
        else:
            alt = ""
        published = soup.find('time', itemprop='datePublished')["datetime"]
        posted = string_to_date(published, "%Y-%m-%d %H:%M:%S")
        # Image sources are resolved against the class-level base URL.
        img_urls = [urljoin_wrapper(cls.url, pic['src']) for pic in pictures]
        return {
            'img': img_urls,
            'month': posted.month,
            'year': posted.year,
            'day': posted.day,
            'author': author,
            'title': title,
            'alt': alt,
            'description': description,
        }
2315
2316
2317 View Code Duplication
class GerbilWithAJetpack(GenericNavigableComic):
    """Class to retrieve GerbilWithAJetpack comics."""
    name = 'gerbil'
    long_name = 'Gerbil With A Jetpack'
    url = 'http://gerbilwithajetpack.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        Reads the post title, author and date from `soup`, plus every image
        found in the `div#comic` element.  Returns a dict with the keys
        'img', 'title', 'alt', 'author', 'day', 'month' and 'year'.
        """
        title = soup.find('h2', class_='post-title').string
        author = soup.find("span", class_="post-author").find("a").string
        date_str = soup.find("span", class_="post-date").string
        day = string_to_date(date_str, "%B %d, %Y")
        imgs = soup.find("div", id="comic").find_all("img")
        # A post without any image must not raise IndexError: fall back to an
        # empty alt text, as the sibling classes in this file do.
        alt = imgs[0]['alt'] if imgs else ""
        # Every image is expected to carry the same alt/title text.
        assert all(i['alt'] == i['title'] == alt for i in imgs)
        return {
            'img': [i['src'] for i in imgs],
            'title': title,
            'alt': alt,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
2344
2345
2346
class EveryDayBlues(GenericNavigableComic):
    """Retriever for the Every Day Blues webcomic."""
    name = "blues"
    long_name = "Every Day Blues"
    url = "http://everydayblues.net"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dictionary for the comic page in `soup`."""
        title = soup.find("h2", class_="post-title").string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        raw_date = soup.find("span", class_="post-date").string
        # The German locale name is handed to string_to_date with the format.
        published = string_to_date(raw_date, "%d. %B %Y", "de_DE.utf8")
        pictures = soup.find("div", id="comic").find_all("img")
        assert all(pic['alt'] == pic['title'] == title for pic in pictures)
        assert len(pictures) <= 1
        return {
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'author': author,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2372
2373
2374
class BiterComics(GenericNavigableComic):
    """Retriever for the Biter Comics webcomic."""
    name = "biter"
    long_name = "Biter Comics"
    url = "http://www.bitercomics.com"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dictionary for the comic page in `soup`."""
        title = soup.find("h1", class_="entry-title").string
        author_span = soup.find("span", class_="author vcard")
        author = author_span.find("a").string
        raw_date = soup.find("span", class_="entry-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find("div", id="comic").find_all("img")
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        # Exactly one image is expected on each page.
        assert len(pictures) == 1
        alt = pictures[0]['alt']
        return {
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'alt': alt,
            'author': author,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2402
2403
2404 View Code Duplication
class TheAwkwardYeti(GenericNavigableComic):
    """Retriever for The Awkward Yeti webcomic."""
    # Also on http://www.gocomics.com/the-awkward-yeti
    # Also on http://larstheyeti.tumblr.com
    # Also on https://tapastic.com/series/TheAwkwardYeti
    name = 'yeti'
    long_name = 'The Awkward Yeti'
    url = 'http://theawkwardyeti.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dictionary for the comic page in `soup`."""
        title = soup.find('h2', class_='post-title').string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find("div", id="comic").find_all("img")
        # Only the first image is required to have matching alt and title.
        assert all(pic['alt'] == pic['title'] for pic in pictures[:1])
        return {
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2430
2431
2432
class PleasantThoughts(GenericNavigableComic):
    """Retriever for the Pleasant Thoughts webcomic."""
    name = 'pleasant'
    long_name = 'Pleasant Thoughts'
    url = 'http://pleasant-thoughts.com'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dictionary for the comic page in `soup`."""
        content = soup.find('div', class_='post-content')
        title = content.find('h2', class_='post-title').string
        entry = content.find("div", class_="entry")
        pictures = entry.find_all("img")
        return {
            'title': title,
            'img': [pic['src'] for pic in pictures],
        }
2450
2451
2452
class MisterAndMe(GenericNavigableComic):
    """Retriever for the Mister & Me webcomic."""
    # Also on http://www.gocomics.com/mister-and-me
    # Also on https://tapastic.com/series/Mister-and-Me
    name = 'mister'
    long_name = 'Mister & Me'
    url = 'http://www.mister-and-me.com'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dictionary for the comic page in `soup`."""
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find("div", id="comic").find_all("img")
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        # At most one image is expected; a page may have none.
        assert len(pictures) <= 1
        if pictures:
            alt = pictures[0]['alt']
        else:
            alt = ""
        return {
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'alt': alt,
            'author': author,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2482
2483
2484 View Code Duplication
class LastPlaceComics(GenericNavigableComic):
    """Retriever for the Last Place Comics webcomic."""
    name = 'lastplace'
    long_name = 'LastPlaceComics'
    url = "http://lastplacecomics.com"
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dictionary for the comic page in `soup`."""
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find("div", id="comic").find_all("img")
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        # At most one image is expected; a page may have none.
        assert len(pictures) <= 1
        if pictures:
            alt = pictures[0]['alt']
        else:
            alt = ""
        return {
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'alt': alt,
            'author': author,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2512
2513
2514
class TalesOfAbsurdity(GenericNavigableComic):
    """Retriever for the Tales Of Absurdity webcomic."""
    # Also on http://tapastic.com/series/Tales-Of-Absurdity
    # Also on http://talesofabsurdity.tumblr.com
    name = 'absurdity'
    long_name = 'Tales of Absurdity'
    url = 'http://talesofabsurdity.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dictionary for the comic page in `soup`."""
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find("div", id="comic").find_all("img")
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        if pictures:
            alt = pictures[0]['alt']
        else:
            alt = ""
        return {
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'alt': alt,
            'author': author,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2543
2544
2545 View Code Duplication
class EndlessOrigami(GenericNavigableComic):
    """Retriever for the Endless Origami webcomic."""
    name = "origami"
    long_name = "Endless Origami"
    url = "http://endlessorigami.com"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dictionary for the comic page in `soup`."""
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find("div", id="comic").find_all("img")
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        if pictures:
            alt = pictures[0]['alt']
        else:
            alt = ""
        return {
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'alt': alt,
            'author': author,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2572
2573
2574 View Code Duplication
class PlanC(GenericNavigableComic):
    """Retriever for the Plan C webcomic."""
    name = 'planc'
    long_name = 'Plan C'
    url = 'http://www.plancomic.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dictionary for the comic page in `soup`."""
        title = soup.find('h2', class_='post-title').string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        comic_div = soup.find('div', id='comic')
        pictures = comic_div.find_all('img')
        return {
            'title': title,
            'img': [pic['src'] for pic in pictures],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2596
2597
2598 View Code Duplication
class BuniComic(GenericNavigableComic):
    """Retriever for the Buni webcomic."""
    name = 'buni'
    long_name = 'BuniComics'
    url = 'http://www.bunicomic.com'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dictionary for the comic page in `soup`."""
        comic_div = soup.find('div', id='comic')
        pictures = comic_div.find_all('img')
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        # Exactly one image is expected; its title doubles as the comic title.
        assert len(pictures) == 1
        return {
            'img': [pic['src'] for pic in pictures],
            'title': pictures[0]['title'],
        }
2616
2617
2618
class GenericCommitStrip(GenericNavigableComic):
    """Shared retriever for Commit Strip; language subclasses set the URLs."""
    get_navi_link = get_a_rel_next
    get_first_comic_link = simulate_first_link
    first_url = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dictionary for the comic page in `soup`."""
        desc = soup.find('meta', property='og:description')['content']
        title = soup.find('meta', property='og:title')['content']
        entry = soup.find('div', class_='entry-content')
        pictures = entry.find_all('img')
        # Secondary title: the images' title attributes joined by spaces
        # (an image without a title contributes an empty string).
        title2 = ' '.join(pic.get('title', '') for pic in pictures)
        img_urls = [
            urljoin_wrapper(cls.url, convert_iri_to_plain_ascii_uri(pic['src']))
            for pic in pictures
        ]
        return {
            'title': title,
            'title2': title2,
            'description': desc,
            'img': img_urls,
        }
2637
2638
2639
class CommitStripFr(GenericCommitStrip):
    """Retriever for the French edition of Commit Strip."""
    name = "commit_fr"
    long_name = "Commit Strip (Fr)"
    url = "http://www.commitstrip.com/fr"
    # Page used as the starting point of the navigation.
    first_url = "http://www.commitstrip.com/fr/2012/02/22/interview/"
2645
2646
2647
class CommitStripEn(GenericCommitStrip):
    """Retriever for the English edition of Commit Strip."""
    name = "commit_en"
    long_name = "Commit Strip (En)"
    url = "http://www.commitstrip.com/en"
    # Page used as the starting point of the navigation.
    first_url = "http://www.commitstrip.com/en/2012/02/22/interview/"
2653
2654
2655
class GenericBoumerie(GenericNavigableComic):
    """Shared retriever for Boumeries; subclasses set date format and locale."""
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next
    date_format = NotImplemented
    lang = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dictionary for the comic page in `soup`."""
        title = soup.find('h2', class_='post-title').string
        short_url = soup.find('link', rel='shortlink')['href']
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        raw_date = soup.find('span', class_='post-date').string
        # Format and locale are supplied by the language-specific subclass.
        published = string_to_date(raw_date, cls.date_format, cls.lang)
        pictures = soup.find('div', id='comic').find_all('img')
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        return {
            'short_url': short_url,
            'img': [pic['src'] for pic in pictures],
            'title': title,
            'author': author,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2681
2682
2683
class BoumerieEn(GenericBoumerie):
    """Retriever for the English edition of Boumeries."""
    name = "boumeries_en"
    long_name = "Boumeries (En)"
    url = "http://comics.boumerie.com"
    # Date format and locale name passed to string_to_date by the base class.
    date_format = "%B %d, %Y"
    lang = "en_GB.UTF-8"
2690
2691
2692
class BoumerieFr(GenericBoumerie):
    """Retriever for the French edition of Boumeries."""
    name = "boumeries_fr"
    long_name = "Boumeries (Fr)"
    url = "http://bd.boumerie.com"
    # Date format and locale name passed to string_to_date by the base class.
    date_format = "%A, %d %B %Y"
    lang = "fr_FR.utf8"
2699
2700
2701 View Code Duplication
class UnearthedComics(GenericNavigableComic):
    """Class to retrieve Unearthed comics."""
    # Also on http://tapastic.com/series/UnearthedComics
    # Also on http://unearthedcomics.tumblr.com
    name = 'unearthed'
    long_name = 'Unearthed Comics'
    url = 'http://unearthedcomics.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://unearthedcomics.com/comics/world-with-turn-signals/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        Returns a dict with the keys 'title', 'description', 'url2', 'img',
        'month', 'year' and 'day'.  'title' and 'description' fall back to an
        empty string when the corresponding element is missing from the page.
        """
        short_url = soup.find('link', rel='shortlink')['href']
        # The title is in the first <h1>, or failing that the first <h2>.
        title_elt = soup.find('h1') or soup.find('h2')
        title = title_elt.string if title_elt else ""
        # Keep the text of the og:description tag, not the tag object itself,
        # so that 'description' is a plain string like in the other classes.
        desc_elt = soup.find('meta', property='og:description')
        desc = desc_elt['content'] if desc_elt is not None else ""
        date_str = soup.find('time', class_='published updated hidden')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        post = soup.find('div', class_="entry content entry-content type-portfolio")
        imgs = post.find_all('img')
        return {
            'title': title,
            'description': desc,
            'url2': short_url,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
2732
2733
2734 View Code Duplication
class Optipess(GenericNavigableComic):
    """Retriever for the Optipess webcomic."""
    name = 'optipess'
    long_name = 'Optipess'
    url = 'http://www.optipess.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dictionary for the comic page in `soup`."""
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        comic_div = soup.find('div', id='comic')
        # A page may have no comic container at all: then there is no image.
        if comic_div:
            pictures = comic_div.find_all('img')
        else:
            pictures = []
        if pictures:
            alt = pictures[0]['title']
        else:
            alt = ""
        assert all(pic['alt'] == pic['title'] == alt for pic in pictures)
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, "%B %d, %Y")
        return {
            'title': title,
            'alt': alt,
            'author': author,
            'img': [pic['src'] for pic in pictures],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2762
2763
2764
class PainTrainComic(GenericNavigableComic):
    """Class to retrieve Pain Train Comics."""
    name = 'paintrain'
    long_name = 'Pain Train Comics'
    url = 'http://paintraincomic.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        Returns a dict with the keys 'short_url', 'num', 'img', 'month',
        'year', 'day', 'alt' and 'title'.  'num' is the integer found after
        '?p=' in the page's shortlink.
        """
        title = soup.find('h2', class_='post-title').string
        short_url = soup.find('link', rel='shortlink')['href']
        # Escape the site URL so that its dots only match literal dots.
        short_url_re = re.compile(r'^%s/\?p=([0-9]*)' % re.escape(cls.url))
        num = int(short_url_re.match(short_url).groups()[0])
        imgs = soup.find('div', id='comic').find_all('img')
        alt = imgs[0]['title']
        # Every image is expected to carry the same alt/title text.
        assert all(i['alt'] == i['title'] == alt for i in imgs)
        date_str = soup.find('span', class_='post-date').string
        day = string_to_date(date_str, "%d/%m/%Y")
        return {
            'short_url': short_url,
            'num': num,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'alt': alt,
            'title': title,
        }
2794
2795
2796
class MoonBeard(GenericNavigableComic):
    """Class to retrieve MoonBeard comics."""
    # Also on http://blog.squiresjam.es/moonbeard
    # Also on http://www.webtoons.com/en/comedy/moon-beard/list?title_no=471
    name = 'moonbeard'
    long_name = 'Moon Beard'
    url = 'http://moonbeard.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        Returns a dict with the keys 'short_url', 'num', 'img', 'month',
        'year', 'day', 'title', 'tags', 'alt' and 'author'.  'num' is the
        integer found after '?p=' in the page's shortlink; 'tags' is the
        space-joined content of the 'article:tag' meta elements.
        """
        title = soup.find('h2', class_='post-title').string
        short_url = soup.find('link', rel='shortlink')['href']
        # Escape the site URL so that its dots only match literal dots.
        short_url_re = re.compile(r'^%s/\?p=([0-9]*)' % re.escape(cls.url))
        num = int(short_url_re.match(short_url).groups()[0])
        imgs = soup.find('div', id='comic').find_all('img')
        alt = imgs[0]['title']
        # Every image is expected to carry the same alt/title text.
        assert all(i['alt'] == i['title'] == alt for i in imgs)
        date_str = soup.find('span', class_='post-date').string
        day = string_to_date(date_str, "%B %d, %Y")
        tags = ' '.join(t['content'] for t in soup.find_all('meta', property='article:tag'))
        author = soup.find('span', class_='post-author').string
        return {
            'short_url': short_url,
            'num': num,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'title': title,
            'tags': tags,
            'alt': alt,
            'author': author,
        }
2832
2833
2834
class AHamADay(GenericNavigableComic):
    """Class to retrieve A Ham A Day comics."""
    name = 'ham'
    long_name = 'A Ham A Day'
    url = 'http://www.ahammaday.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.ahammaday.com/today/3/6/french'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get the link to the next (or previous) comic, or None if absent.

        On this site the roles are swapped: the 'previous' list item is used
        to move forward and the 'next' one to move backward.
        """
        # prev is next / next is prev
        li = last_soup.find('li', class_='previous' if next_ else 'next')
        # No matching list item (e.g. at either end of the archive): report
        # "no link" instead of raising AttributeError on None.
        return li.find('a') if li else None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        Returns a dict with the keys 'img', 'title', 'author', 'day', 'month'
        and 'year'; image URLs come from the meta elements with
        itemprop='image'.
        """
        date_str = soup.find('time', class_='published')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        author = soup.find('span', class_='blog-author').find('a').string
        title = soup.find('meta', property='og:title')['content']
        imgs = soup.find_all('meta', itemprop='image')
        return {
            'img': [i['content'] for i in imgs],
            'title': title,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
        }
2864
2865
2866 View Code Duplication
class LittleLifeLines(GenericNavigableComic):
    """Class to retrieve Little Life Lines comics."""
    name = 'life'
    long_name = 'Little Life Lines'
    url = 'http://www.littlelifelines.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.littlelifelines.com/comics/well-done'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get the link to the next (or previous) comic, or None if absent.

        On this site the roles are swapped: the 'prev' list item is used to
        move forward and the 'next' one to move backward.
        """
        # prev is next / next is prev
        li = last_soup.find('li', class_='prev' if next_ else 'next')
        return li.find('a') if li else None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        Returns a dict with the keys 'title', 'alt', 'description', 'author',
        'day', 'month', 'year' and 'img'.  'alt' is the alt text of the first
        image, or an empty string when the post has no usable image.
        """
        title = soup.find('meta', property='og:title')['content']
        desc = soup.find('meta', property='og:description')['content']
        date_str = soup.find('time', class_='published')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        author = soup.find('a', rel='author').string
        div_content = soup.find('div', class_="body entry-content")
        imgs = div_content.find_all('img')
        # Only keep the images that actually have a source.
        imgs = [i for i in imgs if i.get('src') is not None]
        # The filtered list may be empty: do not raise IndexError then.
        alt = imgs[0]['alt'] if imgs else ""
        return {
            'title': title,
            'alt': alt,
            'description': desc,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [i['src'] for i in imgs],
        }
2903
2904
2905
class GenericWordPressInkblot(GenericNavigableComic):
    """Shared retriever for comics published with WordPress and Inkblot."""
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'first webcomic' anchor found on the home page."""
        first_class = 'webcomic-link webcomic1-link first-webcomic-link first-webcomic1-link'
        home = get_soup_at_url(cls.url)
        return home.find('a', class_=first_class)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dictionary for the comic page in `soup`."""
        title = soup.find('meta', property='og:title')['content']
        pictures = soup.find('div', class_='webcomic-image').find_all('img')
        published_meta = soup.find('meta', property='article:published_time')
        # Only the first 10 characters are parsed, as a "%Y-%m-%d" date.
        raw_date = published_meta['content'][:10]
        published = string_to_date(raw_date, "%Y-%m-%d")
        return {
            'title': title,
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [pic['src'] for pic in pictures],
        }
2928
2929
2930
class EverythingsStupid(GenericWordPressInkblot):
    """Retriever for the Everything's Stupid webcomic."""
    # Also on http://tapastic.com/series/EverythingsStupid
    # Also on http://www.webtoons.com/en/challenge/everythings-stupid/list?title_no=14591
    # Also on http://everythingsstupidcomics.tumblr.com
    name = "stupid"
    long_name = "Everything's Stupid"
    url = "http://everythingsstupid.net"
2938
2939
2940
class TheIsmComics(GenericWordPressInkblot):
    """Retriever for The Ism webcomic."""
    # Also on https://tapastic.com/series/TheIsm (?)
    name = "theism"
    long_name = "The Ism"
    url = "http://www.theism-comics.com"
2946
2947
2948
class WoodenPlankStudios(GenericWordPressInkblot):
    """Retriever for the Wooden Plank Studios webcomic."""
    name = "woodenplank"
    long_name = "Wooden Plank Studios"
    url = "http://woodenplankstudios.com"
2953
2954
2955
class ElectricBunnyComic(GenericNavigableComic):
    """Retriever for the Electric Bunny webcomic."""
    # Also on http://electricbunnycomics.tumblr.com
    name = 'bunny'
    long_name = 'Electric Bunny Comic'
    url = 'http://www.electricbunnycomics.com/View/Comic/153/Welcome+to+Hell'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the parent element of the image whose alt text is 'First'."""
        first_img = get_soup_at_url(cls.url).find('img', alt='First')
        return first_img.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the parent of the 'Next' (or 'Back') image, None if absent."""
        wanted_alt = 'Next' if next_ else 'Back'
        button = last_soup.find('img', alt=wanted_alt)
        if button:
            return button.parent
        return None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dictionary for the comic page in `soup`."""
        title = soup.find('meta', property='og:title')['content']
        image_metas = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'img': [meta['content'] for meta in image_metas],
        }
2982
2983
2984
class SheldonComics(GenericNavigableComic):
    """Retriever for the Sheldon webcomic."""
    # Also on http://www.gocomics.com/sheldon
    name = 'sheldon'
    long_name = 'Sheldon Comics'
    url = 'http://www.sheldoncomics.com'

    @classmethod
    def get_first_comic_link(cls):
        """Return the anchor with id 'nav-first' found on the home page."""
        home = get_soup_at_url(cls.url)
        return home.find("a", id="nav-first")

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the next/previous anchor, skipping links to the home page."""
        wanted_id = "nav-next" if next_ else "nav-prev"
        anchors = last_soup.find_all("a", id=wanted_id)
        return next(
            (a for a in anchors if a['href'] != 'http://www.sheldoncomics.com'),
            None)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dictionary for the comic page in `soup`."""
        foot = soup.find("div", id="comic-foot")
        pictures = foot.find_all("img")
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        # Exactly one image is expected; its title doubles as the comic title.
        assert len(pictures) == 1
        title = pictures[0]['title']
        return {
            'title': title,
            'img': [pic['src'] for pic in pictures],
        }
3014
3015
3016
class CubeDrone(GenericNavigableComic):
    """Retriever for the Cube Drone webcomic."""
    name = 'cubedrone'
    long_name = 'Cube Drone'
    url = 'http://cube-drone.com/comics'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the parent element of the 'backward' glyph icon."""
        home = get_soup_at_url(cls.url)
        icon = home.find('span', class_='glyphicon glyphicon-backward')
        return icon.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the parent element of the right (or left) chevron icon."""
        direction = 'right' if next_ else 'left'
        icon = last_soup.find('span', class_='glyphicon glyphicon-chevron-' + direction)
        return icon.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dictionary for the comic page in `soup`."""
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        url2 = soup.find('meta', attrs={'name': 'twitter:url'})['content']
        # date_str = soup.find('h2', class_='comic_title').find('small').string
        # day = string_to_date(date_str, "%B %d, %Y, %I:%M %p")
        pictures = soup.find_all('img', class_='comic img-responsive')
        first_picture = pictures[0]
        title2 = first_picture['title']
        alt = first_picture['alt']
        return {
            'url2': url2,
            'title': title,
            'title2': title2,
            'alt': alt,
            'img': [pic['src'] for pic in pictures],
        }
3050
3051
3052
class MakeItStoopid(GenericNavigableComic):
    """Retriever for the Make It Stoopid webcomic."""
    name = 'stoopid'
    long_name = 'Make it stoopid'
    url = 'http://makeitstoopid.com/comic.php'

    @classmethod
    def get_nav(cls, soup):
        """Return the first five 'cnav' elements, None for those without href.

        The remaining 'cnav' elements are asserted to equal the first five.
        """
        entries = soup.find_all(class_='cnav')
        first_bar = entries[:5]
        second_bar = entries[5:]
        assert first_bar == second_bar
        # begin, prev, archive, next_, end = first_bar
        return [elt if elt.get('href') is not None else None for elt in first_bar]

    @classmethod
    def get_first_comic_link(cls):
        """Return the first navigation entry of the home page."""
        home_nav = cls.get_nav(get_soup_at_url(cls.url))
        return home_nav[0]

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return navigation entry 3 (next) or 1 (previous) of the page."""
        position = 3 if next_ else 1
        return cls.get_nav(last_soup)[position]

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dictionary; the title comes from `link`."""
        title = link['title']
        pictures = soup.find_all('img', id='comicimg')
        return {
            'title': title,
            'img': [pic['src'] for pic in pictures],
        }
3084
3085
3086
class GeekAndPoke(GenericNavigableComic):
    """Class to retrieve Geek And Poke comics."""
    name = 'geek'
    long_name = 'Geek And Poke'
    url = 'http://geek-and-poke.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://geek-and-poke.com/geekandpoke/2006/8/27/a-new-place-for-a-not-so-old-blog.html'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        NOTE(review): 'prev-item' is used to go forward and 'next-item' to
        go backward - presumably the site names its links in reverse
        chronological order; confirm against a live page.
        """
        return last_soup.find('a', class_='prev-item' if next_ else 'next-item')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with the title, description, author and publication
        date of the post along with the urls of its images. 'alt' is the
        alt text of the first image, or an empty string when the image has
        no alt text or when the post has no image at all.
        """
        title = soup.find('meta', property='og:title')['content']
        desc = soup.find('meta', property='og:description')['content']
        date_str = soup.find('time', class_='published')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        author = soup.find('a', rel='author').string
        # The images are in one of two possible containers
        div_content = (soup.find('div', class_="body entry-content") or
                       soup.find('div', class_="special-content"))
        imgs = div_content.find_all('img')
        imgs = [i for i in imgs if i.get('src') is not None]
        # NOTE(review): `'title' not in i` on a Beautiful Soup tag looks at the
        # tag children, not its attributes, so this check is probably always
        # true - confirm before relying on it.
        assert all('title' not in i or i['alt'] == i['title'] for i in imgs)
        # Always a string, even when there is no image
        alt = imgs[0].get('alt', "") if imgs else ""
        return {
            'title': title,
            'alt': alt,
            'description': desc,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
        }
3123
3124
3125
class GenericTumblrV1(GenericComic):
    """Generic class to retrieve comics from Tumblr using the V1 API.

    The posts are read from the '/api/read/' endpoint of the blog located
    at the 'url' class attribute.
    """

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generate the comics published after last_comic.

        Posts for which get_comic_info returns None are skipped.
        """
        for post in cls.get_posts(last_comic):
            info = cls.get_comic_info(post)
            if info is None:
                continue
            yield info

    @classmethod
    def get_url_from_post(cls, post):
        """Return the value of the 'url' attribute of a post."""
        return post['url']

    @classmethod
    def get_api_url(cls):
        """Return the url of the API endpoint for the blog."""
        return urljoin_wrapper(cls.url, '/api/read/')

    @classmethod
    def get_comic_info(cls, post):
        """Get information about a particular comics.

        Return None for posts whose type is not 'photo'.
        """
        if post['type'] != 'photo':
            return None
        tumblr_id = int(post['id'])
        api_url = '%s?id=%d' % (cls.get_api_url(), tumblr_id)
        published = datetime.datetime.fromtimestamp(int(post['unix-timestamp'])).date()
        caption = post.find('photo-caption')
        if caption:
            title = caption.string
        else:
            title = ""
        tags = ' '.join(tag.string for tag in post.find_all('tag'))
        # Photos may appear in 'photo' tags and/or straight in the post
        photos = post.find_all('photo') or [post]
        # Images are in multiple resolutions - taking the first one
        photo_urls = [photo.find('photo-url') for photo in photos]
        info = {'url': cls.get_url_from_post(post)}
        info['url2'] = post['url-with-slug']
        info['day'] = published.day
        info['month'] = published.month
        info['year'] = published.year
        info['title'] = title
        info['tags'] = tags
        info['img'] = [photo_url.string for photo_url in photo_urls]
        info['tumblr-id'] = tumblr_id
        info['api_url'] = api_url  # for debug purposes
        return info

    @classmethod
    def get_posts(cls, last_comic, nb_post_per_call=10):
        """Get posts using API. nb_post_per_call is max 50.

        Posts are retrieved from newer to older as per the tumblr v1 api
        but are returned in chronological order.

        Only the posts found before reaching the one matching last_comic
        are returned (all of them when last_comic is None). Nothing is
        returned when the post matching last_comic cannot be found.
        """
        waiting_for_url = last_comic['url'] if last_comic else None
        newest_first = []
        if last_comic is not None:
            # Sometimes, tumblr posts are deleted. When the previous post is
            # deleted, we might end up spending a lot of time looking for
            # something that doesn't exist. Failing early and clearly might
            # be a better option.
            previous_api_url = last_comic['api_url']
            try:
                get_soup_at_url(previous_api_url)
            except urllib.error.HTTPError:
                try:
                    get_soup_at_url(cls.url)
                except urllib.error.HTTPError:
                    print("Did not find previous post nor main url %s" % cls.url)
                else:
                    print("Did not find previous post %s : it might have been deleted" % previous_api_url)
                return reversed(newest_first)
        base_api_url = cls.get_api_url()
        summary = get_soup_at_url(base_api_url).find('posts')
        start, total = int(summary['start']), int(summary['total'])
        assert start == 0
        for offset in range(0, total, nb_post_per_call):
            page_url = base_api_url + '?start=%d&num=%d' % (offset, nb_post_per_call)
            page = get_soup_at_url(page_url).find('posts')
            page_start, page_total = int(page['start']), int(page['total'])
            assert offset == page_start, "%d != %d" % (offset, page_start)
            # This may happen and should be handled in the future
            assert total == page_total, "%d != %d" % (total, page_total)
            for post in page.find_all('post'):
                if waiting_for_url and waiting_for_url == cls.get_url_from_post(post):
                    return reversed(newest_first)
                newest_first.append(post)
        if waiting_for_url is None:
            return reversed(newest_first)
        print("Did not find %s : there might be a problem" % waiting_for_url)
        return []
3219
3220
3221
class IrwinCardozo(GenericTumblrV1):
    """Retrieve Irwin Cardozo comics via the Tumblr V1 API."""
    name = "irwinc"
    long_name = "Irwin Cardozo"
    url = "http://irwincardozocomics.tumblr.com"
3226
3227
3228
class AccordingToDevin(GenericTumblrV1):
    """Retrieve According To Devin comics via the Tumblr V1 API."""
    name = "devin"
    long_name = "According To Devin"
    url = "http://accordingtodevin.tumblr.com"
3233
3234
3235
class ItsTheTieTumblr(GenericTumblrV1):
    """Retrieve It's the tie comics via the Tumblr V1 API."""
    # Also on http://itsthetie.com
    # Also on https://tapastic.com/series/itsthetie
    name = "tie-tumblr"
    long_name = "It's the tie (from Tumblr)"
    url = 'http://itsthetie.tumblr.com'
3242
3243
3244
class OctopunsTumblr(GenericTumblrV1):
    """Retrieve Octopuns comics via the Tumblr V1 API."""
    # Also on http://www.octopuns.net
    name = "octopuns-tumblr"
    long_name = "Octopuns (from Tumblr)"
    url = "http://octopuns.tumblr.com"
3250
3251
3252
class PicturesInBoxesTumblr(GenericTumblrV1):
    """Retrieve Pictures In Boxes comics via the Tumblr V1 API."""
    # Also on http://www.picturesinboxes.com
    name = "picturesinboxes-tumblr"
    long_name = "Pictures in Boxes (from Tumblr)"
    url = "http://picturesinboxescomic.tumblr.com"
3258
3259
3260
class TubeyToonsTumblr(GenericTumblrV1):
    """Retrieve TubeyToons comics via the Tumblr V1 API."""
    # Also on http://tapastic.com/series/Tubey-Toons
    # Also on http://tubeytoons.com
    name = "tubeytoons-tumblr"
    long_name = "Tubey Toons (from Tumblr)"
    url = "http://tubeytoons.tumblr.com"
3267
3268
3269
class UnearthedComicsTumblr(GenericTumblrV1):
    """Retrieve Unearthed comics via the Tumblr V1 API."""
    # Also on http://tapastic.com/series/UnearthedComics
    # Also on http://unearthedcomics.com
    name = "unearthed-tumblr"
    long_name = "Unearthed Comics (from Tumblr)"
    url = "http://unearthedcomics.tumblr.com"
3276
3277
3278
class PieComic(GenericTumblrV1):
    """Retrieve Pie Comic comics via the Tumblr V1 API."""
    name = "pie"
    long_name = "Pie Comic"
    url = 'http://piecomic.tumblr.com'
3283
3284
3285
class MrEthanDiamond(GenericTumblrV1):
    """Retrieve Mr Ethan Diamond comics via the Tumblr V1 API."""
    name = "diamond"
    long_name = "Mr Ethan Diamond"
    url = "http://mrethandiamond.tumblr.com"
3290
3291
3292
class Flocci(GenericTumblrV1):
    """Retrieve floccinaucinihilipilification comics via the Tumblr V1 API."""
    name = "flocci"
    long_name = "floccinaucinihilipilification"
    url = 'http://floccinaucinihilipilificationa.tumblr.com'
3297
3298
3299
class UpAndOut(GenericTumblrV1):
    """Retrieve Up & Out comics via the Tumblr V1 API."""
    # Also on http://tapastic.com/series/UP-and-OUT
    name = "upandout"
    long_name = "Up And Out (from Tumblr)"
    url = "http://upandoutcomic.tumblr.com"
3305
3306
3307
class Pundemonium(GenericTumblrV1):
    """Retrieve Pundemonium comics via the Tumblr V1 API."""
    name = "pundemonium"
    long_name = "Pundemonium"
    url = "http://monstika.tumblr.com"
3312
3313
3314
class PoorlyDrawnLinesTumblr(GenericTumblrV1):
    """Retrieve Poorly Drawn Lines comics via the Tumblr V1 API."""
    # Also on http://poorlydrawnlines.com
    name = "poorlydrawn-tumblr"
    long_name = "Poorly Drawn Lines (from Tumblr)"
    url = "http://pdlcomics.tumblr.com"
3320
3321
3322
class PearShapedComics(GenericTumblrV1):
    """Retrieve Pear Shaped Comics via the Tumblr V1 API."""
    name = "pearshaped"
    long_name = "Pear-Shaped Comics"
    url = "http://pearshapedcomics.com"
3327
3328
3329
class PondScumComics(GenericTumblrV1):
    """Retrieve Pond Scum comics via the Tumblr V1 API."""
    name = "pond"
    long_name = "Pond Scum"
    url = "http://pondscumcomic.tumblr.com"
3334
3335
3336
class MercworksTumblr(GenericTumblrV1):
    """Retrieve Mercworks comics via the Tumblr V1 API."""
    # Also on http://mercworks.net
    name = "mercworks-tumblr"
    long_name = "Mercworks (from Tumblr)"
    url = "http://mercworks.tumblr.com"
3342
3343
3344
class OwlTurdTumblr(GenericTumblrV1):
    """Retrieve Owl Turd comics via the Tumblr V1 API."""
    # Also on http://tapastic.com/series/Owl-Turd-Comix
    name = "owlturd-tumblr"
    long_name = "Owl Turd (from Tumblr)"
    url = "http://owlturd.com"
3350
3351
3352
class VectorBelly(GenericTumblrV1):
    """Retrieve Vector Belly comics via the Tumblr V1 API."""
    # Also on http://vectorbelly.com
    name = "vector"
    long_name = "Vector Belly"
    url = "http://vectorbelly.tumblr.com"
3358
3359
3360
class GoneIntoRapture(GenericTumblrV1):
    """Retrieve Gone Into Rapture comics via the Tumblr V1 API."""
    # Also on http://goneintorapture.tumblr.com
    # Also on http://tapastic.com/series/Goneintorapture
    name = "rapture"
    long_name = "Gone Into Rapture"
    url = "http://www.goneintorapture.com"
3367
3368
3369
class TheOatmealTumblr(GenericTumblrV1):
    """Retrieve The Oatmeal comics via the Tumblr V1 API."""
    # Also on http://theoatmeal.com
    name = "oatmeal-tumblr"
    long_name = "The Oatmeal (from Tumblr)"
    url = "http://oatmeal.tumblr.com"
3375
3376
3377
class HeckIfIKnowComicsTumblr(GenericTumblrV1):
    """Retrieve Heck If I Know comics via the Tumblr V1 API."""
    # Also on http://tapastic.com/series/Regular
    name = "heck-tumblr"
    long_name = "Heck if I Know comics (from Tumblr)"
    url = "http://heckifiknowcomics.com"
3383
3384
3385
class MyJetPack(GenericTumblrV1):
    """Retrieve My Jet Pack comics via the Tumblr V1 API."""
    name = "jetpack"
    long_name = "My Jet Pack"
    url = "http://myjetpack.tumblr.com"
3390
3391
3392
class CheerUpEmoKidTumblr(GenericTumblrV1):
    """Retrieve CheerUpEmoKid comics via the Tumblr V1 API."""
    # Also on http://www.cheerupemokid.com
    # Also on http://tapastic.com/series/CUEK
    name = "cuek-tumblr"
    long_name = "Cheer Up Emo Kid (from Tumblr)"
    url = "http://enzocomics.tumblr.com"
3399
3400
3401
class ForLackOfABetterComic(GenericTumblrV1):
    """Retrieve For Lack Of A Better Comic comics via the Tumblr V1 API."""
    # Also on http://forlackofabettercomic.com
    name = "lack"
    long_name = "For Lack Of A Better Comic"
    url = "http://forlackofabettercomic.tumblr.com"
3407
3408
3409
class ZenPencilsTumblr(GenericTumblrV1):
    """Retrieve ZenPencils comics via the Tumblr V1 API."""
    # Also on http://zenpencils.com
    # Also on http://www.gocomics.com/zen-pencils
    name = "zenpencils-tumblr"
    long_name = "Zen Pencils (from Tumblr)"
    url = "http://zenpencils.tumblr.com"
3416
3417
3418
class ThreeWordPhraseTumblr(GenericTumblrV1):
    """Retrieve Three Word Phrase comics via the Tumblr V1 API."""
    # Also on http://threewordphrase.com
    name = "threeword-tumblr"
    long_name = "Three Word Phrase (from Tumblr)"
    url = "http://www.threewordphrase.tumblr.com"
3424
3425
3426
class TimeTrabbleTumblr(GenericTumblrV1):
    """Retrieve Time Trabble comics via the Tumblr V1 API."""
    # Also on http://timetrabble.com
    name = "timetrabble-tumblr"
    long_name = "Time Trabble (from Tumblr)"
    url = "http://timetrabble.tumblr.com"
3432
3433
3434
class SafelyEndangeredTumblr(GenericTumblrV1):
    """Retrieve Safely Endangered comics via the Tumblr V1 API."""
    # Also on http://www.safelyendangered.com
    name = "endangered-tumblr"
    long_name = "Safely Endangered (from Tumblr)"
    url = "http://tumblr.safelyendangered.com"
3440
3441
3442
class MouseBearComedyTumblr(GenericTumblrV1):
    """Retrieve Mouse Bear Comedy comics via the Tumblr V1 API."""
    # Also on http://www.mousebearcomedy.com
    name = "mousebear-tumblr"
    long_name = "Mouse Bear Comedy (from Tumblr)"
    url = "http://mousebearcomedy.tumblr.com"
3448
3449
3450
class BouletCorpTumblr(GenericTumblrV1):
    """Retrieve BouletCorp comics via the Tumblr V1 API."""
    # Also on http://www.bouletcorp.com
    name = "boulet-tumblr"
    long_name = "Boulet Corp (from Tumblr)"
    url = "http://bouletcorp.tumblr.com"
3456
3457
3458
class TheAwkwardYetiTumblr(GenericTumblrV1):
    """Retrieve The Awkward Yeti comics via the Tumblr V1 API."""
    # Also on http://www.gocomics.com/the-awkward-yeti
    # Also on http://theawkwardyeti.com
    # Also on https://tapastic.com/series/TheAwkwardYeti
    name = "yeti-tumblr"
    long_name = "The Awkward Yeti (from Tumblr)"
    url = "http://larstheyeti.tumblr.com"
3466
3467
3468
class NellucNhoj(GenericTumblrV1):
    """Retrieve NellucNhoj comics via the Tumblr V1 API."""
    name = "nhoj"
    long_name = "Nelluc Nhoj"
    url = "http://nellucnhoj.com"
3473
3474
3475
class DownTheUpwardSpiralTumblr(GenericTumblrV1):
    """Retrieve Down The Upward Spiral comics via the Tumblr V1 API."""
    # Also on http://www.downtheupwardspiral.com
    name = "spiral-tumblr"
    long_name = "Down the Upward Spiral (from Tumblr)"
    url = "http://downtheupwardspiral.tumblr.com"
3481
3482
3483
class AsPerUsualTumblr(GenericTumblrV1):
    """Retrieve As Per Usual comics via the Tumblr V1 API."""
    # Also on https://tapastic.com/series/AsPerUsual
    name = "usual-tumblr"
    long_name = "As Per Usual (from Tumblr)"
    url = "http://as-per-usual.tumblr.com"
3489
3490
3491
class OneOneOneOneComicTumblr(GenericTumblrV1):
    """Retrieve 1111 Comics via the Tumblr V1 API."""
    # Also on http://www.1111comics.me
    # Also on https://tapastic.com/series/1111-Comics
    name = "1111-tumblr"
    long_name = "1111 Comics (from Tumblr)"
    url = "http://comics1111.tumblr.com"
3498
3499
3500
class JhallComicsTumblr(GenericTumblrV1):
    """Retrieve Jhall Comics via the Tumblr V1 API."""
    # Also on http://jhallcomics.com
    name = "jhall-tumblr"
    long_name = "Jhall Comics (from Tumblr)"
    url = "http://jhallcomics.tumblr.com"
3506
3507
3508
class BerkeleyMewsTumblr(GenericTumblrV1):
    """Retrieve Berkeley Mews comics via the Tumblr V1 API."""
    # Also on http://www.gocomics.com/berkeley-mews
    # Also on http://www.berkeleymews.com
    name = "berkeley-tumblr"
    long_name = "Berkeley Mews (from Tumblr)"
    url = "http://mews.tumblr.com"
3515
3516
3517
class JoanCornellaTumblr(GenericTumblrV1):
    """Retrieve Joan Cornella comics via the Tumblr V1 API."""
    # Also on http://joancornella.net
    name = "cornella-tumblr"
    long_name = "Joan Cornella (from Tumblr)"
    url = "http://cornellajoan.tumblr.com"
3523
3524
3525
class RespawnComicTumblr(GenericTumblrV1):
    """Retrieve Respawn Comic comics via the Tumblr V1 API."""
    # Also on http://respawncomic.com
    name = "respawn-tumblr"
    long_name = "Respawn Comic (from Tumblr)"
    url = "http://respawncomic.tumblr.com"
3531
3532
3533
class ChrisHallbeckTumblr(GenericTumblrV1):
    """Class to retrieve Chris Hallbeck comics."""
    # Also on https://tapastic.com/ChrisHallbeck
    # Also on http://maximumble.com
    # Also on http://minimumble.com
    # Also on http://thebookofbiff.com
    name = 'hallbeck-tumblr'
    # Spelling matches the class name, the docstring and the url
    long_name = 'Chris Hallbeck (from Tumblr)'
    url = 'http://chrishallbeck.tumblr.com'
3542
3543
3544
class ComicNuggets(GenericTumblrV1):
    """Retrieve Comic Nuggets comics via the Tumblr V1 API."""
    name = "nuggets"
    long_name = "Comic Nuggets"
    url = "http://comicnuggets.com"
3549
3550
3551
class PigeonGazetteTumblr(GenericTumblrV1):
    """Retrieve The Pigeon Gazette comics via the Tumblr V1 API."""
    # Also on https://tapastic.com/series/The-Pigeon-Gazette
    name = "pigeon-tumblr"
    long_name = "The Pigeon Gazette (from Tumblr)"
    url = "http://thepigeongazette.tumblr.com"
3557
3558
3559
class CancerOwl(GenericTumblrV1):
    """Retrieve Cancer Owl comics via the Tumblr V1 API."""
    # Also on http://cancerowl.com
    name = "cancerowl-tumblr"
    long_name = "Cancer Owl (from Tumblr)"
    url = "http://cancerowl.tumblr.com"
3565
3566
3567
class FowlLanguageTumblr(GenericTumblrV1):
    """Retrieve Fowl Language comics via the Tumblr V1 API."""
    # Also on http://www.fowllanguagecomics.com
    # Also on http://tapastic.com/series/Fowl-Language-Comics
    # Also on http://www.gocomics.com/fowl-language
    name = "fowllanguage-tumblr"
    long_name = "Fowl Language Comics (from Tumblr)"
    url = "http://fowllanguagecomics.tumblr.com"
3575
3576
3577
class TheOdd1sOutTumblr(GenericTumblrV1):
    """Retrieve The Odd 1s Out comics via the Tumblr V1 API."""
    # Also on http://theodd1sout.com
    # Also on https://tapastic.com/series/Theodd1sout
    name = "theodd-tumblr"
    long_name = "The Odd 1s Out (from Tumblr)"
    url = "http://theodd1sout.tumblr.com"
3584
3585
3586
class TheUnderfoldTumblr(GenericTumblrV1):
    """Retrieve The Underfold comics via the Tumblr V1 API."""
    # Also on http://theunderfold.com
    name = "underfold-tumblr"
    long_name = "The Underfold (from Tumblr)"
    url = "http://theunderfold.tumblr.com"
3592
3593
3594
class LolNeinTumblr(GenericTumblrV1):
    """Retrieve Lol Nein comics via the Tumblr V1 API."""
    # Also on http://lolnein.com
    name = "lolnein-tumblr"
    long_name = "Lol Nein (from Tumblr)"
    url = "http://lolneincom.tumblr.com"
3600
3601
3602
class FatAwesomeComicsTumblr(GenericTumblrV1):
    """Retrieve Fat Awesome comics via the Tumblr V1 API."""
    # Also on http://fatawesome.com/comics
    name = "fatawesome-tumblr"
    long_name = "Fat Awesome (from Tumblr)"
    url = "http://fatawesomecomedy.tumblr.com"
3608
3609
3610
class TheWorldIsFlatTumblr(GenericTumblrV1):
    """Retrieve The World Is Flat comics via the Tumblr V1 API."""
    # Also on https://tapastic.com/series/The-World-is-Flat
    name = "flatworld-tumblr"
    long_name = "The World Is Flat (from Tumblr)"
    url = "http://theworldisflatcomics.tumblr.com"
3616
3617
3618
class DorrisMc(GenericTumblrV1):
    """Retrieve Dorris Mc comics via the Tumblr V1 API."""
    # Also on http://www.gocomics.com/dorris-mccomics
    name = "dorrismc"
    long_name = "Dorris Mc"
    url = "http://dorrismccomics.com"
3624
3625
3626
class LeleozTumblr(GenericEmptyComic, GenericTumblrV1):
    """Retrieve Leleoz comics (Tumblr blog).

    NOTE(review): GenericEmptyComic comes first in the bases - presumably
    it disables the actual retrieval; confirm against its definition.
    """
    # Also on https://tapastic.com/series/Leleoz
    name = "leleoz-tumblr"
    long_name = "Leleoz (from Tumblr)"
    url = "http://leleozcomics.tumblr.com"
3632
3633
3634
class MoonBeardTumblr(GenericTumblrV1):
    """Retrieve MoonBeard comics via the Tumblr V1 API."""
    # Also on http://moonbeard.com
    # Also on http://www.webtoons.com/en/comedy/moon-beard/list?title_no=471
    name = "moonbeard-tumblr"
    long_name = "Moon Beard (from Tumblr)"
    url = "http://blog.squiresjam.es/moonbeard"
3641
3642
3643
class AComik(GenericTumblrV1):
    """Retrieve A Comik comics via the Tumblr V1 API."""
    name = "comik"
    long_name = "A Comik"
    url = "http://acomik.com"
3648
3649
3650
class ClassicRandy(GenericTumblrV1):
    """Retrieve Classic Randy comics via the Tumblr V1 API."""
    name = "randy"
    long_name = "Classic Randy"
    url = "http://classicrandy.tumblr.com"
3655
3656
3657
class DagssonTumblr(GenericTumblrV1):
    """Retrieve Dagsson comics via the Tumblr V1 API."""
    # Also on http://www.dagsson.com
    name = "dagsson-tumblr"
    long_name = "Dagsson Hugleikur (from Tumblr)"
    url = "http://hugleikurdagsson.tumblr.com"
3663
3664
3665
class LinsEditionsTumblr(GenericTumblrV1):
    """Retrieve L.I.N.S. Editions comics via the Tumblr V1 API."""
    # Also on https://linsedition.com
    name = "lins-tumblr"
    long_name = "L.I.N.S. Editions (from Tumblr)"
    url = "http://linscomics.tumblr.com"
3671
3672
3673
class OrigamiHotDish(GenericTumblrV1):
    """Retrieve Origami Hot Dish comics via the Tumblr V1 API."""
    name = "origamihotdish"
    long_name = "Origami Hot Dish"
    url = "http://origamihotdish.com"
3678
3679
3680
class HitAndMissComicsTumblr(GenericTumblrV1):
    """Retrieve Hit and Miss Comics via the Tumblr V1 API."""
    name = "hitandmiss"
    long_name = "Hit and Miss Comics"
    url = "http://hitandmisscomics.tumblr.com"
3685
3686
3687
class HMBlanc(GenericTumblrV1):
    """Retrieve HM Blanc comics via the Tumblr V1 API."""
    name = "hmblanc"
    long_name = "HM Blanc"
    url = "http://hmblanc.tumblr.com"
3692
3693
3694
class TalesOfAbsurdityTumblr(GenericTumblrV1):
    """Retrieve Tales Of Absurdity comics via the Tumblr V1 API."""
    # Also on http://talesofabsurdity.com
    # Also on http://tapastic.com/series/Tales-Of-Absurdity
    name = "absurdity-tumblr"
    long_name = "Tales of Absurdity (from Tumblr)"
    url = "http://talesofabsurdity.tumblr.com"
3701
3702
3703
class RobbieAndBobby(GenericTumblrV1):
    """Retrieve Robbie And Bobby comics via the Tumblr V1 API."""
    # Also on http://robbieandbobby.com
    name = "robbie-tumblr"
    long_name = "Robbie And Bobby (from Tumblr)"
    url = "http://robbieandbobby.tumblr.com"
3709
3710
3711
class ElectricBunnyComicTumblr(GenericTumblrV1):
    """Retrieve Electric Bunny comics via the Tumblr V1 API."""
    # Also on http://www.electricbunnycomics.com/View/Comic/153/Welcome+to+Hell
    name = "bunny-tumblr"
    long_name = "Electric Bunny Comic (from Tumblr)"
    url = "http://electricbunnycomics.tumblr.com"
3717
3718
3719
class Hoomph(GenericTumblrV1):
    """Retrieve Hoomph comics via the Tumblr V1 API."""
    name = "hoomph"
    long_name = "Hoomph"
    url = "http://hoom.ph"
3724
3725
3726
class BFGFSTumblr(GenericTumblrV1):
    """Retrieve BFGFS comics via the Tumblr V1 API."""
    # Also on https://tapastic.com/series/BFGFS
    # Also on http://bfgfs.com
    name = "bfgfs-tumblr"
    long_name = "BFGFS (from Tumblr)"
    url = "http://bfgfs.tumblr.com"
3733
3734
3735
class DoodleForFood(GenericTumblrV1):
    """Retrieve Doodle For Food comics via the Tumblr V1 API."""
    # Also on http://doodleforfood.com
    name = "doodle"
    long_name = "Doodle For Food"
    url = "http://doodleforfood.com"
3741
3742
3743
class DougWasTaken(GenericTumblrV1):
    """Retrieve Doug Was Taken comics via the Tumblr V1 API."""
    name = "doog"
    long_name = "Doug Was Taken"
    url = "http://dougwastaken.tumblr.com"
3748
3749
3750
class MandatoryRollerCoaster(GenericEmptyComic, GenericTumblrV1):
    """Retrieve Mandatory Roller Coaster comics.

    NOTE(review): GenericEmptyComic comes first in the bases - presumably
    it disables the actual retrieval; confirm against its definition.
    """
    name = "rollercoaster"
    long_name = "Mandatory Roller Coaster"
    url = "http://mandatoryrollercoaster.com"
3755
3756
3757
class HorovitzComics(GenericListableComic):
    """Generic class to handle the logic common to the different comics from Horovitz.

    Subclasses must define 'link_re', a regexp matching the archive links
    of the series and capturing the comic number in its first group.
    """
    url = "http://www.horovitzcomics.com"
    # Captures year, month and day from the image url
    img_re = re.compile(".*comics/([0-9]*)/([0-9]*)/([0-9]*)/.*$")
    link_re = NotImplemented
    get_url_from_archive_element = join_cls_url_to_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The number comes from the link, the date from the url of the single
        image with id 'comic'.
        """
        num = int(cls.link_re.match(link['href']).group(1))
        title = link.string
        imgs = soup.find_all('img', id='comic')
        assert len(imgs) == 1
        date_match = cls.img_re.match(imgs[0]['src'])
        year, month, day = map(int, date_match.groups())
        info = {'title': title, 'day': day, 'month': month, 'year': year}
        info['img'] = [img['src'] for img in imgs]
        info['num'] = num
        return info

    @classmethod
    def get_archive_elements(cls):
        """Return the archive links matching 'link_re', in reversed page order."""
        archive_url = 'http://www.horovitzcomics.com/comics/archive/'
        archive_soup = get_soup_at_url(archive_url)
        links = archive_soup.find_all('a', href=cls.link_re)
        return reversed(links)
3787
3788
3789
class HorovitzNew(HorovitzComics):
    """Retrieve the Horovitz comics listed under /comics/new/."""
    name = "horovitznew"
    long_name = "Horovitz New"
    link_re = re.compile("^/comics/new/([0-9]+)$")
3794
3795
3796
class HorovitzClassic(HorovitzComics):
    """Retrieve the Horovitz comics listed under /comics/classic/."""
    name = "horovitzclassic"
    long_name = "Horovitz Classic"
    link_re = re.compile("^/comics/classic/([0-9]+)$")
3801
3802
3803
class GenericGoComic(GenericNavigableComic):
    """Generic class to handle the logic common to comics from gocomics.com.

    Subclasses provide the 'url' of the comic on gocomics.com.
    """
    # Captures year, month and day at the end of a comic url
    url_date_re = re.compile(".*/([0-9]*)/([0-9]*)/([0-9]*)$")

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics."""
        home_soup = get_soup_at_url(cls.url)
        return home_soup.find('a', class_='beginning')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the 'next' or 'prev' link whose href ends with a date."""
        wanted_class = 'next' if next_ else 'prev'
        return last_soup.find('a', class_=wanted_class, href=cls.url_date_re)

    @classmethod
    def get_url_from_link(cls, link):
        """Return the url of a link, resolved against the gocomics.com root."""
        return urljoin_wrapper('http://www.gocomics.com', link['href'])

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The date is parsed from the comic url; the image is the last
        'strip' image of the page.
        """
        url = cls.get_url_from_link(link)
        year, month, day = map(int, cls.url_date_re.match(url).groups())
        info = {'day': day, 'month': month, 'year': year}
        strips = soup.find_all('img', class_='strip')
        info['img'] = [strips[-1]['src']]
        info['author'] = soup.find('meta', attrs={'name': 'author'})['content']
        return info
3834
3835
3836
class PearlsBeforeSwine(GenericGoComic):
    """Retrieve Pearls Before Swine comics from gocomics.com."""
    name = "pearls"
    long_name = "Pearls Before Swine"
    url = "http://www.gocomics.com/pearlsbeforeswine"
3841
3842
3843
class Peanuts(GenericGoComic):
    """Retrieve Peanuts comics from gocomics.com."""
    name = "peanuts"
    long_name = "Peanuts"
    url = "http://www.gocomics.com/peanuts"
3848
3849
3850
class MattWuerker(GenericGoComic):
    """Retrieve Matt Wuerker comics from gocomics.com."""
    name = "wuerker"
    long_name = "Matt Wuerker"
    url = "http://www.gocomics.com/mattwuerker"
3855
3856
3857
class TomToles(GenericGoComic):
    """Retrieve Tom Toles comics from gocomics.com."""
    name = "toles"
    long_name = "Tom Toles"
    url = "http://www.gocomics.com/tomtoles"
3862
3863
3864
class BreakOfDay(GenericGoComic):
    """Retrieve Break Of Day comics from gocomics.com."""
    name = "breakofday"
    long_name = "Break Of Day"
    url = "http://www.gocomics.com/break-of-day"
3869
3870
3871
class Brevity(GenericGoComic):
    """Retrieve Brevity comics from gocomics.com."""
    name = "brevity"
    long_name = "Brevity"
    url = "http://www.gocomics.com/brevity"
3876
3877
3878
class MichaelRamirez(GenericGoComic):
    """Retrieve Michael Ramirez comics from gocomics.com."""
    name = "ramirez"
    long_name = "Michael Ramirez"
    url = "http://www.gocomics.com/michaelramirez"
3883
3884
3885
class MikeLuckovich(GenericGoComic):
    """Retrieve Mike Luckovich comics from gocomics.com."""
    name = "luckovich"
    long_name = "Mike Luckovich"
    url = "http://www.gocomics.com/mikeluckovich"
3890
3891
3892
class JimBenton(GenericGoComic):
    """Retrieve Jim Benton comics from gocomics.com."""
    # Also on http://jimbenton.tumblr.com
    name = "benton"
    long_name = "Jim Benton"
    url = "http://www.gocomics.com/jim-benton-cartoons"
3898
3899
3900
class TheArgyleSweater(GenericGoComic):
    """Retrieve The Argyle Sweater comics from gocomics.com."""
    name = "argyle"
    long_name = "Argyle Sweater"
    url = "http://www.gocomics.com/theargylesweater"
3905
3906
3907
class SunnyStreet(GenericGoComic):
    """Retrieve Sunny Street comics from gocomics.com."""
    # Also on http://www.sunnystreetcomics.com
    name = "sunny"
    long_name = "Sunny Street"
    url = "http://www.gocomics.com/sunny-street"
3913
3914
3915
class OffTheMark(GenericGoComic):
    """Retrieve Off The Mark comics from gocomics.com."""
    # Also on https://www.offthemark.com
    name = "offthemark"
    long_name = "Off The Mark"
    url = "http://www.gocomics.com/offthemark"
3921
3922
3923
class WuMo(GenericGoComic):
    """Retrieve WuMo comics from gocomics.com."""
    # Also on http://wumo.com
    name = "wumo"
    long_name = "WuMo"
    url = "http://www.gocomics.com/wumo"
3929
3930
3931
class LunarBaboon(GenericGoComic):
    """Retrieve Lunar Baboon comics from gocomics.com."""
    # Also on http://www.lunarbaboon.com
    # Also on https://tapastic.com/series/Lunarbaboon
    name = "lunarbaboon"
    long_name = "Lunar Baboon"
    url = "http://www.gocomics.com/lunarbaboon"
3938
3939
3940
class SandersenGocomic(GenericGoComic):
    """Sarah Andersen comics, retrieved from gocomics.com."""
    # Also available at http://sarahcandersen.com
    # Also available at http://tapastic.com/series/Doodle-Time
    name = 'sandersen-goc'
    long_name = 'Sarah Andersen (from GoComics)'
    url = 'http://www.gocomics.com/sarahs-scribbles'
3947
3948
3949
class CalvinAndHobbesGoComic(GenericGoComic):
    """Calvin and Hobbes comics, retrieved from gocomics.com."""
    # This is the gocomics source, as opposed to
    # http://marcel-oehler.marcellosendos.ch/comics/ch/
    name = 'calvin-goc'
    long_name = 'Calvin and Hobbes (from GoComics)'
    url = 'http://www.gocomics.com/calvinandhobbes'
3955
3956
3957
class RallGoComic(GenericGoComic):
    """Ted Rall comics, retrieved from gocomics.com."""
    # Also available at http://rall.com/comic
    name = 'rall-goc'
    long_name = 'Ted Rall (from GoComics)'
    url = 'http://www.gocomics.com/tedrall'
3963
3964
3965
class TheAwkwardYetiGoComic(GenericGoComic):
    """The Awkward Yeti comics, retrieved from gocomics.com."""
    # Also available at http://larstheyeti.tumblr.com
    # Also available at http://theawkwardyeti.com
    # Also available at https://tapastic.com/series/TheAwkwardYeti
    name = 'yeti-goc'
    long_name = 'The Awkward Yeti (from GoComics)'
    url = 'http://www.gocomics.com/the-awkward-yeti'
3973
3974
3975
class BerkeleyMewsGoComics(GenericGoComic):
    """Berkeley Mews comics, retrieved from gocomics.com."""
    # Also available at http://mews.tumblr.com
    # Also available at http://www.berkeleymews.com
    name = 'berkeley-goc'
    long_name = 'Berkeley Mews (from GoComics)'
    url = 'http://www.gocomics.com/berkeley-mews'
3982
3983
3984
class SheldonGoComics(GenericGoComic):
    """Sheldon comics, retrieved from gocomics.com."""
    # Also available at http://www.sheldoncomics.com
    name = 'sheldon-goc'
    long_name = 'Sheldon Comics (from GoComics)'
    url = 'http://www.gocomics.com/sheldon'
3990
3991
3992
class FowlLanguageGoComics(GenericGoComic):
    """Fowl Language comics, retrieved from gocomics.com."""
    # Also available at http://www.fowllanguagecomics.com
    # Also available at http://tapastic.com/series/Fowl-Language-Comics
    # Also available at http://fowllanguagecomics.tumblr.com
    name = 'fowllanguage-goc'
    long_name = 'Fowl Language Comics (from GoComics)'
    url = 'http://www.gocomics.com/fowl-language'
4000
4001
4002
class NickAnderson(GenericGoComic):
    """Nick Anderson comics, retrieved from gocomics.com."""
    name = 'nickanderson'
    long_name = 'Nick Anderson'
    url = 'http://www.gocomics.com/nickanderson'
4007
4008
4009
class GarfieldGoComics(GenericGoComic):
    """Garfield comics, retrieved from gocomics.com."""
    # Also available at http://garfield.com
    name = 'garfield-goc'
    long_name = 'Garfield (from GoComics)'
    url = 'http://www.gocomics.com/garfield'
4015
4016
4017
class DorrisMcGoComics(GenericGoComic):
    """Dorris Mc comics, retrieved from gocomics.com."""
    # Also available at http://dorrismccomics.com
    name = 'dorrismc-goc'
    long_name = 'Dorris Mc (from GoComics)'
    url = 'http://www.gocomics.com/dorris-mccomics'
4023
4024
4025
class MisterAndMeGoComics(GenericGoComic):
    """Mister & Me comics, retrieved from gocomics.com."""
    # Also available at http://www.mister-and-me.com
    # Also available at https://tapastic.com/series/Mister-and-Me
    name = 'mister-goc'
    long_name = 'Mister & Me (from GoComics)'
    url = 'http://www.gocomics.com/mister-and-me'
4032
4033
4034
class GenericTapasticComic(GenericListableComic):
    """Generic class to handle the logic common to comics from tapastic.com.

    The archive is read from the `episodeList` JavaScript literal embedded in
    the series page at `cls.url`; each entry of that list is then used as the
    `archive_elt` passed to the other methods.
    """

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Get information about a particular comics.

        `soup` is the parsed episode page and `archive_elt` the matching
        entry of the episode list (its 'publishDate', 'title' and 'id' keys
        are read here).

        Returns a dict with the 'day'/'month'/'year' of publication, the
        list of image urls ('img') and the 'title', or None when the page
        does not contain any `art-image` image yet.
        """
        # 'publishDate' is divided by 1000 before being handed to
        # fromtimestamp, i.e. it is treated as milliseconds since the epoch.
        timestamp = int(archive_elt['publishDate']) / 1000.0
        day = datetime.datetime.fromtimestamp(timestamp).date()
        imgs = soup.find_all('img', class_='art-image')
        if not imgs:
            print("Comic %s is being uploaded, retry later" % cls.get_url_from_archive_element(archive_elt))
            return None
        return {
            'day': day.day,
            'year': day.year,
            'month': day.month,
            'img': [i['src'] for i in imgs],
            'title': archive_elt['title'],
        }

    @classmethod
    def get_url_from_archive_element(cls, archive_elt):
        """Build the episode url from the 'id' of an archive element."""
        return 'http://tapastic.com/episode/' + str(archive_elt['id'])

    @classmethod
    def get_archive_elements(cls):
        """Return the episode list found in the series page.

        Information is stored in the javascript part of the page: the first
        line that (once stripped) starts with 'episodeList : ' and ends with
        ',' is located and what lies in between is parsed as JSON.

        Raises IndexError when no such line exists in the page.
        """
        pref, suff = 'episodeList : ', ','
        # I don't know the clean way to get it so this is the ugly way.
        for raw_line in urlopen_wrapper(cls.url).readlines():
            line = raw_line.decode('utf-8').strip()
            if line.startswith(pref) and line.endswith(suff):
                return json.loads(line[len(pref):-len(suff)])
        raise IndexError("No '%s' line found in %s" % (pref.strip(), cls.url))
4066
4067
4068
class VegetablesForDessert(GenericTapasticComic):
    """Vegetables For Dessert comics, retrieved from tapastic.com."""
    # Also available at http://vegetablesfordessert.tumblr.com
    name = 'vegetables'
    long_name = 'Vegetables For Dessert'
    url = 'http://tapastic.com/series/vegetablesfordessert'
4074
4075
4076
class FowlLanguageTapa(GenericTapasticComic):
    """Fowl Language comics, retrieved from tapastic.com."""
    # Also available at http://www.fowllanguagecomics.com
    # Also available at http://fowllanguagecomics.tumblr.com
    # Also available at http://www.gocomics.com/fowl-language
    name = 'fowllanguage-tapa'
    long_name = 'Fowl Language Comics (from Tapastic)'
    url = 'http://tapastic.com/series/Fowl-Language-Comics'
4084
4085
4086
class OscillatingProfundities(GenericTapasticComic):
    """Oscillating Profundities comics, retrieved from tapastic.com."""
    name = 'oscillating'
    long_name = 'Oscillating Profundities'
    url = 'http://tapastic.com/series/oscillatingprofundities'
4091
4092
4093
class ZnoflatsComics(GenericTapasticComic):
    """Znoflats comics, retrieved from tapastic.com."""
    name = 'znoflats'
    long_name = 'Znoflats Comics'
    url = 'http://tapastic.com/series/Znoflats-Comics'
4098
4099
4100
class SandersenTapastic(GenericTapasticComic):
    """Sarah Andersen comics, retrieved from tapastic.com."""
    # Also available at http://sarahcandersen.com
    # Also available at http://www.gocomics.com/sarahs-scribbles
    name = 'sandersen-tapa'
    long_name = 'Sarah Andersen (from Tapastic)'
    url = 'http://tapastic.com/series/Doodle-Time'
4107
4108
4109
class TubeyToonsTapastic(GenericTapasticComic):
    """TubeyToons comics, retrieved from tapastic.com."""
    # Also available at http://tubeytoons.com
    # Also available at http://tubeytoons.tumblr.com
    name = 'tubeytoons-tapa'
    long_name = 'Tubey Toons (from Tapastic)'
    url = 'http://tapastic.com/series/Tubey-Toons'
4116
4117
4118
class AnythingComicTapastic(GenericTapasticComic):
    """Anything Comic comics, retrieved from tapastic.com."""
    # Also available at http://www.anythingcomic.com
    name = 'anythingcomic-tapa'
    long_name = 'Anything Comic (from Tapastic)'
    url = 'http://tapastic.com/series/anything'
4124
4125
4126
class UnearthedComicsTapastic(GenericTapasticComic):
    """Unearthed comics, retrieved from tapastic.com."""
    # Also available at http://unearthedcomics.com
    # Also available at http://unearthedcomics.tumblr.com
    name = 'unearthed-tapa'
    long_name = 'Unearthed Comics (from Tapastic)'
    url = 'http://tapastic.com/series/UnearthedComics'
4133
4134
4135
class EverythingsStupidTapastic(GenericTapasticComic):
    """Everything's Stupid comics, retrieved from tapastic.com."""
    # Also available at http://www.webtoons.com/en/challenge/everythings-stupid/list?title_no=14591
    # Also available at http://everythingsstupid.net
    name = 'stupid-tapa'
    long_name = "Everything's Stupid (from Tapastic)"
    url = 'http://tapastic.com/series/EverythingsStupid'
4142
4143
4144
class JustSayEhTapastic(GenericTapasticComic):
    """Just Say Eh comics, retrieved from tapastic.com."""
    # Also available at http://www.justsayeh.com
    name = 'justsayeh-tapa'
    long_name = 'Just Say Eh (from Tapastic)'
    url = 'http://tapastic.com/series/Just-Say-Eh'
4150
4151
4152
class ThorsThundershackTapastic(GenericTapasticComic):
    """Thor's Thundershack comics, retrieved from tapastic.com."""
    # Also available at http://www.thorsthundershack.com
    name = 'thor-tapa'
    long_name = "Thor's Thundershack (from Tapastic)"
    url = 'http://tapastic.com/series/Thors-Thundershac'
4158
4159
4160
class OwlTurdTapastic(GenericTapasticComic):
    """Owl Turd comics, retrieved from tapastic.com."""
    # Also available at http://owlturd.com
    name = 'owlturd-tapa'
    long_name = 'Owl Turd (from Tapastic)'
    url = 'http://tapastic.com/series/Owl-Turd-Comix'
4166
4167
4168
class GoneIntoRaptureTapastic(GenericTapasticComic):
    """Gone Into Rapture comics, retrieved from tapastic.com."""
    # Also available at http://goneintorapture.tumblr.com
    # Also available at http://www.goneintorapture.com
    name = 'rapture-tapa'
    long_name = 'Gone Into Rapture (from Tapastic)'
    url = 'http://tapastic.com/series/Goneintorapture'
4175
4176
4177
class HeckIfIKnowComicsTapa(GenericTapasticComic):
    """Heck If I Know comics, retrieved from tapastic.com."""
    # Also available at http://heckifiknowcomics.com
    name = 'heck-tapa'
    long_name = 'Heck if I Know comics (from Tapastic)'
    url = 'http://tapastic.com/series/Regular'
4183
4184
4185
class CheerUpEmoKidTapa(GenericTapasticComic):
    """CheerUpEmoKid comics, retrieved from tapastic.com."""
    # Also available at http://www.cheerupemokid.com
    # Also available at http://enzocomics.tumblr.com
    name = 'cuek-tapa'
    long_name = 'Cheer Up Emo Kid (from Tapastic)'
    url = 'http://tapastic.com/series/CUEK'
4192
4193
4194
class BigFootJusticeTapa(GenericTapasticComic):
    """Big Foot Justice comics, retrieved from tapastic.com."""
    # Also available at http://bigfootjustice.com
    name = 'bigfoot-tapa'
    long_name = 'Big Foot Justice (from Tapastic)'
    url = 'http://tapastic.com/series/bigfoot-justice'
4200
4201
4202
class UpAndOutTapa(GenericTapasticComic):
    """Up & Out comics, retrieved from tapastic.com."""
    # Also available at http://upandoutcomic.tumblr.com
    name = 'upandout-tapa'
    long_name = 'Up And Out (from Tapastic)'
    url = 'http://tapastic.com/series/UP-and-OUT'
4208
4209
4210
class ToonHoleTapa(GenericTapasticComic):
    """Toon Hole comics, retrieved from tapastic.com."""
    # Also available at http://www.toonhole.com
    name = 'toonhole-tapa'
    long_name = 'Toon Hole (from Tapastic)'
    url = 'http://tapastic.com/series/TOONHOLE'
4216
4217
4218
class AngryAtNothingTapa(GenericTapasticComic):
    """Angry at Nothing comics, retrieved from tapastic.com."""
    # Also available at http://www.angryatnothing.net
    name = 'angry-tapa'
    long_name = 'Angry At Nothing (from Tapastic)'
    url = 'http://tapastic.com/series/Comics-yeah-definitely-comics-'
4224
4225
4226
class LeleozTapa(GenericTapasticComic):
    """Leleoz comics, retrieved from tapastic.com."""
    # Also available at http://leleozcomics.tumblr.com
    name = 'leleoz-tapa'
    long_name = 'Leleoz (from Tapastic)'
    url = 'https://tapastic.com/series/Leleoz'
4232
4233
4234
class TheAwkwardYetiTapa(GenericTapasticComic):
    """The Awkward Yeti comics, retrieved from tapastic.com."""
    # Also available at http://www.gocomics.com/the-awkward-yeti
    # Also available at http://theawkwardyeti.com
    # Also available at http://larstheyeti.tumblr.com
    name = 'yeti-tapa'
    long_name = 'The Awkward Yeti (from Tapastic)'
    url = 'https://tapastic.com/series/TheAwkwardYeti'
4242
4243
4244
class AsPerUsualTapa(GenericTapasticComic):
    """As Per Usual comics, retrieved from tapastic.com."""
    # Also available at http://as-per-usual.tumblr.com
    name = 'usual-tapa'
    long_name = 'As Per Usual (from Tapastic)'
    url = 'https://tapastic.com/series/AsPerUsual'
4250
4251
4252
class OneOneOneOneComicTapa(GenericTapasticComic):
    """1111 Comics, retrieved from tapastic.com."""
    # Also available at http://www.1111comics.me
    # Also available at http://comics1111.tumblr.com
    name = '1111-tapa'
    long_name = '1111 Comics (from Tapastic)'
    url = 'https://tapastic.com/series/1111-Comics'
4259
4260
4261
class TumbleDryTapa(GenericTapasticComic):
    """Class to retrieve Tumble Dry comics (from tapastic.com)."""
    # Also on http://tumbledrycomics.com
    name = 'tumbledry-tapa'
    # Display name: the comic is "Tumble Dry" (see url), not "Tumblr Dry".
    long_name = 'Tumble Dry (from Tapastic)'
    url = 'https://tapastic.com/series/TumbleDryComics'
4267
4268
4269
class DeadlyPanelTapa(GenericTapasticComic):
    """Deadly Panel comics, retrieved from tapastic.com."""
    # Also available at http://www.deadlypanel.com
    name = 'deadly-tapa'
    long_name = 'Deadly Panel (from Tapastic)'
    url = 'https://tapastic.com/series/deadlypanel'
4275
4276
4277
class ChrisHallbeckMaxiTapa(GenericTapasticComic):
    """Class to retrieve Chris Hallbeck's Maximumble comics (from tapastic.com)."""
    # Also on http://chrishallbeck.tumblr.com
    # Also on http://maximumble.com
    name = 'hallbeckmaxi-tapa'
    # Author name spelled "Hallbeck", consistently with the links above.
    long_name = 'Chris Hallbeck - Maximumble (from Tapastic)'
    url = 'https://tapastic.com/series/Maximumble'
4284
4285
4286
class ChrisHallbeckMiniTapa(GenericTapasticComic):
    """Class to retrieve Chris Hallbeck's Minimumble comics (from tapastic.com)."""
    # Also on http://chrishallbeck.tumblr.com
    # Also on http://minimumble.com
    name = 'hallbeckmini-tapa'
    # Author name spelled "Hallbeck", consistently with the links above.
    long_name = 'Chris Hallbeck - Minimumble (from Tapastic)'
    url = 'https://tapastic.com/series/Minimumble'
4293
4294
4295
class ChrisHallbeckBiffTapa(GenericTapasticComic):
    """Class to retrieve Chris Hallbeck's The Book of Biff comics (from tapastic.com)."""
    # Also on http://chrishallbeck.tumblr.com
    # Also on http://thebookofbiff.com
    name = 'hallbeckbiff-tapa'
    # Author name spelled "Hallbeck", consistently with the links above.
    long_name = 'Chris Hallbeck - The Book of Biff (from Tapastic)'
    url = 'https://tapastic.com/series/Biff'
4302
4303
4304
class RandoWisTapa(GenericTapasticComic):
    """RandoWis comics, retrieved from tapastic.com."""
    # Also available at https://randowis.com
    name = 'randowis-tapa'
    long_name = 'RandoWis (from Tapastic)'
    url = 'https://tapastic.com/series/RandoWis'
4310
4311
4312
class PigeonGazetteTapa(GenericTapasticComic):
    """The Pigeon Gazette comics, retrieved from tapastic.com."""
    # Also available at http://thepigeongazette.tumblr.com
    name = 'pigeon-tapa'
    long_name = 'The Pigeon Gazette (from Tapastic)'
    url = 'https://tapastic.com/series/The-Pigeon-Gazette'
4318
4319
4320
class TheOdd1sOutTapa(GenericTapasticComic):
    """The Odd 1s Out comics, retrieved from tapastic.com."""
    # Also available at http://theodd1sout.com
    # Also available at http://theodd1sout.tumblr.com
    name = 'theodd-tapa'
    long_name = 'The Odd 1s Out (from Tapastic)'
    url = 'https://tapastic.com/series/Theodd1sout'
4327
4328
4329
class TheWorldIsFlatTapa(GenericTapasticComic):
    """The World Is Flat comics, retrieved from tapastic.com."""
    # Also available at http://theworldisflatcomics.tumblr.com
    name = 'flatworld-tapa'
    long_name = 'The World Is Flat (from Tapastic)'
    url = 'https://tapastic.com/series/The-World-is-Flat'
4335
4336
4337
class MisterAndMeTapa(GenericTapasticComic):
    """Mister & Me comics, retrieved from tapastic.com."""
    # Also available at http://www.mister-and-me.com
    # Also available at http://www.gocomics.com/mister-and-me
    name = 'mister-tapa'
    long_name = 'Mister & Me (from Tapastic)'
    url = 'https://tapastic.com/series/Mister-and-Me'
4344
4345
4346
class TalesOfAbsurdityTapa(GenericTapasticComic):
    """Tales Of Absurdity comics, retrieved from tapastic.com."""
    # Also available at http://talesofabsurdity.com
    # Also available at http://talesofabsurdity.tumblr.com
    name = 'absurdity-tapa'
    long_name = 'Tales of Absurdity (from Tapastic)'
    url = 'http://tapastic.com/series/Tales-Of-Absurdity'
4353
4354
4355
class BFGFSTapa(GenericTapasticComic):
    """BFGFS comics, retrieved from tapastic.com."""
    # Also available at http://bfgfs.com
    # Also available at http://bfgfs.tumblr.com
    name = 'bfgfs-tapa'
    long_name = 'BFGFS (from Tapastic)'
    url = 'https://tapastic.com/series/BFGFS'
4362
4363
4364
class DoodleForFoodTapa(GenericTapasticComic):
    """Doodle For Food comics, retrieved from tapastic.com."""
    # Also available at http://doodleforfood.com
    name = 'doodle-tapa'
    long_name = 'Doodle For Food (from Tapastic)'
    url = 'https://tapastic.com/series/Doodle-for-Food'
4370
4371
4372
class MrLovensteinTapa(GenericTapasticComic):
    """Mr Lovenstein comics, retrieved from tapastic.com."""
    # Also on https://tapastic.com/series/MrLovenstein
    # NOTE(review): the link above is the same series as `url` below;
    # presumably another source was meant - TODO confirm.
    name = 'mrlovenstein-tapa'
    long_name = 'Mr. Lovenstein (from Tapastic)'
    url = 'https://tapastic.com/series/MrLovenstein'
4378
4379
4380
def get_subclasses(klass):
    """Return every direct and indirect subclass of `klass` as a list.

    Direct subclasses come first, followed by the descendants of each of
    them in turn. A class reachable through several paths is listed once
    per path.
    """
    direct = klass.__subclasses__()
    deeper = [sub for child in direct for sub in get_subclasses(child)]
    return direct + deeper
4386
4387
4388
def remove_st_nd_rd_th_from_date(string):
    """Function to transform 1st/2nd/3rd/4th in a parsable date format.

    Only the ordinal suffixes "st", "nd", "rd" and "th" that directly follow
    a digit are removed, so that day and month names containing these
    letters ("August", "Monday", "Saturday"...) are left untouched.

    >>> remove_st_nd_rd_th_from_date('Monday 2nd August 2015')
    'Monday 2 August 2015'
    """
    # The lookbehind keeps the digit; \b ensures the suffix ends the token.
    return re.sub(r'(?<=\d)(?:st|nd|rd|th)\b', '', string)
4396
4397
4398
def string_to_date(string, date_format, local=DEFAULT_LOCAL):
    """Function to convert string to date object.

    Wrapper around datetime.datetime.strptime: `string` is parsed according
    to `date_format` with the locale `local` temporarily set for LC_ALL, and
    the date part of the result is returned.

    The previous locale is restored even when the parsing fails; the
    exception raised by strptime (ValueError) is propagated unchanged.
    """
    # format described in https://docs.python.org/2/library/datetime.html#strftime-and-strptime-behavior
    prev_locale = locale.setlocale(locale.LC_ALL)
    if local == prev_locale:
        # Already in the requested locale: nothing to switch nor to restore.
        return datetime.datetime.strptime(string, date_format).date()
    locale.setlocale(locale.LC_ALL, local)
    try:
        return datetime.datetime.strptime(string, date_format).date()
    finally:
        locale.setlocale(locale.LC_ALL, prev_locale)
4409
4410
4411
# Registry of the comic classes, built from the subclasses of GenericComic.
COMICS = set(get_subclasses(GenericComic))
# Classes without a `name` (presumably the generic base classes) are left out.
VALID_COMICS = [comic for comic in COMICS if comic.name is not None]
# Short name -> class; the assert checks that no two classes share a name.
COMIC_NAMES = dict((comic.name, comic) for comic in VALID_COMICS)
assert len(VALID_COMICS) == len(COMIC_NAMES)
# Python class names; the assert checks that they are unique as well.
CLASS_NAMES = set(comic.__name__ for comic in VALID_COMICS)
assert len(VALID_COMICS) == len(CLASS_NAMES)
4417