Completed
Push — master ( e61dff...08d646 )
by De
01:17
created

comics.py (13 issues)

1
#! /usr/bin/python3
2
# vim: set expandtab tabstop=4 shiftwidth=4 :
3
"""Module to retrieve webcomics"""
4
5
from comic_abstract import GenericComic, get_date_for_comic
6
import re
7
from datetime import date, timedelta
8
import datetime
9
from urlfunctions import get_soup_at_url, urljoin_wrapper,\
10
    convert_iri_to_plain_ascii_uri, load_json_at_url, urlopen_wrapper
11
import json
12
import locale
13
import urllib
14
15
# Name of a locale; not referenced in the visible part of this module.
# NOTE(review): presumably the fallback locale for date parsing - confirm.
DEFAULT_LOCAL = 'en_GB.UTF-8'
16
17
18
class Xkcd(GenericComic):
    """Class to retrieve Xkcd comics."""
    name = 'xkcd'
    long_name = 'xkcd'
    url = 'http://xkcd.com'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generator to get the next comic. Implementation of GenericComic's abstract method.

        Iteration starts right after ``last_comic['num']`` (or at 1 when
        `last_comic` is falsy) and stops at the 'num' value found in the
        'info.0.json' document at the root of the site.
        """
        first_num = last_comic['num'] if last_comic else 0
        latest_info = load_json_at_url(urljoin_wrapper(cls.url, 'info.0.json'))
        last_num = latest_info['num']

        for num in range(first_num + 1, last_num + 1):
            if num == 404:
                # Number 404 is never requested.
                # NOTE(review): presumably that URL answers with an error page - confirm.
                continue
            json_url = urljoin_wrapper(cls.url, '%d/info.0.json' % num)
            comic = load_json_at_url(json_url)
            # Normalise the retrieved document: 'img' becomes a list and
            # the date fields become integers.
            comic['img'] = [comic['img']]
            comic['prefix'] = '%d-' % num
            comic['json_url'] = json_url
            comic['url'] = urljoin_wrapper(cls.url, str(num))
            for date_field in ('day', 'month', 'year'):
                comic[date_field] = int(comic[date_field])
            assert comic['num'] == num
            yield comic
44
45
46
# Helper functions corresponding to get_url_from_link/get_url_from_archive_element
47
48
49
@classmethod
def get_href(cls, link):
    """Implementation of get_url_from_link/get_url_from_archive_element.

    Return the value stored under the 'href' key of `link`, unchanged.
    """
    return link['href']
53
54
55
@classmethod
def join_cls_url_to_href(cls, link):
    """Implementation of get_url_from_link/get_url_from_archive_element.

    Return the 'href' value of `link` joined to `cls.url` with
    `urljoin_wrapper`.
    """
    href = link['href']
    return urljoin_wrapper(cls.url, href)
59
60
61
class GenericNavigableComic(GenericComic):
    """Generic class for "navigable" comics : with first/next arrows.

    This class applies to comics where previous and next comics can be
    accessed from a given comic. Once given a starting point (either
    the first comic or the last comic retrieved), it will handle the
    navigation, the retrieval of the soup object and the setting of
    the 'url' attribute on retrieved comics. This limits a lot the
    amount of boilerplate code in the different implementation classes.

    The `get_next_comic` method is implemented in terms of new
    more specialized methods to be implemented/overridden:
        - get_first_comic_link
        - get_navi_link
        - get_comic_info
        - get_url_from_link
    """

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        Sometimes this can be retrieved of any comic page, sometimes on
        the archive page, sometimes it doesn't exist at all and one has
        to iterate backward to find it before hardcoding the result found.
        """
        raise NotImplementedError

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next (or previous - for dev purposes) comic."""
        raise NotImplementedError

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics."""
        raise NotImplementedError

    @classmethod
    def get_url_from_link(cls, link):
        """Get url corresponding to a link. Default implementation is similar to get_href."""
        return link['href']

    @classmethod
    def get_next_link(cls, last_soup):
        """Get link to next comic."""
        return cls.get_navi_link(last_soup, True)

    @classmethod
    def get_prev_link(cls, last_soup):
        """Get link to previous comic."""
        return cls.get_navi_link(last_soup, False)

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generic implementation of get_next_comic for navigable comics.

        Starts from the link following ``last_comic['url']`` when there is
        one, from `get_first_comic_link` otherwise, then follows the "next"
        links. Each dictionary returned by `get_comic_info` gets a 'url'
        entry before being yielded; a None result is skipped.
        """
        url = last_comic['url'] if last_comic else None
        if url:
            next_comic = cls.get_next_link(get_soup_at_url(url))
        else:
            next_comic = cls.get_first_comic_link()
        cls.log("next/first comic will be %s (url is %s)" % (str(next_comic), url))
        while next_comic:
            prev_url, url = url, cls.get_url_from_link(next_comic)
            if prev_url == url:
                # The "next" link leads to the page we are already on: stop.
                cls.log("got same url %s" % url)
                break
            cls.log("about to get %s (%s)" % (url, str(next_comic)))
            soup = get_soup_at_url(url)
            comic = cls.get_comic_info(soup, next_comic)
            if comic is not None:
                assert 'url' not in comic
                comic['url'] = url
                yield comic
            next_comic = cls.get_next_link(soup)
            cls.log("next comic will be %s" % str(next_comic))

    @classmethod
    def check_first_link(cls):
        """Check that navigation to first comic seems to be working - for dev purposes.

        Return True when a first link exists and its URL can be retrieved
        without an HTTPError, False otherwise.
        """
        # `import urllib` alone does not guarantee that the `urllib.error`
        # submodule is loaded, so it is imported explicitly.
        import urllib.error

        cls.log("about to check first link")
        ok = True
        firstlink = cls.get_first_comic_link()
        if firstlink is None:
            print("From %s : no first link" % cls.url)
            ok = False
        else:
            firsturl = cls.get_url_from_link(firstlink)
            try:
                get_soup_at_url(firsturl)
            except urllib.error.HTTPError:
                print("From %s : invalid first url" % cls.url)
                ok = False
        cls.log("checked first link -> returned %d" % ok)
        return ok

    @classmethod
    def check_prev_next_links(cls, url):
        """Check that navigation to prev/next from a given URL seems to be working - for dev purposes.

        From `url`, go to the previous comic and come back with its "next"
        link, then go to the next comic and come back with its "previous"
        link; both round trips are expected to lead back to `url`.
        Return True when every check that could be performed succeeded.
        """
        cls.log("about to check prev/next from %s" % url)
        ok = True
        if url is None:
            prevlink, nextlink = None, None
        else:
            soup = get_soup_at_url(url)
            prevlink, nextlink = cls.get_prev_link(soup), cls.get_next_link(soup)
        if prevlink is None and nextlink is None:
            print("From %s : no previous nor next" % url)
            ok = False
        else:
            if prevlink:
                prevurl = cls.get_url_from_link(prevlink)
                prevsoup = get_soup_at_url(prevurl)
                # A missing link on the way back is reported as a failed
                # round trip instead of crashing on None.
                backlink = cls.get_next_link(prevsoup)
                prevnext = cls.get_url_from_link(backlink) if backlink else None
                if prevnext != url:
                    print("From %s, going backward then forward leads to %s" % (url, prevnext))
                    ok = False
            if nextlink:
                nexturl = cls.get_url_from_link(nextlink)
                # A "next" link leading to the current page is not followed.
                if nexturl != url:
                    nextsoup = get_soup_at_url(nexturl)
                    backlink = cls.get_prev_link(nextsoup)
                    nextprev = cls.get_url_from_link(backlink) if backlink else None
                    if nextprev != url:
                        print("From %s, going forward then backward leads to %s" % (url, nextprev))
                        ok = False
        cls.log("checked prev/next from %s -> returned %d" % (url, ok))
        return ok

    @classmethod
    def check_navigation(cls, url):
        """Check that navigation functions seem to be working - for dev purposes."""
        cls.log("about to check navigation from %s" % url)
        # Both checks are always run, even when the first one fails.
        first = cls.check_first_link()
        prevnext = cls.check_prev_next_links(url)
        ok = first and prevnext
        cls.log("checked navigation from %s -> returned %d" % (url, ok))
        return ok
198
199
200
class GenericListableComic(GenericComic):
    """Generic class for "listable" comics : with a list of comics (aka 'archive')

    The `get_next_comic` method is implemented in terms of new
    more specialized methods to be implemented/overridden:
        - get_archive_elements
        - get_url_from_archive_element
        - get_comic_info
    """

    @classmethod
    def get_archive_elements(cls):
        """Get the archive elements (iterable)."""
        raise NotImplementedError

    @classmethod
    def get_url_from_archive_element(cls, archive_elt):
        """Get url corresponding to an archive element."""
        raise NotImplementedError

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Get information about a particular comics."""
        raise NotImplementedError

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generic implementation of get_next_comic for listable comics.

        Archive elements are skipped until the one whose URL is
        ``last_comic['url']`` has been seen (nothing is skipped when
        `last_comic` is falsy). For each following element, the page is
        retrieved and the dictionary returned by `get_comic_info` is
        yielded with a 'url' entry added; a None result is skipped.
        """
        waiting_for_url = last_comic['url'] if last_comic else None
        for archive_elt in cls.get_archive_elements():
            url = cls.get_url_from_archive_element(archive_elt)
            cls.log("considering %s" % url)
            if waiting_for_url and waiting_for_url == url:
                # Found the last comic already retrieved: everything after
                # this element is new.
                waiting_for_url = None
            elif waiting_for_url is None:
                cls.log("about to get %s (%s)" % (url, str(archive_elt)))
                soup = get_soup_at_url(url)
                comic = cls.get_comic_info(soup, archive_elt)
                if comic is not None:
                    assert 'url' not in comic
                    comic['url'] = url
                    yield comic
        if waiting_for_url is not None:
            print("Did not find %s : there might be a problem" % waiting_for_url)
244
245
# Helper functions corresponding to get_first_comic_link/get_navi_link
246
247
248
@classmethod
def get_link_rel_next(cls, last_soup, next_):
    """Implementation of get_navi_link.

    Look in `last_soup` for a 'link' tag with rel='next' when `next_` is
    true, rel='prev' otherwise.
    """
    rel = 'next' if next_ else 'prev'
    return last_soup.find('link', rel=rel)
252
253
254
@classmethod
def get_a_rel_next(cls, last_soup, next_):
    """Implementation of get_navi_link.

    Look in `last_soup` for an 'a' tag with rel='next' when `next_` is
    true, rel='prev' otherwise.
    """
    rel = 'next' if next_ else 'prev'
    return last_soup.find('a', rel=rel)
258
259
260
@classmethod
def get_a_navi_navinext(cls, last_soup, next_):
    """Implementation of get_navi_link.

    Look in `last_soup` for an 'a' tag whose class is 'navi navi-next'
    when `next_` is true, 'navi navi-prev' otherwise.
    """
    css_class = 'navi navi-next' if next_ else 'navi navi-prev'
    return last_soup.find('a', class_=css_class)
264
265
266
@classmethod
def get_a_navi_comicnavnext_navinext(cls, last_soup, next_):
    """Implementation of get_navi_link.

    Look in `last_soup` for an 'a' tag whose class is
    'navi comic-nav-next navi-next' when `next_` is true,
    'navi comic-nav-previous navi-prev' otherwise.
    """
    if next_:
        css_class = 'navi comic-nav-next navi-next'
    else:
        css_class = 'navi comic-nav-previous navi-prev'
    return last_soup.find('a', class_=css_class)
270
271
272
@classmethod
def get_a_comicnavbase_comicnavnext(cls, last_soup, next_):
    """Implementation of get_navi_link.

    Look in `last_soup` for an 'a' tag whose class is
    'comic-nav-base comic-nav-next' when `next_` is true,
    'comic-nav-base comic-nav-previous' otherwise.
    """
    if next_:
        css_class = 'comic-nav-base comic-nav-next'
    else:
        css_class = 'comic-nav-base comic-nav-previous'
    return last_soup.find('a', class_=css_class)
276
277
278
@classmethod
def get_a_navi_navifirst(cls):
    """Implementation of get_first_comic_link.

    Retrieve the page at `cls.url` and look for an 'a' tag whose class
    is 'navi navi-first'.
    """
    home_soup = get_soup_at_url(cls.url)
    return home_soup.find('a', class_='navi navi-first')
282
283
284
@classmethod
def get_div_navfirst_a(cls):
    """Implementation of get_first_comic_link.

    Retrieve the page at `cls.url` and return the first 'a' tag found
    inside the 'div' whose class is "nav-first", or None when there is
    no such 'div'.
    """
    div = get_soup_at_url(cls.url).find('div', class_="nav-first")
    # A missing container means "no first link" rather than a crash.
    return div.find('a') if div else None
288
289
290
@classmethod
def get_a_comicnavbase_comicnavfirst(cls):
    """Implementation of get_first_comic_link.

    Retrieve the page at `cls.url` and look for an 'a' tag whose class
    is 'comic-nav-base comic-nav-first'.
    """
    home_soup = get_soup_at_url(cls.url)
    return home_soup.find('a', class_='comic-nav-base comic-nav-first')
294
295
296
@classmethod
def simulate_first_link(cls):
    """Implementation of get_first_comic_link creating a link-like object from
    an URL provided by the class.

    The returned dictionary has a single 'href' entry holding
    `cls.first_url`.
    """
    return {'href': cls.first_url}
301
302
303
@classmethod
def navigate_to_first_comic(cls):
    """Implementation of get_first_comic_link navigating from a user provided
    URL to the first comic.

    Sometimes, the first comic cannot be reached directly so to start
    from the first comic one has to go to the previous comic until
    there is no previous comics. Once this URL is reached, it
    is better to hardcode it but for development purposes, it
    is convenient to have an automatic way to find it.

    The starting URL is read from standard input and every URL visited
    is printed. The result is a link-like dictionary whose 'href' is the
    last URL reached.
    """
    url = input("Get starting URL: ")
    print(url)
    prev_link = cls.get_prev_link(get_soup_at_url(url))
    while prev_link:
        url = cls.get_url_from_link(prev_link)
        print(url)
        prev_link = cls.get_prev_link(get_soup_at_url(url))
    return {'href': url}
322
323
324
class GenericEmptyComic(GenericComic):
    """Generic class for comics where nothing is to be done.

    It can be useful to deactivate temporarily comics that do not work
    properly by replacing `class MyComic(GenericWhateverComic)` with
    `class MyComic(GenericEmptyComic, GenericWhateverComic)`."""

    @classmethod
    def get_next_comic(cls, last_comic):
        """Implementation of get_next_comic returning no comics."""
        cls.log("comic is considered as empty - returning no comic")
        return []
336
337
338 View Code Duplication
class ExtraFabulousComics(GenericNavigableComic):
    """Class to retrieve Extra Fabulous Comics."""
    name = 'efc'
    long_name = 'Extra Fabulous Comics'
    url = 'http://extrafabulouscomics.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Images are the 'img' tags whose 'src' starts with
        '<cls.url>/wp-content/uploads/'; title and date come from the
        'og:title' and 'article:published_time' meta tags.
        """
        # The site URL is escaped so that its dots only match literal dots.
        img_src_re = re.compile('^%s/wp-content/uploads/' % re.escape(cls.url))
        imgs = soup.find_all('img', src=img_src_re)
        title = soup.find('meta', property='og:title')['content']
        # Only the first 10 characters are parsed, as "%Y-%m-%d".
        date_str = soup.find('meta', property='article:published_time')['content'][:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        return {
            'title': title,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'prefix': title + '-'
        }
362
363
364 View Code Duplication
class GenericLeMondeBlog(GenericNavigableComic):
    """Generic class to retrieve comics from Le Monde blogs.

    Subclasses are expected to replace `first_url` with the URL of the
    post to start from.
    """
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Title and images come from the 'og:title' and 'og:image' meta tags,
        the date from the 'span' whose class is "entry-date".
        """
        url2 = soup.find('link', rel='shortlink')['href']
        title = soup.find('meta', property='og:title')['content']
        date_str = soup.find("span", class_="entry-date").string
        # NOTE(review): third argument looks like the locale used to parse
        # the month name - confirm against string_to_date.
        day = string_to_date(date_str, "%d %B %Y", "fr_FR.utf8")
        imgs = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'url2': url2,
            'img': [convert_iri_to_plain_ascii_uri(i['content']) for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
386
387
388
class ZepWorld(GenericLeMondeBlog):
    """Class to retrieve Zep World comics."""
    name = "zep"
    long_name = "Zep World"
    url = "http://zepworld.blog.lemonde.fr"
    first_url = "http://zepworld.blog.lemonde.fr/2014/10/31/bientot-le-blog-de-zep/"
394
395
396
class Vidberg(GenericLeMondeBlog):
    """Class to retrieve Vidberg comics."""
    name = 'vidberg'
    long_name = "Vidberg - l'actu en patates"
    url = "http://vidberg.blog.lemonde.fr"
    # Not the first but I didn't find an efficient way to retrieve it
    first_url = "http://vidberg.blog.lemonde.fr/2012/02/09/revue-de-campagne-la-campagne-du-modem-semballe/"
403
404
405
class Plantu(GenericLeMondeBlog):
    """Class to retrieve Plantu comics."""
    name = 'plantu'
    long_name = "Plantu"
    url = "http://plantu.blog.lemonde.fr"
    first_url = "http://plantu.blog.lemonde.fr/2014/10/28/stress-test-a-bruxelles/"
411
412
413
class XavierGorce(GenericLeMondeBlog):
    """Class to retrieve Xavier Gorce comics."""
    name = 'gorce'
    long_name = "Xavier Gorce"
    url = "http://xaviergorce.blog.lemonde.fr"
    first_url = "http://xaviergorce.blog.lemonde.fr/2015/01/09/distinction/"
419
420
421
class CartooningForPeace(GenericLeMondeBlog):
    """Class to retrieve Cartooning For Peace comics."""
    name = 'forpeace'
    long_name = "Cartooning For Peace"
    url = "http://cartooningforpeace.blog.lemonde.fr"
    first_url = "http://cartooningforpeace.blog.lemonde.fr/2014/12/15/bado/"
427
428
429
class Aurel(GenericLeMondeBlog):
    """Class to retrieve Aurel comics."""
    name = 'aurel'
    long_name = "Aurel"
    url = "http://aurel.blog.lemonde.fr"
    first_url = "http://aurel.blog.lemonde.fr/2014/09/29/le-senat-repasse-a-droite/"
435
436
437
class LesCulottees(GenericLeMondeBlog):
    """Class to retrieve Les Culottees comics."""
    name = 'culottees'
    long_name = 'Les Culottees'
    url = "http://lesculottees.blog.lemonde.fr"
    first_url = "http://lesculottees.blog.lemonde.fr/2016/01/11/clementine-delait-femme-a-barbe/"
443
444
445
class UneAnneeAuLycee(GenericLeMondeBlog):
    """Class to retrieve Une Annee Au Lycee comics."""
    name = 'lycee'
    long_name = 'Une Annee au Lycee'
    url = 'http://uneanneeaulycee.blog.lemonde.fr'
    first_url = "http://uneanneeaulycee.blog.lemonde.fr/2016/06/13/la-semaine-du-bac-est-arrivee/"
451
452
453 View Code Duplication
class Rall(GenericNavigableComic):
    """Class to retrieve Ted Rall comics."""
    # Also on http://www.gocomics.com/tedrall
    name = 'rall'
    long_name = "Ted Rall"
    url = "http://rall.com/comic"
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    # Not the first but I didn't find an efficient way to retrieve it
    first_url = "http://rall.com/2014/01/30/los-angeles-times-cartoon-well-miss-those-california-flowers"

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Title and description come from the 'og:title' and 'og:description'
        meta tags; images are the 'img' tags of the 'entry-content' div,
        minus the last seven.
        """
        title = soup.find('meta', property='og:title')['content']
        author = soup.find("span", class_="author vcard").find("a").string
        date_str = soup.find("span", class_="entry-date").string
        day = string_to_date(date_str, "%B %d, %Y")
        desc = soup.find('meta', property='og:description')['content']
        content_imgs = soup.find('div', class_='entry-content').find_all('img')
        imgs = content_imgs[:-7]  # remove social media buttons
        return {
            'title': title,
            'author': author,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'description': desc,
            'img': [i['src'] for i in imgs],
        }
483
484
485
class Dilem(GenericNavigableComic):
    """Class to retrieve Ali Dilem comics."""
    name = 'dilem'
    long_name = 'Ali Dilem'
    url = 'http://information.tv5monde.com/dilem'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = "http://information.tv5monde.com/dilem/2004-06-26"

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Return the 'a' tag inside the relevant 'li' tag, or None when
        that 'li' is missing.
        """
        # prev is next / next is prev
        li_class = 'prev' if next_ else 'next'
        li = last_soup.find('li', class_=li_class)
        return li.find('a') if li else None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Title comes from the 'twitter:title' meta tag, images from the
        'og:image' meta tags and the date from the 'dc:date' span.
        """
        short_url = soup.find('link', rel='shortlink')['href']
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        imgs = soup.find_all('meta', property='og:image')
        # Only the first 10 characters are parsed, as "%Y-%m-%d".
        date_str = soup.find('span', property='dc:date')['content'][:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        return {
            'short_url': short_url,
            'title': title,
            'img': [i['content'] for i in imgs],
            'day': day.day,
            'month': day.month,
            'year': day.year,
        }
518
519
520 View Code Duplication
class SpaceAvalanche(GenericNavigableComic):
    """Class to retrieve Space Avalanche comics."""
    name = 'avalanche'
    long_name = 'Space Avalanche'
    url = 'http://www.spaceavalanche.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics."""
        # Hardcoded link-like dictionary; 'title' is needed because
        # get_comic_info reads link['title'].
        return {'href': "http://www.spaceavalanche.com/2009/02/02/irish-sea/", 'title': "Irish Sea"}

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The title is taken from `link` and the date from the three
        numeric path components of the comic URL (year/month/day).
        """
        url_date_re = re.compile('.*/([0-9]*)/([0-9]*)/([0-9]*)/.*$')
        title = link['title']
        url = cls.get_url_from_link(link)
        date_parts = url_date_re.match(url).groups()
        year, month, day = [int(part) for part in date_parts]
        imgs = soup.find("div", class_="entry").find_all("img")
        return {
            'title': title,
            'day': day,
            'month': month,
            'year': year,
            'img': [i['src'] for i in imgs],
        }
548
549
550
class ZenPencils(GenericNavigableComic):
    """Class to retrieve ZenPencils comics."""
    # Also on http://zenpencils.tumblr.com
    # Also on http://www.gocomics.com/zen-pencils
    name = 'zenpencils'
    long_name = 'Zen Pencils'
    url = 'http://zenpencils.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = "http://zenpencils.com/comic/1-ralph-waldo-emerson-make-them-cry/"

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Images are the 'img' tags of the div with id 'comic'; author and
        date are read from the 'post-content' div, title and description
        from the 'og:title' and 'og:description' meta tags.
        """
        imgs = soup.find('div', id='comic').find_all('img')
        post = soup.find('div', class_='post-content')
        author = post.find("span", class_="post-author").find("a").string
        title = soup.find('meta', property='og:title')['content']
        date_str = post.find('span', class_='post-date').string
        day = string_to_date(date_str, "%B %d, %Y")
        # Sanity checks on the page structure.
        assert imgs
        assert all(i['alt'] == i['title'] for i in imgs)
        assert all(i['alt'] in (title, "") for i in imgs)
        desc = soup.find('meta', property='og:description')['content']
        return {
            'title': title,
            'description': desc,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
        }
584
585
586
class ItsTheTie(GenericNavigableComic):
    """Class to retrieve It's the tie comics."""
    # Also on http://itsthetie.tumblr.com
    # Also on https://tapastic.com/series/itsthetie
    name = 'tie'
    long_name = "It's the tie"
    url = "http://itsthetie.com"
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Images are the 'og:image' meta contents followed by the
        'data-oversrc' values not already listed; URLs ending with
        "/2016/01/bonus-panel.png" are dropped. 'tags' is an empty
        string when there is no 'article:tag' meta tag.
        """
        title = soup.find('h1', class_='comic-title').find('a').string
        date_str = soup.find('header', class_='comic-meta entry-meta').find('a').string
        day = string_to_date(date_str, "%B %d, %Y")
        # Bonus images may or may not be in meta og:image.
        imgs_src = [i['content'] for i in soup.find_all('meta', property='og:image')]
        bonus = soup.find_all('img', attrs={'data-oversrc': True})
        bonus_src = [b['data-oversrc'] for b in bonus]
        all_imgs_src = imgs_src + [s for s in bonus_src if s not in imgs_src]
        all_imgs_src = [s for s in all_imgs_src if not s.endswith("/2016/01/bonus-panel.png")]
        tag_meta = soup.find('meta', property='article:tag')
        tags = tag_meta['content'] if tag_meta else ""
        return {
            'title': title,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': all_imgs_src,
            'tags': tags,
        }
619
620 View Code Duplication
621
class PenelopeBagieu(GenericNavigableComic):
    """Class to retrieve comics from Penelope Bagieu's blog."""
    name = 'bagieu'
    long_name = 'Ma vie est tout a fait fascinante (Bagieu)'
    url = 'http://www.penelope-jolicoeur.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.penelope-jolicoeur.com/2007/02/ma-vie-mon-oeuv.html'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Date, images and title are read from the 'date-header' h2, the
        'entry-body' div and the 'entry-header' h3 respectively.
        """
        date_str = soup.find('h2', class_='date-header').string
        # NOTE(review): third argument looks like the locale used to parse
        # day and month names - confirm against string_to_date.
        day = string_to_date(date_str, "%A %d %B %Y", "fr_FR.utf8")
        imgs = soup.find('div', class_='entry-body').find_all('img')
        title = soup.find('h3', class_='entry-header').string
        return {
            'title': title,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
644
645 View Code Duplication
646
class OneOneOneOneComic(GenericNavigableComic):
    """Class to retrieve 1111 Comics."""
    # Also on http://comics1111.tumblr.com
    # Also on https://tapastic.com/series/1111-Comics
    name = '1111'
    long_name = '1111 Comics'
    url = 'http://www.1111comics.me'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Title and date are read from the 'comic-title' h1 and the
        'comic-meta entry-meta' header, images from the 'og:image'
        meta tags.
        """
        title = soup.find('h1', class_='comic-title').find('a').string
        date_str = soup.find('header', class_='comic-meta entry-meta').find('a').string
        day = string_to_date(date_str, "%B %d, %Y")
        imgs = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [i['content'] for i in imgs],
        }
670
671
672
class AngryAtNothing(GenericNavigableComic):
    """Class to retrieve Angry at Nothing comics."""
    # Also on http://tapastic.com/series/Comics-yeah-definitely-comics-
    name = 'angry'
    long_name = 'Angry At Nothing'
    url = 'http://www.angryatnothing.net'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Title and date are read from the 'comic-title' h1 and the
        'comic-meta entry-meta' header, images from the 'og:image'
        meta tags.
        """
        title = soup.find('h1', class_='comic-title').find('a').string
        date_str = soup.find('header', class_='comic-meta entry-meta').find('a').string
        day = string_to_date(date_str, "%B %d, %Y")
        imgs = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [i['content'] for i in imgs],
        }
695
696
697
class NeDroid(GenericNavigableComic):
    """Class to retrieve NeDroid comics."""
    name = 'nedroid'
    long_name = 'NeDroid'
    url = 'http://nedroid.com'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The comic number is extracted from the shortlink URL ('?p=<num>')
        and the date from the file name of the single comic image
        ('<year>-<month>-<day>...').
        """
        # The site URL is escaped so that its dot only matches a literal dot.
        short_url_re = re.compile(r'^%s/\?p=([0-9]*)' % re.escape(cls.url))
        comic_url_re = re.compile('//nedroid.com/comics/([0-9]*)-([0-9]*)-([0-9]*).*')
        short_url = cls.get_url_from_link(soup.find('link', rel='shortlink'))
        num = int(short_url_re.match(short_url).groups()[0])
        imgs = soup.find('div', id='comic').find_all('img')
        # Checked before the first image is accessed.
        assert len(imgs) == 1
        img = imgs[0]
        year, month, day = [int(s) for s in comic_url_re.match(img['src']).groups()]
        title = img['alt']
        title2 = img['title']
        return {
            'short_url': short_url,
            'title': title,
            'title2': title2,
            'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
            'day': day,
            'month': month,
            'year': year,
            'num': num,
        }
728
729 View Code Duplication
730
class Garfield(GenericNavigableComic):
    """Class to retrieve Garfield comics."""
    # Also on http://www.gocomics.com/garfield
    name = 'garfield'
    long_name = 'Garfield'
    url = 'https://garfield.com'
    get_first_comic_link = simulate_first_link
    first_url = 'https://garfield.com/comic/1978/06/19'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic."""
        arrow_class = 'comic-arrow-right' if next_ else 'comic-arrow-left'
        return last_soup.find('a', class_=arrow_class)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The date is extracted from the comic URL
        ('<cls.url>/comic/<year>/<month>/<day>'); images are the
        'img-responsive' images of the 'comic-display' div.
        """
        url = cls.get_url_from_link(link)
        # The site URL is escaped so that its dot only matches a literal dot.
        date_re = re.compile('^%s/comic/([0-9]*)/([0-9]*)/([0-9]*)' % re.escape(cls.url))
        year, month, day = [int(s) for s in date_re.match(url).groups()]
        imgs = soup.find('div', class_='comic-display').find_all('img', class_='img-responsive')
        return {
            'month': month,
            'year': year,
            'day': day,
            'img': [i['src'] for i in imgs],
        }
757
758 View Code Duplication
759
class Dilbert(GenericNavigableComic):
    """Class to retrieve Dilbert comics."""
    # Also on http://www.gocomics.com/dilbert-classics
    name = 'dilbert'
    long_name = 'Dilbert'
    url = 'http://dilbert.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://dilbert.com/strip/1989-04-16'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Looks for the right (next) or left (previous) navigation div and
        returns the first <a> inside it, or None when the div is absent.
        """
        nav_class = 'nav-comic nav-right' if next_ else 'nav-comic nav-left'
        nav_div = last_soup.find('div', class_=nav_class)
        if not nav_div:
            return None
        return nav_div.find('a')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Everything is read from the page's <meta> tags.
        """
        def meta_content(prop):
            """Return the content of the first <meta> with this property."""
            return soup.find('meta', property=prop)['content']

        title = meta_content('og:title')
        image_metas = soup.find_all('meta', property='og:image')
        desc = meta_content('og:description')
        day = string_to_date(meta_content('article:publish_date'), "%B %d, %Y")
        author = meta_content('article:author')
        tags = meta_content('article:tag')
        return {
            'title': title,
            'description': desc,
            'img': [meta['content'] for meta in image_metas],
            'author': author,
            'tags': tags,
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
795
796
797
class VictimsOfCircumsolar(GenericNavigableComic):
    """Class to retrieve VictimsOfCircumsolar comics."""
    name = 'circumsolar'
    long_name = 'Victims Of Circumsolar'
    url = 'http://www.victimsofcircumsolar.com'
    get_navi_link = get_a_navi_comicnavnext_navinext
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.victimsofcircumsolar.com/comic/modern-addiction'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Title and description come from the last og:title / og:description
        <meta> tags; images are those inside the 'comic' div.
        """
        # Date is on the archive page
        def last_meta_content(prop):
            """Return the content of the last <meta> with this property."""
            return soup.find_all('meta', property=prop)[-1]['content']

        title = last_meta_content('og:title')
        desc = last_meta_content('og:description')
        comic_imgs = soup.find('div', id='comic').find_all('img')
        # Every image is expected to repeat the title in both attributes.
        assert all(img['title'] == img['alt'] == title for img in comic_imgs)
        return {
            'title': title,
            'description': desc,
            'img': [img['src'] for img in comic_imgs],
        }
819
820
821
class ThreeWordPhrase(GenericNavigableComic):
    """Class to retrieve Three Word Phrase comics."""
    # Also on http://www.threewordphrase.tumblr.com
    name = 'threeword'
    long_name = 'Three Word Phrase'
    url = 'http://threewordphrase.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics (the parent of the 'first' button image)."""
        first_button = get_soup_at_url(cls.url).find('img', src='/firstlink.gif')
        return first_button.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        The link is the parent of the next/prev button image; None is
        returned when that parent carries no href.
        """
        button_src = '/nextlink.gif' if next_ else '/prevlink.gif'
        anchor = last_soup.find('img', src=button_src).parent
        if anchor.get('href') is None:
            return None
        return anchor

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Images whose source ends with one of the known site-decoration file
        names are ignored; the remaining ones are taken as the comic.
        """
        ignored_suffixes = (
            'link.gif', '32.png', 'twpbookad.jpg',
            'merchad.jpg', 'header.gif', 'tipjar.jpg')
        title_tag = soup.find('title')
        comic_imgs = [img for img in soup.find_all('img')
                      if not img['src'].endswith(ignored_suffixes)]
        alts = (img.get('alt') for img in comic_imgs)
        return {
            'title': title_tag.string if title_tag else None,
            'title2': '  '.join(alt for alt in alts if alt),
            'img': [urljoin_wrapper(cls.url, img['src']) for img in comic_imgs],
        }
853
854
855
class DeadlyPanel(GenericEmptyComic, GenericNavigableComic):
    """Class to retrieve Deadly Panel comics."""
    # Also on https://tapastic.com/series/deadlypanel
    name = 'deadly'
    long_name = 'Deadly Panel'
    url = 'http://www.deadlypanel.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Only the image sources from the 'comic' div are collected.
        """
        comic_imgs = soup.find('div', id='comic').find_all('img')
        # alt and title are expected to carry the same text on every image.
        assert all(img['alt'] == img['title'] for img in comic_imgs)
        return {
            'img': [img['src'] for img in comic_imgs],
        }
872
873
874
class TheGentlemanArmchair(GenericNavigableComic):
    """Class to retrieve The Gentleman Armchair comics."""
    name = 'gentlemanarmchair'
    long_name = 'The Gentleman Armchair'
    url = 'http://thegentlemansarmchair.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Title, author and date are read from the post header elements;
        images are those inside the 'comic' div.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        day = string_to_date(soup.find('span', class_='post-date').string, "%B %d, %Y")
        comic_imgs = soup.find('div', id='comic').find_all('img')
        return {
            'img': [img['src'] for img in comic_imgs],
            'title': title,
            'author': author,
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
898
899
900 View Code Duplication
class MyExtraLife(GenericNavigableComic):
    """Class to retrieve My Extra Life comics."""
    name = 'extralife'
    long_name = 'My Extra Life'
    url = 'http://www.myextralife.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics."""
        home_soup = get_soup_at_url(cls.url)
        return home_soup.find('a', class_='comic_nav_link first_comic_link')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Images with an empty 'src' attribute are left out of the result.
        """
        title = soup.find("h1", class_="comic_title").string
        day = string_to_date(soup.find("span", class_="comic_date").string, "%B %d, %Y")
        comic_imgs = soup.find_all("img", class_="comic")
        # Each image is expected to repeat the comic title in alt and title.
        assert all(img['alt'] == img['title'] == title for img in comic_imgs)
        return {
            'title': title,
            'img': [img['src'] for img in comic_imgs if img["src"]],
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
927
928
929
class SaturdayMorningBreakfastCereal(GenericNavigableComic):
    """Class to retrieve Saturday Morning Breakfast Cereal comics."""
    # Also on http://www.gocomics.com/saturday-morning-breakfast-cereal
    # Also on http://smbc-comics.tumblr.com
    name = 'smbc'
    long_name = 'Saturday Morning Breakfast Cereal'
    url = 'http://www.smbc-comics.com'
    get_navi_link = get_a_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics (the <a rel="start"> of the home page)."""
        home_soup = get_soup_at_url(cls.url)
        return home_soup.find('a', rel='start')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The main image is the 'cc-comic' image; when an 'aftercomic' div is
        present, its first image is appended if it has a non-empty source.
        """
        main_img = soup.find('img', id='cc-comic')
        img_urls = [main_img['src']]
        after_div = soup.find('div', id='aftercomic')
        if after_div:
            bonus_url = after_div.find('img')['src']
            if bonus_url:
                img_urls.append(bonus_url)
        publish_str = soup.find('div', class_='cc-publishtime').contents[0]
        day = string_to_date(publish_str, "%B %d, %Y")
        return {
            'title': main_img['title'],
            'img': [urljoin_wrapper(cls.url, u) for u in img_urls],
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
960
961
962
class PerryBibleFellowship(GenericListableComic):
    """Class to retrieve Perry Bible Fellowship comics."""
    name = 'pbf'
    long_name = 'Perry Bible Fellowship'
    url = 'http://pbfcomics.com'
    get_url_from_archive_element = join_cls_url_to_href

    @classmethod
    def get_archive_elements(cls):
        """Return the home page links of the form /<digits>/, in reverse page order."""
        numbered_link_re = re.compile('^/[0-9]*/$')
        links = get_soup_at_url(cls.url).find_all('a', href=numbered_link_re)
        return reversed(links)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The comic number is the link's 'name' attribute and must agree with
        its href; exactly one comic image is expected on the page.
        """
        comic_url = cls.get_url_from_archive_element(link)
        img_src_re = re.compile('^/archive_b/PBF.*')
        comic_name = link.string
        num = int(link['name'])
        assert link['href'] == '/%d/' % num
        comic_imgs = soup.find_all('img', src=img_src_re)
        assert len(comic_imgs) == 1
        assert comic_imgs[0]['alt'] == comic_name
        return {
            'num': num,
            'name': comic_name,
            'img': [urljoin_wrapper(comic_url, img['src']) for img in comic_imgs],
            'prefix': '%d-' % num,
        }
992
993
994 View Code Duplication
class Mercworks(GenericNavigableComic):
    """Class to retrieve Mercworks comics."""
    # Also on http://mercworks.tumblr.com
    name = 'mercworks'
    long_name = 'Mercworks'
    url = 'http://mercworks.net'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        All fields are read from <meta> tags; a missing og:description
        yields an empty description.
        """
        def named_meta_content(meta_name):
            """Return the content of the first <meta> with this name."""
            return soup.find('meta', attrs={'name': meta_name})['content']

        title = soup.find('meta', property='og:title')['content']
        desc_meta = soup.find('meta', property='og:description')
        if desc_meta:
            desc = desc_meta['content']
        else:
            desc = ""
        author = named_meta_content('shareaholic:article_author_name')
        published = named_meta_content('shareaholic:article_published_time')
        # Only the first 10 characters (YYYY-MM-DD) are parsed.
        day = string_to_date(published[:10], "%Y-%m-%d")
        image_metas = soup.find_all('meta', property='og:image')
        return {
            'img': [meta['content'] for meta in image_metas],
            'title': title,
            'author': author,
            'desc': desc,
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
1023
1024
1025
class BerkeleyMews(GenericListableComic):
    """Class to retrieve Berkeley Mews comics."""
    # Also on http://mews.tumblr.com
    # Also on http://www.gocomics.com/berkeley-mews
    name = 'berkeley'
    long_name = 'Berkeley Mews'
    url = 'http://www.berkeleymews.com'
    get_url_from_archive_element = get_href
    # Matches <url>/?p=<num>; the site URL is escaped so that its dots only
    # match literal dots.
    comic_num_re = re.compile(r'%s/\?p=([0-9]*)$' % re.escape(url))

    @classmethod
    def get_archive_elements(cls):
        """Return the comic links of the archive page, in reverse page order."""
        archive_url = urljoin_wrapper(cls.url, "?page_id=2")
        return reversed(get_soup_at_url(archive_url).find_all('a', href=cls.comic_num_re))

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The number comes from the comic URL and the date from the image URL,
        which is expected to contain /<year>-<month>-<day>-.
        """
        comic_date_re = re.compile('.*/([0-9]*)-([0-9]*)-([0-9]*)-.*')
        url = cls.get_url_from_archive_element(link)
        num = int(cls.comic_num_re.match(url).groups()[0])
        img = soup.find('div', id='comic').find('img')
        assert img['alt'] == img['title']
        title2 = img['title']
        img_url = img['src']
        year, month, day = [int(s) for s in comic_date_re.match(img_url).groups()]
        return {
            'num': num,
            'title': link.string,
            'title2': title2,
            'img': [img_url],
            'year': year,
            'month': month,
            'day': day,
        }
1060
1061
1062
class GenericBouletCorp(GenericNavigableComic):
    """Generic class to retrieve BouletCorp comics in different languages."""
    # Also on http://bouletcorp.tumblr.com
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics (first <a> of the 'centered_nav' div)."""
        return get_soup_at_url(cls.url).find('div', id='centered_nav').find_all('a')[0]

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The date is read from the comic URL, expected to start with
        <cls.url>/<year>/<month>/<day>/. Images without a 'src' attribute
        are left out of the result.
        """
        url = cls.get_url_from_link(link)
        # Escape the site URL so that its dots only match literal dots.
        date_re = re.compile('^%s/([0-9]*)/([0-9]*)/([0-9]*)/' % re.escape(cls.url))
        year, month, day = [int(s) for s in date_re.match(url).groups()]
        imgs = soup.find('div', id='notes').find('div', class_='storycontent').find_all('img')
        texts = '  '.join(t for t in (i.get('title') for i in imgs) if t)
        title = soup.find('title').string
        return {
            'img': [convert_iri_to_plain_ascii_uri(i['src']) for i in imgs if i.get('src') is not None],
            'title': title,
            'texts': texts,
            'year': year,
            'month': month,
            'day': day,
        }
1089
1090
1091
class BouletCorp(GenericBouletCorp):
    """Class to retrieve BouletCorp comics.

    Only the identifying attributes are defined here; the retrieval logic is
    inherited from GenericBouletCorp.
    """
    name = 'boulet'
    long_name = 'Boulet Corp'
    url = 'http://www.bouletcorp.com'
1096
1097
1098
class BouletCorpEn(GenericBouletCorp):
    """Class to retrieve EnglishBouletCorp comics.

    Only the identifying attributes are defined here; the retrieval logic is
    inherited from GenericBouletCorp.
    """
    name = 'boulet_en'
    long_name = 'Boulet Corp English'
    url = 'http://english.bouletcorp.com'
1103
1104
1105
class AmazingSuperPowers(GenericNavigableComic):
    """Class to retrieve Amazing Super Powers comics."""
    name = 'asp'
    long_name = 'Amazing Super Powers'
    url = 'http://www.amazingsuperpowers.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The title is the space-joined 'title' attributes of the images found
        in the 'comic' div.
        """
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        day = string_to_date(soup.find('span', class_='post-date').string, "%B %d, %Y")
        comic_imgs = soup.find('div', id='comic').find_all('img')
        title = ' '.join(img['title'] for img in comic_imgs)
        # alt and title are expected to carry the same text on every image.
        assert all(img['alt'] == img['title'] for img in comic_imgs)
        return {
            'title': title,
            'author': author,
            'img': [img['src'] for img in comic_imgs],
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
1130
1131
1132
class ToonHole(GenericListableComic):
    """Class to retrieve Toon Holes comics."""
    # Also on http://tapastic.com/series/TOONHOLE
    name = 'toonhole'
    long_name = 'Toon Hole'
    url = 'http://www.toonhole.com'
    get_url_from_archive_element = get_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The title is the archive link text; the date is read from the
        'comicdate' div after stripping it and removing the ordinal suffix.
        """
        title = link.string
        raw_date = soup.find('div', class_='comicdate').string.strip()
        day = string_to_date(remove_st_nd_rd_th_from_date(raw_date), "%B %d, %Y")
        comic_imgs = soup.find('div', id='comic').find_all('img')
        # Each image is expected to repeat the comic title in alt and title.
        assert all(img['alt'] == img['title'] == title for img in comic_imgs)
        return {
            'title': title,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [convert_iri_to_plain_ascii_uri(img['src']) for img in comic_imgs],
        }

    @classmethod
    def get_archive_elements(cls):
        """Return the rel="bookmark" links of the archive page, in reverse page order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive/'))
        return reversed(archive_soup.find_all('a', rel='bookmark'))
1160
1161
1162
class Channelate(GenericNavigableComic):
    """Class to retrieve Channelate comics."""
    name = 'channelate'
    long_name = 'Channelate'
    url = 'http://www.channelate.com'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        When the page has an 'extrapanelbutton' div, the page it links to is
        fetched as well and its 'extrapanelimage' images are appended.
        """
        author = soup.find("span", class_="post-author").find("a").string
        day = string_to_date(soup.find('span', class_='post-date').string, '%Y/%m/%d')
        title = soup.find('meta', property='og:title')['content']
        comic_div = soup.find('div', id='comic')
        if comic_div:
            imgs = list(comic_div.find_all('img'))
        else:
            imgs = []
        extra_button = soup.find('div', id='extrapanelbutton')
        if extra_button:
            extra_url = extra_button.find('a')['href']
            # The extra panel lives on its own page.
            imgs.extend(get_soup_at_url(extra_url).find_all('img', class_='extrapanelimage'))
        else:
            extra_url = None
        return {
            'url_extra': extra_url,
            'title': title,
            'author': author,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [img['src'] for img in imgs],
        }
1195
1196
1197
class CyanideAndHappiness(GenericNavigableComic):
    """Class to retrieve Cyanide And Happiness comics."""
    name = 'cyanide'
    long_name = 'Cyanide and Happiness'
    url = 'http://explosm.net'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics (the link titled 'Oldest comic')."""
        home_soup = get_soup_at_url(cls.url)
        return home_soup.find('a', title='Oldest comic')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns None when the navigation link has no href.
        """
        # NOTE(review): the trailing space in 'previous-comic ' is kept
        # exactly as it was - confirm it is intended.
        link_class = 'next-comic' if next_ else 'previous-comic '
        nav_link = last_soup.find('a', class_=link_class)
        if nav_link.get('href') is None:
            return None
        return nav_link

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The number is the second-to-last '/'-separated piece of the og:url
        value; the author text must start with 'by ', which is removed.
        """
        canonical_url = soup.find('meta', property='og:url')['content']
        num = int(canonical_url.split('/')[-2])
        day = string_to_date(soup.find('h3').find('a').string, '%Y.%m.%d')
        credit = soup.find('small', class_="author-credit-name").string
        assert credit.startswith('by ')
        author = credit[3:]
        comic_imgs = soup.find_all('img', id='main-comic')
        return {
            'num': num,
            'author': author,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'prefix': '%d-' % num,
            'img': [convert_iri_to_plain_ascii_uri(urljoin_wrapper(cls.url, img['src'])) for img in comic_imgs]
        }
1235
1236
1237
class MrLovenstein(GenericComic):
    """Class to retrieve Mr Lovenstein comics."""
    # Also on https://tapastic.com/series/MrLovenstein
    name = 'mrlovenstein'
    long_name = 'Mr. Lovenstein'
    url = 'http://www.mrlovenstein.com'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generator to get the next comic. Implementation of GenericComic's abstract method.

        The range of comic numbers is derived from the /comic/<num> links of
        the home page; every number in that range is then fetched in turn.
        """
        # TODO: more info from http://www.mrlovenstein.com/archive
        comic_num_re = re.compile('^/comic/([0-9]*)$')
        home_links = get_soup_at_url(cls.url).find_all('a', href=comic_num_re)
        nums = [int(comic_num_re.match(a['href']).group(1)) for a in home_links]
        first, last = min(nums), max(nums)
        if last_comic:
            # Resume right after the last comic already retrieved.
            first = last_comic['num'] + 1
        img_src_re = re.compile('^/images/comics/')
        for num in range(first, last + 1):
            url = urljoin_wrapper(cls.url, '/comic/%d' % num)
            soup = get_soup_at_url(url)
            imgs = soup.find_all('img', src=img_src_re)[::-1]
            description = soup.find('meta', attrs={'name': 'description'})['content']
            titles = (img.get('title') for img in imgs)
            yield {
                'url': url,
                'num': num,
                'texts': '  '.join(t for t in titles if t),
                'img': [urljoin_wrapper(url, img['src']) for img in imgs],
                'description': description,
            }
1267
1268
1269
class DinosaurComics(GenericListableComic):
    """Class to retrieve Dinosaur Comics comics."""
    name = 'dinosaur'
    long_name = 'Dinosaur Comics'
    url = 'http://www.qwantz.com'
    get_url_from_archive_element = get_href
    # Matches <url>/index.php?comic=<num>; the site URL and the dot of
    # 'index.php' are escaped so that dots only match literal dots.
    comic_link_re = re.compile(r'^%s/index\.php\?comic=([0-9]*)$' % re.escape(url))

    @classmethod
    def get_archive_elements(cls):
        """Return the comic links of the archive page, in reverse page order."""
        archive_url = urljoin_wrapper(cls.url, 'archive.php')
        # first link is random -> skip it
        return reversed(get_soup_at_url(archive_url).find_all('a', href=cls.comic_link_re)[1:])

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The number comes from the comic URL, the date from the archive link
        text and the text from the element following the archive link.
        """
        url = cls.get_url_from_archive_element(link)
        num = int(cls.comic_link_re.match(url).groups()[0])
        date_str = link.string
        text = link.next_sibling.string
        day = string_to_date(remove_st_nd_rd_th_from_date(date_str), "%B %d, %Y")
        # Escape the site URL so that its dots only match literal dots.
        comic_img_re = re.compile('^%s/comics/' % re.escape(cls.url))
        img = soup.find('img', src=comic_img_re)
        return {
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [img.get('src')],
            'title': img.get('title'),
            'text': text,
            'num': num,
        }
1302
1303
1304
class ButterSafe(GenericListableComic):
    """Class to retrieve Butter Safe comics."""
    name = 'butter'
    long_name = 'ButterSafe'
    url = 'http://buttersafe.com'
    get_url_from_archive_element = get_href
    # Matches <url>/<year>/<month>/<day>/...; the site URL is escaped so that
    # its dot only matches a literal dot.
    comic_link_re = re.compile('^%s/([0-9]*)/([0-9]*)/([0-9]*)/.*' % re.escape(url))

    @classmethod
    def get_archive_elements(cls):
        """Return the comic links of the archive page, in reverse page order."""
        archive_url = urljoin_wrapper(cls.url, 'archive/')
        return reversed(get_soup_at_url(archive_url).find_all('a', href=cls.comic_link_re))

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The title is the archive link text and the date is read from the
        comic URL; the image is the first one inside the 'comic' div.
        """
        url = cls.get_url_from_archive_element(link)
        title = link.string
        year, month, day = [int(s) for s in cls.comic_link_re.match(url).groups()]
        img = soup.find('div', id='comic').find('img')
        assert img['alt'] == title
        return {
            'title': title,
            'day': day,
            'month': month,
            'year': year,
            'img': [img['src']],
        }
1332
1333
1334
class CalvinAndHobbes(GenericComic):
    """Class to retrieve Calvin and Hobbes comics."""
    # Also on http://www.gocomics.com/calvinandhobbes/
    name = 'calvin'
    long_name = 'Calvin and Hobbes'
    # This is not through any official webpage but eh...
    url = 'http://marcel-oehler.marcellosendos.ch/comics/ch/'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generator to get the next comic. Implementation of GenericComic's abstract method.

        The index page links to one page per month (<year>/<month>/); each
        month page lists images named <year><month><day>.... Only comics
        dated strictly after the last retrieved one are yielded.
        """
        last_date = get_date_for_comic(
            last_comic) if last_comic else date(1985, 11, 1)
        link_re = re.compile('^([0-9]*)/([0-9]*)/')
        for link in get_soup_at_url(cls.url).find_all('a', href=link_re):
            url = link['href']
            # Keep the string forms: they are reused verbatim in the image
            # regex and in the image URL.
            year, month = link_re.match(url).groups()
            year_num, month_num = int(year), int(month)
            # Skip months that ended before the last retrieved comic.
            if date(year_num, month_num, 1) + timedelta(days=31) < last_date:
                continue
            img_re = re.compile('^%s%s([0-9]*)' % (year, month))
            month_url = urljoin_wrapper(cls.url, url)
            for img in get_soup_at_url(month_url).find_all('img', src=img_re):
                img_src = img['src']
                day = int(img_re.match(img_src).groups()[0])
                comic_date = date(year_num, month_num, day)
                if comic_date > last_date:
                    yield {
                        'url': month_url,
                        'year': year_num,
                        'month': month_num,
                        'day': day,
                        'img': ['%s%s/%s/%s' % (cls.url, year, month, img_src)],
                    }
                    last_date = comic_date
1368
1369
1370
class AbstruseGoose(GenericListableComic):
    """Class to retrieve AbstruseGoose Comics."""
    name = 'abstruse'
    long_name = 'Abstruse Goose'
    url = 'http://abstrusegoose.com'
    get_url_from_archive_element = get_href
    # The site URL is escaped so that its dot only matches a literal dot.
    comic_url_re = re.compile('^%s/([0-9]*)$' % re.escape(url))
    comic_img_re = re.compile('^%s/strips/.*' % re.escape(url))

    @classmethod
    def get_archive_elements(cls):
        """Return the comic links of the archive page, in page order."""
        archive_url = urljoin_wrapper(cls.url, 'archive')
        return get_soup_at_url(archive_url).find_all('a', href=cls.comic_url_re)

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Get information about a particular comics.

        The number comes from the comic URL, the title from the archive link
        text and the image is the first one served from <url>/strips/.
        """
        comic_url = cls.get_url_from_archive_element(archive_elt)
        num = int(cls.comic_url_re.match(comic_url).groups()[0])
        return {
            'num': num,
            'title': archive_elt.string,
            'img': [soup.find('img', src=cls.comic_img_re)['src']]
        }
1393 View Code Duplication
1394
1395
class PhDComics(GenericNavigableComic):
    """Class to retrieve PHD Comics."""
    name = 'phd'
    long_name = 'PhD Comics'
    url = 'http://phdcomics.com/comics/archive.php'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics (the parent of the 'first' button image)."""
        first_button = get_soup_at_url(cls.url).find('img', src='images/first_button.gif')
        return first_button.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns the parent of the next/prev button image, or None when the
        button is not on the page.
        """
        button_src = 'images/next_button.gif' if next_ else 'images/prev_button.gif'
        button = last_soup.find('img', src=button_src)
        if button is None:
            return None
        return button.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        When the date on the page cannot be parsed, a message is printed and
        today's date is used instead.
        """
        date_font = soup.find('font', face='Arial,Helvetica,Geneva,Swiss,SunSans-Regular', color='white')
        date_str = date_font.string.strip()
        try:
            day = string_to_date(date_str, '%m/%d/%Y')
        except ValueError:
            print("Invalid date %s" % date_str)
            day = date.today()
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        comic_src = soup.find('img', id='comic')['src']
        return {
            'year': day.year,
            'month': day.month,
            'day': day.day,
            'img': [comic_src],
            'title': title,
        }
1430
1431
1432 View Code Duplication
class Octopuns(GenericNavigableComic):
    """Class to retrieve Octopuns comics."""
    # Also on http://octopuns.tumblr.com
    name = 'octopuns'
    long_name = 'Octopuns'
    url = 'http://www.octopuns.net'

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics (the parent of the 'First' button image)."""
        first_button = get_soup_at_url(cls.url).find('img', src=re.compile('.*/First.png'))
        return first_button.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        The link is the parent of the Next/Back button image; None is
        returned when that parent carries no href.
        """
        button_pattern = '.*/Next.png' if next_ else '.*/Back.png'
        anchor = last_soup.find('img', src=re.compile(button_pattern)).parent
        if anchor.get('href') is None:
            return None
        return anchor

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Images are taken from the page's <link rel="image_src"> elements.
        """
        title = soup.find('h3', class_='post-title entry-title').string
        header_date = soup.find('h2', class_='date-header').string
        day = string_to_date(header_date, "%A, %B %d, %Y")
        image_links = soup.find_all('link', rel='image_src')
        return {
            'img': [lnk['href'] for lnk in image_links],
            'title': title,
            'day': day.day,
            'month': day.month,
            'year': day.year,
        }
1464
1465
1466
class Quarktees(GenericNavigableComic):
    """Class to retrieve the Quarktees comics."""
    name = 'quarktees'
    long_name = 'Quarktees'
    url = 'http://www.quarktees.com/blogs/news'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.quarktees.com/blogs/news/12486621-coming-soon'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic (looked up by element id)."""
        link_id = 'article-next' if next_ else 'article-prev'
        return last_soup.find('a', id=link_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The title comes from the og:title <meta>; images are all the images
        inside the 'single-article' div.
        """
        title = soup.find('meta', property='og:title')['content']
        article_imgs = soup.find('div', class_='single-article').find_all('img')
        return {
            'title': title,
            'img': [urljoin_wrapper(cls.url, img['src']) for img in article_imgs],
        }
1490
1491
1492
class OverCompensating(GenericNavigableComic):
    """Retriever for the Over Compensating comics."""
    name = 'compensating'
    long_name = 'Over Compensating'
    url = 'http://www.overcompensating.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the home page anchor whose href ends with 'comic=1'."""
        home = get_soup_at_url(cls.url)
        return home.find('a', href=re.compile('comic=1$'))

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the navigation anchor, selected by its title attribute."""
        anchor_title = 'next comic' if next_ else 'go back already'
        return last_soup.find('a', title=anchor_title)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic record: number, image url and image title."""
        strip_src_re = re.compile('^/oc/comics/.*')
        number_re = re.compile('.*comic=([0-9]*)$')
        page_url = cls.get_url_from_link(link)
        # The comic number is the digit run closing the 'comic=' url parameter.
        number = int(number_re.match(page_url).group(1))
        strip = soup.find('img', src=strip_src_re)
        return {
            'num': number,
            'img': [urljoin_wrapper(page_url, strip['src'])],
            'title': strip.get('title')
        }
1522
1523
1524
class Oglaf(GenericNavigableComic):
    """Retriever for the Oglaf comics."""
    name = 'oglaf'
    long_name = 'Oglaf [NSFW]'
    url = 'http://oglaf.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the parent of the home page div whose id is 'st'."""
        start_div = get_soup_at_url(cls.url).find("div", id="st")
        return start_div.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the parent of the 'nx'/'pvs' div, or None when there is none."""
        marker = last_soup.find("div", id="nx" if next_ else "pvs")
        if not marker:
            return None
        return marker.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Gather the title, the images and their hover texts for one page."""
        page_title = soup.find('title').string
        # Exactly one image is expected in the 'tt' div and one with id 'strip'.
        header_pics = soup.find('div', id='tt').find_all('img')
        assert len(header_pics) == 1
        strip_pics = soup.find_all('img', id='strip')
        assert len(strip_pics) == 1
        pictures = header_pics + strip_pics
        hover_text = ' '.join(pic['title'] for pic in pictures)
        return {
            'title': page_title,
            'img': [pic['src'] for pic in pictures],
            'description': hover_text,
        }
1557
1558
1559
class ScandinaviaAndTheWorld(GenericNavigableComic):
    """Retriever for the Scandinavia And The World comics."""
    name = 'satw'
    long_name = 'Scandinavia And The World'
    url = 'http://satwcomic.com'
    get_first_comic_link = simulate_first_link
    first_url = 'http://satwcomic.com/sweden-denmark-and-norway'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor whose accesskey is 'n' (next) or 'p' (previous)."""
        access_key = 'n' if next_ else 'p'
        return last_soup.find('a', accesskey=access_key)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Read the title, the description and the image urls of a page."""
        label = soup.find('meta', attrs={'name': 'twitter:label1'})['content']
        summary = soup.find('meta', property='og:description')['content']
        pictures = soup.find_all('img', itemprop="image")
        return {
            'title': label,
            'description': summary,
            'img': [pic['src'] for pic in pictures],
        }
1583
1584
1585
class SomethingOfThatIlk(GenericEmptyComic):  # Does not exist anymore
    """Placeholder for the Something Of That Ilk comics."""
    # Only the identification attributes are defined here.
    name = 'somethingofthatilk'
    long_name = 'Something Of That Ilk'
    url = 'http://www.somethingofthatilk.com'
1590
1591
1592
class InfiniteMonkeyBusiness(GenericNavigableComic):
    """Retriever for the Infinite Monkey Business comics."""
    name = 'monkey'
    long_name = 'Infinite Monkey Business'
    url = 'http://infinitemonkeybusiness.net'
    get_navi_link = get_a_navi_comicnavnext_navinext
    get_first_comic_link = simulate_first_link
    first_url = 'http://infinitemonkeybusiness.net/comic/pillory/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the title and the image urls found in the 'comic' div."""
        page_title = soup.find('meta', property='og:title')['content']
        pictures = soup.find('div', id='comic').find_all('img')
        return {
            'title': page_title,
            'img': [pic['src'] for pic in pictures],
        }
1610
1611
1612
class Wondermark(GenericListableComic):
    """Retriever for the Wondermark comics."""
    name = 'wondermark'
    long_name = 'Wondermark'
    url = 'http://wondermark.com'
    get_url_from_archive_element = get_href

    @classmethod
    def get_archive_elements(cls):
        """Return the archive page's 'bookmark' anchors, in reversed page order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive/'))
        return reversed(archive_soup.find_all('a', rel='bookmark'))

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the date, image, title, alt text and tags of a post."""
        posted = soup.find('div', class_='postdate').find('em').string
        day = string_to_date(remove_st_nd_rd_th_from_date(posted), "%B %d, %Y")
        comic_div = soup.find('div', id='comic')
        # A post without a 'comic' div is recorded with no image and empty texts.
        img_src, alt, title = [], '', ''
        if comic_div:
            picture = comic_div.find('img')
            img_src = [picture['src']]
            alt = picture['alt']
            assert alt == picture['title']
            title = soup.find('meta', property='og:title')['content']
        tag_links = soup.find('div', class_='postmeta').find_all('a', rel='tag')
        return {
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': img_src,
            'title': title,
            'alt': alt,
            'tags': ' '.join(tag.string for tag in tag_links),
        }
1649
1650 View Code Duplication
1651
class WarehouseComic(GenericNavigableComic):
    """Retriever for the Warehouse Comic comics."""
    name = 'warehouse'
    long_name = 'Warehouse Comic'
    url = 'http://warehousecomic.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the images, the title and the publication date of a post."""
        post_title = soup.find('h2', class_='post-title').string
        posted = soup.find('span', class_='post-date').string
        published = string_to_date(posted, "%B %d, %Y")
        pictures = soup.find('div', id='comic').find_all('img')
        return {
            'img': [pic['src'] for pic in pictures],
            'title': post_title,
            'day': published.day,
            'month': published.month,
            'year': published.year,
        }
1673
1674
1675
class JustSayEh(GenericNavigableComic):
    """Retriever for the Just Say Eh comics."""
    # Also on http://tapastic.com/series/Just-Say-Eh
    name = 'justsayeh'
    long_name = 'Just Say Eh'
    url = 'http://www.justsayeh.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the images, the title and the alt text of a post."""
        post_title = soup.find('h2', class_='post-title').string
        pictures = soup.find("div", id="comic").find_all("img")
        # Every image is expected to carry the same text in 'alt' and 'title'.
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        first_alt = pictures[0]['alt']
        return {
            'img': [pic['src'] for pic in pictures],
            'title': post_title,
            'alt': first_alt,
        }
1696
1697
1698
class MouseBearComedy(GenericNavigableComic):
    """Retriever for the Mouse Bear Comedy comics."""
    # Also on http://mousebearcomedy.tumblr.com
    name = 'mousebear'
    long_name = 'Mouse Bear Comedy'
    url = 'http://www.mousebearcomedy.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the date, the images, the title and the author of a post."""
        post_title = soup.find('h2', class_='post-title').string
        writer = soup.find("span", class_="post-author").find("a").string
        posted = soup.find("span", class_="post-date").string
        published = string_to_date(posted, '%B %d, %Y')
        pictures = soup.find("div", id="comic").find_all("img")
        # 'alt' and 'title' of each image are expected to equal the post title.
        assert all(pic['alt'] == pic['title'] == post_title for pic in pictures)
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [pic['src'] for pic in pictures],
            'title': post_title,
            'author': writer,
        }
1724
1725 View Code Duplication
1726
class BigFootJustice(GenericNavigableComic):
    """Retriever for the Big Foot Justice comics."""
    # Also on http://tapastic.com/series/bigfoot-justice
    name = 'bigfoot'
    long_name = 'Big Foot Justice'
    url = 'http://bigfootjustice.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the images of a page and a title joined from their titles."""
        pictures = soup.find('div', id='comic').find_all('img')
        assert all(pic['title'] == pic['alt'] for pic in pictures)
        # The comic title is the space-joined 'title' attributes of the images.
        joined_title = ' '.join(pic['title'] for pic in pictures)
        return {
            'img': [pic['src'] for pic in pictures],
            'title': joined_title,
        }
1745
1746
1747 View Code Duplication
class RespawnComic(GenericNavigableComic):
    """Class to retrieve Respawn Comic."""
    # Also on http://respawncomic.tumblr.com
    name = 'respawn'
    long_name = 'Respawn Comic'
    # Fixed: the literal used to end with a stray space, which is not part of
    # the site address and would leak into anything built from cls.url.
    url = 'http://respawncomic.com'
    get_navi_link = get_a_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://respawncomic.com/comic/c0001/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Reads the title from the 'og:title' meta tag, the author and the
        publication time from the 'shareaholic:*' meta tags and the images
        from the 'og:image' meta tags.

        Returns a dict with keys 'title', 'author', 'day', 'month', 'year'
        and 'img' (list of image urls).
        """
        title = soup.find('meta', property='og:title')['content']
        author = soup.find('meta', attrs={'name': 'shareaholic:article_author_name'})['content']
        date_str = soup.find('meta', attrs={'name': 'shareaholic:article_published_time'})['content']
        # Only the leading 'YYYY-MM-DD' part of the timestamp is parsed.
        date_str = date_str[:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        imgs = soup.find_all('meta', property='og:image')
        # 'og:image' urls that are excluded from the comic's image list.
        skip_imgs = {
            'http://respawncomic.com/wp-content/uploads/2016/03/site/HAROLD2.png',
            'http://respawncomic.com/wp-content/uploads/2016/03/site/DEVA.png'
        }
        return {
            'title': title,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [i['content'] for i in imgs if i['content'] not in skip_imgs],
        }
1778
1779
1780
class SafelyEndangered(GenericNavigableComic):
    """Retriever for the Safely Endangered comics."""
    # Also on http://tumblr.safelyendangered.com
    name = 'endangered'
    long_name = 'Safely Endangered'
    url = 'http://www.safelyendangered.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.safelyendangered.com/comic/ignored/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the date, the images, the title and the alt text of a post."""
        post_title = soup.find('h2', class_='post-title').string
        posted = soup.find('span', class_='post-date').string
        published = string_to_date(posted, '%B %d, %Y')
        pictures = soup.find('div', id='comic').find_all('img')
        # The alt text of the first image is reported for the whole comic.
        first_alt = pictures[0]['alt']
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [pic['src'] for pic in pictures],
            'title': post_title,
            'alt': first_alt,
        }
1807
1808
1809
class PicturesInBoxes(GenericNavigableComic):
    """Retriever for the Pictures In Boxes comics."""
    # Also on http://picturesinboxescomic.tumblr.com
    name = 'picturesinboxes'
    long_name = 'Pictures in Boxes'
    url = 'http://www.picturesinboxes.com'
    get_navi_link = get_a_navi_navinext
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.picturesinboxes.com/2013/10/26/tetris/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the date, the images, the title and the author of a post."""
        post_title = soup.find('h2', class_='post-title').string
        writer = soup.find("span", class_="post-author").find("a").string
        posted = soup.find('span', class_='post-date').string
        published = string_to_date(posted, '%B %d, %Y')
        pictures = soup.find('div', class_='comicpane').find_all('img')
        # At least one image is required, each one labelled with the post title.
        assert pictures
        assert all(pic['title'] == pic['alt'] == post_title for pic in pictures)
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [pic['src'] for pic in pictures],
            'title': post_title,
            'author': writer,
        }
1837
1838
1839
class Penmen(GenericEmptyComic):
    """Placeholder for the Penmen comics."""
    # Only the identification attributes are defined here.
    name = 'penmen'
    long_name = 'Penmen'
    url = 'http://penmen.com'
1844
1845
1846
class TheDoghouseDiaries(GenericNavigableComic):
    """Retriever for The Dog House Diaries comics."""
    name = 'doghouse'
    long_name = 'The Dog House Diaries'
    url = 'http://thedoghousediaries.com'

    @classmethod
    def get_first_comic_link(cls):
        """Return the home page anchor whose id is 'firstlink'."""
        home = get_soup_at_url(cls.url)
        return home.find('a', id='firstlink')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the 'nextlink' or 'previouslink' anchor of the page."""
        anchor_id = 'nextlink' if next_ else 'previouslink'
        return last_soup.find('a', id=anchor_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return both titles, the alt text, the image url and the comic number."""
        strip = soup.find('img', src=re.compile('^dhdcomics/.*'))
        page_url = cls.get_url_from_link(link)
        header = soup.find('h2', id='titleheader').string
        subtext = soup.find('div', id='subtext').string
        hover_text = strip.get('title')
        # The src is stripped of surrounding whitespace before being joined.
        strip_url = urljoin_wrapper(page_url, strip['src'].strip())
        # The comic number is the last path component of the page url.
        number = int(page_url.split('/')[-1])
        return {
            'title': header,
            'title2': subtext,
            'alt': hover_text,
            'img': [strip_url],
            'num': number,
        }
1875
1876
1877
class InvisibleBread(GenericListableComic):
    """Class to retrieve Invisible Bread comics."""
    # Also on http://www.gocomics.com/invisible-bread
    name = 'invisiblebread'
    long_name = 'Invisible Bread'
    url = 'http://invisiblebread.com'

    @classmethod
    def get_archive_elements(cls):
        """Get the archive elements: 'archive-title' cells, in reversed page order."""
        archive_url = urljoin_wrapper(cls.url, 'archives/')
        return reversed(get_soup_at_url(archive_url).find_all('td', class_='archive-title'))

    @classmethod
    def get_url_from_archive_element(cls, td):
        """Get url corresponding to an archive element (href of its first link)."""
        return td.find('a')['href']

    @classmethod
    def get_comic_info(cls, soup, td):
        """Get information about a particular comics.

        The title comes from the archive cell's link, the month and day from
        the cell's previous sibling and the year from the comic url.

        Returns a dict with keys 'month', 'year', 'day', 'img' and 'title'.
        """
        url = cls.get_url_from_archive_element(td)
        title = td.find('a').string
        month_and_day = td.previous_sibling.string
        # cls.url is escaped so that its dots only match literal dots.
        link_re = re.compile('^%s/([0-9]+)/' % re.escape(cls.url))
        year = link_re.match(url).group(1)
        date_str = month_and_day + ' ' + year
        day = string_to_date(date_str, '%b %d %Y')
        # A single image is read from the 'comic' div (the former length check
        # on this one-element list could never fail and was dropped).
        imgs = [soup.find('div', id='comic').find('img')]
        assert all(i['title'] == i['alt'] == title for i in imgs)
        return {
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
            'title': title,
        }
1913
1914
1915
class DiscoBleach(GenericEmptyComic):  # Does not work anymore
    """Placeholder for the Disco Bleach comics."""
    # Only the identification attributes are defined here.
    name = 'discobleach'
    long_name = 'Disco Bleach'
    url = 'http://discobleach.com'
1920
1921
1922
class TubeyToons(GenericEmptyComic):  # Does not work anymore
    """Placeholder for the TubeyToons comics."""
    # Also on http://tapastic.com/series/Tubey-Toons
    # Also on http://tubeytoons.tumblr.com
    # Only the identification attributes are defined here.
    name = 'tubeytoons'
    long_name = 'Tubey Toons'
    url = 'http://tubeytoons.com'
1929
1930
1931
class CompletelySeriousComics(GenericNavigableComic):
    """Retriever for the Completely Serious comics."""
    name = 'completelyserious'
    long_name = 'Completely Serious Comics'
    url = 'http://completelyseriouscomics.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the date, images, title, alt text and author of a post."""
        post_title = soup.find('h2', class_='post-title').string
        # The author is read from the second child of the 'post-author' span.
        writer = soup.find('span', class_='post-author').contents[1].string
        posted = soup.find('span', class_='post-date').string
        published = string_to_date(posted, '%B %d, %Y')
        pictures = soup.find('div', class_='comicpane').find_all('img')
        assert pictures
        hover_text = pictures[0]['title']
        # All images must share that text in both 'title' and 'alt'.
        assert all(pic['title'] == pic['alt'] == hover_text for pic in pictures)
        return {
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [pic['src'] for pic in pictures],
            'title': post_title,
            'alt': hover_text,
            'author': writer,
        }
1959
1960
1961
class PoorlyDrawnLines(GenericListableComic):
    """Class to retrieve Poorly Drawn Lines comics."""
    # Also on http://pdlcomics.tumblr.com
    name = 'poorlydrawn'
    long_name = 'Poorly Drawn Lines'
    url = 'http://poorlydrawnlines.com'
    get_url_from_archive_element = get_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with keys 'img' (urls of the images found in the
        'post' div, at most one) and 'title' (the image's title attribute,
        or an empty string when there is no image or no title).
        """
        imgs = soup.find('div', class_='post').find_all('img')
        assert len(imgs) <= 1
        return {
            'img': [i['src'] for i in imgs],
            'title': imgs[0].get('title', "") if imgs else "",
        }

    @classmethod
    def get_archive_elements(cls):
        """Get the archive elements: links to comic pages, in reversed page order."""
        archive_url = urljoin_wrapper(cls.url, 'archive')
        # cls.url is escaped so that its dots only match literal dots; the
        # trailing '.' requires at least one character after '/comic/'.
        url_re = re.compile('^%s/comic/.' % re.escape(cls.url))
        return reversed(get_soup_at_url(archive_url).find_all('a', href=url_re))
1984
1985
1986 View Code Duplication
class LoadingComics(GenericNavigableComic):
    """Retriever for the Loading Artist comics."""
    name = 'loadingartist'
    long_name = 'Loading Artist'
    url = 'http://www.loadingartist.com/latest'

    @classmethod
    def get_first_comic_link(cls):
        """Return the anchor titled "First" on the page at cls.url."""
        latest = get_soup_at_url(cls.url)
        return latest.find('a', title="First")

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor titled 'Next' or 'Previous'."""
        anchor_title = 'Next' if next_ else 'Previous'
        return last_soup.find('a', title=anchor_title)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the title, the images and the publication date of a page."""
        heading = soup.find('h1').string
        posted = soup.find('span', class_='date').string.strip()
        published = string_to_date(posted, "%B %d, %Y")
        # Only images with empty 'alt' and 'title' attributes are kept.
        pictures = soup.find('div', class_='comic').find_all('img', alt='', title='')
        return {
            'title': heading,
            'img': [pic['src'] for pic in pictures],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2016
2017
2018
class ChuckleADuck(GenericNavigableComic):
    """Retriever for the Chuckle-A-Duck comics."""
    name = 'chuckleaduck'
    long_name = 'Chuckle-A-duck'
    url = 'http://chuckleaduck.com'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the date, the images, the title and the author of a post."""
        posted = soup.find('span', class_='post-date').string
        published = string_to_date(remove_st_nd_rd_th_from_date(posted), "%B %d, %Y")
        writer = soup.find('span', class_='post-author').string
        comic_div = soup.find('div', id='comic')
        # Without a 'comic' div the post has no image and an empty title.
        pictures = comic_div.find_all('img') if comic_div else []
        first_title = pictures[0]['title'] if pictures else ""
        assert all(pic['title'] == pic['alt'] == first_title for pic in pictures)
        return {
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [pic['src'] for pic in pictures],
            'title': first_title,
            'author': writer,
        }
2044
2045
2046
class DepressedAlien(GenericNavigableComic):
    """Retriever for the Depressed Alien comics."""
    name = 'depressedalien'
    long_name = 'Depressed Alien'
    url = 'http://depressedalien.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the parent of the home page image named 'beginArrow'."""
        begin_arrow = get_soup_at_url(cls.url).find('img', attrs={'name': 'beginArrow'})
        return begin_arrow.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the parent of the image named 'rightArrow' or 'leftArrow'."""
        arrow_name = 'rightArrow' if next_ else 'leftArrow'
        return last_soup.find('img', attrs={'name': arrow_name}).parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the title and the image urls read from the meta tags."""
        card_title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        image_metas = soup.find_all('meta', property='og:image')
        return {
            'title': card_title,
            'img': [meta['content'] for meta in image_metas],
        }
2072
2073
2074
class ThingsInSquares(GenericListableComic):
    """Retriever for the Things In Squares comics."""
    # This can be retrieved in other languages
    # Also on https://tapastic.com/series/Things-in-Squares
    name = 'squares'
    long_name = 'Things in squares'
    url = 'http://www.thingsinsquares.com'

    @classmethod
    def get_comic_info(cls, soup, tr):
        """Combine the archive row (title, date) with the comic page details."""
        # An archive row must hold exactly three cells; the first is not used.
        _, link_cell, date_cell = tr.find_all('td')
        anchor = link_cell.find('a')
        published = string_to_date(date_cell.string, "%m.%d.%y")
        row_title = anchor.string
        page_title = soup.find('meta', property='og:title')['content']
        desc_meta = soup.find('meta', property='og:description')
        # The description is optional on the page.
        summary = desc_meta['content'] if desc_meta else ''
        tag_metas = soup.find_all('meta', property='article:tag')
        tags = ' '.join(meta['content'] for meta in tag_metas)
        pictures = soup.find('div', class_='entry-content').find_all('img')
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'title': row_title,
            'title2': page_title,
            'description': summary,
            'tags': tags,
            'img': [pic['src'] for pic in pictures],
            'alt': ' '.join(pic['alt'] for pic in pictures),
        }

    @classmethod
    def get_url_from_archive_element(cls, tr):
        """Return the href of the link held in the second cell of the row."""
        _, link_cell, _ = tr.find_all('td')
        return link_cell.find('a')['href']

    @classmethod
    def get_archive_elements(cls):
        """Return the rows of the archive table body, in reversed page order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive-2'))
        return reversed(archive_soup.find('tbody').find_all('tr'))
2116
2117
2118
class HappleTea(GenericNavigableComic):
    """Retriever for the Happle Tea comics."""
    name = 'happletea'
    long_name = 'Happle Tea'
    url = 'http://www.happletea.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the title, images, alt text, date and author of a post."""
        pictures = soup.find('div', id='comic').find_all('img')
        # Title, author and date are all looked up inside the 'post-content' div.
        content = soup.find('div', class_='post-content')
        post_title = content.find('h2', class_='post-title').string
        writer = content.find('a', rel='author').string
        posted = content.find('span', class_='post-date').string
        published = string_to_date(posted, "%B %d, %Y")
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        return {
            'title': post_title,
            'img': [pic['src'] for pic in pictures],
            'alt': ''.join(pic['alt'] for pic in pictures),
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'author': writer,
        }
2145
2146
2147
class FatAwesomeComics(GenericNavigableComic):
    """Retriever for the Fat Awesome comics."""
    # Also on http://fatawesomecomedy.tumblr.com
    name = 'fatawesome'
    long_name = 'Fat Awesome'
    url = 'http://fatawesome.com/comics'
    get_navi_link = get_a_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://fatawesome.com/shortbus/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the title, description, tags, alt text, image and date."""
        card_title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        summary = soup.find('meta', attrs={'name': 'description'})['content']
        # Only the first 'article:tag' meta is read; it may be absent.
        tag_meta = soup.find('meta', property='article:tag')
        tags = tag_meta['content'] if tag_meta else ""
        # Only the leading 'YYYY-MM-DD' part of the publication time is parsed.
        posted = soup.find('meta', property='article:published_time')['content'][:10]
        published = string_to_date(posted, "%Y-%m-%d")
        pictures = soup.find_all('img', attrs={'data-recalc-dims': "1"})
        assert len(pictures) == 1
        return {
            'title': card_title,
            'description': summary,
            'tags': tags,
            'alt': "".join(pic['alt'] for pic in pictures),
            # The query string is cut off the image source.
            'img': [pic['src'].rsplit('?', 1)[0] for pic in pictures],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2178
2179
2180
class AnythingComic(GenericListableComic):
    """Retriever for the Anything Comic comics."""
    # Also on http://tapastic.com/series/anything
    name = 'anythingcomic'
    long_name = 'Anything Comic'
    url = 'http://www.anythingcomic.com'

    @classmethod
    def get_archive_elements(cls):
        """Return the rows of the 'chapter_table' table that describe comics."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive/'))
        rows = archive_soup.find('table', id='chapter_table').find_all('tr')
        # The first 2 <tr>'s do not correspond to comics
        return rows[2:]

    @classmethod
    def get_url_from_archive_element(cls, tr):
        """Return the comic url built from the link in the row's second cell."""
        # A row must hold exactly four cells; only the second one is used here.
        _, comic_cell, _, _ = tr.find_all('td')
        anchor = comic_cell.find('a')
        return urljoin_wrapper(cls.url, anchor['href'])

    @classmethod
    def get_comic_info(cls, soup, tr):
        """Combine the archive row (number, title, date) with the page image."""
        num_cell, comic_cell, date_cell, _ = tr.find_all('td')
        number = int(num_cell.string)
        anchor = comic_cell.find('a')
        row_title = anchor.string
        pictures = soup.find_all('img', id='comic_image')
        published = string_to_date(date_cell.string, '%d %b %Y %I:%M %p')
        assert len(pictures) == 1
        assert all(pic.get('alt') == pic.get('title') for pic in pictures)
        return {
            'num': number,
            'title': row_title,
            'alt': pictures[0].get('alt', ''),
            'img': [pic['src'] for pic in pictures],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2220
2221
2222
class LonnieMillsap(GenericNavigableComic):
    """Retriever for Lonnie Millsap's comics."""
    name = 'millsap'
    long_name = 'Lonnie Millsap'
    url = 'http://www.lonniemillsap.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.lonniemillsap.com/?p=42'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the title, the author, the images and the date of a post."""
        post_title = soup.find('h2', class_='post-title').string
        # Author, date and images are looked up inside the 'post-content' div.
        content = soup.find('div', class_='post-content')
        writer = content.find("span", class_="post-author").find("a").string
        posted = content.find("span", class_="post-date").string
        published = string_to_date(posted, "%B %d, %Y")
        pictures = content.find("div", class_="entry").find_all("img")
        return {
            'title': post_title,
            'author': writer,
            'img': [pic['src'] for pic in pictures],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2248
2249
2250 View Code Duplication
class LinsEditions(GenericNavigableComic):
    """Retriever for the L.I.N.S. Editions comics."""
    # Also on http://linscomics.tumblr.com
    name = 'lins'
    long_name = 'L.I.N.S. Editions'
    url = 'https://linsedition.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'https://linsedition.com/2011/09/07/l-i-n-s/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the title, the images and the date read from the meta tags."""
        page_title = soup.find('meta', property='og:title')['content']
        image_metas = soup.find_all('meta', property='og:image')
        # Only the leading 'YYYY-MM-DD' part of the publication time is parsed.
        posted = soup.find('meta', property='article:published_time')['content'][:10]
        published = string_to_date(posted, "%Y-%m-%d")
        return {
            'title': page_title,
            'img': [meta['content'] for meta in image_metas],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2274
2275
2276
class ThorsThundershack(GenericNavigableComic):
    """Retriever for the Thor's Thundershack webcomic."""
    # Also on http://tapastic.com/series/Thors-Thundershac
    name = 'thor'
    long_name = 'Thor\'s Thundershack'
    url = 'http://www.thorsthundershack.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'first navlink' anchor found on the page at cls.url."""
        home = get_soup_at_url(cls.url)
        return home.find('a', class_='first navlink')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the first next/prev anchor whose href is not '/comic'.

        Returns None when no such anchor is found.
        """
        rel = 'next' if next_ else 'prev'
        candidates = last_soup.find_all('a', rel=rel)
        return next((a for a in candidates if a['href'] != '/comic'), None)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for one comic from the page's metadata."""
        title = soup.find('meta', attrs={'name': 'description'})["content"]
        description = soup.find('div', itemprop='articleBody').text
        author = soup.find('span', itemprop='author copyrightHolder').string
        pictures = soup.find_all('img', itemprop='image')
        # Sanity check: each image carries the same text as title and alt.
        for pic in pictures:
            assert pic['title'] == pic['alt']
        if pictures:
            alt = pictures[0]['alt']
        else:
            alt = ""
        raw_date = soup.find('time', itemprop='datePublished')["datetime"]
        published = string_to_date(raw_date, "%Y-%m-%d %H:%M:%S")
        # Image sources are joined onto the site URL.
        sources = [urljoin_wrapper(cls.url, pic['src']) for pic in pictures]
        return {
            'img': sources,
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'author': author,
            'title': title,
            'alt': alt,
            'description': description,
        }
2318
2319
2320
class GerbilWithAJetpack(GenericNavigableComic):
    """Retriever for the Gerbil With A Jetpack webcomic."""
    name = 'gerbil'
    long_name = 'Gerbil With A Jetpack'
    url = 'http://gerbilwithajetpack.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for one comic page.

        The first image's alt text is used as the comic's alt; an
        IndexError is raised when the 'comic' div holds no image.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find("div", id="comic").find_all("img")
        alt = pictures[0]['alt']
        # Sanity check: every image shares the same alt and title text.
        for pic in pictures:
            assert pic['alt'] == pic['title'] == alt
        sources = [pic['src'] for pic in pictures]
        return {
            'img': sources,
            'title': title,
            'alt': alt,
            'author': author,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2347
2348
2349
class EveryDayBlues(GenericNavigableComic):
    """Retriever for the Every Day Blues webcomic."""
    name = "blues"
    long_name = "Every Day Blues"
    url = "http://everydayblues.net"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for one comic page."""
        title = soup.find("h2", class_="post-title").string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        raw_date = soup.find("span", class_="post-date").string
        # The date is parsed with a German locale passed to string_to_date.
        published = string_to_date(raw_date, "%d. %B %Y", "de_DE.utf8")
        pictures = soup.find("div", id="comic").find_all("img")
        # Sanity checks: image alt/title repeat the page title, one image max.
        for pic in pictures:
            assert pic['alt'] == pic['title'] == title
        assert len(pictures) <= 1
        sources = [pic['src'] for pic in pictures]
        return {
            'img': sources,
            'title': title,
            'author': author,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2375
2376
2377
class BiterComics(GenericNavigableComic):
    """Retriever for the Biter Comics webcomic."""
    name = "biter"
    long_name = "Biter Comics"
    url = "http://www.bitercomics.com"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for one comic page (exactly one image expected)."""
        title = soup.find("h1", class_="entry-title").string
        author_span = soup.find("span", class_="author vcard")
        author = author_span.find("a").string
        raw_date = soup.find("span", class_="entry-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find("div", id="comic").find_all("img")
        # Sanity checks: alt equals title on each image, exactly one image.
        for pic in pictures:
            assert pic['alt'] == pic['title']
        assert len(pictures) == 1
        alt = pictures[0]['alt']
        sources = [pic['src'] for pic in pictures]
        return {
            'img': sources,
            'title': title,
            'alt': alt,
            'author': author,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2405
2406
2407 View Code Duplication
class TheAwkwardYeti(GenericNavigableComic):
    """Retriever for The Awkward Yeti webcomic."""
    # Also on http://www.gocomics.com/the-awkward-yeti
    # Also on http://larstheyeti.tumblr.com
    # Also on https://tapastic.com/series/TheAwkwardYeti
    name = 'yeti'
    long_name = 'The Awkward Yeti'
    url = 'http://theawkwardyeti.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for one comic page."""
        title = soup.find('h2', class_='post-title').string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find("div", id="comic").find_all("img")
        # Sanity check on the first image only: its alt must equal its title.
        assert not pictures or pictures[0]['alt'] == pictures[0]['title']
        sources = [pic['src'] for pic in pictures]
        return {
            'img': sources,
            'title': title,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2433
2434
2435
class PleasantThoughts(GenericNavigableComic):
    """Retriever for the Pleasant Thoughts webcomic."""
    name = 'pleasant'
    long_name = 'Pleasant Thoughts'
    url = 'http://pleasant-thoughts.com'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the title and image URLs found in the 'post-content' div."""
        content = soup.find('div', class_='post-content')
        heading = content.find('h2', class_='post-title')
        title = heading.string
        entry = content.find("div", class_="entry")
        sources = [pic['src'] for pic in entry.find_all("img")]
        return {
            'title': title,
            'img': sources,
        }
2453
2454
2455
class MisterAndMe(GenericNavigableComic):
    """Retriever for the Mister & Me webcomic."""
    # Also on http://www.gocomics.com/mister-and-me
    # Also on https://tapastic.com/series/Mister-and-Me
    name = 'mister'
    long_name = 'Mister & Me'
    url = 'http://www.mister-and-me.com'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for one comic page (at most one image expected)."""
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find("div", id="comic").find_all("img")
        # Sanity checks: alt equals title on each image, one image at most.
        for pic in pictures:
            assert pic['alt'] == pic['title']
        assert len(pictures) <= 1
        if pictures:
            alt = pictures[0]['alt']
        else:
            alt = ""
        sources = [pic['src'] for pic in pictures]
        return {
            'img': sources,
            'title': title,
            'alt': alt,
            'author': author,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2485
2486
2487
class LastPlaceComics(GenericNavigableComic):
    """Retriever for the Last Place Comics webcomic."""
    name = 'lastplace'
    long_name = 'Last Place Comics'
    url = "http://lastplacecomics.com"
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for one comic page (at most one image expected)."""
        heading = soup.find('h2', class_='post-title')
        title = heading.string
        author = soup.find("span", class_="post-author").find("a").string
        date_text = soup.find("span", class_="post-date").string
        when = string_to_date(date_text, "%B %d, %Y")
        comic_div = soup.find("div", id="comic")
        images = comic_div.find_all("img")
        # Sanity checks: alt equals title on each image, one image at most.
        for image in images:
            assert image['alt'] == image['title']
        assert len(images) <= 1
        alt = ""
        if images:
            alt = images[0]['alt']
        return {
            'img': [image['src'] for image in images],
            'title': title,
            'alt': alt,
            'author': author,
            'day': when.day,
            'month': when.month,
            'year': when.year
        }
2515
2516
2517
class TalesOfAbsurdity(GenericNavigableComic):
    """Retriever for the Tales of Absurdity webcomic."""
    # Also on http://tapastic.com/series/Tales-Of-Absurdity
    # Also on http://talesofabsurdity.tumblr.com
    name = 'absurdity'
    long_name = 'Tales of Absurdity'
    url = 'http://talesofabsurdity.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for one comic page."""
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find("div", id="comic").find_all("img")
        # Sanity check: alt equals title on each image.
        for pic in pictures:
            assert pic['alt'] == pic['title']
        if pictures:
            alt = pictures[0]['alt']
        else:
            alt = ""
        sources = [pic['src'] for pic in pictures]
        return {
            'img': sources,
            'title': title,
            'alt': alt,
            'author': author,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2546
2547
2548
class EndlessOrigami(GenericNavigableComic):
    """Retriever for the Endless Origami webcomic."""
    name = "origami"
    long_name = "Endless Origami"
    url = "http://endlessorigami.com"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for one comic page."""
        heading = soup.find('h2', class_='post-title')
        title = heading.string
        author = soup.find("span", class_="post-author").find("a").string
        date_text = soup.find("span", class_="post-date").string
        when = string_to_date(date_text, "%B %d, %Y")
        images = soup.find("div", id="comic").find_all("img")
        # Sanity check: alt equals title on each image.
        for image in images:
            assert image['alt'] == image['title']
        alt = ""
        if images:
            alt = images[0]['alt']
        return {
            'img': [image['src'] for image in images],
            'title': title,
            'alt': alt,
            'author': author,
            'day': when.day,
            'month': when.month,
            'year': when.year
        }
2575
2576
2577
class PlanC(GenericNavigableComic):
    """Retriever for the Plan C webcomic."""
    name = 'planc'
    long_name = 'Plan C'
    url = 'http://www.plancomic.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, image URLs and publication date for one comic page."""
        title = soup.find('h2', class_='post-title').string
        raw_date = soup.find("span", class_="post-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        comic_div = soup.find('div', id='comic')
        sources = [pic['src'] for pic in comic_div.find_all('img')]
        return {
            'title': title,
            'img': sources,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2599
2600 View Code Duplication
2601
class BuniComic(GenericNavigableComic):
    """Retriever for the Buni webcomic."""
    name = 'buni'
    long_name = 'BuniComics'
    url = 'http://www.bunicomic.com'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return image URL and title, the title being the image's own title."""
        pictures = soup.find('div', id='comic').find_all('img')
        # Sanity checks: alt equals title on each image, exactly one image.
        for pic in pictures:
            assert pic['alt'] == pic['title']
        assert len(pictures) == 1
        sources = [pic['src'] for pic in pictures]
        return {
            'img': sources,
            'title': pictures[0]['title'],
        }
2619
2620
2621
class GenericCommitStrip(GenericNavigableComic):
    """Shared retriever logic for the language editions of Commit Strip."""
    get_navi_link = get_a_rel_next
    get_first_comic_link = simulate_first_link
    # Placeholder, set to a real URL by each language subclass.
    first_url = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for one comic page.

        'title2' is the space-joined title attributes of the images, with an
        empty string standing in for images that have none.
        """
        description = soup.find('meta', property='og:description')['content']
        title = soup.find('meta', property='og:title')['content']
        pictures = soup.find('div', class_='entry-content').find_all('img')
        image_titles = ' '.join(pic.get('title', '') for pic in pictures)
        # Each src is converted by convert_iri_to_plain_ascii_uri, then joined onto cls.url.
        sources = [
            urljoin_wrapper(cls.url, convert_iri_to_plain_ascii_uri(pic['src']))
            for pic in pictures
        ]
        return {
            'title': title,
            'title2': image_titles,
            'description': description,
            'img': sources,
        }
2640
2641
2642
class CommitStripFr(GenericCommitStrip):
    """Retriever for the French edition of Commit Strip."""
    name = "commit_fr"
    long_name = "Commit Strip (Fr)"
    url = "http://www.commitstrip.com/fr"
    # Value for the first_url placeholder declared on GenericCommitStrip.
    first_url = "http://www.commitstrip.com/fr/2012/02/22/interview/"
2648
2649
2650
class CommitStripEn(GenericCommitStrip):
    """Retriever for the English edition of Commit Strip."""
    name = "commit_en"
    long_name = "Commit Strip (En)"
    url = "http://www.commitstrip.com/en"
    # Value for the first_url placeholder declared on GenericCommitStrip.
    first_url = "http://www.commitstrip.com/en/2012/02/22/interview/"
2656
2657
2658
class GenericBoumerie(GenericNavigableComic):
    """Shared retriever logic for the language editions of Boumeries."""
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next
    # Placeholders, set by each language subclass; both are passed to string_to_date.
    date_format = NotImplemented
    lang = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for one comic page.

        The date is parsed with the subclass's date_format and lang.
        """
        title = soup.find('h2', class_='post-title').string
        short_url = soup.find('link', rel='shortlink')['href']
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, cls.date_format, cls.lang)
        pictures = soup.find('div', id='comic').find_all('img')
        # Sanity check: alt equals title on each image.
        for pic in pictures:
            assert pic['alt'] == pic['title']
        sources = [pic['src'] for pic in pictures]
        return {
            'short_url': short_url,
            'img': sources,
            'title': title,
            'author': author,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2684
2685
2686
class BoumerieEn(GenericBoumerie):
    """Retriever for the English edition of Boumeries."""
    name = "boumeries_en"
    long_name = "Boumeries (En)"
    url = "http://comics.boumerie.com"
    # Date format and locale used by GenericBoumerie.get_comic_info.
    date_format = "%B %d, %Y"
    lang = "en_GB.UTF-8"
2693
2694
2695
class BoumerieFr(GenericBoumerie):
    """Retriever for the French edition of Boumeries."""
    name = "boumeries_fr"
    long_name = "Boumeries (Fr)"
    url = "http://bd.boumerie.com"
    # Date format and locale used by GenericBoumerie.get_comic_info.
    date_format = "%A, %d %B %Y"
    lang = "fr_FR.utf8"
2702
2703
2704 View Code Duplication
class UnearthedComics(GenericNavigableComic):
    """Class to retrieve Unearthed comics."""
    # Also on http://tapastic.com/series/UnearthedComics
    # Also on http://unearthedcomics.tumblr.com
    name = 'unearthed'
    long_name = 'Unearthed Comics'
    url = 'http://unearthedcomics.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://unearthedcomics.com/comics/world-with-turn-signals/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        Returns a dict with the title (empty string when the page has no
        h1/h2), the og:description text (None when the meta tag is missing),
        the short link, the image URLs and the publication date.
        """
        short_url = soup.find('link', rel='shortlink')['href']
        title_elt = soup.find('h1') or soup.find('h2')
        title = title_elt.string if title_elt else ""
        # Store the meta tag's text rather than the tag object itself, so
        # 'description' is a plain string as in the other comic classes.
        desc_elt = soup.find('meta', property='og:description')
        desc = desc_elt['content'] if desc_elt else None
        date_str = soup.find('time', class_='published updated hidden')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        post = soup.find('div', class_="entry content entry-content type-portfolio")
        imgs = post.find_all('img')
        return {
            'title': title,
            'description': desc,
            'url2': short_url,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
2735
2736
2737
class Optipess(GenericNavigableComic):
    """Retriever for the Optipess webcomic."""
    name = 'optipess'
    long_name = 'Optipess'
    url = 'http://www.optipess.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for one comic page.

        A page without a 'comic' div yields an empty image list and an
        empty alt text.
        """
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        comic_div = soup.find('div', id='comic')
        if comic_div:
            pictures = comic_div.find_all('img')
        else:
            pictures = []
        if pictures:
            alt = pictures[0]['title']
        else:
            alt = ""
        # Sanity check: every image shares the same alt and title text.
        for pic in pictures:
            assert pic['alt'] == pic['title'] == alt
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, "%B %d, %Y")
        sources = [pic['src'] for pic in pictures]
        return {
            'title': title,
            'alt': alt,
            'author': author,
            'img': sources,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2765
2766
2767
class PainTrainComic(GenericNavigableComic):
    """Class to retrieve Pain Train Comics."""
    name = 'paintrain'
    long_name = 'Pain Train Comics'
    url = 'http://paintraincomic.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        The comic number is the 'p' query parameter of the page's short
        link, which must start with cls.url. The first image's title is
        used as alt text; an IndexError is raised when the 'comic' div
        holds no image.
        """
        title = soup.find('h2', class_='post-title').string
        short_url = soup.find('link', rel='shortlink')['href']
        # Escape the base URL so that its dots only match literal dots.
        short_url_re = re.compile('^%s/\\?p=([0-9]*)' % re.escape(cls.url))
        num = int(short_url_re.match(short_url).groups()[0])
        imgs = soup.find('div', id='comic').find_all('img')
        alt = imgs[0]['title']
        # Sanity check: every image shares the same alt and title text.
        assert all(i['alt'] == i['title'] == alt for i in imgs)
        date_str = soup.find('span', class_='post-date').string
        day = string_to_date(date_str, "%d/%m/%Y")
        return {
            'short_url': short_url,
            'num': num,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'alt': alt,
            'title': title,
        }
2797
2798
2799
class MoonBeard(GenericNavigableComic):
    """Class to retrieve MoonBeard comics."""
    # Also on http://blog.squiresjam.es/moonbeard
    # Also on http://www.webtoons.com/en/comedy/moon-beard/list?title_no=471
    name = 'moonbeard'
    long_name = 'Moon Beard'
    url = 'http://moonbeard.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        The comic number is the 'p' query parameter of the page's short
        link, which must start with cls.url. The first image's title is
        used as alt text; an IndexError is raised when the 'comic' div
        holds no image. Tags are the space-joined 'article:tag' meta values.
        """
        title = soup.find('h2', class_='post-title').string
        short_url = soup.find('link', rel='shortlink')['href']
        # Escape the base URL so that its dots only match literal dots.
        short_url_re = re.compile('^%s/\\?p=([0-9]*)' % re.escape(cls.url))
        num = int(short_url_re.match(short_url).groups()[0])
        imgs = soup.find('div', id='comic').find_all('img')
        alt = imgs[0]['title']
        # Sanity check: every image shares the same alt and title text.
        assert all(i['alt'] == i['title'] == alt for i in imgs)
        date_str = soup.find('span', class_='post-date').string
        day = string_to_date(date_str, "%B %d, %Y")
        tags = ' '.join(t['content'] for t in soup.find_all('meta', property='article:tag'))
        author = soup.find('span', class_='post-author').string
        return {
            'short_url': short_url,
            'num': num,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'title': title,
            'tags': tags,
            'alt': alt,
            'author': author,
        }
2835
2836
2837
class AHamADay(GenericNavigableComic):
    """Class to retrieve A Ham A Day comics."""
    name = 'ham'
    long_name = 'A Ham A Day'
    url = 'http://www.ahammaday.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.ahammaday.com/today/3/6/french'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns None when the page has no matching navigation item, instead
        of failing on the missing element.
        """
        # prev is next / next is prev
        li = last_soup.find('li', class_='previous' if next_ else 'next')
        return li.find('a') if li else None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        Returns a dict with the image URLs (from the 'image' itemprop meta
        tags), the og:title, the author and the publication date.
        """
        date_str = soup.find('time', class_='published')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        author = soup.find('span', class_='blog-author').find('a').string
        title = soup.find('meta', property='og:title')['content']
        imgs = soup.find_all('meta', itemprop='image')
        return {
            'img': [i['content'] for i in imgs],
            'title': title,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
        }
2868
2869 View Code Duplication
2870
class LittleLifeLines(GenericNavigableComic):
    """Retriever for the Little Life Lines webcomic."""
    name = 'life'
    long_name = 'Little Life Lines'
    url = 'http://www.littlelifelines.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.littlelifelines.com/comics/well-done'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor leading to the next or previous comic, or None."""
        # The 'prev' item is used for next_ and the 'next' item otherwise.
        wanted = 'prev' if next_ else 'next'
        item = last_soup.find('li', class_=wanted)
        if not item:
            return None
        return item.find('a')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for one comic page.

        Images without a src attribute are ignored; the first remaining
        image supplies the alt text (IndexError when none remains).
        """
        title = soup.find('meta', property='og:title')['content']
        description = soup.find('meta', property='og:description')['content']
        raw_date = soup.find('time', class_='published')['datetime']
        published = string_to_date(raw_date, "%Y-%m-%d")
        author = soup.find('a', rel='author').string
        body = soup.find('div', class_="body entry-content")
        pictures = [pic for pic in body.find_all('img') if pic.get('src') is not None]
        alt = pictures[0]['alt']
        sources = [pic['src'] for pic in pictures]
        return {
            'title': title,
            'alt': alt,
            'description': description,
            'author': author,
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': sources,
        }
2908
2909
2910
class GenericWordPressInkblot(GenericNavigableComic):
    """Shared retriever logic for comics published with WordPress and Inkblot."""
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'first webcomic' anchor found on the page at cls.url."""
        first_link_class = 'webcomic-link webcomic1-link first-webcomic-link first-webcomic1-link'
        home = get_soup_at_url(cls.url)
        return home.find('a', class_=first_link_class)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, publication date and image URLs for one comic page."""
        title = soup.find('meta', property='og:title')['content']
        pictures = soup.find('div', class_='webcomic-image').find_all('img')
        published_time = soup.find('meta', property='article:published_time')['content']
        # Only the first ten characters are handed to the "%Y-%m-%d" parser.
        published = string_to_date(published_time[:10], "%Y-%m-%d")
        sources = [pic['src'] for pic in pictures]
        return {
            'title': title,
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': sources,
        }
2933
2934
2935
class EverythingsStupid(GenericWordPressInkblot):
    """Retriever for the Everything's Stupid webcomic."""
    # Also on http://tapastic.com/series/EverythingsStupid
    # Also on http://www.webtoons.com/en/challenge/everythings-stupid/list?title_no=14591
    # Also on http://everythingsstupidcomics.tumblr.com
    name = "stupid"
    long_name = "Everything's Stupid"
    url = "http://everythingsstupid.net"
2943
2944
2945
class TheIsmComics(GenericWordPressInkblot):
    """Retriever for The Ism webcomic."""
    # Also on https://tapastic.com/series/TheIsm (?)
    name = "theism"
    long_name = "The Ism"
    url = "http://www.theism-comics.com"
2951
2952
2953
class WoodenPlankStudios(GenericWordPressInkblot):
    """Retriever for the Wooden Plank Studios webcomic."""
    name = "woodenplank"
    long_name = "Wooden Plank Studios"
    url = "http://woodenplankstudios.com"
2958
2959
2960
class ElectricBunnyComic(GenericNavigableComic):
    """Retriever for the Electric Bunny webcomic."""
    # Also on http://electricbunnycomics.tumblr.com
    name = 'bunny'
    long_name = 'Electric Bunny Comic'
    url = 'http://www.electricbunnycomics.com/View/Comic/153/Welcome+to+Hell'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the parent element of the image whose alt text is 'First'."""
        first_img = get_soup_at_url(cls.url).find('img', alt='First')
        return first_img.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the parent of the 'Next'/'Back' image, or None if absent."""
        wanted_alt = 'Next' if next_ else 'Back'
        nav_img = last_soup.find('img', alt=wanted_alt)
        if not nav_img:
            return None
        return nav_img.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the og:title and og:image values of one comic page."""
        title = soup.find('meta', property='og:title')['content']
        image_metas = soup.find_all('meta', property='og:image')
        sources = [meta['content'] for meta in image_metas]
        return {
            'title': title,
            'img': sources,
        }
2988
2989
2990
class SheldonComics(GenericNavigableComic):
    """Retriever for the Sheldon webcomic."""
    # Also on http://www.gocomics.com/sheldon
    name = 'sheldon'
    long_name = 'Sheldon Comics'
    url = 'http://www.sheldoncomics.com'

    @classmethod
    def get_first_comic_link(cls):
        """Return the anchor with id 'nav-first' from the page at cls.url."""
        home = get_soup_at_url(cls.url)
        return home.find("a", id="nav-first")

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the first nav anchor not pointing at the site root.

        Returns None when no such anchor is found.
        """
        nav_id = "nav-next" if next_ else "nav-prev"
        candidates = last_soup.find_all("a", id=nav_id)
        return next(
            (a for a in candidates if a['href'] != 'http://www.sheldoncomics.com'),
            None)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title and image URL, the title being the image's own title."""
        pictures = soup.find("div", id="comic-foot").find_all("img")
        # Sanity checks: alt equals title on each image, exactly one image.
        for pic in pictures:
            assert pic['alt'] == pic['title']
        assert len(pictures) == 1
        title = pictures[0]['title']
        sources = [pic['src'] for pic in pictures]
        return {
            'title': title,
            'img': sources,
        }
3021
3022
3023
class CubeDrone(GenericNavigableComic):
    """Class to retrieve Cube Drone comics.

    Navigation relies on the glyphicon spans of the page: the link is the
    parent element of the relevant span.
    """
    name = 'cubedrone'
    long_name = 'Cube Drone'
    url = 'http://cube-drone.com/comics'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics (parent of the 'backward' glyphicon)."""
        return get_soup_at_url(cls.url).find('span', class_='glyphicon glyphicon-backward').parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns the parent of the right (next) or left (previous) chevron
        span, or None when the page has no such span.
        """
        class_ = 'glyphicon glyphicon-chevron-' + ('right' if next_ else 'left')
        span = last_soup.find('span', class_=class_)
        # find() returns None when nothing matches: report "no link" instead
        # of failing with AttributeError on None.parent.
        return span.parent if span is not None else None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Title and secondary url come from the twitter meta tags; the extra
        title and alt text come from the first comic image. The link argument
        is not used.
        """
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        url2 = soup.find('meta', attrs={'name': 'twitter:url'})['content']
        # The date is not extracted. Per the original author's note it could
        # be read from soup.find('h2', class_='comic_title').find('small')
        # using the format "%B %d, %Y, %I:%M %p".
        imgs = soup.find_all('img', class_='comic img-responsive')
        title2 = imgs[0]['title']
        alt = imgs[0]['alt']
        return {
            'url2': url2,
            'title': title,
            'title2': title2,
            'alt': alt,
            'img': [i['src'] for i in imgs],
        }
3058
3059
3060
class MakeItStoopid(GenericNavigableComic):
    """Retrieve the Make It Stoopid comics through the page navigation bar."""
    name = 'stoopid'
    long_name = 'Make it stoopid'
    url = 'http://makeitstoopid.com/comic.php'

    @classmethod
    def get_nav(cls, soup):
        """Return the five navigation entries of a page.

        The elements of class 'cnav' are split into the first five and the
        rest, and both groups are expected to be equal. Entries without an
        href are replaced by None. According to the original author's note the
        order is: begin, prev, archive, next, end.
        """
        nav_elements = soup.find_all(class_='cnav')
        first_bar = nav_elements[:5]
        second_bar = nav_elements[5:]
        assert first_bar == second_bar
        entries = []
        for element in first_bar:
            entries.append(element if element.get('href') is not None else None)
        return entries

    @classmethod
    def get_first_comic_link(cls):
        """Return the first navigation entry of the main page."""
        main_page = get_soup_at_url(cls.url)
        return cls.get_nav(main_page)[0]

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the navigation entry at index 3 (next) or 1 (previous)."""
        position = 3 if next_ else 1
        return cls.get_nav(last_soup)[position]

    @classmethod
    def get_comic_info(cls, soup, link):
        """Describe the comic: title from the link, images with id 'comicimg'."""
        comic_title = link['title']
        sources = [img['src'] for img in soup.find_all('img', id='comicimg')]
        return {'title': comic_title, 'img': sources}
3094
3095
3096
class GeekAndPoke(GenericNavigableComic):
    """Class to retrieve Geek And Poke comics.

    The first comic is not discovered from the site: first_url is provided
    for the simulate_first_link helper.
    """
    name = 'geek'
    long_name = 'Geek And Poke'
    url = 'http://geek-and-poke.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://geek-and-poke.com/geekandpoke/2006/8/27/a-new-place-for-a-not-so-old-blog.html'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic."""
        # NOTE(review): 'prev-item' is used for the *next* comic and
        # 'next-item' for the previous one - presumably the site names its
        # links in reverse chronological order; confirm before changing.
        return last_soup.find('a', class_='prev-item' if next_ else 'next-item')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Title and description come from the Open Graph meta tags, the date
        from the 'published' time element and the images from the entry
        content (or the 'special-content' div). The link argument is not used.
        """
        title = soup.find('meta', property='og:title')['content']
        desc = soup.find('meta', property='og:description')['content']
        date_str = soup.find('time', class_='published')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        author = soup.find('a', rel='author').string
        div_content = (soup.find('div', class_="body entry-content") or
                       soup.find('div', class_="special-content"))
        imgs = div_content.find_all('img')
        # Images without a src cannot be downloaded: ignore them.
        imgs = [i for i in imgs if i.get('src') is not None]
        # NOTE(review): on a BeautifulSoup Tag, `in` appears to test the tag's
        # children rather than its attributes, so this check may always pass -
        # confirm and consider has_attr('title') instead.
        assert all('title' not in i or i['alt'] == i['title'] for i in imgs)
        # Always a string: empty when there is no image or no alt attribute.
        alt = imgs[0].get('alt', "") if imgs else ""
        return {
            'title': title,
            'alt': alt,
            'description': desc,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
        }
3134
3135
3136
class GenericTumblrV1(GenericComic):
    """Base class for comics published on Tumblr and read via the V1 API.

    The API end point is derived from the class attribute `url`.
    """

    @classmethod
    def get_next_comic(cls, last_comic):
        """Yield the comics posted after last_comic, oldest first.

        Posts for which get_comic_info returns None are skipped.
        """
        infos = (cls.get_comic_info(post) for post in cls.get_posts(last_comic))
        for info in infos:
            if info is not None:
                yield info

    @classmethod
    def get_url_from_post(cls, post):
        """Return the value of the 'url' attribute of an API post element."""
        return post['url']

    @classmethod
    def get_api_url(cls):
        """Return the '/api/read/' end point joined to the comic url."""
        return urljoin_wrapper(cls.url, '/api/read/')

    @classmethod
    def get_comic_info(cls, post):
        """Convert an API post element into a comic dict.

        Returns None for posts whose type is not 'photo'.
        """
        if post['type'] != 'photo':
            return None
        tumblr_id = int(post['id'])
        api_url = '%s?id=%d' % (cls.get_api_url(), tumblr_id)
        timestamp = int(post['unix-timestamp'])
        posted_on = datetime.datetime.fromtimestamp(timestamp).date()
        caption = post.find('photo-caption')
        if caption:
            title = caption.string
        else:
            title = ""
        tags = ' '.join(tag.string for tag in post.find_all('tag'))
        # Photos may appear in 'photo' tags and/or straight in the post
        photo_holders = post.find_all('photo') or [post]
        # Images come in several resolutions: only the first 'photo-url' of
        # each holder is kept.
        photo_urls = [holder.find('photo-url') for holder in photo_holders]
        return {
            'url': cls.get_url_from_post(post),
            'url2': post['url-with-slug'],
            'day': posted_on.day,
            'month': posted_on.month,
            'year': posted_on.year,
            'title': title,
            'tags': tags,
            'img': [photo_url.string for photo_url in photo_urls],
            'tumblr-id': tumblr_id,
            'api_url': api_url,  # for debug purposes
        }

    @classmethod
    def get_posts(cls, last_comic, nb_post_per_call=10):
        """Get posts using API. nb_post_per_call is max 50.

        The API lists posts from newest to oldest; they are accumulated in
        that order up to (and excluding) the post whose url matches last_comic
        and handed back in chronological order. An empty result is returned,
        after printing a message, when the previous post cannot be found.
        """
        newest_first = []
        stop_url = None
        if last_comic:
            stop_url = last_comic['url']
        if last_comic is not None:
            # Sometimes, tumblr posts are deleted. If the previous post has
            # been deleted, we might end up spending a lot of time looking for
            # something that doesn't exist. Failing early and clearly might be
            # a better option.
            previous_api_url = last_comic['api_url']
            try:
                get_soup_at_url(previous_api_url)
            except urllib.error.HTTPError:
                try:
                    get_soup_at_url(cls.url)
                except urllib.error.HTTPError:
                    print("Did not find previous post nor main url %s" % cls.url)
                else:
                    print("Did not find previous post %s : it might have been deleted" % previous_api_url)
                return reversed(newest_first)
        base_api_url = cls.get_api_url()
        first_page = get_soup_at_url(base_api_url).find('posts')
        start = int(first_page['start'])
        total = int(first_page['total'])
        assert start == 0
        for offset in range(0, total, nb_post_per_call):
            page_url = '%s?start=%d&num=%d' % (base_api_url, offset, nb_post_per_call)
            page = get_soup_at_url(page_url).find('posts')
            page_start = int(page['start'])
            page_total = int(page['total'])
            assert offset == page_start, "%d != %d" % (offset, page_start)
            # This may happen and should be handled in the future
            assert total == page_total, "%d != %d" % (total, page_total)
            for post in page.find_all('post'):
                if stop_url and stop_url == cls.get_url_from_post(post):
                    return reversed(newest_first)
                newest_first.append(post)
        if stop_url is not None:
            print("Did not find %s : there might be a problem" % stop_url)
            return []
        return reversed(newest_first)
3231
3232
3233
class IrwinCardozo(GenericTumblrV1):
    """Irwin Cardozo comics, read through the Tumblr V1 API."""
    name = 'irwinc'
    long_name = 'Irwin Cardozo'
    url = 'http://irwincardozocomics.tumblr.com'
3238
3239
3240
class AccordingToDevin(GenericTumblrV1):
    """According To Devin comics, read through the Tumblr V1 API."""
    name = 'devin'
    long_name = 'According To Devin'
    url = 'http://accordingtodevin.tumblr.com'
3245
3246
3247
class ItsTheTieTumblr(GenericTumblrV1):
    """It's the tie comics, read through the Tumblr V1 API."""
    # Also on http://itsthetie.com
    # Also on https://tapastic.com/series/itsthetie
    name = 'tie-tumblr'
    long_name = "It's the tie (from Tumblr)"
    url = "http://itsthetie.tumblr.com"
3254
3255
3256
class OctopunsTumblr(GenericTumblrV1):
    """Octopuns comics, read through the Tumblr V1 API."""
    # Also on http://www.octopuns.net
    name = 'octopuns-tumblr'
    long_name = 'Octopuns (from Tumblr)'
    url = 'http://octopuns.tumblr.com'
3262
3263
3264
class PicturesInBoxesTumblr(GenericTumblrV1):
    """Pictures In Boxes comics, read through the Tumblr V1 API."""
    # Also on http://www.picturesinboxes.com
    name = 'picturesinboxes-tumblr'
    long_name = 'Pictures in Boxes (from Tumblr)'
    url = 'http://picturesinboxescomic.tumblr.com'
3270
3271
3272
class TubeyToonsTumblr(GenericTumblrV1):
    """Tubey Toons comics, read through the Tumblr V1 API."""
    # Also on http://tapastic.com/series/Tubey-Toons
    # Also on http://tubeytoons.com
    name = 'tubeytoons-tumblr'
    long_name = 'Tubey Toons (from Tumblr)'
    url = 'http://tubeytoons.tumblr.com'
3279
3280
3281
class UnearthedComicsTumblr(GenericTumblrV1):
    """Unearthed Comics, read through the Tumblr V1 API."""
    # Also on http://tapastic.com/series/UnearthedComics
    # Also on http://unearthedcomics.com
    name = 'unearthed-tumblr'
    long_name = 'Unearthed Comics (from Tumblr)'
    url = 'http://unearthedcomics.tumblr.com'
3288
3289
3290
class PieComic(GenericTumblrV1):
    """Pie Comic comics, read through the Tumblr V1 API."""
    name = 'pie'
    long_name = 'Pie Comic'
    url = "http://piecomic.tumblr.com"
3295
3296
3297
class MrEthanDiamond(GenericTumblrV1):
    """Mr Ethan Diamond comics, read through the Tumblr V1 API."""
    name = 'diamond'
    long_name = 'Mr Ethan Diamond'
    url = 'http://mrethandiamond.tumblr.com'
3302
3303
3304
class Flocci(GenericTumblrV1):
    """floccinaucinihilipilification comics, read through the Tumblr V1 API."""
    name = 'flocci'
    long_name = 'floccinaucinihilipilification'
    url = "http://floccinaucinihilipilificationa.tumblr.com"
3309
3310
3311
class UpAndOut(GenericTumblrV1):
    """Up & Out comics, read through the Tumblr V1 API."""
    # Also on http://tapastic.com/series/UP-and-OUT
    name = 'upandout'
    long_name = 'Up And Out (from Tumblr)'
    url = 'http://upandoutcomic.tumblr.com'
3317
3318
3319
class Pundemonium(GenericTumblrV1):
    """Pundemonium comics, read through the Tumblr V1 API."""
    name = 'pundemonium'
    long_name = 'Pundemonium'
    url = 'http://monstika.tumblr.com'
3324
3325
3326
class PoorlyDrawnLinesTumblr(GenericEmptyComic, GenericTumblrV1):
    """Poorly Drawn Lines comics on Tumblr."""
    # GenericEmptyComic precedes GenericTumblrV1 in the bases, so it wins for
    # any attribute both of them define.
    # Also on http://poorlydrawnlines.com
    name = 'poorlydrawn-tumblr'
    long_name = 'Poorly Drawn Lines (from Tumblr)'
    url = 'http://pdlcomics.tumblr.com'
3332
3333
3334
class PearShapedComics(GenericTumblrV1):
    """Pear-Shaped Comics, read through the Tumblr V1 API."""
    name = 'pearshaped'
    long_name = 'Pear-Shaped Comics'
    url = 'http://pearshapedcomics.com'
3339
3340
3341
class PondScumComics(GenericTumblrV1):
    """Pond Scum comics, read through the Tumblr V1 API."""
    name = 'pond'
    long_name = 'Pond Scum'
    url = 'http://pondscumcomic.tumblr.com'
3346
3347
3348
class MercworksTumblr(GenericTumblrV1):
    """Mercworks comics, read through the Tumblr V1 API."""
    # Also on http://mercworks.net
    name = 'mercworks-tumblr'
    long_name = 'Mercworks (from Tumblr)'
    url = 'http://mercworks.tumblr.com'
3354
3355
3356
class OwlTurdTumblr(GenericEmptyComic, GenericTumblrV1):
    """Owl Turd comics on Tumblr."""
    # GenericEmptyComic precedes GenericTumblrV1 in the bases, so it wins for
    # any attribute both of them define.
    # Also on http://tapastic.com/series/Owl-Turd-Comix
    name = 'owlturd-tumblr'
    long_name = 'Owl Turd (from Tumblr)'
    url = 'http://owlturd.com'
3362
3363
3364
class VectorBelly(GenericTumblrV1):
    """Vector Belly comics, read through the Tumblr V1 API."""
    # Also on http://vectorbelly.com
    name = 'vector'
    long_name = 'Vector Belly'
    url = 'http://vectorbelly.tumblr.com'
3370
3371
3372
class GoneIntoRapture(GenericTumblrV1):
    """Gone Into Rapture comics, read through the Tumblr V1 API."""
    # Also on http://goneintorapture.tumblr.com
    # Also on http://tapastic.com/series/Goneintorapture
    name = 'rapture'
    long_name = 'Gone Into Rapture'
    url = 'http://www.goneintorapture.com'
3379
3380
3381
class TheOatmealTumblr(GenericTumblrV1):
    """The Oatmeal comics, read through the Tumblr V1 API."""
    # Also on http://theoatmeal.com
    name = 'oatmeal-tumblr'
    long_name = 'The Oatmeal (from Tumblr)'
    url = 'http://oatmeal.tumblr.com'
3387
3388
3389
class HeckIfIKnowComicsTumblr(GenericTumblrV1):
    """Heck If I Know comics, read through the Tumblr V1 API."""
    # Also on http://tapastic.com/series/Regular
    name = 'heck-tumblr'
    long_name = 'Heck if I Know comics (from Tumblr)'
    url = 'http://heckifiknowcomics.com'
3395
3396
3397
class MyJetPack(GenericTumblrV1):
    """My Jet Pack comics, read through the Tumblr V1 API."""
    name = 'jetpack'
    long_name = 'My Jet Pack'
    url = 'http://myjetpack.tumblr.com'
3402
3403
3404
class CheerUpEmoKidTumblr(GenericTumblrV1):
    """Cheer Up Emo Kid comics, read through the Tumblr V1 API."""
    # Also on http://www.cheerupemokid.com
    # Also on http://tapastic.com/series/CUEK
    name = 'cuek-tumblr'
    long_name = 'Cheer Up Emo Kid (from Tumblr)'
    url = 'http://enzocomics.tumblr.com'
3411
3412
3413
class ForLackOfABetterComic(GenericEmptyComic, GenericTumblrV1):
    """For Lack Of A Better Comic comics on Tumblr."""
    # GenericEmptyComic precedes GenericTumblrV1 in the bases, so it wins for
    # any attribute both of them define.
    # Also on http://forlackofabettercomic.com
    name = 'lack'
    long_name = 'For Lack Of A Better Comic'
    url = 'http://forlackofabettercomic.tumblr.com'
3419
3420
3421
class ZenPencilsTumblr(GenericTumblrV1):
    """Zen Pencils comics, read through the Tumblr V1 API."""
    # Also on http://zenpencils.com
    # Also on http://www.gocomics.com/zen-pencils
    name = 'zenpencils-tumblr'
    long_name = 'Zen Pencils (from Tumblr)'
    url = 'http://zenpencils.tumblr.com'
3428
3429
3430
class ThreeWordPhraseTumblr(GenericTumblrV1):
    """Three Word Phrase comics, read through the Tumblr V1 API."""
    # Also on http://threewordphrase.com
    name = 'threeword-tumblr'
    long_name = 'Three Word Phrase (from Tumblr)'
    url = 'http://www.threewordphrase.tumblr.com'
3436
3437
3438
class TimeTrabbleTumblr(GenericTumblrV1):
    """Time Trabble comics, read through the Tumblr V1 API."""
    # Also on http://timetrabble.com
    name = 'timetrabble-tumblr'
    long_name = 'Time Trabble (from Tumblr)'
    url = 'http://timetrabble.tumblr.com'
3444
3445
3446
class SafelyEndangeredTumblr(GenericTumblrV1):
    """Safely Endangered comics, read through the Tumblr V1 API."""
    # Also on http://www.safelyendangered.com
    name = 'endangered-tumblr'
    long_name = 'Safely Endangered (from Tumblr)'
    url = 'http://tumblr.safelyendangered.com'
3452
3453
3454
class MouseBearComedyTumblr(GenericTumblrV1):
    """Mouse Bear Comedy comics, read through the Tumblr V1 API."""
    # Also on http://www.mousebearcomedy.com
    name = 'mousebear-tumblr'
    long_name = 'Mouse Bear Comedy (from Tumblr)'
    url = 'http://mousebearcomedy.tumblr.com'
3460
3461
3462
class BouletCorpTumblr(GenericTumblrV1):
    """Boulet Corp comics, read through the Tumblr V1 API."""
    # Also on http://www.bouletcorp.com
    name = 'boulet-tumblr'
    long_name = 'Boulet Corp (from Tumblr)'
    url = 'http://bouletcorp.tumblr.com'
3468
3469
3470
class TheAwkwardYetiTumblr(GenericEmptyComic, GenericTumblrV1):
    """The Awkward Yeti comics on Tumblr."""
    # GenericEmptyComic precedes GenericTumblrV1 in the bases, so it wins for
    # any attribute both of them define.
    # Also on http://www.gocomics.com/the-awkward-yeti
    # Also on http://theawkwardyeti.com
    # Also on https://tapastic.com/series/TheAwkwardYeti
    name = 'yeti-tumblr'
    long_name = 'The Awkward Yeti (from Tumblr)'
    url = 'http://larstheyeti.tumblr.com'
3478
3479
3480
class NellucNhoj(GenericTumblrV1):
    """Nelluc Nhoj comics, read through the Tumblr V1 API."""
    name = 'nhoj'
    long_name = 'Nelluc Nhoj'
    url = 'http://nellucnhoj.com'
3485
3486
3487
class DownTheUpwardSpiralTumblr(GenericTumblrV1):
    """Down The Upward Spiral comics, read through the Tumblr V1 API."""
    # Also on http://www.downtheupwardspiral.com
    name = 'spiral-tumblr'
    long_name = 'Down the Upward Spiral (from Tumblr)'
    url = 'http://downtheupwardspiral.tumblr.com'
3493
3494
3495
class AsPerUsualTumblr(GenericTumblrV1):
    """As Per Usual comics, read through the Tumblr V1 API."""
    # Also on https://tapastic.com/series/AsPerUsual
    name = 'usual-tumblr'
    long_name = 'As Per Usual (from Tumblr)'
    url = 'http://as-per-usual.tumblr.com'
3501
3502
3503
class OneOneOneOneComicTumblr(GenericTumblrV1):
    """1111 Comics, read through the Tumblr V1 API."""
    # Also on http://www.1111comics.me
    # Also on https://tapastic.com/series/1111-Comics
    name = '1111-tumblr'
    long_name = '1111 Comics (from Tumblr)'
    url = 'http://comics1111.tumblr.com'
3510
3511
3512
class JhallComicsTumblr(GenericTumblrV1):
    """Jhall Comics, read through the Tumblr V1 API."""
    # Also on http://jhallcomics.com
    name = 'jhall-tumblr'
    long_name = 'Jhall Comics (from Tumblr)'
    url = 'http://jhallcomics.tumblr.com'
3518
3519
3520
class BerkeleyMewsTumblr(GenericTumblrV1):
    """Berkeley Mews comics, read through the Tumblr V1 API."""
    # Also on http://www.gocomics.com/berkeley-mews
    # Also on http://www.berkeleymews.com
    name = 'berkeley-tumblr'
    long_name = 'Berkeley Mews (from Tumblr)'
    url = 'http://mews.tumblr.com'
3527
3528
3529
class JoanCornellaTumblr(GenericTumblrV1):
    """Joan Cornella comics, read through the Tumblr V1 API."""
    # Also on http://joancornella.net
    name = 'cornella-tumblr'
    long_name = 'Joan Cornella (from Tumblr)'
    url = 'http://cornellajoan.tumblr.com'
3535
3536
3537
class RespawnComicTumblr(GenericTumblrV1):
    """Respawn Comic, read through the Tumblr V1 API."""
    # Also on http://respawncomic.com
    name = 'respawn-tumblr'
    long_name = 'Respawn Comic (from Tumblr)'
    url = 'http://respawncomic.tumblr.com'
3543
3544
3545
class ChrisHallbeckTumblr(GenericTumblrV1):
    """Class to retrieve Chris Hallbeck comics."""
    # Also on https://tapastic.com/ChrisHallbeck
    # Also on http://maximumble.com
    # Also on http://minimumble.com
    # Also on http://thebookofbiff.com
    name = 'hallbeck-tumblr'
    # Spelling aligned with the class name, the short name and the url
    # (was 'Hallback').
    long_name = 'Chris Hallbeck (from Tumblr)'
    url = 'http://chrishallbeck.tumblr.com'
3554
3555
3556
class ComicNuggets(GenericTumblrV1):
    """Comic Nuggets comics, read through the Tumblr V1 API."""
    name = 'nuggets'
    long_name = 'Comic Nuggets'
    url = 'http://comicnuggets.com'
3561
3562
3563
class PigeonGazetteTumblr(GenericTumblrV1):
    """The Pigeon Gazette comics, read through the Tumblr V1 API."""
    # Also on https://tapastic.com/series/The-Pigeon-Gazette
    name = 'pigeon-tumblr'
    long_name = 'The Pigeon Gazette (from Tumblr)'
    url = 'http://thepigeongazette.tumblr.com'
3569
3570
3571
class CancerOwl(GenericTumblrV1):
    """Cancer Owl comics, read through the Tumblr V1 API."""
    # Also on http://cancerowl.com
    name = 'cancerowl-tumblr'
    long_name = 'Cancer Owl (from Tumblr)'
    url = 'http://cancerowl.tumblr.com'
3577
3578
3579
class FowlLanguageTumblr(GenericTumblrV1):
    """Fowl Language comics, read through the Tumblr V1 API."""
    # Also on http://www.fowllanguagecomics.com
    # Also on http://tapastic.com/series/Fowl-Language-Comics
    # Also on http://www.gocomics.com/fowl-language
    name = 'fowllanguage-tumblr'
    long_name = 'Fowl Language Comics (from Tumblr)'
    url = 'http://fowllanguagecomics.tumblr.com'
3587
3588
3589
class TheOdd1sOutTumblr(GenericTumblrV1):
    """The Odd 1s Out comics, read through the Tumblr V1 API."""
    # Also on http://theodd1sout.com
    # Also on https://tapastic.com/series/Theodd1sout
    name = 'theodd-tumblr'
    long_name = 'The Odd 1s Out (from Tumblr)'
    url = 'http://theodd1sout.tumblr.com'
3596
3597
3598
class TheUnderfoldTumblr(GenericTumblrV1):
    """The Underfold comics, read through the Tumblr V1 API."""
    # Also on http://theunderfold.com
    name = 'underfold-tumblr'
    long_name = 'The Underfold (from Tumblr)'
    url = 'http://theunderfold.tumblr.com'
3604
3605
3606
class LolNeinTumblr(GenericTumblrV1):
    """Lol Nein comics, read through the Tumblr V1 API."""
    # Also on http://lolnein.com
    name = 'lolnein-tumblr'
    long_name = 'Lol Nein (from Tumblr)'
    url = 'http://lolneincom.tumblr.com'
3612
3613
3614
class FatAwesomeComicsTumblr(GenericTumblrV1):
    """Fat Awesome comics, read through the Tumblr V1 API."""
    # Also on http://fatawesome.com/comics
    name = 'fatawesome-tumblr'
    long_name = 'Fat Awesome (from Tumblr)'
    url = 'http://fatawesomecomedy.tumblr.com'
3620
3621
3622
class TheWorldIsFlatTumblr(GenericTumblrV1):
    """The World Is Flat comics, read through the Tumblr V1 API."""
    # Also on https://tapastic.com/series/The-World-is-Flat
    name = 'flatworld-tumblr'
    long_name = 'The World Is Flat (from Tumblr)'
    url = 'http://theworldisflatcomics.tumblr.com'
3628
3629
3630
class DorrisMc(GenericEmptyComic, GenericTumblrV1):
    """Dorris Mc comics."""
    # GenericEmptyComic precedes GenericTumblrV1 in the bases, so it wins for
    # any attribute both of them define.
    # Also on http://www.gocomics.com/dorris-mccomics
    name = 'dorrismc'
    long_name = 'Dorris Mc'
    url = 'http://dorrismccomics.com'
3636
3637
3638
class LeleozTumblr(GenericEmptyComic, GenericTumblrV1):
    """Leleoz comics on Tumblr."""
    # GenericEmptyComic precedes GenericTumblrV1 in the bases, so it wins for
    # any attribute both of them define.
    # Also on https://tapastic.com/series/Leleoz
    name = 'leleoz-tumblr'
    long_name = 'Leleoz (from Tumblr)'
    url = 'http://leleozcomics.tumblr.com'
3644
3645
3646
class MoonBeardTumblr(GenericTumblrV1):
    """Moon Beard comics, read through the Tumblr V1 API."""
    # Also on http://moonbeard.com
    # Also on http://www.webtoons.com/en/comedy/moon-beard/list?title_no=471
    name = 'moonbeard-tumblr'
    long_name = 'Moon Beard (from Tumblr)'
    url = 'http://blog.squiresjam.es/moonbeard'
3653
3654
3655
class AComik(GenericTumblrV1):
    """A Comik comics, read through the Tumblr V1 API."""
    name = 'comik'
    long_name = 'A Comik'
    url = 'http://acomik.com'
3660
3661
3662
class ClassicRandy(GenericTumblrV1):
    """Classic Randy comics, read through the Tumblr V1 API."""
    name = 'randy'
    long_name = 'Classic Randy'
    url = 'http://classicrandy.tumblr.com'
3667
3668
3669
class DagssonTumblr(GenericTumblrV1):
    """Dagsson comics, read through the Tumblr V1 API."""
    # Also on http://www.dagsson.com
    name = 'dagsson-tumblr'
    long_name = 'Dagsson Hugleikur (from Tumblr)'
    url = 'http://hugleikurdagsson.tumblr.com'
3675
3676
3677
class LinsEditionsTumblr(GenericTumblrV1):
    """L.I.N.S. Editions comics, read through the Tumblr V1 API."""
    # Also on https://linsedition.com
    name = 'lins-tumblr'
    long_name = 'L.I.N.S. Editions (from Tumblr)'
    url = 'http://linscomics.tumblr.com'
3683
3684
3685
class OrigamiHotDish(GenericTumblrV1):
    """Origami Hot Dish comics, read through the Tumblr V1 API."""
    name = 'origamihotdish'
    long_name = 'Origami Hot Dish'
    url = 'http://origamihotdish.com'
3690
3691
3692
class HitAndMissComicsTumblr(GenericTumblrV1):
    """Hit and Miss Comics, read through the Tumblr V1 API."""
    name = 'hitandmiss'
    long_name = 'Hit and Miss Comics'
    url = 'http://hitandmisscomics.tumblr.com'
3697
3698
3699
class HMBlanc(GenericTumblrV1):
    """HM Blanc comics, read through the Tumblr V1 API."""
    name = 'hmblanc'
    long_name = 'HM Blanc'
    url = 'http://hmblanc.tumblr.com'
3704
3705
3706
class TalesOfAbsurdityTumblr(GenericTumblrV1):
    """Tales Of Absurdity comics, read through the Tumblr V1 API."""
    # Also on http://talesofabsurdity.com
    # Also on http://tapastic.com/series/Tales-Of-Absurdity
    name = 'absurdity-tumblr'
    long_name = 'Tales of Absurdity (from Tumblr)'
    url = 'http://talesofabsurdity.tumblr.com'
3713
3714
3715
class RobbieAndBobby(GenericTumblrV1):
    """Robbie And Bobby comics, read through the Tumblr V1 API."""
    # Also on http://robbieandbobby.com
    name = 'robbie-tumblr'
    long_name = 'Robbie And Bobby (from Tumblr)'
    url = 'http://robbieandbobby.tumblr.com'
3721
3722
3723
class ElectricBunnyComicTumblr(GenericTumblrV1):
    """Electric Bunny Comics, read through the Tumblr V1 API."""
    # Also on http://www.electricbunnycomics.com/View/Comic/153/Welcome+to+Hell
    name = 'bunny-tumblr'
    long_name = 'Electric Bunny Comic (from Tumblr)'
    url = 'http://electricbunnycomics.tumblr.com'
3729
3730
3731
class Hoomph(GenericTumblrV1):
    """Hoomph comics, read through the Tumblr V1 API."""
    name = 'hoomph'
    long_name = 'Hoomph'
    url = 'http://hoom.ph'
3736
3737
3738
class BFGFSTumblr(GenericTumblrV1):
    """BFGFS comics, read through the Tumblr V1 API."""
    # Also on https://tapastic.com/series/BFGFS
    # Also on http://bfgfs.com
    name = 'bfgfs-tumblr'
    long_name = 'BFGFS (from Tumblr)'
    url = 'http://bfgfs.tumblr.com'
3745
3746
3747
class DoodleForFood(GenericTumblrV1):
    """Doodle For Food comics, read through the Tumblr V1 API."""
    # Also on http://doodleforfood.com
    name = 'doodle'
    long_name = 'Doodle For Food'
    url = 'http://doodleforfood.com'
3753
3754
3755
class CassandraCalinTumblr(GenericEmptyComic, GenericTumblrV1):
    """C. Cassandra comics on Tumblr."""
    # GenericEmptyComic precedes GenericTumblrV1 in the bases, so it wins for
    # any attribute both of them define.
    # Also on http://cassandracalin.com
    # Also on https://tapastic.com/series/C-Cassandra-comics
    name = 'cassandra-tumblr'
    long_name = 'Cassandra Calin (from Tumblr)'
    url = 'http://c-cassandra.tumblr.com'
3762
3763
3764
class DougWasTaken(GenericTumblrV1):
    """Doug Was Taken comics, read through the Tumblr V1 API."""
    name = 'doog'
    long_name = 'Doug Was Taken'
    url = 'http://dougwastaken.tumblr.com'
3769
3770
3771
class MandatoryRollerCoaster(GenericEmptyComic, GenericTumblrV1):
    """Mandatory Roller Coaster comics."""
    # GenericEmptyComic precedes GenericTumblrV1 in the bases, so it wins for
    # any attribute both of them define.
    name = 'rollercoaster'
    long_name = 'Mandatory Roller Coaster'
    url = 'http://mandatoryrollercoaster.com'
3776
3777
3778
class CEstPasEnRegardantSesPompes(GenericEmptyComic, GenericTumblrV1):
    """C'Est Pas En Regardant Ses Pompes (...) comics on Tumblr."""
    # GenericEmptyComic precedes GenericTumblrV1 in the bases, so it wins for
    # any attribute both of them define.
    name = 'cperspqccltt'
    long_name = 'C Est Pas En Regardant Ses Pompes (...)'
    url = 'http://cperspqccltt.tumblr.com'
3783
3784
3785
class TheGrohlTroll(GenericEmptyComic, GenericTumblrV1):
    """The Grohl Troll comics."""
    # GenericEmptyComic precedes GenericTumblrV1 in the bases, so it wins for
    # any attribute both of them define.
    name = 'grohltroll'
    long_name = 'The Grohl Troll'
    url = 'http://thegrohltroll.com'
3790
3791
3792
class WebcomicName(GenericEmptyComic, GenericTumblrV1):
    """Webcomic Name comics."""
    # GenericEmptyComic precedes GenericTumblrV1 in the bases, so it wins for
    # any attribute both of them define.
    name = 'webcomicname'
    long_name = 'Webcomic Name'
    url = 'http://webcomicname.com'
3797
3798
3799
class BooksOfAdam(GenericEmptyComic, GenericTumblrV1):
    """Books of Adam comics on Tumblr."""
    # GenericEmptyComic precedes GenericTumblrV1 in the bases, so it wins for
    # any attribute both of them define.
    # Also on http://www.booksofadam.com
    name = 'booksofadam'
    long_name = 'Books of Adam'
    url = 'http://booksofadam.tumblr.com'
3805
3806
3807
class HarkAVagrant(GenericEmptyComic, GenericTumblrV1):
    """Hark A Vagrant comics on Tumblr."""
    # GenericEmptyComic precedes GenericTumblrV1 in the bases, so it wins for
    # any attribute both of them define.
    # Also on http://www.harkavagrant.com
    name = 'hark-tumblr'
    long_name = 'Hark A Vagrant (from Tumblr)'
    url = 'http://beatonna.tumblr.com'
3813
3814
3815
class OurSuperAdventureTumblr(GenericEmptyComic, GenericTumblrV1):
    """Our Super Adventure comics on Tumblr."""
    # GenericEmptyComic precedes GenericTumblrV1 in the bases, so it wins for
    # any attribute both of them define.
    # Also on https://tapastic.com/series/Our-Super-Adventure
    # Also on http://www.oursuperadventure.com
    # Also on http://sarahgraley.com
    name = 'superadventure-tumblr'
    long_name = 'Our Super Adventure (from Tumblr)'
    url = 'http://sarahssketchbook.tumblr.com'
3823
3824
3825
class JakeLikesOnions(GenericTumblrV1):
    """Jake Likes Onions comics, read through the Tumblr V1 API."""
    name = 'jake'
    long_name = 'Jake Likes Onions'
    url = 'http://jakelikesonions.com'
3830
3831
3832
class InYourFaceCake(GenericEmptyComic, GenericTumblrV1):
    """In Your Face Cake comics on Tumblr."""
    # GenericEmptyComic precedes GenericTumblrV1 in the bases, so it wins for
    # any attribute both of them define.
    name = 'inyourfacecake-tumblr'
    long_name = 'In Your Face Cake (from Tumblr)'
    url = 'http://in-your-face-cake.tumblr.com'
3837
3838
3839
class BananaTwinky(GenericTumblrV1):
    """Banana Twinky comics, read through the Tumblr V1 API."""
    name = 'banana'
    long_name = 'Banana Twinky'
    url = 'http://bananatwinky.tumblr.com'
3844
3845
3846
class HorovitzComics(GenericListableComic):
    """Shared scraping logic for the comics hosted on horovitzcomics.com.

    Subclasses are expected to override `link_re` with a compiled pattern
    whose first group captures the comic number in an archive link.
    """
    url = 'http://www.horovitzcomics.com'
    # The three numeric path components of the image are read as year/month/day.
    img_re = re.compile('.*comics/([0-9]*)/([0-9]*)/([0-9]*)/.*$')
    link_re = NotImplemented
    get_url_from_archive_element = join_cls_url_to_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dictionary for the comic behind an archive link.

        The number is taken from the link's href, the title from the link
        text and the date from the path of the single <img id="comic">.
        """
        num = int(cls.link_re.match(link['href']).group(1))
        title = link.string
        comic_imgs = soup.find_all('img', id='comic')
        assert len(comic_imgs) == 1
        date_parts = cls.img_re.match(comic_imgs[0]['src']).groups()
        year, month, day = map(int, date_parts)
        return {
            'title': title,
            'day': day,
            'month': month,
            'year': year,
            'img': [img['src'] for img in comic_imgs],
            'num': num,
        }

    @classmethod
    def get_archive_elements(cls):
        """Return the archive links matching `link_re`, in reversed page order."""
        archive_soup = get_soup_at_url('http://www.horovitzcomics.com/comics/archive/')
        matching_links = archive_soup.find_all('a', href=cls.link_re)
        return reversed(matching_links)
3876
3877
3878
class HorovitzNew(HorovitzComics):
    """Comic class for the "new" series of Horovitz comics."""
    name = 'horovitznew'
    long_name = 'Horovitz New'
    # Archive links of this series look like /comics/new/<number>.
    link_re = re.compile('^/comics/new/([0-9]+)$')
3883
3884
3885
class HorovitzClassic(HorovitzComics):
    """Comic class for the "classic" series of Horovitz comics."""
    name = 'horovitzclassic'
    long_name = 'Horovitz Classic'
    # Archive links of this series look like /comics/classic/<number>.
    link_re = re.compile('^/comics/classic/([0-9]+)$')
3890
3891
3892
class GenericGoComic(GenericNavigableComic):
    """Base class sharing the scraping logic of comics hosted on gocomics.com."""
    # The last three path components of a comic URL are read as year/month/day.
    url_date_re = re.compile('.*/([0-9]*)/([0-9]*)/([0-9]*)$')

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'beginning' link found on the comic's main page."""
        main_page = get_soup_at_url(cls.url)
        return main_page.find('a', class_='beginning')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the link to the next (or previous) comic, if any.

        Only anchors whose href matches `url_date_re` are considered.
        """
        direction = 'next' if next_ else 'prev'
        return last_soup.find('a', class_=direction, href=cls.url_date_re)

    @classmethod
    def get_url_from_link(cls, link):
        """Return the comic URL obtained by joining the link's href to the site root."""
        return urljoin_wrapper('http://www.gocomics.com', link['href'])

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dictionary for a particular comic.

        The date is parsed from the comic URL; the image is the last
        <img class="strip"> of the page.
        """
        comic_url = cls.get_url_from_link(link)
        year, month, day = map(int, cls.url_date_re.match(comic_url).groups())
        return {
            'day': day,
            'month': month,
            'year': year,
            'img': [soup.find_all('img', class_='strip')[-1]['src']],
            'author': soup.find('meta', attrs={'name': 'author'})['content'],
        }
3924
3925
3926
class PearlsBeforeSwine(GenericGoComic):
    """Comic class for Pearls Before Swine (GoComics)."""
    name = 'pearls'
    long_name = 'Pearls Before Swine'
    url = 'http://www.gocomics.com/pearlsbeforeswine'
3931
3932
3933
class Peanuts(GenericGoComic):
    """Comic class for Peanuts (GoComics)."""
    name = 'peanuts'
    long_name = 'Peanuts'
    url = 'http://www.gocomics.com/peanuts'
3938
3939
3940
class MattWuerker(GenericGoComic):
    """Comic class for Matt Wuerker (GoComics)."""
    name = 'wuerker'
    long_name = 'Matt Wuerker'
    url = 'http://www.gocomics.com/mattwuerker'
3945
3946
3947
class TomToles(GenericGoComic):
    """Comic class for Tom Toles (GoComics)."""
    name = 'toles'
    long_name = 'Tom Toles'
    url = 'http://www.gocomics.com/tomtoles'
3952
3953
3954
class BreakOfDay(GenericGoComic):
    """Comic class for Break Of Day (GoComics)."""
    name = 'breakofday'
    long_name = 'Break Of Day'
    url = 'http://www.gocomics.com/break-of-day'
3959
3960
3961
class Brevity(GenericGoComic):
    """Comic class for Brevity (GoComics)."""
    name = 'brevity'
    long_name = 'Brevity'
    url = 'http://www.gocomics.com/brevity'
3966
3967
3968
class MichaelRamirez(GenericGoComic):
    """Comic class for Michael Ramirez (GoComics)."""
    name = 'ramirez'
    long_name = 'Michael Ramirez'
    url = 'http://www.gocomics.com/michaelramirez'
3973
3974
3975
class MikeLuckovich(GenericGoComic):
    """Comic class for Mike Luckovich (GoComics)."""
    name = 'luckovich'
    long_name = 'Mike Luckovich'
    url = 'http://www.gocomics.com/mikeluckovich'
3980
3981
3982
class JimBenton(GenericGoComic):
    """Comic class for Jim Benton (GoComics)."""
    # Also on http://jimbenton.tumblr.com
    name = 'benton'
    long_name = 'Jim Benton'
    url = 'http://www.gocomics.com/jim-benton-cartoons'
3988
3989
3990
class TheArgyleSweater(GenericGoComic):
    """Comic class for The Argyle Sweater (GoComics)."""
    name = 'argyle'
    long_name = 'Argyle Sweater'
    url = 'http://www.gocomics.com/theargylesweater'
3995
3996
3997
class SunnyStreet(GenericGoComic):
    """Comic class for Sunny Street (GoComics)."""
    # Also on http://www.sunnystreetcomics.com
    name = 'sunny'
    long_name = 'Sunny Street'
    url = 'http://www.gocomics.com/sunny-street'
4003
4004
4005
class OffTheMark(GenericGoComic):
    """Comic class for Off The Mark (GoComics)."""
    # Also on https://www.offthemark.com
    name = 'offthemark'
    long_name = 'Off The Mark'
    url = 'http://www.gocomics.com/offthemark'
4011
4012
4013
class WuMo(GenericGoComic):
    """Comic class for WuMo (GoComics)."""
    # Also on http://wumo.com
    name = 'wumo'
    long_name = 'WuMo'
    url = 'http://www.gocomics.com/wumo'
4019
4020
4021
class LunarBaboon(GenericGoComic):
    """Comic class for Lunar Baboon (GoComics)."""
    # Also on http://www.lunarbaboon.com
    # Also on https://tapastic.com/series/Lunarbaboon
    name = 'lunarbaboon'
    long_name = 'Lunar Baboon'
    url = 'http://www.gocomics.com/lunarbaboon'
4028
4029
4030
class SandersenGocomic(GenericGoComic):
    """Comic class for Sarah Andersen's comics (GoComics)."""
    # Also on http://sarahcandersen.com
    # Also on http://tapastic.com/series/Doodle-Time
    name = 'sandersen-goc'
    long_name = 'Sarah Andersen (from GoComics)'
    url = 'http://www.gocomics.com/sarahs-scribbles'
4037
4038
4039
class CalvinAndHobbesGoComic(GenericGoComic):
    """Comic class for Calvin and Hobbes (GoComics)."""
    # From gocomics, not http://marcel-oehler.marcellosendos.ch/comics/ch/
    name = 'calvin-goc'
    long_name = 'Calvin and Hobbes (from GoComics)'
    url = 'http://www.gocomics.com/calvinandhobbes'
4045
4046
4047
class RallGoComic(GenericGoComic):
    """Comic class for Ted Rall (GoComics)."""
    # Also on http://rall.com/comic
    name = 'rall-goc'
    long_name = 'Ted Rall (from GoComics)'
    url = 'http://www.gocomics.com/tedrall'
4053
4054
4055
class TheAwkwardYetiGoComic(GenericGoComic):
    """Comic class for The Awkward Yeti (GoComics)."""
    # Also on http://larstheyeti.tumblr.com
    # Also on http://theawkwardyeti.com
    # Also on https://tapastic.com/series/TheAwkwardYeti
    name = 'yeti-goc'
    long_name = 'The Awkward Yeti (from GoComics)'
    url = 'http://www.gocomics.com/the-awkward-yeti'
4063
4064
4065
class BerkeleyMewsGoComics(GenericGoComic):
    """Comic class for Berkeley Mews (GoComics)."""
    # Also on http://mews.tumblr.com
    # Also on http://www.berkeleymews.com
    name = 'berkeley-goc'
    long_name = 'Berkeley Mews (from GoComics)'
    url = 'http://www.gocomics.com/berkeley-mews'
4072
4073
4074
class SheldonGoComics(GenericGoComic):
    """Comic class for Sheldon (GoComics)."""
    # Also on http://www.sheldoncomics.com
    name = 'sheldon-goc'
    long_name = 'Sheldon Comics (from GoComics)'
    url = 'http://www.gocomics.com/sheldon'
4080
4081
4082
class FowlLanguageGoComics(GenericGoComic):
    """Comic class for Fowl Language (GoComics)."""
    # Also on http://www.fowllanguagecomics.com
    # Also on http://tapastic.com/series/Fowl-Language-Comics
    # Also on http://fowllanguagecomics.tumblr.com
    name = 'fowllanguage-goc'
    long_name = 'Fowl Language Comics (from GoComics)'
    url = 'http://www.gocomics.com/fowl-language'
4090
4091
4092
class NickAnderson(GenericGoComic):
    """Comic class for Nick Anderson (GoComics)."""
    name = 'nickanderson'
    long_name = 'Nick Anderson'
    url = 'http://www.gocomics.com/nickanderson'
4097
4098
4099
class GarfieldGoComics(GenericGoComic):
    """Comic class for Garfield (GoComics)."""
    # Also on http://garfield.com
    name = 'garfield-goc'
    long_name = 'Garfield (from GoComics)'
    url = 'http://www.gocomics.com/garfield'
4105
4106
4107
class DorrisMcGoComics(GenericGoComic):
    """Comic class for Dorris Mc Comics (GoComics)."""
    # Also on http://dorrismccomics.com
    name = 'dorrismc-goc'
    long_name = 'Dorris Mc (from GoComics)'
    url = 'http://www.gocomics.com/dorris-mccomics'
4113
4114
4115
class FoxTrot(GenericGoComic):
    """Comic class for FoxTrot (GoComics)."""
    name = 'foxtrot'
    long_name = 'FoxTrot'
    url = 'http://www.gocomics.com/foxtrot'
4120
4121
4122
class FoxTrotClassics(GenericGoComic):
    """Comic class for FoxTrot Classics (GoComics)."""
    name = 'foxtrot-classics'
    long_name = 'FoxTrot Classics'
    url = 'http://www.gocomics.com/foxtrotclassics'
4127
4128
4129
class MisterAndMeGoComics(GenericGoComic):
    """Comic class for Mister & Me (GoComics)."""
    # Also on http://www.mister-and-me.com
    # Also on https://tapastic.com/series/Mister-and-Me
    name = 'mister-goc'
    long_name = 'Mister & Me (from GoComics)'
    url = 'http://www.gocomics.com/mister-and-me'
4136
4137
4138
class NonSequitur(GenericGoComic):
    """Comic class for Non Sequitur by Wiley Miller (GoComics)."""
    name = 'nonsequitur'
    long_name = 'Non Sequitur'
    url = 'http://www.gocomics.com/nonsequitur'
4143
4144
4145
class GenericTapasticComic(GenericListableComic):
    """Base class sharing the scraping logic of comics hosted on tapastic.com."""

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Build the information dictionary for a particular comic.

        The date comes from the 'publishDate' entry of the archive element,
        which is divided by 1000 before being read as a timestamp. Returns
        None (after printing a notice) when the page has no 'art-image'
        picture.
        """
        seconds = int(archive_elt['publishDate']) / 1000.0
        published = datetime.datetime.fromtimestamp(seconds).date()
        art_images = soup.find_all('img', class_='art-image')
        if not art_images:
            print("Comic %s is being uploaded, retry later" % cls.get_url_from_archive_element(archive_elt))
            return None
        return {
            'day': published.day,
            'year': published.year,
            'month': published.month,
            'img': [img['src'] for img in art_images],
            'title': archive_elt['title'],
        }

    @classmethod
    def get_url_from_archive_element(cls, archive_elt):
        """Return the episode URL built from the archive element's 'id'."""
        episode_id = str(archive_elt['id'])
        return 'http://tapastic.com/episode/' + episode_id

    @classmethod
    def get_archive_elements(cls):
        """Return the episode list decoded from the series page.

        The list is embedded in the page's javascript as a line of the form
        'episodeList : <json>,'. No cleaner access is known, so the first
        such line is located textually and its payload parsed as JSON.
        """
        prefix, suffix = 'episodeList : ', ','
        stripped_lines = (raw.decode('utf-8').strip()
                          for raw in urlopen_wrapper(cls.url).readlines())
        payloads = [line[len(prefix):-len(suffix)]
                    for line in stripped_lines
                    if line.startswith(prefix) and line.endswith(suffix)]
        return json.loads(payloads[0])
4177
4178
4179
class VegetablesForDessert(GenericTapasticComic):
    """Comic class for Vegetables For Dessert (Tapastic)."""
    # Also on http://vegetablesfordessert.tumblr.com
    name = 'vegetables'
    long_name = 'Vegetables For Dessert'
    url = 'http://tapastic.com/series/vegetablesfordessert'
4185
4186
4187
class FowlLanguageTapa(GenericTapasticComic):
    """Comic class for Fowl Language (Tapastic)."""
    # Also on http://www.fowllanguagecomics.com
    # Also on http://fowllanguagecomics.tumblr.com
    # Also on http://www.gocomics.com/fowl-language
    name = 'fowllanguage-tapa'
    long_name = 'Fowl Language Comics (from Tapastic)'
    url = 'http://tapastic.com/series/Fowl-Language-Comics'
4195
4196
4197
class OscillatingProfundities(GenericTapasticComic):
    """Comic class for Oscillating Profundities (Tapastic)."""
    name = 'oscillating'
    long_name = 'Oscillating Profundities'
    url = 'http://tapastic.com/series/oscillatingprofundities'
4202
4203
4204
class ZnoflatsComics(GenericTapasticComic):
    """Comic class for Znoflats Comics (Tapastic)."""
    name = 'znoflats'
    long_name = 'Znoflats Comics'
    url = 'http://tapastic.com/series/Znoflats-Comics'
4209
4210
4211
class SandersenTapastic(GenericTapasticComic):
    """Comic class for Sarah Andersen's comics (Tapastic)."""
    # Also on http://sarahcandersen.com
    # Also on http://www.gocomics.com/sarahs-scribbles
    name = 'sandersen-tapa'
    long_name = 'Sarah Andersen (from Tapastic)'
    url = 'http://tapastic.com/series/Doodle-Time'
4218
4219
4220
class TubeyToonsTapastic(GenericTapasticComic):
    """Comic class for Tubey Toons (Tapastic)."""
    # Also on http://tubeytoons.com
    # Also on http://tubeytoons.tumblr.com
    name = 'tubeytoons-tapa'
    long_name = 'Tubey Toons (from Tapastic)'
    url = 'http://tapastic.com/series/Tubey-Toons'
4227
4228
4229
class AnythingComicTapastic(GenericTapasticComic):
    """Comic class for Anything Comic (Tapastic)."""
    # Also on http://www.anythingcomic.com
    name = 'anythingcomic-tapa'
    long_name = 'Anything Comic (from Tapastic)'
    url = 'http://tapastic.com/series/anything'
4235
4236
4237
class UnearthedComicsTapastic(GenericTapasticComic):
    """Comic class for Unearthed Comics (Tapastic)."""
    # Also on http://unearthedcomics.com
    # Also on http://unearthedcomics.tumblr.com
    name = 'unearthed-tapa'
    long_name = 'Unearthed Comics (from Tapastic)'
    url = 'http://tapastic.com/series/UnearthedComics'
4244
4245
4246
class EverythingsStupidTapastic(GenericTapasticComic):
    """Comic class for Everything's Stupid (Tapastic)."""
    # Also on http://www.webtoons.com/en/challenge/everythings-stupid/list?title_no=14591
    # Also on http://everythingsstupid.net
    name = 'stupid-tapa'
    long_name = "Everything's Stupid (from Tapastic)"
    url = 'http://tapastic.com/series/EverythingsStupid'
4253
4254
4255
class JustSayEhTapastic(GenericTapasticComic):
    """Comic class for Just Say Eh (Tapastic)."""
    # Also on http://www.justsayeh.com
    name = 'justsayeh-tapa'
    long_name = 'Just Say Eh (from Tapastic)'
    url = 'http://tapastic.com/series/Just-Say-Eh'
4261
4262
4263
class ThorsThundershackTapastic(GenericTapasticComic):
    """Comic class for Thor's Thundershack (Tapastic)."""
    # Also on http://www.thorsthundershack.com
    name = 'thor-tapa'
    long_name = "Thor's Thundershack (from Tapastic)"
    # NOTE(review): the series slug ends in 'Thundershac' - confirm this is not a truncation.
    url = 'http://tapastic.com/series/Thors-Thundershac'
4269
4270
4271
class OwlTurdTapastic(GenericTapasticComic):
    """Comic class for Owl Turd (Tapastic)."""
    # Also on http://owlturd.com
    name = 'owlturd-tapa'
    long_name = 'Owl Turd (from Tapastic)'
    url = 'http://tapastic.com/series/Owl-Turd-Comix'
4277
4278
4279
class GoneIntoRaptureTapastic(GenericTapasticComic):
    """Comic class for Gone Into Rapture (Tapastic)."""
    # Also on http://goneintorapture.tumblr.com
    # Also on http://www.goneintorapture.com
    name = 'rapture-tapa'
    long_name = 'Gone Into Rapture (from Tapastic)'
    url = 'http://tapastic.com/series/Goneintorapture'
4286
4287
4288
class HeckIfIKnowComicsTapa(GenericTapasticComic):
    """Comic class for Heck If I Know Comics (Tapastic)."""
    # Also on http://heckifiknowcomics.com
    name = 'heck-tapa'
    long_name = 'Heck if I Know comics (from Tapastic)'
    url = 'http://tapastic.com/series/Regular'
4294
4295
4296
class CheerUpEmoKidTapa(GenericTapasticComic):
    """Comic class for Cheer Up Emo Kid (Tapastic)."""
    # Also on http://www.cheerupemokid.com
    # Also on http://enzocomics.tumblr.com
    name = 'cuek-tapa'
    long_name = 'Cheer Up Emo Kid (from Tapastic)'
    url = 'http://tapastic.com/series/CUEK'
4303
4304
4305
class BigFootJusticeTapa(GenericTapasticComic):
    """Comic class for Big Foot Justice (Tapastic)."""
    # Also on http://bigfootjustice.com
    name = 'bigfoot-tapa'
    long_name = 'Big Foot Justice (from Tapastic)'
    url = 'http://tapastic.com/series/bigfoot-justice'
4311
4312
4313
class UpAndOutTapa(GenericTapasticComic):
    """Comic class for Up & Out (Tapastic)."""
    # Also on http://upandoutcomic.tumblr.com
    name = 'upandout-tapa'
    long_name = 'Up And Out (from Tapastic)'
    url = 'http://tapastic.com/series/UP-and-OUT'
4319
4320
4321
class ToonHoleTapa(GenericTapasticComic):
    """Comic class for Toon Hole (Tapastic)."""
    # Also on http://www.toonhole.com
    name = 'toonhole-tapa'
    long_name = 'Toon Hole (from Tapastic)'
    url = 'http://tapastic.com/series/TOONHOLE'
4327
4328
4329
class AngryAtNothingTapa(GenericTapasticComic):
    """Comic class for Angry At Nothing (Tapastic)."""
    # Also on http://www.angryatnothing.net
    name = 'angry-tapa'
    long_name = 'Angry At Nothing (from Tapastic)'
    url = 'http://tapastic.com/series/Comics-yeah-definitely-comics-'
4335
4336
4337
class LeleozTapa(GenericTapasticComic):
    """Comic class for Leleoz (Tapastic)."""
    # Also on http://leleozcomics.tumblr.com
    name = 'leleoz-tapa'
    long_name = 'Leleoz (from Tapastic)'
    url = 'https://tapastic.com/series/Leleoz'
4343
4344
4345
class TheAwkwardYetiTapa(GenericTapasticComic):
    """Comic class for The Awkward Yeti (Tapastic)."""
    # Also on http://www.gocomics.com/the-awkward-yeti
    # Also on http://theawkwardyeti.com
    # Also on http://larstheyeti.tumblr.com
    name = 'yeti-tapa'
    long_name = 'The Awkward Yeti (from Tapastic)'
    url = 'https://tapastic.com/series/TheAwkwardYeti'
4353
4354
4355
class AsPerUsualTapa(GenericTapasticComic):
    """Comic class for As Per Usual (Tapastic)."""
    # Also on http://as-per-usual.tumblr.com
    name = 'usual-tapa'
    long_name = 'As Per Usual (from Tapastic)'
    url = 'https://tapastic.com/series/AsPerUsual'
4361
4362
4363
class OneOneOneOneComicTapa(GenericTapasticComic):
    """Comic class for 1111 Comics (Tapastic)."""
    # Also on http://www.1111comics.me
    # Also on http://comics1111.tumblr.com
    name = '1111-tapa'
    long_name = '1111 Comics (from Tapastic)'
    url = 'https://tapastic.com/series/1111-Comics'
4370
4371
4372
class TumbleDryTapa(GenericTapasticComic):
    """Comic class for Tumble Dry (Tapastic)."""
    # Also on http://tumbledrycomics.com
    name = 'tumbledry-tapa'
    # The display name previously read 'Tumblr Dry', a typo for the comic's title.
    long_name = 'Tumble Dry (from Tapastic)'
    url = 'https://tapastic.com/series/TumbleDryComics'
4378
4379
4380
class DeadlyPanelTapa(GenericTapasticComic):
    """Comic class for Deadly Panel (Tapastic)."""
    # Also on http://www.deadlypanel.com
    name = 'deadly-tapa'
    long_name = 'Deadly Panel (from Tapastic)'
    url = 'https://tapastic.com/series/deadlypanel'
4386
4387
4388
class ChrisHallbeckMaxiTapa(GenericTapasticComic):
    """Comic class for Chris Hallbeck's Maximumble (Tapastic)."""
    # Also on http://chrishallbeck.tumblr.com
    # Also on http://maximumble.com
    name = 'hallbeckmaxi-tapa'
    # The author's name was misspelled 'Hallback' in the display name.
    long_name = 'Chris Hallbeck - Maximumble (from Tapastic)'
    url = 'https://tapastic.com/series/Maximumble'
4395
4396
4397
class ChrisHallbeckMiniTapa(GenericTapasticComic):
    """Comic class for Chris Hallbeck's Minimumble (Tapastic)."""
    # Also on http://chrishallbeck.tumblr.com
    # Also on http://minimumble.com
    name = 'hallbeckmini-tapa'
    # The author's name was misspelled 'Hallback' in the display name.
    long_name = 'Chris Hallbeck - Minimumble (from Tapastic)'
    url = 'https://tapastic.com/series/Minimumble'
4404
4405
4406
class ChrisHallbeckBiffTapa(GenericTapasticComic):
    """Comic class for Chris Hallbeck's The Book of Biff (Tapastic)."""
    # Also on http://chrishallbeck.tumblr.com
    # Also on http://thebookofbiff.com
    name = 'hallbeckbiff-tapa'
    # The author's name was misspelled 'Hallback' in the display name.
    long_name = 'Chris Hallbeck - The Book of Biff (from Tapastic)'
    url = 'https://tapastic.com/series/Biff'
4413
4414
4415
class RandoWisTapa(GenericTapasticComic):
    """Comic class for RandoWis (Tapastic)."""
    # Also on https://randowis.com
    name = 'randowis-tapa'
    long_name = 'RandoWis (from Tapastic)'
    url = 'https://tapastic.com/series/RandoWis'
4421
4422
4423
class PigeonGazetteTapa(GenericTapasticComic):
    """Comic class for The Pigeon Gazette (Tapastic)."""
    # Also on http://thepigeongazette.tumblr.com
    name = 'pigeon-tapa'
    long_name = 'The Pigeon Gazette (from Tapastic)'
    url = 'https://tapastic.com/series/The-Pigeon-Gazette'
4429
4430
4431
class TheOdd1sOutTapa(GenericTapasticComic):
    """Comic class for The Odd 1s Out (Tapastic)."""
    # Also on http://theodd1sout.com
    # Also on http://theodd1sout.tumblr.com
    name = 'theodd-tapa'
    long_name = 'The Odd 1s Out (from Tapastic)'
    url = 'https://tapastic.com/series/Theodd1sout'
4438
4439
4440
class TheWorldIsFlatTapa(GenericTapasticComic):
    """Comic class for The World Is Flat (Tapastic)."""
    # Also on http://theworldisflatcomics.tumblr.com
    name = 'flatworld-tapa'
    long_name = 'The World Is Flat (from Tapastic)'
    url = 'https://tapastic.com/series/The-World-is-Flat'
4446
4447
4448
class MisterAndMeTapa(GenericTapasticComic):
    """Comic class for Mister & Me (Tapastic)."""
    # Also on http://www.mister-and-me.com
    # Also on http://www.gocomics.com/mister-and-me
    name = 'mister-tapa'
    long_name = 'Mister & Me (from Tapastic)'
    url = 'https://tapastic.com/series/Mister-and-Me'
4455
4456
4457
class TalesOfAbsurdityTapa(GenericTapasticComic):
    """Comic class for Tales Of Absurdity (Tapastic)."""
    # Also on http://talesofabsurdity.com
    # Also on http://talesofabsurdity.tumblr.com
    name = 'absurdity-tapa'
    long_name = 'Tales of Absurdity (from Tapastic)'
    url = 'http://tapastic.com/series/Tales-Of-Absurdity'
4464
4465
4466
class BFGFSTapa(GenericTapasticComic):
    """Comic class for BFGFS (Tapastic)."""
    # Also on http://bfgfs.com
    # Also on http://bfgfs.tumblr.com
    name = 'bfgfs-tapa'
    long_name = 'BFGFS (from Tapastic)'
    url = 'https://tapastic.com/series/BFGFS'
4473
4474
4475
class DoodleForFoodTapa(GenericTapasticComic):
    """Comic class for Doodle For Food (Tapastic)."""
    # Also on http://doodleforfood.com
    name = 'doodle-tapa'
    long_name = 'Doodle For Food (from Tapastic)'
    url = 'https://tapastic.com/series/Doodle-for-Food'
4481
4482
4483
class MrLovensteinTapa(GenericTapasticComic):
    """Comic class for Mr. Lovenstein (Tapastic)."""
    # Also on http://www.mrlovenstein.com
    name = 'mrlovenstein-tapa'
    long_name = 'Mr. Lovenstein (from Tapastic)'
    url = 'https://tapastic.com/series/MrLovenstein'
4489
4490
4491
class CassandraCalinTapa(GenericTapasticComic):
    """Comic class for Cassandra Calin's C. Cassandra comics (Tapastic)."""
    # Also on http://cassandracalin.com
    # Also on http://c-cassandra.tumblr.com
    name = 'cassandra-tapa'
    long_name = 'Cassandra Calin (from Tapastic)'
    url = 'https://tapastic.com/series/C-Cassandra-comics'
4498
4499
4500
class WafflesAndPancakes(GenericTapasticComic):
    """Comic class for Waffles And Pancakes (Tapastic)."""
    # Also on http://wandpcomic.com
    name = 'waffles'
    long_name = 'Waffles And Pancakes'
    url = 'https://tapastic.com/series/Waffles-and-Pancakes'
4506
4507
4508
class OurSuperAdventureTapastic(GenericTapasticComic):
    """Comic class for Our Super Adventure (Tapastic)."""
    # Also on http://www.oursuperadventure.com
    # See also http://sarahssketchbook.tumblr.com
    # See also http://sarahgraley.com
    name = 'superadventure-tapastic'
    long_name = 'Our Super Adventure (from Tapastic)'
    url = 'https://tapastic.com/series/Our-Super-Adventure'
4516
4517
4518
def get_subclasses(klass):
    """Return the list of direct and indirect subclasses of a class.

    Direct subclasses come first, followed by the descendants of each of
    them in turn. A class reachable through several parents appears once
    per path.
    """
    direct = klass.__subclasses__()
    indirect = [sub for child in direct for sub in get_subclasses(child)]
    return direct + indirect
4524
4525
4526
def remove_st_nd_rd_th_from_date(string):
    """Strip English ordinal suffixes (1st/2nd/3rd/4th) to get a parsable date.

    Only a suffix that directly follows a digit and ends a word is removed
    ('1st' -> '1', '22nd' -> '22'). The same letters inside words such as
    'August', 'Monday', 'Saturday' or 'Thursday' are left untouched, which
    a blanket string replacement would corrupt.
    """
    # The lookbehind keeps the digit; \b prevents eating the start of a longer word.
    return re.sub(r'(?<=[0-9])(?:st|nd|rd|th)\b', '', string)
4534
4535
4536
def string_to_date(string, date_format, local=DEFAULT_LOCAL):
    """Convert a string to a date object using the given locale.

    Wrapper around datetime.datetime.strptime. The locale `local` is
    installed while parsing and the previous locale is restored afterwards,
    even when parsing fails.

    Format described in
    https://docs.python.org/3/library/datetime.html#strftime-and-strptime-behavior

    Raises ValueError (from strptime) when `string` does not match
    `date_format`, and locale.Error when `local` cannot be set.
    """
    prev_locale = locale.setlocale(locale.LC_ALL)
    if local == prev_locale:
        # Already in the requested locale: nothing to switch or restore.
        return datetime.datetime.strptime(string, date_format).date()
    locale.setlocale(locale.LC_ALL, local)
    try:
        return datetime.datetime.strptime(string, date_format).date()
    finally:
        # The locale is process-wide state: never leave it changed on error.
        locale.setlocale(locale.LC_ALL, prev_locale)
4547
4548
4549
# Every class deriving (directly or not) from GenericComic, without duplicates.
COMICS = set(get_subclasses(GenericComic))
# Classes whose `name` is None are left out (presumably the generic bases - verify).
VALID_COMICS = [comic for comic in COMICS if comic.name is not None]
# Lookup from short name to class; the assert fails if two classes share a name.
COMIC_NAMES = {comic.name: comic for comic in VALID_COMICS}
assert len(VALID_COMICS) == len(COMIC_NAMES)
# Same uniqueness check on the Python class names.
CLASS_NAMES = {comic.__name__ for comic in VALID_COMICS}
assert len(VALID_COMICS) == len(CLASS_NAMES)
4555