Completed
Push — master ( 0de88c...4d1c41 )
by De
01:30
created

comics.py (34 issues)

Code
1
#! /usr/bin/python3
2
# vim: set expandtab tabstop=4 shiftwidth=4 :
3
"""Module to retrieve webcomics"""
4
5
from comic_abstract import GenericComic, get_date_for_comic
6
import re
7
from datetime import date, timedelta
8
import datetime
9
from urlfunctions import get_soup_at_url, urljoin_wrapper,\
10
    convert_iri_to_plain_ascii_uri, load_json_at_url, urlopen_wrapper
11
import json
12
import locale
13
import urllib
14
15
DEFAULT_LOCAL = 'en_GB.UTF-8'
16
17
18
class Xkcd(GenericComic):
    """Retrieve xkcd comics through the JSON documents served under `url`."""
    name = 'xkcd'
    long_name = 'xkcd'
    url = 'http://xkcd.com'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Yield the comics numbered after `last_comic`, in increasing order.

        `last_comic` is the previously retrieved comic (a dict with a 'num'
        entry) or a falsy value to start from number 1. The upper bound is
        the 'num' field of the 'info.0.json' document at the site root.
        Each yielded dict is the comic's own JSON document with 'img' wrapped
        in a list, 'day'/'month'/'year' converted to int, and the 'prefix',
        'json_url' and 'url' entries added.
        """
        start = last_comic['num'] + 1 if last_comic else 1
        newest = load_json_at_url(urljoin_wrapper(cls.url, 'info.0.json'))
        for num in range(start, newest['num'] + 1):
            if num == 404:
                # Number 404 is never requested (presumably no such comic exists).
                continue
            json_url = urljoin_wrapper(cls.url, '%d/info.0.json' % num)
            comic = load_json_at_url(json_url)
            comic.update({
                'img': [comic['img']],
                'prefix': '%d-' % num,
                'json_url': json_url,
                'url': urljoin_wrapper(cls.url, str(num)),
                'day': int(comic['day']),
                'month': int(comic['month']),
                'year': int(comic['year']),
            })
            assert comic['num'] == num
            yield comic
44
45
46
# Helper functions corresponding to get_url_from_link/get_url_from_archive_element
47
48
49
@classmethod
def get_href(cls, link):
    """Return the 'href' entry of `link`.

    Meant to be assigned as a class's get_url_from_link or
    get_url_from_archive_element implementation.
    """
    href = link['href']
    return href
53
54
55
@classmethod
def join_cls_url_to_href(cls, link):
    """Return the 'href' entry of `link` joined onto the class-level `url`.

    Meant to be assigned as a class's get_url_from_link or
    get_url_from_archive_element implementation.
    """
    base, target = cls.url, link['href']
    return urljoin_wrapper(base, target)
59
60
61
class GenericNavigableComic(GenericComic):
    """Generic class for "navigable" comics : with first/next arrows.

    This class applies to comics where the previous and next comics can be
    accessed from a given comic. Once given a starting point (either
    the first comic or the last comic retrieved), it handles the
    navigation, the retrieval of the soup object and the setting of
    the 'url' attribute on retrieved comics. This greatly limits the
    amount of boilerplate code in the different implementation classes.

    The `get_next_comic` method is implemented in terms of new,
    more specialized methods to be implemented/overridden:
        - get_first_comic_link
        - get_navi_link
        - get_comic_info
        - get_url_from_link
    """

    @classmethod
    def get_first_comic_link(cls):
        """Get link to the first comic.

        Sometimes this can be retrieved from any comic page, sometimes from
        the archive page, sometimes it doesn't exist at all and one has
        to iterate backward to find it before hardcoding the result found.
        """
        raise NotImplementedError

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next (or previous - for dev purposes) comic.

        `last_soup` is the soup of the current page; `next_` is truthy to
        ask for the next comic and falsy for the previous one.
        """
        raise NotImplementedError

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        Returns a dict without a 'url' entry (it is added by
        `get_next_comic`) or None to skip the page.
        """
        raise NotImplementedError

    @classmethod
    def get_url_from_link(cls, link):
        """Get url corresponding to a link. Default implementation is similar to get_href."""
        return link['href']

    @classmethod
    def get_next_link(cls, last_soup):
        """Get link to next comic."""
        return cls.get_navi_link(last_soup, True)

    @classmethod
    def get_prev_link(cls, last_soup):
        """Get link to previous comic."""
        return cls.get_navi_link(last_soup, False)

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generic implementation of get_next_comic for navigable comics.

        Starts after `last_comic` when one is given, otherwise from the
        first comic link, and follows "next" links until there is none or
        until a link points back to the page it was found on.
        """
        url = last_comic['url'] if last_comic else None
        next_comic = \
            cls.get_next_link(get_soup_at_url(url)) \
            if url else \
            cls.get_first_comic_link()
        cls.log("next/first comic will be %s (url is %s)" % (str(next_comic), url))
        while next_comic:
            prev_url, url = url, cls.get_url_from_link(next_comic)
            if prev_url == url:
                # A page linking to itself would loop forever.
                cls.log("got same url %s" % url)
                break
            cls.log("about to get %s (%s)" % (url, str(next_comic)))
            soup = get_soup_at_url(url)
            comic = cls.get_comic_info(soup, next_comic)
            if comic is not None:
                assert 'url' not in comic
                comic['url'] = url
                yield comic
            next_comic = cls.get_next_link(soup)
            cls.log("next comic will be %s" % str(next_comic))

    @classmethod
    def check_first_link(cls):
        """Check that navigation to first comic seems to be working - for dev purposes.

        Returns True when a first link exists and its page can be fetched
        without an HTTP error, False otherwise.
        """
        # `import urllib` alone does not guarantee that the `urllib.error`
        # submodule is loaded; import it explicitly before referencing it.
        import urllib.error
        cls.log("about to check first link")
        ok = True
        firstlink = cls.get_first_comic_link()
        if firstlink is None:
            print("From %s : no first link" % cls.url)
            ok = False
        else:
            firsturl = cls.get_url_from_link(firstlink)
            try:
                get_soup_at_url(firsturl)
            except urllib.error.HTTPError:
                print("From %s : invalid first url" % cls.url)
                ok = False
        cls.log("checked first link -> returned %d" % ok)
        return ok

    @classmethod
    def check_prev_next_links(cls, url):
        """Check that navigation to prev/next from a given URL seems to be working - for dev purposes.

        Going backward then forward (and forward then backward) from `url`
        is expected to lead back to `url`. Returns True when every check
        that could be performed succeeded, False otherwise.
        """
        cls.log("about to check prev/next from %s" % url)
        ok = True
        if url is None:
            prevlink, nextlink = None, None
        else:
            soup = get_soup_at_url(url)
            prevlink, nextlink = cls.get_prev_link(soup), cls.get_next_link(soup)
        if prevlink is None and nextlink is None:
            print("From %s : no previous nor next" % url)
            ok = False
        else:
            if prevlink:
                prevurl = cls.get_url_from_link(prevlink)
                prevsoup = get_soup_at_url(prevurl)
                # The previous page may have no "next" link at all: report
                # it as a navigation failure instead of crashing on None.
                backlink = cls.get_next_link(prevsoup)
                prevnext = cls.get_url_from_link(backlink) if backlink is not None else None
                if prevnext != url:
                    print("From %s, going backward then forward leads to %s" % (url, prevnext))
                    ok = False
            if nextlink:
                nexturl = cls.get_url_from_link(nextlink)
                # A "next" link pointing to the page itself is skipped.
                if nexturl != url:
                    nextsoup = get_soup_at_url(nexturl)
                    # Same guard as above for a missing "previous" link.
                    backlink = cls.get_prev_link(nextsoup)
                    nextprev = cls.get_url_from_link(backlink) if backlink is not None else None
                    if nextprev != url:
                        print("From %s, going forward then backward leads to %s" % (url, nextprev))
                        ok = False
        cls.log("checked prev/next from %s -> returned %d" % (url, ok))
        return ok

    @classmethod
    def check_navigation(cls, url):
        """Check that navigation functions seem to be working - for dev purposes.

        Runs both `check_first_link` and `check_prev_next_links(url)` and
        returns True only when both succeed.
        """
        cls.log("about to check navigation from %s" % url)
        first = cls.check_first_link()
        prevnext = cls.check_prev_next_links(url)
        ok = first and prevnext
        cls.log("checked navigation from %s -> returned %d" % (url, ok))
        return ok
198
199
200
class GenericListableComic(GenericComic):
    """Generic class for "listable" comics : with a list of comics (aka 'archive')

    The `get_next_comic` method is implemented in terms of new,
    more specialized methods to be implemented/overridden:
        - get_archive_elements
        - get_url_from_archive_element
        - get_comic_info
    """

    @classmethod
    def get_archive_elements(cls):
        """Return an iterable over the archive elements."""
        raise NotImplementedError

    @classmethod
    def get_url_from_archive_element(cls, archive_elt):
        """Return the url an archive element points to."""
        raise NotImplementedError

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Return the information dict for one comic (or None to skip it)."""
        raise NotImplementedError

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generic implementation of get_next_comic for listable comics.

        Archive elements are skipped until the url of `last_comic` has been
        seen; every element after it is fetched and yielded with its 'url'
        entry set. Without `last_comic`, every element is fetched. A message
        is printed when the url of `last_comic` was never encountered.
        """
        pending = last_comic['url'] if last_comic else None
        for elt in cls.get_archive_elements():
            url = cls.get_url_from_archive_element(elt)
            cls.log("considering %s" % url)
            if pending is not None:
                # Still looking for the last retrieved comic: nothing is
                # fetched, not even on the iteration where it is found.
                if pending and pending == url:
                    pending = None
                continue
            cls.log("about to get %s (%s)" % (url, str(elt)))
            page = get_soup_at_url(url)
            info = cls.get_comic_info(page, elt)
            if info is None:
                continue
            assert 'url' not in info
            info['url'] = url
            yield info
        if pending is not None:
            print("Did not find %s : there might be a problem" % pending)
244
245
# Helper functions corresponding to get_first_comic_link/get_navi_link
246
247
248
@classmethod
def get_link_rel_next(cls, last_soup, next_):
    """Implementation of get_navi_link.

    Looks up the 'link' tag whose rel is 'next' when `next_` is truthy,
    'prev' otherwise.
    """
    wanted_rel = 'next' if next_ else 'prev'
    return last_soup.find('link', rel=wanted_rel)
252
253
254
@classmethod
def get_a_rel_next(cls, last_soup, next_):
    """Implementation of get_navi_link.

    Looks up the 'a' tag whose rel is 'next' when `next_` is truthy,
    'prev' otherwise.
    """
    wanted_rel = 'next' if next_ else 'prev'
    return last_soup.find('a', rel=wanted_rel)
258
259
260
@classmethod
def get_a_navi_navinext(cls, last_soup, next_):
    """Implementation of get_navi_link.

    Looks up the 'a' tag with class 'navi navi-next' when `next_` is
    truthy, 'navi navi-prev' otherwise.
    """
    if next_:
        css_class = 'navi navi-next'
    else:
        css_class = 'navi navi-prev'
    return last_soup.find('a', class_=css_class)
264
265
266
@classmethod
def get_a_navi_comicnavnext_navinext(cls, last_soup, next_):
    """Implementation of get_navi_link.

    Looks up the 'a' tag with class 'navi comic-nav-next navi-next' when
    `next_` is truthy, 'navi comic-nav-previous navi-prev' otherwise.
    """
    if next_:
        css_class = 'navi comic-nav-next navi-next'
    else:
        css_class = 'navi comic-nav-previous navi-prev'
    return last_soup.find('a', class_=css_class)
270
271
272
@classmethod
def get_a_comicnavbase_comicnavnext(cls, last_soup, next_):
    """Implementation of get_navi_link.

    Looks up the 'a' tag with class 'comic-nav-base comic-nav-next' when
    `next_` is truthy, 'comic-nav-base comic-nav-previous' otherwise.
    """
    if next_:
        css_class = 'comic-nav-base comic-nav-next'
    else:
        css_class = 'comic-nav-base comic-nav-previous'
    return last_soup.find('a', class_=css_class)
276
277
278
@classmethod
def get_a_navi_navifirst(cls):
    """Implementation of get_first_comic_link.

    Fetches the page at the class-level `url` and looks up the 'a' tag
    with class 'navi navi-first'.
    """
    home = get_soup_at_url(cls.url)
    return home.find('a', class_='navi navi-first')
282
283
284
@classmethod
def get_div_navfirst_a(cls):
    """Implementation of get_first_comic_link.

    Fetches the page at the class-level `url` and looks up the first 'a'
    tag inside the 'div' with class "nav-first".
    """
    home = get_soup_at_url(cls.url)
    container = home.find('div', class_="nav-first")
    return container.find('a')
288
289
290
@classmethod
def get_a_comicnavbase_comicnavfirst(cls):
    """Implementation of get_first_comic_link.

    Fetches the page at the class-level `url` and looks up the 'a' tag
    with class 'comic-nav-base comic-nav-first'.
    """
    home = get_soup_at_url(cls.url)
    return home.find('a', class_='comic-nav-base comic-nav-first')
294
295
296
@classmethod
def simulate_first_link(cls):
    """Implementation of get_first_comic_link.

    Builds a link-like dict whose 'href' is the `first_url` attribute
    provided by the class.
    """
    return dict(href=cls.first_url)
301
302
303
@classmethod
def navigate_to_first_comic(cls):
    """Implementation of get_first_comic_link walking back to the first comic.

    The starting URL is asked interactively; "previous" links are then
    followed (each visited URL is printed) until a page has none. The
    result is a link-like dict whose 'href' is the last URL reached.

    Sometimes the first comic cannot be reached directly. Once its URL is
    known it is better to hardcode it, but for development purposes it is
    convenient to have an automatic way to find it.
    """
    url = input("Get starting URL: ")
    while True:
        print(url)
        prev_link = cls.get_prev_link(get_soup_at_url(url))
        if not prev_link:
            return {'href': url}
        url = cls.get_url_from_link(prev_link)
322
323
324
class GenericEmptyComic(GenericComic):
    """Generic class for comics where nothing is to be done.

    It can be used to temporarily deactivate a comic that does not work
    properly, by replacing `class MyComic(GenericWhateverComic)` with
    `class MyComic(GenericEmptyComic, GenericWhateverComic)`.
    """

    @classmethod
    def get_next_comic(cls, last_comic):
        """Log that the comic is considered empty and return an empty list."""
        message = "comic is considered as empty - returning no comic"
        cls.log(message)
        no_comics = []
        return no_comics
336
337
338 View Code Duplication
class ExtraFabulousComics(GenericNavigableComic):
    """Retrieve Extra Fabulous Comics."""
    name = 'efc'
    long_name = 'Extra Fabulous Comics'
    url = 'http://extrafabulouscomics.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dict for the comic page `soup`.

        Images are the 'img' tags whose src matches the site's
        '/wp-content/uploads/' prefix; the title and the date come from the
        'og:title' and 'article:published_time' meta tags. `link` is unused.
        """
        uploads_re = re.compile('^%s/wp-content/uploads/' % cls.url)
        pictures = soup.find_all('img', src=uploads_re)
        title = soup.find('meta', property='og:title')['content']
        published = soup.find('meta', property='article:published_time')['content']
        # Only the first 10 characters (the "%Y-%m-%d" part) are parsed.
        when = string_to_date(published[:10], "%Y-%m-%d")
        return dict(
            title=title,
            img=[pic['src'] for pic in pictures],
            month=when.month,
            year=when.year,
            day=when.day,
            prefix=title + '-',
        )
362
363
364 View Code Duplication
class GenericLeMondeBlog(GenericNavigableComic):
    """Generic class to retrieve comics from Le Monde blogs.

    Subclasses are expected to override `first_url`, the page the
    navigation starts from.
    """
    first_url = NotImplemented
    get_first_comic_link = simulate_first_link
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dict for the blog post `soup`.

        The short url, title and images come from the 'shortlink' link tag
        and the 'og:title' / 'og:image' meta tags; the date comes from the
        "entry-date" span. `link` is unused.
        """
        shortlink = soup.find('link', rel='shortlink')['href']
        title = soup.find('meta', property='og:title')['content']
        date_text = soup.find("span", class_="entry-date").string
        # The third argument presumably selects the locale used for parsing.
        when = string_to_date(date_text, "%d %B %Y", "fr_FR.utf8")
        og_images = soup.find_all('meta', property='og:image')
        return dict(
            title=title,
            url2=shortlink,
            img=[convert_iri_to_plain_ascii_uri(tag['content']) for tag in og_images],
            month=when.month,
            year=when.year,
            day=when.day,
        )
386
387
388
class ZepWorld(GenericLeMondeBlog):
    """Retrieve Zep World comics from its Le Monde blog."""
    name = 'zep'
    long_name = 'Zep World'
    # Page the navigation starts from.
    first_url = 'http://zepworld.blog.lemonde.fr/2014/10/31/bientot-le-blog-de-zep/'
    url = 'http://zepworld.blog.lemonde.fr'
394
395
396
class Vidberg(GenericLeMondeBlog):
    """Retrieve Vidberg comics from its Le Monde blog."""
    name = "vidberg"
    long_name = "Vidberg - l'actu en patates"
    # Not the actual first comic: no efficient way was found to retrieve it.
    first_url = 'http://vidberg.blog.lemonde.fr/2012/02/09/revue-de-campagne-la-campagne-du-modem-semballe/'
    url = 'http://vidberg.blog.lemonde.fr'
403
404
405
class Plantu(GenericLeMondeBlog):
    """Retrieve Plantu comics from its Le Monde blog."""
    name = "plantu"
    long_name = 'Plantu'
    # Page the navigation starts from.
    first_url = 'http://plantu.blog.lemonde.fr/2014/10/28/stress-test-a-bruxelles/'
    url = 'http://plantu.blog.lemonde.fr'
411
412
413
class XavierGorce(GenericLeMondeBlog):
    """Retrieve Xavier Gorce comics from its Le Monde blog."""
    name = "gorce"
    long_name = 'Xavier Gorce'
    # Page the navigation starts from.
    first_url = 'http://xaviergorce.blog.lemonde.fr/2015/01/09/distinction/'
    url = 'http://xaviergorce.blog.lemonde.fr'
419
420
421
class CartooningForPeace(GenericLeMondeBlog):
    """Retrieve Cartooning For Peace comics from its Le Monde blog."""
    name = "forpeace"
    long_name = 'Cartooning For Peace'
    # Page the navigation starts from.
    first_url = 'http://cartooningforpeace.blog.lemonde.fr/2014/12/15/bado/'
    url = 'http://cartooningforpeace.blog.lemonde.fr'
427
428
429
class Aurel(GenericLeMondeBlog):
    """Retrieve Aurel comics from its Le Monde blog."""
    name = "aurel"
    long_name = 'Aurel'
    # Page the navigation starts from.
    first_url = 'http://aurel.blog.lemonde.fr/2014/09/29/le-senat-repasse-a-droite/'
    url = 'http://aurel.blog.lemonde.fr'
435
436
437
class LesCulottees(GenericLeMondeBlog):
    """Retrieve Les Culottees comics from its Le Monde blog."""
    name = "culottees"
    long_name = "Les Culottees"
    # Page the navigation starts from.
    first_url = 'http://lesculottees.blog.lemonde.fr/2016/01/11/clementine-delait-femme-a-barbe/'
    url = 'http://lesculottees.blog.lemonde.fr'
443
444
445
class UneAnneeAuLycee(GenericLeMondeBlog):
    """Retrieve Une Annee Au Lycee comics from its Le Monde blog."""
    name = "lycee"
    long_name = "Une Annee au Lycee"
    # Page the navigation starts from.
    first_url = 'http://uneanneeaulycee.blog.lemonde.fr/2016/06/13/la-semaine-du-bac-est-arrivee/'
    url = "http://uneanneeaulycee.blog.lemonde.fr"
451
452
453 View Code Duplication
class Rall(GenericNavigableComic):
    """Retrieve Ted Rall comics."""
    # Also on http://www.gocomics.com/tedrall
    name = "rall"
    long_name = 'Ted Rall'
    url = 'http://rall.com/comic'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    # Not the actual first comic: no efficient way was found to retrieve it.
    first_url = 'http://rall.com/2014/01/30/los-angeles-times-cartoon-well-miss-those-california-flowers'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dict for the comic page `soup`.

        Title and description come from the 'og:title' / 'og:description'
        meta tags, author and date from the "author vcard" and "entry-date"
        spans, images from the 'entry-content' div. `link` is unused.
        """
        title = soup.find('meta', property='og:title')['content']
        author_span = soup.find("span", class_="author vcard")
        author = author_span.find("a").string
        date_text = soup.find("span", class_="entry-date").string
        when = string_to_date(date_text, "%B %d, %Y")
        description = soup.find('meta', property='og:description')['content']
        content = soup.find('div', class_='entry-content')
        # The last 7 images are dropped: they are the social media buttons.
        pictures = content.find_all('img')[:-7]
        return dict(
            title=title,
            author=author,
            month=when.month,
            year=when.year,
            day=when.day,
            description=description,
            img=[pic['src'] for pic in pictures],
        )
483
484
485
class Dilem(GenericNavigableComic):
    """Retrieve Ali Dilem comics."""
    name = "dilem"
    long_name = "Ali Dilem"
    url = "http://information.tv5monde.com/dilem"
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://information.tv5monde.com/dilem/2004-06-26'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        The site's classes are inverted: the 'prev' list item leads to the
        next comic and the 'next' one to the previous comic. Returns None
        when the list item is not found.
        """
        css_class = 'prev' if next_ else 'next'
        item = last_soup.find('li', class_=css_class)
        if not item:
            return None
        return item.find('a')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dict for the comic page `soup`.

        Uses the 'shortlink' link tag, the 'twitter:title' and 'og:image'
        meta tags and the 'dc:date' span. `link` is unused.
        """
        shortlink = soup.find('link', rel='shortlink')['href']
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        og_images = soup.find_all('meta', property='og:image')
        stamp = soup.find('span', property='dc:date')['content']
        # Only the first 10 characters (the "%Y-%m-%d" part) are parsed.
        when = string_to_date(stamp[:10], "%Y-%m-%d")
        return dict(
            short_url=shortlink,
            title=title,
            img=[tag['content'] for tag in og_images],
            day=when.day,
            month=when.month,
            year=when.year,
        )
518
519
520
class SpaceAvalanche(GenericNavigableComic):
    """Retrieve Space Avalanche comics."""
    name = "avalanche"
    long_name = "Space Avalanche"
    url = "http://www.spaceavalanche.com"
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return a hardcoded link-like dict ('href' and 'title') for the first comic."""
        return dict(
            href="http://www.spaceavalanche.com/2009/02/02/irish-sea/",
            title="Irish Sea",
        )

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dict for the comic page `soup`.

        The title is the 'title' entry of `link`; the date is parsed from
        the three numeric path components of the link's url; images are
        the 'img' tags of the "entry" div.
        """
        date_in_url = re.compile('.*/([0-9]*)/([0-9]*)/([0-9]*)/.*$')
        title = link['title']
        comic_url = cls.get_url_from_link(link)
        year, month, day = map(int, date_in_url.match(comic_url).groups())
        entry = soup.find("div", class_="entry")
        pictures = entry.find_all("img")
        return dict(
            title=title,
            day=day,
            month=month,
            year=year,
            img=[pic['src'] for pic in pictures],
        )
548
549
550
class ZenPencils(GenericNavigableComic):
    """Retrieve ZenPencils comics."""
    # Also on http://zenpencils.tumblr.com
    # Also on http://www.gocomics.com/zen-pencils
    name = "zenpencils"
    long_name = "Zen Pencils"
    url = "http://zenpencils.com"
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://zenpencils.com/comic/1-ralph-waldo-emerson-make-them-cry/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dict for the comic page `soup`.

        Images come from the 'comic' div; author, title and date from the
        'post-content' div; the description from the 'og:description' meta
        tag. Asserts that there is at least one image and that every image
        has equal 'alt' and 'title' values, each being either the post
        title or empty. `link` is unused.
        """
        pictures = soup.find('div', id='comic').find_all('img')
        post = soup.find('div', class_='post-content')
        author = post.find("span", class_="post-author").find("a").string
        title = post.find('h2', class_='post-title').string
        date_text = post.find('span', class_='post-date').string
        when = string_to_date(date_text, "%B %d, %Y")
        assert pictures
        # Two separate passes, so that the first check is completed on every
        # image before the second one starts.
        for pic in pictures:
            assert pic['alt'] == pic['title']
        for pic in pictures:
            assert pic['alt'] in (title, "")
        description = soup.find('meta', property='og:description')['content']
        return dict(
            title=title,
            description=description,
            author=author,
            day=when.day,
            month=when.month,
            year=when.year,
            img=[pic['src'] for pic in pictures],
        )
583
584
585
class ItsTheTie(GenericNavigableComic):
    """Retrieve It's the tie comics."""
    # Also on http://itsthetie.tumblr.com
    # Also on https://tapastic.com/series/itsthetie
    name = "tie"
    long_name = "It's the tie"
    url = 'http://itsthetie.com'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dict for the comic page `soup`.

        Title and date come from the 'comic-title' heading and the
        'comic-meta entry-meta' header. Images are the 'og:image' meta
        contents followed by the 'data-oversrc' values not already among
        them, minus any url ending with "/2016/01/bonus-panel.png". Tags
        come from the 'article:tag' meta tag, "" when absent. `link` is
        unused.
        """
        title = soup.find('h1', class_='comic-title').find('a').string
        meta_header = soup.find('header', class_='comic-meta entry-meta')
        when = string_to_date(meta_header.find('a').string, "%B %d, %Y")
        # Bonus images may or may not be in meta og:image.
        main_srcs = [m['content'] for m in soup.find_all('meta', property='og:image')]
        bonus_srcs = [b['data-oversrc']
                      for b in soup.find_all('img', attrs={'data-oversrc': True})]
        candidates = list(main_srcs)
        candidates.extend(src for src in bonus_srcs if src not in main_srcs)
        excluded_suffix = "/2016/01/bonus-panel.png"
        sources = [src for src in candidates if not src.endswith(excluded_suffix)]
        tag_meta = soup.find('meta', property='article:tag')
        if tag_meta:
            tags = tag_meta['content']
        else:
            tags = ""
        return dict(
            title=title,
            month=when.month,
            year=when.year,
            day=when.day,
            img=sources,
            tags=tags,
        )
618
619
620
class PenelopeBagieu(GenericNavigableComic):
    """Retrieve comics from Penelope Bagieu's blog."""
    name = "bagieu"
    long_name = "Ma vie est tout a fait fascinante (Bagieu)"
    url = "http://www.penelope-jolicoeur.com"
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = "http://www.penelope-jolicoeur.com/2007/02/ma-vie-mon-oeuv.html"

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dict for the blog post `soup`.

        The date comes from the 'date-header' heading, images from the
        'entry-body' div and the title from the 'entry-header' heading.
        `link` is unused.
        """
        date_text = soup.find('h2', class_='date-header').string
        # The third argument presumably selects the locale used for parsing.
        when = string_to_date(date_text, "%A %d %B %Y", "fr_FR.utf8")
        body = soup.find('div', class_='entry-body')
        pictures = body.find_all('img')
        title = soup.find('h3', class_='entry-header').string
        return dict(
            title=title,
            img=[pic['src'] for pic in pictures],
            month=when.month,
            year=when.year,
            day=when.day,
        )
643 View Code Duplication
0 ignored issues
show
This code seems to be duplicated in your project.
Loading history...
644
645
class OneOneOneOneComic(GenericNavigableComic):
    """Retrieve 1111 Comics."""
    # Also on http://comics1111.tumblr.com
    # Also on https://tapastic.com/series/1111-Comics
    name = "1111"
    long_name = "1111 Comics"
    url = "http://www.1111comics.me"
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dict for the comic page `soup`.

        Title and date come from the 'comic-title' heading and the
        'comic-meta entry-meta' header; images from the 'og:image' meta
        tags. `link` is unused.
        """
        title = soup.find('h1', class_='comic-title').find('a').string
        meta_header = soup.find('header', class_='comic-meta entry-meta')
        when = string_to_date(meta_header.find('a').string, "%B %d, %Y")
        og_images = soup.find_all('meta', property='og:image')
        return dict(
            title=title,
            month=when.month,
            year=when.year,
            day=when.day,
            img=[tag['content'] for tag in og_images],
        )
669
670
671
class AngryAtNothing(GenericNavigableComic):
    """Retrieve Angry at Nothing comics."""
    # Also on http://tapastic.com/series/Comics-yeah-definitely-comics-
    name = "angry"
    long_name = "Angry At Nothing"
    url = "http://www.angryatnothing.net"
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dict for the comic page `soup`.

        Title and date come from the 'comic-title' heading and the
        'comic-meta entry-meta' header; images from the 'og:image' meta
        tags. `link` is unused.
        """
        title = soup.find('h1', class_='comic-title').find('a').string
        meta_header = soup.find('header', class_='comic-meta entry-meta')
        when = string_to_date(meta_header.find('a').string, "%B %d, %Y")
        og_images = soup.find_all('meta', property='og:image')
        return dict(
            title=title,
            month=when.month,
            year=when.year,
            day=when.day,
            img=[tag['content'] for tag in og_images],
        )
694 View Code Duplication
0 ignored issues
show
This code seems to be duplicated in your project.
Loading history...
695
696
class NeDroid(GenericNavigableComic):
    """Retrieve NeDroid comics."""
    name = "nedroid"
    long_name = "NeDroid"
    url = "http://nedroid.com"
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dict for the comic page `soup`.

        The comic number is parsed from the 'shortlink' url and the date
        from the src of the first image of the 'comic' div; the titles are
        that image's 'alt' and 'title' values. Asserts that the 'comic'
        div holds exactly one image. `link` is unused.
        """
        number_re = re.compile(r'^%s/\?p=([0-9]*)' % cls.url)
        date_re = re.compile('//nedroid.com/comics/([0-9]*)-([0-9]*)-([0-9]*).*')
        shortlink = cls.get_url_from_link(soup.find('link', rel='shortlink'))
        num = int(number_re.match(shortlink).group(1))
        pictures = soup.find('div', id='comic').find_all('img')
        first = pictures[0]
        year, month, day = map(int, date_re.match(first['src']).groups())
        assert len(pictures) == 1
        title = first['alt']
        hover_title = first['title']
        return dict(
            short_url=shortlink,
            title=title,
            title2=hover_title,
            img=[urljoin_wrapper(cls.url, pic['src']) for pic in pictures],
            day=day,
            month=month,
            year=year,
            num=num,
        )
727
728
729
class Garfield(GenericNavigableComic):
    """Retrieve Garfield comics."""
    # Also on http://www.gocomics.com/garfield
    name = "garfield"
    long_name = "Garfield"
    url = "https://garfield.com"
    get_first_comic_link = simulate_first_link
    first_url = "https://garfield.com/comic/1978/06/19"

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Looks up the 'a' tag with class 'comic-arrow-right' when `next_`
        is truthy, 'comic-arrow-left' otherwise.
        """
        if next_:
            arrow_class = 'comic-arrow-right'
        else:
            arrow_class = 'comic-arrow-left'
        return last_soup.find('a', class_=arrow_class)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the information dict for the comic page `soup`.

        The date is parsed from the '/comic/<year>/<month>/<day>' part of
        the link's url; images are the 'img-responsive' images of the
        'comic-display' div.
        """
        comic_url = cls.get_url_from_link(link)
        date_in_url = re.compile('^%s/comic/([0-9]*)/([0-9]*)/([0-9]*)' % cls.url)
        year, month, day = map(int, date_in_url.match(comic_url).groups())
        display = soup.find('div', class_='comic-display')
        pictures = display.find_all('img', class_='img-responsive')
        return dict(
            month=month,
            year=year,
            day=day,
            img=[pic['src'] for pic in pictures],
        )
756
757
758
class Dilbert(GenericNavigableComic):
    """Class to retrieve Dilbert comics."""
    # Also on http://www.gocomics.com/dilbert-classics
    name = 'dilbert'
    long_name = 'Dilbert'
    url = 'http://dilbert.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://dilbert.com/strip/1989-04-16'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the <a> inside the next/previous navigation div, or None."""
        if next_:
            nav_class = 'nav-comic nav-right'
        else:
            nav_class = 'nav-comic nav-left'
        nav_div = last_soup.find('div', class_=nav_class)
        if not nav_div:
            return None
        return nav_div.find('a')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect the comic's metadata from the page's <meta> tags."""
        def meta_content(prop):
            """Return the 'content' of the first <meta> with that property."""
            return soup.find('meta', property=prop)['content']

        title = meta_content('og:title')
        image_metas = soup.find_all('meta', property='og:image')
        description = meta_content('og:description')
        published = string_to_date(meta_content('article:publish_date'), "%B %d, %Y")
        author = meta_content('article:author')
        tags = meta_content('article:tag')
        return {
            'title': title,
            'description': description,
            'img': [meta['content'] for meta in image_metas],
            'author': author,
            'tags': tags,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
794
795
796
class VictimsOfCircumsolar(GenericNavigableComic):
    """Class to retrieve VictimsOfCircumsolar comics."""
    name = 'circumsolar'
    long_name = 'Victims Of Circumsolar'
    url = 'http://www.victimsofcircumsolar.com'
    get_navi_link = get_a_navi_comicnavnext_navinext
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.victimsofcircumsolar.com/comic/modern-addiction'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, description and image URLs for one comic page."""
        # Date is on the archive page
        # The last og:title / og:description tag of the page is the one used.
        title_metas = soup.find_all('meta', property='og:title')
        title = title_metas[-1]['content']
        desc_metas = soup.find_all('meta', property='og:description')
        description = desc_metas[-1]['content']
        comic_div = soup.find('div', id='comic')
        images = comic_div.find_all('img')
        assert all(img['title'] == img['alt'] == title for img in images)
        return {
            'title': title,
            'description': description,
            'img': [img['src'] for img in images],
        }
818
819
820
class ThreeWordPhrase(GenericNavigableComic):
    """Class to retrieve Three Word Phrase comics."""
    # Also on http://www.threewordphrase.tumblr.com
    name = 'threeword'
    long_name = 'Three Word Phrase'
    url = 'http://threewordphrase.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comic: the parent of the 'first' button image."""
        return get_soup_at_url(cls.url).find('img', src='/firstlink.gif').parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns the parent element of the next/previous button image, or None
        when the button image is missing or its parent carries no href.
        """
        button = last_soup.find('img', src='/nextlink.gif' if next_ else '/prevlink.gif')
        if button is None:
            # No navigation button on this page: nothing to follow.
            return None
        link = button.parent
        return None if link.get('href') is None else link

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        Every <img> of the page is considered part of the comic except those
        whose src ends with one of the excluded suffixes (judging by the file
        names: navigation buttons, icons, ads and the header).
        """
        title = soup.find('title')
        imgs = [img for img in soup.find_all('img')
                if not img['src'].endswith(
                    ('link.gif', '32.png', 'twpbookad.jpg',
                     'merchad.jpg', 'header.gif', 'tipjar.jpg'))]
        return {
            'title': title.string if title else None,
            'title2': '  '.join(img.get('alt') for img in imgs if img.get('alt')),
            'img': [urljoin_wrapper(cls.url, img['src']) for img in imgs],
        }
852
853
854
class DeadlyPanel(GenericEmptyComic, GenericNavigableComic):
    """Class to retrieve Deadly Panel comics."""
    # Also on https://tapastic.com/series/deadlypanel
    name = 'deadly'
    long_name = 'Deadly Panel'
    url = 'http://www.deadlypanel.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the image URLs found in the page's 'comic' div."""
        comic_div = soup.find('div', id='comic')
        images = comic_div.find_all('img')
        assert all(img['alt'] == img['title'] for img in images)
        return {
            'img': [img['src'] for img in images],
        }
871
872
873
class TheGentlemanArmchair(GenericNavigableComic):
    """Class to retrieve The Gentleman Armchair comics."""
    name = 'gentlemanarmchair'
    long_name = 'The Gentleman Armchair'
    url = 'http://thegentlemansarmchair.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return images, title, author and publication date of a comic page."""
        title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        posted = string_to_date(soup.find('span', class_='post-date').string, "%B %d, %Y")
        comic_div = soup.find('div', id='comic')
        images = comic_div.find_all('img')
        return {
            'img': [img['src'] for img in images],
            'title': title,
            'author': author,
            'month': posted.month,
            'year': posted.year,
            'day': posted.day,
        }
897
898
899 View Code Duplication
class MyExtraLife(GenericNavigableComic):
    """Class to retrieve My Extra Life comics."""
    name = 'extralife'
    long_name = 'My Extra Life'
    url = 'http://www.myextralife.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'first comic' navigation link of the home page."""
        home = get_soup_at_url(cls.url)
        return home.find('a', class_='comic_nav_link first_comic_link')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, image URLs and publication date of a comic page."""
        title = soup.find("h1", class_="comic_title").string
        posted = string_to_date(soup.find("span", class_="comic_date").string, "%B %d, %Y")
        images = soup.find_all("img", class_="comic")
        assert all(img['alt'] == img['title'] == title for img in images)
        return {
            'title': title,
            # Images with an empty src are left out.
            'img': [img['src'] for img in images if img["src"]],
            'day': posted.day,
            'month': posted.month,
            'year': posted.year
        }
926
927
928
class SaturdayMorningBreakfastCereal(GenericNavigableComic):
    """Class to retrieve Saturday Morning Breakfast Cereal comics."""
    # Also on http://www.gocomics.com/saturday-morning-breakfast-cereal
    # Also on http://smbc-comics.tumblr.com
    name = 'smbc'
    long_name = 'Saturday Morning Breakfast Cereal'
    url = 'http://www.smbc-comics.com'
    get_navi_link = get_a_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the home page's <a rel="start"> link."""
        home = get_soup_at_url(cls.url)
        return home.find('a', rel='start')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, image URLs and publication date of a comic page."""
        main_img = soup.find('img', id='cc-comic')
        img_urls = [main_img['src']]
        # An optional second image lives in the 'aftercomic' div.
        after_div = soup.find('div', id='aftercomic')
        if after_div:
            after_url = after_div.find('img')['src']
            if after_url:
                img_urls.append(after_url)
        publish_str = soup.find('div', class_='cc-publishtime').contents[0]
        posted = string_to_date(publish_str, "%B %d, %Y")
        return {
            'title': main_img['title'],
            'img': [urljoin_wrapper(cls.url, u) for u in img_urls],
            'day': posted.day,
            'month': posted.month,
            'year': posted.year
        }
959
960
961
class PerryBibleFellowship(GenericListableComic):
    """Class to retrieve Perry Bible Fellowship comics."""
    name = 'pbf'
    long_name = 'Perry Bible Fellowship'
    url = 'http://pbfcomics.com'
    get_url_from_archive_element = join_cls_url_to_href

    @classmethod
    def get_archive_elements(cls):
        """Return the home page links whose href is '/<digits>/', in reverse page order."""
        comic_link_re = re.compile('^/[0-9]*/$')
        links = get_soup_at_url(cls.url).find_all('a', href=comic_link_re)
        return reversed(links)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return number, name and image URL of the comic behind an archive link."""
        comic_url = cls.get_url_from_archive_element(link)
        comic_img_re = re.compile('^/archive_b/PBF.*')
        name = link.string
        num = int(link['name'])
        # The link's 'name' attribute and its href must agree on the number.
        assert link['href'] == '/%d/' % num
        images = soup.find_all('img', src=comic_img_re)
        assert len(images) == 1
        assert images[0]['alt'] == name
        return {
            'num': num,
            'name': name,
            'img': [urljoin_wrapper(comic_url, img['src']) for img in images],
            'prefix': '%d-' % num,
        }
991
992
993 View Code Duplication
class Mercworks(GenericNavigableComic):
    """Class to retrieve Mercworks comics."""
    # Also on http://mercworks.tumblr.com
    name = 'mercworks'
    long_name = 'Mercworks'
    url = 'http://mercworks.net'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect the comic's metadata from the page's <meta> tags."""
        def named_meta_content(meta_name):
            """Return the 'content' of the first <meta name=...> tag."""
            return soup.find('meta', attrs={'name': meta_name})['content']

        title = soup.find('meta', property='og:title')['content']
        desc_meta = soup.find('meta', property='og:description')
        if desc_meta:
            desc = desc_meta['content']
        else:
            desc = ""
        author = named_meta_content('shareaholic:article_author_name')
        published = named_meta_content('shareaholic:article_published_time')
        # Only the first 10 characters (the YYYY-MM-DD part) are parsed.
        posted = string_to_date(published[:10], "%Y-%m-%d")
        image_metas = soup.find_all('meta', property='og:image')
        return {
            'img': [meta['content'] for meta in image_metas],
            'title': title,
            'author': author,
            'desc': desc,
            'day': posted.day,
            'month': posted.month,
            'year': posted.year
        }
1022
1023
1024
class BerkeleyMews(GenericListableComic):
    """Class to retrieve Berkeley Mews comics."""
    # Also on http://mews.tumblr.com
    # Also on http://www.gocomics.com/berkeley-mews
    name = 'berkeley'
    long_name = 'Berkeley Mews'
    url = 'http://www.berkeleymews.com'
    get_url_from_archive_element = get_href
    comic_num_re = re.compile('%s/\\?p=([0-9]*)$' % url)

    @classmethod
    def get_archive_elements(cls):
        """Return the archive page's comic links in reverse page order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, "?page_id=2"))
        comic_links = archive_soup.find_all('a', href=cls.comic_num_re)
        return reversed(comic_links)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return number, titles, image URL and date of the comic behind a link."""
        comic_date_re = re.compile('.*/([0-9]*)-([0-9]*)-([0-9]*)-.*')
        comic_url = cls.get_url_from_archive_element(link)
        num = int(cls.comic_num_re.match(comic_url).groups()[0])
        comic_img = soup.find('div', id='comic').find('img')
        assert comic_img['alt'] == comic_img['title']
        hover_text = comic_img['title']
        img_url = comic_img['src']
        # The date is taken from the image file name (<year>-<month>-<day>-...).
        date_parts = comic_date_re.match(img_url).groups()
        year, month, day = [int(part) for part in date_parts]
        return {
            'num': num,
            'title': link.string,
            'title2': hover_text,
            'img': [img_url],
            'year': year,
            'month': month,
            'day': day,
        }
1059
1060
1061
class GenericBouletCorp(GenericNavigableComic):
    """Generic class to retrieve BouletCorp comics in different languages."""
    # Also on http://bouletcorp.tumblr.com
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the first <a> of the home page's 'centered_nav' div."""
        nav_div = get_soup_at_url(cls.url).find('div', id='centered_nav')
        return nav_div.find_all('a')[0]

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return images, title, hover texts and date of a comic page."""
        comic_url = cls.get_url_from_link(link)
        # The date is read from the comic URL: <url>/<year>/<month>/<day>/...
        date_re = re.compile('^%s/([0-9]*)/([0-9]*)/([0-9]*)/' % cls.url)
        date_parts = date_re.match(comic_url).groups()
        year, month, day = [int(part) for part in date_parts]
        story = soup.find('div', id='notes').find('div', class_='storycontent')
        images = story.find_all('img')
        hover_texts = [img.get('title') for img in images]
        texts = '  '.join(t for t in hover_texts if t)
        title = soup.find('title').string
        return {
            'img': [convert_iri_to_plain_ascii_uri(img['src'])
                    for img in images if img.get('src') is not None],
            'title': title,
            'texts': texts,
            'year': year,
            'month': month,
            'day': day,
        }
1088
1089
1090
class BouletCorp(GenericBouletCorp):
    """Class to retrieve BouletCorp comics."""
    # Only the identifiers and the site URL are defined here; the scraping
    # logic is inherited from GenericBouletCorp.
    name = 'boulet'
    long_name = 'Boulet Corp'
    url = 'http://www.bouletcorp.com'
1095
1096
1097
class BouletCorpEn(GenericBouletCorp):
    """Class to retrieve the English BouletCorp comics."""
    # Only the identifiers and the site URL are defined here; the scraping
    # logic is inherited from GenericBouletCorp.
    name = 'boulet_en'
    long_name = 'Boulet Corp English'
    url = 'http://english.bouletcorp.com'
1102
1103
1104
class AmazingSuperPowers(GenericNavigableComic):
    """Class to retrieve Amazing Super Powers comics."""
    name = 'asp'
    long_name = 'Amazing Super Powers'
    url = 'http://www.amazingsuperpowers.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, author, image URLs and date of a comic page."""
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        posted = string_to_date(soup.find('span', class_='post-date').string, "%B %d, %Y")
        images = soup.find('div', id='comic').find_all('img')
        # The title is built from the images' 'title' attributes.
        title = ' '.join(img['title'] for img in images)
        assert all(img['alt'] == img['title'] for img in images)
        return {
            'title': title,
            'author': author,
            'img': [img['src'] for img in images],
            'day': posted.day,
            'month': posted.month,
            'year': posted.year
        }
1129
1130
1131
class ToonHole(GenericListableComic):
    """Class to retrieve Toon Holes comics."""
    # Also on http://tapastic.com/series/TOONHOLE
    name = 'toonhole'
    long_name = 'Toon Hole'
    url = 'http://www.toonhole.com'
    get_url_from_archive_element = get_href

    @classmethod
    def get_archive_elements(cls):
        """Return the archive page's rel="bookmark" links in reverse page order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive/'))
        bookmarks = archive_soup.find_all('a', rel='bookmark')
        return reversed(bookmarks)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, date and image URLs of the comic behind an archive link."""
        title = link.string
        raw_date = soup.find('div', class_='comicdate').string.strip()
        posted = string_to_date(remove_st_nd_rd_th_from_date(raw_date), "%B %d, %Y")
        images = soup.find('div', id='comic').find_all('img')
        assert all(img['alt'] == img['title'] == title for img in images)
        return {
            'title': title,
            'month': posted.month,
            'year': posted.year,
            'day': posted.day,
            'img': [convert_iri_to_plain_ascii_uri(img['src']) for img in images],
        }
1159
1160
1161
class Channelate(GenericNavigableComic):
    """Class to retrieve Channelate comics."""
    name = 'channelate'
    long_name = 'Channelate'
    url = 'http://www.channelate.com'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return metadata and image URLs of a comic page.

        When the page has an 'extrapanelbutton' div, the page it links to is
        fetched as well and its 'extrapanelimage' images are appended.
        """
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        posted = string_to_date(soup.find('span', class_='post-date').string, '%Y/%m/%d')
        title = soup.find('meta', property='og:title')['content']
        comic_div = soup.find('div', id='comic')
        if comic_div:
            images = comic_div.find_all('img')
        else:
            images = []
        extra_button = soup.find('div', id='extrapanelbutton')
        if extra_button:
            extra_url = extra_button.find('a')['href']
            images.extend(
                get_soup_at_url(extra_url).find_all('img', class_='extrapanelimage'))
        else:
            extra_url = None
        return {
            'url_extra': extra_url,
            'title': title,
            'author': author,
            'month': posted.month,
            'year': posted.year,
            'day': posted.day,
            'img': [img['src'] for img in images],
        }
1194
1195
1196
class CyanideAndHappiness(GenericNavigableComic):
    """Class to retrieve Cyanide And Happiness comics."""
    name = 'cyanide'
    long_name = 'Cyanide and Happiness'
    url = 'http://explosm.net'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comic: the home page's 'Oldest comic' link."""
        return get_soup_at_url(cls.url).find('a', title='Oldest comic')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns None when the navigation element is missing from the page or
        when it carries no href.
        """
        # NOTE(review): the trailing space in 'previous-comic ' is kept as is;
        # presumably it mirrors the site's markup - confirm before changing.
        link = last_soup.find('a', class_='next-comic' if next_ else 'previous-comic ')
        if link is None or link.get('href') is None:
            return None
        return link

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        The comic number is the second-to-last '/'-separated component of the
        og:url meta tag; the author line is expected to start with 'by '.
        """
        url2 = soup.find('meta', property='og:url')['content']
        num = int(url2.split('/')[-2])
        date_str = soup.find('h3').find('a').string
        day = string_to_date(date_str, '%Y.%m.%d')
        author = soup.find('small', class_="author-credit-name").string
        assert author.startswith('by ')
        author = author[3:]  # drop the leading 'by '
        imgs = soup.find_all('img', id='main-comic')
        return {
            'num': num,
            'author': author,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'prefix': '%d-' % num,
            'img': [convert_iri_to_plain_ascii_uri(urljoin_wrapper(cls.url, i['src'])) for i in imgs]
        }
1234
1235
1236
class MrLovenstein(GenericComic):
    """Class to retrieve Mr Lovenstein comics."""
    # Also on https://tapastic.com/series/MrLovenstein
    name = 'mrlovenstein'
    long_name = 'Mr. Lovenstein'
    url = 'http://www.mrlovenstein.com'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generator to get the next comic. Implementation of GenericComic's abstract method.

        The range of comic numbers is derived from the '/comic/<num>' links of
        the home page; every number in that range is then fetched in order.
        """
        # TODO: more info from http://www.mrlovenstein.com/archive
        comic_num_re = re.compile('^/comic/([0-9]*)$')
        comic_img_re = re.compile('^/images/comics/')
        home_links = get_soup_at_url(cls.url).find_all('a', href=comic_num_re)
        nums = [int(comic_num_re.match(a['href']).groups()[0]) for a in home_links]
        oldest, newest = min(nums), max(nums)
        # Resume right after the last retrieved comic when there is one.
        start = last_comic['num'] + 1 if last_comic else oldest
        for num in range(start, newest + 1):
            comic_url = urljoin_wrapper(cls.url, '/comic/%d' % num)
            comic_soup = get_soup_at_url(comic_url)
            # Matching images are used in reverse page order.
            images = list(reversed(comic_soup.find_all('img', src=comic_img_re)))
            description = comic_soup.find('meta', attrs={'name': 'description'})['content']
            hover_texts = [img.get('title') for img in images]
            yield {
                'url': comic_url,
                'num': num,
                'texts': '  '.join(t for t in hover_texts if t),
                'img': [urljoin_wrapper(comic_url, img['src']) for img in images],
                'description': description,
            }
1266
1267
1268
class DinosaurComics(GenericListableComic):
    """Class to retrieve Dinosaur Comics comics."""
    name = 'dinosaur'
    long_name = 'Dinosaur Comics'
    url = 'http://www.qwantz.com'
    get_url_from_archive_element = get_href
    comic_link_re = re.compile('^%s/index.php\\?comic=([0-9]*)$' % url)

    @classmethod
    def get_archive_elements(cls):
        """Return the archive page's comic links in reverse page order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive.php'))
        comic_links = archive_soup.find_all('a', href=cls.comic_link_re)
        # first link is random -> skip it
        return reversed(comic_links[1:])

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return date, image URL, title, text and number of a comic."""
        comic_url = cls.get_url_from_archive_element(link)
        num = int(cls.comic_link_re.match(comic_url).groups()[0])
        # The link text is parsed as the date; the node following the link
        # provides the 'text' field.
        raw_date = link.string
        text = link.next_sibling.string
        posted = string_to_date(remove_st_nd_rd_th_from_date(raw_date), "%B %d, %Y")
        comic_img_re = re.compile('^%s/comics/' % cls.url)
        comic_img = soup.find('img', src=comic_img_re)
        return {
            'month': posted.month,
            'year': posted.year,
            'day': posted.day,
            'img': [comic_img.get('src')],
            'title': comic_img.get('title'),
            'text': text,
            'num': num,
        }
1301
1302
1303
class ButterSafe(GenericListableComic):
    """Class to retrieve Butter Safe comics."""
    name = 'butter'
    long_name = 'ButterSafe'
    url = 'http://buttersafe.com'
    get_url_from_archive_element = get_href
    comic_link_re = re.compile('^%s/([0-9]*)/([0-9]*)/([0-9]*)/.*' % url)

    @classmethod
    def get_archive_elements(cls):
        """Return the archive page's comic links in reverse page order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive/'))
        comic_links = archive_soup.find_all('a', href=cls.comic_link_re)
        return reversed(comic_links)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, date and image URL of the comic behind an archive link."""
        comic_url = cls.get_url_from_archive_element(link)
        title = link.string
        # The date is read from the comic URL: <url>/<year>/<month>/<day>/...
        date_parts = cls.comic_link_re.match(comic_url).groups()
        year, month, day = [int(part) for part in date_parts]
        comic_img = soup.find('div', id='comic').find('img')
        assert comic_img['alt'] == title
        return {
            'title': title,
            'day': day,
            'month': month,
            'year': year,
            'img': [comic_img['src']],
        }
1331
1332
1333
class CalvinAndHobbes(GenericComic):
    """Class to retrieve Calvin and Hobbes comics."""
    # Also on http://www.gocomics.com/calvinandhobbes/
    name = 'calvin'
    long_name = 'Calvin and Hobbes'
    # This is not through any official webpage but eh...
    url = 'http://marcel-oehler.marcellosendos.ch/comics/ch/'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generator to get the next comic. Implementation of GenericComic's abstract method.

        The index page links to one page per month ('<year>/<month>/'); each
        month page lists images whose file name starts with <year><month><day>.
        Only comics dated strictly after the last retrieved one are yielded
        (after 1985-11-01 when nothing has been retrieved yet).
        """
        last_date = get_date_for_comic(last_comic) if last_comic else date(1985, 11, 1)
        link_re = re.compile('^([0-9]*)/([0-9]*)/')
        for link in get_soup_at_url(cls.url).find_all('a', href=link_re):
            url = link['href']
            # Keep the string forms too: they are reused verbatim to build the
            # image pattern and the image URL.
            year_str, month_str = link_re.match(url).groups()
            year, month = int(year_str), int(month_str)
            # Skip month pages that cannot contain anything newer than
            # last_date (31 days after the 1st is still before last_date).
            if date(year, month, 1) + timedelta(days=31) < last_date:
                continue
            img_re = re.compile('^%s%s([0-9]*)' % (year_str, month_str))
            month_url = urljoin_wrapper(cls.url, url)
            for img in get_soup_at_url(month_url).find_all('img', src=img_re):
                img_src = img['src']
                day = int(img_re.match(img_src).groups()[0])
                comic_date = date(year, month, day)
                if comic_date > last_date:
                    yield {
                        'url': month_url,
                        'year': year,
                        'month': month,
                        'day': day,
                        'img': ['%s%s/%s/%s' % (cls.url, year_str, month_str, img_src)],
                    }
                    last_date = comic_date
1367
1368
1369
class AbstruseGoose(GenericListableComic):
    """Class to retrieve AbstruseGoose Comics."""
    name = 'abstruse'
    long_name = 'Abstruse Goose'
    url = 'http://abstrusegoose.com'
    get_url_from_archive_element = get_href
    comic_url_re = re.compile('^%s/([0-9]*)$' % url)
    comic_img_re = re.compile('^%s/strips/.*' % url)

    @classmethod
    def get_archive_elements(cls):
        """Return the archive page's comic links, in page order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive'))
        return archive_soup.find_all('a', href=cls.comic_url_re)

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Return number, title and image URL of the comic behind an archive link."""
        comic_url = cls.get_url_from_archive_element(archive_elt)
        url_match = cls.comic_url_re.match(comic_url)
        num = int(url_match.groups()[0])
        return {
            'num': num,
            'title': archive_elt.string,
            'img': [soup.find('img', src=cls.comic_img_re)['src']]
        }
1392
1393 View Code Duplication
0 ignored issues
show
This code seems to be duplicated in your project.
Loading history...
1394
class PhDComics(GenericNavigableComic):
    """Class to retrieve PHD Comics."""
    name = 'phd'
    long_name = 'PhD Comics'
    url = 'http://phdcomics.com/comics/archive.php'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the parent element of the 'first' button image."""
        first_button = get_soup_at_url(cls.url).find('img', src='images/first_button.gif')
        return first_button.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the parent of the next/previous button image, or None if absent."""
        if next_:
            button_src = 'images/next_button.gif'
        else:
            button_src = 'images/prev_button.gif'
        button = last_soup.find('img', src=button_src)
        if button is None:
            return None
        return button.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return date, image URL and title of a comic page.

        When the date shown on the page cannot be parsed, a message is
        printed and today's date is used instead.
        """
        date_font = soup.find('font', face='Arial,Helvetica,Geneva,Swiss,SunSans-Regular', color='white')
        date_str = date_font.string.strip()
        try:
            posted = string_to_date(date_str, '%m/%d/%Y')
        except ValueError:
            print("Invalid date %s" % date_str)
            posted = date.today()
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        return {
            'year': posted.year,
            'month': posted.month,
            'day': posted.day,
            'img': [soup.find('img', id='comic')['src']],
            'title': title,
        }
1429
1430
1431 View Code Duplication
class Octopuns(GenericNavigableComic):
    """Class to retrieve Octopuns comics."""
    # Also on http://octopuns.tumblr.com
    name = 'octopuns'
    long_name = 'Octopuns'
    url = 'http://www.octopuns.net'

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comic: the parent of the 'First.png' button image."""
        # The dot is escaped so that only a literal '.png' suffix matches.
        return get_soup_at_url(cls.url).find('img', src=re.compile(r'.*/First\.png')).parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns the parent element of the Next/Back button image, or None when
        the button image is missing or its parent carries no href.
        """
        button_re = re.compile(r'.*/Next\.png' if next_ else r'.*/Back\.png')
        button = last_soup.find('img', src=button_re)
        if button is None:
            # No navigation button on this page: nothing to follow.
            return None
        link = button.parent
        return None if link.get('href') is None else link

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        Images are taken from the page's <link rel="image_src"> elements.
        """
        title = soup.find('h3', class_='post-title entry-title').string
        date_str = soup.find('h2', class_='date-header').string
        day = string_to_date(date_str, "%A, %B %d, %Y")
        imgs = soup.find_all('link', rel='image_src')
        return {
            'img': [i['href'] for i in imgs],
            'title': title,
            'day': day.day,
            'month': day.month,
            'year': day.year,
        }
1463
1464
1465
class Quarktees(GenericNavigableComic):
    """Class to retrieve the Quarktees comics."""
    name = 'quarktees'
    long_name = 'Quarktees'
    url = 'http://www.quarktees.com/blogs/news'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.quarktees.com/blogs/news/12486621-coming-soon'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the 'article-next' or 'article-prev' link of the page."""
        if next_:
            link_id = 'article-next'
        else:
            link_id = 'article-prev'
        return last_soup.find('a', id=link_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the title and the image URLs of an article page."""
        title = soup.find('meta', property='og:title')['content']
        article_div = soup.find('div', class_='single-article')
        images = article_div.find_all('img')
        return {
            'title': title,
            'img': [urljoin_wrapper(cls.url, img['src']) for img in images],
        }
1489
1490
1491
class OverCompensating(GenericNavigableComic):
    """Class to retrieve the Over Compensating comics."""
    name = 'compensating'
    long_name = 'Over Compensating'
    url = 'http://www.overcompensating.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Find on the home page the anchor whose href ends with 'comic=1'."""
        home_page = get_soup_at_url(cls.url)
        return home_page.find('a', href=re.compile('comic=1$'))

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Find the navigation anchor, identified by its title attribute."""
        if next_:
            return last_soup.find('a', title='next comic')
        return last_soup.find('a', title='go back already')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        The comic number is parsed from the trailing 'comic=<digits>' of the
        comic URL; the image is the first <img> whose src starts with
        '/oc/comics/'.
        """
        picture_re = re.compile('^/oc/comics/.*')
        number_re = re.compile('.*comic=([0-9]*)$')
        page_url = cls.get_url_from_link(link)
        number = int(number_re.match(page_url).group(1))
        picture = soup.find('img', src=picture_re)
        return {
            'num': number,
            'img': [urljoin_wrapper(page_url, picture['src'])],
            'title': picture.get('title')
        }
1521
1522
1523
class Oglaf(GenericNavigableComic):
    """Class to retrieve Oglaf comics."""
    name = 'oglaf'
    long_name = 'Oglaf [NSFW]'
    url = 'http://oglaf.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the parent element of the div with id 'st' on the home page."""
        start_marker = get_soup_at_url(cls.url).find("div", id="st")
        return start_marker.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the parent of the 'nx' (next) or 'pvs' (previous) div, if any."""
        marker = last_soup.find("div", id="nx" if next_ else "pvs")
        if not marker:
            # No navigation marker on the page: nothing to follow.
            return None
        return marker.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        Exactly one title image (inside div#tt) and one strip image
        (img#strip) are expected; their title attributes are joined to form
        the description.
        """
        page_title = soup.find('title').string
        header_pics = soup.find('div', id='tt').find_all('img')
        assert len(header_pics) == 1
        strip_pics = soup.find_all('img', id='strip')
        assert len(strip_pics) == 1
        pictures = header_pics + strip_pics
        caption = ' '.join(pic['title'] for pic in pictures)
        return {
            'title': page_title,
            'img': [pic['src'] for pic in pictures],
            'description': caption,
        }
1556
1557
1558
class ScandinaviaAndTheWorld(GenericNavigableComic):
    """Class to retrieve Scandinavia And The World comics."""
    name = 'satw'
    long_name = 'Scandinavia And The World'
    url = 'http://satwcomic.com'
    get_first_comic_link = simulate_first_link
    first_url = 'http://satwcomic.com/sweden-denmark-and-norway'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor whose accesskey is 'n' (next) or 'p' (previous)."""
        key = 'n' if next_ else 'p'
        return last_soup.find('a', accesskey=key)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        Title and description are read from meta tags; images are the <img>
        tags carrying itemprop="image".
        """
        label = soup.find('meta', attrs={'name': 'twitter:label1'})['content']
        summary = soup.find('meta', property='og:description')['content']
        pictures = soup.find_all('img', itemprop="image")
        return {
            'title': label,
            'description': summary,
            'img': [pic['src'] for pic in pictures],
        }
1582
1583
1584
class SomethingOfThatIlk(GenericEmptyComic):  # Does not exist anymore
    """Class for the Something Of That Ilk comics (site no longer exists)."""
    # NOTE(review): derives from GenericEmptyComic, so presumably nothing is
    # retrieved any more - confirm against the base class.
    name = 'somethingofthatilk'
    long_name = 'Something Of That Ilk'
    url = 'http://www.somethingofthatilk.com'
1589
1590
1591
class InfiniteMonkeyBusiness(GenericNavigableComic):
    """Generic class to retrieve InfiniteMonkeyBusiness comics."""
    name = 'monkey'
    long_name = 'Infinite Monkey Business'
    url = 'http://infinitemonkeybusiness.net'
    get_navi_link = get_a_navi_comicnavnext_navinext
    get_first_comic_link = simulate_first_link
    first_url = 'http://infinitemonkeybusiness.net/comic/pillory/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        The title comes from the og:title meta tag, the images from the
        <img> tags inside div#comic.
        """
        og_title = soup.find('meta', property='og:title')['content']
        comic_div = soup.find('div', id='comic')
        pictures = comic_div.find_all('img')
        return {
            'title': og_title,
            'img': [pic['src'] for pic in pictures],
        }
1609
1610
1611
class Wondermark(GenericListableComic):
    """Class to retrieve the Wondermark comics."""
    name = 'wondermark'
    long_name = 'Wondermark'
    url = 'http://wondermark.com'
    get_url_from_archive_element = get_href

    @classmethod
    def get_archive_elements(cls):
        """Return the rel="bookmark" anchors of the archive page, in reverse page order."""
        archive_page = get_soup_at_url(urljoin_wrapper(cls.url, 'archive/'))
        bookmarks = archive_page.find_all('a', rel='bookmark')
        return reversed(bookmarks)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        When the page has no div#comic, the image list, alt text and title
        are left empty; the date and tags are always extracted.
        """
        date_text = soup.find('div', class_='postdate').find('em').string
        published = string_to_date(remove_st_nd_rd_th_from_date(date_text), "%B %d, %Y")
        comic_div = soup.find('div', id='comic')
        if not comic_div:
            img_src, alt, title = [], '', ''
        else:
            picture = comic_div.find('img')
            img_src = [picture['src']]
            alt = picture['alt']
            assert alt == picture['title']
            title = soup.find('meta', property='og:title')['content']
        tag_links = soup.find('div', class_='postmeta').find_all('a', rel='tag')
        return {
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': img_src,
            'title': title,
            'alt': alt,
            'tags': ' '.join(tag.string for tag in tag_links),
        }
1648
1649
1650
class WarehouseComic(GenericNavigableComic):
    """Class to retrieve Warehouse Comic comics."""
    name = 'warehouse'
    long_name = 'Warehouse Comic'
    url = 'http://warehousecomic.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        Reads the post title, the post date (format "%B %d, %Y") and the
        images inside div#comic.
        """
        post_title = soup.find('h2', class_='post-title').string
        date_text = soup.find('span', class_='post-date').string
        published = string_to_date(date_text, "%B %d, %Y")
        pictures = soup.find('div', id='comic').find_all('img')
        image_urls = [pic['src'] for pic in pictures]
        return {
            'img': image_urls,
            'title': post_title,
            'day': published.day,
            'month': published.month,
            'year': published.year,
        }
1672
1673
1674
class JustSayEh(GenericNavigableComic):
    """Class to retrieve Just Say Eh comics."""
    # Also on http://tapastic.com/series/Just-Say-Eh
    name = 'justsayeh'
    long_name = 'Just Say Eh'
    url = 'http://www.justsayeh.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        Every image of div#comic must have identical alt and title
        attributes; the alt text of the first image is reported.
        """
        post_title = soup.find('h2', class_='post-title').string
        pictures = soup.find("div", id="comic").find_all("img")
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        first_alt = pictures[0]['alt']
        return {
            'img': [pic['src'] for pic in pictures],
            'title': post_title,
            'alt': first_alt,
        }
1695
1696
1697
class MouseBearComedy(GenericNavigableComic):
    """Class to retrieve Mouse Bear Comedy comics."""
    # Also on http://mousebearcomedy.tumblr.com
    name = 'mousebear'
    long_name = 'Mouse Bear Comedy'
    url = 'http://www.mousebearcomedy.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        The alt and title attributes of every image in div#comic are
        asserted to equal the post title.
        """
        post_title = soup.find('h2', class_='post-title').string
        writer = soup.find("span", class_="post-author").find("a").string
        date_text = soup.find("span", class_="post-date").string
        published = string_to_date(date_text, '%B %d, %Y')
        pictures = soup.find("div", id="comic").find_all("img")
        assert all(pic['alt'] == pic['title'] == post_title for pic in pictures)
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [pic['src'] for pic in pictures],
            'title': post_title,
            'author': writer,
        }
1723
1724
1725
class BigFootJustice(GenericNavigableComic):
    """Class to retrieve Big Foot Justice comics."""
    # Also on http://tapastic.com/series/bigfoot-justice
    name = 'bigfoot'
    long_name = 'Big Foot Justice'
    url = 'http://bigfootjustice.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        The title is built by joining the title attributes of the images in
        div#comic (each must equal the image's alt attribute).
        """
        comic_div = soup.find('div', id='comic')
        pictures = comic_div.find_all('img')
        assert all(pic['title'] == pic['alt'] for pic in pictures)
        joined_title = ' '.join(pic['title'] for pic in pictures)
        return {
            'img': [pic['src'] for pic in pictures],
            'title': joined_title,
        }
1744
1745
1746 View Code Duplication
class RespawnComic(GenericNavigableComic):
    """Class to retrieve Respawn Comic."""
    # Also on http://respawncomic.tumblr.com
    name = 'respawn'
    long_name = 'Respawn Comic'
    # The URL must not carry trailing whitespace: it would end up in any
    # address derived from it.
    url = 'http://respawncomic.com'
    get_navi_link = get_a_navi_comicnavnext_navinext
    get_first_comic_link = simulate_first_link
    first_url = 'http://respawncomic.com/comic/c0001/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        Title, author and publication date are read from meta tags; only the
        first 10 characters ("YYYY-MM-DD") of the published time are parsed.
        Images are the og:image meta tags, minus a fixed set of URLs.
        """
        title = soup.find('meta', property='og:title')['content']
        author = soup.find('meta', attrs={'name': 'shareaholic:article_author_name'})['content']
        date_str = soup.find('meta', attrs={'name': 'shareaholic:article_published_time'})['content']
        day = string_to_date(date_str[:10], "%Y-%m-%d")
        imgs = soup.find_all('meta', property='og:image')
        # NOTE(review): presumably site artwork rather than comic strips
        # (both live under .../site/) - confirm.
        skip_imgs = {
            'http://respawncomic.com/wp-content/uploads/2016/03/site/HAROLD2.png',
            'http://respawncomic.com/wp-content/uploads/2016/03/site/DEVA.png'
        }
        return {
            'title': title,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [i['content'] for i in imgs if i['content'] not in skip_imgs],
        }
1777
1778
1779
class SafelyEndangered(GenericNavigableComic):
    """Class to retrieve Safely Endangered comics."""
    # Also on http://tumblr.safelyendangered.com
    name = 'endangered'
    long_name = 'Safely Endangered'
    url = 'http://www.safelyendangered.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.safelyendangered.com/comic/ignored/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        The alt text of the first image in div#comic is reported; every
        image must have matching alt and title attributes.
        """
        post_title = soup.find('h2', class_='post-title').string
        date_text = soup.find('span', class_='post-date').string
        published = string_to_date(date_text, '%B %d, %Y')
        pictures = soup.find('div', id='comic').find_all('img')
        first_alt = pictures[0]['alt']
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [pic['src'] for pic in pictures],
            'title': post_title,
            'alt': first_alt,
        }
1806
1807
1808
class PicturesInBoxes(GenericNavigableComic):
    """Class to retrieve Pictures In Boxes comics."""
    # Also on http://picturesinboxescomic.tumblr.com
    name = 'picturesinboxes'
    long_name = 'Pictures in Boxes'
    url = 'http://www.picturesinboxes.com'
    get_navi_link = get_a_navi_navinext
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.picturesinboxes.com/2013/10/26/tetris/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        At least one image is expected in div.comicpane, and the title and
        alt attributes of each must equal the post title.
        """
        post_title = soup.find('h2', class_='post-title').string
        writer = soup.find("span", class_="post-author").find("a").string
        date_text = soup.find('span', class_='post-date').string
        published = string_to_date(date_text, '%B %d, %Y')
        pictures = soup.find('div', class_='comicpane').find_all('img')
        assert pictures
        assert all(pic['title'] == pic['alt'] == post_title for pic in pictures)
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [pic['src'] for pic in pictures],
            'title': post_title,
            'author': writer,
        }
1836
1837
1838
class Penmen(GenericEmptyComic):
    """Class for the Penmen comics."""
    # NOTE(review): derives from GenericEmptyComic, so presumably nothing is
    # retrieved for this comic - confirm against the base class.
    name = 'penmen'
    long_name = 'Penmen'
    url = 'http://penmen.com'
1843
1844
1845
class TheDoghouseDiaries(GenericNavigableComic):
    """Class to retrieve The Dog House Diaries comics."""
    name = 'doghouse'
    long_name = 'The Dog House Diaries'
    url = 'http://thedoghousediaries.com'

    @classmethod
    def get_first_comic_link(cls):
        """Return the anchor with id 'firstlink' from the home page."""
        home_page = get_soup_at_url(cls.url)
        return home_page.find('a', id='firstlink')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor with id 'nextlink' (or 'previouslink')."""
        if next_:
            return last_soup.find('a', id='nextlink')
        return last_soup.find('a', id='previouslink')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        The image is the first <img> whose src starts with 'dhdcomics/'; the
        comic number is the last path component of the comic URL.
        """
        picture = soup.find('img', src=re.compile('^dhdcomics/.*'))
        page_url = cls.get_url_from_link(link)
        heading = soup.find('h2', id='titleheader').string
        subtext = soup.find('div', id='subtext').string
        return {
            'title': heading,
            'title2': subtext,
            'alt': picture.get('title'),
            'img': [urljoin_wrapper(page_url, picture['src'].strip())],
            'num': int(page_url.split('/')[-1]),
        }
1874
1875
1876
class InvisibleBread(GenericListableComic):
    """Class to retrieve Invisible Bread comics."""
    # Also on http://www.gocomics.com/invisible-bread
    name = 'invisiblebread'
    long_name = 'Invisible Bread'
    url = 'http://invisiblebread.com'

    @classmethod
    def get_archive_elements(cls):
        """Return the 'archive-title' cells of the archive page, in reverse page order."""
        archive_url = urljoin_wrapper(cls.url, 'archives/')
        return reversed(get_soup_at_url(archive_url).find_all('td', class_='archive-title'))

    @classmethod
    def get_url_from_archive_element(cls, td):
        """Get url corresponding to an archive element (href of the cell's anchor)."""
        return td.find('a')['href']

    @classmethod
    def get_comic_info(cls, soup, td):
        """Get information about a particular comic.

        The archive cell provides the title, and its previous sibling the
        month and day; the year is taken from the first numeric path
        component of the comic URL.
        """
        url = cls.get_url_from_archive_element(td)
        title = td.find('a').string
        month_and_day = td.previous_sibling.string
        # Escape the site URL so that its dots only match literal dots.
        link_re = re.compile('^%s/([0-9]+)/' % re.escape(cls.url))
        year = link_re.match(url).group(1)
        date_str = month_and_day + ' ' + year
        day = string_to_date(date_str, '%b %d %Y')
        # A single image is read from div#comic; its title and alt attributes
        # must both equal the archive title.
        img = soup.find('div', id='comic').find('img')
        assert img['title'] == img['alt'] == title
        return {
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [urljoin_wrapper(cls.url, img['src'])],
            'title': title,
        }
1912
1913
1914
class DiscoBleach(GenericEmptyComic):  # Does not work anymore
    """Class for the Disco Bleach comics (retrieval no longer works)."""
    # NOTE(review): derives from GenericEmptyComic, so presumably nothing is
    # retrieved any more - confirm against the base class.
    name = 'discobleach'
    long_name = 'Disco Bleach'
    url = 'http://discobleach.com'
1919
1920
1921
class TubeyToons(GenericEmptyComic):  # Does not work anymore
    """Class for the TubeyToons comics (retrieval no longer works)."""
    # Also on http://tapastic.com/series/Tubey-Toons
    # Also on http://tubeytoons.tumblr.com
    # NOTE(review): derives from GenericEmptyComic, so presumably nothing is
    # retrieved any more - confirm against the base class.
    name = 'tubeytoons'
    long_name = 'Tubey Toons'
    url = 'http://tubeytoons.com'
1928
1929
1930
class CompletelySeriousComics(GenericNavigableComic):
    """Class to retrieve Completely Serious comics."""
    name = 'completelyserious'
    long_name = 'Completely Serious Comics'
    url = 'http://completelyseriouscomics.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        The author is the second child node of span.post-author. At least
        one image is expected in div.comicpane, and all images must share
        the same title/alt text.
        """
        post_title = soup.find('h2', class_='post-title').string
        writer = soup.find('span', class_='post-author').contents[1].string
        date_text = soup.find('span', class_='post-date').string
        published = string_to_date(date_text, '%B %d, %Y')
        pictures = soup.find('div', class_='comicpane').find_all('img')
        assert pictures
        shared_alt = pictures[0]['title']
        assert all(pic['title'] == pic['alt'] == shared_alt for pic in pictures)
        return {
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [pic['src'] for pic in pictures],
            'title': post_title,
            'alt': shared_alt,
            'author': writer,
        }
1958
1959
1960
class PoorlyDrawnLines(GenericListableComic):
    """Class to retrieve Poorly Drawn Lines comics."""
    # Also on http://pdlcomics.tumblr.com
    name = 'poorlydrawn'
    long_name = 'Poorly Drawn Lines'
    url = 'http://poorlydrawnlines.com'
    get_url_from_archive_element = get_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        At most one image is expected in div.post; when there is none, the
        image list and the title are empty.
        """
        imgs = soup.find('div', class_='post').find_all('img')
        assert len(imgs) <= 1
        return {
            'img': [i['src'] for i in imgs],
            'title': imgs[0].get('title', "") if imgs else "",
        }

    @classmethod
    def get_archive_elements(cls):
        """Return the archive anchors pointing under '<url>/comic/', in reverse page order."""
        archive_url = urljoin_wrapper(cls.url, 'archive')
        # Escape the site URL so that its dots only match literal dots.
        url_re = re.compile('^%s/comic/.' % re.escape(cls.url))
        return reversed(get_soup_at_url(archive_url).find_all('a', href=url_re))
1983
1984
1985 View Code Duplication
class LoadingComics(GenericNavigableComic):
    """Class to retrieve Loading Artist comics."""
    name = 'loadingartist'
    long_name = 'Loading Artist'
    url = 'http://www.loadingartist.com/latest'

    @classmethod
    def get_first_comic_link(cls):
        """Return the anchor titled "First" from the page at cls.url."""
        latest_page = get_soup_at_url(cls.url)
        return latest_page.find('a', title="First")

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor titled 'Next' (or 'Previous')."""
        wanted_title = 'Next' if next_ else 'Previous'
        return last_soup.find('a', title=wanted_title)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        Only the images of div.comic whose alt and title attributes are both
        empty strings are kept.
        """
        heading = soup.find('h1').string
        date_text = soup.find('span', class_='date').string.strip()
        published = string_to_date(date_text, "%B %d, %Y")
        pictures = soup.find('div', class_='comic').find_all('img', alt='', title='')
        return {
            'title': heading,
            'img': [pic['src'] for pic in pictures],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2015
2016
2017 View Code Duplication
class ChuckleADuck(GenericNavigableComic):
    """Class to retrieve Chuckle-A-Duck comics."""
    name = 'chuckleaduck'
    long_name = 'Chuckle-A-duck'
    url = 'http://chuckleaduck.com'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        A page without div#comic (or without images in it) yields an empty
        image list and an empty title.
        """
        date_text = soup.find('span', class_='post-date').string
        published = string_to_date(remove_st_nd_rd_th_from_date(date_text), "%B %d, %Y")
        writer = soup.find('span', class_='post-author').string
        comic_div = soup.find('div', id='comic')
        if comic_div:
            pictures = comic_div.find_all('img')
        else:
            pictures = []
        shared_title = pictures[0]['title'] if pictures else ""
        assert all(pic['title'] == pic['alt'] == shared_title for pic in pictures)
        return {
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [pic['src'] for pic in pictures],
            'title': shared_title,
            'author': writer,
        }
2043
2044
2045
class DepressedAlien(GenericNavigableComic):
    """Class to retrieve Depressed Alien Comics."""
    name = 'depressedalien'
    long_name = 'Depressed Alien'
    url = 'http://depressedalien.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comic: the parent of the image named 'beginArrow'."""
        return get_soup_at_url(cls.url).find('img', attrs={'name': 'beginArrow'}).parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns the parent of the image named 'rightArrow' (next) or
        'leftArrow' (previous), or None when the page has no such image.
        """
        arrow = last_soup.find('img', attrs={'name': 'rightArrow' if next_ else 'leftArrow'})
        # A missing arrow means there is nothing to navigate to; report that
        # as None instead of failing on the attribute access.
        return None if arrow is None else arrow.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        The title comes from the twitter:title meta tag, the images from the
        og:image meta tags.
        """
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        imgs = soup.find_all('meta', property='og:image')
        return {
            'title': title,
            'img': [i['content'] for i in imgs],
        }
2071
2072
2073
class ThingsInSquares(GenericListableComic):
    """Class to retrieve Things In Squares comics."""
    # This can be retrieved in other languages
    # Also on https://tapastic.com/series/Things-in-Squares
    name = 'squares'
    long_name = 'Things in squares'
    url = 'http://www.thingsinsquares.com'

    @classmethod
    def get_comic_info(cls, soup, tr):
        """Get information about a particular comic.

        The archive row must hold exactly three cells: the second provides
        the title anchor, the third the date (format "%m.%d.%y"). The rest
        is read from the comic page itself.
        """
        _, title_cell, date_cell = tr.find_all('td')
        anchor = title_cell.find('a')
        published = string_to_date(date_cell.string, "%m.%d.%y")
        row_title = anchor.string
        og_title = soup.find('meta', property='og:title')['content']
        og_desc = soup.find('meta', property='og:description')
        summary = og_desc['content'] if og_desc else ''
        tag_metas = soup.find_all('meta', property='article:tag')
        joined_tags = ' '.join(meta['content'] for meta in tag_metas)
        pictures = soup.find('div', class_='entry-content').find_all('img')
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'title': row_title,
            'title2': og_title,
            'description': summary,
            'tags': joined_tags,
            'img': [pic['src'] for pic in pictures],
            'alt': ' '.join(pic['alt'] for pic in pictures),
        }

    @classmethod
    def get_url_from_archive_element(cls, tr):
        """Get url corresponding to an archive element (anchor of the second cell)."""
        _, title_cell, _ = tr.find_all('td')
        return title_cell.find('a')['href']

    @classmethod
    def get_archive_elements(cls):
        """Return the rows of the archive table body, in reverse page order."""
        archive_page = get_soup_at_url(urljoin_wrapper(cls.url, 'archive-2'))
        rows = archive_page.find('tbody').find_all('tr')
        return reversed(rows)
2115
2116 View Code Duplication
0 ignored issues
show
This code seems to be duplicated in your project.
Loading history...
2117
class HappleTea(GenericNavigableComic):
    """Class to retrieve Happle Tea Comics."""
    name = 'happletea'
    long_name = 'Happle Tea'
    url = 'http://www.happletea.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        Images come from div#comic; title, author and date come from
        div.post-content. The alt texts are concatenated without separator.
        """
        pictures = soup.find('div', id='comic').find_all('img')
        post_div = soup.find('div', class_='post-content')
        post_title = post_div.find('h2', class_='post-title').string
        writer = post_div.find('a', rel='author').string
        date_text = post_div.find('span', class_='post-date').string
        published = string_to_date(date_text, "%B %d, %Y")
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        return {
            'title': post_title,
            'img': [pic['src'] for pic in pictures],
            'alt': ''.join(pic['alt'] for pic in pictures),
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'author': writer,
        }
2144
2145
2146
class FatAwesomeComics(GenericNavigableComic):
    """Class to retrieve Fat Awesome Comics."""
    # Also on http://fatawesomecomedy.tumblr.com
    name = 'fatawesome'
    long_name = 'Fat Awesome'
    url = 'http://fatawesome.com/comics'
    get_navi_link = get_a_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://fatawesome.com/shortbus/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        Exactly one image with data-recalc-dims="1" is expected; anything
        after the last '?' of its src is dropped. The article:tag meta is
        optional.
        """
        card_title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        summary = soup.find('meta', attrs={'name': 'description'})['content']
        tag_meta = soup.find('meta', property='article:tag')
        if tag_meta:
            tag_text = tag_meta['content']
        else:
            tag_text = ""
        published_meta = soup.find('meta', property='article:published_time')
        # Keep only the leading "YYYY-MM-DD" part of the timestamp.
        published = string_to_date(published_meta['content'][:10], "%Y-%m-%d")
        pictures = soup.find_all('img', attrs={'data-recalc-dims': "1"})
        assert len(pictures) == 1
        return {
            'title': card_title,
            'description': summary,
            'tags': tag_text,
            'alt': "".join(pic['alt'] for pic in pictures),
            'img': [pic['src'].rsplit('?', 1)[0] for pic in pictures],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2177
2178
2179
class AnythingComic(GenericListableComic):
    """Class to retrieve Anything Comics."""
    # Also on http://tapastic.com/series/anything
    name = 'anythingcomic'
    long_name = 'Anything Comic'
    url = 'http://www.anythingcomic.com'

    @classmethod
    def get_archive_elements(cls):
        """Return the comic rows of the 'chapter_table' table of the archive page."""
        archive_page = get_soup_at_url(urljoin_wrapper(cls.url, 'archive/'))
        rows = archive_page.find('table', id='chapter_table').find_all('tr')
        # The first 2 <tr>'s do not correspond to comics
        return rows[2:]

    @classmethod
    def get_url_from_archive_element(cls, tr):
        """Get url corresponding to an archive element."""
        # A row must hold exactly four cells; the second one has the link.
        _, comic_cell, _, _ = tr.find_all('td')
        anchor = comic_cell.find('a')
        return urljoin_wrapper(cls.url, anchor['href'])

    @classmethod
    def get_comic_info(cls, soup, tr):
        """Get information about a particular comic.

        Number, title and date (format '%d %b %Y %I:%M %p') come from the
        archive row; the single img#comic_image comes from the comic page.
        """
        num_cell, comic_cell, date_cell, _ = tr.find_all('td')
        number = int(num_cell.string)
        anchor = comic_cell.find('a')
        row_title = anchor.string
        pictures = soup.find_all('img', id='comic_image')
        published = string_to_date(date_cell.string, '%d %b %Y %I:%M %p')
        assert len(pictures) == 1
        assert all(pic.get('alt') == pic.get('title') for pic in pictures)
        return {
            'num': number,
            'title': row_title,
            'alt': pictures[0].get('alt', ''),
            'img': [pic['src'] for pic in pictures],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2219
2220
2221
class LonnieMillsap(GenericNavigableComic):
    """Class to retrieve Lonnie Millsap's comics."""
    name = 'millsap'
    long_name = 'Lonnie Millsap'
    url = 'http://www.lonniemillsap.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.lonniemillsap.com/?p=42'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        Author, date and images are looked up inside div.post-content; the
        images are those of its div.entry.
        """
        post_title = soup.find('h2', class_='post-title').string
        post_div = soup.find('div', class_='post-content')
        writer = post_div.find("span", class_="post-author").find("a").string
        date_text = post_div.find("span", class_="post-date").string
        published = string_to_date(date_text, "%B %d, %Y")
        pictures = post_div.find("div", class_="entry").find_all("img")
        return {
            'title': post_title,
            'author': writer,
            'img': [pic['src'] for pic in pictures],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2247
2248
2249 View Code Duplication
class LinsEditions(GenericNavigableComic):
    """Retrieve the comics published by L.I.N.S. Editions."""
    # Also on http://linscomics.tumblr.com
    name = 'lins'
    long_name = 'L.I.N.S. Editions'
    url = 'https://linsedition.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'https://linsedition.com/2011/09/07/l-i-n-s/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Read title, images and publication date from the page's meta tags."""
        title_meta = soup.find('meta', property='og:title')
        title = title_meta['content']
        image_metas = soup.find_all('meta', property='og:image')
        published_meta = soup.find('meta', property='article:published_time')
        # Only the first 10 characters (the YYYY-MM-DD part) are parsed.
        published = string_to_date(published_meta['content'][:10], "%Y-%m-%d")
        info = {
            'title': title,
            'img': [meta['content'] for meta in image_metas],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
        return info
2273
2274
2275
class ThorsThundershack(GenericNavigableComic):
    """Retrieve the comics published on Thor's Thundershack."""
    # Also on http://tapastic.com/series/Thors-Thundershac
    name = 'thor'
    long_name = 'Thor\'s Thundershack'
    url = 'http://www.thorsthundershack.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'first' navigation link found on the home page."""
        home = get_soup_at_url(cls.url)
        return home.find('a', class_='first navlink')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the link to the next (or previous) comic, or None.

        Links whose target is '/comic' are skipped.
        """
        rel = 'next' if next_ else 'prev'
        candidates = last_soup.find_all('a', rel=rel)
        return next((a for a in candidates if a['href'] != '/comic'), None)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect the metadata and image URLs of the comic shown in `soup`."""
        title_meta = soup.find('meta', attrs={'name': 'description'})
        title = title_meta["content"]
        body = soup.find('div', itemprop='articleBody')
        description = body.text
        author_span = soup.find('span', itemprop='author copyrightHolder')
        author = author_span.string
        images = soup.find_all('img', itemprop='image')
        for image in images:
            assert image['title'] == image['alt']
        if images:
            alt = images[0]['alt']
        else:
            alt = ""
        time_tag = soup.find('time', itemprop='datePublished')
        published = string_to_date(time_tag["datetime"], "%Y-%m-%d %H:%M:%S")
        info = {
            # Image sources are joined to the site URL.
            'img': [urljoin_wrapper(cls.url, image['src']) for image in images],
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'author': author,
            'title': title,
            'alt': alt,
            'description': description,
        }
        return info
2317 View Code Duplication
0 ignored issues
show
This code seems to be duplicated in your project.
Loading history...
2318
2319
class GerbilWithAJetpack(GenericNavigableComic):
    """Retrieve the comics published on Gerbil With A Jetpack."""
    name = 'gerbil'
    long_name = 'Gerbil With A Jetpack'
    url = 'http://gerbilwithajetpack.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect title, author, date, alt text and images from a comic page."""
        title_tag = soup.find('h2', class_='post-title')
        title = title_tag.string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        date_span = soup.find("span", class_="post-date")
        published = string_to_date(date_span.string, "%B %d, %Y")
        comic_div = soup.find("div", id="comic")
        images = comic_div.find_all("img")
        alt = images[0]['alt']
        # Every image must carry the same text in both 'alt' and 'title'.
        for image in images:
            assert image['alt'] == image['title'] == alt
        info = {
            'img': [image['src'] for image in images],
            'title': title,
            'alt': alt,
            'author': author,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
        return info
2346 View Code Duplication
0 ignored issues
show
This code seems to be duplicated in your project.
Loading history...
2347
2348
class EveryDayBlues(GenericNavigableComic):
    """Retrieve the comics published on Every Day Blues."""
    name = "blues"
    long_name = "Every Day Blues"
    url = "http://everydayblues.net"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect title, author, date and images from a comic page."""
        title_tag = soup.find("h2", class_="post-title")
        title = title_tag.string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        date_span = soup.find("span", class_="post-date")
        # The date is parsed with the "de_DE.utf8" locale.
        published = string_to_date(date_span.string, "%d. %B %Y", "de_DE.utf8")
        comic_div = soup.find("div", id="comic")
        images = comic_div.find_all("img")
        for image in images:
            assert image['alt'] == image['title'] == title
        assert len(images) <= 1
        info = {
            'img': [image['src'] for image in images],
            'title': title,
            'author': author,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
        return info
2374 View Code Duplication
0 ignored issues
show
This code seems to be duplicated in your project.
Loading history...
2375
2376
class BiterComics(GenericNavigableComic):
    """Retrieve the comics published on Biter Comics."""
    name = "biter"
    long_name = "Biter Comics"
    url = "http://www.bitercomics.com"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect title, author, date, alt text and image from a comic page."""
        title_tag = soup.find("h1", class_="entry-title")
        title = title_tag.string
        author_span = soup.find("span", class_="author vcard")
        author = author_span.find("a").string
        date_span = soup.find("span", class_="entry-date")
        published = string_to_date(date_span.string, "%B %d, %Y")
        comic_div = soup.find("div", id="comic")
        images = comic_div.find_all("img")
        for image in images:
            assert image['alt'] == image['title']
        # Exactly one image is expected per comic.
        assert len(images) == 1
        alt = images[0]['alt']
        info = {
            'img': [image['src'] for image in images],
            'title': title,
            'alt': alt,
            'author': author,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
        return info
2404
2405
2406
class TheAwkwardYeti(GenericNavigableComic):
    """Retrieve the comics published on The Awkward Yeti."""
    # Also on http://www.gocomics.com/the-awkward-yeti
    # Also on http://larstheyeti.tumblr.com
    # Also on https://tapastic.com/series/TheAwkwardYeti
    name = 'yeti'
    long_name = 'The Awkward Yeti'
    url = 'http://theawkwardyeti.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect title, date and images from a comic page."""
        title_tag = soup.find('h2', class_='post-title')
        title = title_tag.string
        date_span = soup.find("span", class_="post-date")
        published = string_to_date(date_span.string, "%B %d, %Y")
        comic_div = soup.find("div", id="comic")
        images = comic_div.find_all("img")
        # Only the first image is required to have matching alt and title.
        for first_image in images[:1]:
            assert first_image['alt'] == first_image['title']
        info = {
            'img': [image['src'] for image in images],
            'title': title,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
        return info
2432
2433
2434
class PleasantThoughts(GenericNavigableComic):
    """Retrieve the comics published on Pleasant Thoughts."""
    name = 'pleasant'
    long_name = 'Pleasant Thoughts'
    url = 'http://pleasant-thoughts.com'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect the title and images found in the post content."""
        content = soup.find('div', class_='post-content')
        title_tag = content.find('h2', class_='post-title')
        title = title_tag.string
        entry = content.find("div", class_="entry")
        images = entry.find_all("img")
        info = {
            'title': title,
            'img': [image['src'] for image in images],
        }
        return info
2452
2453
2454
class MisterAndMe(GenericNavigableComic):
    """Retrieve the comics published on Mister & Me."""
    # Also on http://www.gocomics.com/mister-and-me
    # Also on https://tapastic.com/series/Mister-and-Me
    name = 'mister'
    long_name = 'Mister & Me'
    url = 'http://www.mister-and-me.com'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect title, author, date, alt text and image from a comic page."""
        title_tag = soup.find('h2', class_='post-title')
        title = title_tag.string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        date_span = soup.find("span", class_="post-date")
        published = string_to_date(date_span.string, "%B %d, %Y")
        comic_div = soup.find("div", id="comic")
        images = comic_div.find_all("img")
        for image in images:
            assert image['alt'] == image['title']
        # At most one image is expected; a page without image yields an empty alt.
        assert len(images) <= 1
        if images:
            alt = images[0]['alt']
        else:
            alt = ""
        info = {
            'img': [image['src'] for image in images],
            'title': title,
            'alt': alt,
            'author': author,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
        return info
2484 View Code Duplication
0 ignored issues
show
This code seems to be duplicated in your project.
Loading history...
2485
2486
class LastPlaceComics(GenericNavigableComic):
    """Retrieve the comics published on Last Place Comics."""
    name = 'lastplace'
    long_name = 'LastPlaceComics'
    url = "http://lastplacecomics.com"
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect title, author, date, alt text and image from a comic page."""
        title_tag = soup.find('h2', class_='post-title')
        title = title_tag.string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        date_span = soup.find("span", class_="post-date")
        published = string_to_date(date_span.string, "%B %d, %Y")
        comic_div = soup.find("div", id="comic")
        images = comic_div.find_all("img")
        for image in images:
            assert image['alt'] == image['title']
        # At most one image is expected; a page without image yields an empty alt.
        assert len(images) <= 1
        if images:
            alt = images[0]['alt']
        else:
            alt = ""
        info = {
            'img': [image['src'] for image in images],
            'title': title,
            'alt': alt,
            'author': author,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
        return info
2514
2515
2516
class TalesOfAbsurdity(GenericNavigableComic):
    """Retrieve the comics published on Tales Of Absurdity."""
    # Also on http://tapastic.com/series/Tales-Of-Absurdity
    # Also on http://talesofabsurdity.tumblr.com
    name = 'absurdity'
    long_name = 'Tales of Absurdity'
    url = 'http://talesofabsurdity.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect title, author, date, alt text and images from a comic page."""
        title_tag = soup.find('h2', class_='post-title')
        title = title_tag.string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        date_span = soup.find("span", class_="post-date")
        published = string_to_date(date_span.string, "%B %d, %Y")
        comic_div = soup.find("div", id="comic")
        images = comic_div.find_all("img")
        for image in images:
            assert image['alt'] == image['title']
        # The alt text of the first image is used; empty when there is no image.
        if images:
            alt = images[0]['alt']
        else:
            alt = ""
        info = {
            'img': [image['src'] for image in images],
            'title': title,
            'alt': alt,
            'author': author,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
        return info
2545 View Code Duplication
0 ignored issues
show
This code seems to be duplicated in your project.
Loading history...
2546
2547
class EndlessOrigami(GenericNavigableComic):
    """Retrieve the comics published on Endless Origami."""
    name = "origami"
    long_name = "Endless Origami"
    url = "http://endlessorigami.com"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect title, author, date, alt text and images from a comic page."""
        title_tag = soup.find('h2', class_='post-title')
        title = title_tag.string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        date_span = soup.find("span", class_="post-date")
        published = string_to_date(date_span.string, "%B %d, %Y")
        comic_div = soup.find("div", id="comic")
        images = comic_div.find_all("img")
        for image in images:
            assert image['alt'] == image['title']
        # The alt text of the first image is used; empty when there is no image.
        if images:
            alt = images[0]['alt']
        else:
            alt = ""
        info = {
            'img': [image['src'] for image in images],
            'title': title,
            'alt': alt,
            'author': author,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
        return info
2574
2575
2576
class PlanC(GenericNavigableComic):
    """Retrieve the comics published on Plan C."""
    name = 'planc'
    long_name = 'Plan C'
    url = 'http://www.plancomic.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect title, date and images from a comic page."""
        title_tag = soup.find('h2', class_='post-title')
        title = title_tag.string
        date_span = soup.find("span", class_="post-date")
        published = string_to_date(date_span.string, "%B %d, %Y")
        comic_div = soup.find('div', id='comic')
        images = comic_div.find_all('img')
        info = {
            'title': title,
            'img': [image['src'] for image in images],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
        return info
2598
2599
2600 View Code Duplication
class BuniComic(GenericNavigableComic):
    """Retrieve the comics published on Buni Comics."""
    name = 'buni'
    long_name = 'BuniComics'
    url = 'http://www.bunicomic.com'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect the image of a comic page; the image title is the comic title."""
        comic_div = soup.find('div', id='comic')
        images = comic_div.find_all('img')
        for image in images:
            assert image['alt'] == image['title']
        # Exactly one image is expected per comic.
        assert len(images) == 1
        info = {
            'img': [image['src'] for image in images],
            'title': images[0]['title'],
        }
        return info
2618
2619
2620
class GenericCommitStrip(GenericNavigableComic):
    """Common base to retrieve Commit Strip comics, whatever the language."""
    get_navi_link = get_a_rel_next
    get_first_comic_link = simulate_first_link
    # Placeholder: subclasses provide the URL of their first comic.
    first_url = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect titles, description and image URLs from a comic page."""
        desc_meta = soup.find('meta', property='og:description')
        description = desc_meta['content']
        title_meta = soup.find('meta', property='og:title')
        title = title_meta['content']
        entry = soup.find('div', class_='entry-content')
        images = entry.find_all('img')
        # Secondary title: the images' title attributes (missing ones count as '').
        image_titles = [image.get('title', '') for image in images]
        title2 = ' '.join(image_titles)
        image_urls = [
            urljoin_wrapper(cls.url, convert_iri_to_plain_ascii_uri(image['src']))
            for image in images
        ]
        info = {
            'title': title,
            'title2': title2,
            'description': description,
            'img': image_urls,
        }
        return info
2639
2640
2641
class CommitStripFr(GenericCommitStrip):
    """Retrieve the French edition of Commit Strip."""
    name = 'commit_fr'
    long_name = 'Commit Strip (Fr)'
    url = 'http://www.commitstrip.com/fr'
    first_url = 'http://www.commitstrip.com/fr/2012/02/22/interview/'
2647
2648
2649
class CommitStripEn(GenericCommitStrip):
    """Retrieve the English edition of Commit Strip."""
    name = 'commit_en'
    long_name = 'Commit Strip (En)'
    url = 'http://www.commitstrip.com/en'
    first_url = 'http://www.commitstrip.com/en/2012/02/22/interview/'
2655
2656
2657
class GenericBoumerie(GenericNavigableComic):
    """Common base to retrieve Boumeries comics, whatever the language."""
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next
    # Placeholders: subclasses provide the date format and locale to parse with.
    date_format = NotImplemented
    lang = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect short URL, title, author, date and images from a comic page."""
        title_tag = soup.find('h2', class_='post-title')
        title = title_tag.string
        shortlink = soup.find('link', rel='shortlink')
        short_url = shortlink['href']
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        date_span = soup.find('span', class_='post-date')
        published = string_to_date(date_span.string, cls.date_format, cls.lang)
        comic_div = soup.find('div', id='comic')
        images = comic_div.find_all('img')
        for image in images:
            assert image['alt'] == image['title']
        info = {
            'short_url': short_url,
            'img': [image['src'] for image in images],
            'title': title,
            'author': author,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
        return info
2683
2684
2685
class BoumerieEn(GenericBoumerie):
    """Retrieve the English edition of Boumeries."""
    name = 'boumeries_en'
    long_name = 'Boumeries (En)'
    url = 'http://comics.boumerie.com'
    date_format = "%B %d, %Y"
    lang = 'en_GB.UTF-8'
2692
2693
2694
class BoumerieFr(GenericBoumerie):
    """Retrieve the French edition of Boumeries."""
    name = 'boumeries_fr'
    long_name = 'Boumeries (Fr)'
    url = 'http://bd.boumerie.com'
    date_format = "%A, %d %B %Y"
    lang = "fr_FR.utf8"
2701
2702
2703 View Code Duplication
class UnearthedComics(GenericNavigableComic):
    """Class to retrieve Unearthed comics."""
    # Also on http://tapastic.com/series/UnearthedComics
    # Also on http://unearthedcomics.tumblr.com
    name = 'unearthed'
    long_name = 'Unearthed Comics'
    url = 'http://unearthedcomics.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://unearthedcomics.com/comics/world-with-turn-signals/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        Returns a dict with the title (empty string when the page has neither
        an h1 nor an h2), the og:description text (None when the meta tag is
        absent), the short URL, the image URLs and the publication date.
        """
        short_url = soup.find('link', rel='shortlink')['href']
        title_elt = soup.find('h1') or soup.find('h2')
        title = title_elt.string if title_elt else ""
        # Store the text of the description, not the <meta> tag object itself.
        desc_elt = soup.find('meta', property='og:description')
        desc = desc_elt['content'] if desc_elt is not None else None
        date_str = soup.find('time', class_='published updated hidden')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        post = soup.find('div', class_="entry content entry-content type-portfolio")
        imgs = post.find_all('img')
        return {
            'title': title,
            'description': desc,
            'url2': short_url,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
2734 View Code Duplication
0 ignored issues
show
This code seems to be duplicated in your project.
Loading history...
2735
2736
class Optipess(GenericNavigableComic):
    """Retrieve the comics published on Optipess."""
    name = 'optipess'
    long_name = 'Optipess'
    url = 'http://www.optipess.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect title, author, date, alt text and images from a comic page."""
        title_tag = soup.find('h2', class_='post-title')
        title = title_tag.string
        author_span = soup.find("span", class_="post-author")
        author = author_span.find("a").string
        comic_div = soup.find('div', id='comic')
        # A page without comic container simply has no image.
        if comic_div:
            images = comic_div.find_all('img')
        else:
            images = []
        if images:
            alt = images[0]['title']
        else:
            alt = ""
        for image in images:
            assert image['alt'] == image['title'] == alt
        date_span = soup.find('span', class_='post-date')
        published = string_to_date(date_span.string, "%B %d, %Y")
        info = {
            'title': title,
            'alt': alt,
            'author': author,
            'img': [image['src'] for image in images],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
        return info
2764
2765
2766
class PainTrainComic(GenericNavigableComic):
    """Class to retrieve Pain Train Comics."""
    name = 'paintrain'
    long_name = 'Pain Train Comics'
    url = 'http://paintraincomic.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        The comic number is taken from the `p` parameter of the page's
        shortlink, which must start with the site URL.
        """
        title = soup.find('h2', class_='post-title').string
        short_url = soup.find('link', rel='shortlink')['href']
        # The site URL is escaped so that its dots only match literal dots.
        short_url_re = re.compile(r'^%s/\?p=([0-9]*)' % re.escape(cls.url))
        num = int(short_url_re.match(short_url).groups()[0])
        imgs = soup.find('div', id='comic').find_all('img')
        alt = imgs[0]['title']
        assert all(i['alt'] == i['title'] == alt for i in imgs)
        date_str = soup.find('span', class_='post-date').string
        day = string_to_date(date_str, "%d/%m/%Y")
        return {
            'short_url': short_url,
            'num': num,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'alt': alt,
            'title': title,
        }
2796
2797
2798
class MoonBeard(GenericNavigableComic):
    """Class to retrieve MoonBeard comics."""
    # Also on http://blog.squiresjam.es/moonbeard
    # Also on http://www.webtoons.com/en/comedy/moon-beard/list?title_no=471
    name = 'moonbeard'
    long_name = 'Moon Beard'
    url = 'http://moonbeard.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        The comic number is taken from the `p` parameter of the page's
        shortlink, which must start with the site URL. Tags are the
        space-joined contents of the page's article:tag meta elements.
        """
        title = soup.find('h2', class_='post-title').string
        short_url = soup.find('link', rel='shortlink')['href']
        # The site URL is escaped so that its dots only match literal dots.
        short_url_re = re.compile(r'^%s/\?p=([0-9]*)' % re.escape(cls.url))
        num = int(short_url_re.match(short_url).groups()[0])
        imgs = soup.find('div', id='comic').find_all('img')
        alt = imgs[0]['title']
        assert all(i['alt'] == i['title'] == alt for i in imgs)
        date_str = soup.find('span', class_='post-date').string
        day = string_to_date(date_str, "%B %d, %Y")
        tags = ' '.join(t['content'] for t in soup.find_all('meta', property='article:tag'))
        author = soup.find('span', class_='post-author').string
        return {
            'short_url': short_url,
            'num': num,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'title': title,
            'tags': tags,
            'alt': alt,
            'author': author,
        }
2834
2835
2836
class AHamADay(GenericNavigableComic):
    """Class to retrieve A Ham A Day comics."""
    name = 'ham'
    long_name = 'A Ham A Day'
    url = 'http://www.ahammaday.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.ahammaday.com/today/3/6/french'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns None when the page has no matching navigation item.
        """
        # prev is next / next is prev
        li = last_soup.find('li', class_='previous' if next_ else 'next')
        # Without this guard a missing <li> would raise AttributeError.
        return li.find('a') if li is not None else None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        Returns a dict with the image URLs, title, author and publication date.
        """
        date_str = soup.find('time', class_='published')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        author = soup.find('span', class_='blog-author').find('a').string
        title = soup.find('meta', property='og:title')['content']
        imgs = soup.find_all('meta', itemprop='image')
        return {
            'img': [i['content'] for i in imgs],
            'title': title,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
        }
2867
2868
2869
class LittleLifeLines(GenericNavigableComic):
    """Retrieve the comics published on Little Life Lines."""
    name = 'life'
    long_name = 'Little Life Lines'
    url = 'http://www.littlelifelines.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.littlelifelines.com/comics/well-done'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the link to the next (or previous) comic, or None."""
        # prev is next / next is prev
        wanted_class = 'prev' if next_ else 'next'
        item = last_soup.find('li', class_=wanted_class)
        if not item:
            return None
        return item.find('a')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect title, description, author, date and images from a page."""
        title_meta = soup.find('meta', property='og:title')
        title = title_meta['content']
        desc_meta = soup.find('meta', property='og:description')
        description = desc_meta['content']
        time_tag = soup.find('time', class_='published')
        published = string_to_date(time_tag['datetime'], "%Y-%m-%d")
        author_link = soup.find('a', rel='author')
        author = author_link.string
        content = soup.find('div', class_="body entry-content")
        # Images without a 'src' attribute are ignored.
        images = [
            image for image in content.find_all('img')
            if image.get('src') is not None
        ]
        alt = images[0]['alt']
        info = {
            'title': title,
            'alt': alt,
            'description': description,
            'author': author,
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [image['src'] for image in images],
        }
        return info
2907
2908
2909
class GenericWordPressInkblot(GenericNavigableComic):
    """Common base to retrieve comics hosted on WordPress with Inkblot."""
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'first webcomic' link found on the home page."""
        first_link_class = 'webcomic-link webcomic1-link first-webcomic-link first-webcomic1-link'
        home = get_soup_at_url(cls.url)
        return home.find('a', class_=first_link_class)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect title, publication date and images from a comic page."""
        title_meta = soup.find('meta', property='og:title')
        title = title_meta['content']
        image_div = soup.find('div', class_='webcomic-image')
        images = image_div.find_all('img')
        published_meta = soup.find('meta', property='article:published_time')
        # Only the first 10 characters (the YYYY-MM-DD part) are parsed.
        published = string_to_date(published_meta['content'][:10], "%Y-%m-%d")
        info = {
            'title': title,
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [image['src'] for image in images],
        }
        return info
2932
2933
2934
class EverythingsStupid(GenericWordPressInkblot):
    """Retrieve the comics published on Everything's Stupid."""
    # Also on http://tapastic.com/series/EverythingsStupid
    # Also on http://www.webtoons.com/en/challenge/everythings-stupid/list?title_no=14591
    # Also on http://everythingsstupidcomics.tumblr.com
    name = 'stupid'
    long_name = "Everything's Stupid"
    url = 'http://everythingsstupid.net'
2942 View Code Duplication
0 ignored issues
show
This code seems to be duplicated in your project.
Loading history...
2943
2944
class TheIsmComics(GenericWordPressInkblot):
    """Retrieve the comics published on The Ism."""
    # Also on https://tapastic.com/series/TheIsm (?)
    name = 'theism'
    long_name = "The Ism"
    url = 'http://www.theism-comics.com'
2950
2951
2952
class WoodenPlankStudios(GenericWordPressInkblot):
    """Retrieve the comics published by Wooden Plank Studios."""
    name = 'woodenplank'
    long_name = 'Wooden Plank Studios'
    url = 'http://woodenplankstudios.com'
2957
2958
2959
class ElectricBunnyComic(GenericNavigableComic):
    """Retrieve the comics published on Electric Bunny Comics."""
    # Also on http://electricbunnycomics.tumblr.com
    name = 'bunny'
    long_name = 'Electric Bunny Comic'
    url = 'http://www.electricbunnycomics.com/View/Comic/153/Welcome+to+Hell'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the parent element of the 'First' image on the page at `url`."""
        page = get_soup_at_url(cls.url)
        first_img = page.find('img', alt='First')
        return first_img.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the parent of the 'Next' (or 'Back') image, or None."""
        wanted_alt = 'Next' if next_ else 'Back'
        nav_img = last_soup.find('img', alt=wanted_alt)
        if not nav_img:
            return None
        return nav_img.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Read the title and images from the page's Open Graph meta tags."""
        title_meta = soup.find('meta', property='og:title')
        title = title_meta['content']
        image_metas = soup.find_all('meta', property='og:image')
        info = {
            'title': title,
            'img': [meta['content'] for meta in image_metas],
        }
        return info
2987
2988
2989
class SheldonComics(GenericNavigableComic):
    """Class to retrieve Sheldon comics."""
    # Also on http://www.gocomics.com/sheldon
    name = 'sheldon'
    long_name = 'Sheldon Comics'
    url = 'http://www.sheldoncomics.com'

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics."""
        return get_soup_at_url(cls.url).find("a", id="nav-first")

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Navigation links pointing back at the home page are ignored.
        Return None when no other link is found."""
        wanted_id = "nav-next" if next_ else "nav-prev"
        for link in last_soup.find_all("a", id=wanted_id):
            # Compare against cls.url rather than repeating the literal so
            # that the home page address is defined in a single place.
            if link['href'] != cls.url:
                return link
        return None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The page is expected to hold exactly one image in the 'comic-foot'
        div, with identical 'alt' and 'title' attributes (both asserted)."""
        imgs = soup.find("div", id="comic-foot").find_all("img")
        assert all(i['alt'] == i['title'] for i in imgs)
        assert len(imgs) == 1
        title = imgs[0]['title']
        return {
            'title': title,
            'img': [i['src'] for i in imgs],
        }
3020
3021
3022
class CubeDrone(GenericNavigableComic):
    """Class to retrieve Cube Drone comics."""
    name = 'cubedrone'
    long_name = 'Cube Drone'
    url = 'http://cube-drone.com/comics'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics."""
        return get_soup_at_url(cls.url).find('span', class_='glyphicon glyphicon-backward').parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Return None when the page has no matching chevron icon."""
        class_ = 'glyphicon glyphicon-chevron-' + ('right' if next_ else 'left')
        span = last_soup.find('span', class_=class_)
        # find() returns None when nothing matches: do not dereference it.
        return span.parent if span is not None else None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Title and secondary url come from the twitter meta tags; 'title2'
        and 'alt' are read from the first comic image."""
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        url2 = soup.find('meta', attrs={'name': 'twitter:url'})['content']
        # The publication date is not extracted for the time being:
        # date_str = soup.find('h2', class_='comic_title').find('small').string
        # day = string_to_date(date_str, "%B %d, %Y, %I:%M %p")
        imgs = soup.find_all('img', class_='comic img-responsive')
        title2 = imgs[0]['title']
        alt = imgs[0]['alt']
        return {
            'url2': url2,
            'title': title,
            'title2': title2,
            'alt': alt,
            'img': [i['src'] for i in imgs],
        }
3057
3058
3059
class MakeItStoopid(GenericNavigableComic):
    """Make It Stoopid comics, navigated through the page's 'cnav' elements."""

    name = "stoopid"
    long_name = "Make it stoopid"
    url = "http://makeitstoopid.com/comic.php"

    @classmethod
    def get_nav(cls, soup):
        """Return the five navigation entries of a page.

        The 'cnav' elements are expected to appear twice with the same
        content (asserted). Entries are ordered begin, prev, archive,
        next, end; an entry without 'href' is replaced by None."""
        elements = soup.find_all(class_="cnav")
        first_bar = elements[:5]
        second_bar = elements[5:]
        assert first_bar == second_bar
        links = []
        for element in first_bar:
            if element.get("href") is None:
                links.append(None)
            else:
                links.append(element)
        return links

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'begin' navigation entry of the main page."""
        main_page = get_soup_at_url(cls.url)
        return cls.get_nav(main_page)[0]

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the 'next' (index 3) or 'prev' (index 1) navigation entry."""
        position = 3 if next_ else 1
        return cls.get_nav(last_soup)[position]

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict: title from the link, images from 'comicimg'."""
        info = {"title": link["title"]}
        pictures = soup.find_all("img", id="comicimg")
        info["img"] = [picture["src"] for picture in pictures]
        return info
3093
3094
3095
class GeekAndPoke(GenericNavigableComic):
    """Class to retrieve Geek And Poke comics."""
    name = 'geek'
    long_name = 'Geek And Poke'
    url = 'http://geek-and-poke.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://geek-and-poke.com/geekandpoke/2006/8/27/a-new-place-for-a-not-so-old-blog.html'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic."""
        # NOTE(review): 'prev-item' is used for the next comic and
        # 'next-item' for the previous one - presumably because the site
        # lists posts from newest to oldest; confirm against the site.
        return last_soup.find('a', class_='prev-item' if next_ else 'next-item')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Images are looked for in the "body entry-content" div or, failing
        that, in the "special-content" div; images without 'src' are
        dropped. 'alt' is the alt text of the first image, or an empty
        string when there is none."""
        title = soup.find('meta', property='og:title')['content']
        desc = soup.find('meta', property='og:description')['content']
        date_str = soup.find('time', class_='published')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        author = soup.find('a', rel='author').string
        div_content = (soup.find('div', class_="body entry-content") or
                       soup.find('div', class_="special-content"))
        imgs = div_content.find_all('img')
        imgs = [i for i in imgs if i.get('src') is not None]
        # NOTE(review): on a BeautifulSoup tag, `in` looks at the children
        # rather than at the attributes, so this check is probably always
        # true - confirm before tightening it.
        assert all('title' not in i or i['alt'] == i['title'] for i in imgs)
        # Always a string: the fallback used to be an empty list.
        alt = imgs[0].get('alt', "") if imgs else ""
        return {
            'title': title,
            'alt': alt,
            'description': desc,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
        }
3133
3134
3135
class GenericTumblrV1(GenericComic):
    """Generic class to retrieve comics from Tumblr using the V1 API.

    The API endpoint is derived from the `url` class attribute (see
    get_api_url)."""

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generic implementation of get_next_comic for Tumblr comics.

        Yield one info dict per post returned by get_posts, skipping the
        posts for which get_comic_info returns None."""
        for post in cls.get_posts(last_comic):
            comic = cls.get_comic_info(post)
            if comic is not None:
                yield comic

    @classmethod
    def get_url_from_post(cls, post):
        """Get the url of a post from its 'url' attribute."""
        return post['url']

    @classmethod
    def get_api_url(cls):
        """Get the url of the V1 API endpoint ('/api/read/') for this blog."""
        return urljoin_wrapper(cls.url, '/api/read/')

    @classmethod
    def get_comic_info(cls, post):
        """Get information about a particular comics.

        Return None when the post is not of type 'photo'."""
        type_ = post['type']
        if type_ != 'photo':
            return None
        tumblr_id = int(post['id'])
        api_url = cls.get_api_url() + '?id=%d' % (tumblr_id)
        # fromtimestamp() without tz converts to the local timezone of the
        # machine running the script.
        day = datetime.datetime.fromtimestamp(int(post['unix-timestamp'])).date()
        caption = post.find('photo-caption')
        # .string may be None (e.g. caption without a single text child):
        # fall back to an empty string so that the title is always a string.
        title = (caption.string or "") if caption else ""
        tags = ' '.join(t.string for t in post.find_all('tag'))
        # Photos may appear in 'photo' tags and/or straight in the post
        photo_tags = post.find_all('photo')
        if not photo_tags:
            photo_tags = [post]
        # Images are in multiple resolutions - taking the first one
        imgs = [photo.find('photo-url') for photo in photo_tags]
        return {
            'url': cls.get_url_from_post(post),
            'url2': post['url-with-slug'],
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'title': title,
            'tags': tags,
            'img': [i.string for i in imgs],
            'tumblr-id': tumblr_id,
            'api_url': api_url,  # for debug purposes
        }

    @classmethod
    def get_posts(cls, last_comic, nb_post_per_call=10):
        """Get posts using API. nb_post_per_call is max 50.

        Posts are retrieved from newer to older as per the tumblr v1 api
        but are returned in chronological order.

        When last_comic is provided, only the posts more recent than the
        one whose url is last_comic['url'] are returned. An empty iterable
        is returned when that post cannot be found anymore."""
        # Imported here because a bare 'import urllib' does not guarantee
        # that the urllib.error submodule is loaded.
        from urllib.error import HTTPError
        waiting_for_url = last_comic['url'] if last_comic else None
        posts_acc = []
        if last_comic is not None:
            # Sometimes, tumblr posts are deleted. If the previous post is
            # deleted, we might end up spending a lot of time looking for
            # something that doesn't exist. Failing early and clearly might
            # be a better option.
            last_api_url = last_comic['api_url']
            try:
                get_soup_at_url(last_api_url)
            except HTTPError:
                try:
                    get_soup_at_url(cls.url)
                except HTTPError:
                    print("Did not find previous post nor main url %s" % cls.url)
                else:
                    print("Did not find previous post %s : it might have been deleted" % last_api_url)
                # posts_acc is still empty at this point
                return reversed(posts_acc)
        api_url = cls.get_api_url()
        posts = get_soup_at_url(api_url).find('posts')
        start, total = int(posts['start']), int(posts['total'])
        assert start == 0
        for starting_num in range(0, total, nb_post_per_call):
            api_url2 = api_url + '?start=%d&num=%d' % (starting_num, nb_post_per_call)
            posts2 = get_soup_at_url(api_url2).find('posts')
            start2, total2 = int(posts2['start']), int(posts2['total'])
            assert starting_num == start2, "%d != %d" % (starting_num, start2)
            # This may happen and should be handled in the future
            assert total == total2, "%d != %d" % (total, total2)
            for post in posts2.find_all('post'):
                if waiting_for_url and waiting_for_url == cls.get_url_from_post(post):
                    # Reached the last known post: everything accumulated
                    # so far is newer.
                    return reversed(posts_acc)
                posts_acc.append(post)
        if waiting_for_url is None:
            return reversed(posts_acc)
        print("Did not find %s : there might be a problem" % waiting_for_url)
        return []
3230
3231
3232
class IrwinCardozo(GenericTumblrV1):
    """Irwin Cardozo Comics, read through the Tumblr V1 API."""

    name = "irwinc"
    long_name = "Irwin Cardozo"
    url = "http://irwincardozocomics.tumblr.com"
3237
3238
3239
class AccordingToDevin(GenericTumblrV1):
    """According To Devin comics, read through the Tumblr V1 API."""

    name = "devin"
    long_name = "According To Devin"
    url = "http://accordingtodevin.tumblr.com"
3244
3245
3246
class ItsTheTieTumblr(GenericTumblrV1):
    """It's the tie comics, read through the Tumblr V1 API."""

    # Also on http://itsthetie.com
    # Also on https://tapastic.com/series/itsthetie
    name = "tie-tumblr"
    long_name = "It's the tie (from Tumblr)"
    url = "http://itsthetie.tumblr.com"
3253
3254
3255
class OctopunsTumblr(GenericTumblrV1):
    """Octopuns comics, read through the Tumblr V1 API."""

    # Also on http://www.octopuns.net
    name = "octopuns-tumblr"
    long_name = "Octopuns (from Tumblr)"
    url = "http://octopuns.tumblr.com"
3261
3262
3263
class PicturesInBoxesTumblr(GenericTumblrV1):
    """Pictures In Boxes comics, read through the Tumblr V1 API."""

    # Also on http://www.picturesinboxes.com
    name = "picturesinboxes-tumblr"
    long_name = "Pictures in Boxes (from Tumblr)"
    url = "http://picturesinboxescomic.tumblr.com"
3269
3270
3271
class TubeyToonsTumblr(GenericTumblrV1):
    """TubeyToons comics, read through the Tumblr V1 API."""

    # Also on http://tapastic.com/series/Tubey-Toons
    # Also on http://tubeytoons.com
    name = "tubeytoons-tumblr"
    long_name = "Tubey Toons (from Tumblr)"
    url = "http://tubeytoons.tumblr.com"
3278
3279
3280
class UnearthedComicsTumblr(GenericTumblrV1):
    """Unearthed comics, read through the Tumblr V1 API."""

    # Also on http://tapastic.com/series/UnearthedComics
    # Also on http://unearthedcomics.com
    name = "unearthed-tumblr"
    long_name = "Unearthed Comics (from Tumblr)"
    url = "http://unearthedcomics.tumblr.com"
3287
3288
3289
class PieComic(GenericTumblrV1):
    """Pie Comic comics, read through the Tumblr V1 API."""

    name = "pie"
    long_name = "Pie Comic"
    url = "http://piecomic.tumblr.com"
3294
3295
3296
class MrEthanDiamond(GenericTumblrV1):
    """Mr Ethan Diamond comics, read through the Tumblr V1 API."""

    name = "diamond"
    long_name = "Mr Ethan Diamond"
    url = "http://mrethandiamond.tumblr.com"
3301
3302
3303
class Flocci(GenericTumblrV1):
    """floccinaucinihilipilification comics, read through the Tumblr V1 API."""

    name = "flocci"
    long_name = "floccinaucinihilipilification"
    url = "http://floccinaucinihilipilificationa.tumblr.com"
3308
3309
3310
class UpAndOut(GenericTumblrV1):
    """Up & Out comics, read through the Tumblr V1 API."""

    # Also on http://tapastic.com/series/UP-and-OUT
    name = "upandout"
    long_name = "Up And Out (from Tumblr)"
    url = "http://upandoutcomic.tumblr.com"
3316
3317
3318
class Pundemonium(GenericTumblrV1):
    """Pundemonium comics, read through the Tumblr V1 API."""

    name = "pundemonium"
    long_name = "Pundemonium"
    url = "http://monstika.tumblr.com"
3323
3324
3325
class PoorlyDrawnLinesTumblr(GenericEmptyComic, GenericTumblrV1):
    """Poorly Drawn Lines comics (Tumblr V1 API)."""

    # NOTE(review): GenericEmptyComic is listed first, so its methods win in
    # the MRO - presumably to switch retrieval off; confirm.
    # Also on http://poorlydrawnlines.com
    name = "poorlydrawn-tumblr"
    long_name = "Poorly Drawn Lines (from Tumblr)"
    url = "http://pdlcomics.tumblr.com"
3331
3332
3333
class PearShapedComics(GenericTumblrV1):
    """Pear Shaped Comics, read through the Tumblr V1 API."""

    name = "pearshaped"
    long_name = "Pear-Shaped Comics"
    url = "http://pearshapedcomics.com"
3338
3339
3340
class PondScumComics(GenericTumblrV1):
    """Pond Scum Comics, read through the Tumblr V1 API."""

    name = "pond"
    long_name = "Pond Scum"
    url = "http://pondscumcomic.tumblr.com"
3345
3346
3347
class MercworksTumblr(GenericTumblrV1):
    """Mercworks comics, read through the Tumblr V1 API."""

    # Also on http://mercworks.net
    name = "mercworks-tumblr"
    long_name = "Mercworks (from Tumblr)"
    url = "http://mercworks.tumblr.com"
3353
3354
3355
class OwlTurdTumblr(GenericEmptyComic, GenericTumblrV1):
    """Owl Turd comics (Tumblr V1 API)."""

    # NOTE(review): GenericEmptyComic is listed first, so its methods win in
    # the MRO - presumably to switch retrieval off; confirm.
    # Also on http://tapastic.com/series/Owl-Turd-Comix
    name = "owlturd-tumblr"
    long_name = "Owl Turd (from Tumblr)"
    url = "http://owlturd.com"
3361
3362
3363
class VectorBelly(GenericTumblrV1):
    """Vector Belly comics, read through the Tumblr V1 API."""

    # Also on http://vectorbelly.com
    name = "vector"
    long_name = "Vector Belly"
    url = "http://vectorbelly.tumblr.com"
3369
3370
3371
class GoneIntoRapture(GenericTumblrV1):
    """Gone Into Rapture comics, read through the Tumblr V1 API."""

    # Also on http://goneintorapture.tumblr.com
    # Also on http://tapastic.com/series/Goneintorapture
    name = "rapture"
    long_name = "Gone Into Rapture"
    url = "http://www.goneintorapture.com"
3378
3379
3380
class TheOatmealTumblr(GenericTumblrV1):
    """The Oatmeal comics, read through the Tumblr V1 API."""

    # Also on http://theoatmeal.com
    name = "oatmeal-tumblr"
    long_name = "The Oatmeal (from Tumblr)"
    url = "http://oatmeal.tumblr.com"
3386
3387
3388
class HeckIfIKnowComicsTumblr(GenericTumblrV1):
    """Heck If I Know Comics, read through the Tumblr V1 API."""

    # Also on http://tapastic.com/series/Regular
    name = "heck-tumblr"
    long_name = "Heck if I Know comics (from Tumblr)"
    url = "http://heckifiknowcomics.com"
3394
3395
3396
class MyJetPack(GenericTumblrV1):
    """My Jet Pack comics, read through the Tumblr V1 API."""

    name = "jetpack"
    long_name = "My Jet Pack"
    url = "http://myjetpack.tumblr.com"
3401
3402
3403
class CheerUpEmoKidTumblr(GenericTumblrV1):
    """CheerUpEmoKid comics, read through the Tumblr V1 API."""

    # Also on http://www.cheerupemokid.com
    # Also on http://tapastic.com/series/CUEK
    name = "cuek-tumblr"
    long_name = "Cheer Up Emo Kid (from Tumblr)"
    url = "http://enzocomics.tumblr.com"
3410
3411
3412
class ForLackOfABetterComic(GenericEmptyComic, GenericTumblrV1):
    """For Lack Of A Better Comic comics (Tumblr V1 API)."""

    # NOTE(review): GenericEmptyComic is listed first, so its methods win in
    # the MRO - presumably to switch retrieval off; confirm.
    # Also on http://forlackofabettercomic.com
    name = "lack"
    long_name = "For Lack Of A Better Comic"
    url = "http://forlackofabettercomic.tumblr.com"
3418
3419
3420
class ZenPencilsTumblr(GenericTumblrV1):
    """ZenPencils comics, read through the Tumblr V1 API."""

    # Also on http://zenpencils.com
    # Also on http://www.gocomics.com/zen-pencils
    name = "zenpencils-tumblr"
    long_name = "Zen Pencils (from Tumblr)"
    url = "http://zenpencils.tumblr.com"
3427
3428
3429
class ThreeWordPhraseTumblr(GenericTumblrV1):
    """Three Word Phrase comics, read through the Tumblr V1 API."""

    # Also on http://threewordphrase.com
    name = "threeword-tumblr"
    long_name = "Three Word Phrase (from Tumblr)"
    url = "http://www.threewordphrase.tumblr.com"
3435
3436
3437
class TimeTrabbleTumblr(GenericTumblrV1):
    """Time Trabble comics, read through the Tumblr V1 API."""

    # Also on http://timetrabble.com
    name = "timetrabble-tumblr"
    long_name = "Time Trabble (from Tumblr)"
    url = "http://timetrabble.tumblr.com"
3443
3444
3445
class SafelyEndangeredTumblr(GenericTumblrV1):
    """Safely Endangered comics, read through the Tumblr V1 API."""

    # Also on http://www.safelyendangered.com
    name = "endangered-tumblr"
    long_name = "Safely Endangered (from Tumblr)"
    url = "http://tumblr.safelyendangered.com"
3451
3452
3453
class MouseBearComedyTumblr(GenericTumblrV1):
    """Mouse Bear Comedy comics, read through the Tumblr V1 API."""

    # Also on http://www.mousebearcomedy.com
    name = "mousebear-tumblr"
    long_name = "Mouse Bear Comedy (from Tumblr)"
    url = "http://mousebearcomedy.tumblr.com"
3459
3460
3461
class BouletCorpTumblr(GenericTumblrV1):
    """BouletCorp comics, read through the Tumblr V1 API."""

    # Also on http://www.bouletcorp.com
    name = "boulet-tumblr"
    long_name = "Boulet Corp (from Tumblr)"
    url = "http://bouletcorp.tumblr.com"
3467
3468
3469
class TheAwkwardYetiTumblr(GenericEmptyComic, GenericTumblrV1):
    """The Awkward Yeti comics (Tumblr V1 API)."""

    # NOTE(review): GenericEmptyComic is listed first, so its methods win in
    # the MRO - presumably to switch retrieval off; confirm.
    # Also on http://www.gocomics.com/the-awkward-yeti
    # Also on http://theawkwardyeti.com
    # Also on https://tapastic.com/series/TheAwkwardYeti
    name = "yeti-tumblr"
    long_name = "The Awkward Yeti (from Tumblr)"
    url = "http://larstheyeti.tumblr.com"
3477
3478
3479
class NellucNhoj(GenericTumblrV1):
    """NellucNhoj comics, read through the Tumblr V1 API."""

    name = "nhoj"
    long_name = "Nelluc Nhoj"
    url = "http://nellucnhoj.com"
3484
3485
3486
class DownTheUpwardSpiralTumblr(GenericTumblrV1):
    """Down The Upward Spiral comics, read through the Tumblr V1 API."""

    # Also on http://www.downtheupwardspiral.com
    name = "spiral-tumblr"
    long_name = "Down the Upward Spiral (from Tumblr)"
    url = "http://downtheupwardspiral.tumblr.com"
3492
3493
3494
class AsPerUsualTumblr(GenericTumblrV1):
    """As Per Usual comics, read through the Tumblr V1 API."""

    # Also on https://tapastic.com/series/AsPerUsual
    name = "usual-tumblr"
    long_name = "As Per Usual (from Tumblr)"
    url = "http://as-per-usual.tumblr.com"
3500
3501
3502
class OneOneOneOneComicTumblr(GenericTumblrV1):
    """1111 Comics, read through the Tumblr V1 API."""

    # Also on http://www.1111comics.me
    # Also on https://tapastic.com/series/1111-Comics
    name = "1111-tumblr"
    long_name = "1111 Comics (from Tumblr)"
    url = "http://comics1111.tumblr.com"
3509
3510
3511
class JhallComicsTumblr(GenericTumblrV1):
    """Jhall Comics, read through the Tumblr V1 API."""

    # Also on http://jhallcomics.com
    name = "jhall-tumblr"
    long_name = "Jhall Comics (from Tumblr)"
    url = "http://jhallcomics.tumblr.com"
3517
3518
3519
class BerkeleyMewsTumblr(GenericTumblrV1):
    """Berkeley Mews comics, read through the Tumblr V1 API."""

    # Also on http://www.gocomics.com/berkeley-mews
    # Also on http://www.berkeleymews.com
    name = "berkeley-tumblr"
    long_name = "Berkeley Mews (from Tumblr)"
    url = "http://mews.tumblr.com"
3526
3527
3528
class JoanCornellaTumblr(GenericTumblrV1):
    """Joan Cornella comics, read through the Tumblr V1 API."""

    # Also on http://joancornella.net
    name = "cornella-tumblr"
    long_name = "Joan Cornella (from Tumblr)"
    url = "http://cornellajoan.tumblr.com"
3534
3535
3536
class RespawnComicTumblr(GenericTumblrV1):
    """Respawn Comic, read through the Tumblr V1 API."""

    # Also on http://respawncomic.com
    name = "respawn-tumblr"
    long_name = "Respawn Comic (from Tumblr)"
    url = "http://respawncomic.tumblr.com"
3542
3543
3544
class ChrisHallbeckTumblr(GenericTumblrV1):
    """Class to retrieve Chris Hallbeck comics."""
    # Also on https://tapastic.com/ChrisHallbeck
    # Also on http://maximumble.com
    # Also on http://minimumble.com
    # Also on http://thebookofbiff.com
    name = 'hallbeck-tumblr'
    # Spelling fixed ("Hallback" -> "Hallbeck") to match the blog url.
    long_name = 'Chris Hallbeck (from Tumblr)'
    url = 'http://chrishallbeck.tumblr.com'
3553
3554
3555
class ComicNuggets(GenericTumblrV1):
    """Comic Nuggets, read through the Tumblr V1 API."""

    name = "nuggets"
    long_name = "Comic Nuggets"
    url = "http://comicnuggets.com"
3560
3561
3562
class PigeonGazetteTumblr(GenericTumblrV1):
    """The Pigeon Gazette comics, read through the Tumblr V1 API."""

    # Also on https://tapastic.com/series/The-Pigeon-Gazette
    name = "pigeon-tumblr"
    long_name = "The Pigeon Gazette (from Tumblr)"
    url = "http://thepigeongazette.tumblr.com"
3568
3569
3570
class CancerOwl(GenericTumblrV1):
    """Cancer Owl comics, read through the Tumblr V1 API."""

    # Also on http://cancerowl.com
    name = "cancerowl-tumblr"
    long_name = "Cancer Owl (from Tumblr)"
    url = "http://cancerowl.tumblr.com"
3576
3577
3578
class FowlLanguageTumblr(GenericTumblrV1):
    """Fowl Language comics, read through the Tumblr V1 API."""

    # Also on http://www.fowllanguagecomics.com
    # Also on http://tapastic.com/series/Fowl-Language-Comics
    # Also on http://www.gocomics.com/fowl-language
    name = "fowllanguage-tumblr"
    long_name = "Fowl Language Comics (from Tumblr)"
    url = "http://fowllanguagecomics.tumblr.com"
3586
3587
3588
class TheOdd1sOutTumblr(GenericTumblrV1):
    """The Odd 1s Out comics, read through the Tumblr V1 API."""

    # Also on http://theodd1sout.com
    # Also on https://tapastic.com/series/Theodd1sout
    name = "theodd-tumblr"
    long_name = "The Odd 1s Out (from Tumblr)"
    url = "http://theodd1sout.tumblr.com"
3595
3596
3597
class TheUnderfoldTumblr(GenericTumblrV1):
    """The Underfold comics, read through the Tumblr V1 API."""

    # Also on http://theunderfold.com
    name = "underfold-tumblr"
    long_name = "The Underfold (from Tumblr)"
    url = "http://theunderfold.tumblr.com"
3603
3604
3605
class LolNeinTumblr(GenericTumblrV1):
    """Lol Nein comics, read through the Tumblr V1 API."""

    # Also on http://lolnein.com
    name = "lolnein-tumblr"
    long_name = "Lol Nein (from Tumblr)"
    url = "http://lolneincom.tumblr.com"
3611
3612
3613
class FatAwesomeComicsTumblr(GenericTumblrV1):
    """Fat Awesome Comics, read through the Tumblr V1 API."""

    # Also on http://fatawesome.com/comics
    name = "fatawesome-tumblr"
    long_name = "Fat Awesome (from Tumblr)"
    url = "http://fatawesomecomedy.tumblr.com"
3619
3620
3621
class TheWorldIsFlatTumblr(GenericTumblrV1):
    """The World Is Flat Comics, read through the Tumblr V1 API."""

    # Also on https://tapastic.com/series/The-World-is-Flat
    name = "flatworld-tumblr"
    long_name = "The World Is Flat (from Tumblr)"
    url = "http://theworldisflatcomics.tumblr.com"
3627
3628
3629
class DorrisMc(GenericEmptyComic, GenericTumblrV1):
    """Dorris Mc Comics (Tumblr V1 API)."""

    # NOTE(review): GenericEmptyComic is listed first, so its methods win in
    # the MRO - presumably to switch retrieval off; confirm.
    # Also on http://www.gocomics.com/dorris-mccomics
    name = "dorrismc"
    long_name = "Dorris Mc"
    url = "http://dorrismccomics.com"
3635
3636
3637
class LeleozTumblr(GenericEmptyComic, GenericTumblrV1):
    """Leleoz comics (Tumblr V1 API)."""

    # NOTE(review): GenericEmptyComic is listed first, so its methods win in
    # the MRO - presumably to switch retrieval off; confirm.
    # Also on https://tapastic.com/series/Leleoz
    name = "leleoz-tumblr"
    long_name = "Leleoz (from Tumblr)"
    url = "http://leleozcomics.tumblr.com"
3643
3644
3645
class MoonBeardTumblr(GenericTumblrV1):
    """MoonBeard comics, read through the Tumblr V1 API."""

    # Also on http://moonbeard.com
    # Also on http://www.webtoons.com/en/comedy/moon-beard/list?title_no=471
    name = "moonbeard-tumblr"
    long_name = "Moon Beard (from Tumblr)"
    url = "http://blog.squiresjam.es/moonbeard"
3652
3653
3654
class AComik(GenericTumblrV1):
    """A Comik, read through the Tumblr V1 API."""

    name = "comik"
    long_name = "A Comik"
    url = "http://acomik.com"
3659
3660
3661
class ClassicRandy(GenericTumblrV1):
    """Classic Randy comics, read through the Tumblr V1 API."""

    name = "randy"
    long_name = "Classic Randy"
    url = "http://classicrandy.tumblr.com"
3666
3667
3668
class DagssonTumblr(GenericTumblrV1):
    """Dagsson comics, read through the Tumblr V1 API."""

    # Also on http://www.dagsson.com
    name = "dagsson-tumblr"
    long_name = "Dagsson Hugleikur (from Tumblr)"
    url = "http://hugleikurdagsson.tumblr.com"
3674
3675
3676
class LinsEditionsTumblr(GenericTumblrV1):
    """L.I.N.S. Editions comics, read through the Tumblr V1 API."""

    # Also on https://linsedition.com
    name = "lins-tumblr"
    long_name = "L.I.N.S. Editions (from Tumblr)"
    url = "http://linscomics.tumblr.com"
3682
3683
3684
class OrigamiHotDish(GenericTumblrV1):
    """Origami Hot Dish comics, read through the Tumblr V1 API."""

    name = "origamihotdish"
    long_name = "Origami Hot Dish"
    url = "http://origamihotdish.com"
3689
3690
3691
class HitAndMissComicsTumblr(GenericTumblrV1):
    """Hit and Miss Comics, read through the Tumblr V1 API."""

    name = "hitandmiss"
    long_name = "Hit and Miss Comics"
    url = "http://hitandmisscomics.tumblr.com"
3696
3697
3698
class HMBlanc(GenericTumblrV1):
    """HM Blanc comics, read through the Tumblr V1 API."""

    name = "hmblanc"
    long_name = "HM Blanc"
    url = "http://hmblanc.tumblr.com"
3703
3704
3705
class TalesOfAbsurdityTumblr(GenericTumblrV1):
    """Tales Of Absurdity comics, read through the Tumblr V1 API."""

    # Also on http://talesofabsurdity.com
    # Also on http://tapastic.com/series/Tales-Of-Absurdity
    name = "absurdity-tumblr"
    long_name = "Tales of Absurdity (from Tumblr)"
    url = "http://talesofabsurdity.tumblr.com"
3712
3713
3714
class RobbieAndBobby(GenericTumblrV1):
    """Robbie And Bobby comics, read through the Tumblr V1 API."""

    # Also on http://robbieandbobby.com
    name = "robbie-tumblr"
    long_name = "Robbie And Bobby (from Tumblr)"
    url = "http://robbieandbobby.tumblr.com"
3720
3721
3722
class ElectricBunnyComicTumblr(GenericTumblrV1):
    """Electric Bunny Comics, read through the Tumblr V1 API."""

    # Also on http://www.electricbunnycomics.com/View/Comic/153/Welcome+to+Hell
    name = "bunny-tumblr"
    long_name = "Electric Bunny Comic (from Tumblr)"
    url = "http://electricbunnycomics.tumblr.com"
3728
3729
3730
class Hoomph(GenericTumblrV1):
    """Hoomph comics, read through the Tumblr V1 API."""

    name = "hoomph"
    long_name = "Hoomph"
    url = "http://hoom.ph"
3735
3736
3737
class BFGFSTumblr(GenericTumblrV1):
    """BFGFS comics, read through the Tumblr V1 API."""

    # Also on https://tapastic.com/series/BFGFS
    # Also on http://bfgfs.com
    name = "bfgfs-tumblr"
    long_name = "BFGFS (from Tumblr)"
    url = "http://bfgfs.tumblr.com"
3744
3745
3746
class DoodleForFood(GenericTumblrV1):
    """Doodle For Food comics, read through the Tumblr V1 API."""

    # Also on http://doodleforfood.com
    name = "doodle"
    long_name = "Doodle For Food"
    url = "http://doodleforfood.com"
3752
3753
3754
class CassandraCalinTumblr(GenericEmptyComic, GenericTumblrV1):
    """C. Cassandra comics (Tumblr V1 API)."""

    # NOTE(review): GenericEmptyComic is listed first, so its methods win in
    # the MRO - presumably to switch retrieval off; confirm.
    # Also on http://cassandracalin.com
    # Also on https://tapastic.com/series/C-Cassandra-comics
    name = "cassandra-tumblr"
    long_name = "Cassandra Calin (from Tumblr)"
    url = "http://c-cassandra.tumblr.com"
3761
3762
3763
class DougWasTaken(GenericTumblrV1):
    """Doug Was Taken comics, read through the Tumblr V1 API."""

    name = "doog"
    long_name = "Doug Was Taken"
    url = "http://dougwastaken.tumblr.com"
3768
3769
3770
class MandatoryRollerCoaster(GenericEmptyComic, GenericTumblrV1):
    """Mandatory Roller Coaster comics (Tumblr V1 API)."""

    # NOTE(review): GenericEmptyComic is listed first, so its methods win in
    # the MRO - presumably to switch retrieval off; confirm.
    name = "rollercoaster"
    long_name = "Mandatory Roller Coaster"
    url = "http://mandatoryrollercoaster.com"
3775
3776
3777
class CEstPasEnRegardantSesPompes(GenericEmptyComic, GenericTumblrV1):
    """C'Est Pas En Regardant Ses Pompes (...) comics (Tumblr V1 API)."""

    # NOTE(review): GenericEmptyComic is listed first, so its methods win in
    # the MRO - presumably to switch retrieval off; confirm.
    name = "cperspqccltt"
    long_name = "C Est Pas En Regardant Ses Pompes (...)"
    url = "http://cperspqccltt.tumblr.com"
3782
3783
3784
class HorovitzComics(GenericListableComic):
    """Shared logic for the comics published on horovitzcomics.com."""

    url = "http://www.horovitzcomics.com"
    # Captures three numeric path components of the image url, read as
    # year, month and day by get_comic_info.
    img_re = re.compile(".*comics/([0-9]*)/([0-9]*)/([0-9]*)/.*$")
    # Subclasses provide the pattern matching the hrefs of their own comics;
    # its first group is the comic number.
    link_re = NotImplemented
    get_url_from_archive_element = join_cls_url_to_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for one comic.

        The number comes from the link's href, the title from the link text
        and the date from the url of the single 'comic' image (asserted)."""
        link_match = cls.link_re.match(link["href"])
        num = int(link_match.group(1))
        title = link.string
        pictures = soup.find_all("img", id="comic")
        assert len(pictures) == 1
        date_match = cls.img_re.match(pictures[0]["src"])
        year, month, day = map(int, date_match.groups())
        return {
            "title": title,
            "day": day,
            "month": month,
            "year": year,
            "img": [picture["src"] for picture in pictures],
            "num": num,
        }

    @classmethod
    def get_archive_elements(cls):
        """Return the archive links matching link_re, in reverse page order."""
        archive_url = "http://www.horovitzcomics.com/comics/archive/"
        archive_page = get_soup_at_url(archive_url)
        matching_links = archive_page.find_all("a", href=cls.link_re)
        return reversed(matching_links)
3814
3815
3816
class HorovitzNew(HorovitzComics):
    """Horovitz comics from the "new" section of the archive."""
    name = 'horovitznew'
    long_name = 'Horovitz New'
    # First group captures the comic number.
    link_re = re.compile('^/comics/new/([0-9]+)$')
3821
3822
3823
class HorovitzClassic(HorovitzComics):
    """Horovitz comics from the "classic" section of the archive."""
    name = 'horovitzclassic'
    long_name = 'Horovitz Classic'
    # First group captures the comic number.
    link_re = re.compile('^/comics/classic/([0-9]+)$')
3828
3829
3830
class GenericGoComic(GenericNavigableComic):
    """Shared logic for the comics hosted on gocomics.com.

    The subclasses in this file only define `name`, `long_name` and `url`.
    """
    # Comic URLs end with the publication date: .../<year>/<month>/<day>
    url_date_re = re.compile('.*/([0-9]*)/([0-9]*)/([0-9]*)$')

    @classmethod
    def get_first_comic_link(cls):
        """Return the link to the first comic, found on the series page."""
        series_page = get_soup_at_url(cls.url)
        return series_page.find('a', class_='beginning')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the link to the next (or previous) comic from `last_soup`."""
        direction = 'next' if next_ else 'prev'
        # Only links whose target ends with a date are accepted.
        return last_soup.find('a', class_=direction, href=cls.url_date_re)

    @classmethod
    def get_url_from_link(cls, link):
        """Return the absolute URL for `link`, resolved against the site root."""
        return urljoin_wrapper('http://www.gocomics.com', link['href'])

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dictionary for the comic at `link`.

        The date comes from the comic URL; the image is the last `img` tag
        with class "strip" and the author is read from the page metadata.
        """
        date_match = cls.url_date_re.match(cls.get_url_from_link(link))
        year, month, day = map(int, date_match.groups())
        strips = soup.find_all('img', class_='strip')
        author_tag = soup.find('meta', attrs={'name': 'author'})
        return {
            'day': day,
            'month': month,
            'year': year,
            'img': [strips[-1]['src']],
            'author': author_tag['content'],
        }
3862
3863
3864
class PearlsBeforeSwine(GenericGoComic):
    """Pearls Before Swine comics, as hosted on GoComics."""
    name = 'pearls'
    long_name = 'Pearls Before Swine'
    url = 'http://www.gocomics.com/pearlsbeforeswine'
3869
3870
3871
class Peanuts(GenericGoComic):
    """Peanuts comics, as hosted on GoComics."""
    name = 'peanuts'
    long_name = 'Peanuts'
    url = 'http://www.gocomics.com/peanuts'
3876
3877
3878
class MattWuerker(GenericGoComic):
    """Matt Wuerker comics, as hosted on GoComics."""
    name = 'wuerker'
    long_name = 'Matt Wuerker'
    url = 'http://www.gocomics.com/mattwuerker'
3883
3884
3885
class TomToles(GenericGoComic):
    """Tom Toles comics, as hosted on GoComics."""
    name = 'toles'
    long_name = 'Tom Toles'
    url = 'http://www.gocomics.com/tomtoles'
3890
3891
3892
class BreakOfDay(GenericGoComic):
    """Break Of Day comics, as hosted on GoComics."""
    name = 'breakofday'
    long_name = 'Break Of Day'
    url = 'http://www.gocomics.com/break-of-day'
3897
3898
3899
class Brevity(GenericGoComic):
    """Brevity comics, as hosted on GoComics."""
    name = 'brevity'
    long_name = 'Brevity'
    url = 'http://www.gocomics.com/brevity'
3904
3905
3906
class MichaelRamirez(GenericGoComic):
    """Michael Ramirez comics, as hosted on GoComics."""
    name = 'ramirez'
    long_name = 'Michael Ramirez'
    url = 'http://www.gocomics.com/michaelramirez'
3911
3912
3913
class MikeLuckovich(GenericGoComic):
    """Mike Luckovich comics, as hosted on GoComics."""
    name = 'luckovich'
    long_name = 'Mike Luckovich'
    url = 'http://www.gocomics.com/mikeluckovich'
3918
3919
3920
class JimBenton(GenericGoComic):
    """Jim Benton comics, as hosted on GoComics."""
    # Other source: http://jimbenton.tumblr.com
    name = 'benton'
    long_name = 'Jim Benton'
    url = 'http://www.gocomics.com/jim-benton-cartoons'
3926
3927
3928
class TheArgyleSweater(GenericGoComic):
    """The Argyle Sweater comics, as hosted on GoComics."""
    name = 'argyle'
    long_name = 'Argyle Sweater'
    url = 'http://www.gocomics.com/theargylesweater'
3933
3934
3935
class SunnyStreet(GenericGoComic):
    """Sunny Street comics, as hosted on GoComics."""
    # Other source: http://www.sunnystreetcomics.com
    name = 'sunny'
    long_name = 'Sunny Street'
    url = 'http://www.gocomics.com/sunny-street'
3941
3942
3943
class OffTheMark(GenericGoComic):
    """Off The Mark comics, as hosted on GoComics."""
    # Other source: https://www.offthemark.com
    name = 'offthemark'
    long_name = 'Off The Mark'
    url = 'http://www.gocomics.com/offthemark'
3949
3950
3951
class WuMo(GenericGoComic):
    """WuMo comics, as hosted on GoComics."""
    # Other source: http://wumo.com
    name = 'wumo'
    long_name = 'WuMo'
    url = 'http://www.gocomics.com/wumo'
3957
3958
3959
class LunarBaboon(GenericGoComic):
    """Lunar Baboon comics, as hosted on GoComics."""
    # Other sources:
    #  - http://www.lunarbaboon.com
    #  - https://tapastic.com/series/Lunarbaboon
    name = 'lunarbaboon'
    long_name = 'Lunar Baboon'
    url = 'http://www.gocomics.com/lunarbaboon'
3966
3967
3968
class SandersenGocomic(GenericGoComic):
    """Sarah Andersen comics, as hosted on GoComics."""
    # Other sources:
    #  - http://sarahcandersen.com
    #  - http://tapastic.com/series/Doodle-Time
    name = 'sandersen-goc'
    long_name = 'Sarah Andersen (from GoComics)'
    url = 'http://www.gocomics.com/sarahs-scribbles'
3975
3976
3977
class CalvinAndHobbesGoComic(GenericGoComic):
    """Calvin and Hobbes comics, as hosted on GoComics."""
    # This is the GoComics source, as opposed to
    # http://marcel-oehler.marcellosendos.ch/comics/ch/
    name = 'calvin-goc'
    long_name = 'Calvin and Hobbes (from GoComics)'
    url = 'http://www.gocomics.com/calvinandhobbes'
3983
3984
3985
class RallGoComic(GenericGoComic):
    """Ted Rall comics, as hosted on GoComics."""
    # Other source: http://rall.com/comic
    name = 'rall-goc'
    long_name = 'Ted Rall (from GoComics)'
    url = 'http://www.gocomics.com/tedrall'
3991
3992
3993
class TheAwkwardYetiGoComic(GenericGoComic):
    """The Awkward Yeti comics, as hosted on GoComics."""
    # Other sources:
    #  - http://larstheyeti.tumblr.com
    #  - http://theawkwardyeti.com
    #  - https://tapastic.com/series/TheAwkwardYeti
    name = 'yeti-goc'
    long_name = 'The Awkward Yeti (from GoComics)'
    url = 'http://www.gocomics.com/the-awkward-yeti'
4001
4002
4003
class BerkeleyMewsGoComics(GenericGoComic):
    """Berkeley Mews comics, as hosted on GoComics."""
    # Other sources:
    #  - http://mews.tumblr.com
    #  - http://www.berkeleymews.com
    name = 'berkeley-goc'
    long_name = 'Berkeley Mews (from GoComics)'
    url = 'http://www.gocomics.com/berkeley-mews'
4010
4011
4012
class SheldonGoComics(GenericGoComic):
    """Sheldon comics, as hosted on GoComics."""
    # Other source: http://www.sheldoncomics.com
    name = 'sheldon-goc'
    long_name = 'Sheldon Comics (from GoComics)'
    url = 'http://www.gocomics.com/sheldon'
4018
4019
4020
class FowlLanguageGoComics(GenericGoComic):
    """Fowl Language comics, as hosted on GoComics."""
    # Other sources:
    #  - http://www.fowllanguagecomics.com
    #  - http://tapastic.com/series/Fowl-Language-Comics
    #  - http://fowllanguagecomics.tumblr.com
    name = 'fowllanguage-goc'
    long_name = 'Fowl Language Comics (from GoComics)'
    url = 'http://www.gocomics.com/fowl-language'
4028
4029
4030
class NickAnderson(GenericGoComic):
    """Nick Anderson comics, as hosted on GoComics."""
    name = 'nickanderson'
    long_name = 'Nick Anderson'
    url = 'http://www.gocomics.com/nickanderson'
4035
4036
4037
class GarfieldGoComics(GenericGoComic):
    """Garfield comics, as hosted on GoComics."""
    # Other source: http://garfield.com
    name = 'garfield-goc'
    long_name = 'Garfield (from GoComics)'
    url = 'http://www.gocomics.com/garfield'
4043
4044
4045
class DorrisMcGoComics(GenericGoComic):
    """Dorris Mc comics, as hosted on GoComics."""
    # Other source: http://dorrismccomics.com
    name = 'dorrismc-goc'
    long_name = 'Dorris Mc (from GoComics)'
    url = 'http://www.gocomics.com/dorris-mccomics'
4051
4052
4053
class FoxTrot(GenericGoComic):
    """FoxTrot comics, as hosted on GoComics."""
    name = 'foxtrot'
    long_name = 'FoxTrot'
    url = 'http://www.gocomics.com/foxtrot'
4058
4059
4060
class FoxTrotClassics(GenericGoComic):
    """FoxTrot Classics comics, as hosted on GoComics."""
    name = 'foxtrot-classics'
    long_name = 'FoxTrot Classics'
    url = 'http://www.gocomics.com/foxtrotclassics'
4065
4066
4067
class MisterAndMeGoComics(GenericGoComic):
    """Mister & Me comics, as hosted on GoComics."""
    # Other sources:
    #  - http://www.mister-and-me.com
    #  - https://tapastic.com/series/Mister-and-Me
    name = 'mister-goc'
    long_name = 'Mister & Me (from GoComics)'
    url = 'http://www.gocomics.com/mister-and-me'
4074
4075
4076
class NonSequitur(GenericGoComic):
    """Non Sequitur (Wiley Miller) comics, as hosted on GoComics."""
    name = 'nonsequitur'
    long_name = 'Non Sequitur'
    url = 'http://www.gocomics.com/nonsequitur'
4081
4082
4083
class GenericTapasticComic(GenericListableComic):
    """Shared logic for the comics hosted on tapastic.com.

    Archive elements are the dictionaries decoded from the `episodeList`
    JavaScript value embedded in the series page.
    """

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Build the info dictionary for one episode.

        Returns None (after printing a notice) when the episode page has no
        image with class "art-image" yet.
        """
        # 'publishDate' is divided by 1000 before being used as a timestamp
        # (presumably milliseconds since the epoch - confirm).
        seconds = int(archive_elt['publishDate']) / 1000.0
        published = datetime.datetime.fromtimestamp(seconds).date()
        imgs = soup.find_all('img', class_='art-image')
        if not imgs:
            episode_url = cls.get_url_from_archive_element(archive_elt)
            print("Comic %s is being uploaded, retry later" % episode_url)
            return None
        assert len(imgs) > 0
        return {
            'day': published.day,
            'year': published.year,
            'month': published.month,
            'img': [i['src'] for i in imgs],
            'title': archive_elt['title'],
        }

    @classmethod
    def get_url_from_archive_element(cls, archive_elt):
        """Return the episode URL built from the element's 'id'."""
        return 'http://tapastic.com/episode/' + str(archive_elt['id'])

    @classmethod
    def get_archive_elements(cls):
        """Return the episode list decoded from the series page.

        Raises IndexError when no line of the page carries the episode list.
        """
        pref, suff = 'episodeList : ', ','
        # Information is stored in the javascript part
        # I don't know the clean way to get it so this is the ugly way.
        candidates = []
        for raw_line in urlopen_wrapper(cls.url).readlines():
            line = raw_line.decode('utf-8').strip()
            if line.startswith(pref) and line.endswith(suff):
                # Keep only the JSON payload between the prefix and suffix.
                candidates.append(line[len(pref):-len(suff)])
        return json.loads(candidates[0])
4115
4116
4117
class VegetablesForDessert(GenericTapasticComic):
    """Vegetables For Dessert comics, as hosted on Tapastic."""
    # Other source: http://vegetablesfordessert.tumblr.com
    name = 'vegetables'
    long_name = 'Vegetables For Dessert'
    url = 'http://tapastic.com/series/vegetablesfordessert'
4123
4124
4125
class FowlLanguageTapa(GenericTapasticComic):
    """Fowl Language comics, as hosted on Tapastic."""
    # Other sources:
    #  - http://www.fowllanguagecomics.com
    #  - http://fowllanguagecomics.tumblr.com
    #  - http://www.gocomics.com/fowl-language
    name = 'fowllanguage-tapa'
    long_name = 'Fowl Language Comics (from Tapastic)'
    url = 'http://tapastic.com/series/Fowl-Language-Comics'
4133
4134
4135
class OscillatingProfundities(GenericTapasticComic):
    """Oscillating Profundities comics, as hosted on Tapastic."""
    name = 'oscillating'
    long_name = 'Oscillating Profundities'
    url = 'http://tapastic.com/series/oscillatingprofundities'
4140
4141
4142
class ZnoflatsComics(GenericTapasticComic):
    """Znoflats comics, as hosted on Tapastic."""
    name = 'znoflats'
    long_name = 'Znoflats Comics'
    url = 'http://tapastic.com/series/Znoflats-Comics'
4147
4148
4149
class SandersenTapastic(GenericTapasticComic):
    """Sarah Andersen comics, as hosted on Tapastic."""
    # Other sources:
    #  - http://sarahcandersen.com
    #  - http://www.gocomics.com/sarahs-scribbles
    name = 'sandersen-tapa'
    long_name = 'Sarah Andersen (from Tapastic)'
    url = 'http://tapastic.com/series/Doodle-Time'
4156
4157
4158
class TubeyToonsTapastic(GenericTapasticComic):
    """TubeyToons comics, as hosted on Tapastic."""
    # Other sources:
    #  - http://tubeytoons.com
    #  - http://tubeytoons.tumblr.com
    name = 'tubeytoons-tapa'
    long_name = 'Tubey Toons (from Tapastic)'
    url = 'http://tapastic.com/series/Tubey-Toons'
4165
4166
4167
class AnythingComicTapastic(GenericTapasticComic):
    """Anything Comic comics, as hosted on Tapastic."""
    # Other source: http://www.anythingcomic.com
    name = 'anythingcomic-tapa'
    long_name = 'Anything Comic (from Tapastic)'
    url = 'http://tapastic.com/series/anything'
4173
4174
4175
class UnearthedComicsTapastic(GenericTapasticComic):
    """Unearthed Comics, as hosted on Tapastic."""
    # Other sources:
    #  - http://unearthedcomics.com
    #  - http://unearthedcomics.tumblr.com
    name = 'unearthed-tapa'
    long_name = 'Unearthed Comics (from Tapastic)'
    url = 'http://tapastic.com/series/UnearthedComics'
4182
4183
4184
class EverythingsStupidTapastic(GenericTapasticComic):
    """Everything's Stupid comics, as hosted on Tapastic."""
    # Other sources:
    #  - http://www.webtoons.com/en/challenge/everythings-stupid/list?title_no=14591
    #  - http://everythingsstupid.net
    name = 'stupid-tapa'
    long_name = "Everything's Stupid (from Tapastic)"
    url = 'http://tapastic.com/series/EverythingsStupid'
4191
4192
4193
class JustSayEhTapastic(GenericTapasticComic):
    """Just Say Eh comics, as hosted on Tapastic."""
    # Other source: http://www.justsayeh.com
    name = 'justsayeh-tapa'
    long_name = 'Just Say Eh (from Tapastic)'
    url = 'http://tapastic.com/series/Just-Say-Eh'
4199
4200
4201
class ThorsThundershackTapastic(GenericTapasticComic):
    """Thor's Thundershack comics, as hosted on Tapastic."""
    # Other source: http://www.thorsthundershack.com
    name = 'thor-tapa'
    long_name = "Thor's Thundershack (from Tapastic)"
    # NOTE(review): the series slug ends in "Thundershac" (no trailing "k");
    # kept as-is - confirm it matches the actual Tapastic URL.
    url = 'http://tapastic.com/series/Thors-Thundershac'
4207
4208
4209
class OwlTurdTapastic(GenericTapasticComic):
    """Owl Turd comics, as hosted on Tapastic."""
    # Other source: http://owlturd.com
    name = 'owlturd-tapa'
    long_name = 'Owl Turd (from Tapastic)'
    url = 'http://tapastic.com/series/Owl-Turd-Comix'
4215
4216
4217
class GoneIntoRaptureTapastic(GenericTapasticComic):
    """Gone Into Rapture comics, as hosted on Tapastic."""
    # Other sources:
    #  - http://goneintorapture.tumblr.com
    #  - http://www.goneintorapture.com
    name = 'rapture-tapa'
    long_name = 'Gone Into Rapture (from Tapastic)'
    url = 'http://tapastic.com/series/Goneintorapture'
4224
4225
4226
class HeckIfIKnowComicsTapa(GenericTapasticComic):
    """Heck If I Know comics, as hosted on Tapastic."""
    # Other source: http://heckifiknowcomics.com
    name = 'heck-tapa'
    long_name = 'Heck if I Know comics (from Tapastic)'
    url = 'http://tapastic.com/series/Regular'
4232
4233
4234
class CheerUpEmoKidTapa(GenericTapasticComic):
    """Cheer Up Emo Kid comics, as hosted on Tapastic."""
    # Other sources:
    #  - http://www.cheerupemokid.com
    #  - http://enzocomics.tumblr.com
    name = 'cuek-tapa'
    long_name = 'Cheer Up Emo Kid (from Tapastic)'
    url = 'http://tapastic.com/series/CUEK'
4241
4242
4243
class BigFootJusticeTapa(GenericTapasticComic):
    """Big Foot Justice comics, as hosted on Tapastic."""
    # Other source: http://bigfootjustice.com
    name = 'bigfoot-tapa'
    long_name = 'Big Foot Justice (from Tapastic)'
    url = 'http://tapastic.com/series/bigfoot-justice'
4249
4250
4251
class UpAndOutTapa(GenericTapasticComic):
    """Up & Out comics, as hosted on Tapastic."""
    # Other source: http://upandoutcomic.tumblr.com
    name = 'upandout-tapa'
    long_name = 'Up And Out (from Tapastic)'
    url = 'http://tapastic.com/series/UP-and-OUT'
4257
4258
4259
class ToonHoleTapa(GenericTapasticComic):
    """Toon Hole comics, as hosted on Tapastic."""
    # Other source: http://www.toonhole.com
    name = 'toonhole-tapa'
    long_name = 'Toon Hole (from Tapastic)'
    url = 'http://tapastic.com/series/TOONHOLE'
4265
4266
4267
class AngryAtNothingTapa(GenericTapasticComic):
    """Angry At Nothing comics, as hosted on Tapastic."""
    # Other source: http://www.angryatnothing.net
    name = 'angry-tapa'
    long_name = 'Angry At Nothing (from Tapastic)'
    url = 'http://tapastic.com/series/Comics-yeah-definitely-comics-'
4273
4274
4275
class LeleozTapa(GenericTapasticComic):
    """Leleoz comics, as hosted on Tapastic."""
    # Other source: http://leleozcomics.tumblr.com
    name = 'leleoz-tapa'
    long_name = 'Leleoz (from Tapastic)'
    url = 'https://tapastic.com/series/Leleoz'
4281
4282
4283
class TheAwkwardYetiTapa(GenericTapasticComic):
    """The Awkward Yeti comics, as hosted on Tapastic."""
    # Other sources:
    #  - http://www.gocomics.com/the-awkward-yeti
    #  - http://theawkwardyeti.com
    #  - http://larstheyeti.tumblr.com
    name = 'yeti-tapa'
    long_name = 'The Awkward Yeti (from Tapastic)'
    url = 'https://tapastic.com/series/TheAwkwardYeti'
4291
4292
4293
class AsPerUsualTapa(GenericTapasticComic):
    """As Per Usual comics, as hosted on Tapastic."""
    # Other source: http://as-per-usual.tumblr.com
    name = 'usual-tapa'
    long_name = 'As Per Usual (from Tapastic)'
    url = 'https://tapastic.com/series/AsPerUsual'
4299
4300
4301
class OneOneOneOneComicTapa(GenericTapasticComic):
    """1111 Comics, as hosted on Tapastic."""
    # Other sources:
    #  - http://www.1111comics.me
    #  - http://comics1111.tumblr.com
    name = '1111-tapa'
    long_name = '1111 Comics (from Tapastic)'
    url = 'https://tapastic.com/series/1111-Comics'
4308
4309
4310
class TumbleDryTapa(GenericTapasticComic):
    """Tumble Dry comics, as hosted on Tapastic."""
    # Other source: http://tumbledrycomics.com
    name = 'tumbledry-tapa'
    # Display name; previously misspelled "Tumblr Dry" although the comic
    # (see `name` and `url`) is called "Tumble Dry".
    long_name = 'Tumble Dry (from Tapastic)'
    url = 'https://tapastic.com/series/TumbleDryComics'
4316
4317
4318
class DeadlyPanelTapa(GenericTapasticComic):
    """Deadly Panel comics, as hosted on Tapastic."""
    # Other source: http://www.deadlypanel.com
    name = 'deadly-tapa'
    long_name = 'Deadly Panel (from Tapastic)'
    url = 'https://tapastic.com/series/deadlypanel'
4324
4325
4326
class ChrisHallbeckMaxiTapa(GenericTapasticComic):
    """Chris Hallbeck's Maximumble comics, as hosted on Tapastic."""
    # Other sources:
    #  - http://chrishallbeck.tumblr.com
    #  - http://maximumble.com
    name = 'hallbeckmaxi-tapa'
    # Display name; the author's surname was previously misspelled "Hallback".
    long_name = 'Chris Hallbeck - Maximumble (from Tapastic)'
    url = 'https://tapastic.com/series/Maximumble'
4333
4334
4335
class ChrisHallbeckMiniTapa(GenericTapasticComic):
    """Chris Hallbeck's Minimumble comics, as hosted on Tapastic."""
    # Other sources:
    #  - http://chrishallbeck.tumblr.com
    #  - http://minimumble.com
    name = 'hallbeckmini-tapa'
    # Display name; the author's surname was previously misspelled "Hallback".
    long_name = 'Chris Hallbeck - Minimumble (from Tapastic)'
    url = 'https://tapastic.com/series/Minimumble'
4342
4343
4344
class ChrisHallbeckBiffTapa(GenericTapasticComic):
    """Chris Hallbeck's The Book of Biff comics, as hosted on Tapastic."""
    # Other sources:
    #  - http://chrishallbeck.tumblr.com
    #  - http://thebookofbiff.com
    name = 'hallbeckbiff-tapa'
    # Display name; the author's surname was previously misspelled "Hallback".
    long_name = 'Chris Hallbeck - The Book of Biff (from Tapastic)'
    url = 'https://tapastic.com/series/Biff'
4351
4352
4353
class RandoWisTapa(GenericTapasticComic):
    """RandoWis comics, as hosted on Tapastic."""
    # Other source: https://randowis.com
    name = 'randowis-tapa'
    long_name = 'RandoWis (from Tapastic)'
    url = 'https://tapastic.com/series/RandoWis'
4359
4360
4361
class PigeonGazetteTapa(GenericTapasticComic):
    """The Pigeon Gazette comics, as hosted on Tapastic."""
    # Other source: http://thepigeongazette.tumblr.com
    name = 'pigeon-tapa'
    long_name = 'The Pigeon Gazette (from Tapastic)'
    url = 'https://tapastic.com/series/The-Pigeon-Gazette'
4367
4368
4369
class TheOdd1sOutTapa(GenericTapasticComic):
    """The Odd 1s Out comics, as hosted on Tapastic."""
    # Other sources:
    #  - http://theodd1sout.com
    #  - http://theodd1sout.tumblr.com
    name = 'theodd-tapa'
    long_name = 'The Odd 1s Out (from Tapastic)'
    url = 'https://tapastic.com/series/Theodd1sout'
4376
4377
4378
class TheWorldIsFlatTapa(GenericTapasticComic):
    """The World Is Flat comics, as hosted on Tapastic."""
    # Other source: http://theworldisflatcomics.tumblr.com
    name = 'flatworld-tapa'
    long_name = 'The World Is Flat (from Tapastic)'
    url = 'https://tapastic.com/series/The-World-is-Flat'
4384
4385
4386
class MisterAndMeTapa(GenericTapasticComic):
    """Mister & Me comics, as hosted on Tapastic."""
    # Other sources:
    #  - http://www.mister-and-me.com
    #  - http://www.gocomics.com/mister-and-me
    name = 'mister-tapa'
    long_name = 'Mister & Me (from Tapastic)'
    url = 'https://tapastic.com/series/Mister-and-Me'
4393
4394
4395
class TalesOfAbsurdityTapa(GenericTapasticComic):
    """Tales Of Absurdity comics, as hosted on Tapastic."""
    # Other sources:
    #  - http://talesofabsurdity.com
    #  - http://talesofabsurdity.tumblr.com
    name = 'absurdity-tapa'
    long_name = 'Tales of Absurdity (from Tapastic)'
    url = 'http://tapastic.com/series/Tales-Of-Absurdity'
4402
4403
4404
class BFGFSTapa(GenericTapasticComic):
    """BFGFS comics, as hosted on Tapastic."""
    # Other sources:
    #  - http://bfgfs.com
    #  - http://bfgfs.tumblr.com
    name = 'bfgfs-tapa'
    long_name = 'BFGFS (from Tapastic)'
    url = 'https://tapastic.com/series/BFGFS'
4411
4412
4413
class DoodleForFoodTapa(GenericTapasticComic):
    """Doodle For Food comics, as hosted on Tapastic."""
    # Other source: http://doodleforfood.com
    name = 'doodle-tapa'
    long_name = 'Doodle For Food (from Tapastic)'
    url = 'https://tapastic.com/series/Doodle-for-Food'
4419
4420
4421
class MrLovensteinTapa(GenericTapasticComic):
    """Mr Lovenstein comics, as hosted on Tapastic."""
    # Also on  https://tapastic.com/series/MrLovenstein
    # NOTE(review): the "Also on" link above is this class's own URL;
    # presumably the comic's main site was intended - confirm and replace.
    name = 'mrlovenstein-tapa'
    long_name = 'Mr. Lovenstein (from Tapastic)'
    url = 'https://tapastic.com/series/MrLovenstein'
4427
4428
4429
class CassandraCalinTapa(GenericTapasticComic):
    """C. Cassandra comics, as hosted on Tapastic."""
    # Other sources:
    #  - http://cassandracalin.com
    #  - http://c-cassandra.tumblr.com
    name = 'cassandra-tapa'
    long_name = 'Cassandra Calin (from Tapastic)'
    url = 'https://tapastic.com/series/C-Cassandra-comics'
4436
4437
4438
class WafflesAndPancakes(GenericTapasticComic):
    """Waffles And Pancakes comics, as hosted on Tapastic."""
    # Other source: http://wandpcomic.com
    name = 'waffles'
    long_name = 'Waffles And Pancakes'
    url = 'https://tapastic.com/series/Waffles-and-Pancakes'
4444
4445
4446
def get_subclasses(klass):
    """Return every direct and indirect subclass of `klass` as a list.

    Direct subclasses come first, followed by the descendants of each direct
    subclass in turn. A class reachable through several paths appears once
    per path.
    """
    direct = klass.__subclasses__()
    indirect = [sub for child in direct for sub in get_subclasses(child)]
    return direct + indirect
4452
4453
4454
def remove_st_nd_rd_th_from_date(string):
    """Strip the ordinal suffix from day numbers so the date can be parsed.

    "1st"/"2nd"/"3rd"/"4th" become "1"/"2"/"3"/"4". Only a suffix that
    directly follows a digit is removed, so words that merely contain those
    letters ("August", "Monday", "Saturday", "month") are left untouched.

    Returns the cleaned string.
    """
    # The look-behind keeps the digit and drops only the two-letter suffix.
    return re.sub(r'(?<=[0-9])(?:st|nd|rd|th)', '', string)
4462
4463
4464
def string_to_date(string, date_format, local=DEFAULT_LOCAL):
    """Convert a string to a date object.

    Wrapper around datetime.datetime.strptime which temporarily switches the
    process locale to `local` while parsing, then restores the previous one.

    `string` is the text to parse, `date_format` the strptime format and
    `local` the locale name to parse with. Returns a datetime.date.
    Raises ValueError (from strptime) when the string does not match the
    format and locale.Error when `local` is not available.
    """
    # format described in https://docs.python.org/2/library/datetime.html#strftime-and-strptime-behavior
    prev_locale = locale.setlocale(locale.LC_ALL)
    if local == prev_locale:
        return datetime.datetime.strptime(string, date_format).date()
    locale.setlocale(locale.LC_ALL, local)
    try:
        return datetime.datetime.strptime(string, date_format).date()
    finally:
        # Restore the previous locale even when parsing fails, so a bad
        # input does not leave the whole process in the parsing locale.
        locale.setlocale(locale.LC_ALL, prev_locale)
4475
4476
4477
# Registry of the comic classes defined in this module.
# Every class deriving (directly or not) from GenericComic.
COMICS = set(get_subclasses(GenericComic))
# Classes with a `name` are the retrievable comics; the others are generic bases.
VALID_COMICS = [comic for comic in COMICS if comic.name is not None]
# Lookup tables by short name and by class name.
COMIC_NAMES = {comic.name: comic for comic in VALID_COMICS}
CLASS_NAMES = {comic.__name__ for comic in VALID_COMICS}
# Short names and class names must both be unique among valid comics.
assert len(COMIC_NAMES) == len(VALID_COMICS)
assert len(CLASS_NAMES) == len(VALID_COMICS)
4483