Completed
Push — master ( a52e41...3f1894 )
by De
01:21
created

comics.py (5 issues)

1
#! /usr/bin/python3
2
# vim: set expandtab tabstop=4 shiftwidth=4 :
3
"""Module to retrieve webcomics"""
4
5
from comic_abstract import GenericComic, get_date_for_comic
6
import re
7
from datetime import date, timedelta
8
import datetime
9
from urlfunctions import get_soup_at_url, urljoin_wrapper,\
10
    convert_iri_to_plain_ascii_uri, load_json_at_url, urlopen_wrapper
11
import json
12
import locale
13
import urllib
14
15
# Default locale name (British English, UTF-8 encoding).
# NOTE(review): the code consuming this constant is not visible in this part of the file.
DEFAULT_LOCAL = 'en_GB.UTF-8'
16
17
18
class Xkcd(GenericComic):
    """Retriever for the xkcd webcomic, driven by the site's JSON endpoints."""
    name = 'xkcd'
    long_name = 'xkcd'
    url = 'http://xkcd.com'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Yield the comics published after `last_comic` (GenericComic hook).

        `last_comic` is the previously retrieved comic (a mapping with a
        'num' entry) or a falsy value, in which case retrieval starts at
        number 1. Each yielded comic is the JSON document of the comic with
        'img' wrapped in a list, 'day'/'month'/'year' converted to int and
        the extra keys 'prefix', 'json_url' and 'url' added.
        """
        start = (last_comic['num'] if last_comic else 0) + 1
        latest = load_json_at_url(urljoin_wrapper(cls.url, 'info.0.json'))
        stop = latest['num'] + 1

        for num in range(start, stop):
            if num == 404:
                # Number 404 is deliberately never requested.
                continue
            json_url = urljoin_wrapper(cls.url, '%d/info.0.json' % num)
            comic = load_json_at_url(json_url)
            comic['img'] = [comic['img']]
            comic['prefix'] = '%d-' % num
            comic['json_url'] = json_url
            comic['url'] = urljoin_wrapper(cls.url, str(num))
            for field in ('day', 'month', 'year'):
                comic[field] = int(comic[field])
            assert comic['num'] == num
            yield comic
44
45
46
# Helper functions corresponding to get_url_from_link/get_url_from_archive_element
47
48
49
@classmethod
def get_href(cls, link):
    """Return the 'href' entry of `link`.

    Meant to be assigned in a class body as its get_url_from_link or
    get_url_from_archive_element implementation.
    """
    href = link['href']
    return href
53
54
55
@classmethod
def join_cls_url_to_href(cls, link):
    """Return the 'href' entry of `link` joined onto the class `url`.

    Meant to be assigned in a class body as its get_url_from_link or
    get_url_from_archive_element implementation.
    """
    base, href = cls.url, link['href']
    return urljoin_wrapper(base, href)
59
60
61
class GenericNavigableComic(GenericComic):
    """Generic class for "navigable" comics : with first/next arrows.

    This class applies to comics where previous and next comics can be
    accessed from a given comic. Once given a starting point (either
    the first comic or the last comic retrieved), it handles the
    navigation, the retrieval of the soup object and the setting of
    the 'url' attribute on retrieved comics. This greatly limits the
    amount of boilerplate code in the different implementation classes.

    The `get_next_comic` method is implemented in terms of new
    more specialized methods to be implemented/overridden:
        - get_first_comic_link
        - get_navi_link
        - get_comic_info
        - get_url_from_link
    """

    @classmethod
    def get_first_comic_link(cls):
        """Get link to the first comic.

        Sometimes this can be retrieved from any comic page, sometimes from
        the archive page, sometimes it doesn't exist at all and one has
        to iterate backward to find it before hardcoding the result found.
        """
        raise NotImplementedError

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next (or previous - for dev purposes) comic.

        `next_` is truthy to ask for the next comic and falsy to ask for
        the previous one. A falsy result means there is no such link.
        """
        raise NotImplementedError

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        Called with the soup of the comic page and the link that led to it.
        The result is either None (the comic is skipped) or a dict which
        must not already contain a 'url' key (get_next_comic adds it).
        """
        raise NotImplementedError

    @classmethod
    def get_url_from_link(cls, link):
        """Get url corresponding to a link. Default implementation is similar to get_href."""
        return link['href']

    @classmethod
    def get_next_link(cls, last_soup):
        """Get link to next comic."""
        return cls.get_navi_link(last_soup, True)

    @classmethod
    def get_prev_link(cls, last_soup):
        """Get link to previous comic."""
        return cls.get_navi_link(last_soup, False)

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generic implementation of get_next_comic for navigable comics.

        Starts after `last_comic` (a mapping with a 'url' entry) when it is
        truthy, from the first comic otherwise, and follows the "next" links
        until none is found or a link points back to the current url.
        """
        url = last_comic['url'] if last_comic else None
        next_comic = \
            cls.get_next_link(get_soup_at_url(url)) \
            if url else \
            cls.get_first_comic_link()
        cls.log("next/first comic will be %s (url is %s)" % (str(next_comic), url))
        while next_comic:
            prev_url, url = url, cls.get_url_from_link(next_comic)
            if prev_url == url:
                # A page linking to itself would otherwise loop forever.
                cls.log("got same url %s" % url)
                break
            cls.log("about to get %s (%s)" % (url, str(next_comic)))
            soup = get_soup_at_url(url)
            comic = cls.get_comic_info(soup, next_comic)
            if comic is not None:
                assert 'url' not in comic
                comic['url'] = url
                yield comic
            next_comic = cls.get_next_link(soup)
            cls.log("next comic will be %s" % str(next_comic))

    @classmethod
    def check_first_link(cls):
        """Check that navigation to first comic seems to be working - for dev purposes.

        Returns True when a first link is found and its page can be fetched
        without an HTTP error, False otherwise (a message is printed).
        """
        # `import urllib` alone does not guarantee that the `error`
        # submodule is loaded, so import it explicitly where it is used.
        import urllib.error

        cls.log("about to check first link")
        ok = True
        firstlink = cls.get_first_comic_link()
        if firstlink is None:
            print("From %s : no first link" % cls.url)
            ok = False
        else:
            firsturl = cls.get_url_from_link(firstlink)
            try:
                get_soup_at_url(firsturl)
            except urllib.error.HTTPError:
                print("From %s : invalid first url" % cls.url)
                ok = False
        cls.log("checked first link -> returned %d" % ok)
        return ok

    @classmethod
    def check_prev_next_links(cls, url):
        """Check that navigation to prev/next from a given URL seems to be working - for dev purposes.

        Goes one step backward then forward (and one step forward then
        backward) and checks that this leads back to `url`. Returns True
        when everything is consistent, False otherwise (messages are
        printed). A missing return link is reported as a failure.
        """
        cls.log("about to check prev/next from %s" % url)
        ok = True
        if url is None:
            prevlink, nextlink = None, None
        else:
            soup = get_soup_at_url(url)
            prevlink, nextlink = cls.get_prev_link(soup), cls.get_next_link(soup)
        if prevlink is None and nextlink is None:
            print("From %s : no previous nor next" % url)
            ok = False
        else:
            if prevlink:
                prevurl = cls.get_url_from_link(prevlink)
                prevsoup = get_soup_at_url(prevurl)
                # The previous page may have no "next" link at all: report
                # it as a mismatch instead of failing on a None link.
                forward = cls.get_next_link(prevsoup)
                prevnext = cls.get_url_from_link(forward) if forward else None
                if prevnext != url:
                    print("From %s, going backward then forward leads to %s" % (url, prevnext))
                    ok = False
            if nextlink:
                nexturl = cls.get_url_from_link(nextlink)
                if nexturl != url:
                    nextsoup = get_soup_at_url(nexturl)
                    # Same guard for a next page without a "previous" link.
                    backward = cls.get_prev_link(nextsoup)
                    nextprev = cls.get_url_from_link(backward) if backward else None
                    if nextprev != url:
                        print("From %s, going forward then backward leads to %s" % (url, nextprev))
                        ok = False
        cls.log("checked prev/next from %s -> returned %d" % (url, ok))
        return ok

    @classmethod
    def check_navigation(cls, url):
        """Check that navigation functions seem to be working - for dev purposes.

        Runs both check_first_link and check_prev_next_links (each one is
        always executed) and returns True only when both succeed.
        """
        cls.log("about to check navigation from %s" % url)
        first = cls.check_first_link()
        prevnext = cls.check_prev_next_links(url)
        ok = first and prevnext
        cls.log("checked navigation from %s -> returned %d" % (url, ok))
        return ok
198
199
200
class GenericListableComic(GenericComic):
    """Base class for "listable" comics, i.e. comics exposing a list ('archive').

    `get_next_comic` is written in terms of more specialized hooks that
    concrete classes implement/override:
        - get_archive_elements
        - get_url_from_archive_element
        - get_comic_info
    """

    @classmethod
    def get_archive_elements(cls):
        """Return an iterable of archive elements."""
        raise NotImplementedError

    @classmethod
    def get_url_from_archive_element(cls, archive_elt):
        """Return the url an archive element points to."""
        raise NotImplementedError

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Return the info dict of one comic (or None to skip it)."""
        raise NotImplementedError

    @classmethod
    def get_next_comic(cls, last_comic):
        """Yield the comics listed after `last_comic` in the archive.

        Archive elements are skipped until the url of `last_comic` has been
        seen; every following element is fetched and, when get_comic_info
        returns a dict, yielded with its 'url' key set. If the url of
        `last_comic` never shows up, a warning is printed.
        """
        pending = last_comic['url'] if last_comic else None
        for elt in cls.get_archive_elements():
            url = cls.get_url_from_archive_element(elt)
            cls.log("considering %s" % url)
            if pending and pending == url:
                # Found the last retrieved comic: start with the next one.
                pending = None
                continue
            if pending is not None:
                continue
            cls.log("about to get %s (%s)" % (url, str(elt)))
            info = cls.get_comic_info(get_soup_at_url(url), elt)
            if info is None:
                continue
            assert 'url' not in info
            info['url'] = url
            yield info
        if pending is not None:
            print("Did not find %s : there might be a problem" % pending)
244
245
# Helper functions corresponding to get_first_comic_link/get_navi_link
246
247
248
@classmethod
def get_link_rel_next(cls, last_soup, next_):
    """get_navi_link implementation: first 'link' tag with rel 'next' or 'prev'."""
    if next_:
        rel = 'next'
    else:
        rel = 'prev'
    return last_soup.find('link', rel=rel)
252
253
254
@classmethod
def get_a_rel_next(cls, last_soup, next_):
    """get_navi_link implementation: first 'a' tag with rel 'next' or 'prev'."""
    if next_:
        rel = 'next'
    else:
        rel = 'prev'
    return last_soup.find('a', rel=rel)
258
259
260
@classmethod
def get_a_navi_navinext(cls, last_soup, next_):
    """get_navi_link implementation: first 'a' tag with the 'navi navi-next'/'navi navi-prev' class."""
    if next_:
        css_class = 'navi navi-next'
    else:
        css_class = 'navi navi-prev'
    return last_soup.find('a', class_=css_class)
264
265
266
@classmethod
def get_a_navi_comicnavnext_navinext(cls, last_soup, next_):
    """get_navi_link implementation: first 'a' tag with the 'navi comic-nav-* navi-*' class."""
    if next_:
        css_class = 'navi comic-nav-next navi-next'
    else:
        css_class = 'navi comic-nav-previous navi-prev'
    return last_soup.find('a', class_=css_class)
270
271
272
@classmethod
def get_a_comicnavbase_comicnavnext(cls, last_soup, next_):
    """get_navi_link implementation: first 'a' tag with the 'comic-nav-base comic-nav-*' class."""
    if next_:
        css_class = 'comic-nav-base comic-nav-next'
    else:
        css_class = 'comic-nav-base comic-nav-previous'
    return last_soup.find('a', class_=css_class)
276
277
278
@classmethod
def get_a_navi_navifirst(cls):
    """get_first_comic_link implementation: 'a' tag with class 'navi navi-first' on the page at `cls.url`."""
    home_soup = get_soup_at_url(cls.url)
    return home_soup.find('a', class_='navi navi-first')
282
283
284
@classmethod
def get_div_navfirst_a(cls):
    """Implementation of get_first_comic_link.

    Returns the first 'a' tag found inside the 'div' of class "nav-first"
    on the page at `cls.url`, or None when that div is missing (instead of
    failing with an AttributeError), so that callers get the usual
    "no first link" value.
    """
    div = get_soup_at_url(cls.url).find('div', class_="nav-first")
    return div.find('a') if div is not None else None
288
289
290
@classmethod
def get_a_comicnavbase_comicnavfirst(cls):
    """get_first_comic_link implementation: 'a' tag with class 'comic-nav-base comic-nav-first' on the page at `cls.url`."""
    home_soup = get_soup_at_url(cls.url)
    return home_soup.find('a', class_='comic-nav-base comic-nav-first')
294
295
296
@classmethod
def simulate_first_link(cls):
    """get_first_comic_link implementation building a link-like dict.

    The dict has a single 'href' entry holding the class's `first_url`.
    """
    fake_link = {'href': cls.first_url}
    return fake_link
301
302
303
@classmethod
def navigate_to_first_comic(cls):
    """get_first_comic_link implementation walking back from a user-given URL.

    Sometimes the first comic cannot be reached directly: starting from
    a URL typed by the user, the "previous" link is followed until a page
    has none. Every visited URL is printed. Once the final URL is known
    it is better to hardcode it, but during development it is convenient
    to find it automatically.

    Returns a link-like dict whose 'href' is the last URL reached.
    """
    url = input("Get starting URL: ")
    print(url)
    while True:
        prev_link = cls.get_prev_link(get_soup_at_url(url))
        if not prev_link:
            return {'href': url}
        url = cls.get_url_from_link(prev_link)
        print(url)
322
323
324
class GenericEmptyComic(GenericComic):
    """Base class for comics for which nothing has to be retrieved.

    Handy to temporarily deactivate a comic that does not work properly:
    replace `class MyComic(GenericWhateverComic)` with
    `class MyComic(GenericEmptyComic, GenericWhateverComic)`."""

    @classmethod
    def get_next_comic(cls, last_comic):
        """Log that the comic is empty and return a new empty list."""
        cls.log("comic is considered as empty - returning no comic")
        return list()
336
337
338 View Code Duplication
class ExtraFabulousComics(GenericNavigableComic):
    """Class to retrieve Extra Fabulous Comics."""
    name = 'efc'
    long_name = 'Extra Fabulous Comics'
    url = 'http://extrafabulouscomics.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        Returns a dict with the keys 'title', 'img' (list of image urls
        located under the site's wp-content/uploads/ directory), 'month',
        'year', 'day' and 'prefix' (the title followed by '-').
        """
        # The site url is escaped so that its dots only match literal dots.
        img_src_re = re.compile('^%s/wp-content/uploads/' % re.escape(cls.url))
        imgs = soup.find_all('img', src=img_src_re)
        title = soup.find('meta', property='og:title')['content']
        # Only the first 10 characters (the YYYY-MM-DD part) are parsed.
        date_str = soup.find('meta', property='article:published_time')['content'][:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        return {
            'title': title,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'prefix': title + '-'
        }
362
363
364
class GenericLeMondeBlog(GenericNavigableComic):
    """Shared retrieval logic for comics published as Le Monde blogs.

    Concrete classes are expected to override `first_url`.
    """
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict of the comic page held in `soup`.

        The dict has the keys 'title', 'url2' (the page's shortlink), 'img'
        (og:image urls converted to plain ASCII URIs), 'month', 'year' and
        'day'. The date is parsed with the "fr_FR.utf8" locale.
        """
        shortlink = soup.find('link', rel='shortlink')['href']
        og_title = soup.find('meta', property='og:title')['content']
        raw_date = soup.find("span", class_="entry-date").string
        published = string_to_date(raw_date, "%d %B %Y", "fr_FR.utf8")
        og_images = soup.find_all('meta', property='og:image')
        info = {
            'title': og_title,
            'url2': shortlink,
            'img': [convert_iri_to_plain_ascii_uri(meta['content']) for meta in og_images],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
        return info
386
387
388
class ZepWorld(GenericLeMondeBlog):
    """Retriever for the Zep World blog comics."""
    name = 'zep'
    long_name = 'Zep World'
    url = 'http://zepworld.blog.lemonde.fr'
    first_url = 'http://zepworld.blog.lemonde.fr/2014/10/31/bientot-le-blog-de-zep/'
394
395
396
class Vidberg(GenericLeMondeBlog):
    """Retriever for the Vidberg blog comics."""
    name = "vidberg"
    long_name = "Vidberg - l'actu en patates"
    url = 'http://vidberg.blog.lemonde.fr'
    # Not the first but I didn't find an efficient way to retrieve it
    first_url = 'http://vidberg.blog.lemonde.fr/2012/02/09/revue-de-campagne-la-campagne-du-modem-semballe/'
403
404
405
class Plantu(GenericLeMondeBlog):
    """Retriever for the Plantu blog comics."""
    name = "plantu"
    long_name = 'Plantu'
    url = 'http://plantu.blog.lemonde.fr'
    first_url = 'http://plantu.blog.lemonde.fr/2014/10/28/stress-test-a-bruxelles/'
411
412
413
class XavierGorce(GenericLeMondeBlog):
    """Retriever for the Xavier Gorce blog comics."""
    name = "gorce"
    long_name = 'Xavier Gorce'
    url = 'http://xaviergorce.blog.lemonde.fr'
    first_url = 'http://xaviergorce.blog.lemonde.fr/2015/01/09/distinction/'
419
420
421
class CartooningForPeace(GenericLeMondeBlog):
    """Retriever for the Cartooning For Peace blog comics."""
    name = "forpeace"
    long_name = 'Cartooning For Peace'
    url = 'http://cartooningforpeace.blog.lemonde.fr'
    first_url = 'http://cartooningforpeace.blog.lemonde.fr/2014/12/15/bado/'
427
428
429
class Aurel(GenericLeMondeBlog):
    """Retriever for the Aurel blog comics."""
    name = "aurel"
    long_name = 'Aurel'
    url = 'http://aurel.blog.lemonde.fr'
    first_url = 'http://aurel.blog.lemonde.fr/2014/09/29/le-senat-repasse-a-droite/'
435
436
437
class LesCulottees(GenericLeMondeBlog):
    """Retriever for the Les Culottees blog comics."""
    name = "culottees"
    long_name = "Les Culottees"
    url = 'http://lesculottees.blog.lemonde.fr'
    first_url = 'http://lesculottees.blog.lemonde.fr/2016/01/11/clementine-delait-femme-a-barbe/'
443
444
445
class UneAnneeAuLycee(GenericLeMondeBlog):
    """Retriever for the Une Annee Au Lycee blog comics."""
    name = "lycee"
    long_name = "Une Annee au Lycee"
    url = "http://uneanneeaulycee.blog.lemonde.fr"
    first_url = 'http://uneanneeaulycee.blog.lemonde.fr/2016/06/13/la-semaine-du-bac-est-arrivee/'
451
452
453 View Code Duplication
class Rall(GenericNavigableComic):
    """Retriever for the Ted Rall comics."""
    # Also on http://www.gocomics.com/tedrall
    name = "rall"
    long_name = 'Ted Rall'
    url = 'http://rall.com/comic'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    # Not the first but I didn't find an efficient way to retrieve it
    first_url = 'http://rall.com/2014/01/30/los-angeles-times-cartoon-well-miss-those-california-flowers'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict of the comic page held in `soup`.

        The dict has the keys 'title', 'author', 'month', 'year', 'day',
        'description' and 'img' (list of image 'src' values).
        """
        og_title = soup.find('meta', property='og:title')['content']
        author_span = soup.find("span", class_="author vcard")
        author_name = author_span.find("a").string
        raw_date = soup.find("span", class_="entry-date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        og_desc = soup.find('meta', property='og:description')['content']
        content = soup.find('div', class_='entry-content')
        # Drop the last 7 images: they are social media buttons.
        comic_imgs = content.find_all('img')[:-7]
        info = {
            'title': og_title,
            'author': author_name,
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'description': og_desc,
            'img': [img['src'] for img in comic_imgs],
        }
        return info
483
484
485
class Dilem(GenericNavigableComic):
    """Retriever for the Ali Dilem comics."""
    name = "dilem"
    long_name = "Ali Dilem"
    url = "http://information.tv5monde.com/dilem"
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://information.tv5monde.com/dilem/2004-06-26'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the 'a' tag leading to the next or previous comic, or None."""
        # prev is next / next is prev
        wanted = 'prev' if next_ else 'next'
        item = last_soup.find('li', class_=wanted)
        if item:
            return item.find('a')
        return None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict of the comic page held in `soup`.

        The dict has the keys 'short_url', 'title', 'img' (og:image
        contents), 'day', 'month' and 'year'.
        """
        shortlink = soup.find('link', rel='shortlink')['href']
        twitter_title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        og_images = soup.find_all('meta', property='og:image')
        # Only the first 10 characters (the YYYY-MM-DD part) are parsed.
        raw_date = soup.find('span', property='dc:date')['content'][:10]
        published = string_to_date(raw_date, "%Y-%m-%d")
        info = {
            'short_url': shortlink,
            'title': twitter_title,
            'img': [meta['content'] for meta in og_images],
            'day': published.day,
            'month': published.month,
            'year': published.year,
        }
        return info
518
519
520 View Code Duplication
class SpaceAvalanche(GenericNavigableComic):
    """Retriever for the Space Avalanche comics."""
    name = "avalanche"
    long_name = "Space Avalanche"
    url = "http://www.spaceavalanche.com"
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return a hardcoded link-like dict ('href' and 'title') for the first comic."""
        first = {
            'href': "http://www.spaceavalanche.com/2009/02/02/irish-sea/",
            'title': "Irish Sea",
        }
        return first

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict of the comic page held in `soup`.

        The title comes from the link's 'title' entry and the date from the
        year/month/day path segments of the comic url. The dict has the
        keys 'title', 'day', 'month', 'year' and 'img'.
        """
        url_date_re = re.compile('.*/([0-9]*)/([0-9]*)/([0-9]*)/.*$')
        link_title = link['title']
        found = url_date_re.match(cls.get_url_from_link(link))
        year, month, day = map(int, found.groups())
        entry_imgs = soup.find("div", class_="entry").find_all("img")
        info = {
            'title': link_title,
            'day': day,
            'month': month,
            'year': year,
            'img': [img['src'] for img in entry_imgs],
        }
        return info
548
549
550
class ZenPencils(GenericNavigableComic):
    """Retriever for the Zen Pencils comics."""
    # Also on http://zenpencils.tumblr.com
    # Also on http://www.gocomics.com/zen-pencils
    name = "zenpencils"
    long_name = "Zen Pencils"
    url = "http://zenpencils.com"
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://zenpencils.com/comic/1-ralph-waldo-emerson-make-them-cry/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict of the comic page held in `soup`.

        The dict has the keys 'title', 'description', 'author', 'day',
        'month', 'year' and 'img' (image 'src' values joined onto the site
        url). Assertions check that at least one image is found and that
        each image's 'alt' equals its 'title' and is either the page title
        or empty.
        """
        comic_imgs = soup.find('div', id='comic').find_all('img')
        post = soup.find('div', class_='post-content')
        author_name = post.find("span", class_="post-author").find("a").string
        og_title = soup.find('meta', property='og:title')['content']
        raw_date = post.find('span', class_='post-date').string
        published = string_to_date(raw_date, "%B %d, %Y")
        assert comic_imgs
        assert all(img['alt'] == img['title'] for img in comic_imgs)
        assert all(img['alt'] in (og_title, "") for img in comic_imgs)
        og_desc = soup.find('meta', property='og:description')['content']
        info = {
            'title': og_title,
            'description': og_desc,
            'author': author_name,
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [urljoin_wrapper(cls.url, img['src']) for img in comic_imgs],
        }
        return info
584
585
586
class ItsTheTie(GenericNavigableComic):
    """Retriever for the It's the tie comics."""
    # Also on http://itsthetie.tumblr.com
    # Also on https://tapastic.com/series/itsthetie
    name = "tie"
    long_name = "It's the tie"
    url = 'http://itsthetie.com'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict of the comic page held in `soup`.

        The dict has the keys 'title', 'month', 'year', 'day', 'img' and
        'tags' (content of the article:tag meta, "" when there is none).
        'img' lists the og:image urls followed by the 'data-oversrc' urls
        not already listed, minus urls ending with
        "/2016/01/bonus-panel.png".
        """
        comic_title = soup.find('h1', class_='comic-title').find('a').string
        raw_date = soup.find('header', class_='comic-meta entry-meta').find('a').string
        published = string_to_date(raw_date, "%B %d, %Y")
        # Bonus images may or may not be in meta og:image.
        main_srcs = [meta['content'] for meta in soup.find_all('meta', property='og:image')]
        bonus_srcs = [img['data-oversrc']
                      for img in soup.find_all('img', attrs={'data-oversrc': True})]
        merged = list(main_srcs)
        merged.extend(src for src in bonus_srcs if src not in main_srcs)
        kept = [src for src in merged if not src.endswith("/2016/01/bonus-panel.png")]
        tag_meta = soup.find('meta', property='article:tag')
        if tag_meta:
            tags = tag_meta['content']
        else:
            tags = ""
        info = {
            'title': comic_title,
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': kept,
            'tags': tags,
        }
        return info
619
620 View Code Duplication
621
class PenelopeBagieu(GenericNavigableComic):
    """Retriever for the comics of Penelope Bagieu's blog."""
    name = "bagieu"
    long_name = "Ma vie est tout a fait fascinante (Bagieu)"
    url = "http://www.penelope-jolicoeur.com"
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = "http://www.penelope-jolicoeur.com/2007/02/ma-vie-mon-oeuv.html"

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict of the comic page held in `soup`.

        The dict has the keys 'title', 'img', 'month', 'year' and 'day'.
        The date is parsed with the "fr_FR.utf8" locale.
        """
        raw_date = soup.find('h2', class_='date-header').string
        published = string_to_date(raw_date, "%A %d %B %Y", "fr_FR.utf8")
        body_imgs = soup.find('div', class_='entry-body').find_all('img')
        entry_title = soup.find('h3', class_='entry-header').string
        info = {
            'title': entry_title,
            'img': [img['src'] for img in body_imgs],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
        return info
644
645 View Code Duplication
646
class OneOneOneOneComic(GenericNavigableComic):
    """Retriever for the 1111 Comics."""
    # Also on http://comics1111.tumblr.com
    # Also on https://tapastic.com/series/1111-Comics
    name = "1111"
    long_name = "1111 Comics"
    url = "http://www.1111comics.me"
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict of the comic page held in `soup`.

        The dict has the keys 'title', 'month', 'year', 'day' and 'img'
        (og:image contents).
        """
        comic_title = soup.find('h1', class_='comic-title').find('a').string
        raw_date = soup.find('header', class_='comic-meta entry-meta').find('a').string
        published = string_to_date(raw_date, "%B %d, %Y")
        og_images = soup.find_all('meta', property='og:image')
        info = {
            'title': comic_title,
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [meta['content'] for meta in og_images],
        }
        return info
670
671
672
class AngryAtNothing(GenericNavigableComic):
    """Retriever for the Angry at Nothing comics."""
    # Also on http://tapastic.com/series/Comics-yeah-definitely-comics-
    name = "angry"
    long_name = "Angry At Nothing"
    url = "http://www.angryatnothing.net"
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict of the comic page held in `soup`.

        The dict has the keys 'title', 'month', 'year', 'day' and 'img'
        (og:image contents).
        """
        comic_title = soup.find('h1', class_='comic-title').find('a').string
        raw_date = soup.find('header', class_='comic-meta entry-meta').find('a').string
        published = string_to_date(raw_date, "%B %d, %Y")
        og_images = soup.find_all('meta', property='og:image')
        info = {
            'title': comic_title,
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [meta['content'] for meta in og_images],
        }
        return info
695
696
697
class NeDroid(GenericNavigableComic):
    """Class to retrieve NeDroid comics."""
    name = 'nedroid'
    long_name = 'NeDroid'
    url = 'http://nedroid.com'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        The comic number is read from the page's shortlink ('?p=<num>') and
        the date from the start of the image file name
        (//nedroid.com/comics/<year>-<month>-<day>...). Returns a dict with
        the keys 'short_url', 'title' (image 'alt'), 'title2' (image
        'title'), 'img', 'day', 'month', 'year' and 'num'.
        """
        # Dots are escaped so that they only match literal dots.
        short_url_re = re.compile(r'^%s/\?p=([0-9]*)' % re.escape(cls.url))
        comic_url_re = re.compile(r'//nedroid\.com/comics/([0-9]*)-([0-9]*)-([0-9]*).*')
        short_url = cls.get_url_from_link(soup.find('link', rel='shortlink'))
        num = int(short_url_re.match(short_url).groups()[0])
        imgs = soup.find('div', id='comic').find_all('img')
        year, month, day = [int(s) for s in comic_url_re.match(imgs[0]['src']).groups()]
        assert len(imgs) == 1
        title = imgs[0]['alt']
        title2 = imgs[0]['title']
        return {
            'short_url': short_url,
            'title': title,
            'title2': title2,
            'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
            'day': day,
            'month': month,
            'year': year,
            'num': num,
        }
728
729 View Code Duplication
730
class Garfield(GenericNavigableComic):
    """Class to retrieve Garfield comics."""
    # Also on http://www.gocomics.com/garfield
    name = 'garfield'
    long_name = 'Garfield'
    url = 'https://garfield.com'
    get_first_comic_link = simulate_first_link
    first_url = 'https://garfield.com/comic/1978/06/19'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Looks for the 'a' tag of class 'comic-arrow-right' (next) or
        'comic-arrow-left' (previous).
        """
        return last_soup.find('a', class_='comic-arrow-right' if next_ else 'comic-arrow-left')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        The date is read from the comic url (<url>/comic/<year>/<month>/<day>).
        Returns a dict with the keys 'month', 'year', 'day' and 'img'.
        """
        url = cls.get_url_from_link(link)
        # The site url is escaped so that its dots only match literal dots.
        date_re = re.compile('^%s/comic/([0-9]*)/([0-9]*)/([0-9]*)' % re.escape(cls.url))
        year, month, day = [int(s) for s in date_re.match(url).groups()]
        imgs = soup.find('div', class_='comic-display').find_all('img', class_='img-responsive')
        return {
            'month': month,
            'year': year,
            'day': day,
            'img': [i['src'] for i in imgs],
        }
757
758 View Code Duplication
759
class Dilbert(GenericNavigableComic):
    """Class to retrieve Dilbert comics."""
    # Also on http://www.gocomics.com/dilbert-classics
    name = 'dilbert'
    long_name = 'Dilbert'
    url = 'http://dilbert.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://dilbert.com/strip/1989-04-16'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor of the next/previous navigation block, or None if the block is absent."""
        css_class = 'nav-comic nav-right' if next_ else 'nav-comic nav-left'
        nav_div = last_soup.find('div', class_=css_class)
        if not nav_div:
            return None
        return nav_div.find('a')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic description from the <meta> tags of the page."""
        def meta_content(prop):
            """Return the content of the first <meta> tag having the given property."""
            return soup.find('meta', property=prop)['content']

        title = meta_content('og:title')
        image_metas = soup.find_all('meta', property='og:image')
        description = meta_content('og:description')
        published = string_to_date(meta_content('article:publish_date'), "%B %d, %Y")
        author = meta_content('article:author')
        # Only the first 'article:tag' meta is read.
        tags = meta_content('article:tag')
        image_urls = [meta['content'] for meta in image_metas]
        return {
            'title': title,
            'description': description,
            'img': image_urls,
            'author': author,
            'tags': tags,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
795
796
797
class VictimsOfCircumsolar(GenericNavigableComic):
    """Class to retrieve VictimsOfCircumsolar comics."""
    name = 'circumsolar'
    long_name = 'Victims Of Circumsolar'
    url = 'http://www.victimsofcircumsolar.com'
    get_navi_link = get_a_navi_comicnavnext_navinext
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.victimsofcircumsolar.com/comic/modern-addiction'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Extract title, description and images of a comic page."""
        # Date is on the archive page
        # When several og: metas are present, the last one is used.
        title_metas = soup.find_all('meta', property='og:title')
        comic_title = title_metas[-1]['content']
        desc_metas = soup.find_all('meta', property='og:description')
        comic_desc = desc_metas[-1]['content']
        comic_div = soup.find('div', id='comic')
        pictures = comic_div.find_all('img')
        assert all(pic['title'] == pic['alt'] == comic_title for pic in pictures)
        picture_urls = [pic['src'] for pic in pictures]
        return {
            'title': comic_title,
            'description': comic_desc,
            'img': picture_urls,
        }
819
820
821
class ThreeWordPhrase(GenericNavigableComic):
    """Class to retrieve Three Word Phrase comics."""
    # Also on http://www.threewordphrase.tumblr.com
    name = 'threeword'
    long_name = 'Three Word Phrase'
    url = 'http://threewordphrase.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the parent element of the 'first' button image on the home page."""
        home = get_soup_at_url(cls.url)
        first_button = home.find('img', src='/firstlink.gif')
        return first_button.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the element wrapping the next/previous button, or None if it has no href."""
        button_src = '/nextlink.gif' if next_ else '/prevlink.gif'
        wrapper = last_soup.find('img', src=button_src).parent
        if wrapper.get('href') is None:
            return None
        return wrapper

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect the page title and every image that is not site decoration."""
        # Images whose source ends with one of these suffixes are not comics.
        ignored_suffixes = ('link.gif', '32.png', 'twpbookad.jpg',
                            'merchad.jpg', 'header.gif', 'tipjar.jpg')
        title_tag = soup.find('title')
        comic_imgs = []
        for tag in soup.find_all('img'):
            if not tag['src'].endswith(ignored_suffixes):
                comic_imgs.append(tag)
        alt_texts = (tag.get('alt') for tag in comic_imgs if tag.get('alt'))
        return {
            'title': title_tag.string if title_tag else None,
            'title2': '  '.join(alt_texts),
            'img': [urljoin_wrapper(cls.url, tag['src']) for tag in comic_imgs],
        }
853
854
855
class DeadlyPanel(GenericEmptyComic, GenericNavigableComic):
    """Class to retrieve Deadly Panel comics."""
    # Also on https://tapastic.com/series/deadlypanel
    name = 'deadly'
    long_name = 'Deadly Panel'
    url = 'http://www.deadlypanel.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the sources of the images found in the 'comic' div."""
        comic_div = soup.find('div', id='comic')
        pictures = comic_div.find_all('img')
        # Sanity check: alt text and title are expected to be identical.
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        picture_urls = [pic['src'] for pic in pictures]
        return {
            'img': picture_urls,
        }
872
873
874
class TheGentlemanArmchair(GenericNavigableComic):
    """Class to retrieve The Gentleman Armchair comics."""
    name = 'gentlemanarmchair'
    long_name = 'The Gentleman Armchair'
    url = 'http://thegentlemansarmchair.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Read title, author, date and images from the post markup."""
        post_title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        post_author = author_span.find("a").string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find('div', id='comic').find_all('img')
        picture_urls = [pic['src'] for pic in pictures]
        return {
            'img': picture_urls,
            'title': post_title,
            'author': post_author,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
898
899
900
class MyExtraLife(GenericNavigableComic):
    """Class to retrieve My Extra Life comics."""
    name = 'extralife'
    long_name = 'My Extra Life'
    url = 'http://www.myextralife.com'
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'first comic' navigation anchor of the home page."""
        home = get_soup_at_url(cls.url)
        return home.find('a', class_='comic_nav_link first_comic_link')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Read title, date and images of a comic page."""
        comic_title = soup.find("h1", class_="comic_title").string
        raw_date = soup.find("span", class_="comic_date").string
        published = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find_all("img", class_="comic")
        assert all(pic['alt'] == pic['title'] == comic_title for pic in pictures)
        # Images with an empty source are left out.
        picture_urls = [pic['src'] for pic in pictures if pic["src"]]
        return {
            'title': comic_title,
            'img': picture_urls,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
927
928
929
class SaturdayMorningBreakfastCereal(GenericNavigableComic):
    """Class to retrieve Saturday Morning Breakfast Cereal comics."""
    # Also on http://www.gocomics.com/saturday-morning-breakfast-cereal
    # Also on http://smbc-comics.tumblr.com
    name = 'smbc'
    long_name = 'Saturday Morning Breakfast Cereal'
    url = 'http://www.smbc-comics.com'
    get_navi_link = get_a_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the anchor marked rel='start' on the home page."""
        home = get_soup_at_url(cls.url)
        return home.find('a', rel='start')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Read the main image, the optional 'aftercomic' image and the date."""
        main_img = soup.find('img', id='cc-comic')
        img_urls = [main_img['src']]
        after_div = soup.find('div', id='aftercomic')
        if after_div:
            after_url = after_div.find('img')['src']
            # An empty source is not worth keeping.
            if after_url:
                img_urls.append(after_url)
        # The date is the first child of the publish time block.
        raw_date = soup.find('div', class_='cc-publishtime').contents[0]
        published = string_to_date(raw_date, "%B %d, %Y")
        return {
            'title': main_img['title'],
            'img': [urljoin_wrapper(cls.url, img_url) for img_url in img_urls],
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
960
961
962
class PerryBibleFellowship(GenericListableComic):
    """Class to retrieve Perry Bible Fellowship comics."""
    name = 'pbf'
    long_name = 'Perry Bible Fellowship'
    url = 'http://pbfcomics.com'
    get_url_from_archive_element = join_cls_url_to_href

    @classmethod
    def get_archive_elements(cls):
        """Return the numbered comic links of the home page, in reverse page order."""
        numbered_href = re.compile('^/[0-9]*/$')
        home = get_soup_at_url(cls.url)
        return reversed(home.find_all('a', href=numbered_href))

    @classmethod
    def get_comic_info(cls, soup, link):
        """Read number, name and image of a comic from its page and archive link."""
        comic_url = cls.get_url_from_archive_element(link)
        img_src_re = re.compile('^/archive_b/PBF.*')
        comic_name = link.string
        comic_num = int(link['name'])
        # The link target is expected to be the comic number between slashes.
        assert link['href'] == '/%d/' % comic_num
        pictures = soup.find_all('img', src=img_src_re)
        assert len(pictures) == 1
        assert pictures[0]['alt'] == comic_name
        return {
            'num': comic_num,
            'name': comic_name,
            'img': [urljoin_wrapper(comic_url, pic['src']) for pic in pictures],
            'prefix': '%d-' % comic_num,
        }
992
993
994
class Mercworks(GenericNavigableComic):
    """Class to retrieve Mercworks comics."""
    # Also on http://mercworks.tumblr.com
    name = 'mercworks'
    long_name = 'Mercworks'
    url = 'http://mercworks.net'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the comic description from the <meta> tags of the page."""
        def named_meta(meta_name):
            """Return the content of the first <meta> tag having the given name."""
            return soup.find('meta', attrs={'name': meta_name})['content']

        comic_title = soup.find('meta', property='og:title')['content']
        desc_meta = soup.find('meta', property='og:description')
        # The description is optional.
        if desc_meta:
            comic_desc = desc_meta['content']
        else:
            comic_desc = ""
        comic_author = named_meta('shareaholic:article_author_name')
        published_time = named_meta('shareaholic:article_published_time')
        # Only the leading YYYY-MM-DD part of the timestamp is parsed.
        published = string_to_date(published_time[:10], "%Y-%m-%d")
        image_metas = soup.find_all('meta', property='og:image')
        return {
            'img': [meta['content'] for meta in image_metas],
            'title': comic_title,
            'author': comic_author,
            'desc': comic_desc,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
1023
1024
1025
class BerkeleyMews(GenericListableComic):
    """Class to retrieve Berkeley Mews comics."""
    # Also on http://mews.tumblr.com
    # Also on http://www.gocomics.com/berkeley-mews
    name = 'berkeley'
    long_name = 'Berkeley Mews'
    url = 'http://www.berkeleymews.com'
    get_url_from_archive_element = get_href
    # Matches comic URLs (<site>/?p=<num>); the site URL is escaped so that
    # its dots only match literal dots.
    comic_num_re = re.compile('%s/\\?p=([0-9]*)$' % re.escape(url))

    @classmethod
    def get_archive_elements(cls):
        """Return the comic links of the archive page, in reverse page order."""
        archive_url = urljoin_wrapper(cls.url, "?page_id=2")
        return reversed(get_soup_at_url(archive_url).find_all('a', href=cls.comic_num_re))

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The number comes from the comic URL and the date from the image URL
        (expected to contain <year>-<month>-<day>-).
        """
        comic_date_re = re.compile('.*/([0-9]*)-([0-9]*)-([0-9]*)-.*')
        url = cls.get_url_from_archive_element(link)
        num = int(cls.comic_num_re.match(url).groups()[0])
        img = soup.find('div', id='comic').find('img')
        # Sanity check: alt text and title are expected to be identical.
        assert img['alt'] == img['title']
        title2 = img['title']
        img_url = img['src']
        year, month, day = [int(s) for s in comic_date_re.match(img_url).groups()]
        return {
            'num': num,
            'title': link.string,
            'title2': title2,
            'img': [img_url],
            'year': year,
            'month': month,
            'day': day,
        }
1060
1061
1062
class GenericBouletCorp(GenericNavigableComic):
    """Generic class to retrieve BouletCorp comics in different languages."""
    # Also on http://bouletcorp.tumblr.com
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        This is the first anchor of the 'centered_nav' div of the home page.
        """
        return get_soup_at_url(cls.url).find('div', id='centered_nav').find_all('a')[0]

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The date is parsed from the comic URL (<site>/<year>/<month>/<day>/)
        and the images are taken from the 'storycontent' div.
        """
        url = cls.get_url_from_link(link)
        # The site URL is escaped so that its dots only match literal dots.
        date_re = re.compile('^%s/([0-9]*)/([0-9]*)/([0-9]*)/' % re.escape(cls.url))
        year, month, day = [int(s) for s in date_re.match(url).groups()]
        imgs = soup.find('div', id='notes').find('div', class_='storycontent').find_all('img')
        # Non-empty image titles are joined into a single text.
        texts = '  '.join(t for t in (i.get('title') for i in imgs) if t)
        title = soup.find('title').string
        return {
            # Images without a source are ignored.
            'img': [convert_iri_to_plain_ascii_uri(i['src']) for i in imgs if i.get('src') is not None],
            'title': title,
            'texts': texts,
            'year': year,
            'month': month,
            'day': day,
        }
1089
1090
1091
class BouletCorp(GenericBouletCorp):
    """Class to retrieve BouletCorp comics."""
    # Only the site identity is defined here; everything else is inherited
    # from GenericBouletCorp.
    name = 'boulet'
    long_name = 'Boulet Corp'
    url = 'http://www.bouletcorp.com'
1096
1097
1098
class BouletCorpEn(GenericBouletCorp):
    """Class to retrieve BouletCorp comics from the English site."""
    # Only the site identity is defined here; everything else is inherited
    # from GenericBouletCorp.
    name = 'boulet_en'
    long_name = 'Boulet Corp English'
    url = 'http://english.bouletcorp.com'
1103
1104
1105
class AmazingSuperPowers(GenericNavigableComic):
    """Class to retrieve Amazing Super Powers comics."""
    name = 'asp'
    long_name = 'Amazing Super Powers'
    url = 'http://www.amazingsuperpowers.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Read author, date and images; the title is built from the image titles."""
        author_span = soup.find("span", class_="post-author")
        post_author = author_span.find("a").string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, "%B %d, %Y")
        pictures = soup.find('div', id='comic').find_all('img')
        joined_titles = ' '.join(pic['title'] for pic in pictures)
        # Sanity check: alt text and title are expected to be identical.
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        picture_urls = [pic['src'] for pic in pictures]
        return {
            'title': joined_titles,
            'author': post_author,
            'img': picture_urls,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
1130
1131
1132
class ToonHole(GenericListableComic):
    """Class to retrieve Toon Hole comics."""
    # Also on http://tapastic.com/series/TOONHOLE
    name = 'toonhole'
    long_name = 'Toon Hole'
    url = 'http://www.toonhole.com'
    get_url_from_archive_element = get_href

    @classmethod
    def get_archive_elements(cls):
        """Return the bookmark links of the archive page, in reverse page order."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive/'))
        bookmarks = archive_soup.find_all('a', rel='bookmark')
        return reversed(bookmarks)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Read title, date and images of a comic from its page and archive link."""
        comic_title = link.string
        raw_date = soup.find('div', class_='comicdate').string.strip()
        # Ordinal suffixes are removed before parsing the date.
        published = string_to_date(remove_st_nd_rd_th_from_date(raw_date), "%B %d, %Y")
        pictures = soup.find('div', id='comic').find_all('img')
        assert all(pic['alt'] == pic['title'] == comic_title for pic in pictures)
        picture_urls = [convert_iri_to_plain_ascii_uri(pic['src']) for pic in pictures]
        return {
            'title': comic_title,
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': picture_urls,
        }
1160
1161
1162
class Channelate(GenericNavigableComic):
    """Class to retrieve Channelate comics."""
    name = 'channelate'
    long_name = 'Channelate'
    url = 'http://www.channelate.com'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Read author, date, title and images, including the optional extra panel."""
        author_span = soup.find("span", class_="post-author")
        post_author = author_span.find("a").string
        raw_date = soup.find('span', class_='post-date').string
        published = string_to_date(raw_date, '%Y/%m/%d')
        comic_title = soup.find('meta', property='og:title')['content']
        comic_div = soup.find('div', id='comic')
        panels = comic_div.find_all('img') if comic_div else []
        button_div = soup.find('div', id='extrapanelbutton')
        if button_div:
            # The extra panel lives on another page which has to be fetched.
            bonus_url = button_div.find('a')['href']
            bonus_soup = get_soup_at_url(bonus_url)
            panels += bonus_soup.find_all('img', class_='extrapanelimage')
        else:
            bonus_url = None
        return {
            'url_extra': bonus_url,
            'title': comic_title,
            'author': post_author,
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [urljoin_wrapper(cls.url, panel['src']) for panel in panels],
        }
1196
1197
1198
class CyanideAndHappiness(GenericNavigableComic):
    """Class to retrieve Cyanide And Happiness comics."""
    name = 'cyanide'
    long_name = 'Cyanide and Happiness'
    url = 'http://explosm.net'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics."""
        return get_soup_at_url(cls.url).find('a', title='Oldest comic')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns None when the navigation anchor is missing or has no href.
        """
        # NOTE(review): the trailing space in 'previous-comic ' is kept as it
        # was - confirm that it matches the markup of the site.
        link = last_soup.find('a', class_='next-comic' if next_ else 'previous-comic ')
        # find() returns None when nothing matches: treat it as "no link"
        # instead of failing on the attribute access.
        if link is None or link.get('href') is None:
            return None
        return link

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        The number is the second to last component of the 'og:url' meta
        and the author is the credit text without its 'by ' prefix.
        """
        url2 = soup.find('meta', property='og:url')['content']
        num = int(url2.split('/')[-2])
        date_str = soup.find('h3').find('a').string
        day = string_to_date(date_str, '%Y.%m.%d')
        author = soup.find('small', class_="author-credit-name").string
        assert author.startswith('by ')
        author = author[3:]
        imgs = soup.find_all('img', id='main-comic')
        return {
            'num': num,
            'author': author,
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'prefix': '%d-' % num,
            'img': [convert_iri_to_plain_ascii_uri(urljoin_wrapper(cls.url, i['src'])) for i in imgs]
        }
1236
1237
1238
class MrLovenstein(GenericComic):
    """Class to retrieve Mr Lovenstein comics."""
    # Also on https://tapastic.com/series/MrLovenstein
    name = 'mrlovenstein'
    long_name = 'Mr. Lovenstein'
    url = 'http://www.mrlovenstein.com'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Yield the comics following last_comic, one per comic number.

        Implementation of GenericComic's abstract method. The range of
        numbers is derived from the comic links found on the home page.
        """
        # TODO: more info from http://www.mrlovenstein.com/archive
        num_re = re.compile('^/comic/([0-9]*)$')
        img_src_re = re.compile('^/images/comics/')
        home = get_soup_at_url(cls.url)
        known_nums = [int(num_re.match(anchor['href']).group(1))
                      for anchor in home.find_all('a', href=num_re)]
        first = min(known_nums)
        last = max(known_nums)
        if last_comic:
            # Resume right after the last retrieved comic.
            first = last_comic['num'] + 1
        for num in range(first, last + 1):
            comic_url = urljoin_wrapper(cls.url, '/comic/%d' % num)
            page = get_soup_at_url(comic_url)
            # Images are kept in the reverse of their page order.
            pictures = page.find_all('img', src=img_src_re)[::-1]
            comic_desc = page.find('meta', attrs={'name': 'description'})['content']
            titles = (pic.get('title') for pic in pictures)
            yield {
                'url': comic_url,
                'num': num,
                'texts': '  '.join(text for text in titles if text),
                'img': [urljoin_wrapper(comic_url, pic['src']) for pic in pictures],
                'description': comic_desc,
            }
1268
1269
1270
class DinosaurComics(GenericListableComic):
    """Class to retrieve Dinosaur Comics comics."""
    name = 'dinosaur'
    long_name = 'Dinosaur Comics'
    url = 'http://www.qwantz.com'
    get_url_from_archive_element = get_href
    # Matches comic URLs (<site>/index.php?comic=<num>); the site URL and the
    # dot of 'index.php' are escaped so that dots only match literal dots.
    comic_link_re = re.compile('^%s/index\\.php\\?comic=([0-9]*)$' % re.escape(url))

    @classmethod
    def get_archive_elements(cls):
        """Return the comic links of the archive page, in reverse page order."""
        archive_url = urljoin_wrapper(cls.url, 'archive.php')
        # first link is random -> skip it
        return reversed(get_soup_at_url(archive_url).find_all('a', href=cls.comic_link_re)[1:])

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Number, date and text come from the archive link (its URL, its string
        and its next sibling); image and title come from the comic page.
        """
        url = cls.get_url_from_archive_element(link)
        num = int(cls.comic_link_re.match(url).groups()[0])
        date_str = link.string
        text = link.next_sibling.string
        day = string_to_date(remove_st_nd_rd_th_from_date(date_str), "%B %d, %Y")
        comic_img_re = re.compile('^%s/comics/' % re.escape(cls.url))
        img = soup.find('img', src=comic_img_re)
        return {
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [img.get('src')],
            'title': img.get('title'),
            'text': text,
            'num': num,
        }
1303
1304
1305
class ButterSafe(GenericListableComic):
    """Class to retrieve Butter Safe comics."""
    name = 'butter'
    long_name = 'ButterSafe'
    url = 'http://buttersafe.com'
    get_url_from_archive_element = get_href
    # Matches comic URLs (<site>/<year>/<month>/<day>/...); the site URL is
    # escaped so that its dots only match literal dots.
    comic_link_re = re.compile('^%s/([0-9]*)/([0-9]*)/([0-9]*)/.*' % re.escape(url))

    @classmethod
    def get_archive_elements(cls):
        """Return the comic links of the archive page, in reverse page order."""
        archive_url = urljoin_wrapper(cls.url, 'archive/')
        return reversed(get_soup_at_url(archive_url).find_all('a', href=cls.comic_link_re))

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Title and date come from the archive link (its string and its URL);
        the image is the first one of the 'comic' div.
        """
        url = cls.get_url_from_archive_element(link)
        title = link.string
        year, month, day = [int(s) for s in cls.comic_link_re.match(url).groups()]
        img = soup.find('div', id='comic').find('img')
        # Sanity check: the alt text is expected to be the title.
        assert img['alt'] == title
        return {
            'title': title,
            'day': day,
            'month': month,
            'year': year,
            'img': [img['src']],
        }
1333
1334
1335
class CalvinAndHobbes(GenericComic):
    """Class to retrieve Calvin and Hobbes comics."""
    # Also on http://www.gocomics.com/calvinandhobbes/
    name = 'calvin'
    long_name = 'Calvin and Hobbes'
    # This is not through any official webpage but eh...
    url = 'http://marcel-oehler.marcellosendos.ch/comics/ch/'

    @classmethod
    def get_next_comic(cls, last_comic):
        """Generator to get the next comic. Implementation of GenericComic's abstract method.

        The index page links to one page per month (<year>/<month>/); each
        month page holds images whose source starts with <year><month><day>.
        Only comics strictly after the last retrieved date are yielded.
        """
        last_date = get_date_for_comic(
            last_comic) if last_comic else date(1985, 11, 1)
        link_re = re.compile('^([0-9]*)/([0-9]*)/')
        for link in get_soup_at_url(cls.url).find_all('a', href=link_re):
            url = link['href']
            # The matched strings are kept as they are reused verbatim in the
            # image regexp and in the image URL.
            year, month = link_re.match(url).groups()
            year_num, month_num = int(year), int(month)
            # Skip months which cannot contain anything new: 31 days after
            # the first of the month is past the end of any month.
            if date(year_num, month_num, 1) + timedelta(days=31) < last_date:
                continue
            img_re = re.compile('^%s%s([0-9]*)' % (year, month))
            month_url = urljoin_wrapper(cls.url, url)
            for img in get_soup_at_url(month_url).find_all('img', src=img_re):
                img_src = img['src']
                day = int(img_re.match(img_src).groups()[0])
                comic_date = date(year_num, month_num, day)
                if comic_date > last_date:
                    yield {
                        'url': month_url,
                        'year': year_num,
                        'month': month_num,
                        'day': day,
                        'img': ['%s%s/%s/%s' % (cls.url, year, month, img_src)],
                    }
                    last_date = comic_date
1369
1370
1371
class AbstruseGoose(GenericListableComic):
    """Class to retrieve AbstruseGoose Comics."""
    name = 'abstruse'
    long_name = 'Abstruse Goose'
    url = 'http://abstrusegoose.com'
    get_url_from_archive_element = get_href
    # The site URL is escaped so that its dots only match literal dots.
    comic_url_re = re.compile('^%s/([0-9]*)$' % re.escape(url))
    comic_img_re = re.compile('^%s/strips/.*' % re.escape(url))

    @classmethod
    def get_archive_elements(cls):
        """Return the comic links of the archive page, in page order."""
        archive_url = urljoin_wrapper(cls.url, 'archive')
        return get_soup_at_url(archive_url).find_all('a', href=cls.comic_url_re)

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Get information about a particular comics.

        Number and title come from the archive link (its URL and its string);
        the image is the first one whose source is under <site>/strips/.
        """
        comic_url = cls.get_url_from_archive_element(archive_elt)
        num = int(cls.comic_url_re.match(comic_url).groups()[0])
        return {
            'num': num,
            'title': archive_elt.string,
            'img': [soup.find('img', src=cls.comic_img_re)['src']]
        }
1394
1395
1396
class PhDComics(GenericNavigableComic):
    """Class to retrieve PHD Comics."""
    name = 'phd'
    long_name = 'PhD Comics'
    url = 'http://phdcomics.com/comics/archive.php'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the parent element of the 'first' button image."""
        archive_soup = get_soup_at_url(cls.url)
        first_button = archive_soup.find('img', src='images/first_button.gif')
        return first_button.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the parent element of the next/previous button image, or None if there is no button."""
        button_src = 'images/next_button.gif' if next_ else 'images/prev_button.gif'
        button = last_soup.find('img', src=button_src)
        if button is None:
            return None
        return button.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Read date, title and image of a comic page.

        When the date cannot be parsed, a message is printed and today's
        date is used instead.
        """
        date_font = soup.find('font', face='Arial,Helvetica,Geneva,Swiss,SunSans-Regular', color='white')
        raw_date = date_font.string.strip()
        try:
            published = string_to_date(raw_date, '%m/%d/%Y')
        except ValueError:
            print("Invalid date %s" % raw_date)
            published = date.today()
        comic_title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        comic_img_url = soup.find('img', id='comic')['src']
        return {
            'year': published.year,
            'month': published.month,
            'day': published.day,
            'img': [comic_img_url],
            'title': comic_title,
        }
1431
1432
1433 View Code Duplication
class Octopuns(GenericNavigableComic):
    """Class to retrieve Octopuns comics."""
    # Also on http://octopuns.tumblr.com
    name = 'octopuns'
    long_name = 'Octopuns'
    url = 'http://www.octopuns.net'

    @classmethod
    def get_first_comic_link(cls):
        """Return the parent element of the 'First' button image."""
        home = get_soup_at_url(cls.url)
        first_button = home.find('img', src=re.compile('.*/First.png'))
        return first_button.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the element wrapping the Next/Back button, or None if it has no href."""
        button_pattern = '.*/Next.png' if next_ else '.*/Back.png'
        wrapper = last_soup.find('img', src=re.compile(button_pattern)).parent
        if wrapper.get('href') is None:
            return None
        return wrapper

    @classmethod
    def get_comic_info(cls, soup, link):
        """Read title, date and images of a comic page."""
        post_title = soup.find('h3', class_='post-title entry-title').string
        raw_date = soup.find('h2', class_='date-header').string
        published = string_to_date(raw_date, "%A, %B %d, %Y")
        # Images are advertised through <link rel="image_src"> elements.
        image_links = soup.find_all('link', rel='image_src')
        image_urls = [image_link['href'] for image_link in image_links]
        return {
            'img': image_urls,
            'title': post_title,
            'day': published.day,
            'month': published.month,
            'year': published.year,
        }
1465
1466
1467
class Quarktees(GenericNavigableComic):
    """Class to retrieve the Quarktees comics."""
    name = 'quarktees'
    long_name = 'Quarktees'
    url = 'http://www.quarktees.com/blogs/news'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.quarktees.com/blogs/news/12486621-coming-soon'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the 'article-next' or 'article-prev' anchor of the page."""
        anchor_id = 'article-next' if next_ else 'article-prev'
        return last_soup.find('a', id=anchor_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Read the title and the images of the article."""
        article_title = soup.find('meta', property='og:title')['content']
        article_div = soup.find('div', class_='single-article')
        pictures = article_div.find_all('img')
        picture_urls = [urljoin_wrapper(cls.url, pic['src']) for pic in pictures]
        return {
            'title': article_title,
            'img': picture_urls,
        }
1491
1492
1493
class OverCompensating(GenericNavigableComic):
    """Retriever for the Over Compensating comics."""
    name = 'compensating'
    long_name = 'Over Compensating'
    url = 'http://www.overcompensating.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the home page anchor whose href ends with 'comic=1'."""
        home_soup = get_soup_at_url(cls.url)
        return home_soup.find('a', href=re.compile('comic=1$'))

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the navigation anchor, selected by its title attribute."""
        anchor_title = 'next comic' if next_ else 'go back already'
        return last_soup.find('a', title=anchor_title)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict: number read from the url, image and title."""
        src_pattern = re.compile('^/oc/comics/.*')
        num_pattern = re.compile('.*comic=([0-9]*)$')
        page_url = cls.get_url_from_link(link)
        # The comic number is the digits following 'comic=' in the url.
        comic_num = int(num_pattern.match(page_url).group(1))
        comic_img = soup.find('img', src=src_pattern)
        return {
            'num': comic_num,
            'img': [urljoin_wrapper(page_url, comic_img['src'])],
            'title': comic_img.get('title')
        }
1523
1524
1525
class Oglaf(GenericNavigableComic):
    """Retriever for the Oglaf comics."""
    name = 'oglaf'
    long_name = 'Oglaf [NSFW]'
    url = 'http://oglaf.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the parent of the div with id 'st' on the home page."""
        start_div = get_soup_at_url(cls.url).find("div", id="st")
        return start_div.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the parent of the 'nx'/'pvs' div, or None when absent."""
        nav_div = last_soup.find("div", id="nx" if next_ else "pvs")
        if not nav_div:
            return None
        return nav_div.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Gather page title, title image + strip image and their titles."""
        page_title = soup.find('title').string
        title_imgs = soup.find('div', id='tt').find_all('img')
        assert len(title_imgs) == 1
        strip_imgs = soup.find_all('img', id='strip')
        assert len(strip_imgs) == 1
        all_imgs = title_imgs + strip_imgs
        # The description is made of the 'title' attributes of both images.
        description = ' '.join(tag['title'] for tag in all_imgs)
        return {
            'title': page_title,
            'img': [tag['src'] for tag in all_imgs],
            'description': description,
        }
1558
1559
1560
class ScandinaviaAndTheWorld(GenericNavigableComic):
    """Retriever for the Scandinavia And The World comics."""
    name = 'satw'
    long_name = 'Scandinavia And The World'
    url = 'http://satwcomic.com'
    get_first_comic_link = simulate_first_link
    first_url = 'http://satwcomic.com/sweden-denmark-and-norway'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor whose accesskey is 'n' (next) or 'p' (previous)."""
        access_key = 'n' if next_ else 'p'
        return last_soup.find('a', accesskey=access_key)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Read title and description from meta tags, images by itemprop."""
        label = soup.find('meta', attrs={'name': 'twitter:label1'})['content']
        og_description = soup.find('meta', property='og:description')['content']
        image_tags = soup.find_all('img', itemprop="image")
        return {
            'title': label,
            'description': og_description,
            'img': [tag['src'] for tag in image_tags],
        }
1584
1585
1586
class SomethingOfThatIlk(GenericEmptyComic):  # Does not exist anymore
    """Something Of That Ilk comics, declared as a GenericEmptyComic."""
    name = 'somethingofthatilk'
    long_name = 'Something Of That Ilk'
    url = 'http://www.somethingofthatilk.com'
1591
1592
1593
class InfiniteMonkeyBusiness(GenericNavigableComic):
    """Retriever for the Infinite Monkey Business comics."""
    name = 'monkey'
    long_name = 'Infinite Monkey Business'
    url = 'http://infinitemonkeybusiness.net'
    get_navi_link = get_a_navi_comicnavnext_navinext
    get_first_comic_link = simulate_first_link
    first_url = 'http://infinitemonkeybusiness.net/comic/pillory/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the og:title and the image sources of the 'comic' div."""
        og_title = soup.find('meta', property='og:title')['content']
        comic_div = soup.find('div', id='comic')
        img_urls = [tag['src'] for tag in comic_div.find_all('img')]
        return {
            'title': og_title,
            'img': img_urls,
        }
1611
1612
1613
class Wondermark(GenericListableComic):
    """Retriever for the Wondermark comics, listed from the archive page."""
    name = 'wondermark'
    long_name = 'Wondermark'
    url = 'http://wondermark.com'
    get_url_from_archive_element = get_href

    @classmethod
    def get_archive_elements(cls):
        """Return the rel='bookmark' anchors of the archive page, reversed."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive/'))
        bookmarks = archive_soup.find_all('a', rel='bookmark')
        return reversed(bookmarks)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return date, image, title, alt text and tags of a comic page.

        When the page has no 'comic' div, the image list is empty and the
        title and alt text are empty strings.
        """
        raw_date = soup.find('div', class_='postdate').find('em').string
        cleaned_date = remove_st_nd_rd_th_from_date(raw_date)
        published = string_to_date(cleaned_date, "%B %d, %Y")
        comic_div = soup.find('div', id='comic')
        if not comic_div:
            img_src = []
            alt = ''
            title = ''
        else:
            comic_img = comic_div.find('img')
            img_src = [comic_img['src']]
            alt = comic_img['alt']
            assert alt == comic_img['title']
            title = soup.find('meta', property='og:title')['content']
        tag_links = soup.find('div', class_='postmeta').find_all('a', rel='tag')
        return {
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': img_src,
            'title': title,
            'alt': alt,
            'tags': ' '.join(t.string for t in tag_links),
        }
1650 View Code Duplication
1651
1652
class WarehouseComic(GenericNavigableComic):
    """Retriever for the Warehouse Comic comics."""
    name = 'warehouse'
    long_name = 'Warehouse Comic'
    url = 'http://warehousecomic.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the images, title and post date of a comic page."""
        post_title = soup.find('h2', class_='post-title').string
        post_date = soup.find('span', class_='post-date').string
        published = string_to_date(post_date, "%B %d, %Y")
        comic_imgs = soup.find('div', id='comic').find_all('img')
        img_urls = [tag['src'] for tag in comic_imgs]
        return {
            'img': img_urls,
            'title': post_title,
            'day': published.day,
            'month': published.month,
            'year': published.year,
        }
1674
1675
1676
class JustSayEh(GenericNavigableComic):
    """Class to retrieve Just Say Eh comics."""
    # Also on http://tapastic.com/series/Just-Say-Eh
    name = 'justsayeh'
    long_name = 'Just Say Eh'
    url = 'http://www.justsayeh.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with the image urls ('img'), the post title ('title')
        and the alt text of the first image ('alt'). 'alt' is an empty string
        when the comic div contains no image.
        """
        title = soup.find('h2', class_='post-title').string
        imgs = soup.find("div", id="comic").find_all("img")
        assert all(i['alt'] == i['title'] for i in imgs)
        # Guard the lookup: indexing an empty image list would raise IndexError.
        alt = imgs[0]['alt'] if imgs else ""
        return {
            'img': [i['src'] for i in imgs],
            'title': title,
            'alt': alt,
        }
1697
1698
1699
class MouseBearComedy(GenericNavigableComic):
    """Retriever for the Mouse Bear Comedy comics."""
    # Also on http://mousebearcomedy.tumblr.com
    name = 'mousebear'
    long_name = 'Mouse Bear Comedy'
    url = 'http://www.mousebearcomedy.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return date, images, title and author of a comic page."""
        post_title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        post_author = author_span.find("a").string
        post_date = soup.find("span", class_="post-date").string
        published = string_to_date(post_date, '%B %d, %Y')
        comic_imgs = soup.find("div", id="comic").find_all("img")
        # Sanity check: alt and title of every image equal the post title.
        assert all(tag['alt'] == tag['title'] == post_title for tag in comic_imgs)
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [tag['src'] for tag in comic_imgs],
            'title': post_title,
            'author': post_author,
        }
1725 View Code Duplication
1726
1727
class BigFootJustice(GenericNavigableComic):
    """Retriever for the Big Foot Justice comics."""
    # Also on http://tapastic.com/series/bigfoot-justice
    name = 'bigfoot'
    long_name = 'Big Foot Justice'
    url = 'http://bigfootjustice.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the images of the comic div and their joined titles."""
        comic_imgs = soup.find('div', id='comic').find_all('img')
        assert all(tag['title'] == tag['alt'] for tag in comic_imgs)
        # The comic title is the space-joined 'title' attributes of the images.
        joined_title = ' '.join(tag['title'] for tag in comic_imgs)
        img_urls = [tag['src'] for tag in comic_imgs]
        return {
            'img': img_urls,
            'title': joined_title,
        }
1746
1747
1748
class RespawnComic(GenericNavigableComic):
    """Class to retrieve Respawn Comic."""
    # Also on http://respawncomic.tumblr.com
    name = 'respawn'
    long_name = 'Respawn Comic'
    url = 'http://respawncomic.com'
    get_navi_link = get_a_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://respawncomic.com/comic/c0001/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Title, author and publication time are read from meta tags; images
        are the og:image meta tags minus the two urls listed in skip_imgs.
        """
        title = soup.find('meta', property='og:title')['content']
        author = soup.find('meta', attrs={'name': 'shareaholic:article_author_name'})['content']
        date_str = soup.find('meta', attrs={'name': 'shareaholic:article_published_time'})['content']
        # Only the first 10 characters are parsed, as "%Y-%m-%d".
        date_str = date_str[:10]
        day = string_to_date(date_str, "%Y-%m-%d")
        imgs = soup.find_all('meta', property='og:image')
        # og:image urls that are filtered out of the result.
        skip_imgs = {
            'http://respawncomic.com/wp-content/uploads/2016/03/site/HAROLD2.png',
            'http://respawncomic.com/wp-content/uploads/2016/03/site/DEVA.png'
        }
        return {
            'title': title,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [i['content'] for i in imgs if i['content'] not in skip_imgs],
        }
1779
1780
1781 View Code Duplication
class SafelyEndangered(GenericNavigableComic):
    """Class to retrieve Safely Endangered comics."""
    # Also on http://tumblr.safelyendangered.com
    name = 'endangered'
    long_name = 'Safely Endangered'
    url = 'http://www.safelyendangered.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.safelyendangered.com/comic/ignored/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with the post date, the image urls, the post title
        and the alt text of the first image. 'alt' is an empty string when
        the comic div contains no image.
        """
        title = soup.find('h2', class_='post-title').string
        date_str = soup.find('span', class_='post-date').string
        day = string_to_date(date_str, '%B %d, %Y')
        imgs = soup.find('div', id='comic').find_all('img')
        assert all(i['alt'] == i['title'] for i in imgs)
        # Guard the lookup: indexing an empty image list would raise IndexError.
        alt = imgs[0]['alt'] if imgs else ""
        return {
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [i['src'] for i in imgs],
            'title': title,
            'alt': alt,
        }
1808
1809
1810 View Code Duplication
class PicturesInBoxes(GenericNavigableComic):
    """Retriever for the Pictures In Boxes comics."""
    # Also on http://picturesinboxescomic.tumblr.com
    name = 'picturesinboxes'
    long_name = 'Pictures in Boxes'
    url = 'http://www.picturesinboxes.com'
    get_navi_link = get_a_navi_navinext
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.picturesinboxes.com/2013/10/26/tetris/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return date, images, title and author of a comic page."""
        post_title = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        post_author = author_span.find("a").string
        post_date = soup.find('span', class_='post-date').string
        published = string_to_date(post_date, '%B %d, %Y')
        pane_imgs = soup.find('div', class_='comicpane').find_all('img')
        # At least one image is expected, each named after the post title.
        assert pane_imgs
        assert all(tag['title'] == tag['alt'] == post_title for tag in pane_imgs)
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [tag['src'] for tag in pane_imgs],
            'title': post_title,
            'author': post_author,
        }
1838
1839
1840
class Penmen(GenericEmptyComic):
    """Penmen comics, declared as a GenericEmptyComic."""
    name = 'penmen'
    long_name = 'Penmen'
    url = 'http://penmen.com'
1845
1846
1847
class TheDoghouseDiaries(GenericNavigableComic):
    """Retriever for The Dog House Diaries comics."""
    name = 'doghouse'
    long_name = 'The Dog House Diaries'
    url = 'http://thedoghousediaries.com'

    @classmethod
    def get_first_comic_link(cls):
        """Return the anchor with id 'firstlink' from the home page."""
        home_soup = get_soup_at_url(cls.url)
        return home_soup.find('a', id='firstlink')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor with id 'nextlink' or 'previouslink'."""
        anchor_id = 'nextlink' if next_ else 'previouslink'
        return last_soup.find('a', id=anchor_id)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return titles, alt text, image url and number of a comic page."""
        src_pattern = re.compile('^dhdcomics/.*')
        comic_img = soup.find('img', src=src_pattern)
        page_url = cls.get_url_from_link(link)
        header = soup.find('h2', id='titleheader').string
        subtext = soup.find('div', id='subtext').string
        alt_text = comic_img.get('title')
        # The image 'src' is stripped of surrounding whitespace before joining.
        img_url = urljoin_wrapper(page_url, comic_img['src'].strip())
        # The comic number is the last '/'-separated component of the url.
        comic_num = int(page_url.rsplit('/', 1)[-1])
        return {
            'title': header,
            'title2': subtext,
            'alt': alt_text,
            'img': [img_url],
            'num': comic_num,
        }
1876
1877
1878
class InvisibleBread(GenericListableComic):
    """Class to retrieve Invisible Bread comics."""
    # Also on http://www.gocomics.com/invisible-bread
    name = 'invisiblebread'
    long_name = 'Invisible Bread'
    url = 'http://invisiblebread.com'

    @classmethod
    def get_archive_elements(cls):
        """Return the 'archive-title' cells of the archive page, reversed."""
        archive_url = urljoin_wrapper(cls.url, 'archives/')
        return reversed(get_soup_at_url(archive_url).find_all('td', class_='archive-title'))

    @classmethod
    def get_url_from_archive_element(cls, td):
        """Return the href of the anchor contained in an archive cell."""
        return td.find('a')['href']

    @classmethod
    def get_comic_info(cls, soup, td):
        """Get information about a particular comics.

        The title comes from the archive cell, the month and day from the
        cell's previous sibling and the year from the comic url. The image
        is the first <img> of the 'comic' div of the comic page.
        """
        url = cls.get_url_from_archive_element(td)
        title = td.find('a').string
        month_and_day = td.previous_sibling.string
        # The site url is escaped so that its dots only match literal dots.
        link_re = re.compile('^%s/([0-9]+)/' % re.escape(cls.url))
        year = link_re.match(url).groups()[0]
        date_str = month_and_day + ' ' + year
        day = string_to_date(date_str, '%b %d %Y')
        imgs = [soup.find('div', id='comic').find('img')]
        assert all(i['title'] == i['alt'] == title for i in imgs)
        return {
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
            'title': title,
        }
1914
1915
1916
class DiscoBleach(GenericEmptyComic):  # Does not work anymore
    """Disco Bleach comics, declared as a GenericEmptyComic."""
    name = 'discobleach'
    long_name = 'Disco Bleach'
    url = 'http://discobleach.com'
1921
1922
1923
class TubeyToons(GenericEmptyComic):  # Does not work anymore
    """Tubey Toons comics, declared as a GenericEmptyComic."""
    # Also on http://tapastic.com/series/Tubey-Toons
    # Also on http://tubeytoons.tumblr.com
    name = 'tubeytoons'
    long_name = 'Tubey Toons'
    url = 'http://tubeytoons.com'
1930
1931
1932 View Code Duplication
class CompletelySeriousComics(GenericNavigableComic):
    """Retriever for the Completely Serious comics."""
    name = 'completelyserious'
    long_name = 'Completely Serious Comics'
    url = 'http://completelyseriouscomics.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return date, images, title, alt text and author of a comic page."""
        post_title = soup.find('h2', class_='post-title').string
        author_span = soup.find('span', class_='post-author')
        # The author is the string of the span's second child node.
        post_author = author_span.contents[1].string
        post_date = soup.find('span', class_='post-date').string
        published = string_to_date(post_date, '%B %d, %Y')
        pane_imgs = soup.find('div', class_='comicpane').find_all('img')
        assert pane_imgs
        alt_text = pane_imgs[0]['title']
        assert all(tag['title'] == tag['alt'] == alt_text for tag in pane_imgs)
        return {
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [tag['src'] for tag in pane_imgs],
            'title': post_title,
            'alt': alt_text,
            'author': post_author,
        }
1960
1961
1962
class PoorlyDrawnLines(GenericListableComic):
    """Class to retrieve Poorly Drawn Lines comics."""
    # Also on http://pdlcomics.tumblr.com
    name = 'poorlydrawn'
    long_name = 'Poorly Drawn Lines'
    url = 'http://poorlydrawnlines.com'
    get_url_from_archive_element = get_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns the image urls of the 'post' div (at most one image is
        expected) and the 'title' attribute of that image, or an empty
        string when there is no image or no such attribute.
        """
        imgs = soup.find('div', class_='post').find_all('img')
        assert len(imgs) <= 1
        return {
            'img': [i['src'] for i in imgs],
            'title': imgs[0].get('title', "") if imgs else "",
        }

    @classmethod
    def get_archive_elements(cls):
        """Return the archive anchors pointing to '<url>/comic/...', reversed."""
        archive_url = urljoin_wrapper(cls.url, 'archive')
        # The site url is escaped so that its dots only match literal dots.
        url_re = re.compile('^%s/comic/.' % re.escape(cls.url))
        return reversed(get_soup_at_url(archive_url).find_all('a', href=url_re))
1985
1986
1987 View Code Duplication
class LoadingComics(GenericNavigableComic):
    """Retriever for the Loading Artist comics."""
    name = 'loadingartist'
    long_name = 'Loading Artist'
    url = 'http://www.loadingartist.com/latest'

    @classmethod
    def get_first_comic_link(cls):
        """Return the anchor titled "First" from the page at the class url."""
        latest_soup = get_soup_at_url(cls.url)
        return latest_soup.find('a', title="First")

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the anchor titled 'Next' or 'Previous'."""
        anchor_title = 'Next' if next_ else 'Previous'
        return last_soup.find('a', title=anchor_title)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the title, the images and the date of a comic page."""
        heading = soup.find('h1').string
        date_text = soup.find('span', class_='date').string.strip()
        published = string_to_date(date_text, "%B %d, %Y")
        comic_div = soup.find('div', class_='comic')
        # Only images with empty 'alt' and 'title' attributes are kept.
        comic_imgs = comic_div.find_all('img', alt='', title='')
        return {
            'title': heading,
            'img': [tag['src'] for tag in comic_imgs],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2017
2018
2019 View Code Duplication
class ChuckleADuck(GenericNavigableComic):
    """Retriever for the Chuckle-A-Duck comics."""
    name = 'chuckleaduck'
    long_name = 'Chuckle-A-duck'
    url = 'http://chuckleaduck.com'
    get_first_comic_link = get_div_navfirst_a
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return date, images, title and author of a comic page.

        Pages without a 'comic' div yield no image and an empty title.
        """
        raw_date = soup.find('span', class_='post-date').string
        cleaned_date = remove_st_nd_rd_th_from_date(raw_date)
        published = string_to_date(cleaned_date, "%B %d, %Y")
        post_author = soup.find('span', class_='post-author').string
        comic_div = soup.find('div', id='comic')
        if comic_div:
            comic_imgs = comic_div.find_all('img')
        else:
            comic_imgs = []
        if comic_imgs:
            img_title = comic_imgs[0]['title']
        else:
            img_title = ""
        assert all(tag['title'] == tag['alt'] == img_title for tag in comic_imgs)
        return {
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'img': [tag['src'] for tag in comic_imgs],
            'title': img_title,
            'author': post_author,
        }
2045
2046
2047
class DepressedAlien(GenericNavigableComic):
    """Retriever for the Depressed Alien comics."""
    name = 'depressedalien'
    long_name = 'Depressed Alien'
    url = 'http://depressedalien.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the parent of the image named 'beginArrow' on the home page."""
        begin_arrow = get_soup_at_url(cls.url).find('img', attrs={'name': 'beginArrow'})
        return begin_arrow.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the parent of the image named 'rightArrow' or 'leftArrow'."""
        arrow_name = 'rightArrow' if next_ else 'leftArrow'
        arrow_img = last_soup.find('img', attrs={'name': arrow_name})
        return arrow_img.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the twitter:title and the og:image urls of a comic page."""
        twitter_title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        image_metas = soup.find_all('meta', property='og:image')
        return {
            'title': twitter_title,
            'img': [meta['content'] for meta in image_metas],
        }
2073
2074
2075
class ThingsInSquares(GenericListableComic):
    """Retriever for the Things In Squares comics."""
    # This can be retrieved in other languages
    # Also on https://tapastic.com/series/Things-in-Squares
    name = 'squares'
    long_name = 'Things in squares'
    url = 'http://www.thingsinsquares.com'

    @classmethod
    def get_comic_info(cls, soup, tr):
        """Return the info dict built from an archive row and the comic page.

        The row must hold exactly three cells: the second one carries the
        link (and title), the third one the date in "%m.%d.%y" format.
        """
        _, link_cell, date_cell = tr.find_all('td')
        anchor = link_cell.find('a')
        published = string_to_date(date_cell.string, "%m.%d.%y")
        row_title = anchor.string
        og_title = soup.find('meta', property='og:title')['content']
        desc_meta = soup.find('meta', property='og:description')
        if desc_meta:
            description = desc_meta['content']
        else:
            description = ''
        tag_metas = soup.find_all('meta', property='article:tag')
        tags = ' '.join(meta['content'] for meta in tag_metas)
        content_imgs = soup.find('div', class_='entry-content').find_all('img')
        return {
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'title': row_title,
            'title2': og_title,
            'description': description,
            'tags': tags,
            'img': [tag['src'] for tag in content_imgs],
            'alt': ' '.join(tag['alt'] for tag in content_imgs),
        }

    @classmethod
    def get_url_from_archive_element(cls, tr):
        """Return the href of the anchor in the second cell of an archive row."""
        _, link_cell, _date_cell = tr.find_all('td')
        anchor = link_cell.find('a')
        return anchor['href']

    @classmethod
    def get_archive_elements(cls):
        """Return the rows of the first <tbody> of the archive page, reversed."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive-2'))
        rows = archive_soup.find('tbody').find_all('tr')
        return reversed(rows)
2117
2118
2119 View Code Duplication
class HappleTea(GenericNavigableComic):
    """Retriever for the Happle Tea comics."""
    name = 'happletea'
    long_name = 'Happle Tea'
    url = 'http://www.happletea.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, images, alt text, date and author of a comic page."""
        comic_imgs = soup.find('div', id='comic').find_all('img')
        post_div = soup.find('div', class_='post-content')
        post_title = post_div.find('h2', class_='post-title').string
        post_author = post_div.find('a', rel='author').string
        post_date = post_div.find('span', class_='post-date').string
        published = string_to_date(post_date, "%B %d, %Y")
        assert all(tag['alt'] == tag['title'] for tag in comic_imgs)
        # Alt texts of all images are concatenated without separator.
        joined_alt = ''.join(tag['alt'] for tag in comic_imgs)
        return {
            'title': post_title,
            'img': [tag['src'] for tag in comic_imgs],
            'alt': joined_alt,
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'author': post_author,
        }
2146
2147
2148
class FatAwesomeComics(GenericNavigableComic):
    """Retriever for the Fat Awesome comics."""
    # Also on http://fatawesomecomedy.tumblr.com
    name = 'fatawesome'
    long_name = 'Fat Awesome'
    url = 'http://fatawesome.com/comics'
    get_navi_link = get_a_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://fatawesome.com/shortbus/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, description, tags, alt text, image and date."""
        twitter_title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        meta_description = soup.find('meta', attrs={'name': 'description'})['content']
        tag_meta = soup.find('meta', property='article:tag')
        if tag_meta:
            tags = tag_meta['content']
        else:
            tags = ""
        published_time = soup.find('meta', property='article:published_time')['content']
        # Only the first 10 characters are parsed, as "%Y-%m-%d".
        published = string_to_date(published_time[:10], "%Y-%m-%d")
        comic_imgs = soup.find_all('img', attrs={'data-recalc-dims': "1"})
        assert len(comic_imgs) == 1
        return {
            'title': twitter_title,
            'description': meta_description,
            'tags': tags,
            'alt': "".join(tag['alt'] for tag in comic_imgs),
            # Everything after the last '?' of the image source is dropped.
            'img': [tag['src'].rsplit('?', 1)[0] for tag in comic_imgs],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2179
2180
2181
class AnythingComic(GenericListableComic):
    """Retriever for the Anything Comic comics."""
    # Also on http://tapastic.com/series/anything
    name = 'anythingcomic'
    long_name = 'Anything Comic'
    url = 'http://www.anythingcomic.com'

    @classmethod
    def get_archive_elements(cls):
        """Return the rows of the 'chapter_table' table, minus the first two."""
        archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive/'))
        rows = archive_soup.find('table', id='chapter_table').find_all('tr')
        # The first 2 <tr>'s do not correspond to comics
        return rows[2:]

    @classmethod
    def get_url_from_archive_element(cls, tr):
        """Return the comic url from the anchor of the row's second cell."""
        _num_cell, comic_cell, _date_cell, _ = tr.find_all('td')
        anchor = comic_cell.find('a')
        return urljoin_wrapper(cls.url, anchor['href'])

    @classmethod
    def get_comic_info(cls, soup, tr):
        """Return number, title, alt text, image and date of a comic.

        The archive row must hold exactly four cells: number, comic link,
        date and one ignored cell.
        """
        num_cell, comic_cell, date_cell, _ = tr.find_all('td')
        comic_num = int(num_cell.string)
        anchor = comic_cell.find('a')
        comic_title = anchor.string
        comic_imgs = soup.find_all('img', id='comic_image')
        published = string_to_date(date_cell.string, '%d %b %Y %I:%M %p')
        assert len(comic_imgs) == 1
        assert all(tag.get('alt') == tag.get('title') for tag in comic_imgs)
        return {
            'num': comic_num,
            'title': comic_title,
            'alt': comic_imgs[0].get('alt', ''),
            'img': [tag['src'] for tag in comic_imgs],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2221
2222
2223
class LonnieMillsap(GenericNavigableComic):
    """Retriever for Lonnie Millsap's comics."""
    name = 'millsap'
    long_name = 'Lonnie Millsap'
    url = 'http://www.lonniemillsap.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.lonniemillsap.com/?p=42'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return title, author, images and date of a comic page."""
        post_title = soup.find('h2', class_='post-title').string
        post_div = soup.find('div', class_='post-content')
        author_span = post_div.find("span", class_="post-author")
        post_author = author_span.find("a").string
        post_date = post_div.find("span", class_="post-date").string
        published = string_to_date(post_date, "%B %d, %Y")
        entry_imgs = post_div.find("div", class_="entry").find_all("img")
        return {
            'title': post_title,
            'author': post_author,
            'img': [tag['src'] for tag in entry_imgs],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2249
2250
2251 View Code Duplication
class LinsEditions(GenericNavigableComic):
    """Retriever for the L.I.N.S. Editions comics."""
    # Also on http://linscomics.tumblr.com
    name = 'lins'
    long_name = 'L.I.N.S. Editions'
    url = 'https://linsedition.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'https://linsedition.com/2011/09/07/l-i-n-s/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the og:title, the og:image urls and the publication date."""
        og_title = soup.find('meta', property='og:title')['content']
        image_metas = soup.find_all('meta', property='og:image')
        published_time = soup.find('meta', property='article:published_time')['content']
        # Only the first 10 characters are parsed, as "%Y-%m-%d".
        published = string_to_date(published_time[:10], "%Y-%m-%d")
        return {
            'title': og_title,
            'img': [meta['content'] for meta in image_metas],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2275
2276
2277
class ThorsThundershack(GenericNavigableComic):
    """Retriever for the Thor's Thundershack comics."""
    # Also on http://tapastic.com/series/Thors-Thundershac
    name = 'thor'
    long_name = "Thor's Thundershack"
    url = 'http://www.thorsthundershack.com'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'first navlink' anchor found on the home page."""
        home = get_soup_at_url(cls.url)
        return home.find('a', class_='first navlink')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the next (or previous) link, or None if there is none.

        Anchors whose href is '/comic' are skipped.
        """
        direction = 'next' if next_ else 'prev'
        candidates = last_soup.find_all('a', rel=direction)
        return next((a for a in candidates if a['href'] != '/comic'), None)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect the metadata and image URLs of the comic in soup.

        Image sources are joined to the class url. The 'alt' value is the
        alt text of the first image, or "" when the page has no image.
        """
        headline = soup.find('meta', attrs={'name': 'description'})["content"]
        body_text = soup.find('div', itemprop='articleBody').text
        creator = soup.find('span', itemprop='author copyrightHolder').string
        pictures = soup.find_all('img', itemprop='image')
        # Sanity check: title and alt are expected to carry the same text.
        assert all(pic['title'] == pic['alt'] for pic in pictures)
        alt_text = pictures[0]['alt'] if pictures else ""
        stamp = soup.find('time', itemprop='datePublished')["datetime"]
        published = string_to_date(stamp, "%Y-%m-%d %H:%M:%S")
        return {
            'img': [urljoin_wrapper(cls.url, pic['src']) for pic in pictures],
            'month': published.month,
            'year': published.year,
            'day': published.day,
            'author': creator,
            'title': headline,
            'alt': alt_text,
            'description': body_text,
        }
2319
2320
2321 View Code Duplication
class GerbilWithAJetpack(GenericNavigableComic):
    """Class to retrieve GerbilWithAJetpack comics."""
    name = 'gerbil'
    long_name = 'Gerbil With A Jetpack'
    url = 'http://gerbilwithajetpack.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        Returns a dict with the image URLs, title, alt text, author and
        publication date. 'alt' is the alt text of the first image, or ""
        when the comic div holds no image.
        """
        title = soup.find('h2', class_='post-title').string
        author = soup.find("span", class_="post-author").find("a").string
        date_str = soup.find("span", class_="post-date").string
        day = string_to_date(date_str, "%B %d, %Y")
        imgs = soup.find("div", id="comic").find_all("img")
        # Guard against posts without any image instead of raising
        # IndexError, as the sibling comic classes in this file do.
        alt = imgs[0]['alt'] if imgs else ""
        # Sanity check: every image is expected to share the same alt/title.
        assert all(i['alt'] == i['title'] == alt for i in imgs)
        return {
            'img': [i['src'] for i in imgs],
            'title': title,
            'alt': alt,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year
        }
2348
2349
2350
class EveryDayBlues(GenericNavigableComic):
    """Retriever for the Every Day Blues comics."""
    name = "blues"
    long_name = "Every Day Blues"
    url = "http://everydayblues.net"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect image, title, author and date of the comic in soup.

        The date text is parsed as "%d. %B %Y" with the "de_DE.utf8"
        locale.
        """
        headline = soup.find("h2", class_="post-title").string
        author_span = soup.find("span", class_="post-author")
        written_by = author_span.find("a").string
        date_text = soup.find("span", class_="post-date").string
        published = string_to_date(date_text, "%d. %B %Y", "de_DE.utf8")
        pictures = soup.find("div", id="comic").find_all("img")
        # Sanity checks: alt and title both repeat the post title, and
        # there is at most one image per comic.
        assert all(pic['alt'] == pic['title'] == headline for pic in pictures)
        assert len(pictures) <= 1
        return {
            'img': [pic['src'] for pic in pictures],
            'title': headline,
            'author': written_by,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2376
2377
2378 View Code Duplication
class BiterComics(GenericNavigableComic):
    """Retriever for the Biter Comics."""
    name = "biter"
    long_name = "Biter Comics"
    url = "http://www.bitercomics.com"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect image, title, alt text, author and date of the comic.

        Exactly one image is expected in the comic div; its alt text is
        returned as 'alt'.
        """
        headline = soup.find("h1", class_="entry-title").string
        author_span = soup.find("span", class_="author vcard")
        written_by = author_span.find("a").string
        date_text = soup.find("span", class_="entry-date").string
        published = string_to_date(date_text, "%B %d, %Y")
        pictures = soup.find("div", id="comic").find_all("img")
        # Sanity checks: alt mirrors title, and there is a single image.
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        assert len(pictures) == 1
        alt_text = pictures[0]['alt']
        return {
            'img': [pic['src'] for pic in pictures],
            'title': headline,
            'alt': alt_text,
            'author': written_by,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2406
2407
2408
class TheAwkwardYeti(GenericNavigableComic):
    """Retriever for The Awkward Yeti comics."""
    # Also on http://www.gocomics.com/the-awkward-yeti
    # Also on http://larstheyeti.tumblr.com
    # Also on https://tapastic.com/series/TheAwkwardYeti
    name = 'yeti'
    long_name = 'The Awkward Yeti'
    url = 'http://theawkwardyeti.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect the images, title and date of the comic in soup."""
        headline = soup.find('h2', class_='post-title').string
        date_text = soup.find("span", class_="post-date").string
        published = string_to_date(date_text, "%B %d, %Y")
        pictures = soup.find("div", id="comic").find_all("img")
        # Sanity check on the first image only: alt and title must agree.
        assert not pictures or pictures[0]['alt'] == pictures[0]['title']
        return {
            'img': [pic['src'] for pic in pictures],
            'title': headline,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2434
2435
2436
class PleasantThoughts(GenericNavigableComic):
    """Retriever for the Pleasant Thoughts comics."""
    name = 'pleasant'
    long_name = 'Pleasant Thoughts'
    url = 'http://pleasant-thoughts.com'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the title and image URLs found in the post content."""
        content = soup.find('div', class_='post-content')
        headline = content.find('h2', class_='post-title').string
        entry = content.find("div", class_="entry")
        pictures = entry.find_all("img")
        return {
            'title': headline,
            'img': [pic['src'] for pic in pictures],
        }
2454
2455
2456
class MisterAndMe(GenericNavigableComic):
    """Retriever for the Mister & Me comics."""
    # Also on http://www.gocomics.com/mister-and-me
    # Also on https://tapastic.com/series/Mister-and-Me
    name = 'mister'
    long_name = 'Mister & Me'
    url = 'http://www.mister-and-me.com'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect image, title, alt text, author and date of the comic.

        'alt' is the alt text of the image, or "" when there is none.
        """
        headline = soup.find('h2', class_='post-title').string
        author_link = soup.find("span", class_="post-author").find("a")
        written_by = author_link.string
        date_text = soup.find("span", class_="post-date").string
        published = string_to_date(date_text, "%B %d, %Y")
        pictures = soup.find("div", id="comic").find_all("img")
        # Sanity checks: alt mirrors title, and at most one image exists.
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        assert len(pictures) <= 1
        alt_text = pictures[0]['alt'] if pictures else ""
        return {
            'img': [pic['src'] for pic in pictures],
            'title': headline,
            'alt': alt_text,
            'author': written_by,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2486
2487
2488 View Code Duplication
class LastPlaceComics(GenericNavigableComic):
    """Retriever for the Last Place Comics."""
    name = 'lastplace'
    long_name = 'Last Place Comics'
    url = "http://lastplacecomics.com"
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Gather the strip's image, title, alt text, author and date.

        When the comic div contains no image, 'alt' is "".
        """
        post_title = soup.find('h2', class_='post-title').string
        post_author = soup.find("span", class_="post-author").find("a").string
        raw_date = soup.find("span", class_="post-date").string
        when = string_to_date(raw_date, "%B %d, %Y")
        comic_div = soup.find("div", id="comic")
        img_tags = comic_div.find_all("img")
        # Sanity checks: matching alt/title and no more than one image.
        assert all(tag['alt'] == tag['title'] for tag in img_tags)
        assert len(img_tags) <= 1
        alt_text = img_tags[0]['alt'] if img_tags else ""
        return {
            'img': [tag['src'] for tag in img_tags],
            'title': post_title,
            'alt': alt_text,
            'author': post_author,
            'day': when.day,
            'month': when.month,
            'year': when.year
        }
2516
2517
2518 View Code Duplication
class TalesOfAbsurdity(GenericNavigableComic):
    """Retriever for the Tales Of Absurdity comics."""
    # Also on http://tapastic.com/series/Tales-Of-Absurdity
    # Also on http://talesofabsurdity.tumblr.com
    name = 'absurdity'
    long_name = 'Tales of Absurdity'
    url = 'http://talesofabsurdity.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect images, title, alt text, author and date of the comic.

        'alt' is the alt text of the first image, or "" without images.
        """
        headline = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        written_by = author_span.find("a").string
        date_text = soup.find("span", class_="post-date").string
        published = string_to_date(date_text, "%B %d, %Y")
        pictures = soup.find("div", id="comic").find_all("img")
        # Sanity check: alt and title are expected to be the same text.
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        alt_text = pictures[0]['alt'] if pictures else ""
        return {
            'img': [pic['src'] for pic in pictures],
            'title': headline,
            'alt': alt_text,
            'author': written_by,
            'day': published.day,
            'month': published.month,
            'year': published.year
        }
2547
2548
2549
class EndlessOrigami(GenericNavigableComic):
    """Retriever for the Endless Origami comics."""
    name = "origami"
    long_name = "Endless Origami"
    url = "http://endlessorigami.com"
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Gather the strip's images, title, alt text, author and date.

        The alt text of the first image is used as 'alt' ("" if no image).
        """
        post_title = soup.find('h2', class_='post-title').string
        post_author = soup.find("span", class_="post-author").find("a").string
        raw_date = soup.find("span", class_="post-date").string
        when = string_to_date(raw_date, "%B %d, %Y")
        img_tags = soup.find("div", id="comic").find_all("img")
        # Sanity check: each image has identical alt and title attributes.
        assert all(tag['alt'] == tag['title'] for tag in img_tags)
        alt_text = img_tags[0]['alt'] if img_tags else ""
        return {
            'img': [tag['src'] for tag in img_tags],
            'title': post_title,
            'alt': alt_text,
            'author': post_author,
            'day': when.day,
            'month': when.month,
            'year': when.year
        }
2576
2577
2578
class PlanC(GenericNavigableComic):
    """Retriever for the Plan C comics."""
    name = 'planc'
    long_name = 'Plan C'
    url = 'http://www.plancomic.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_comicnavnext_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the title, image URLs and date of the comic in soup."""
        headline = soup.find('h2', class_='post-title').string
        date_text = soup.find("span", class_="post-date").string
        published = string_to_date(date_text, "%B %d, %Y")
        comic_div = soup.find('div', id='comic')
        pictures = comic_div.find_all('img')
        return {
            'title': headline,
            'img': [pic['src'] for pic in pictures],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2600 View Code Duplication
2601
2602
class BuniComic(GenericNavigableComic):
    """Retriever for the Buni comics."""
    name = 'buni'
    long_name = 'BuniComics'
    url = 'http://www.bunicomic.com'
    get_first_comic_link = get_a_comicnavbase_comicnavfirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the image URL and title of the comic in soup.

        The title is taken from the title attribute of the single image
        expected in the comic div.
        """
        pictures = soup.find('div', id='comic').find_all('img')
        # Sanity checks: alt mirrors title, and there is a single image.
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        assert len(pictures) == 1
        return {
            'img': [pic['src'] for pic in pictures],
            'title': pictures[0]['title'],
        }
2620
2621
2622
class GenericCommitStrip(GenericNavigableComic):
    """Common base for the Commit Strip retrievers (one per language)."""
    get_navi_link = get_a_rel_next
    get_first_comic_link = simulate_first_link
    first_url = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect titles, description and image URLs of the strip.

        'title2' joins the title attributes of the images (missing ones
        count as ''). Image sources are converted to plain ASCII URIs and
        joined to the class url.
        """
        summary = soup.find('meta', property='og:description')['content']
        headline = soup.find('meta', property='og:title')['content']
        pictures = soup.find('div', class_='entry-content').find_all('img')
        image_titles = ' '.join(pic.get('title', '') for pic in pictures)
        image_urls = [
            urljoin_wrapper(cls.url, convert_iri_to_plain_ascii_uri(pic['src']))
            for pic in pictures
        ]
        return {
            'title': headline,
            'title2': image_titles,
            'description': summary,
            'img': image_urls,
        }
2641
2642
2643
class CommitStripFr(GenericCommitStrip):
    """Retriever for the French edition of Commit Strip."""
    name = "commit_fr"
    long_name = "Commit Strip (Fr)"
    url = "http://www.commitstrip.com/fr"
    first_url = "http://www.commitstrip.com/fr/2012/02/22/interview/"
2649
2650
2651
class CommitStripEn(GenericCommitStrip):
    """Retriever for the English edition of Commit Strip."""
    name = "commit_en"
    long_name = "Commit Strip (En)"
    url = "http://www.commitstrip.com/en"
    first_url = "http://www.commitstrip.com/en/2012/02/22/interview/"
2657
2658
2659 View Code Duplication
class GenericBoumerie(GenericNavigableComic):
    """Common base for the Boumeries retrievers (one per language).

    Subclasses provide date_format and lang, which are passed to
    string_to_date when parsing the post date.
    """
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next
    date_format = NotImplemented
    lang = NotImplemented

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect short url, images, title, author and date of the comic."""
        headline = soup.find('h2', class_='post-title').string
        shortlink = soup.find('link', rel='shortlink')['href']
        author_span = soup.find("span", class_="post-author")
        written_by = author_span.find("a").string
        date_text = soup.find('span', class_='post-date').string
        published = string_to_date(date_text, cls.date_format, cls.lang)
        pictures = soup.find('div', id='comic').find_all('img')
        # Sanity check: alt and title are expected to be the same text.
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        return {
            'short_url': shortlink,
            'img': [pic['src'] for pic in pictures],
            'title': headline,
            'author': written_by,
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2685
2686
2687
class BoumerieEn(GenericBoumerie):
    """Retriever for the English edition of Boumeries."""
    name = "boumeries_en"
    long_name = "Boumeries (En)"
    url = "http://comics.boumerie.com"
    date_format = "%B %d, %Y"
    lang = "en_GB.UTF-8"
2694
2695
2696
class BoumerieFr(GenericBoumerie):
    """Retriever for the French edition of Boumeries."""
    name = "boumeries_fr"
    long_name = "Boumeries (Fr)"
    url = "http://bd.boumerie.com"
    date_format = "%A, %d %B %Y"
    lang = "fr_FR.utf8"
2703
2704
2705 View Code Duplication
class UnearthedComics(GenericNavigableComic):
    """Class to retrieve Unearthed comics."""
    # Also on http://tapastic.com/series/UnearthedComics
    # Also on http://unearthedcomics.tumblr.com
    name = 'unearthed'
    long_name = 'Unearthed Comics'
    url = 'http://unearthedcomics.com'
    get_navi_link = get_link_rel_next
    get_first_comic_link = simulate_first_link
    first_url = 'http://unearthedcomics.com/comics/world-with-turn-signals/'

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        Returns a dict with the title ("" when neither an h1 nor an h2 is
        found), the description, the short url, the image URLs and the
        publication date. 'description' is the content of the
        og:description meta tag, or None when the page has no such tag.
        """
        short_url = soup.find('link', rel='shortlink')['href']
        title_elt = soup.find('h1') or soup.find('h2')
        title = title_elt.string if title_elt else ""
        # Store the tag's text content, not the tag object itself: the
        # other fields of the returned dict are plain values.
        desc_elt = soup.find('meta', property='og:description')
        desc = desc_elt['content'] if desc_elt is not None else None
        date_str = soup.find('time', class_='published updated hidden')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        post = soup.find('div', class_="entry content entry-content type-portfolio")
        imgs = post.find_all('img')
        return {
            'title': title,
            'description': desc,
            'url2': short_url,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
        }
2736
2737
2738 View Code Duplication
class Optipess(GenericNavigableComic):
    """Retriever for the Optipess comics."""
    name = 'optipess'
    long_name = 'Optipess'
    url = 'http://www.optipess.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Collect title, alt text, author, images and date of the comic.

        A page without a comic div yields an empty image list and an
        empty 'alt'.
        """
        headline = soup.find('h2', class_='post-title').string
        author_span = soup.find("span", class_="post-author")
        written_by = author_span.find("a").string
        comic_div = soup.find('div', id='comic')
        if comic_div:
            pictures = comic_div.find_all('img')
        else:
            pictures = []
        hover_text = pictures[0]['title'] if pictures else ""
        # Sanity check: every image shares the same alt/title text.
        assert all(pic['alt'] == pic['title'] == hover_text for pic in pictures)
        date_text = soup.find('span', class_='post-date').string
        published = string_to_date(date_text, "%B %d, %Y")
        return {
            'title': headline,
            'alt': hover_text,
            'author': written_by,
            'img': [pic['src'] for pic in pictures],
            'month': published.month,
            'year': published.year,
            'day': published.day,
        }
2766
2767
2768
class PainTrainComic(GenericNavigableComic):
    """Class to retrieve Pain Train Comics."""
    name = 'paintrain'
    long_name = 'Pain Train Comics'
    url = 'http://paintraincomic.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_link_rel_next

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        Returns a dict with the short url, the comic number parsed from
        the short url ("<url>/?p=<num>"), the image URLs, the publication
        date, the alt text and the title. 'alt' is the title attribute of
        the first image, or "" when the comic div holds no image.
        """
        title = soup.find('h2', class_='post-title').string
        short_url = soup.find('link', rel='shortlink')['href']
        # Escape the url so that its dots only match literal dots.
        short_url_re = re.compile(r'^%s/\?p=([0-9]*)' % re.escape(cls.url))
        num = int(short_url_re.match(short_url).group(1))
        imgs = soup.find('div', id='comic').find_all('img')
        # Guard against posts without any image instead of raising
        # IndexError, as the sibling comic classes in this file do.
        alt = imgs[0]['title'] if imgs else ""
        # Sanity check: every image is expected to share the same alt/title.
        assert all(i['alt'] == i['title'] == alt for i in imgs)
        date_str = soup.find('span', class_='post-date').string
        day = string_to_date(date_str, "%d/%m/%Y")
        return {
            'short_url': short_url,
            'num': num,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'alt': alt,
            'title': title,
        }
2798
2799
2800
class MoonBeard(GenericNavigableComic):
    """Class to retrieve MoonBeard comics."""
    # Also on http://blog.squiresjam.es/moonbeard
    # Also on http://www.webtoons.com/en/comedy/moon-beard/list?title_no=471
    name = 'moonbeard'
    long_name = 'Moon Beard'
    url = 'http://moonbeard.com'
    get_first_comic_link = get_a_navi_navifirst
    get_navi_link = get_a_navi_navinext

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        Returns a dict with the short url, the comic number parsed from
        the short url ("<url>/?p=<num>"), the image URLs, the publication
        date, the title, the space-joined 'article:tag' values, the alt
        text and the author. 'alt' is the title attribute of the first
        image, or "" when the comic div holds no image.
        """
        title = soup.find('h2', class_='post-title').string
        short_url = soup.find('link', rel='shortlink')['href']
        # Escape the url so that its dots only match literal dots.
        short_url_re = re.compile(r'^%s/\?p=([0-9]*)' % re.escape(cls.url))
        num = int(short_url_re.match(short_url).group(1))
        imgs = soup.find('div', id='comic').find_all('img')
        # Guard against posts without any image instead of raising
        # IndexError, as the sibling comic classes in this file do.
        alt = imgs[0]['title'] if imgs else ""
        # Sanity check: every image is expected to share the same alt/title.
        assert all(i['alt'] == i['title'] == alt for i in imgs)
        date_str = soup.find('span', class_='post-date').string
        day = string_to_date(date_str, "%B %d, %Y")
        tags = ' '.join(t['content'] for t in soup.find_all('meta', property='article:tag'))
        author = soup.find('span', class_='post-author').string
        return {
            'short_url': short_url,
            'num': num,
            'img': [i['src'] for i in imgs],
            'month': day.month,
            'year': day.year,
            'day': day.day,
            'title': title,
            'tags': tags,
            'alt': alt,
            'author': author,
        }
2836
2837
2838
class AHamADay(GenericNavigableComic):
    """Class to retrieve A Ham A Day comics."""
    name = 'ham'
    long_name = 'A Ham A Day'
    url = 'http://www.ahammaday.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.ahammaday.com/today/3/6/french'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns None when the page has no matching navigation item.
        """
        # prev is next / next is prev
        li = last_soup.find('li', class_='previous' if next_ else 'next')
        # find() returns None when the item is missing; do not
        # dereference it in that case.
        return li.find('a') if li else None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        Returns a dict with the image URLs (content of the meta tags with
        itemprop 'image'), the og:title, the author and the publication
        date.
        """
        date_str = soup.find('time', class_='published')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        author = soup.find('span', class_='blog-author').find('a').string
        title = soup.find('meta', property='og:title')['content']
        imgs = soup.find_all('meta', itemprop='image')
        return {
            'img': [i['content'] for i in imgs],
            'title': title,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
        }
2869 View Code Duplication
2870
2871
class LittleLifeLines(GenericNavigableComic):
    """Class to retrieve Little Life Lines comics."""
    name = 'life'
    long_name = 'Little Life Lines'
    url = 'http://www.littlelifelines.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://www.littlelifelines.com/comics/well-done'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns None when the page has no matching navigation item.
        """
        # prev is next / next is prev
        li = last_soup.find('li', class_='prev' if next_ else 'next')
        return li.find('a') if li else None

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comic.

        Returns a dict with the og:title, the alt text, the
        og:description, the author, the publication date and the image
        URLs. Only images carrying a src attribute are kept; 'alt' is the
        alt text of the first of them, or "" when there is none.
        """
        title = soup.find('meta', property='og:title')['content']
        desc = soup.find('meta', property='og:description')['content']
        date_str = soup.find('time', class_='published')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        author = soup.find('a', rel='author').string
        div_content = soup.find('div', class_="body entry-content")
        imgs = [i for i in div_content.find_all('img') if i.get('src') is not None]
        # Guard against posts without any usable image instead of raising
        # IndexError, as the sibling comic classes in this file do.
        alt = imgs[0]['alt'] if imgs else ""
        return {
            'title': title,
            'alt': alt,
            'description': desc,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [i['src'] for i in imgs],
        }
2909
2910
2911
class GenericWordPressInkblot(GenericNavigableComic):
    """Common base for comics published with WordPress and Inkblot."""
    get_navi_link = get_link_rel_next

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'first webcomic' anchor found on the home page."""
        home = get_soup_at_url(cls.url)
        first_link_class = 'webcomic-link webcomic1-link first-webcomic-link first-webcomic1-link'
        return home.find('a', class_=first_link_class)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the title, date and image URLs of the comic in soup.

        The date is the first 10 characters of 'article:published_time',
        parsed as "%Y-%m-%d".
        """
        headline = soup.find('meta', property='og:title')['content']
        pictures = soup.find('div', class_='webcomic-image').find_all('img')
        published_meta = soup.find('meta', property='article:published_time')
        published = string_to_date(published_meta['content'][:10], "%Y-%m-%d")
        return {
            'title': headline,
            'day': published.day,
            'month': published.month,
            'year': published.year,
            'img': [pic['src'] for pic in pictures],
        }
2934
2935
2936
class EverythingsStupid(GenericWordPressInkblot):
    """Retriever for the Everything's Stupid comics."""
    # Also on http://tapastic.com/series/EverythingsStupid
    # Also on http://www.webtoons.com/en/challenge/everythings-stupid/list?title_no=14591
    # Also on http://everythingsstupidcomics.tumblr.com
    name = "stupid"
    long_name = "Everything's Stupid"
    url = "http://everythingsstupid.net"
2944
2945
2946
class TheIsmComics(GenericWordPressInkblot):
    """Retriever for The Ism comics."""
    # Also on https://tapastic.com/series/TheIsm (?)
    name = "theism"
    long_name = "The Ism"
    url = "http://www.theism-comics.com"
2952
2953
2954
class WoodenPlankStudios(GenericWordPressInkblot):
    """Retriever for the Wooden Plank Studios comics."""
    name = "woodenplank"
    long_name = "Wooden Plank Studios"
    url = "http://woodenplankstudios.com"
2959
2960
2961
class ElectricBunnyComic(GenericNavigableComic):
    """Retriever for the Electric Bunny comics."""
    # Also on http://electricbunnycomics.tumblr.com
    name = 'bunny'
    long_name = 'Electric Bunny Comic'
    url = 'http://www.electricbunnycomics.com/View/Comic/153/Welcome+to+Hell'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Return the parent element of the image whose alt is 'First'."""
        first_button = get_soup_at_url(cls.url).find('img', alt='First')
        return first_button.parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the parent of the 'Next' (or 'Back') image, or None."""
        label = 'Next' if next_ else 'Back'
        button = last_soup.find('img', alt=label)
        if not button:
            return None
        return button.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the og:title and og:image values of the comic page."""
        headline = soup.find('meta', property='og:title')['content']
        image_metas = soup.find_all('meta', property='og:image')
        return {
            'title': headline,
            'img': [meta['content'] for meta in image_metas],
        }
2989
2990
2991
class SheldonComics(GenericNavigableComic):
    """Retriever for the Sheldon comics."""
    # Also on http://www.gocomics.com/sheldon
    name = 'sheldon'
    long_name = 'Sheldon Comics'
    url = 'http://www.sheldoncomics.com'

    @classmethod
    def get_first_comic_link(cls):
        """Return the anchor with id 'nav-first' from the home page."""
        home = get_soup_at_url(cls.url)
        return home.find("a", id="nav-first")

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the next (or previous) link, or None if there is none.

        Anchors pointing at 'http://www.sheldoncomics.com' are skipped.
        """
        wanted_id = "nav-next" if next_ else "nav-prev"
        candidates = last_soup.find_all("a", id=wanted_id)
        return next(
            (a for a in candidates if a['href'] != 'http://www.sheldoncomics.com'),
            None)

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the title and image URL of the comic in soup.

        The title is the title attribute of the single image expected in
        the 'comic-foot' div.
        """
        pictures = soup.find("div", id="comic-foot").find_all("img")
        # Sanity checks: alt mirrors title, and there is a single image.
        assert all(pic['alt'] == pic['title'] for pic in pictures)
        assert len(pictures) == 1
        headline = pictures[0]['title']
        return {
            'title': headline,
            'img': [pic['src'] for pic in pictures],
        }
3022
3023
3024
class CubeDrone(GenericNavigableComic):
    """Class to retrieve Cube Drone comics."""
    name = 'cubedrone'
    long_name = 'Cube Drone'
    url = 'http://cube-drone.com/comics'
    get_url_from_link = join_cls_url_to_href

    @classmethod
    def get_first_comic_link(cls):
        """Get link to first comics.

        The link is the parent of the 'glyphicon-backward' span found on
        the page at cls.url.
        """
        return get_soup_at_url(cls.url).find('span', class_='glyphicon glyphicon-backward').parent

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic.

        Returns the parent of the chevron span ('right' for next, 'left' for
        previous), or None when the page has no such span.
        """
        class_ = 'glyphicon glyphicon-chevron-' + ('right' if next_ else 'left')
        span = last_soup.find('span', class_=class_)
        # find() returns None when nothing matches: report "no link" instead
        # of failing with AttributeError on `.parent`.
        return None if span is None else span.parent

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Title and secondary url come from the twitter meta tags; 'title2' and
        'alt' come from the first 'comic img-responsive' image. `link` is not
        used.
        """
        title = soup.find('meta', attrs={'name': 'twitter:title'})['content']
        url2 = soup.find('meta', attrs={'name': 'twitter:url'})['content']
        # The publication date is not extracted. An earlier, disabled version
        # read it from h2.comic_title > small ("%B %d, %Y, %I:%M %p").
        imgs = soup.find_all('img', class_='comic img-responsive')
        title2 = imgs[0]['title']
        alt = imgs[0]['alt']
        return {
            'url2': url2,
            'title': title,
            'title2': title2,
            'alt': alt,
            'img': [i['src'] for i in imgs],
        }
3059
3060
3061
class MakeItStoopid(GenericNavigableComic):
    """Make It Stoopid comics, browsed through the 'cnav' navigation bar."""
    name = 'stoopid'
    long_name = 'Make it stoopid'
    url = 'http://makeitstoopid.com/comic.php'

    @classmethod
    def get_nav(cls, soup):
        """Return the first five 'cnav' elements, with None for href-less ones.

        The page is expected to carry the same five elements twice (checked
        by an assertion). Their order is: begin, prev, archive, next, end.
        """
        all_nav = soup.find_all(class_='cnav')
        first_bar = all_nav[:5]
        second_bar = all_nav[5:]
        assert first_bar == second_bar
        usable = []
        for element in first_bar:
            # Elements without an href cannot be followed
            usable.append(element if element.get('href') is not None else None)
        return usable

    @classmethod
    def get_first_comic_link(cls):
        """Return the 'begin' navigation element of the page at cls.url."""
        home_page = get_soup_at_url(cls.url)
        return cls.get_nav(home_page)[0]

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the 'next' (index 3) or 'prev' (index 1) navigation element."""
        position = 3 if next_ else 1
        return cls.get_nav(last_soup)[position]

    @classmethod
    def get_comic_info(cls, soup, link):
        """Return the link's title and the src of every 'comicimg' image."""
        info = {'title': link['title']}
        comic_imgs = soup.find_all('img', id='comicimg')
        info['img'] = [i['src'] for i in comic_imgs]
        return info
3095
3096
3097
class GeekAndPoke(GenericNavigableComic):
    """Class to retrieve Geek And Poke comics."""
    name = 'geek'
    long_name = 'Geek And Poke'
    url = 'http://geek-and-poke.com'
    get_url_from_link = join_cls_url_to_href
    get_first_comic_link = simulate_first_link
    first_url = 'http://geek-and-poke.com/geekandpoke/2006/8/27/a-new-place-for-a-not-so-old-blog.html'

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Get link to next or previous comic."""
        # NOTE(review): 'prev-item' is used for the *next* comic and
        # 'next-item' for the previous one. Presumably the site orders posts
        # from newest to oldest - confirm before "fixing" this mapping.
        return last_soup.find('a', class_='prev-item' if next_ else 'next-item')

    @classmethod
    def get_comic_info(cls, soup, link):
        """Get information about a particular comics.

        Returns a dict with title, description, author, publication date
        (day/month/year), the alt text of the first image and the absolute
        urls of all images having a src attribute. 'alt' is always a string;
        it is empty when the post has no image. `link` is not used.
        """
        title = soup.find('meta', property='og:title')['content']
        desc = soup.find('meta', property='og:description')['content']
        date_str = soup.find('time', class_='published')['datetime']
        day = string_to_date(date_str, "%Y-%m-%d")
        author = soup.find('a', rel='author').string
        # Posts keep their content in one of two differently named divs
        div_content = (soup.find('div', class_="body entry-content") or
                       soup.find('div', class_="special-content"))
        imgs = [i for i in div_content.find_all('img') if i.get('src') is not None]
        # NOTE(review): if these are BeautifulSoup tags, `'title' not in i`
        # tests the tag's children rather than its attributes, which would
        # make this check always pass - confirm before tightening it.
        assert all('title' not in i or i['alt'] == i['title'] for i in imgs)
        # Keep 'alt' a string in every case (it used to be [] without images)
        alt = imgs[0].get('alt', "") if imgs else ""
        return {
            'title': title,
            'alt': alt,
            'description': desc,
            'author': author,
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'img': [urljoin_wrapper(cls.url, i['src']) for i in imgs],
        }
3135
3136
3137
class GenericTumblrV1(GenericComic):
    """Base class for comics read from a Tumblr blog through the V1 API.

    Concrete comics are expected to provide the `url` class attribute, which
    is used to build the API address.
    """

    @classmethod
    def get_next_comic(cls, last_comic):
        """Yield the description of each new post that can be handled.

        Posts for which get_comic_info returns None are skipped.
        """
        for post in cls.get_posts(last_comic):
            info = cls.get_comic_info(post)
            if info is None:
                continue
            yield info

    @classmethod
    def get_url_from_post(cls, post):
        """Return the 'url' attribute of a post element."""
        return post['url']

    @classmethod
    def get_api_url(cls):
        """Return the API address: '/api/read/' joined to cls.url."""
        return urljoin_wrapper(cls.url, '/api/read/')

    @classmethod
    def get_comic_info(cls, post):
        """Return a dict describing a post, or None if it is not a photo post.

        The date is derived from the post's 'unix-timestamp' attribute with
        datetime.fromtimestamp, the title from its 'photo-caption' element
        (empty string when absent) and the tags are joined with spaces.
        """
        if post['type'] != 'photo':
            return None
        tumblr_id = int(post['id'])
        id_query = '?id=%d' % tumblr_id
        api_url = cls.get_api_url() + id_query
        timestamp = int(post['unix-timestamp'])
        day = datetime.datetime.fromtimestamp(timestamp).date()
        caption = post.find('photo-caption')
        title = caption.string if caption else ""
        tags = ' '.join(tag.string for tag in post.find_all('tag'))
        # Pictures live either in dedicated 'photo' elements or directly in
        # the post itself
        photo_holders = post.find_all('photo') or [post]
        # Several resolutions are available: the first 'photo-url' is kept
        photo_urls = [holder.find('photo-url') for holder in photo_holders]
        return {
            'url': cls.get_url_from_post(post),
            'url2': post['url-with-slug'],
            'day': day.day,
            'month': day.month,
            'year': day.year,
            'title': title,
            'tags': tags,
            'img': [u.string for u in photo_urls],
            'tumblr-id': tumblr_id,
            'api_url': api_url,  # kept to ease debugging
        }

    @classmethod
    def get_posts(cls, last_comic, nb_post_per_call=10):
        """Return the posts published after last_comic, oldest first.

        The API is queried page by page, nb_post_per_call posts at a time
        (at most 50 according to the original note), from the newest post to
        the oldest. Browsing stops at the post whose url matches last_comic.
        An empty result is returned, after printing a message, when the
        previous post cannot be fetched anymore or is never met.
        """
        collected = []
        stop_url = last_comic['url'] if last_comic else None
        if last_comic is not None:
            # Tumblr posts get deleted sometimes. If the previous post is
            # gone, looking for it would take long and find nothing: it is
            # better to fail early and clearly.
            last_api_url = last_comic['api_url']
            try:
                get_soup_at_url(last_api_url)
            except urllib.error.HTTPError:
                message = "Did not find previous post %s : it might have been deleted" % last_api_url
                try:
                    get_soup_at_url(cls.url)
                except urllib.error.HTTPError:
                    message = "Did not find previous post nor main url %s" % cls.url
                print(message)
                return reversed(collected)
        api_url = cls.get_api_url()
        summary = get_soup_at_url(api_url).find('posts')
        start, total = int(summary['start']), int(summary['total'])
        assert start == 0
        for offset in range(0, total, nb_post_per_call):
            page_url = api_url + '?start=%d&num=%d' % (offset, nb_post_per_call)
            page = get_soup_at_url(page_url).find('posts')
            page_start, page_total = int(page['start']), int(page['total'])
            assert offset == page_start, "%d != %d" % (offset, page_start)
            # The total may change while browsing; this is not handled yet
            assert total == page_total, "%d != %d" % (total, page_total)
            for post in page.find_all('post'):
                if stop_url and stop_url == cls.get_url_from_post(post):
                    return reversed(collected)
                collected.append(post)
        if stop_url is not None:
            print("Did not find %s : there might be a problem" % stop_url)
            return []
        return reversed(collected)
3232
3233
3234
class IrwinCardozo(GenericTumblrV1):
    """Irwin Cardozo comics, read through the Tumblr V1 API."""

    name = 'irwinc'
    url = 'http://irwincardozocomics.tumblr.com'
    long_name = 'Irwin Cardozo'
3239
3240
3241
class AccordingToDevin(GenericTumblrV1):
    """According To Devin comics, read through the Tumblr V1 API."""

    name = 'devin'
    url = 'http://accordingtodevin.tumblr.com'
    long_name = 'According To Devin'
3246
3247
3248
class ItsTheTieTumblr(GenericTumblrV1):
    """It's the tie comics, read through the Tumblr V1 API."""
    # Other sources:
    #   http://itsthetie.com
    #   https://tapastic.com/series/itsthetie

    name = 'tie-tumblr'
    url = "http://itsthetie.tumblr.com"
    long_name = "It's the tie (from Tumblr)"
3255
3256
3257
class OctopunsTumblr(GenericTumblrV1):
    """Octopuns comics, read through the Tumblr V1 API."""
    # Other source: http://www.octopuns.net

    name = 'octopuns-tumblr'
    url = 'http://octopuns.tumblr.com'
    long_name = 'Octopuns (from Tumblr)'
3263
3264
3265
class PicturesInBoxesTumblr(GenericTumblrV1):
    """Pictures In Boxes comics, read through the Tumblr V1 API."""
    # Other source: http://www.picturesinboxes.com

    name = 'picturesinboxes-tumblr'
    url = 'http://picturesinboxescomic.tumblr.com'
    long_name = 'Pictures in Boxes (from Tumblr)'
3271
3272
3273
class TubeyToonsTumblr(GenericTumblrV1):
    """TubeyToons comics, read through the Tumblr V1 API."""
    # Other sources:
    #   http://tapastic.com/series/Tubey-Toons
    #   http://tubeytoons.com

    name = 'tubeytoons-tumblr'
    url = 'http://tubeytoons.tumblr.com'
    long_name = 'Tubey Toons (from Tumblr)'
3280
3281
3282
class UnearthedComicsTumblr(GenericTumblrV1):
    """Unearthed comics, read through the Tumblr V1 API."""
    # Other sources:
    #   http://tapastic.com/series/UnearthedComics
    #   http://unearthedcomics.com

    name = 'unearthed-tumblr'
    url = 'http://unearthedcomics.tumblr.com'
    long_name = 'Unearthed Comics (from Tumblr)'
3289
3290
3291
class PieComic(GenericTumblrV1):
    """Pie Comic comics, read through the Tumblr V1 API."""

    name = 'pie'
    url = "http://piecomic.tumblr.com"
    long_name = 'Pie Comic'
3296
3297
3298
class MrEthanDiamond(GenericTumblrV1):
    """Mr Ethan Diamond comics, read through the Tumblr V1 API."""

    name = 'diamond'
    url = 'http://mrethandiamond.tumblr.com'
    long_name = 'Mr Ethan Diamond'
3303
3304
3305
class Flocci(GenericTumblrV1):
    """floccinaucinihilipilification comics, read through the Tumblr V1 API."""

    name = 'flocci'
    url = "http://floccinaucinihilipilificationa.tumblr.com"
    long_name = 'floccinaucinihilipilification'
3310
3311
3312
class UpAndOut(GenericTumblrV1):
    """Up & Out comics, read through the Tumblr V1 API."""
    # Other source: http://tapastic.com/series/UP-and-OUT

    name = 'upandout'
    url = 'http://upandoutcomic.tumblr.com'
    long_name = 'Up And Out (from Tumblr)'
3318
3319
3320
class Pundemonium(GenericTumblrV1):
    """Pundemonium comics, read through the Tumblr V1 API."""

    name = 'pundemonium'
    url = 'http://monstika.tumblr.com'
    long_name = 'Pundemonium'
3325
3326
3327
class PoorlyDrawnLinesTumblr(GenericEmptyComic, GenericTumblrV1):
    """Poorly Drawn Lines comics (Tumblr V1 source combined with GenericEmptyComic)."""
    # GenericEmptyComic comes first in the MRO; presumably it disables retrieval (confirm).
    # Other source: http://poorlydrawnlines.com

    name = 'poorlydrawn-tumblr'
    url = 'http://pdlcomics.tumblr.com'
    long_name = 'Poorly Drawn Lines (from Tumblr)'
3333
3334
3335
class PearShapedComics(GenericTumblrV1):
    """Pear Shaped Comics, read through the Tumblr V1 API."""

    name = 'pearshaped'
    url = 'http://pearshapedcomics.com'
    long_name = 'Pear-Shaped Comics'
3340
3341
3342
class PondScumComics(GenericTumblrV1):
    """Pond Scum Comics, read through the Tumblr V1 API."""

    name = 'pond'
    url = 'http://pondscumcomic.tumblr.com'
    long_name = 'Pond Scum'
3347
3348
3349
class MercworksTumblr(GenericTumblrV1):
    """Mercworks comics, read through the Tumblr V1 API."""
    # Other source: http://mercworks.net

    name = 'mercworks-tumblr'
    url = 'http://mercworks.tumblr.com'
    long_name = 'Mercworks (from Tumblr)'
3355
3356
3357
class OwlTurdTumblr(GenericEmptyComic, GenericTumblrV1):
    """Owl Turd comics (Tumblr V1 source combined with GenericEmptyComic)."""
    # GenericEmptyComic comes first in the MRO; presumably it disables retrieval (confirm).
    # Other source: http://tapastic.com/series/Owl-Turd-Comix

    name = 'owlturd-tumblr'
    url = 'http://owlturd.com'
    long_name = 'Owl Turd (from Tumblr)'
3363
3364
3365
class VectorBelly(GenericTumblrV1):
    """Vector Belly comics, read through the Tumblr V1 API."""
    # Other source: http://vectorbelly.com

    name = 'vector'
    url = 'http://vectorbelly.tumblr.com'
    long_name = 'Vector Belly'
3371
3372
3373
class GoneIntoRapture(GenericTumblrV1):
    """Gone Into Rapture comics, read through the Tumblr V1 API."""
    # Other sources:
    #   http://goneintorapture.tumblr.com
    #   http://tapastic.com/series/Goneintorapture

    name = 'rapture'
    url = 'http://www.goneintorapture.com'
    long_name = 'Gone Into Rapture'
3380
3381
3382
class TheOatmealTumblr(GenericTumblrV1):
    """The Oatmeal comics, read through the Tumblr V1 API."""
    # Other source: http://theoatmeal.com

    name = 'oatmeal-tumblr'
    url = 'http://oatmeal.tumblr.com'
    long_name = 'The Oatmeal (from Tumblr)'
3388
3389
3390
class HeckIfIKnowComicsTumblr(GenericTumblrV1):
    """Heck If I Know Comics, read through the Tumblr V1 API."""
    # Other source: http://tapastic.com/series/Regular

    name = 'heck-tumblr'
    url = 'http://heckifiknowcomics.com'
    long_name = 'Heck if I Know comics (from Tumblr)'
3396
3397
3398
class MyJetPack(GenericTumblrV1):
    """My Jet Pack comics, read through the Tumblr V1 API."""

    name = 'jetpack'
    url = 'http://myjetpack.tumblr.com'
    long_name = 'My Jet Pack'
3403
3404
3405
class CheerUpEmoKidTumblr(GenericTumblrV1):
    """CheerUpEmoKid comics, read through the Tumblr V1 API."""
    # Other sources:
    #   http://www.cheerupemokid.com
    #   http://tapastic.com/series/CUEK

    name = 'cuek-tumblr'
    url = 'http://enzocomics.tumblr.com'
    long_name = 'Cheer Up Emo Kid (from Tumblr)'
3412
3413
3414
class ForLackOfABetterComic(GenericEmptyComic, GenericTumblrV1):
    """For Lack Of A Better Comics (Tumblr V1 source combined with GenericEmptyComic)."""
    # GenericEmptyComic comes first in the MRO; presumably it disables retrieval (confirm).
    # Other source: http://forlackofabettercomic.com

    name = 'lack'
    url = 'http://forlackofabettercomic.tumblr.com'
    long_name = 'For Lack Of A Better Comic'
3420
3421
3422
class ZenPencilsTumblr(GenericTumblrV1):
    """ZenPencils comics, read through the Tumblr V1 API."""
    # Other sources:
    #   http://zenpencils.com
    #   http://www.gocomics.com/zen-pencils

    name = 'zenpencils-tumblr'
    url = 'http://zenpencils.tumblr.com'
    long_name = 'Zen Pencils (from Tumblr)'
3429
3430
3431
class ThreeWordPhraseTumblr(GenericTumblrV1):
    """Three Word Phrase comics, read through the Tumblr V1 API."""
    # Other source: http://threewordphrase.com

    name = 'threeword-tumblr'
    url = 'http://www.threewordphrase.tumblr.com'
    long_name = 'Three Word Phrase (from Tumblr)'
3437
3438
3439
class TimeTrabbleTumblr(GenericTumblrV1):
    """Time Trabble comics, read through the Tumblr V1 API."""
    # Other source: http://timetrabble.com

    name = 'timetrabble-tumblr'
    url = 'http://timetrabble.tumblr.com'
    long_name = 'Time Trabble (from Tumblr)'
3445
3446
3447
class SafelyEndangeredTumblr(GenericTumblrV1):
    """Safely Endangered comics, read through the Tumblr V1 API."""
    # Other source: http://www.safelyendangered.com

    name = 'endangered-tumblr'
    url = 'http://tumblr.safelyendangered.com'
    long_name = 'Safely Endangered (from Tumblr)'
3453
3454
3455
class MouseBearComedyTumblr(GenericTumblrV1):
    """Mouse Bear Comedy comics, read through the Tumblr V1 API."""
    # Other source: http://www.mousebearcomedy.com

    name = 'mousebear-tumblr'
    url = 'http://mousebearcomedy.tumblr.com'
    long_name = 'Mouse Bear Comedy (from Tumblr)'
3461
3462
3463
class BouletCorpTumblr(GenericTumblrV1):
    """BouletCorp comics, read through the Tumblr V1 API."""
    # Other source: http://www.bouletcorp.com

    name = 'boulet-tumblr'
    url = 'http://bouletcorp.tumblr.com'
    long_name = 'Boulet Corp (from Tumblr)'
3469
3470
3471
class TheAwkwardYetiTumblr(GenericEmptyComic, GenericTumblrV1):
    """The Awkward Yeti comics (Tumblr V1 source combined with GenericEmptyComic)."""
    # GenericEmptyComic comes first in the MRO; presumably it disables retrieval (confirm).
    # Other sources:
    #   http://www.gocomics.com/the-awkward-yeti
    #   http://theawkwardyeti.com
    #   https://tapastic.com/series/TheAwkwardYeti

    name = 'yeti-tumblr'
    url = 'http://larstheyeti.tumblr.com'
    long_name = 'The Awkward Yeti (from Tumblr)'
3479
3480
3481
class NellucNhoj(GenericTumblrV1):
    """NellucNhoj comics, read through the Tumblr V1 API."""

    name = 'nhoj'
    url = 'http://nellucnhoj.com'
    long_name = 'Nelluc Nhoj'
3486
3487
3488
class DownTheUpwardSpiralTumblr(GenericTumblrV1):
    """Down The Upward Spiral comics, read through the Tumblr V1 API."""
    # Other source: http://www.downtheupwardspiral.com

    name = 'spiral-tumblr'
    url = 'http://downtheupwardspiral.tumblr.com'
    long_name = 'Down the Upward Spiral (from Tumblr)'
3494
3495
3496
class AsPerUsualTumblr(GenericTumblrV1):
    """As Per Usual comics, read through the Tumblr V1 API."""
    # Other source: https://tapastic.com/series/AsPerUsual

    name = 'usual-tumblr'
    url = 'http://as-per-usual.tumblr.com'
    long_name = 'As Per Usual (from Tumblr)'
3502
3503
3504
class OneOneOneOneComicTumblr(GenericTumblrV1):
    """1111 Comics, read through the Tumblr V1 API."""
    # Other sources:
    #   http://www.1111comics.me
    #   https://tapastic.com/series/1111-Comics

    name = '1111-tumblr'
    url = 'http://comics1111.tumblr.com'
    long_name = '1111 Comics (from Tumblr)'
3511
3512
3513
class JhallComicsTumblr(GenericTumblrV1):
    """Jhall Comics, read through the Tumblr V1 API."""
    # Other source: http://jhallcomics.com

    name = 'jhall-tumblr'
    url = 'http://jhallcomics.tumblr.com'
    long_name = 'Jhall Comics (from Tumblr)'
3519
3520
3521
class BerkeleyMewsTumblr(GenericTumblrV1):
    """Berkeley Mews comics, read through the Tumblr V1 API."""
    # Other sources:
    #   http://www.gocomics.com/berkeley-mews
    #   http://www.berkeleymews.com

    name = 'berkeley-tumblr'
    url = 'http://mews.tumblr.com'
    long_name = 'Berkeley Mews (from Tumblr)'
3528
3529
3530
class JoanCornellaTumblr(GenericTumblrV1):
    """Joan Cornella comics, read through the Tumblr V1 API."""
    # Other source: http://joancornella.net

    name = 'cornella-tumblr'
    url = 'http://cornellajoan.tumblr.com'
    long_name = 'Joan Cornella (from Tumblr)'
3536
3537
3538
class RespawnComicTumblr(GenericTumblrV1):
    """Respawn Comic, read through the Tumblr V1 API."""
    # Other source: http://respawncomic.com

    name = 'respawn-tumblr'
    url = 'http://respawncomic.tumblr.com'
    long_name = 'Respawn Comic (from Tumblr)'
3544
3545
3546
class ChrisHallbeckTumblr(GenericEmptyComic, GenericTumblrV1):
    """Class to retrieve Chris Hallbeck comics."""
    # GenericEmptyComic comes first in the MRO; presumably it disables retrieval (confirm).
    # Also on https://tapastic.com/ChrisHallbeck
    # Also on http://maximumble.com
    # Also on http://minimumble.com
    # Also on http://thebookofbiff.com
    name = 'hallbeck-tumblr'
    # Spelling fixed ("Hallback" -> "Hallbeck") to match the class name,
    # `name` and `url`.
    long_name = 'Chris Hallbeck (from Tumblr)'
    url = 'http://chrishallbeck.tumblr.com'
3555
3556
3557
class ComicNuggets(GenericTumblrV1):
    """Comic Nuggets, read through the Tumblr V1 API."""

    name = 'nuggets'
    url = 'http://comicnuggets.com'
    long_name = 'Comic Nuggets'
3562
3563
3564
class PigeonGazetteTumblr(GenericTumblrV1):
    """The Pigeon Gazette comics, read through the Tumblr V1 API."""
    # Other source: https://tapastic.com/series/The-Pigeon-Gazette

    name = 'pigeon-tumblr'
    url = 'http://thepigeongazette.tumblr.com'
    long_name = 'The Pigeon Gazette (from Tumblr)'
3570
3571
3572
class CancerOwl(GenericTumblrV1):
    """Cancer Owl comics, read through the Tumblr V1 API."""
    # Other source: http://cancerowl.com

    name = 'cancerowl-tumblr'
    url = 'http://cancerowl.tumblr.com'
    long_name = 'Cancer Owl (from Tumblr)'
3578
3579
3580
class FowlLanguageTumblr(GenericTumblrV1):
    """Fowl Language comics, read through the Tumblr V1 API."""
    # Other sources:
    #   http://www.fowllanguagecomics.com
    #   http://tapastic.com/series/Fowl-Language-Comics
    #   http://www.gocomics.com/fowl-language

    name = 'fowllanguage-tumblr'
    url = 'http://fowllanguagecomics.tumblr.com'
    long_name = 'Fowl Language Comics (from Tumblr)'
3588
3589
3590
class TheOdd1sOutTumblr(GenericTumblrV1):
    """The Odd 1s Out comics, read through the Tumblr V1 API."""
    # Other sources:
    #   http://theodd1sout.com
    #   https://tapastic.com/series/Theodd1sout

    name = 'theodd-tumblr'
    url = 'http://theodd1sout.tumblr.com'
    long_name = 'The Odd 1s Out (from Tumblr)'
3597
3598
3599
class TheUnderfoldTumblr(GenericTumblrV1):
    """The Underfold comics, read through the Tumblr V1 API."""
    # Other source: http://theunderfold.com

    name = 'underfold-tumblr'
    url = 'http://theunderfold.tumblr.com'
    long_name = 'The Underfold (from Tumblr)'
3605
3606
3607
class LolNeinTumblr(GenericTumblrV1):
    """Lol Nein comics, read through the Tumblr V1 API."""
    # Other source: http://lolnein.com

    name = 'lolnein-tumblr'
    url = 'http://lolneincom.tumblr.com'
    long_name = 'Lol Nein (from Tumblr)'
3613
3614
3615
class FatAwesomeComicsTumblr(GenericTumblrV1):
    """Fat Awesome Comics, read through the Tumblr V1 API."""
    # Other source: http://fatawesome.com/comics

    name = 'fatawesome-tumblr'
    url = 'http://fatawesomecomedy.tumblr.com'
    long_name = 'Fat Awesome (from Tumblr)'
3621
3622
3623
class TheWorldIsFlatTumblr(GenericTumblrV1):
    """The World Is Flat Comics, read through the Tumblr V1 API."""
    # Other source: https://tapastic.com/series/The-World-is-Flat

    name = 'flatworld-tumblr'
    url = 'http://theworldisflatcomics.tumblr.com'
    long_name = 'The World Is Flat (from Tumblr)'
3629
3630
3631
class DorrisMc(GenericEmptyComic, GenericTumblrV1):
    """Dorris Mc Comics (Tumblr V1 source combined with GenericEmptyComic)."""
    # GenericEmptyComic comes first in the MRO; presumably it disables retrieval (confirm).
    # Other source: http://www.gocomics.com/dorris-mccomics

    name = 'dorrismc'
    url = 'http://dorrismccomics.com'
    long_name = 'Dorris Mc'
3637
3638
3639
class LeleozTumblr(GenericEmptyComic, GenericTumblrV1):
    """Leleoz comics (Tumblr V1 source combined with GenericEmptyComic)."""
    # GenericEmptyComic comes first in the MRO; presumably it disables retrieval (confirm).
    # Other source: https://tapastic.com/series/Leleoz

    name = 'leleoz-tumblr'
    url = 'http://leleozcomics.tumblr.com'
    long_name = 'Leleoz (from Tumblr)'
3645
3646
3647
class MoonBeardTumblr(GenericTumblrV1):
    """MoonBeard comics, read through the Tumblr V1 API."""
    # Other sources:
    #   http://moonbeard.com
    #   http://www.webtoons.com/en/comedy/moon-beard/list?title_no=471

    name = 'moonbeard-tumblr'
    url = 'http://blog.squiresjam.es/moonbeard'
    long_name = 'Moon Beard (from Tumblr)'
3654
3655
3656
class AComik(GenericTumblrV1):
    """A Comik comics, read through the Tumblr V1 API."""

    name = 'comik'
    url = 'http://acomik.com'
    long_name = 'A Comik'
3661
3662
3663
class ClassicRandy(GenericTumblrV1):
    """Classic Randy comics, read through the Tumblr V1 API."""

    name = 'randy'
    url = 'http://classicrandy.tumblr.com'
    long_name = 'Classic Randy'
3668
3669
3670
class DagssonTumblr(GenericTumblrV1):
    """Dagsson comics, read through the Tumblr V1 API."""
    # Other source: http://www.dagsson.com

    name = 'dagsson-tumblr'
    url = 'http://hugleikurdagsson.tumblr.com'
    long_name = 'Dagsson Hugleikur (from Tumblr)'
3676
3677
3678
class LinsEditionsTumblr(GenericTumblrV1):
    """L.I.N.S. Editions comics, read through the Tumblr V1 API."""
    # Other source: https://linsedition.com

    name = 'lins-tumblr'
    url = 'http://linscomics.tumblr.com'
    long_name = 'L.I.N.S. Editions (from Tumblr)'
3684
3685
3686
class OrigamiHotDish(GenericTumblrV1):
    """Origami Hot Dish comics, read through the Tumblr V1 API."""

    name = 'origamihotdish'
    url = 'http://origamihotdish.com'
    long_name = 'Origami Hot Dish'
3691
3692
3693
class HitAndMissComicsTumblr(GenericTumblrV1):
    """Hit and Miss Comics, read through the Tumblr V1 API."""

    name = 'hitandmiss'
    url = 'http://hitandmisscomics.tumblr.com'
    long_name = 'Hit and Miss Comics'
3698
3699
3700
class HMBlanc(GenericTumblrV1):
    """HM Blanc comics, read through the Tumblr V1 API."""

    name = 'hmblanc'
    url = 'http://hmblanc.tumblr.com'
    long_name = 'HM Blanc'
3705
3706
3707
class TalesOfAbsurdityTumblr(GenericTumblrV1):
    """Tales Of Absurdity comics, read through the Tumblr V1 API."""
    # Other sources:
    #   http://talesofabsurdity.com
    #   http://tapastic.com/series/Tales-Of-Absurdity

    name = 'absurdity-tumblr'
    url = 'http://talesofabsurdity.tumblr.com'
    long_name = 'Tales of Absurdity (from Tumblr)'
3714
3715
3716
class RobbieAndBobby(GenericTumblrV1):
    """Robbie And Bobby comics, read through the Tumblr V1 API."""
    # Other source: http://robbieandbobby.com

    name = 'robbie-tumblr'
    url = 'http://robbieandbobby.tumblr.com'
    long_name = 'Robbie And Bobby (from Tumblr)'
3722
3723
3724
class ElectricBunnyComicTumblr(GenericTumblrV1):
    """Electric Bunny Comics, read through the Tumblr V1 API."""
    # Other source: http://www.electricbunnycomics.com/View/Comic/153/Welcome+to+Hell

    name = 'bunny-tumblr'
    url = 'http://electricbunnycomics.tumblr.com'
    long_name = 'Electric Bunny Comic (from Tumblr)'
3730
3731
3732
class Hoomph(GenericTumblrV1):
    """Hoomph comics, read through the Tumblr V1 API."""

    name = 'hoomph'
    url = 'http://hoom.ph'
    long_name = 'Hoomph'
3737
3738
3739
class BFGFSTumblr(GenericTumblrV1):
    """BFGFS comics, read through the Tumblr V1 API."""
    # Other sources:
    #   https://tapastic.com/series/BFGFS
    #   http://bfgfs.com

    name = 'bfgfs-tumblr'
    url = 'http://bfgfs.tumblr.com'
    long_name = 'BFGFS (from Tumblr)'
3746
3747
3748
class DoodleForFood(GenericTumblrV1):
    """Doodle For Food comics, read through the Tumblr V1 API."""
    # Also published on http://doodleforfood.com (same address as `url`)

    name = 'doodle'
    url = 'http://doodleforfood.com'
    long_name = 'Doodle For Food'
3754
3755
3756
class CassandraCalinTumblr(GenericEmptyComic, GenericTumblrV1):
    """C. Cassandra comics (Tumblr V1 source combined with GenericEmptyComic)."""
    # GenericEmptyComic comes first in the MRO; presumably it disables retrieval (confirm).
    # Other sources:
    #   http://cassandracalin.com
    #   https://tapastic.com/series/C-Cassandra-comics

    name = 'cassandra-tumblr'
    url = 'http://c-cassandra.tumblr.com'
    long_name = 'Cassandra Calin (from Tumblr)'
3763
3764
3765
class DougWasTaken(GenericTumblrV1):
    """Doug Was Taken comics, read through the Tumblr V1 API."""

    name = 'doog'
    url = 'http://dougwastaken.tumblr.com'
    long_name = 'Doug Was Taken'
3770
3771
3772
class MandatoryRollerCoaster(GenericEmptyComic, GenericTumblrV1):
    """Mandatory Roller Coaster comics (Tumblr V1 source combined with GenericEmptyComic)."""
    # GenericEmptyComic comes first in the MRO; presumably it disables retrieval (confirm).

    name = 'rollercoaster'
    url = 'http://mandatoryrollercoaster.com'
    long_name = 'Mandatory Roller Coaster'
3777
3778
3779
class CEstPasEnRegardantSesPompes(GenericEmptyComic, GenericTumblrV1):
    """C'Est Pas En Regardant Ses Pompes (...) comics (Tumblr V1 source combined with GenericEmptyComic)."""
    # GenericEmptyComic comes first in the MRO; presumably it disables retrieval (confirm).

    name = 'cperspqccltt'
    url = 'http://cperspqccltt.tumblr.com'
    long_name = 'C Est Pas En Regardant Ses Pompes (...)'
3784
3785
3786
class TheGrohlTroll(GenericEmptyComic, GenericTumblrV1):
    """The Grohl Troll comics (Tumblr V1 source combined with GenericEmptyComic)."""
    # GenericEmptyComic comes first in the MRO; presumably it disables retrieval (confirm).

    name = 'grohltroll'
    url = 'http://thegrohltroll.com'
    long_name = 'The Grohl Troll'
3791
3792
3793
class WebcomicName(GenericEmptyComic, GenericTumblrV1):
    """Webcomic Name comics (Tumblr V1 source combined with GenericEmptyComic)."""
    # GenericEmptyComic comes first in the MRO; presumably it disables retrieval (confirm).

    name = 'webcomicname'
    url = 'http://webcomicname.com'
    long_name = 'Webcomic Name'
3798
3799
3800
class BooksOfAdam(GenericEmptyComic, GenericTumblrV1):
    """Books of Adam comics (Tumblr V1 source combined with GenericEmptyComic)."""
    # GenericEmptyComic comes first in the MRO; presumably it disables retrieval (confirm).
    # Other source: http://www.booksofadam.com

    name = 'booksofadam'
    url = 'http://booksofadam.tumblr.com'
    long_name = 'Books of Adam'
3806
3807
3808
class HarkAVagrant(GenericEmptyComic, GenericTumblrV1):
    """Hark A Vagrant comics (Tumblr V1 source combined with GenericEmptyComic)."""
    # GenericEmptyComic comes first in the MRO; presumably it disables retrieval (confirm).
    # Other source: http://www.harkavagrant.com

    name = 'hark-tumblr'
    url = 'http://beatonna.tumblr.com'
    long_name = 'Hark A Vagrant (from Tumblr)'
3814
3815
3816
class OurSuperAdventureTumblr(GenericEmptyComic, GenericTumblrV1):
    """Our Super Adventure comics (Tumblr V1 source combined with GenericEmptyComic)."""
    # GenericEmptyComic comes first in the MRO; presumably it disables retrieval (confirm).
    # Other sources:
    #   https://tapastic.com/series/Our-Super-Adventure
    #   http://www.oursuperadventure.com
    # Related address: http://sarahgraley.com

    name = 'superadventure-tumblr'
    url = 'http://sarahssketchbook.tumblr.com'
    long_name = 'Our Super Adventure (from Tumblr)'
3824
3825
3826
class JakeLikesOnions(GenericTumblrV1):
    """Jake Likes Onions comics, read through the Tumblr V1 API."""

    name = 'jake'
    url = 'http://jakelikesonions.com'
    long_name = 'Jake Likes Onions'
3831
3832
3833
class InYourFaceCake(GenericEmptyComic, GenericTumblrV1):
    """In Your Face Cake comics (Tumblr V1 source combined with GenericEmptyComic)."""
    # GenericEmptyComic comes first in the MRO; presumably it disables retrieval (confirm).

    name = 'inyourfacecake-tumblr'
    url = 'http://in-your-face-cake.tumblr.com'
    long_name = 'In Your Face Cake (from Tumblr)'
3838
3839
3840
class BananaTwinky(GenericTumblrV1):
    """Banana Twinky comics, read through the Tumblr V1 API."""

    name = 'banana'
    url = 'http://bananatwinky.tumblr.com'
    long_name = 'Banana Twinky'
3845
3846
3847
class HorovitzComics(GenericListableComic):
    """Shared scraping logic for the comics hosted on horovitzcomics.com.

    Concrete subclasses must provide `link_re`, a compiled pattern whose
    first group captures the comic number in an archive link's href.
    """
    url = 'http://www.horovitzcomics.com'
    # Three numeric path components after 'comics/' in the image src,
    # read below as year, month and day.
    img_re = re.compile('.*comics/([0-9]*)/([0-9]*)/([0-9]*)/.*$')
    # Placeholder: each concrete subclass overrides this with its own pattern.
    link_re = NotImplemented
    # Reuse the module-level classmethod helper to build a comic's url.
    get_url_from_archive_element = join_cls_url_to_href

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for the comic page `soup` reached via `link`.

        The number comes from the link's href, the title from the link text
        and the date from the src of the single <img id="comic"> element.
        """
        number_match = cls.link_re.match(link['href'])
        num = int(number_match.group(1))
        title = link.string
        comic_imgs = soup.find_all('img', id='comic')
        assert len(comic_imgs) == 1
        date_match = cls.img_re.match(comic_imgs[0]['src'])
        year, month, day = (int(part) for part in date_match.groups())
        return {
            'title': title,
            'day': day,
            'month': month,
            'year': year,
            'img': [img['src'] for img in comic_imgs],
            'num': num,
        }

    @classmethod
    def get_archive_elements(cls):
        """Return the archive page links matching `link_re`, in reversed page order."""
        archive_soup = get_soup_at_url('http://www.horovitzcomics.com/comics/archive/')
        matching_links = archive_soup.find_all('a', href=cls.link_re)
        return reversed(matching_links)
3877
3878
3879
class HorovitzNew(HorovitzComics):
    """Horovitz comics listed under /comics/new/ in the archive."""
    name = 'horovitznew'
    long_name = 'Horovitz New'
    # Group 1 is the comic number.
    link_re = re.compile('^/comics/new/([0-9]+)$')
3884
3885
3886
class HorovitzClassic(HorovitzComics):
    """Horovitz comics listed under /comics/classic/ in the archive."""
    name = 'horovitzclassic'
    long_name = 'Horovitz Classic'
    # Group 1 is the comic number.
    link_re = re.compile('^/comics/classic/([0-9]+)$')
3891
3892
3893
class GenericGoComic(GenericNavigableComic):
    """Shared navigation and scraping logic for comics hosted on gocomics.com."""
    # Three trailing numeric path components, read below as year/month/day.
    url_date_re = re.compile('.*/([0-9]*)/([0-9]*)/([0-9]*)$')

    @classmethod
    def get_first_comic_link(cls):
        """Return the anchor with class 'beginning' from the comic's main page."""
        main_page = get_soup_at_url(cls.url)
        return main_page.find('a', class_='beginning')

    @classmethod
    def get_navi_link(cls, last_soup, next_):
        """Return the 'next' (when next_ is true) or 'prev' anchor of a page.

        Only anchors whose href matches `url_date_re` are considered.
        """
        wanted_class = 'next' if next_ else 'prev'
        return last_soup.find('a', class_=wanted_class, href=cls.url_date_re)

    @classmethod
    def get_url_from_link(cls, link):
        """Return the link's href joined onto the gocomics.com site root."""
        return urljoin_wrapper('http://www.gocomics.com', link['href'])

    @classmethod
    def get_comic_info(cls, soup, link):
        """Build the info dict for one comic page.

        The date is parsed from the comic url, the image is the last
        <img class="strip"> of the page and the author comes from the
        'author' <meta> tag.
        """
        page_url = cls.get_url_from_link(link)
        year, month, day = map(int, cls.url_date_re.match(page_url).groups())
        strips = soup.find_all('img', class_='strip')
        author_tag = soup.find('meta', attrs={'name': 'author'})
        return {
            'day': day,
            'month': month,
            'year': year,
            'img': [strips[-1]['src']],
            'author': author_tag['content']
        }
3925
3926
3927
class PearlsBeforeSwine(GenericGoComic):
    """Pearls Before Swine comics, retrieved from gocomics.com."""
    name = 'pearls'
    long_name = 'Pearls Before Swine'
    url = 'http://www.gocomics.com/pearlsbeforeswine'
3932
3933
3934
class Peanuts(GenericGoComic):
    """Peanuts comics, retrieved from gocomics.com."""
    name = 'peanuts'
    long_name = 'Peanuts'
    url = 'http://www.gocomics.com/peanuts'
3939
3940
3941
class MattWuerker(GenericGoComic):
    """Matt Wuerker comics, retrieved from gocomics.com."""
    name = 'wuerker'
    long_name = 'Matt Wuerker'
    url = 'http://www.gocomics.com/mattwuerker'
3946
3947
3948
class TomToles(GenericGoComic):
    """Tom Toles comics, retrieved from gocomics.com."""
    name = 'toles'
    long_name = 'Tom Toles'
    url = 'http://www.gocomics.com/tomtoles'
3953
3954
3955
class BreakOfDay(GenericGoComic):
    """Break Of Day comics, retrieved from gocomics.com."""
    name = 'breakofday'
    long_name = 'Break Of Day'
    url = 'http://www.gocomics.com/break-of-day'
3960
3961
3962
class Brevity(GenericGoComic):
    """Brevity comics, retrieved from gocomics.com."""
    name = 'brevity'
    long_name = 'Brevity'
    url = 'http://www.gocomics.com/brevity'
3967
3968
3969
class MichaelRamirez(GenericGoComic):
    """Michael Ramirez comics, retrieved from gocomics.com."""
    name = 'ramirez'
    long_name = 'Michael Ramirez'
    url = 'http://www.gocomics.com/michaelramirez'
3974
3975
3976
class MikeLuckovich(GenericGoComic):
    """Mike Luckovich comics, retrieved from gocomics.com."""
    name = 'luckovich'
    long_name = 'Mike Luckovich'
    url = 'http://www.gocomics.com/mikeluckovich'
3981
3982
3983
class JimBenton(GenericGoComic):
    """Jim Benton comics, retrieved from gocomics.com."""
    # Also published at http://jimbenton.tumblr.com
    name = 'benton'
    long_name = 'Jim Benton'
    url = 'http://www.gocomics.com/jim-benton-cartoons'
3989
3990
3991
class TheArgyleSweater(GenericGoComic):
    """The Argyle Sweater comics, retrieved from gocomics.com."""
    name = 'argyle'
    long_name = 'Argyle Sweater'
    url = 'http://www.gocomics.com/theargylesweater'
3996
3997
3998
class SunnyStreet(GenericGoComic):
    """Sunny Street comics, retrieved from gocomics.com."""
    # Also published at http://www.sunnystreetcomics.com
    name = 'sunny'
    long_name = 'Sunny Street'
    url = 'http://www.gocomics.com/sunny-street'
4004
4005
4006
class OffTheMark(GenericGoComic):
    """Off The Mark comics, retrieved from gocomics.com."""
    # Also published at https://www.offthemark.com
    name = 'offthemark'
    long_name = 'Off The Mark'
    url = 'http://www.gocomics.com/offthemark'
4012
4013
4014
class WuMo(GenericGoComic):
    """WuMo comics, retrieved from gocomics.com."""
    # Also published at http://wumo.com
    name = 'wumo'
    long_name = 'WuMo'
    url = 'http://www.gocomics.com/wumo'
4020
4021
4022
class LunarBaboon(GenericGoComic):
    """Lunar Baboon comics, retrieved from gocomics.com."""
    # Also published at http://www.lunarbaboon.com
    # Also published at https://tapastic.com/series/Lunarbaboon
    name = 'lunarbaboon'
    long_name = 'Lunar Baboon'
    url = 'http://www.gocomics.com/lunarbaboon'
4029
4030
4031
class SandersenGocomic(GenericGoComic):
    """Sarah Andersen comics, retrieved from gocomics.com."""
    # Also published at http://sarahcandersen.com
    # Also published at http://tapastic.com/series/Doodle-Time
    name = 'sandersen-goc'
    long_name = 'Sarah Andersen (from GoComics)'
    url = 'http://www.gocomics.com/sarahs-scribbles'
4038
4039
4040
class CalvinAndHobbesGoComic(GenericGoComic):
    """Calvin and Hobbes comics, retrieved from gocomics.com."""
    # This is the gocomics.com source, as opposed to
    # http://marcel-oehler.marcellosendos.ch/comics/ch/
    name = 'calvin-goc'
    long_name = 'Calvin and Hobbes (from GoComics)'
    url = 'http://www.gocomics.com/calvinandhobbes'
4046
4047
4048
class RallGoComic(GenericGoComic):
    """Ted Rall comics, retrieved from gocomics.com."""
    # Also published at http://rall.com/comic
    name = 'rall-goc'
    long_name = "Ted Rall (from GoComics)"
    url = "http://www.gocomics.com/tedrall"
4054
4055
4056
class TheAwkwardYetiGoComic(GenericGoComic):
    """The Awkward Yeti comics, retrieved from gocomics.com."""
    # Also published at http://larstheyeti.tumblr.com
    # Also published at http://theawkwardyeti.com
    # Also published at https://tapastic.com/series/TheAwkwardYeti
    name = 'yeti-goc'
    long_name = 'The Awkward Yeti (from GoComics)'
    url = 'http://www.gocomics.com/the-awkward-yeti'
4064
4065
4066
class BerkeleyMewsGoComics(GenericGoComic):
    """Berkeley Mews comics, retrieved from gocomics.com."""
    # Also published at http://mews.tumblr.com
    # Also published at http://www.berkeleymews.com
    name = 'berkeley-goc'
    long_name = 'Berkeley Mews (from GoComics)'
    url = 'http://www.gocomics.com/berkeley-mews'
4073
4074
4075
class SheldonGoComics(GenericGoComic):
    """Sheldon comics, retrieved from gocomics.com."""
    # Also published at http://www.sheldoncomics.com
    name = 'sheldon-goc'
    long_name = 'Sheldon Comics (from GoComics)'
    url = 'http://www.gocomics.com/sheldon'
4081
4082
4083
class FowlLanguageGoComics(GenericGoComic):
    """Fowl Language comics, retrieved from gocomics.com."""
    # Also published at http://www.fowllanguagecomics.com
    # Also published at http://tapastic.com/series/Fowl-Language-Comics
    # Also published at http://fowllanguagecomics.tumblr.com
    name = 'fowllanguage-goc'
    long_name = 'Fowl Language Comics (from GoComics)'
    url = 'http://www.gocomics.com/fowl-language'
4091
4092
4093
class NickAnderson(GenericGoComic):
    """Nick Anderson comics, retrieved from gocomics.com."""
    name = 'nickanderson'
    long_name = 'Nick Anderson'
    url = 'http://www.gocomics.com/nickanderson'
4098
4099
4100
class GarfieldGoComics(GenericGoComic):
    """Garfield comics, retrieved from gocomics.com."""
    # Also published at http://garfield.com
    name = 'garfield-goc'
    long_name = 'Garfield (from GoComics)'
    url = 'http://www.gocomics.com/garfield'
4106
4107
4108
class DorrisMcGoComics(GenericGoComic):
    """Dorris Mc Comics, retrieved from gocomics.com."""
    # Also published at http://dorrismccomics.com
    name = 'dorrismc-goc'
    long_name = 'Dorris Mc (from GoComics)'
    url = 'http://www.gocomics.com/dorris-mccomics'
4114
4115
4116
class FoxTrot(GenericGoComic):
    """FoxTrot comics, retrieved from gocomics.com."""
    name = 'foxtrot'
    long_name = 'FoxTrot'
    url = 'http://www.gocomics.com/foxtrot'
4121
4122
4123
class FoxTrotClassics(GenericGoComic):
    """FoxTrot Classics comics, retrieved from gocomics.com."""
    name = 'foxtrot-classics'
    long_name = 'FoxTrot Classics'
    url = 'http://www.gocomics.com/foxtrotclassics'
4128
4129
4130
class MisterAndMeGoComics(GenericGoComic):
    """Mister & Me comics, retrieved from gocomics.com."""
    # Also published at http://www.mister-and-me.com
    # Also published at https://tapastic.com/series/Mister-and-Me
    name = 'mister-goc'
    long_name = 'Mister & Me (from GoComics)'
    url = 'http://www.gocomics.com/mister-and-me'
4137
4138
4139
class NonSequitur(GenericGoComic):
    """Non Sequitur (Wiley Miller) comics, retrieved from gocomics.com."""
    name = 'nonsequitur'
    long_name = 'Non Sequitur'
    url = 'http://www.gocomics.com/nonsequitur'
4144
4145
4146
class GenericTapasticComic(GenericListableComic):
    """Shared scraping logic for comics hosted on tapastic.com."""

    @classmethod
    def get_comic_info(cls, soup, archive_elt):
        """Build the info dict for one episode, or return None if it has no image yet.

        The date comes from the episode's 'publishDate' (milliseconds since
        the epoch, converted with datetime.fromtimestamp) and the images are
        every <img class="art-image"> of the episode page.
        """
        seconds = int(archive_elt['publishDate']) / 1000.0
        published = datetime.datetime.fromtimestamp(seconds).date()
        art_imgs = soup.find_all('img', class_='art-image')
        if art_imgs:
            return {
                'day': published.day,
                'year': published.year,
                'month': published.month,
                'img': [img['src'] for img in art_imgs],
                'title': archive_elt['title'],
            }
        # No image on the page: report it and let the caller skip this one.
        print("Comic %s is being uploaded, retry later" % cls.get_url_from_archive_element(archive_elt))
        return None

    @classmethod
    def get_url_from_archive_element(cls, archive_elt):
        """Return the episode url built from the archive element's 'id'."""
        return 'http://tapastic.com/episode/' + str(archive_elt['id'])

    @classmethod
    def get_archive_elements(cls):
        """Return the episode list embedded in the series page's javascript.

        The page is scanned for a line of the form 'episodeList : <json>,'
        and the JSON payload of the first such line is decoded.  An
        IndexError is raised when no line matches.
        """
        pref, suff = 'episodeList : ', ','
        # The information lives in the javascript part of the page; there is
        # no clean accessor for it, hence this line-based extraction.
        payloads = []
        for raw_line in urlopen_wrapper(cls.url).readlines():
            line = raw_line.decode('utf-8').strip()
            if line.startswith(pref) and line.endswith(suff):
                payloads.append(line[len(pref):-len(suff)])
        return json.loads(payloads[0])
4178
4179
4180
class VegetablesForDessert(GenericTapasticComic):
    """Vegetables For Dessert comics, retrieved from tapastic.com."""
    # Also published at http://vegetablesfordessert.tumblr.com
    name = 'vegetables'
    long_name = 'Vegetables For Dessert'
    url = 'http://tapastic.com/series/vegetablesfordessert'
4186
4187
4188
class FowlLanguageTapa(GenericTapasticComic):
    """Fowl Language comics, retrieved from tapastic.com."""
    # Also published at http://www.fowllanguagecomics.com
    # Also published at http://fowllanguagecomics.tumblr.com
    # Also published at http://www.gocomics.com/fowl-language
    name = 'fowllanguage-tapa'
    long_name = 'Fowl Language Comics (from Tapastic)'
    url = 'http://tapastic.com/series/Fowl-Language-Comics'
4196
4197
4198
class OscillatingProfundities(GenericTapasticComic):
    """Oscillating Profundities comics, retrieved from tapastic.com."""
    name = 'oscillating'
    long_name = 'Oscillating Profundities'
    url = 'http://tapastic.com/series/oscillatingprofundities'
4203
4204
4205
class ZnoflatsComics(GenericTapasticComic):
    """Znoflats comics, retrieved from tapastic.com."""
    name = 'znoflats'
    long_name = 'Znoflats Comics'
    url = 'http://tapastic.com/series/Znoflats-Comics'
4210
4211
4212
class SandersenTapastic(GenericTapasticComic):
    """Sarah Andersen comics, retrieved from tapastic.com."""
    # Also published at http://sarahcandersen.com
    # Also published at http://www.gocomics.com/sarahs-scribbles
    name = 'sandersen-tapa'
    long_name = 'Sarah Andersen (from Tapastic)'
    url = 'http://tapastic.com/series/Doodle-Time'
4219
4220
4221
class TubeyToonsTapastic(GenericTapasticComic):
    """TubeyToons comics, retrieved from tapastic.com."""
    # Also published at http://tubeytoons.com
    # Also published at http://tubeytoons.tumblr.com
    name = 'tubeytoons-tapa'
    long_name = 'Tubey Toons (from Tapastic)'
    url = 'http://tapastic.com/series/Tubey-Toons'
4228
4229
4230
class AnythingComicTapastic(GenericTapasticComic):
    """Anything Comic comics, retrieved from tapastic.com."""
    # Also published at http://www.anythingcomic.com
    name = 'anythingcomic-tapa'
    long_name = 'Anything Comic (from Tapastic)'
    url = 'http://tapastic.com/series/anything'
4236
4237
4238
class UnearthedComicsTapastic(GenericTapasticComic):
    """Unearthed Comics, retrieved from tapastic.com."""
    # Also published at http://unearthedcomics.com
    # Also published at http://unearthedcomics.tumblr.com
    name = 'unearthed-tapa'
    long_name = 'Unearthed Comics (from Tapastic)'
    url = 'http://tapastic.com/series/UnearthedComics'
4245
4246
4247
class EverythingsStupidTapastic(GenericTapasticComic):
    """Everything's Stupid comics, retrieved from tapastic.com."""
    # Also published at http://www.webtoons.com/en/challenge/everythings-stupid/list?title_no=14591
    # Also published at http://everythingsstupid.net
    name = 'stupid-tapa'
    long_name = "Everything's Stupid (from Tapastic)"
    url = 'http://tapastic.com/series/EverythingsStupid'
4254
4255
4256
class JustSayEhTapastic(GenericTapasticComic):
    """Just Say Eh comics, retrieved from tapastic.com."""
    # Also published at http://www.justsayeh.com
    name = 'justsayeh-tapa'
    long_name = 'Just Say Eh (from Tapastic)'
    url = 'http://tapastic.com/series/Just-Say-Eh'
4262
4263
4264
class ThorsThundershackTapastic(GenericTapasticComic):
    """Thor's Thundershack comics, retrieved from tapastic.com."""
    # Also published at http://www.thorsthundershack.com
    name = 'thor-tapa'
    long_name = 'Thor\'s Thundershack (from Tapastic)'
    # NOTE(review): the slug ends in 'Thundershac' (no 'k') - confirm it is
    # the series' real url and not a truncation.
    url = 'http://tapastic.com/series/Thors-Thundershac'
4270
4271
4272
class OwlTurdTapastic(GenericTapasticComic):
    """Owl Turd comics, retrieved from tapastic.com."""
    # Also published at http://owlturd.com
    name = 'owlturd-tapa'
    long_name = 'Owl Turd (from Tapastic)'
    url = 'http://tapastic.com/series/Owl-Turd-Comix'
4278
4279
4280
class GoneIntoRaptureTapastic(GenericTapasticComic):
    """Gone Into Rapture comics, retrieved from tapastic.com."""
    # Also published at http://goneintorapture.tumblr.com
    # Also published at http://www.goneintorapture.com
    name = 'rapture-tapa'
    long_name = 'Gone Into Rapture (from Tapastic)'
    url = 'http://tapastic.com/series/Goneintorapture'
4287
4288
4289
class HeckIfIKnowComicsTapa(GenericTapasticComic):
    """Heck If I Know Comics, retrieved from tapastic.com."""
    # Also published at http://heckifiknowcomics.com
    name = 'heck-tapa'
    long_name = 'Heck if I Know comics (from Tapastic)'
    url = 'http://tapastic.com/series/Regular'
4295
4296
4297
class CheerUpEmoKidTapa(GenericTapasticComic):
    """Cheer Up Emo Kid comics, retrieved from tapastic.com."""
    # Also published at http://www.cheerupemokid.com
    # Also published at http://enzocomics.tumblr.com
    name = 'cuek-tapa'
    long_name = 'Cheer Up Emo Kid (from Tapastic)'
    url = 'http://tapastic.com/series/CUEK'
4304
4305
4306
class BigFootJusticeTapa(GenericTapasticComic):
    """Big Foot Justice comics, retrieved from tapastic.com."""
    # Also published at http://bigfootjustice.com
    name = 'bigfoot-tapa'
    long_name = 'Big Foot Justice (from Tapastic)'
    url = 'http://tapastic.com/series/bigfoot-justice'
4312
4313
4314
class UpAndOutTapa(GenericTapasticComic):
    """Up & Out comics, retrieved from tapastic.com."""
    # Also published at http://upandoutcomic.tumblr.com
    name = 'upandout-tapa'
    long_name = 'Up And Out (from Tapastic)'
    url = 'http://tapastic.com/series/UP-and-OUT'
4320
4321
4322
class ToonHoleTapa(GenericTapasticComic):
    """Toon Hole comics, retrieved from tapastic.com."""
    # Also published at http://www.toonhole.com
    name = 'toonhole-tapa'
    long_name = 'Toon Hole (from Tapastic)'
    url = 'http://tapastic.com/series/TOONHOLE'
4328
4329
4330
class AngryAtNothingTapa(GenericTapasticComic):
    """Angry at Nothing comics, retrieved from tapastic.com."""
    # Also published at http://www.angryatnothing.net
    name = 'angry-tapa'
    long_name = 'Angry At Nothing (from Tapastic)'
    url = 'http://tapastic.com/series/Comics-yeah-definitely-comics-'
4336
4337
4338
class LeleozTapa(GenericTapasticComic):
    """Leleoz comics, retrieved from tapastic.com."""
    # Also published at http://leleozcomics.tumblr.com
    name = 'leleoz-tapa'
    long_name = 'Leleoz (from Tapastic)'
    url = 'https://tapastic.com/series/Leleoz'
4344
4345
4346
class TheAwkwardYetiTapa(GenericTapasticComic):
    """The Awkward Yeti comics, retrieved from tapastic.com."""
    # Also published at http://www.gocomics.com/the-awkward-yeti
    # Also published at http://theawkwardyeti.com
    # Also published at http://larstheyeti.tumblr.com
    name = 'yeti-tapa'
    long_name = 'The Awkward Yeti (from Tapastic)'
    url = 'https://tapastic.com/series/TheAwkwardYeti'
4354
4355
4356
class AsPerUsualTapa(GenericTapasticComic):
    """As Per Usual comics, retrieved from tapastic.com."""
    # Also published at http://as-per-usual.tumblr.com
    name = 'usual-tapa'
    long_name = 'As Per Usual (from Tapastic)'
    url = 'https://tapastic.com/series/AsPerUsual'
4362
4363
4364
class OneOneOneOneComicTapa(GenericTapasticComic):
    """1111 Comics, retrieved from tapastic.com."""
    # Also published at http://www.1111comics.me
    # Also published at http://comics1111.tumblr.com
    name = '1111-tapa'
    long_name = '1111 Comics (from Tapastic)'
    url = 'https://tapastic.com/series/1111-Comics'
4371
4372
4373
class TumbleDryTapa(GenericTapasticComic):
    """Tumble Dry comics, retrieved from tapastic.com."""
    # Also published at http://tumbledrycomics.com
    name = 'tumbledry-tapa'
    # Display name; spelled 'Tumble Dry' to match the comic's url and sites.
    long_name = 'Tumble Dry (from Tapastic)'
    url = 'https://tapastic.com/series/TumbleDryComics'
4379
4380
4381
class DeadlyPanelTapa(GenericTapasticComic):
    """Deadly Panel comics, retrieved from tapastic.com."""
    # Also published at http://www.deadlypanel.com
    name = 'deadly-tapa'
    long_name = 'Deadly Panel (from Tapastic)'
    url = 'https://tapastic.com/series/deadlypanel'
4387
4388
4389
class ChrisHallbeckMaxiTapa(GenericTapasticComic):
    """Chris Hallbeck's Maximumble comics, retrieved from tapastic.com."""
    # Also published at http://chrishallbeck.tumblr.com
    # Also published at http://maximumble.com
    name = 'hallbeckmaxi-tapa'
    # Display name; the author's surname is spelled 'Hallbeck'.
    long_name = 'Chris Hallbeck - Maximumble (from Tapastic)'
    url = 'https://tapastic.com/series/Maximumble'
4396
4397
4398
class ChrisHallbeckMiniTapa(GenericTapasticComic):
    """Chris Hallbeck's Minimumble comics, retrieved from tapastic.com."""
    # Also published at http://chrishallbeck.tumblr.com
    # Also published at http://minimumble.com
    name = 'hallbeckmini-tapa'
    # Display name; the author's surname is spelled 'Hallbeck'.
    long_name = 'Chris Hallbeck - Minimumble (from Tapastic)'
    url = 'https://tapastic.com/series/Minimumble'
4405
4406
4407
class ChrisHallbeckBiffTapa(GenericTapasticComic):
    """Chris Hallbeck's The Book of Biff comics, retrieved from tapastic.com."""
    # Also published at http://chrishallbeck.tumblr.com
    # Also published at http://thebookofbiff.com
    name = 'hallbeckbiff-tapa'
    # Display name; the author's surname is spelled 'Hallbeck'.
    long_name = 'Chris Hallbeck - The Book of Biff (from Tapastic)'
    url = 'https://tapastic.com/series/Biff'
4414
4415
4416
class RandoWisTapa(GenericTapasticComic):
    """RandoWis comics, retrieved from tapastic.com."""
    # Also published at https://randowis.com
    name = 'randowis-tapa'
    long_name = 'RandoWis (from Tapastic)'
    url = 'https://tapastic.com/series/RandoWis'
4422
4423
4424
class PigeonGazetteTapa(GenericTapasticComic):
    """The Pigeon Gazette comics, retrieved from tapastic.com."""
    # Also published at http://thepigeongazette.tumblr.com
    name = 'pigeon-tapa'
    long_name = 'The Pigeon Gazette (from Tapastic)'
    url = 'https://tapastic.com/series/The-Pigeon-Gazette'
4430
4431
4432
class TheOdd1sOutTapa(GenericTapasticComic):
    """The Odd 1s Out comics, retrieved from tapastic.com."""
    # Also published at http://theodd1sout.com
    # Also published at http://theodd1sout.tumblr.com
    name = 'theodd-tapa'
    long_name = 'The Odd 1s Out (from Tapastic)'
    url = 'https://tapastic.com/series/Theodd1sout'
4439
4440
4441
class TheWorldIsFlatTapa(GenericTapasticComic):
    """The World Is Flat comics, retrieved from tapastic.com."""
    # Also published at http://theworldisflatcomics.tumblr.com
    name = 'flatworld-tapa'
    long_name = 'The World Is Flat (from Tapastic)'
    url = 'https://tapastic.com/series/The-World-is-Flat'
4447
4448
4449
class MisterAndMeTapa(GenericTapasticComic):
    """Mister & Me comics, retrieved from tapastic.com."""
    # Also published at http://www.mister-and-me.com
    # Also published at http://www.gocomics.com/mister-and-me
    name = 'mister-tapa'
    long_name = 'Mister & Me (from Tapastic)'
    url = 'https://tapastic.com/series/Mister-and-Me'
4456
4457
4458
class TalesOfAbsurdityTapa(GenericTapasticComic):
    """Tales Of Absurdity comics, retrieved from tapastic.com."""
    # Also published at http://talesofabsurdity.com
    # Also published at http://talesofabsurdity.tumblr.com
    name = 'absurdity-tapa'
    long_name = 'Tales of Absurdity (from Tapastic)'
    url = 'http://tapastic.com/series/Tales-Of-Absurdity'
4465
4466
4467
class BFGFSTapa(GenericTapasticComic):
    """BFGFS comics, retrieved from tapastic.com."""
    # Also published at http://bfgfs.com
    # Also published at http://bfgfs.tumblr.com
    name = 'bfgfs-tapa'
    long_name = 'BFGFS (from Tapastic)'
    url = 'https://tapastic.com/series/BFGFS'
4474
4475
4476
class DoodleForFoodTapa(GenericTapasticComic):
    """Doodle For Food comics, retrieved from tapastic.com."""
    # Also published at http://doodleforfood.com
    name = 'doodle-tapa'
    long_name = 'Doodle For Food (from Tapastic)'
    url = 'https://tapastic.com/series/Doodle-for-Food'
4482
4483
4484
class MrLovensteinTapa(GenericTapasticComic):
    """Mr Lovenstein comics, retrieved from tapastic.com."""
    # NOTE(review): the previous "Also on" note pointed at this very Tapastic
    # url; the intended alternative source is presumably
    # http://www.mrlovenstein.com - confirm.
    name = 'mrlovenstein-tapa'
    long_name = 'Mr. Lovenstein (from Tapastic)'
    url = 'https://tapastic.com/series/MrLovenstein'
4490
4491
4492
class CassandraCalinTapa(GenericTapasticComic):
    """C. Cassandra comics, retrieved from tapastic.com."""
    # Also published at http://cassandracalin.com
    # Also published at http://c-cassandra.tumblr.com
    name = 'cassandra-tapa'
    long_name = 'Cassandra Calin (from Tapastic)'
    url = 'https://tapastic.com/series/C-Cassandra-comics'
4499
4500
4501
class WafflesAndPancakes(GenericTapasticComic):
    """Waffles And Pancakes comics, retrieved from tapastic.com."""
    # Also published at http://wandpcomic.com
    name = 'waffles'
    long_name = 'Waffles And Pancakes'
    url = 'https://tapastic.com/series/Waffles-and-Pancakes'
4507
4508
4509
class OurSuperAdventureTapastic(GenericTapasticComic):
    """Our Super Adventure comics, retrieved from tapastic.com."""
    # Also published at http://www.oursuperadventure.com
    # See also http://sarahssketchbook.tumblr.com
    # See also http://sarahgraley.com
    name = 'superadventure-tapastic'
    long_name = 'Our Super Adventure (from Tapastic)'
    url = 'https://tapastic.com/series/Our-Super-Adventure'
4517
4518
4519
def get_subclasses(klass):
    """Return every direct and indirect subclass of `klass` as a list.

    Direct subclasses come first, followed by the descendants of each of
    them in turn.  A class reachable through several paths appears once
    per path.
    """
    direct = klass.__subclasses__()
    found = list(direct)
    for child in direct:
        found += get_subclasses(child)
    return found
4525
4526
4527
def remove_st_nd_rd_th_from_date(string):
    """Strip English ordinal suffixes so that a date string can be parsed.

    Only a 'st', 'nd', 'rd' or 'th' directly following a digit is removed
    ('1st' -> '1', '22nd' -> '22').  The same letters inside words such as
    'August', 'Monday' or 'Saturday' are left untouched, so day and month
    names survive intact.

    Returns the cleaned string; a string without ordinals is returned
    unchanged.
    """
    # The lookbehind anchors the suffix to a preceding digit instead of
    # blindly replacing the letter pairs anywhere in the string.
    return re.sub(r'(?<=[0-9])(?:st|nd|rd|th)', '', string)
4535
4536
4537
def string_to_date(string, date_format, local=DEFAULT_LOCAL):
    """Convert `string` to a date object according to `date_format`.

    Wrapper around datetime.datetime.strptime that temporarily switches the
    process locale (LC_ALL) to `local`, so that locale-dependent directives
    such as month names are parsed consistently.  The previous locale is
    always restored, even when parsing fails.

    The format is described in
    https://docs.python.org/2/library/datetime.html#strftime-and-strptime-behavior

    Raises whatever strptime raises for a non-matching string (ValueError)
    and whatever locale.setlocale raises for an unavailable locale.
    """
    # Called without a locale argument, setlocale only reports the current setting.
    prev_locale = locale.setlocale(locale.LC_ALL)
    if local == prev_locale:
        return datetime.datetime.strptime(string, date_format).date()
    locale.setlocale(locale.LC_ALL, local)
    try:
        return datetime.datetime.strptime(string, date_format).date()
    finally:
        # Do not leak the temporary locale to the rest of the process.
        locale.setlocale(locale.LC_ALL, prev_locale)
4548
4549
4550
# Every class deriving (directly or not) from GenericComic, deduplicated.
COMICS = set(get_subclasses(GenericComic))
# Keep only the classes with a `name`; the others are presumably the
# generic helper bases that are not meant to be retrieved themselves.
VALID_COMICS = [comic for comic in COMICS if comic.name is not None]
# Lookup by short name; the assert guards against two classes sharing one.
COMIC_NAMES = {comic.name: comic for comic in VALID_COMICS}
assert len(COMIC_NAMES) == len(VALID_COMICS)
# Python class names must be unique among the valid comics as well.
CLASS_NAMES = {comic.__name__ for comic in VALID_COMICS}
assert len(CLASS_NAMES) == len(VALID_COMICS)
4556